# -----------------------------------------------------------------------------
# ply: yacc.py
#
# Copyright (C) 2001-2017
# David M. Beazley (Dabeaz LLC)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
# Python documentation strings.
# The inspiration for this technique was borrowed from John Aycock's Spark
# parsing system.  PLY might be viewed as a cross between Spark and the GNU
# bison utility.
#
# The current implementation is only somewhat object-oriented.  The LR parser
# itself is defined in terms of an object (which allows multiple parsers to
# co-exist).  However, most of the variables used during table construction
# are defined in terms of global variables.  Users shouldn't notice unless
# they are trying to define multiple parsers at the same time using threads
# (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style."  Modify the code at your
# own risk!
# ----------------------------------------------------------------------------

import re
import types
import sys
import os.path
import inspect
import base64
import warnings

__version__ = '3.10'
__tabversion__ = '3.10'

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug = True             # Debugging mode.  If set, yacc generates a
                             # 'parser.out' file in the current directory

debug_file = 'parser.out'    # Default name of the debugging file
tab_module = 'parsetab'     # Default name of the table module
default_lr = 'LALR'         # Default LR table generation method

error_count = 3             # Number of symbols that must be shifted to leave recovery mode

yaccdevel = False           # Set to True if developing yacc.  This turns off optimized
                            # implementations of certain functions.

resultlimit = 40            # Size limit of results when running in debug mode.

pickle_protocol = 0         # Protocol to use when writing pickle files

# String type-checking compatibility: on Python 2 accept both str and unicode
# via basestring; on Python 3 only str exists.  (basestring is evaluated only
# on the Python 2 branch of the conditional.)
string_types = basestring if sys.version_info[0] < 3 else str

MAXINT = sys.maxsize

# This object is a stand-in for a logging object created by the logging
# module.  PLY uses this by default to create things such as the parser.out
# file.  If a user wants more detailed information, they can create their
# own logging object and pass it into PLY.

class PlyLogger(object):
    """Minimal logger that writes formatted messages to a file-like object."""

    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        # Plain message, %-formatted lazily like the logging module.
        self.f.write('%s\n' % (msg % args))

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: %s\n' % (msg % args))

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: %s\n' % (msg % args))

    # info and critical share debug's unprefixed output format.
    info = debug
    critical = debug

# Null logger is used when no output is generated.  Every attribute access
# and every call simply returns the logger itself, so chained use is a no-op.
class NullLogger(object):
    def __getattribute__(self, name):
        return self

    def __call__(self, *args, **kwargs):
        return self

# Exception raised for yacc-related errors
class YaccError(Exception):
    pass
# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    """Return a short display tag for a rule result: '<type @ addr> (repr)'.

    Multi-line reprs are re-escaped onto one line and long reprs are
    truncated at *resultlimit* characters.
    """
    text = repr(r)
    if '\n' in text:
        text = repr(text)
    if len(text) > resultlimit:
        text = text[:resultlimit] + ' ...'
    return '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), text)

# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    """Return the repr of *r* if short (< 16 chars), else a '<type @ addr>' tag."""
    text = repr(r)
    if '\n' in text:
        text = repr(text)
    if len(text) < 16:
        return text
    return '<%s @ 0x%x>' % (type(r).__name__, id(r))

# Panic mode error recovery support.   This feature is being reworked--much of the
# code here is to offer a deprecation/backwards compatible transition

_errok = None
_token = None
_restart = None
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''

# Deprecated module-level shims.  Each one warns, then forwards to the
# bound method captured by call_errorfunc() below.

def errok():
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    warnings.warn(_warnmsg)
    return _restart()

def token():
    warnings.warn(_warnmsg)
    return _token()

# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    """Invoke the user's p_error() with the legacy global shims temporarily bound."""
    global _errok, _token, _restart
    _errok = parser.errok
    _token = parser.token
    _restart = parser.restart
    result = errorfunc(token)
    try:
        # Unbind the shims again so stale parser references don't linger.
        del _errok, _token, _restart
    except NameError:
        pass
    return result

#-----------------------------------------------------------------------------
#                        ===  LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------
# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    """A grammar symbol on the parsing stack; displays as its .type name."""

    def __str__(self):
        return self.type

    def __repr__(self):
        return self.__str__()

# This class is a wrapper around the objects actually passed to each
# grammar rule.   Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).   The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.
class YaccProduction:
    """Wrapper handed to grammar-rule functions as ``p``.

    Index lookup and assignment read/write the ``.value`` attribute of the
    underlying symbol objects: ``p[n]`` is the n-th symbol of the current
    production, and a negative index reaches back into the parsing stack.
    """

    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [item.value for item in self.slice[n]]
        if n >= 0:
            return self.slice[n].value
        # Negative index: look below the current production on the stack.
        return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 compatibility for p[i:j].
        return [item.value for item in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        # Line number of item n, or 0 when not recorded.
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        self.slice[n].lineno = lineno

    def linespan(self, n):
        # (startline, endline) range for item n.
        first = getattr(self.slice[n], 'lineno', 0)
        last = getattr(self.slice[n], 'endlineno', first)
        return first, last

    def lexpos(self, n):
        # Lexing position of item n, or 0 when not recorded.
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        # (lexpos, endlexpos) range for item n.
        first = getattr(self.slice[n], 'lexpos', 0)
        last = getattr(self.slice[n], 'endlexpos', first)
        return first, last

    def error(self):
        # Called by rule code to force error recovery for this production.
        raise SyntaxError

# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------

class LRParser:
    """LR parsing engine driven by precomputed action/goto tables."""

    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True

    def errok(self):
        # Called (usually from p_error) to signal that recovery succeeded.
        self.errorok = True

    def restart(self):
        # Throw away the current stacks and return to the start state.
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)

    # Defaulted state support.
    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to make a rule reduction without consuming
    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
    # certain kinds of advanced parsing situations where the lexer and parser interact with
    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
    #
    # See:  https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
    def set_defaulted_states(self):
        # Map of state -> rule number (negative = reduce) for states whose
        # only possible action is a single reduction.
        self.defaulted_states = {}
        for state, actions in self.action.items():
            rules = list(actions.values())
            if len(rules) == 1 and rules[0] < 0:
                self.defaulted_states[state] = rules[0]

    def disable_defaulted_states(self):
        # Turn the optimization off; every state consults the lookahead again.
        self.defaulted_states = {}

    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Parse *input*, dispatching to the debug, tracking, or fast engine variant."""
        if debug or yaccdevel:
            if isinstance(debug, int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
        elif tracking:
            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
        else:
            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)


    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.   Optimized versions of this function
    # are automatically created by the ply/ygen.py script.   This script cuts out
    # sections enclosed in markers such as this:
    #
    #      #--! DEBUG
    #      statements
    #      #--! DEBUG
    #
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                # NOTE(review): this message lacks a trailing newline,
                                # unlike the other branches — matches upstream behavior.
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parsedebug-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY!
    # This code is automatically generated by the ply/ygen.py script. Make
    # changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery


        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer


            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]


            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t


                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None


                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:


                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                        return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

        # Call an error function here
        raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-end

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto = self.goto                         # Local reference to goto table (to avoid lookup on self.)
        prod = self.productions                  # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice = YaccProduction(None)            # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery


        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack = []                  # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack        # Put in the production
        errtoken = None                # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer


            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]


            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t


                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None


                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym


                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set.
Enter error recovery state 1125 lookaheadstack.append(lookahead) # Save the current lookahead token 1126 symstack.extend(targ[1:-1]) # Put the production slice back on the stack 1127 statestack.pop() # Pop back one state (before the reduce) 1128 state = statestack[-1] 1129 sym.type = 'error' 1130 sym.value = 'error' 1131 lookahead = sym 1132 errorcount = error_count 1133 self.errorok = False 1134 1135 continue 1136 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1137 1138 else: 1139 1140 1141 targ = [sym] 1142 1143 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1144 # The code enclosed in this section is duplicated 1145 # above as a performance optimization. Make sure 1146 # changes get made in both locations. 1147 1148 pslice.slice = targ 1149 1150 try: 1151 # Call the grammar rule with our special slice object 1152 self.state = state 1153 p.callable(pslice) 1154 symstack.append(sym) 1155 state = goto[statestack[-1]][pname] 1156 statestack.append(state) 1157 except SyntaxError: 1158 # If an error was set. Enter error recovery state 1159 lookaheadstack.append(lookahead) # Save the current lookahead token 1160 statestack.pop() # Pop back one state (before the reduce) 1161 state = statestack[-1] 1162 sym.type = 'error' 1163 sym.value = 'error' 1164 lookahead = sym 1165 errorcount = error_count 1166 self.errorok = False 1167 1168 continue 1169 # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 1170 1171 if t == 0: 1172 n = symstack[-1] 1173 result = getattr(n, 'value', None) 1174 return result 1175 1176 if t is None: 1177 1178 1179 # We have some kind of parsing error here. To handle 1180 # this, we are going to push the current token onto 1181 # the tokenstack and replace it with an 'error' token. 1182 # If there are any synchronization rules, they may 1183 # catch it. 1184 # 1185 # In addition to pushing the error token, we call call 1186 # the user defined p_error() function if this is the 1187 # first syntax error. 
This function is only called if 1188 # errorcount == 0. 1189 if errorcount == 0 or self.errorok: 1190 errorcount = error_count 1191 self.errorok = False 1192 errtoken = lookahead 1193 if errtoken.type == '$end': 1194 errtoken = None # End of file! 1195 if self.errorfunc: 1196 if errtoken and not hasattr(errtoken, 'lexer'): 1197 errtoken.lexer = lexer 1198 self.state = state 1199 tok = call_errorfunc(self.errorfunc, errtoken, self) 1200 if self.errorok: 1201 # User must have done some kind of panic 1202 # mode recovery on their own. The 1203 # returned token is the next lookahead 1204 lookahead = tok 1205 errtoken = None 1206 continue 1207 else: 1208 if errtoken: 1209 if hasattr(errtoken, 'lineno'): 1210 lineno = lookahead.lineno 1211 else: 1212 lineno = 0 1213 if lineno: 1214 sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type)) 1215 else: 1216 sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type) 1217 else: 1218 sys.stderr.write('yacc: Parse error in input. EOF\n') 1219 return 1220 1221 else: 1222 errorcount = error_count 1223 1224 # case 1: the statestack only has 1 entry on it. If we're in this state, the 1225 # entire parse has been rolled back and we're completely hosed. The token is 1226 # discarded and we just keep going. 1227 1228 if len(statestack) <= 1 and lookahead.type != '$end': 1229 lookahead = None 1230 errtoken = None 1231 state = 0 1232 # Nuke the pushback stack 1233 del lookaheadstack[:] 1234 continue 1235 1236 # case 2: the statestack has a couple of entries on it, but we're 1237 # at the end of the file. nuke the top entry and generate an error token 1238 1239 # Start nuking entries on the stack 1240 if lookahead.type == '$end': 1241 # Whoa. We're really hosed here. Bail out 1242 return 1243 1244 if lookahead.type != 'error': 1245 sym = symstack[-1] 1246 if sym.type == 'error': 1247 # Hmmm. 
Error is on top of stack, we'll just nuke input 1248 # symbol and continue 1249 lookahead = None 1250 continue 1251 1252 # Create the error symbol for the first time and make it the new lookahead symbol 1253 t = YaccSymbol() 1254 t.type = 'error' 1255 1256 if hasattr(lookahead, 'lineno'): 1257 t.lineno = t.endlineno = lookahead.lineno 1258 if hasattr(lookahead, 'lexpos'): 1259 t.lexpos = t.endlexpos = lookahead.lexpos 1260 t.value = lookahead 1261 lookaheadstack.append(lookahead) 1262 lookahead = t 1263 else: 1264 sym = symstack.pop() 1265 statestack.pop() 1266 state = statestack[-1] 1267 1268 continue 1269 1270 # Call an error function here 1271 raise RuntimeError('yacc: internal parser error!!!\n') 1272 1273 #--! parseopt-notrack-end 1274 1275# ----------------------------------------------------------------------------- 1276# === Grammar Representation === 1277# 1278# The following functions, classes, and variables are used to represent and 1279# manipulate the rules that make up a grammar. 1280# ----------------------------------------------------------------------------- 1281 1282# regex matching identifiers 1283_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$') 1284 1285# ----------------------------------------------------------------------------- 1286# class Production: 1287# 1288# This class stores the raw information about a single production or grammar rule. 1289# A grammar rule refers to a specification such as this: 1290# 1291# expr : expr PLUS term 1292# 1293# Here are the basic attributes defined on all productions 1294# 1295# name - Name of the production. For example 'expr' 1296# prod - A list of symbols on the right side ['expr','PLUS','term'] 1297# prec - Production precedence level 1298# number - Production number. 1299# func - Function that executes on reduce 1300# file - File where production function is defined 1301# lineno - Line number where production function is defined 1302# 1303# The following attributes are defined or optional. 
#
#       len       - Length of the production (number of symbols on right hand side)
#       usyms     - Set of unique symbols found in the production
# -----------------------------------------------------------------------------

class Production(object):
    # Reduction counter default; presumably incremented per instance while
    # parsing -- the updates happen outside this class (TODO confirm).
    reduced = 0

    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Internal settings used during table construction

        # Length of the production
        self.len = len(self.prod)

        # Unique right-hand-side symbols, kept in first-appearance order
        seen = set()
        self.usyms = []
        for symbol in self.prod:
            if symbol in seen:
                continue
            seen.add(symbol)
            self.usyms.append(symbol)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Cached string representation, e.g. "expr -> expr PLUS term"
        self.str = ('%s -> %s' % (self.name, ' '.join(self.prod))
                    if self.prod else '%s -> <empty>' % self.name)

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(%s)' % self

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Python 2 truth protocol: a production is always truthy
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    def lr_item(self, n):
        # Return the LR item with the dot at position n (or None if n is
        # past the end of the production)
        if n > len(self.prod):
            return None
        item = LRItem(self, n)
        # Precompute the list of productions immediately following the dot
        # (Prodnames is a module-level table populated elsewhere)
        try:
            item.lr_after = Prodnames[item.prod[n+1]]
        except (IndexError, KeyError):
            item.lr_after = []
        # Grammar symbol immediately before the dot, if any
        try:
            item.lr_before = item.prod[n-1]
        except IndexError:
            item.lr_before = None
        return item

    def bind(self, pdict):
        # Bind the production function name to a callable looked up in pdict
        if not self.func:
            return
        self.callable = pdict[self.func]

# This class serves as a minimal standin for Production objects when
# reading table data from files.  It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self, str, name, len, func, file, line):
        self.str = str
        self.name = name
        self.len = len
        self.func = func
        self.file = file
        self.line = line
        self.callable = None

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    def bind(self, pdict):
        # Bind the production function name to a callable looked up in pdict
        if not self.func:
            return
        self.callable = pdict[self.func]


# -----------------------------------------------------------------------------
# class LRItem
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here
# basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next      Next LR item. Example, if we are ' expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
1421# lookaheads - LALR lookahead symbols for this item 1422# len - Length of the production (number of symbols on right hand side) 1423# lr_after - List of all productions that immediately follow 1424# lr_before - Grammar symbol immediately before 1425# ----------------------------------------------------------------------------- 1426 1427class LRItem(object): 1428 def __init__(self, p, n): 1429 self.name = p.name 1430 self.prod = list(p.prod) 1431 self.number = p.number 1432 self.lr_index = n 1433 self.lookaheads = {} 1434 self.prod.insert(n, '.') 1435 self.prod = tuple(self.prod) 1436 self.len = len(self.prod) 1437 self.usyms = p.usyms 1438 1439 def __str__(self): 1440 if self.prod: 1441 s = '%s -> %s' % (self.name, ' '.join(self.prod)) 1442 else: 1443 s = '%s -> <empty>' % self.name 1444 return s 1445 1446 def __repr__(self): 1447 return 'LRItem(' + str(self) + ')' 1448 1449# ----------------------------------------------------------------------------- 1450# rightmost_terminal() 1451# 1452# Return the rightmost terminal from a list of symbols. Used in add_production() 1453# ----------------------------------------------------------------------------- 1454def rightmost_terminal(symbols, terminals): 1455 i = len(symbols) - 1 1456 while i >= 0: 1457 if symbols[i] in terminals: 1458 return symbols[i] 1459 i -= 1 1460 return None 1461 1462# ----------------------------------------------------------------------------- 1463# === GRAMMAR CLASS === 1464# 1465# The following class represents the contents of the specified grammar along 1466# with various computed properties such as first sets, follow sets, LR items, etc. 1467# This data is used for critical parts of the table generation process later. 1468# ----------------------------------------------------------------------------- 1469 1470class GrammarError(YaccError): 1471 pass 1472 1473class Grammar(object): 1474 def __init__(self, terminals): 1475 self.Productions = [None] # A list of all of the productions. 
The first 1476 # entry is always reserved for the purpose of 1477 # building an augmented grammar 1478 1479 self.Prodnames = {} # A dictionary mapping the names of nonterminals to a list of all 1480 # productions of that nonterminal. 1481 1482 self.Prodmap = {} # A dictionary that is only used to detect duplicate 1483 # productions. 1484 1485 self.Terminals = {} # A dictionary mapping the names of terminal symbols to a 1486 # list of the rules where they are used. 1487 1488 for term in terminals: 1489 self.Terminals[term] = [] 1490 1491 self.Terminals['error'] = [] 1492 1493 self.Nonterminals = {} # A dictionary mapping names of nonterminals to a list 1494 # of rule numbers where they are used. 1495 1496 self.First = {} # A dictionary of precomputed FIRST(x) symbols 1497 1498 self.Follow = {} # A dictionary of precomputed FOLLOW(x) symbols 1499 1500 self.Precedence = {} # Precedence rules for each terminal. Contains tuples of the 1501 # form ('right',level) or ('nonassoc', level) or ('left',level) 1502 1503 self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer. 1504 # This is only used to provide error checking and to generate 1505 # a warning about unused precedence rules. 1506 1507 self.Start = None # Starting symbol for the grammar 1508 1509 1510 def __len__(self): 1511 return len(self.Productions) 1512 1513 def __getitem__(self, index): 1514 return self.Productions[index] 1515 1516 # ----------------------------------------------------------------------------- 1517 # set_precedence() 1518 # 1519 # Sets the precedence for a given terminal. assoc is the associativity such as 1520 # 'left','right', or 'nonassoc'. level is a numeric level. 
1521 # 1522 # ----------------------------------------------------------------------------- 1523 1524 def set_precedence(self, term, assoc, level): 1525 assert self.Productions == [None], 'Must call set_precedence() before add_production()' 1526 if term in self.Precedence: 1527 raise GrammarError('Precedence already specified for terminal %r' % term) 1528 if assoc not in ['left', 'right', 'nonassoc']: 1529 raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'") 1530 self.Precedence[term] = (assoc, level) 1531 1532 # ----------------------------------------------------------------------------- 1533 # add_production() 1534 # 1535 # Given an action function, this function assembles a production rule and 1536 # computes its precedence level. 1537 # 1538 # The production rule is supplied as a list of symbols. For example, 1539 # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and 1540 # symbols ['expr','PLUS','term']. 1541 # 1542 # Precedence is determined by the precedence of the right-most non-terminal 1543 # or the precedence of a terminal specified by %prec. 1544 # 1545 # A variety of error checks are performed to make sure production symbols 1546 # are valid and that %prec is used correctly. 1547 # ----------------------------------------------------------------------------- 1548 1549 def add_production(self, prodname, syms, func=None, file='', line=0): 1550 1551 if prodname in self.Terminals: 1552 raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname)) 1553 if prodname == 'error': 1554 raise GrammarError('%s:%d: Illegal rule name %r. 
error is a reserved word' % (file, line, prodname)) 1555 if not _is_identifier.match(prodname): 1556 raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname)) 1557 1558 # Look for literal tokens 1559 for n, s in enumerate(syms): 1560 if s[0] in "'\"": 1561 try: 1562 c = eval(s) 1563 if (len(c) > 1): 1564 raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' % 1565 (file, line, s, prodname)) 1566 if c not in self.Terminals: 1567 self.Terminals[c] = [] 1568 syms[n] = c 1569 continue 1570 except SyntaxError: 1571 pass 1572 if not _is_identifier.match(s) and s != '%prec': 1573 raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname)) 1574 1575 # Determine the precedence level 1576 if '%prec' in syms: 1577 if syms[-1] == '%prec': 1578 raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line)) 1579 if syms[-2] != '%prec': 1580 raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' % 1581 (file, line)) 1582 precname = syms[-1] 1583 prodprec = self.Precedence.get(precname) 1584 if not prodprec: 1585 raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname)) 1586 else: 1587 self.UsedPrecedence.add(precname) 1588 del syms[-2:] # Drop %prec from the rule 1589 else: 1590 # If no %prec, precedence is determined by the rightmost terminal symbol 1591 precname = rightmost_terminal(syms, self.Terminals) 1592 prodprec = self.Precedence.get(precname, ('right', 0)) 1593 1594 # See if the rule is already in the rulemap 1595 map = '%s -> %s' % (prodname, syms) 1596 if map in self.Prodmap: 1597 m = self.Prodmap[map] 1598 raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) + 1599 'Previous definition at %s:%d' % (m.file, m.line)) 1600 1601 # From this point on, everything is valid. 
Create a new Production instance 1602 pnumber = len(self.Productions) 1603 if prodname not in self.Nonterminals: 1604 self.Nonterminals[prodname] = [] 1605 1606 # Add the production number to Terminals and Nonterminals 1607 for t in syms: 1608 if t in self.Terminals: 1609 self.Terminals[t].append(pnumber) 1610 else: 1611 if t not in self.Nonterminals: 1612 self.Nonterminals[t] = [] 1613 self.Nonterminals[t].append(pnumber) 1614 1615 # Create a production and add it to the list of productions 1616 p = Production(pnumber, prodname, syms, prodprec, func, file, line) 1617 self.Productions.append(p) 1618 self.Prodmap[map] = p 1619 1620 # Add to the global productions list 1621 try: 1622 self.Prodnames[prodname].append(p) 1623 except KeyError: 1624 self.Prodnames[prodname] = [p] 1625 1626 # ----------------------------------------------------------------------------- 1627 # set_start() 1628 # 1629 # Sets the starting symbol and creates the augmented grammar. Production 1630 # rule 0 is S' -> start where start is the start symbol. 1631 # ----------------------------------------------------------------------------- 1632 1633 def set_start(self, start=None): 1634 if not start: 1635 start = self.Productions[1].name 1636 if start not in self.Nonterminals: 1637 raise GrammarError('start symbol %s undefined' % start) 1638 self.Productions[0] = Production(0, "S'", [start]) 1639 self.Nonterminals[start].append(0) 1640 self.Start = start 1641 1642 # ----------------------------------------------------------------------------- 1643 # find_unreachable() 1644 # 1645 # Find all of the nonterminal symbols that can't be reached from the starting 1646 # symbol. Returns a list of nonterminals that can't be reached. 
1647 # ----------------------------------------------------------------------------- 1648 1649 def find_unreachable(self): 1650 1651 # Mark all symbols that are reachable from a symbol s 1652 def mark_reachable_from(s): 1653 if s in reachable: 1654 return 1655 reachable.add(s) 1656 for p in self.Prodnames.get(s, []): 1657 for r in p.prod: 1658 mark_reachable_from(r) 1659 1660 reachable = set() 1661 mark_reachable_from(self.Productions[0].prod[0]) 1662 return [s for s in self.Nonterminals if s not in reachable] 1663 1664 # ----------------------------------------------------------------------------- 1665 # infinite_cycles() 1666 # 1667 # This function looks at the various parsing rules and tries to detect 1668 # infinite recursion cycles (grammar rules where there is no possible way 1669 # to derive a string of only terminals). 1670 # ----------------------------------------------------------------------------- 1671 1672 def infinite_cycles(self): 1673 terminates = {} 1674 1675 # Terminals: 1676 for t in self.Terminals: 1677 terminates[t] = True 1678 1679 terminates['$end'] = True 1680 1681 # Nonterminals: 1682 1683 # Initialize to false: 1684 for n in self.Nonterminals: 1685 terminates[n] = False 1686 1687 # Then propagate termination until no change: 1688 while True: 1689 some_change = False 1690 for (n, pl) in self.Prodnames.items(): 1691 # Nonterminal n terminates iff any of its productions terminates. 1692 for p in pl: 1693 # Production p terminates iff all of its rhs symbols terminate. 1694 for s in p.prod: 1695 if not terminates[s]: 1696 # The symbol s does not terminate, 1697 # so production p does not terminate. 1698 p_terminates = False 1699 break 1700 else: 1701 # didn't break from the loop, 1702 # so every symbol s terminates 1703 # so production p terminates. 1704 p_terminates = True 1705 1706 if p_terminates: 1707 # symbol n terminates! 
1708 if not terminates[n]: 1709 terminates[n] = True 1710 some_change = True 1711 # Don't need to consider any more productions for this n. 1712 break 1713 1714 if not some_change: 1715 break 1716 1717 infinite = [] 1718 for (s, term) in terminates.items(): 1719 if not term: 1720 if s not in self.Prodnames and s not in self.Terminals and s != 'error': 1721 # s is used-but-not-defined, and we've already warned of that, 1722 # so it would be overkill to say that it's also non-terminating. 1723 pass 1724 else: 1725 infinite.append(s) 1726 1727 return infinite 1728 1729 # ----------------------------------------------------------------------------- 1730 # undefined_symbols() 1731 # 1732 # Find all symbols that were used the grammar, but not defined as tokens or 1733 # grammar rules. Returns a list of tuples (sym, prod) where sym in the symbol 1734 # and prod is the production where the symbol was used. 1735 # ----------------------------------------------------------------------------- 1736 def undefined_symbols(self): 1737 result = [] 1738 for p in self.Productions: 1739 if not p: 1740 continue 1741 1742 for s in p.prod: 1743 if s not in self.Prodnames and s not in self.Terminals and s != 'error': 1744 result.append((s, p)) 1745 return result 1746 1747 # ----------------------------------------------------------------------------- 1748 # unused_terminals() 1749 # 1750 # Find all terminals that were defined, but not used by the grammar. Returns 1751 # a list of all symbols. 
1752 # ----------------------------------------------------------------------------- 1753 def unused_terminals(self): 1754 unused_tok = [] 1755 for s, v in self.Terminals.items(): 1756 if s != 'error' and not v: 1757 unused_tok.append(s) 1758 1759 return unused_tok 1760 1761 # ------------------------------------------------------------------------------ 1762 # unused_rules() 1763 # 1764 # Find all grammar rules that were defined, but not used (maybe not reachable) 1765 # Returns a list of productions. 1766 # ------------------------------------------------------------------------------ 1767 1768 def unused_rules(self): 1769 unused_prod = [] 1770 for s, v in self.Nonterminals.items(): 1771 if not v: 1772 p = self.Prodnames[s][0] 1773 unused_prod.append(p) 1774 return unused_prod 1775 1776 # ----------------------------------------------------------------------------- 1777 # unused_precedence() 1778 # 1779 # Returns a list of tuples (term,precedence) corresponding to precedence 1780 # rules that were never used by the grammar. term is the name of the terminal 1781 # on which precedence was applied and precedence is a string such as 'left' or 1782 # 'right' corresponding to the type of precedence. 1783 # ----------------------------------------------------------------------------- 1784 1785 def unused_precedence(self): 1786 unused = [] 1787 for termname in self.Precedence: 1788 if not (termname in self.Terminals or termname in self.UsedPrecedence): 1789 unused.append((termname, self.Precedence[termname][0])) 1790 1791 return unused 1792 1793 # ------------------------------------------------------------------------- 1794 # _first() 1795 # 1796 # Compute the value of FIRST1(beta) where beta is a tuple of symbols. 1797 # 1798 # During execution of compute_first1, the result may be incomplete. 1799 # Afterward (e.g., when called from compute_follow()), it will be complete. 
    # -------------------------------------------------------------------------
    def _first(self, beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = []
        for x in beta:
            x_produces_empty = False

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = True
                else:
                    if f not in result:
                        result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        # Memoized: once computed, the cached table is returned unchanged
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        # (fixed-point iteration over all productions)
        while True:
            some_change = False
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append(f)
                            some_change = True
            if not some_change:
                break

        return self.First

    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self, start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = []

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = ['$end']

        # Fixed-point iteration: keep propagating until nothing new is added
        while True:
            didadd = False
            for p in self.Productions[1:]:
                # Here is the production set
                for i, B in enumerate(p.prod):
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = False
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = True
                            if f == '<empty>':
                                hasempty = True
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = True
            if not didadd:
                break
        return self.Follow


    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.  For example:
    #
    #   E -> E PLUS E
    #
    # Creates the list
    #
    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        for p in self.Productions:
            # Thread a linked list through the items via lr_next, starting
            # from the production itself
            lastlri = p
            i = 0
            lr_items = []
            while True:
                if i > len(p):
                    # Dot has moved past the end: terminate the linked list
                    lri = None
                else:
                    lri = LRItem(p, i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError, KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri:
                    break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items

# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This basic class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here.  They are defined
# in the derived class LRGeneratedTable.
1966# ----------------------------------------------------------------------------- 1967 1968class VersionError(YaccError): 1969 pass 1970 1971class LRTable(object): 1972 def __init__(self): 1973 self.lr_action = None 1974 self.lr_goto = None 1975 self.lr_productions = None 1976 self.lr_method = None 1977 1978 def read_table(self, module): 1979 if isinstance(module, types.ModuleType): 1980 parsetab = module 1981 else: 1982 exec('import %s' % module) 1983 parsetab = sys.modules[module] 1984 1985 if parsetab._tabversion != __tabversion__: 1986 raise VersionError('yacc table file version is out of date') 1987 1988 self.lr_action = parsetab._lr_action 1989 self.lr_goto = parsetab._lr_goto 1990 1991 self.lr_productions = [] 1992 for p in parsetab._lr_productions: 1993 self.lr_productions.append(MiniProduction(*p)) 1994 1995 self.lr_method = parsetab._lr_method 1996 return parsetab._lr_signature 1997 1998 def read_pickle(self, filename): 1999 try: 2000 import cPickle as pickle 2001 except ImportError: 2002 import pickle 2003 2004 if not os.path.exists(filename): 2005 raise ImportError 2006 2007 in_f = open(filename, 'rb') 2008 2009 tabversion = pickle.load(in_f) 2010 if tabversion != __tabversion__: 2011 raise VersionError('yacc table file version is out of date') 2012 self.lr_method = pickle.load(in_f) 2013 signature = pickle.load(in_f) 2014 self.lr_action = pickle.load(in_f) 2015 self.lr_goto = pickle.load(in_f) 2016 productions = pickle.load(in_f) 2017 2018 self.lr_productions = [] 2019 for p in productions: 2020 self.lr_productions.append(MiniProduction(*p)) 2021 2022 in_f.close() 2023 return signature 2024 2025 # Bind all production function names to callable objects in pdict 2026 def bind_callables(self, pdict): 2027 for p in self.lr_productions: 2028 p.bind(pdict) 2029 2030 2031# ----------------------------------------------------------------------------- 2032# === LR Generator === 2033# 2034# The following classes and functions are used to generate LR parsing 
# tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
# digraph()
# traverse()
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------

def digraph(X, R, FP):
    # N[x] records traversal state: 0 = not yet visited, the stack depth
    # while x is on the stack, and MAXINT once x's value is final.
    N = {}
    for x in X:
        N[x] = 0
    stack = []
    F = {}
    for x in X:
        if N[x] == 0:
            traverse(x, N, stack, F, X, R, FP)
    return F

def traverse(x, N, stack, F, X, R, FP):
    # Depth-first walk used by digraph() (the DeRemer/Pennello algorithm):
    # propagate F sets along the relation R, and when a cycle is detected
    # give every member of the cycle the same combined F value.
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        # Low-link style minimum: x inherits the smallest index reachable
        N[x] = min(N[x], N[y])
        # Merge F(y) into F(x), preserving order and avoiding duplicates
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        # x is the root of its component: pop everything above it (inclusive),
        # marking each element finished and sharing the combined F set.
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()

class LALRError(YaccError):
    # Raised for internal errors detected during LALR table construction.
    pass

# -----------------------------------------------------------------------------
# == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.
# There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    """LR table generator.

    Builds the SLR or LALR(1) action/goto tables for a Grammar object at
    construction time; results are stored in the attributes inherited from
    LRTable (lr_action, lr_goto, lr_productions, lr_method)."""

    def __init__(self, grammar, method='LALR', log=None):
        # method must be either 'SLR' or 'LALR'
        if method not in ['SLR', 'LALR']:
            raise LALRError('Unsupported method %s' % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action = {}                  # Action table
        self.lr_goto = {}                    # Goto table
        self.lr_productions = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}              # Cache of computed gotos
        self.lr0_cidhash = {}                # Cache of closures

        self._add_count = 0                  # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict = 0
        self.rr_conflict = 0
        self.conflicts = []                  # List of conflicts

        self.sr_conflicts = []
        self.rr_conflicts = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()

    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self, I):
        # Bump the generation counter; an item tagged with the current count
        # has already been added during *this* closure computation, which
        # avoids re-adding items (and looping) on recursive grammars.
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = True
        while didadd:
            didadd = False
            # Note: J grows while being iterated; newly appended items are
            # picked up on the same or a later sweep.
            for j in J:
                for x in j.lr_after:
                    if getattr(x, 'lr0_added', 0) == self._add_count:
                        continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = True

        return J

    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.   This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e.
    # the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self, I, x):
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result.  The nested dicts rooted at lr_goto_cache[x] form a
        # trie keyed by the ids of the items that shift over x; the '$end'
        # slot of the final trie node holds the one goto set for that exact
        # combination of items, so equal goto sets are always the same object.

        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                # No items shift over x: cache the empty list itself
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g

    # Compute the LR(0) sets of item function
    def lr0_items(self):
        # C is the canonical collection of LR(0) item sets; lr0_cidhash maps
        # the id() of each set to its state number.
        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbols
        # (C grows as new goto sets are discovered, hence the while loop)
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = {}
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I, x)
                if not g or id(g) in self.lr0_cidhash:
                    continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C

    # -----------------------------------------------------------------------------
    # ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
2232 # Thus, we mainly just have to focus on calculating the lookahead sets. 2233 # 2234 # The method used here is due to DeRemer and Pennelo (1982). 2235 # 2236 # DeRemer, F. L., and T. J. Pennelo: "Efficient Computation of LALR(1) 2237 # Lookahead Sets", ACM Transactions on Programming Languages and Systems, 2238 # Vol. 4, No. 4, Oct. 1982, pp. 615-649 2239 # 2240 # Further details can also be found in: 2241 # 2242 # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing", 2243 # McGraw-Hill Book Company, (1985). 2244 # 2245 # ----------------------------------------------------------------------------- 2246 2247 # ----------------------------------------------------------------------------- 2248 # compute_nullable_nonterminals() 2249 # 2250 # Creates a dictionary containing all of the non-terminals that might produce 2251 # an empty production. 2252 # ----------------------------------------------------------------------------- 2253 2254 def compute_nullable_nonterminals(self): 2255 nullable = set() 2256 num_nullable = 0 2257 while True: 2258 for p in self.grammar.Productions[1:]: 2259 if p.len == 0: 2260 nullable.add(p.name) 2261 continue 2262 for t in p.prod: 2263 if t not in nullable: 2264 break 2265 else: 2266 nullable.add(p.name) 2267 if len(nullable) == num_nullable: 2268 break 2269 num_nullable = len(nullable) 2270 return nullable 2271 2272 # ----------------------------------------------------------------------------- 2273 # find_nonterminal_trans(C) 2274 # 2275 # Given a set of LR(0) items, this functions finds all of the non-terminal 2276 # transitions. These are transitions in which a dot appears immediately before 2277 # a non-terminal. Returns a list of tuples of the form (state,N) where state 2278 # is the state number and N is the nonterminal symbol. 2279 # 2280 # The input C is the set of LR(0) items. 
2281 # ----------------------------------------------------------------------------- 2282 2283 def find_nonterminal_transitions(self, C): 2284 trans = [] 2285 for stateno, state in enumerate(C): 2286 for p in state: 2287 if p.lr_index < p.len - 1: 2288 t = (stateno, p.prod[p.lr_index+1]) 2289 if t[1] in self.grammar.Nonterminals: 2290 if t not in trans: 2291 trans.append(t) 2292 return trans 2293 2294 # ----------------------------------------------------------------------------- 2295 # dr_relation() 2296 # 2297 # Computes the DR(p,A) relationships for non-terminal transitions. The input 2298 # is a tuple (state,N) where state is a number and N is a nonterminal symbol. 2299 # 2300 # Returns a list of terminals. 2301 # ----------------------------------------------------------------------------- 2302 2303 def dr_relation(self, C, trans, nullable): 2304 dr_set = {} 2305 state, N = trans 2306 terms = [] 2307 2308 g = self.lr0_goto(C[state], N) 2309 for p in g: 2310 if p.lr_index < p.len - 1: 2311 a = p.prod[p.lr_index+1] 2312 if a in self.grammar.Terminals: 2313 if a not in terms: 2314 terms.append(a) 2315 2316 # This extra bit is to handle the start state 2317 if state == 0 and N == self.grammar.Productions[0].prod[0]: 2318 terms.append('$end') 2319 2320 return terms 2321 2322 # ----------------------------------------------------------------------------- 2323 # reads_relation() 2324 # 2325 # Computes the READS() relation (p,A) READS (t,C). 
2326 # ----------------------------------------------------------------------------- 2327 2328 def reads_relation(self, C, trans, empty): 2329 # Look for empty transitions 2330 rel = [] 2331 state, N = trans 2332 2333 g = self.lr0_goto(C[state], N) 2334 j = self.lr0_cidhash.get(id(g), -1) 2335 for p in g: 2336 if p.lr_index < p.len - 1: 2337 a = p.prod[p.lr_index + 1] 2338 if a in empty: 2339 rel.append((j, a)) 2340 2341 return rel 2342 2343 # ----------------------------------------------------------------------------- 2344 # compute_lookback_includes() 2345 # 2346 # Determines the lookback and includes relations 2347 # 2348 # LOOKBACK: 2349 # 2350 # This relation is determined by running the LR(0) state machine forward. 2351 # For example, starting with a production "N : . A B C", we run it forward 2352 # to obtain "N : A B C ." We then build a relationship between this final 2353 # state and the starting state. These relationships are stored in a dictionary 2354 # lookdict. 2355 # 2356 # INCLUDES: 2357 # 2358 # Computes the INCLUDE() relation (p,A) INCLUDES (p',B). 2359 # 2360 # This relation is used to determine non-terminal transitions that occur 2361 # inside of other non-terminal transition states. (p,A) INCLUDES (p', B) 2362 # if the following holds: 2363 # 2364 # B -> LAT, where T -> epsilon and p' -L-> p 2365 # 2366 # L is essentially a prefix (which may be empty), T is a suffix that must be 2367 # able to derive an empty string. State p' must lead to state p with the string L. 
    #
    # -----------------------------------------------------------------------------

    def compute_lookback_includes(self, C, trans, nullable):
        """Compute the LOOKBACK and INCLUDES relations (see the comment
        block above) for every nonterminal transition in *trans*.

        Returns (lookdict, includedict):
          lookdict[(state, N)]  -> list of (final_state, production) pairs
          includedict[(j, t)]   -> list of (state, N) transitions included
        """
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state, N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N:
                    continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j, t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals:
                                break      # No forget it
                            if p.prod[li] not in nullable:
                                break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j, t))

                    g = self.lr0_goto(C[j], t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name:
                        continue
                    if r.len != p.len:
                        continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    # (same rule with the dot moved to the far right)
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]:
                            break
                        i = i + 1
                    else:
                        lookb.append((j, r))
            for i in includes:
                if i not in includedict:
                    includedict[i] = []
                includedict[i].append((state, N))
            lookdict[(state, N)] = lookb

        return lookdict, includedict

    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
    #
    # Inputs:  C        =  Set of LR(0) items
    #          ntrans   = Set of nonterminal transitions
    #          nullable = Set of empty transitions
    #
    # Returns a set containing the read sets
    # -----------------------------------------------------------------------------

    def compute_read_sets(self, C, ntrans, nullable):
        # Read(p,A) is the digraph closure of DR under the READS relation
        FP = lambda x: self.dr_relation(C, x, nullable)
        R = lambda x: self.reads_relation(C, x, nullable)
        F = digraph(ntrans, R, FP)
        return F

    # -----------------------------------------------------------------------------
    # compute_follow_sets()
    #
    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
    # and an include set, this function computes the follow sets
    #
    # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
    #
    # Inputs:
    #            ntrans     = Set of nonterminal transitions
    #            readsets   = Readset (previously computed)
    #            inclsets   = Include sets (previously computed)
    #
    # Returns a set containing the follow sets
    # -----------------------------------------------------------------------------

    def compute_follow_sets(self, ntrans, readsets, inclsets):
        # Follow(p,A) is the digraph closure of Read under INCLUDES
        FP = lambda x: readsets[x]
        R = lambda x: inclsets.get(x, [])
        F = digraph(ntrans, R, FP)
        return F

    # -----------------------------------------------------------------------------
    # add_lookaheads()
    #
    # Attaches the lookahead
symbols to grammar rules. 2483 # 2484 # Inputs: lookbacks - Set of lookback relations 2485 # followset - Computed follow set 2486 # 2487 # This function directly attaches the lookaheads to productions contained 2488 # in the lookbacks set 2489 # ----------------------------------------------------------------------------- 2490 2491 def add_lookaheads(self, lookbacks, followset): 2492 for trans, lb in lookbacks.items(): 2493 # Loop over productions in lookback 2494 for state, p in lb: 2495 if state not in p.lookaheads: 2496 p.lookaheads[state] = [] 2497 f = followset.get(trans, []) 2498 for a in f: 2499 if a not in p.lookaheads[state]: 2500 p.lookaheads[state].append(a) 2501 2502 # ----------------------------------------------------------------------------- 2503 # add_lalr_lookaheads() 2504 # 2505 # This function does all of the work of adding lookahead information for use 2506 # with LALR parsing 2507 # ----------------------------------------------------------------------------- 2508 2509 def add_lalr_lookaheads(self, C): 2510 # Determine all of the nullable nonterminals 2511 nullable = self.compute_nullable_nonterminals() 2512 2513 # Find all non-terminal transitions 2514 trans = self.find_nonterminal_transitions(C) 2515 2516 # Compute read sets 2517 readsets = self.compute_read_sets(C, trans, nullable) 2518 2519 # Compute lookback/includes relations 2520 lookd, included = self.compute_lookback_includes(C, trans, nullable) 2521 2522 # Compute LALR FOLLOW sets 2523 followsets = self.compute_follow_sets(trans, readsets, included) 2524 2525 # Add all of the lookaheads 2526 self.add_lookaheads(lookd, followsets) 2527 2528 # ----------------------------------------------------------------------------- 2529 # lr_parse_table() 2530 # 2531 # This function constructs the parse tables for SLR or LALR 2532 # ----------------------------------------------------------------------------- 2533 def lr_parse_table(self): 2534 Productions = self.grammar.Productions 2535 
Precedence = self.grammar.Precedence 2536 goto = self.lr_goto # Goto array 2537 action = self.lr_action # Action array 2538 log = self.log # Logger for output 2539 2540 actionp = {} # Action production array (temporary) 2541 2542 log.info('Parsing method: %s', self.lr_method) 2543 2544 # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items 2545 # This determines the number of states 2546 2547 C = self.lr0_items() 2548 2549 if self.lr_method == 'LALR': 2550 self.add_lalr_lookaheads(C) 2551 2552 # Build the parser table, state by state 2553 st = 0 2554 for I in C: 2555 # Loop over each production in I 2556 actlist = [] # List of actions 2557 st_action = {} 2558 st_actionp = {} 2559 st_goto = {} 2560 log.info('') 2561 log.info('state %d', st) 2562 log.info('') 2563 for p in I: 2564 log.info(' (%d) %s', p.number, p) 2565 log.info('') 2566 2567 for p in I: 2568 if p.len == p.lr_index + 1: 2569 if p.name == "S'": 2570 # Start symbol. Accept! 2571 st_action['$end'] = 0 2572 st_actionp['$end'] = p 2573 else: 2574 # We are at the end of a production. Reduce! 2575 if self.lr_method == 'LALR': 2576 laheads = p.lookaheads[st] 2577 else: 2578 laheads = self.grammar.Follow[p.name] 2579 for a in laheads: 2580 actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p))) 2581 r = st_action.get(a) 2582 if r is not None: 2583 # Whoa. Have a shift/reduce or reduce/reduce conflict 2584 if r > 0: 2585 # Need to decide on shift or reduce here 2586 # By default we favor shifting. Need to add 2587 # some precedence rules here. 2588 2589 # Shift precedence comes from the token 2590 sprec, slevel = Precedence.get(a, ('right', 0)) 2591 2592 # Reduce precedence comes from rule being reduced (p) 2593 rprec, rlevel = Productions[p.number].prec 2594 2595 if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')): 2596 # We really need to reduce here. 2597 st_action[a] = -p.number 2598 st_actionp[a] = p 2599 if not slevel and not rlevel: 2600 log.info(' ! 
shift/reduce conflict for %s resolved as reduce', a) 2601 self.sr_conflicts.append((st, a, 'reduce')) 2602 Productions[p.number].reduced += 1 2603 elif (slevel == rlevel) and (rprec == 'nonassoc'): 2604 st_action[a] = None 2605 else: 2606 # Hmmm. Guess we'll keep the shift 2607 if not rlevel: 2608 log.info(' ! shift/reduce conflict for %s resolved as shift', a) 2609 self.sr_conflicts.append((st, a, 'shift')) 2610 elif r < 0: 2611 # Reduce/reduce conflict. In this case, we favor the rule 2612 # that was defined first in the grammar file 2613 oldp = Productions[-r] 2614 pp = Productions[p.number] 2615 if oldp.line > pp.line: 2616 st_action[a] = -p.number 2617 st_actionp[a] = p 2618 chosenp, rejectp = pp, oldp 2619 Productions[p.number].reduced += 1 2620 Productions[oldp.number].reduced -= 1 2621 else: 2622 chosenp, rejectp = oldp, pp 2623 self.rr_conflicts.append((st, chosenp, rejectp)) 2624 log.info(' ! reduce/reduce conflict for %s resolved using rule %d (%s)', 2625 a, st_actionp[a].number, st_actionp[a]) 2626 else: 2627 raise LALRError('Unknown conflict in state %d' % st) 2628 else: 2629 st_action[a] = -p.number 2630 st_actionp[a] = p 2631 Productions[p.number].reduced += 1 2632 else: 2633 i = p.lr_index 2634 a = p.prod[i+1] # Get symbol right after the "." 2635 if a in self.grammar.Terminals: 2636 g = self.lr0_goto(I, a) 2637 j = self.lr0_cidhash.get(id(g), -1) 2638 if j >= 0: 2639 # We are in a shift state 2640 actlist.append((a, p, 'shift and go to state %d' % j)) 2641 r = st_action.get(a) 2642 if r is not None: 2643 # Whoa have a shift/reduce or shift/shift conflict 2644 if r > 0: 2645 if r != j: 2646 raise LALRError('Shift/shift conflict in state %d' % st) 2647 elif r < 0: 2648 # Do a precedence check. 2649 # - if precedence of reduce rule is higher, we reduce. 2650 # - if precedence of reduce is same and left assoc, we reduce. 
2651 # - otherwise we shift 2652 2653 # Shift precedence comes from the token 2654 sprec, slevel = Precedence.get(a, ('right', 0)) 2655 2656 # Reduce precedence comes from the rule that could have been reduced 2657 rprec, rlevel = Productions[st_actionp[a].number].prec 2658 2659 if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')): 2660 # We decide to shift here... highest precedence to shift 2661 Productions[st_actionp[a].number].reduced -= 1 2662 st_action[a] = j 2663 st_actionp[a] = p 2664 if not rlevel: 2665 log.info(' ! shift/reduce conflict for %s resolved as shift', a) 2666 self.sr_conflicts.append((st, a, 'shift')) 2667 elif (slevel == rlevel) and (rprec == 'nonassoc'): 2668 st_action[a] = None 2669 else: 2670 # Hmmm. Guess we'll keep the reduce 2671 if not slevel and not rlevel: 2672 log.info(' ! shift/reduce conflict for %s resolved as reduce', a) 2673 self.sr_conflicts.append((st, a, 'reduce')) 2674 2675 else: 2676 raise LALRError('Unknown conflict in state %d' % st) 2677 else: 2678 st_action[a] = j 2679 st_actionp[a] = p 2680 2681 # Print the actions associated with each terminal 2682 _actprint = {} 2683 for a, p, m in actlist: 2684 if a in st_action: 2685 if p is st_actionp[a]: 2686 log.info(' %-15s %s', a, m) 2687 _actprint[(a, m)] = 1 2688 log.info('') 2689 # Print the actions that were not used. (debugging) 2690 not_used = 0 2691 for a, p, m in actlist: 2692 if a in st_action: 2693 if p is not st_actionp[a]: 2694 if not (a, m) in _actprint: 2695 log.debug(' ! 
%-15s [ %s ]', a, m) 2696 not_used = 1 2697 _actprint[(a, m)] = 1 2698 if not_used: 2699 log.debug('') 2700 2701 # Construct the goto table for this state 2702 2703 nkeys = {} 2704 for ii in I: 2705 for s in ii.usyms: 2706 if s in self.grammar.Nonterminals: 2707 nkeys[s] = None 2708 for n in nkeys: 2709 g = self.lr0_goto(I, n) 2710 j = self.lr0_cidhash.get(id(g), -1) 2711 if j >= 0: 2712 st_goto[n] = j 2713 log.info(' %-30s shift and go to state %d', n, j) 2714 2715 action[st] = st_action 2716 actionp[st] = st_actionp 2717 goto[st] = st_goto 2718 st += 1 2719 2720 # ----------------------------------------------------------------------------- 2721 # write() 2722 # 2723 # This function writes the LR parsing tables to a file 2724 # ----------------------------------------------------------------------------- 2725 2726 def write_table(self, tabmodule, outputdir='', signature=''): 2727 if isinstance(tabmodule, types.ModuleType): 2728 raise IOError("Won't overwrite existing tabmodule") 2729 2730 basemodulename = tabmodule.split('.')[-1] 2731 filename = os.path.join(outputdir, basemodulename) + '.py' 2732 try: 2733 f = open(filename, 'w') 2734 2735 f.write(''' 2736# %s 2737# This file is automatically generated. Do not edit. 
2738_tabversion = %r 2739 2740_lr_method = %r 2741 2742_lr_signature = %r 2743 ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature)) 2744 2745 # Change smaller to 0 to go back to original tables 2746 smaller = 1 2747 2748 # Factor out names to try and make smaller 2749 if smaller: 2750 items = {} 2751 2752 for s, nd in self.lr_action.items(): 2753 for name, v in nd.items(): 2754 i = items.get(name) 2755 if not i: 2756 i = ([], []) 2757 items[name] = i 2758 i[0].append(s) 2759 i[1].append(v) 2760 2761 f.write('\n_lr_action_items = {') 2762 for k, v in items.items(): 2763 f.write('%r:([' % k) 2764 for i in v[0]: 2765 f.write('%r,' % i) 2766 f.write('],[') 2767 for i in v[1]: 2768 f.write('%r,' % i) 2769 2770 f.write(']),') 2771 f.write('}\n') 2772 2773 f.write(''' 2774_lr_action = {} 2775for _k, _v in _lr_action_items.items(): 2776 for _x,_y in zip(_v[0],_v[1]): 2777 if not _x in _lr_action: _lr_action[_x] = {} 2778 _lr_action[_x][_k] = _y 2779del _lr_action_items 2780''') 2781 2782 else: 2783 f.write('\n_lr_action = { ') 2784 for k, v in self.lr_action.items(): 2785 f.write('(%r,%r):%r,' % (k[0], k[1], v)) 2786 f.write('}\n') 2787 2788 if smaller: 2789 # Factor out names to try and make smaller 2790 items = {} 2791 2792 for s, nd in self.lr_goto.items(): 2793 for name, v in nd.items(): 2794 i = items.get(name) 2795 if not i: 2796 i = ([], []) 2797 items[name] = i 2798 i[0].append(s) 2799 i[1].append(v) 2800 2801 f.write('\n_lr_goto_items = {') 2802 for k, v in items.items(): 2803 f.write('%r:([' % k) 2804 for i in v[0]: 2805 f.write('%r,' % i) 2806 f.write('],[') 2807 for i in v[1]: 2808 f.write('%r,' % i) 2809 2810 f.write(']),') 2811 f.write('}\n') 2812 2813 f.write(''' 2814_lr_goto = {} 2815for _k, _v in _lr_goto_items.items(): 2816 for _x, _y in zip(_v[0], _v[1]): 2817 if not _x in _lr_goto: _lr_goto[_x] = {} 2818 _lr_goto[_x][_k] = _y 2819del _lr_goto_items 2820''') 2821 else: 2822 f.write('\n_lr_goto = { ') 2823 for k, v in 
self.lr_goto.items(): 2824 f.write('(%r,%r):%r,' % (k[0], k[1], v)) 2825 f.write('}\n') 2826 2827 # Write production table 2828 f.write('_lr_productions = [\n') 2829 for p in self.lr_productions: 2830 if p.func: 2831 f.write(' (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len, 2832 p.func, os.path.basename(p.file), p.line)) 2833 else: 2834 f.write(' (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len)) 2835 f.write(']\n') 2836 f.close() 2837 2838 except IOError as e: 2839 raise 2840 2841 2842 # ----------------------------------------------------------------------------- 2843 # pickle_table() 2844 # 2845 # This function pickles the LR parsing tables to a supplied file object 2846 # ----------------------------------------------------------------------------- 2847 2848 def pickle_table(self, filename, signature=''): 2849 try: 2850 import cPickle as pickle 2851 except ImportError: 2852 import pickle 2853 with open(filename, 'wb') as outf: 2854 pickle.dump(__tabversion__, outf, pickle_protocol) 2855 pickle.dump(self.lr_method, outf, pickle_protocol) 2856 pickle.dump(signature, outf, pickle_protocol) 2857 pickle.dump(self.lr_action, outf, pickle_protocol) 2858 pickle.dump(self.lr_goto, outf, pickle_protocol) 2859 2860 outp = [] 2861 for p in self.lr_productions: 2862 if p.func: 2863 outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line)) 2864 else: 2865 outp.append((str(p), p.name, p.len, None, None, None)) 2866 pickle.dump(outp, outf, pickle_protocol) 2867 2868# ----------------------------------------------------------------------------- 2869# === INTROSPECTION === 2870# 2871# The following functions and classes are used to implement the PLY 2872# introspection features followed by the yacc() function itself. 
2873# ----------------------------------------------------------------------------- 2874 2875# ----------------------------------------------------------------------------- 2876# get_caller_module_dict() 2877# 2878# This function returns a dictionary containing all of the symbols defined within 2879# a caller further down the call stack. This is used to get the environment 2880# associated with the yacc() call if none was provided. 2881# ----------------------------------------------------------------------------- 2882 2883def get_caller_module_dict(levels): 2884 f = sys._getframe(levels) 2885 ldict = f.f_globals.copy() 2886 if f.f_globals != f.f_locals: 2887 ldict.update(f.f_locals) 2888 return ldict 2889 2890# ----------------------------------------------------------------------------- 2891# parse_grammar() 2892# 2893# This takes a raw grammar rule string and parses it into production data 2894# ----------------------------------------------------------------------------- 2895def parse_grammar(doc, file, line): 2896 grammar = [] 2897 # Split the doc string into lines 2898 pstrings = doc.splitlines() 2899 lastp = None 2900 dline = line 2901 for ps in pstrings: 2902 dline += 1 2903 p = ps.split() 2904 if not p: 2905 continue 2906 try: 2907 if p[0] == '|': 2908 # This is a continuation of a previous rule 2909 if not lastp: 2910 raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline)) 2911 prodname = lastp 2912 syms = p[1:] 2913 else: 2914 prodname = p[0] 2915 lastp = prodname 2916 syms = p[2:] 2917 assign = p[1] 2918 if assign != ':' and assign != '::=': 2919 raise SyntaxError("%s:%d: Syntax error. 
Expected ':'" % (file, dline)) 2920 2921 grammar.append((file, dline, prodname, syms)) 2922 except SyntaxError: 2923 raise 2924 except Exception: 2925 raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip())) 2926 2927 return grammar 2928 2929# ----------------------------------------------------------------------------- 2930# ParserReflect() 2931# 2932# This class represents information extracted for building a parser including 2933# start symbol, error function, tokens, precedence list, action functions, 2934# etc. 2935# ----------------------------------------------------------------------------- 2936class ParserReflect(object): 2937 def __init__(self, pdict, log=None): 2938 self.pdict = pdict 2939 self.start = None 2940 self.error_func = None 2941 self.tokens = None 2942 self.modules = set() 2943 self.grammar = [] 2944 self.error = False 2945 2946 if log is None: 2947 self.log = PlyLogger(sys.stderr) 2948 else: 2949 self.log = log 2950 2951 # Get all of the basic information 2952 def get_all(self): 2953 self.get_start() 2954 self.get_error_func() 2955 self.get_tokens() 2956 self.get_precedence() 2957 self.get_pfunctions() 2958 2959 # Validate all of the information 2960 def validate_all(self): 2961 self.validate_start() 2962 self.validate_error_func() 2963 self.validate_tokens() 2964 self.validate_precedence() 2965 self.validate_pfunctions() 2966 self.validate_modules() 2967 return self.error 2968 2969 # Compute a signature over the grammar 2970 def signature(self): 2971 parts = [] 2972 try: 2973 if self.start: 2974 parts.append(self.start) 2975 if self.prec: 2976 parts.append(''.join([''.join(p) for p in self.prec])) 2977 if self.tokens: 2978 parts.append(' '.join(self.tokens)) 2979 for f in self.pfuncs: 2980 if f[3]: 2981 parts.append(f[3]) 2982 except (TypeError, ValueError): 2983 pass 2984 return ''.join(parts) 2985 2986 # ----------------------------------------------------------------------------- 2987 # validate_modules() 2988 # 
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file. Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work). Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_modules(self):
        """Warn about duplicated p_rulename() functions in the grammar modules.

        Scans the raw source text of every module in self.modules and logs a
        warning when the same p_ function name is textually defined twice
        (usually the result of copy/paste).
        """
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for module in self.modules:
            try:
                lines, linen = inspect.getsourcelines(module)
            except IOError:
                # Source text unavailable (e.g. interactive session); skip it.
                continue

            counthash = {}
            for linen, line in enumerate(lines):
                linen += 1  # report 1-based line numbers
                m = fre.match(line)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        filename = inspect.getsourcefile(module)
                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
                                         filename, linen, name, prev)

    # Get the start symbol
    def get_start(self):
        # The grammar module may declare a module-level 'start' variable.
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start, string_types):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        """Check that p_error (if defined) is a function or method taking one argument."""
        if self.error_func:
            if isinstance(self.error_func, types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = True
                return

            eline = self.error_func.__code__.co_firstlineno
            efile = self.error_func.__code__.co_filename
            module = inspect.getmodule(self.error_func)
            self.modules.add(module)

            # Bound methods carry an implicit 'self'; discount it from the count.
            argcount = self.error_func.__code__.co_argcount - ismethod
            if argcount != 1:
                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
                self.error = True

    # Get the tokens map
    def get_tokens(self):
        """Fetch and sanity-check the module-level 'tokens' list."""
        tokens = self.pdict.get('tokens')
        if not tokens:
            self.log.error('No token list is defined')
            self.error = True
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error('tokens must be a list or tuple')
            self.error = True
            return

        if not tokens:
            # NOTE(review): unreachable -- the first 'if not tokens' above already
            # returned for any empty/falsy value. Kept byte-identical.
            self.log.error('tokens is empty')
            self.error = True
            return

        self.tokens = tokens

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = True
            return

        # Warn (but do not fail) on tokens listed more than once.
        terminals = set()
        for n in self.tokens:
            if n in terminals:
                self.log.warning('Token %r multiply defined', n)
            terminals.add(n)

    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get('precedence')

    # Validate and parse the precedence map
    def validate_precedence(self):
        """Flatten the user's precedence table into self.preclist.

        Each table entry has the form (assoc, term, ..., term); the result is
        a list of (term, assoc, level) tuples with level numbering starting
        at 1 (later entries bind tighter).
        """
        preclist = []
        if self.prec:
            if not isinstance(self.prec, (list, tuple)):
                self.log.error('precedence must be a list or tuple')
                self.error = True
                return
            for level, p in enumerate(self.prec):
                if not isinstance(p, (list, tuple)):
                    self.log.error('Bad precedence table')
                    self.error = True
                    return

                if len(p) < 2:
                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
                    self.error = True
                    return
                assoc = p[0]
                if not isinstance(assoc, string_types):
                    self.log.error('precedence associativity must be a string')
                    self.error = True
                    return
                for term in p[1:]:
                    if not isinstance(term, string_types):
                        self.log.error('precedence items must be strings')
                        self.error = True
                        return
                    preclist.append((term, assoc, level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        """Collect every p_* rule function (except p_error) from the grammar dict."""
        p_functions = []
        for name, item in self.pdict.items():
            if not name.startswith('p_') or name == 'p_error':
                continue
            if isinstance(item, (types.FunctionType, types.MethodType)):
                # NOTE(review): plain functions/methods have no direct
                # 'co_firstlineno' attribute, so the getattr default
                # (item.__code__.co_firstlineno) is what is normally used here.
                line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
                module = inspect.getmodule(item)
                p_functions.append((line, module, name, item.__doc__))

        # Sort all of the actions by line number; make sure to stringify
        # modules to make them sortable, since `line` may not uniquely sort all
        # p functions
        p_functions.sort(key=lambda p_function: (
            p_function[0],
            str(p_function[1]),
            p_function[2],
            p_function[3]))
        self.pfuncs = p_functions

    # Validate all of the p_functions
    def validate_pfunctions(self):
        """Validate rule-function signatures/docstrings and build self.grammar.

        Each entry appended to self.grammar is (funcname, parsed_rule) as
        produced by parse_grammar() from the function's docstring.
        """
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error('no rules of the form p_rulename are defined')
            self.error = True
            return

        for line, module, name, doc in self.pfuncs:
            file = inspect.getsourcefile(module)
            func = self.pdict[name]
            if isinstance(func, types.MethodType):
                reqargs = 2   # self + p
            else:
                reqargs = 1   # just p
            if func.__code__.co_argcount > reqargs:
                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
                self.error = True
            elif func.__code__.co_argcount < reqargs:
                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
                self.error = True
            elif not func.__doc__:
                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
                                 file, line, func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc, file, line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError as e:
                    self.log.error(str(e))
                    self.error = True

            # Looks like a valid grammar rule
            # Mark the file in which defined.
            # NOTE(review): runs for every function in the loop, including ones
            # that failed the checks above -- presumably intentional so the
            # module is still scanned by validate_modules(); confirm.
            self.modules.add(module)

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n, v in self.pdict.items():
            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
                continue
            if n.startswith('t_'):
                continue
            if n.startswith('p_') and n != 'p_error':
                self.log.warning('%r not defined as a function', n)
            # Heuristic: a one-argument function whose docstring's second
            # whitespace-separated word is ':' looks like a grammar rule
            # missing the p_ prefix.
            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
                    (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
                if v.__doc__:
                    try:
                        doc = v.__doc__.split(' ')
                        if doc[1] == ':':
                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
                    except IndexError:
                        pass

        self.grammar = grammar

# -----------------------------------------------------------------------------
# yacc(module)
#
# Build a parser
# -----------------------------------------------------------------------------

def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
         outputdir=None, debuglog=None, errorlog=None, picklefile=None):
    """Build an LR parser from grammar rules found in *module* (or the caller's
    namespace when *module* is None).

    Tables are reused from *tabmodule*/*picklefile* when their stored signature
    matches the current grammar; otherwise they are regenerated with *method*
    ('LALR' or 'SLR'). Side effect: sets the module-level 'parse' alias to the
    new parser's parse() method.
    """
    if tabmodule is None:
        tabmodule = tab_module

    # Reference to the parsing method of the last built parser
    global parse

    # If pickling is enabled, table files are not created
    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        pdict = dict(_items)
        # If no __file__ attribute is available, try to obtain it from the __module__ instead
        if '__file__' not in pdict:
            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
    else:
        # Reflect on the caller's frame (2 levels up) to find the grammar.
        pdict = get_caller_module_dict(2)

    if outputdir is None:
        # If no output directory is set, the location of the output files
        # is determined according to the following rules:
        #     - If tabmodule specifies a package, files go into that package directory
        #     - Otherwise, files go in the same directory as the specifying module
        if isinstance(tabmodule, types.ModuleType):
            srcfile = tabmodule.__file__
        else:
            if '.' not in tabmodule:
                srcfile = pdict['__file__']
            else:
                # Dotted tabmodule: import the containing package to locate it.
                parts = tabmodule.split('.')
                pkgname = '.'.join(parts[:-1])
                exec('import %s' % pkgname)
                srcfile = getattr(sys.modules[pkgname], '__file__', '')
        outputdir = os.path.dirname(srcfile)

    # Determine if the module is package of a package or not.
    # If so, fix the tabmodule setting so that tables load correctly
    pkg = pdict.get('__package__')
    if pkg and isinstance(tabmodule, str):
        if '.' not in tabmodule:
            tabmodule = pkg + '.' + tabmodule


    # Set start symbol if it's specified directly using an argument
    if start is not None:
        pdict['start'] = start

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict, log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError('Unable to build parser')

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables; on a signature match (or optimize mode) reuse them and
    # return immediately without rebuilding.
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr, pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception as e:
                errorlog.warning('There was a problem loading the table file: %r', e)
    except VersionError as e:
        errorlog.warning(str(e))
    except ImportError:
        # No pre-generated table module; fall through and build the tables.
        pass

    if debuglog is None:
        if debug:
            try:
                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
            except IOError as e:
                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
                debuglog = NullLogger()
        else:
            debuglog = NullLogger()

    debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)

    errors = False

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError('Unable to build parser')

    if not pinfo.error_func:
        errorlog.warning('no p_error() function is defined')

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError as e:
            errorlog.warning('%s', e)

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError as e:
            errorlog.error('%s', e)
            errors = True

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError as e:
        errorlog.error(str(e))
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
        errors = True

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info('')
        debuglog.info('Unused terminals:')
        debuglog.info('')
        for term in unused_terminals:
            errorlog.warning('Token %r defined, but not used', term)
            debuglog.info('    %s', term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info('')
        debuglog.info('Grammar')
        debuglog.info('')
        for n, p in enumerate(grammar.Productions):
            debuglog.info('Rule %-5d %s', n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning('There is 1 unused token')
    if len(unused_terminals) > 1:
        errorlog.warning('There are %d unused tokens', len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning('There is 1 unused rule')
    if len(unused_rules) > 1:
        errorlog.warning('There are %d unused rules', len(unused_rules))

    if debug:
        debuglog.info('')
        debuglog.info('Terminals, with rules where they appear')
        debuglog.info('')
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info('')
        debuglog.info('Nonterminals, with rules where they appear')
        debuglog.info('')
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info('')

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning('Symbol %r is unreachable', u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error('Infinite recursion detected for symbol %r', inf)
            errors = True

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug('Generating %s tables', method)

    lr = LRGeneratedTable(grammar, method, debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning('1 shift/reduce conflict')
        elif num_sr > 1:
            errorlog.warning('%d shift/reduce conflicts', num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning('1 reduce/reduce conflict')
        elif num_rr > 1:
            errorlog.warning('%d reduce/reduce conflicts', num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning('')
        debuglog.warning('Conflicts:')
        debuglog.warning('')

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s',  tok, state, resolution)

        # De-duplicate reduce/reduce reports by (state, rule identity, rejected identity).
        already_reported = set()
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
            already_reported.add((state, id(rule), id(rejected)))

        # Warn once per rule that never gets reduced anywhere.
        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning('Rule (%s) is never reduced', rejected)
                errorlog.warning('Rule (%s) is never reduced', rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        try:
            lr.write_table(tabmodule, outputdir, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))

    # Write a pickled version of the tables
    if picklefile:
        try:
            lr.pickle_table(picklefile, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (picklefile, e))

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    parse = parser.parse
    return parser