1# -----------------------------------------------------------------------------
2# ply: yacc.py
3#
4# Copyright (C) 2001-2015,
5# David M. Beazley (Dabeaz LLC)
6# All rights reserved.
7#
8# SPDX-License-Identifier: BSD-3-Clause
9# -----------------------------------------------------------------------------
10#
11# This implements an LR parser that is constructed from grammar rules defined
# as Python functions. The grammar is specified by supplying the BNF inside
13# Python documentation strings.  The inspiration for this technique was borrowed
14# from John Aycock's Spark parsing system.  PLY might be viewed as cross between
15# Spark and the GNU bison utility.
16#
17# The current implementation is only somewhat object-oriented. The
18# LR parser itself is defined in terms of an object (which allows multiple
19# parsers to co-exist).  However, most of the variables used during table
20# construction are defined in terms of global variables.  Users shouldn't
21# notice unless they are trying to define multiple parsers at the same
22# time using threads (in which case they should have their head examined).
23#
24# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
25# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
26# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
27# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
28# by the more efficient DeRemer and Pennello algorithm.
29#
30# :::::::: WARNING :::::::
31#
32# Construction of LR parsing tables is fairly complicated and expensive.
33# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what some might
# consider to be good Python "coding style."   Modify the code at your
36# own risk!
37# ----------------------------------------------------------------------------
38
39import re
40import types
41import sys
42import os.path
43import inspect
44import base64
45import warnings
46
47__version__    = '3.8'
48__tabversion__ = '3.8'
49
50#-----------------------------------------------------------------------------
51#                     === User configurable parameters ===
52#
53# Change these to modify the default behavior of yacc (if you wish)
54#-----------------------------------------------------------------------------
55
yaccdebug   = True             # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = False            # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

# String type-checking compatibility
if sys.version_info[0] < 3:
    string_types = basestring  # Python 2: matches both str and unicode
else:
    string_types = str

MAXINT = sys.maxsize
79
80# This object is a stand-in for a logging object created by the
81# logging module.   PLY will use this by default to create things
82# such as the parser.out file.  If a user wants more detailed
83# information, they can create their own logging object and pass
84# it into PLY.
85
class PlyLogger(object):
    """Minimal stand-in for a ``logging`` logger that writes to a file object.

    PLY uses this by default to produce output such as the parser.out file.
    Users wanting richer behavior can pass a real logging object instead.
    """
    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        # Messages use lazy %-style formatting, mirroring the logging module
        self.f.write('%s\n' % (msg % args))

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: %s\n' % (msg % args))

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: %s\n' % (msg % args))

    critical = debug
102
103# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    """Logger that silently discards everything.

    Any attribute access and any call simply yield the logger itself, so
    arbitrary chains like ``log.debug(...)`` are all no-ops.
    """
    def __getattribute__(self, attr):
        return self

    def __call__(self, *args, **kwargs):
        return self
110
111# Exception raised for yacc-related errors
class YaccError(Exception):
    """Exception raised for yacc-related errors."""
114
def format_result(r):
    """Describe a parse result for debug output as ``<type @ 0xADDR> (repr)``.

    Multi-line reprs are re-escaped onto one line, and anything longer than
    the module-level ``resultlimit`` is truncated with a trailing ' ...'.
    """
    text = repr(r)
    if '\n' in text:
        text = repr(text)
    if len(text) > resultlimit:
        text = text[:resultlimit] + ' ...'
    return '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), text)
124
def format_stack_entry(r):
    """Render a parser-stack value for debug traces.

    Short values (repr under 16 characters) are shown verbatim; anything
    larger collapses to a compact ``<type @ 0xADDR>`` placeholder.
    """
    text = repr(r)
    if '\n' in text:
        text = repr(text)
    return text if len(text) < 16 else '<%s @ 0x%x>' % (type(r).__name__, id(r))
134
# Panic mode error recovery support.   This feature is being reworked--much of the
# code here is to offer a deprecation/backwards compatible transition

# Bound methods of the currently-active parser.  call_errorfunc() fills these
# in for the duration of a p_error() call so the deprecated module-level
# errok()/token()/restart() shims below can delegate to the right parser.
_errok = None
_token = None
_restart = None
# Deprecation warning issued by the global shims (runtime string; emitted to
# the user via warnings.warn).
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''
151
def errok():
    """Deprecated global shim for parser.errok(); warns, then delegates."""
    warnings.warn(_warnmsg)
    return _errok()
155
def restart():
    """Deprecated global shim for parser.restart(); warns, then delegates."""
    warnings.warn(_warnmsg)
    return _restart()
159
def token():
    """Deprecated global shim for parser.token(); warns, then delegates."""
    warnings.warn(_warnmsg)
    return _token()
163
def call_errorfunc(errorfunc, token, parser):
    """Invoke the user's p_error() callback and return its result.

    For backwards compatibility, the parser's errok/token/restart methods are
    temporarily published through the deprecated module-level hooks while the
    callback runs, then torn down again afterwards.
    """
    global _errok, _token, _restart
    _errok, _token, _restart = parser.errok, parser.token, parser.restart
    result = errorfunc(token)
    try:
        del _errok, _token, _restart
    except NameError:
        # Already gone (e.g. a nested call cleaned up first) -- nothing to do
        pass
    return result
176
177#-----------------------------------------------------------------------------
178#                        ===  LR Parsing Engine ===
179#
180# The following classes are used for the LR parser itself.  These are not
181# used during table construction and are independent of the actual LR
182# table generation algorithm
183#-----------------------------------------------------------------------------
184
185# This class is used to hold non-terminal grammar symbols during parsing.
186# It normally has the following attributes set:
187#        .type       = Grammar symbol type
188#        .value      = Symbol value
189#        .lineno     = Starting line number
190#        .endlineno  = Ending line number (optional, set automatically)
191#        .lexpos     = Starting lex position
192#        .endlexpos  = Ending lex position (optional, set automatically)
193
class YaccSymbol:
    """Grammar symbol held on the parsing stack (see attribute list above)."""
    def __str__(self):
        # The symbol prints as its grammar type name (e.g. 'expression')
        return self.type

    def __repr__(self):
        return str(self)
200
201# This class is a wrapper around the objects actually passed to each
202# grammar rule.   Index lookup and assignment actually assign the
203# .value attribute of the underlying YaccSymbol object.
204# The lineno() method returns the line number of a given
205# item (or 0 if not defined).   The linespan() method returns
206# a tuple of (startline,endline) representing the range of lines
207# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
208# representing the range of positional information for a symbol.
209
class YaccProduction:
    """Wrapper around the symbols handed to each grammar rule function.

    Indexing and assignment operate on the ``.value`` attribute of the
    underlying YaccSymbol objects.  lineno()/linespan() expose line-number
    information and lexpos()/lexspan() expose lexer positions, each
    defaulting to 0 when the symbol carries no position data.
    """
    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        if isinstance(n, slice):
            return [sym.value for sym in self.slice[n]]
        if n >= 0:
            return self.slice[n].value
        # Negative indices reach back into the parser's symbol stack
        return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 legacy slicing protocol
        return [sym.value for sym in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        """Starting line number of item n (0 if unknown)."""
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        """Record the starting line number for item n."""
        self.slice[n].lineno = lineno

    def linespan(self, n):
        """(startline, endline) tuple covering item n."""
        first = getattr(self.slice[n], 'lineno', 0)
        last = getattr(self.slice[n], 'endlineno', first)
        return first, last

    def lexpos(self, n):
        """Starting lexer position of item n (0 if unknown)."""
        return getattr(self.slice[n], 'lexpos', 0)

    def lexspan(self, n):
        """(lexpos, endlexpos) tuple covering item n."""
        first = getattr(self.slice[n], 'lexpos', 0)
        last = getattr(self.slice[n], 'endlexpos', first)
        return first, last

    def error(self):
        """Signal a syntax error from inside a grammar rule."""
        raise SyntaxError
255
256# -----------------------------------------------------------------------------
257#                               == LRParser ==
258#
259# The LR Parsing engine.
260# -----------------------------------------------------------------------------
261
262class LRParser:
    def __init__(self, lrtab, errorf):
        # lrtab carries the precomputed LR tables; errorf is the user's
        # p_error() handler (may be None)
        self.productions = lrtab.lr_productions  # Grammar production list
        self.action = lrtab.lr_action            # Action table: state -> {token type: action}
        self.goto = lrtab.lr_goto                # Goto table: state -> {nonterminal: state}
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True                      # False while in error-recovery mode
270
    def errok(self):
        """Clear the error flag; used from p_error() to signal recovery."""
        self.errorok = True
273
    def restart(self):
        """Discard the entire parse stack and return to the start state."""
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'      # the symbol stack always has $end at the bottom
        self.symstack.append(sym)
        self.statestack.append(0)
281
282    # Defaulted state support.
283    # This method identifies parser states where there is only one possible reduction action.
284    # For such states, the parser can make a choose to make a rule reduction without consuming
285    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
286    # certain kinds of advanced parsing situations where the lexer and parser interact with
287    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
288    #
289    # See:  https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
290    def set_defaulted_states(self):
291        self.defaulted_states = {}
292        for state, actions in self.action.items():
293            rules = list(actions.values())
294            if len(rules) == 1 and rules[0] < 0:
295                self.defaulted_states[state] = rules[0]
296
    def disable_defaulted_states(self):
        """Turn off the defaulted-state optimization; every action will then
        be chosen using an actual lookahead token."""
        self.defaulted_states = {}
299
300    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
301        if debug or yaccdevel:
302            if isinstance(debug, int):
303                debug = PlyLogger(sys.stderr)
304            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
305        elif tracking:
306            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
307        else:
308            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
309
310
311    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
312    # parsedebug().
313    #
314    # This is the debugging enabled version of parse().  All changes made to the
315    # parsing engine should be made here.   Optimized versions of this function
316    # are automatically created by the ply/ygen.py script.  This script cuts out
317    # sections enclosed in markers such as this:
318    #
319    #      #--! DEBUG
320    #      statements
321    #      #--! DEBUG
322    #
323    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
324
325    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
326        #--! parsedebug-start
327        lookahead = None                         # Current lookahead symbol
328        lookaheadstack = []                      # Stack of lookahead symbols
329        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
330        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
331        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
332        defaulted_states = self.defaulted_states # Local reference to defaulted states
333        pslice  = YaccProduction(None)           # Production object passed to grammar rules
334        errorcount = 0                           # Used during error recovery
335
336        #--! DEBUG
337        debug.info('PLY: PARSE DEBUG START')
338        #--! DEBUG
339
340        # If no lexer was given, we will try to use the lex module
341        if not lexer:
342            from . import lex
343            lexer = lex.lexer
344
345        # Set up the lexer and parser objects on pslice
346        pslice.lexer = lexer
347        pslice.parser = self
348
349        # If input was supplied, pass to lexer
350        if input is not None:
351            lexer.input(input)
352
353        if tokenfunc is None:
354            # Tokenize function
355            get_token = lexer.token
356        else:
357            get_token = tokenfunc
358
359        # Set the parser() token method (sometimes used in error recovery)
360        self.token = get_token
361
362        # Set up the state and symbol stacks
363
364        statestack = []                # Stack of parsing states
365        self.statestack = statestack
366        symstack   = []                # Stack of grammar symbols
367        self.symstack = symstack
368
369        pslice.stack = symstack         # Put in the production
370        errtoken   = None               # Err token
371
372        # The start state is assumed to be (0,$end)
373
374        statestack.append(0)
375        sym = YaccSymbol()
376        sym.type = '$end'
377        symstack.append(sym)
378        state = 0
379        while True:
380            # Get the next symbol on the input.  If a lookahead symbol
381            # is already set, we just use that. Otherwise, we'll pull
382            # the next token off of the lookaheadstack or from the lexer
383
384            #--! DEBUG
385            debug.debug('')
386            debug.debug('State  : %s', state)
387            #--! DEBUG
388
389            if state not in defaulted_states:
390                if not lookahead:
391                    if not lookaheadstack:
392                        lookahead = get_token()     # Get the next token
393                    else:
394                        lookahead = lookaheadstack.pop()
395                    if not lookahead:
396                        lookahead = YaccSymbol()
397                        lookahead.type = '$end'
398
399                # Check the action table
400                ltype = lookahead.type
401                t = actions[state].get(ltype)
402            else:
403                t = defaulted_states[state]
404                #--! DEBUG
405                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
406                #--! DEBUG
407
408            #--! DEBUG
409            debug.debug('Stack  : %s',
410                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
411            #--! DEBUG
412
413            if t is not None:
414                if t > 0:
415                    # shift a symbol on the stack
416                    statestack.append(t)
417                    state = t
418
419                    #--! DEBUG
420                    debug.debug('Action : Shift and goto state %s', t)
421                    #--! DEBUG
422
423                    symstack.append(lookahead)
424                    lookahead = None
425
426                    # Decrease error count on successful shift
427                    if errorcount:
428                        errorcount -= 1
429                    continue
430
431                if t < 0:
432                    # reduce a symbol on the stack, emit a production
433                    p = prod[-t]
434                    pname = p.name
435                    plen  = p.len
436
437                    # Get production function
438                    sym = YaccSymbol()
439                    sym.type = pname       # Production name
440                    sym.value = None
441
442                    #--! DEBUG
443                    if plen:
444                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
445                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
446                                   goto[statestack[-1-plen]][pname])
447                    else:
448                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
449                                   goto[statestack[-1]][pname])
450
451                    #--! DEBUG
452
453                    if plen:
454                        targ = symstack[-plen-1:]
455                        targ[0] = sym
456
457                        #--! TRACKING
458                        if tracking:
459                            t1 = targ[1]
460                            sym.lineno = t1.lineno
461                            sym.lexpos = t1.lexpos
462                            t1 = targ[-1]
463                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
464                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
465                        #--! TRACKING
466
467                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
468                        # The code enclosed in this section is duplicated
469                        # below as a performance optimization.  Make sure
470                        # changes get made in both locations.
471
472                        pslice.slice = targ
473
474                        try:
475                            # Call the grammar rule with our special slice object
476                            del symstack[-plen:]
477                            del statestack[-plen:]
478                            p.callable(pslice)
479                            #--! DEBUG
480                            debug.info('Result : %s', format_result(pslice[0]))
481                            #--! DEBUG
482                            symstack.append(sym)
483                            state = goto[statestack[-1]][pname]
484                            statestack.append(state)
485                        except SyntaxError:
486                            # If an error was set. Enter error recovery state
487                            lookaheadstack.append(lookahead)
488                            symstack.pop()
489                            statestack.pop()
490                            state = statestack[-1]
491                            sym.type = 'error'
492                            lookahead = sym
493                            errorcount = error_count
494                            self.errorok = False
495                        continue
496                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
497
498                    else:
499
500                        #--! TRACKING
501                        if tracking:
502                            sym.lineno = lexer.lineno
503                            sym.lexpos = lexer.lexpos
504                        #--! TRACKING
505
506                        targ = [sym]
507
508                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
509                        # The code enclosed in this section is duplicated
510                        # above as a performance optimization.  Make sure
511                        # changes get made in both locations.
512
513                        pslice.slice = targ
514
515                        try:
516                            # Call the grammar rule with our special slice object
517                            p.callable(pslice)
518                            #--! DEBUG
519                            debug.info('Result : %s', format_result(pslice[0]))
520                            #--! DEBUG
521                            symstack.append(sym)
522                            state = goto[statestack[-1]][pname]
523                            statestack.append(state)
524                        except SyntaxError:
525                            # If an error was set. Enter error recovery state
526                            lookaheadstack.append(lookahead)
527                            symstack.pop()
528                            statestack.pop()
529                            state = statestack[-1]
530                            sym.type = 'error'
531                            lookahead = sym
532                            errorcount = error_count
533                            self.errorok = False
534                        continue
535                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
536
537                if t == 0:
538                    n = symstack[-1]
539                    result = getattr(n, 'value', None)
540                    #--! DEBUG
541                    debug.info('Done   : Returning %s', format_result(result))
542                    debug.info('PLY: PARSE DEBUG END')
543                    #--! DEBUG
544                    return result
545
546            if t is None:
547
548                #--! DEBUG
549                debug.error('Error  : %s',
550                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
551                #--! DEBUG
552
553                # We have some kind of parsing error here.  To handle
554                # this, we are going to push the current token onto
555                # the tokenstack and replace it with an 'error' token.
556                # If there are any synchronization rules, they may
557                # catch it.
558                #
559                # In addition to pushing the error token, we call call
560                # the user defined p_error() function if this is the
561                # first syntax error.  This function is only called if
562                # errorcount == 0.
563                if errorcount == 0 or self.errorok:
564                    errorcount = error_count
565                    self.errorok = False
566                    errtoken = lookahead
567                    if errtoken.type == '$end':
568                        errtoken = None               # End of file!
569                    if self.errorfunc:
570                        if errtoken and not hasattr(errtoken, 'lexer'):
571                            errtoken.lexer = lexer
572                        tok = call_errorfunc(self.errorfunc, errtoken, self)
573                        if self.errorok:
574                            # User must have done some kind of panic
575                            # mode recovery on their own.  The
576                            # returned token is the next lookahead
577                            lookahead = tok
578                            errtoken = None
579                            continue
580                    else:
581                        if errtoken:
582                            if hasattr(errtoken, 'lineno'):
583                                lineno = lookahead.lineno
584                            else:
585                                lineno = 0
586                            if lineno:
587                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
588                            else:
589                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
590                        else:
591                            sys.stderr.write('yacc: Parse error in input. EOF\n')
592                            return
593
594                else:
595                    errorcount = error_count
596
597                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
598                # entire parse has been rolled back and we're completely hosed.   The token is
599                # discarded and we just keep going.
600
601                if len(statestack) <= 1 and lookahead.type != '$end':
602                    lookahead = None
603                    errtoken = None
604                    state = 0
605                    # Nuke the pushback stack
606                    del lookaheadstack[:]
607                    continue
608
609                # case 2: the statestack has a couple of entries on it, but we're
610                # at the end of the file. nuke the top entry and generate an error token
611
612                # Start nuking entries on the stack
613                if lookahead.type == '$end':
614                    # Whoa. We're really hosed here. Bail out
615                    return
616
617                if lookahead.type != 'error':
618                    sym = symstack[-1]
619                    if sym.type == 'error':
620                        # Hmmm. Error is on top of stack, we'll just nuke input
621                        # symbol and continue
622                        #--! TRACKING
623                        if tracking:
624                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
625                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
626                        #--! TRACKING
627                        lookahead = None
628                        continue
629
630                    # Create the error symbol for the first time and make it the new lookahead symbol
631                    t = YaccSymbol()
632                    t.type = 'error'
633
634                    if hasattr(lookahead, 'lineno'):
635                        t.lineno = t.endlineno = lookahead.lineno
636                    if hasattr(lookahead, 'lexpos'):
637                        t.lexpos = t.endlexpos = lookahead.lexpos
638                    t.value = lookahead
639                    lookaheadstack.append(lookahead)
640                    lookahead = t
641                else:
642                    sym = symstack.pop()
643                    #--! TRACKING
644                    if tracking:
645                        lookahead.lineno = sym.lineno
646                        lookahead.lexpos = sym.lexpos
647                    #--! TRACKING
648                    statestack.pop()
649                    state = statestack[-1]
650
651                continue
652
653            # Call an error function here
654            raise RuntimeError('yacc: internal parser error!!!\n')
655
656        #--! parsedebug-end
657
658    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
659    # parseopt().
660    #
661    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY!
662    # This code is automatically generated by the ply/ygen.py script. Make
663    # changes to the parsedebug() method instead.
664    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
665
666    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
667        #--! parseopt-start
668        lookahead = None                         # Current lookahead symbol
669        lookaheadstack = []                      # Stack of lookahead symbols
670        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
671        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
672        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
673        defaulted_states = self.defaulted_states # Local reference to defaulted states
674        pslice  = YaccProduction(None)           # Production object passed to grammar rules
675        errorcount = 0                           # Used during error recovery
676
677
678        # If no lexer was given, we will try to use the lex module
679        if not lexer:
680            from . import lex
681            lexer = lex.lexer
682
683        # Set up the lexer and parser objects on pslice
684        pslice.lexer = lexer
685        pslice.parser = self
686
687        # If input was supplied, pass to lexer
688        if input is not None:
689            lexer.input(input)
690
691        if tokenfunc is None:
692            # Tokenize function
693            get_token = lexer.token
694        else:
695            get_token = tokenfunc
696
697        # Set the parser() token method (sometimes used in error recovery)
698        self.token = get_token
699
700        # Set up the state and symbol stacks
701
702        statestack = []                # Stack of parsing states
703        self.statestack = statestack
704        symstack   = []                # Stack of grammar symbols
705        self.symstack = symstack
706
707        pslice.stack = symstack         # Put in the production
708        errtoken   = None               # Err token
709
710        # The start state is assumed to be (0,$end)
711
712        statestack.append(0)
713        sym = YaccSymbol()
714        sym.type = '$end'
715        symstack.append(sym)
716        state = 0
717        while True:
718            # Get the next symbol on the input.  If a lookahead symbol
719            # is already set, we just use that. Otherwise, we'll pull
720            # the next token off of the lookaheadstack or from the lexer
721
722
723            if state not in defaulted_states:
724                if not lookahead:
725                    if not lookaheadstack:
726                        lookahead = get_token()     # Get the next token
727                    else:
728                        lookahead = lookaheadstack.pop()
729                    if not lookahead:
730                        lookahead = YaccSymbol()
731                        lookahead.type = '$end'
732
733                # Check the action table
734                ltype = lookahead.type
735                t = actions[state].get(ltype)
736            else:
737                t = defaulted_states[state]
738
739
740            if t is not None:
741                if t > 0:
742                    # shift a symbol on the stack
743                    statestack.append(t)
744                    state = t
745
746
747                    symstack.append(lookahead)
748                    lookahead = None
749
750                    # Decrease error count on successful shift
751                    if errorcount:
752                        errorcount -= 1
753                    continue
754
755                if t < 0:
756                    # reduce a symbol on the stack, emit a production
757                    p = prod[-t]
758                    pname = p.name
759                    plen  = p.len
760
761                    # Get production function
762                    sym = YaccSymbol()
763                    sym.type = pname       # Production name
764                    sym.value = None
765
766
767                    if plen:
768                        targ = symstack[-plen-1:]
769                        targ[0] = sym
770
771                        #--! TRACKING
772                        if tracking:
773                            t1 = targ[1]
774                            sym.lineno = t1.lineno
775                            sym.lexpos = t1.lexpos
776                            t1 = targ[-1]
777                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
778                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
779                        #--! TRACKING
780
781                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
782                        # The code enclosed in this section is duplicated
783                        # below as a performance optimization.  Make sure
784                        # changes get made in both locations.
785
786                        pslice.slice = targ
787
788                        try:
789                            # Call the grammar rule with our special slice object
790                            del symstack[-plen:]
791                            del statestack[-plen:]
792                            p.callable(pslice)
793                            symstack.append(sym)
794                            state = goto[statestack[-1]][pname]
795                            statestack.append(state)
796                        except SyntaxError:
797                            # If an error was set. Enter error recovery state
798                            lookaheadstack.append(lookahead)
799                            symstack.pop()
800                            statestack.pop()
801                            state = statestack[-1]
802                            sym.type = 'error'
803                            lookahead = sym
804                            errorcount = error_count
805                            self.errorok = False
806                        continue
807                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
808
809                    else:
810
811                        #--! TRACKING
812                        if tracking:
813                            sym.lineno = lexer.lineno
814                            sym.lexpos = lexer.lexpos
815                        #--! TRACKING
816
817                        targ = [sym]
818
819                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
820                        # The code enclosed in this section is duplicated
821                        # above as a performance optimization.  Make sure
822                        # changes get made in both locations.
823
824                        pslice.slice = targ
825
826                        try:
827                            # Call the grammar rule with our special slice object
828                            p.callable(pslice)
829                            symstack.append(sym)
830                            state = goto[statestack[-1]][pname]
831                            statestack.append(state)
832                        except SyntaxError:
833                            # If an error was set. Enter error recovery state
834                            lookaheadstack.append(lookahead)
835                            symstack.pop()
836                            statestack.pop()
837                            state = statestack[-1]
838                            sym.type = 'error'
839                            lookahead = sym
840                            errorcount = error_count
841                            self.errorok = False
842                        continue
843                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
844
845                if t == 0:
846                    n = symstack[-1]
847                    result = getattr(n, 'value', None)
848                    return result
849
850            if t is None:
851
852
853                # We have some kind of parsing error here.  To handle
854                # this, we are going to push the current token onto
855                # the tokenstack and replace it with an 'error' token.
856                # If there are any synchronization rules, they may
857                # catch it.
858                #
859                # In addition to pushing the error token, we call call
860                # the user defined p_error() function if this is the
861                # first syntax error.  This function is only called if
862                # errorcount == 0.
863                if errorcount == 0 or self.errorok:
864                    errorcount = error_count
865                    self.errorok = False
866                    errtoken = lookahead
867                    if errtoken.type == '$end':
868                        errtoken = None               # End of file!
869                    if self.errorfunc:
870                        if errtoken and not hasattr(errtoken, 'lexer'):
871                            errtoken.lexer = lexer
872                        tok = call_errorfunc(self.errorfunc, errtoken, self)
873                        if self.errorok:
874                            # User must have done some kind of panic
875                            # mode recovery on their own.  The
876                            # returned token is the next lookahead
877                            lookahead = tok
878                            errtoken = None
879                            continue
880                    else:
881                        if errtoken:
882                            if hasattr(errtoken, 'lineno'):
883                                lineno = lookahead.lineno
884                            else:
885                                lineno = 0
886                            if lineno:
887                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
888                            else:
889                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
890                        else:
891                            sys.stderr.write('yacc: Parse error in input. EOF\n')
892                            return
893
894                else:
895                    errorcount = error_count
896
897                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
898                # entire parse has been rolled back and we're completely hosed.   The token is
899                # discarded and we just keep going.
900
901                if len(statestack) <= 1 and lookahead.type != '$end':
902                    lookahead = None
903                    errtoken = None
904                    state = 0
905                    # Nuke the pushback stack
906                    del lookaheadstack[:]
907                    continue
908
909                # case 2: the statestack has a couple of entries on it, but we're
910                # at the end of the file. nuke the top entry and generate an error token
911
912                # Start nuking entries on the stack
913                if lookahead.type == '$end':
914                    # Whoa. We're really hosed here. Bail out
915                    return
916
917                if lookahead.type != 'error':
918                    sym = symstack[-1]
919                    if sym.type == 'error':
920                        # Hmmm. Error is on top of stack, we'll just nuke input
921                        # symbol and continue
922                        #--! TRACKING
923                        if tracking:
924                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
925                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
926                        #--! TRACKING
927                        lookahead = None
928                        continue
929
930                    # Create the error symbol for the first time and make it the new lookahead symbol
931                    t = YaccSymbol()
932                    t.type = 'error'
933
934                    if hasattr(lookahead, 'lineno'):
935                        t.lineno = t.endlineno = lookahead.lineno
936                    if hasattr(lookahead, 'lexpos'):
937                        t.lexpos = t.endlexpos = lookahead.lexpos
938                    t.value = lookahead
939                    lookaheadstack.append(lookahead)
940                    lookahead = t
941                else:
942                    sym = symstack.pop()
943                    #--! TRACKING
944                    if tracking:
945                        lookahead.lineno = sym.lineno
946                        lookahead.lexpos = sym.lexpos
947                    #--! TRACKING
948                    statestack.pop()
949                    state = statestack[-1]
950
951                continue
952
953            # Call an error function here
954            raise RuntimeError('yacc: internal parser error!!!\n')
955
956        #--! parseopt-end
957
958    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
959    # parseopt_notrack().
960    #
961    # Optimized version of parseopt() with line number tracking removed.
962    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
963    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
964    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
965
966    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
967        #--! parseopt-notrack-start
968        lookahead = None                         # Current lookahead symbol
969        lookaheadstack = []                      # Stack of lookahead symbols
970        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
971        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
972        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
973        defaulted_states = self.defaulted_states # Local reference to defaulted states
974        pslice  = YaccProduction(None)           # Production object passed to grammar rules
975        errorcount = 0                           # Used during error recovery
976
977
978        # If no lexer was given, we will try to use the lex module
979        if not lexer:
980            from . import lex
981            lexer = lex.lexer
982
983        # Set up the lexer and parser objects on pslice
984        pslice.lexer = lexer
985        pslice.parser = self
986
987        # If input was supplied, pass to lexer
988        if input is not None:
989            lexer.input(input)
990
991        if tokenfunc is None:
992            # Tokenize function
993            get_token = lexer.token
994        else:
995            get_token = tokenfunc
996
997        # Set the parser() token method (sometimes used in error recovery)
998        self.token = get_token
999
1000        # Set up the state and symbol stacks
1001
1002        statestack = []                # Stack of parsing states
1003        self.statestack = statestack
1004        symstack   = []                # Stack of grammar symbols
1005        self.symstack = symstack
1006
1007        pslice.stack = symstack         # Put in the production
1008        errtoken   = None               # Err token
1009
1010        # The start state is assumed to be (0,$end)
1011
1012        statestack.append(0)
1013        sym = YaccSymbol()
1014        sym.type = '$end'
1015        symstack.append(sym)
1016        state = 0
1017        while True:
1018            # Get the next symbol on the input.  If a lookahead symbol
1019            # is already set, we just use that. Otherwise, we'll pull
1020            # the next token off of the lookaheadstack or from the lexer
1021
1022
1023            if state not in defaulted_states:
1024                if not lookahead:
1025                    if not lookaheadstack:
1026                        lookahead = get_token()     # Get the next token
1027                    else:
1028                        lookahead = lookaheadstack.pop()
1029                    if not lookahead:
1030                        lookahead = YaccSymbol()
1031                        lookahead.type = '$end'
1032
1033                # Check the action table
1034                ltype = lookahead.type
1035                t = actions[state].get(ltype)
1036            else:
1037                t = defaulted_states[state]
1038
1039
1040            if t is not None:
1041                if t > 0:
1042                    # shift a symbol on the stack
1043                    statestack.append(t)
1044                    state = t
1045
1046
1047                    symstack.append(lookahead)
1048                    lookahead = None
1049
1050                    # Decrease error count on successful shift
1051                    if errorcount:
1052                        errorcount -= 1
1053                    continue
1054
1055                if t < 0:
1056                    # reduce a symbol on the stack, emit a production
1057                    p = prod[-t]
1058                    pname = p.name
1059                    plen  = p.len
1060
1061                    # Get production function
1062                    sym = YaccSymbol()
1063                    sym.type = pname       # Production name
1064                    sym.value = None
1065
1066
1067                    if plen:
1068                        targ = symstack[-plen-1:]
1069                        targ[0] = sym
1070
1071
1072                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1073                        # The code enclosed in this section is duplicated
1074                        # below as a performance optimization.  Make sure
1075                        # changes get made in both locations.
1076
1077                        pslice.slice = targ
1078
1079                        try:
1080                            # Call the grammar rule with our special slice object
1081                            del symstack[-plen:]
1082                            del statestack[-plen:]
1083                            p.callable(pslice)
1084                            symstack.append(sym)
1085                            state = goto[statestack[-1]][pname]
1086                            statestack.append(state)
1087                        except SyntaxError:
1088                            # If an error was set. Enter error recovery state
1089                            lookaheadstack.append(lookahead)
1090                            symstack.pop()
1091                            statestack.pop()
1092                            state = statestack[-1]
1093                            sym.type = 'error'
1094                            lookahead = sym
1095                            errorcount = error_count
1096                            self.errorok = False
1097                        continue
1098                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1099
1100                    else:
1101
1102
1103                        targ = [sym]
1104
1105                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1106                        # The code enclosed in this section is duplicated
1107                        # above as a performance optimization.  Make sure
1108                        # changes get made in both locations.
1109
1110                        pslice.slice = targ
1111
1112                        try:
1113                            # Call the grammar rule with our special slice object
1114                            p.callable(pslice)
1115                            symstack.append(sym)
1116                            state = goto[statestack[-1]][pname]
1117                            statestack.append(state)
1118                        except SyntaxError:
1119                            # If an error was set. Enter error recovery state
1120                            lookaheadstack.append(lookahead)
1121                            symstack.pop()
1122                            statestack.pop()
1123                            state = statestack[-1]
1124                            sym.type = 'error'
1125                            lookahead = sym
1126                            errorcount = error_count
1127                            self.errorok = False
1128                        continue
1129                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1130
1131                if t == 0:
1132                    n = symstack[-1]
1133                    result = getattr(n, 'value', None)
1134                    return result
1135
1136            if t is None:
1137
1138
1139                # We have some kind of parsing error here.  To handle
1140                # this, we are going to push the current token onto
1141                # the tokenstack and replace it with an 'error' token.
1142                # If there are any synchronization rules, they may
1143                # catch it.
1144                #
1145                # In addition to pushing the error token, we call call
1146                # the user defined p_error() function if this is the
1147                # first syntax error.  This function is only called if
1148                # errorcount == 0.
1149                if errorcount == 0 or self.errorok:
1150                    errorcount = error_count
1151                    self.errorok = False
1152                    errtoken = lookahead
1153                    if errtoken.type == '$end':
1154                        errtoken = None               # End of file!
1155                    if self.errorfunc:
1156                        if errtoken and not hasattr(errtoken, 'lexer'):
1157                            errtoken.lexer = lexer
1158                        tok = call_errorfunc(self.errorfunc, errtoken, self)
1159                        if self.errorok:
1160                            # User must have done some kind of panic
1161                            # mode recovery on their own.  The
1162                            # returned token is the next lookahead
1163                            lookahead = tok
1164                            errtoken = None
1165                            continue
1166                    else:
1167                        if errtoken:
1168                            if hasattr(errtoken, 'lineno'):
1169                                lineno = lookahead.lineno
1170                            else:
1171                                lineno = 0
1172                            if lineno:
1173                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
1174                            else:
1175                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
1176                        else:
1177                            sys.stderr.write('yacc: Parse error in input. EOF\n')
1178                            return
1179
1180                else:
1181                    errorcount = error_count
1182
1183                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
1184                # entire parse has been rolled back and we're completely hosed.   The token is
1185                # discarded and we just keep going.
1186
1187                if len(statestack) <= 1 and lookahead.type != '$end':
1188                    lookahead = None
1189                    errtoken = None
1190                    state = 0
1191                    # Nuke the pushback stack
1192                    del lookaheadstack[:]
1193                    continue
1194
1195                # case 2: the statestack has a couple of entries on it, but we're
1196                # at the end of the file. nuke the top entry and generate an error token
1197
1198                # Start nuking entries on the stack
1199                if lookahead.type == '$end':
1200                    # Whoa. We're really hosed here. Bail out
1201                    return
1202
1203                if lookahead.type != 'error':
1204                    sym = symstack[-1]
1205                    if sym.type == 'error':
1206                        # Hmmm. Error is on top of stack, we'll just nuke input
1207                        # symbol and continue
1208                        lookahead = None
1209                        continue
1210
1211                    # Create the error symbol for the first time and make it the new lookahead symbol
1212                    t = YaccSymbol()
1213                    t.type = 'error'
1214
1215                    if hasattr(lookahead, 'lineno'):
1216                        t.lineno = t.endlineno = lookahead.lineno
1217                    if hasattr(lookahead, 'lexpos'):
1218                        t.lexpos = t.endlexpos = lookahead.lexpos
1219                    t.value = lookahead
1220                    lookaheadstack.append(lookahead)
1221                    lookahead = t
1222                else:
1223                    sym = symstack.pop()
1224                    statestack.pop()
1225                    state = statestack[-1]
1226
1227                continue
1228
1229            # Call an error function here
1230            raise RuntimeError('yacc: internal parser error!!!\n')
1231
1232        #--! parseopt-notrack-end
1233
1234# -----------------------------------------------------------------------------
1235#                          === Grammar Representation ===
1236#
1237# The following functions, classes, and variables are used to represent and
1238# manipulate the rules that make up a grammar.
1239# -----------------------------------------------------------------------------
1240
# Regular expression matching allowable grammar-rule/symbol identifiers.
# NOTE(review): this also accepts leading digits and hyphens -- looser than a
# Python identifier; presumably intentional for grammar symbol names.
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1243
1244# -----------------------------------------------------------------------------
1245# class Production:
1246#
1247# This class stores the raw information about a single production or grammar rule.
1248# A grammar rule refers to a specification such as this:
1249#
1250#       expr : expr PLUS term
1251#
1252# Here are the basic attributes defined on all productions
1253#
1254#       name     - Name of the production.  For example 'expr'
1255#       prod     - A list of symbols on the right side ['expr','PLUS','term']
1256#       prec     - Production precedence level
1257#       number   - Production number.
1258#       func     - Function that executes on reduce
1259#       file     - File where production function is defined
1260#       lineno   - Line number where production function is defined
1261#
1262# The following attributes are defined or optional.
1263#
1264#       len       - Length of the production (number of symbols on right hand side)
1265#       usyms     - Set of unique symbols found in the production
1266# -----------------------------------------------------------------------------
1267
class Production(object):
    """Raw information about a single production (grammar rule).

    A production corresponds to a specification such as::

        expr : expr PLUS term

    Attributes:
        name     - name of the production (e.g. 'expr')
        prod     - tuple of right-hand-side symbols, e.g. ('expr', 'PLUS', 'term')
        prec     - precedence as an (assoc, level) tuple
        number   - production number
        func     - name of the function that executes on reduce
        callable - bound reduction function (set by bind())
        file     - file where the production function is defined
        line     - line number where the production function is defined
        len      - number of symbols on the right-hand side
        usyms    - unique symbols appearing in the production
    """

    # Number of times this production has been used in a reduction
    reduced = 0

    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Cached length of the right-hand side
        self.len = len(self.prod)

        # Unique production symbols, preserving first-occurrence order
        unique = []
        for symbol in self.prod:
            if symbol not in unique:
                unique.append(symbol)
        self.usyms = unique

        # LR items for this production (filled in during table construction)
        self.lr_items = []
        self.lr_next = None

        # Human-readable form, e.g. 'expr -> expr PLUS term'
        rhs = ' '.join(self.prod) if self.prod else '<empty>'
        self.str = '%s -> %s' % (self.name, rhs)

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(%s)' % (self.str,)

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Python 2 truth protocol: a Production is always truthy, even when
        # its right-hand side is empty
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    def lr_item(self, n):
        """Return the LR item with the dot at position n, or None if n is
        past the end of the production."""
        if n > len(self.prod):
            return None
        item = LRItem(self, n)
        # Precompute the productions that can appear immediately after the dot
        try:
            item.lr_after = Prodnames[item.prod[n+1]]
        except (IndexError, KeyError):
            item.lr_after = []
        # ...and the grammar symbol immediately before the dot
        try:
            item.lr_before = item.prod[n-1]
        except IndexError:
            item.lr_before = None
        return item

    def bind(self, pdict):
        """Resolve self.func to a callable using the supplied dictionary."""
        if self.func:
            self.callable = pdict[self.func]
1335
# This class serves as a minimal stand-in for Production objects when
1337# reading table data from files.   It only contains information
1338# actually used by the LR parsing engine, plus some additional
1339# debugging information.
class MiniProduction(object):
    """Minimal stand-in for Production used when parsing tables are read back
    from files.  Carries only what the LR engine needs, plus a little
    debugging information."""

    def __init__(self, str, name, len, func, file, line):
        # (str/len shadow builtins, but the signature mirrors the saved-table
        # format and is kept as-is)
        self.str, self.name, self.len = str, name, len
        self.func, self.file, self.line = func, file, line
        self.callable = None

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % (self.str,)

    def bind(self, pdict):
        """Resolve self.func to a callable using the supplied dictionary."""
        if self.func:
            self.callable = pdict[self.func]
1360
1361
1362# -----------------------------------------------------------------------------
1363# class LRItem
1364#
1365# This class represents a specific stage of parsing a production rule.  For
1366# example:
1367#
1368#       expr : expr . PLUS term
1369#
1370# In the above, the "." represents the current location of the parse.  Here
1371# basic attributes:
1372#
1373#       name       - Name of the production.  For example 'expr'
1374#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
1375#       number     - Production number.
1376#
#       lr_next    - Next LR item. Example, if we are ' expr -> expr . PLUS term'
1378#                    then lr_next refers to 'expr -> expr PLUS . term'
1379#       lr_index   - LR item index (location of the ".") in the prod list.
1380#       lookaheads - LALR lookahead symbols for this item
1381#       len        - Length of the production (number of symbols on right hand side)
1382#       lr_after    - List of all productions that immediately follow
1383#       lr_before   - Grammar symbol immediately before
1384# -----------------------------------------------------------------------------
1385
class LRItem(object):
    """A production with a parse position marker ('.') in its right-hand
    side, e.g.  expr -> expr . PLUS term"""

    def __init__(self, p, n):
        self.name       = p.name
        self.number     = p.number
        self.lr_index   = n
        self.lookaheads = {}
        self.usyms      = p.usyms
        # Build the dotted right-hand side: insert '.' at position n
        dotted = list(p.prod)
        dotted.insert(n, '.')
        self.prod = tuple(dotted)
        self.len  = len(self.prod)

    def __str__(self):
        if self.prod:
            return '%s -> %s' % (self.name, ' '.join(self.prod))
        return '%s -> <empty>' % self.name

    def __repr__(self):
        return 'LRItem(%s)' % (self,)
1407
1408# -----------------------------------------------------------------------------
1409# rightmost_terminal()
1410#
1411# Return the rightmost terminal from a list of symbols.  Used in add_production()
1412# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    """Return the rightmost member of *symbols* that appears in *terminals*,
    or None when there is no terminal.  Used in add_production()."""
    for sym in reversed(symbols):
        if sym in terminals:
            return sym
    return None
1420
1421# -----------------------------------------------------------------------------
1422#                           === GRAMMAR CLASS ===
1423#
1424# The following class represents the contents of the specified grammar along
1425# with various computed properties such as first sets, follow sets, LR items, etc.
1426# This data is used for critical parts of the table generation process later.
1427# -----------------------------------------------------------------------------
1428
class GrammarError(YaccError):
    """Exception raised for errors detected in the grammar specification."""
    pass
1431
1432class Grammar(object):
1433    def __init__(self, terminals):
1434        self.Productions  = [None]  # A list of all of the productions.  The first
1435                                    # entry is always reserved for the purpose of
1436                                    # building an augmented grammar
1437
1438        self.Prodnames    = {}      # A dictionary mapping the names of nonterminals to a list of all
1439                                    # productions of that nonterminal.
1440
1441        self.Prodmap      = {}      # A dictionary that is only used to detect duplicate
1442                                    # productions.
1443
1444        self.Terminals    = {}      # A dictionary mapping the names of terminal symbols to a
1445                                    # list of the rules where they are used.
1446
1447        for term in terminals:
1448            self.Terminals[term] = []
1449
1450        self.Terminals['error'] = []
1451
1452        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
1453                                    # of rule numbers where they are used.
1454
1455        self.First        = {}      # A dictionary of precomputed FIRST(x) symbols
1456
1457        self.Follow       = {}      # A dictionary of precomputed FOLLOW(x) symbols
1458
1459        self.Precedence   = {}      # Precedence rules for each terminal. Contains tuples of the
1460                                    # form ('right',level) or ('nonassoc', level) or ('left',level)
1461
1462        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammer.
1463                                    # This is only used to provide error checking and to generate
1464                                    # a warning about unused precedence rules.
1465
1466        self.Start = None           # Starting symbol for the grammar
1467
1468
    def __len__(self):
        # Grammar length = number of productions (reserved slot 0 included)
        return len(self.Productions)
1471
    def __getitem__(self, index):
        # Index directly into the production list (g[n] is production n)
        return self.Productions[index]
1474
1475    # -----------------------------------------------------------------------------
1476    # set_precedence()
1477    #
1478    # Sets the precedence for a given terminal. assoc is the associativity such as
1479    # 'left','right', or 'nonassoc'.  level is a numeric level.
1480    #
1481    # -----------------------------------------------------------------------------
1482
1483    def set_precedence(self, term, assoc, level):
1484        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
1485        if term in self.Precedence:
1486            raise GrammarError('Precedence already specified for terminal %r' % term)
1487        if assoc not in ['left', 'right', 'nonassoc']:
1488            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
1489        self.Precedence[term] = (assoc, level)
1490
1491    # -----------------------------------------------------------------------------
1492    # add_production()
1493    #
1494    # Given an action function, this function assembles a production rule and
1495    # computes its precedence level.
1496    #
1497    # The production rule is supplied as a list of symbols.   For example,
1498    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
1499    # symbols ['expr','PLUS','term'].
1500    #
1501    # Precedence is determined by the precedence of the right-most non-terminal
1502    # or the precedence of a terminal specified by %prec.
1503    #
1504    # A variety of error checks are performed to make sure production symbols
1505    # are valid and that %prec is used correctly.
1506    # -----------------------------------------------------------------------------
1507
1508    def add_production(self, prodname, syms, func=None, file='', line=0):
1509
1510        if prodname in self.Terminals:
1511            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
1512        if prodname == 'error':
1513            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
1514        if not _is_identifier.match(prodname):
1515            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))
1516
1517        # Look for literal tokens
1518        for n, s in enumerate(syms):
1519            if s[0] in "'\"":
1520                try:
1521                    c = eval(s)
1522                    if (len(c) > 1):
1523                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
1524                                           (file, line, s, prodname))
1525                    if c not in self.Terminals:
1526                        self.Terminals[c] = []
1527                    syms[n] = c
1528                    continue
1529                except SyntaxError:
1530                    pass
1531            if not _is_identifier.match(s) and s != '%prec':
1532                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))
1533
1534        # Determine the precedence level
1535        if '%prec' in syms:
1536            if syms[-1] == '%prec':
1537                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
1538            if syms[-2] != '%prec':
1539                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
1540                                   (file, line))
1541            precname = syms[-1]
1542            prodprec = self.Precedence.get(precname)
1543            if not prodprec:
1544                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
1545            else:
1546                self.UsedPrecedence.add(precname)
1547            del syms[-2:]     # Drop %prec from the rule
1548        else:
1549            # If no %prec, precedence is determined by the rightmost terminal symbol
1550            precname = rightmost_terminal(syms, self.Terminals)
1551            prodprec = self.Precedence.get(precname, ('right', 0))
1552
1553        # See if the rule is already in the rulemap
1554        map = '%s -> %s' % (prodname, syms)
1555        if map in self.Prodmap:
1556            m = self.Prodmap[map]
1557            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
1558                               'Previous definition at %s:%d' % (m.file, m.line))
1559
1560        # From this point on, everything is valid.  Create a new Production instance
1561        pnumber  = len(self.Productions)
1562        if prodname not in self.Nonterminals:
1563            self.Nonterminals[prodname] = []
1564
1565        # Add the production number to Terminals and Nonterminals
1566        for t in syms:
1567            if t in self.Terminals:
1568                self.Terminals[t].append(pnumber)
1569            else:
1570                if t not in self.Nonterminals:
1571                    self.Nonterminals[t] = []
1572                self.Nonterminals[t].append(pnumber)
1573
1574        # Create a production and add it to the list of productions
1575        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
1576        self.Productions.append(p)
1577        self.Prodmap[map] = p
1578
1579        # Add to the global productions list
1580        try:
1581            self.Prodnames[prodname].append(p)
1582        except KeyError:
1583            self.Prodnames[prodname] = [p]
1584
1585    # -----------------------------------------------------------------------------
1586    # set_start()
1587    #
1588    # Sets the starting symbol and creates the augmented grammar.  Production
1589    # rule 0 is S' -> start where start is the start symbol.
1590    # -----------------------------------------------------------------------------
1591
    def set_start(self, start=None):
        """Set the grammar's start symbol and build the augmented grammar.

        Installs production 0 as S' -> start.  If *start* is omitted, the
        name of the first defined rule is used.  Raises GrammarError if
        the symbol is not a known nonterminal.
        """
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start
1600
1601    # -----------------------------------------------------------------------------
1602    # find_unreachable()
1603    #
1604    # Find all of the nonterminal symbols that can't be reached from the starting
1605    # symbol.  Returns a list of nonterminals that can't be reached.
1606    # -----------------------------------------------------------------------------
1607
1608    def find_unreachable(self):
1609
1610        # Mark all symbols that are reachable from a symbol s
1611        def mark_reachable_from(s):
1612            if s in reachable:
1613                return
1614            reachable.add(s)
1615            for p in self.Prodnames.get(s, []):
1616                for r in p.prod:
1617                    mark_reachable_from(r)
1618
1619        reachable = set()
1620        mark_reachable_from(self.Productions[0].prod[0])
1621        return [s for s in self.Nonterminals if s not in reachable]
1622
1623    # -----------------------------------------------------------------------------
1624    # infinite_cycles()
1625    #
1626    # This function looks at the various parsing rules and tries to detect
1627    # infinite recursion cycles (grammar rules where there is no possible way
1628    # to derive a string of only terminals).
1629    # -----------------------------------------------------------------------------
1630
1631    def infinite_cycles(self):
1632        terminates = {}
1633
1634        # Terminals:
1635        for t in self.Terminals:
1636            terminates[t] = True
1637
1638        terminates['$end'] = True
1639
1640        # Nonterminals:
1641
1642        # Initialize to false:
1643        for n in self.Nonterminals:
1644            terminates[n] = False
1645
1646        # Then propagate termination until no change:
1647        while True:
1648            some_change = False
1649            for (n, pl) in self.Prodnames.items():
1650                # Nonterminal n terminates iff any of its productions terminates.
1651                for p in pl:
1652                    # Production p terminates iff all of its rhs symbols terminate.
1653                    for s in p.prod:
1654                        if not terminates[s]:
1655                            # The symbol s does not terminate,
1656                            # so production p does not terminate.
1657                            p_terminates = False
1658                            break
1659                    else:
1660                        # didn't break from the loop,
1661                        # so every symbol s terminates
1662                        # so production p terminates.
1663                        p_terminates = True
1664
1665                    if p_terminates:
1666                        # symbol n terminates!
1667                        if not terminates[n]:
1668                            terminates[n] = True
1669                            some_change = True
1670                        # Don't need to consider any more productions for this n.
1671                        break
1672
1673            if not some_change:
1674                break
1675
1676        infinite = []
1677        for (s, term) in terminates.items():
1678            if not term:
1679                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1680                    # s is used-but-not-defined, and we've already warned of that,
1681                    # so it would be overkill to say that it's also non-terminating.
1682                    pass
1683                else:
1684                    infinite.append(s)
1685
1686        return infinite
1687
1688    # -----------------------------------------------------------------------------
1689    # undefined_symbols()
1690    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
1693    # and prod is the production where the symbol was used.
1694    # -----------------------------------------------------------------------------
1695    def undefined_symbols(self):
1696        result = []
1697        for p in self.Productions:
1698            if not p:
1699                continue
1700
1701            for s in p.prod:
1702                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
1703                    result.append((s, p))
1704        return result
1705
1706    # -----------------------------------------------------------------------------
1707    # unused_terminals()
1708    #
1709    # Find all terminals that were defined, but not used by the grammar.  Returns
1710    # a list of all symbols.
1711    # -----------------------------------------------------------------------------
1712    def unused_terminals(self):
1713        unused_tok = []
1714        for s, v in self.Terminals.items():
1715            if s != 'error' and not v:
1716                unused_tok.append(s)
1717
1718        return unused_tok
1719
1720    # ------------------------------------------------------------------------------
1721    # unused_rules()
1722    #
1723    # Find all grammar rules that were defined,  but not used (maybe not reachable)
1724    # Returns a list of productions.
1725    # ------------------------------------------------------------------------------
1726
1727    def unused_rules(self):
1728        unused_prod = []
1729        for s, v in self.Nonterminals.items():
1730            if not v:
1731                p = self.Prodnames[s][0]
1732                unused_prod.append(p)
1733        return unused_prod
1734
1735    # -----------------------------------------------------------------------------
1736    # unused_precedence()
1737    #
1738    # Returns a list of tuples (term,precedence) corresponding to precedence
1739    # rules that were never used by the grammar.  term is the name of the terminal
1740    # on which precedence was applied and precedence is a string such as 'left' or
1741    # 'right' corresponding to the type of precedence.
1742    # -----------------------------------------------------------------------------
1743
1744    def unused_precedence(self):
1745        unused = []
1746        for termname in self.Precedence:
1747            if not (termname in self.Terminals or termname in self.UsedPrecedence):
1748                unused.append((termname, self.Precedence[termname][0]))
1749
1750        return unused
1751
1752    # -------------------------------------------------------------------------
1753    # _first()
1754    #
1755    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
1756    #
1757    # During execution of compute_first1, the result may be incomplete.
1758    # Afterward (e.g., when called from compute_follow()), it will be complete.
1759    # -------------------------------------------------------------------------
1760    def _first(self, beta):
1761
1762        # We are computing First(x1,x2,x3,...,xn)
1763        result = []
1764        for x in beta:
1765            x_produces_empty = False
1766
1767            # Add all the non-<empty> symbols of First[x] to the result.
1768            for f in self.First[x]:
1769                if f == '<empty>':
1770                    x_produces_empty = True
1771                else:
1772                    if f not in result:
1773                        result.append(f)
1774
1775            if x_produces_empty:
1776                # We have to consider the next x in beta,
1777                # i.e. stay in the loop.
1778                pass
1779            else:
1780                # We don't have to consider any further symbols in beta.
1781                break
1782        else:
1783            # There was no 'break' from the loop,
1784            # so x_produces_empty was true for all x in beta,
1785            # so beta produces empty as well.
1786            result.append('<empty>')
1787
1788        return result
1789
1790    # -------------------------------------------------------------------------
1791    # compute_first()
1792    #
1793    # Compute the value of FIRST1(X) for all symbols
1794    # -------------------------------------------------------------------------
1795    def compute_first(self):
1796        if self.First:
1797            return self.First
1798
1799        # Terminals:
1800        for t in self.Terminals:
1801            self.First[t] = [t]
1802
1803        self.First['$end'] = ['$end']
1804
1805        # Nonterminals:
1806
1807        # Initialize to the empty set:
1808        for n in self.Nonterminals:
1809            self.First[n] = []
1810
1811        # Then propagate symbols until no change:
1812        while True:
1813            some_change = False
1814            for n in self.Nonterminals:
1815                for p in self.Prodnames[n]:
1816                    for f in self._first(p.prod):
1817                        if f not in self.First[n]:
1818                            self.First[n].append(f)
1819                            some_change = True
1820            if not some_change:
1821                break
1822
1823        return self.First
1824
1825    # ---------------------------------------------------------------------
1826    # compute_follow()
1827    #
1828    # Computes all of the follow sets for every non-terminal symbol.  The
1829    # follow set is the set of all symbols that might follow a given
1830    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
1831    # ---------------------------------------------------------------------
    def compute_follow(self, start=None):
        """Compute and cache FOLLOW(B) for every nonterminal B.

        FOLLOW(B) is the set of terminals that can appear immediately after
        B in some sentential form.  Uses the standard algorithm (Dragon
        book, 2nd Ed. p. 189): for each production A -> alpha B beta, add
        FIRST(beta) - {<empty>} to FOLLOW(B); if beta can derive empty (or
        B is rightmost), add FOLLOW(A) as well.  Iterates to a fixed point.
        Returns the memoized self.Follow dictionary.
        """
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Initialize every nonterminal's follow set to empty.
        for k in self.Nonterminals:
            self.Follow[k] = []

        if not start:
            start = self.Productions[1].name

        # End-of-input always follows the start symbol.
        self.Follow[start] = ['$end']

        while True:
            didadd = False
            for p in self.Productions[1:]:
                # Scan each production A -> x1 ... xn for nonterminals.
                for i, B in enumerate(p.prod):
                    if B in self.Nonterminals:
                        # B is a nonterminal: merge FIRST(beta), where beta
                        # is everything after B in this production.
                        fst = self._first(p.prod[i+1:])
                        hasempty = False
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = True
                            if f == '<empty>':
                                hasempty = True
                        if hasempty or i == (len(p.prod)-1):
                            # beta can vanish (or B is rightmost), so
                            # everything in FOLLOW(A) also follows B.
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = True
            if not didadd:
                break
        return self.Follow
1874
1875
1876    # -----------------------------------------------------------------------------
1877    # build_lritems()
1878    #
1879    # This function walks the list of productions and builds a complete set of the
1880    # LR items.  The LR items are stored in two ways:  First, they are uniquely
1881    # numbered and placed in the list _lritems.  Second, a linked list of LR items
1882    # is built for each production.  For example:
1883    #
1884    #   E -> E PLUS E
1885    #
1886    # Creates the list
1887    #
1888    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
1889    # -----------------------------------------------------------------------------
1890
    def build_lritems(self):
        """Walk every production and attach its chain of LR items.

        For each production p, p.lr_items becomes the list of items with
        the dot at positions 0..len(p), and each item's lr_next links to
        the item with the dot advanced one place (the final item's lr_next
        is None).  Each item also precomputes:
          lr_after  -- the productions of the nonterminal right after the dot
          lr_before -- the symbol at prod[i-1] (just before the dot)
        """
        for p in self.Productions:
            # lastlri starts as the production itself so that p.lr_next
            # points at the first item of the chain.
            lastlri = p
            i = 0
            lr_items = []
            while True:
                # One position past the end yields the terminating None link.
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p, i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError, KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri:
                    break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
1918
1919# -----------------------------------------------------------------------------
1920#                            == Class LRTable ==
1921#
# This class represents a table of LR parsing information.
1923# Methods for generating the tables are not defined here.  They are defined
1924# in the derived class LRGeneratedTable.
1925# -----------------------------------------------------------------------------
1926
class VersionError(YaccError):
    """Raised when a saved parser table was produced by an incompatible
    version of this module."""
    pass
1929
class LRTable(object):
    """Container for LR parsing tables (action table, goto table, and
    production list).

    Table *generation* is implemented in the derived LRGeneratedTable
    class; this base class only knows how to load previously generated
    tables from a module or from a pickle file.
    """

    def __init__(self):
        self.lr_action = None       # Action table
        self.lr_goto = None         # Goto table
        self.lr_productions = None  # List of MiniProduction objects
        self.lr_method = None       # Method used to build the table

    def read_table(self, module):
        """Load tables from *module* (a module object, or a module name to
        import).  Returns the table signature string.  Raises VersionError
        if the table was written by an incompatible module version."""
        if isinstance(module, types.ModuleType):
            parsetab = module
        else:
            # Import by name; __import__ avoids exec() on a format string.
            __import__(module)
            parsetab = sys.modules[module]

        if parsetab._tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self, filename):
        """Load tables from the pickle file *filename*.  Returns the table
        signature.  Raises ImportError if the file does not exist and
        VersionError if it was written by an incompatible version."""
        try:
            import cPickle as pickle    # Python 2 fast path
        except ImportError:
            import pickle

        if not os.path.exists(filename):
            raise ImportError

        # Context manager guarantees the file is closed even if one of the
        # pickle.load() calls raises (the original leaked the handle).
        with open(filename, 'rb') as in_f:
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError('yacc table file version is out of date')
            self.lr_method = pickle.load(in_f)
            signature      = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto   = pickle.load(in_f)
            productions    = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        for p in self.lr_productions:
            p.bind(pdict)
1988
1989
1990# -----------------------------------------------------------------------------
1991#                           === LR Generator ===
1992#
1993# The following classes and functions are used to generate LR parsing tables on
1994# a grammar.
1995# -----------------------------------------------------------------------------
1996
1997# -----------------------------------------------------------------------------
1998# digraph()
1999# traverse()
2000#
2001# The following two functions are used to compute set valued functions
2002# of the form:
2003#
2004#     F(x) = F'(x) U U{F(y) | x R y}
2005#
2006# This is used to compute the values of Read() sets as well as FOLLOW sets
2007# in LALR(1) generation.
2008#
2009# Inputs:  X    - An input set
2010#          R    - A relation
2011#          FP   - Set-valued function
2012# ------------------------------------------------------------------------------
2013
def digraph(X, R, FP):
    """Compute F(x) = FP(x) U union{F(y) : x R y} for every x in X.

    X  -- the input set of nodes
    R  -- relation: R(x) yields the nodes related to x
    FP -- base set-valued function

    Used to compute Read() and FOLLOW sets during LALR(1) generation.
    Returns a dictionary mapping each x to its computed set F(x).
    """
    N = dict.fromkeys(X, 0)    # Traversal bookkeeping; 0 means unvisited
    stack = []
    F = {}
    for node in X:
        if N[node] == 0:
            traverse(node, N, stack, F, X, R, FP)
    return F
2024
def traverse(x, N, stack, F, X, R, FP):
    """Depth-first traversal step for digraph() (DeRemer & Pennello).

    Computes F(x) = FP(x) plus the union of F(y) for every y with x R y,
    collapsing strongly connected components so that every member of an
    SCC receives the same result set.  N records visit depth while a node
    is on the stack; MAXINT marks nodes whose value is final.
    """
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        # Propagate the minimum depth reachable from x (SCC detection,
        # analogous to the low-link update in Tarjan's algorithm).
        N[x] = min(N[x], N[y])
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        # x is the root of an SCC: pop the whole component and give every
        # member the same final F value.
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
2047
class LALRError(YaccError):
    """Raised for errors encountered during LR table construction."""
    pass
2050
2051# -----------------------------------------------------------------------------
2052#                             == LRGeneratedTable ==
2053#
2054# This class implements the LR table generation algorithm.  There are no
2055# public methods except for write()
2056# -----------------------------------------------------------------------------
2057
2058class LRGeneratedTable(LRTable):
2059    def __init__(self, grammar, method='LALR', log=None):
2060        if method not in ['SLR', 'LALR']:
2061            raise LALRError('Unsupported method %s' % method)
2062
2063        self.grammar = grammar
2064        self.lr_method = method
2065
2066        # Set up the logger
2067        if not log:
2068            log = NullLogger()
2069        self.log = log
2070
2071        # Internal attributes
2072        self.lr_action     = {}        # Action table
2073        self.lr_goto       = {}        # Goto table
2074        self.lr_productions  = grammar.Productions    # Copy of grammar Production array
2075        self.lr_goto_cache = {}        # Cache of computed gotos
2076        self.lr0_cidhash   = {}        # Cache of closures
2077
2078        self._add_count    = 0         # Internal counter used to detect cycles
2079
2080        # Diagonistic information filled in by the table generator
2081        self.sr_conflict   = 0
2082        self.rr_conflict   = 0
2083        self.conflicts     = []        # List of conflicts
2084
2085        self.sr_conflicts  = []
2086        self.rr_conflicts  = []
2087
2088        # Build the tables
2089        self.grammar.build_lritems()
2090        self.grammar.compute_first()
2091        self.grammar.compute_follow()
2092        self.lr_parse_table()
2093
2094    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2095
    def lr0_closure(self, I):
        """Compute the LR(0) closure of the item set I.

        Returns a new list containing I plus, for every item with its dot
        before a nonterminal, the initial items of that nonterminal's
        productions (available via each item's precomputed lr_after list).
        The per-call _add_count stamp marks items already added, avoiding
        duplicates without having to clear flags between calls.
        """
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = True
        while didadd:
            didadd = False
            # NOTE: J deliberately grows while being iterated; items
            # appended here are visited later in this same pass, and
            # didadd forces another sweep until nothing new appears.
            for j in J:
                for x in j.lr_after:
                    if getattr(x, 'lr0_added', 0) == self._add_count:
                        continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = True

        return J
2114
2115    # Compute the LR(0) goto function goto(I,X) where I is a set
2116    # of LR(0) items and X is a grammar symbol.   This function is written
2117    # in a way that guarantees uniqueness of the generated goto sets
2118    # (i.e. the same goto set will never be returned as two different Python
2119    # objects).  With uniqueness, we can later do fast set comparisons using
2120    # id(obj) instead of element-wise comparison.
2121
    def lr0_goto(self, I, x):
        """Compute goto(I, x) for item set I and grammar symbol x.

        Results are cached so the same goto set is always returned as the
        *same* Python object, which lets later code compare states with
        id() instead of element-wise comparison.
        """
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result.  's' walks a trie (keyed by the sequence of
        # advanced items); the leaf under '$end' holds the canonical goto
        # set for that exact sequence.
        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g
2155
2156    # Compute the LR(0) sets of item function
2157    def lr0_items(self):
2158        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2159        i = 0
2160        for I in C:
2161            self.lr0_cidhash[id(I)] = i
2162            i += 1
2163
2164        # Loop over the items in C and each grammar symbols
2165        i = 0
2166        while i < len(C):
2167            I = C[i]
2168            i += 1
2169
2170            # Collect all of the symbols that could possibly be in the goto(I,X) sets
2171            asyms = {}
2172            for ii in I:
2173                for s in ii.usyms:
2174                    asyms[s] = None
2175
2176            for x in asyms:
2177                g = self.lr0_goto(I, x)
2178                if not g or id(g) in self.lr0_cidhash:
2179                    continue
2180                self.lr0_cidhash[id(g)] = len(C)
2181                C.append(g)
2182
2183        return C
2184
2185    # -----------------------------------------------------------------------------
2186    #                       ==== LALR(1) Parsing ====
2187    #
2188    # LALR(1) parsing is almost exactly the same as SLR except that instead of
2189    # relying upon Follow() sets when performing reductions, a more selective
2190    # lookahead set that incorporates the state of the LR(0) machine is utilized.
2191    # Thus, we mainly just have to focus on calculating the lookahead sets.
2192    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
2196    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
2197    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
2198    #
2199    # Further details can also be found in:
2200    #
2201    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2202    #      McGraw-Hill Book Company, (1985).
2203    #
2204    # -----------------------------------------------------------------------------
2205
2206    # -----------------------------------------------------------------------------
2207    # compute_nullable_nonterminals()
2208    #
2209    # Creates a dictionary containing all of the non-terminals that might produce
2210    # an empty production.
2211    # -----------------------------------------------------------------------------
2212
2213    def compute_nullable_nonterminals(self):
2214        nullable = set()
2215        num_nullable = 0
2216        while True:
2217            for p in self.grammar.Productions[1:]:
2218                if p.len == 0:
2219                    nullable.add(p.name)
2220                    continue
2221                for t in p.prod:
2222                    if t not in nullable:
2223                        break
2224                else:
2225                    nullable.add(p.name)
2226            if len(nullable) == num_nullable:
2227                break
2228            num_nullable = len(nullable)
2229        return nullable
2230
2231    # -----------------------------------------------------------------------------
2232    # find_nonterminal_trans(C)
2233    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
2235    # transitions.    These are transitions in which a dot appears immediately before
2236    # a non-terminal.   Returns a list of tuples of the form (state,N) where state
2237    # is the state number and N is the nonterminal symbol.
2238    #
2239    # The input C is the set of LR(0) items.
2240    # -----------------------------------------------------------------------------
2241
2242    def find_nonterminal_transitions(self, C):
2243        trans = []
2244        for stateno, state in enumerate(C):
2245            for p in state:
2246                if p.lr_index < p.len - 1:
2247                    t = (stateno, p.prod[p.lr_index+1])
2248                    if t[1] in self.grammar.Nonterminals:
2249                        if t not in trans:
2250                            trans.append(t)
2251        return trans
2252
2253    # -----------------------------------------------------------------------------
2254    # dr_relation()
2255    #
2256    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
2257    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2258    #
2259    # Returns a list of terminals.
2260    # -----------------------------------------------------------------------------
2261
2262    def dr_relation(self, C, trans, nullable):
2263        dr_set = {}
2264        state, N = trans
2265        terms = []
2266
2267        g = self.lr0_goto(C[state], N)
2268        for p in g:
2269            if p.lr_index < p.len - 1:
2270                a = p.prod[p.lr_index+1]
2271                if a in self.grammar.Terminals:
2272                    if a not in terms:
2273                        terms.append(a)
2274
2275        # This extra bit is to handle the start state
2276        if state == 0 and N == self.grammar.Productions[0].prod[0]:
2277            terms.append('$end')
2278
2279        return terms
2280
2281    # -----------------------------------------------------------------------------
2282    # reads_relation()
2283    #
2284    # Computes the READS() relation (p,A) READS (t,C).
2285    # -----------------------------------------------------------------------------
2286
2287    def reads_relation(self, C, trans, empty):
2288        # Look for empty transitions
2289        rel = []
2290        state, N = trans
2291
2292        g = self.lr0_goto(C[state], N)
2293        j = self.lr0_cidhash.get(id(g), -1)
2294        for p in g:
2295            if p.lr_index < p.len - 1:
2296                a = p.prod[p.lr_index + 1]
2297                if a in empty:
2298                    rel.append((j, a))
2299
2300        return rel
2301
2302    # -----------------------------------------------------------------------------
2303    # compute_lookback_includes()
2304    #
2305    # Determines the lookback and includes relations
2306    #
2307    # LOOKBACK:
2308    #
2309    # This relation is determined by running the LR(0) state machine forward.
2310    # For example, starting with a production "N : . A B C", we run it forward
2311    # to obtain "N : A B C ."   We then build a relationship between this final
2312    # state and the starting state.   These relationships are stored in a dictionary
2313    # lookdict.
2314    #
2315    # INCLUDES:
2316    #
2317    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2318    #
2319    # This relation is used to determine non-terminal transitions that occur
2320    # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
2321    # if the following holds:
2322    #
2323    #       B -> LAT, where T -> epsilon and p' -L-> p
2324    #
2325    # L is essentially a prefix (which may be empty), T is a suffix that must be
2326    # able to derive an empty string.  State p' must lead to state p with the string L.
2327    #
2328    # -----------------------------------------------------------------------------
2329
    def compute_lookback_includes(self, C, trans, nullable):
        '''Determine the LOOKBACK and INCLUDES relations.

        Inputs:
            C        -- list of LR(0) item sets (states)
            trans    -- list of non-terminal transitions (state, N)
            nullable -- set of non-terminals that can derive the empty string

        Returns a tuple (lookdict, includedict):
            lookdict maps each transition (state, N) to a list of
            (final_state, production) lookback pairs;
            includedict maps a transition to the list of transitions
            that INCLUDE it.
        '''
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions (used only for
        # O(1) membership tests below)
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state, N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N:
                    continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j, t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals:
                                break      # No forget it
                            if p.prod[li] not in nullable:
                                break
                            li = li + 1
                        else:
                            # while-loop ran to completion: everything after
                            # the dot derives empty, so (state,N) INCLUDES (j,t)
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j, t))

                    g = self.lr0_goto(C[j], t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                # that matches p but with the dot fully advanced
                for r in C[j]:
                    if r.name != p.name:
                        continue
                    if r.len != p.len:
                        continue
                    i = 0
                    # This loop is comparing a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]:
                            break
                        i = i + 1
                    else:
                        lookb.append((j, r))
            for i in includes:
                if i not in includedict:
                    includedict[i] = []
                includedict[i].append((state, N))
            lookdict[(state, N)] = lookb

        return lookdict, includedict
2397
2398    # -----------------------------------------------------------------------------
2399    # compute_read_sets()
2400    #
2401    # Given a set of LR(0) items, this function computes the read sets.
2402    #
2403    # Inputs:  C        =  Set of LR(0) items
2404    #          ntrans   = Set of nonterminal transitions
2405    #          nullable = Set of empty transitions
2406    #
2407    # Returns a set containing the read sets
2408    # -----------------------------------------------------------------------------
2409
2410    def compute_read_sets(self, C, ntrans, nullable):
2411        FP = lambda x: self.dr_relation(C, x, nullable)
2412        R =  lambda x: self.reads_relation(C, x, nullable)
2413        F = digraph(ntrans, R, FP)
2414        return F
2415
2416    # -----------------------------------------------------------------------------
2417    # compute_follow_sets()
2418    #
2419    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2420    # and an include set, this function computes the follow sets
2421    #
2422    # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2423    #
2424    # Inputs:
2425    #            ntrans     = Set of nonterminal transitions
2426    #            readsets   = Readset (previously computed)
2427    #            inclsets   = Include sets (previously computed)
2428    #
2429    # Returns a set containing the follow sets
2430    # -----------------------------------------------------------------------------
2431
2432    def compute_follow_sets(self, ntrans, readsets, inclsets):
2433        FP = lambda x: readsets[x]
2434        R  = lambda x: inclsets.get(x, [])
2435        F = digraph(ntrans, R, FP)
2436        return F
2437
2438    # -----------------------------------------------------------------------------
2439    # add_lookaheads()
2440    #
2441    # Attaches the lookahead symbols to grammar rules.
2442    #
2443    # Inputs:    lookbacks         -  Set of lookback relations
2444    #            followset         -  Computed follow set
2445    #
2446    # This function directly attaches the lookaheads to productions contained
2447    # in the lookbacks set
2448    # -----------------------------------------------------------------------------
2449
2450    def add_lookaheads(self, lookbacks, followset):
2451        for trans, lb in lookbacks.items():
2452            # Loop over productions in lookback
2453            for state, p in lb:
2454                if state not in p.lookaheads:
2455                    p.lookaheads[state] = []
2456                f = followset.get(trans, [])
2457                for a in f:
2458                    if a not in p.lookaheads[state]:
2459                        p.lookaheads[state].append(a)
2460
2461    # -----------------------------------------------------------------------------
2462    # add_lalr_lookaheads()
2463    #
2464    # This function does all of the work of adding lookahead information for use
2465    # with LALR parsing
2466    # -----------------------------------------------------------------------------
2467
2468    def add_lalr_lookaheads(self, C):
2469        # Determine all of the nullable nonterminals
2470        nullable = self.compute_nullable_nonterminals()
2471
2472        # Find all non-terminal transitions
2473        trans = self.find_nonterminal_transitions(C)
2474
2475        # Compute read sets
2476        readsets = self.compute_read_sets(C, trans, nullable)
2477
2478        # Compute lookback/includes relations
2479        lookd, included = self.compute_lookback_includes(C, trans, nullable)
2480
2481        # Compute LALR FOLLOW sets
2482        followsets = self.compute_follow_sets(trans, readsets, included)
2483
2484        # Add all of the lookaheads
2485        self.add_lookaheads(lookd, followsets)
2486
2487    # -----------------------------------------------------------------------------
2488    # lr_parse_table()
2489    #
2490    # This function constructs the parse tables for SLR or LALR
2491    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        '''Construct the SLR or LALR parse tables.

        Fills in self.lr_action and self.lr_goto, one entry per LR(0) state,
        resolving shift/reduce and reduce/reduce conflicts by precedence and
        recording them in self.sr_conflicts / self.rr_conflicts.  Progress
        and conflicts are written to self.log.
        '''
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto         # Goto array
        action = self.lr_action       # Action array
        log    = self.log             # Logger for output

        # NOTE(review): actionp is filled in below but never stored on self;
        # presumably kept only for symmetry/debugging -- confirm before removing
        actionp = {}                  # Action production array (temporary)

        log.info('Parsing method: %s', self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = []              # List of actions
            st_action  = {}           # terminal -> action (shift=j>0, reduce=-rule, accept=0)
            st_actionp = {}           # terminal -> production responsible for the action
            st_goto    = {}           # nonterminal -> goto state
            log.info('')
            log.info('state %d', st)
            log.info('')
            for p in I:
                log.info('    (%d) %s', p.number, p)
            log.info('')

            for p in I:
                    # Case 1: dot at the end of the production -> accept or reduce
                    if p.len == p.lr_index + 1:
                        if p.name == "S'":
                            # Start symbol. Accept!
                            st_action['$end'] = 0
                            st_actionp['$end'] = p
                        else:
                            # We are at the end of a production.  Reduce!
                            if self.lr_method == 'LALR':
                                laheads = p.lookaheads[st]
                            else:
                                laheads = self.grammar.Follow[p.name]
                            for a in laheads:
                                actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
                                r = st_action.get(a)
                                if r is not None:
                                    # Whoa. Have a shift/reduce or reduce/reduce conflict
                                    if r > 0:
                                        # Need to decide on shift or reduce here
                                        # By default we favor shifting. Need to add
                                        # some precedence rules here.
                                        sprec, slevel = Productions[st_actionp[a].number].prec
                                        rprec, rlevel = Precedence.get(a, ('right', 0))
                                        if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                            # We really need to reduce here.
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            if not slevel and not rlevel:
                                                log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                                self.sr_conflicts.append((st, a, 'reduce'))
                                            Productions[p.number].reduced += 1
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            # Non-associative: parse error on this token
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the shift
                                            if not rlevel:
                                                log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                                self.sr_conflicts.append((st, a, 'shift'))
                                    elif r < 0:
                                        # Reduce/reduce conflict.   In this case, we favor the rule
                                        # that was defined first in the grammar file
                                        oldp = Productions[-r]
                                        pp = Productions[p.number]
                                        if oldp.line > pp.line:
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            chosenp, rejectp = pp, oldp
                                            Productions[p.number].reduced += 1
                                            Productions[oldp.number].reduced -= 1
                                        else:
                                            chosenp, rejectp = oldp, pp
                                        self.rr_conflicts.append((st, chosenp, rejectp))
                                        log.info('  ! reduce/reduce conflict for %s resolved using rule %d (%s)',
                                                 a, st_actionp[a].number, st_actionp[a])
                                    else:
                                        raise LALRError('Unknown conflict in state %d' % st)
                                else:
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    Productions[p.number].reduced += 1
                    else:
                        # Case 2: dot before a symbol -> shift if it is a terminal
                        i = p.lr_index
                        a = p.prod[i+1]       # Get symbol right after the "."
                        if a in self.grammar.Terminals:
                            g = self.lr0_goto(I, a)
                            j = self.lr0_cidhash.get(id(g), -1)
                            if j >= 0:
                                # We are in a shift state
                                actlist.append((a, p, 'shift and go to state %d' % j))
                                r = st_action.get(a)
                                if r is not None:
                                    # Whoa have a shift/reduce or shift/shift conflict
                                    if r > 0:
                                        if r != j:
                                            raise LALRError('Shift/shift conflict in state %d' % st)
                                    elif r < 0:
                                        # Do a precedence check.
                                        #   -  if precedence of reduce rule is higher, we reduce.
                                        #   -  if precedence of reduce is same and left assoc, we reduce.
                                        #   -  otherwise we shift
                                        rprec, rlevel = Productions[st_actionp[a].number].prec
                                        sprec, slevel = Precedence.get(a, ('right', 0))
                                        if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                            # We decide to shift here... highest precedence to shift
                                            Productions[st_actionp[a].number].reduced -= 1
                                            st_action[a] = j
                                            st_actionp[a] = p
                                            if not rlevel:
                                                log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                                self.sr_conflicts.append((st, a, 'shift'))
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            # Non-associative: parse error on this token
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the reduce
                                            if not slevel and not rlevel:
                                                log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                                self.sr_conflicts.append((st, a, 'reduce'))

                                    else:
                                        raise LALRError('Unknown conflict in state %d' % st)
                                else:
                                    st_action[a] = j
                                    st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = {}
            for a, p, m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info('    %-15s %s', a, m)
                        _actprint[(a, m)] = 1
            log.info('')
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a, p, m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a, m) in _actprint:
                            log.debug('  ! %-15s [ %s ]', a, m)
                            not_used = 1
                            _actprint[(a, m)] = 1
            if not_used:
                log.debug('')

            # Construct the goto table for this state

            nkeys = {}
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I, n)
                j = self.lr0_cidhash.get(id(g), -1)
                if j >= 0:
                    st_goto[n] = j
                    log.info('    %-30s shift and go to state %d', n, j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
2668
2669    # -----------------------------------------------------------------------------
2670    # write()
2671    #
2672    # This function writes the LR parsing tables to a file
2673    # -----------------------------------------------------------------------------
2674
2675    def write_table(self, tabmodule, outputdir='', signature=''):
2676        if isinstance(tabmodule, types.ModuleType):
2677            raise IOError("Won't overwrite existing tabmodule")
2678
2679        basemodulename = tabmodule.split('.')[-1]
2680        filename = os.path.join(outputdir, basemodulename) + '.py'
2681        try:
2682            f = open(filename, 'w')
2683
2684            f.write('''
2685# %s
2686# This file is automatically generated. Do not edit.
2687_tabversion = %r
2688
2689_lr_method = %r
2690
2691_lr_signature = %r
2692    ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2693
2694            # Change smaller to 0 to go back to original tables
2695            smaller = 1
2696
2697            # Factor out names to try and make smaller
2698            if smaller:
2699                items = {}
2700
2701                for s, nd in self.lr_action.items():
2702                    for name, v in nd.items():
2703                        i = items.get(name)
2704                        if not i:
2705                            i = ([], [])
2706                            items[name] = i
2707                        i[0].append(s)
2708                        i[1].append(v)
2709
2710                f.write('\n_lr_action_items = {')
2711                for k, v in items.items():
2712                    f.write('%r:([' % k)
2713                    for i in v[0]:
2714                        f.write('%r,' % i)
2715                    f.write('],[')
2716                    for i in v[1]:
2717                        f.write('%r,' % i)
2718
2719                    f.write(']),')
2720                f.write('}\n')
2721
2722                f.write('''
2723_lr_action = {}
2724for _k, _v in _lr_action_items.items():
2725   for _x,_y in zip(_v[0],_v[1]):
2726      if not _x in _lr_action:  _lr_action[_x] = {}
2727      _lr_action[_x][_k] = _y
2728del _lr_action_items
2729''')
2730
2731            else:
2732                f.write('\n_lr_action = { ')
2733                for k, v in self.lr_action.items():
2734                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
2735                f.write('}\n')
2736
2737            if smaller:
2738                # Factor out names to try and make smaller
2739                items = {}
2740
2741                for s, nd in self.lr_goto.items():
2742                    for name, v in nd.items():
2743                        i = items.get(name)
2744                        if not i:
2745                            i = ([], [])
2746                            items[name] = i
2747                        i[0].append(s)
2748                        i[1].append(v)
2749
2750                f.write('\n_lr_goto_items = {')
2751                for k, v in items.items():
2752                    f.write('%r:([' % k)
2753                    for i in v[0]:
2754                        f.write('%r,' % i)
2755                    f.write('],[')
2756                    for i in v[1]:
2757                        f.write('%r,' % i)
2758
2759                    f.write(']),')
2760                f.write('}\n')
2761
2762                f.write('''
2763_lr_goto = {}
2764for _k, _v in _lr_goto_items.items():
2765   for _x, _y in zip(_v[0], _v[1]):
2766       if not _x in _lr_goto: _lr_goto[_x] = {}
2767       _lr_goto[_x][_k] = _y
2768del _lr_goto_items
2769''')
2770            else:
2771                f.write('\n_lr_goto = { ')
2772                for k, v in self.lr_goto.items():
2773                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
2774                f.write('}\n')
2775
2776            # Write production table
2777            f.write('_lr_productions = [\n')
2778            for p in self.lr_productions:
2779                if p.func:
2780                    f.write('  (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2781                                                          p.func, os.path.basename(p.file), p.line))
2782                else:
2783                    f.write('  (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2784            f.write(']\n')
2785            f.close()
2786
2787        except IOError as e:
2788            raise
2789
2790
2791    # -----------------------------------------------------------------------------
2792    # pickle_table()
2793    #
2794    # This function pickles the LR parsing tables to a supplied file object
2795    # -----------------------------------------------------------------------------
2796
2797    def pickle_table(self, filename, signature=''):
2798        try:
2799            import cPickle as pickle
2800        except ImportError:
2801            import pickle
2802        with open(filename, 'wb') as outf:
2803            pickle.dump(__tabversion__, outf, pickle_protocol)
2804            pickle.dump(self.lr_method, outf, pickle_protocol)
2805            pickle.dump(signature, outf, pickle_protocol)
2806            pickle.dump(self.lr_action, outf, pickle_protocol)
2807            pickle.dump(self.lr_goto, outf, pickle_protocol)
2808
2809            outp = []
2810            for p in self.lr_productions:
2811                if p.func:
2812                    outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2813                else:
2814                    outp.append((str(p), p.name, p.len, None, None, None))
2815            pickle.dump(outp, outf, pickle_protocol)
2816
2817# -----------------------------------------------------------------------------
2818#                            === INTROSPECTION ===
2819#
2820# The following functions and classes are used to implement the PLY
2821# introspection features followed by the yacc() function itself.
2822# -----------------------------------------------------------------------------
2823
2824# -----------------------------------------------------------------------------
2825# get_caller_module_dict()
2826#
2827# This function returns a dictionary containing all of the symbols defined within
2828# a caller further down the call stack.  This is used to get the environment
2829# associated with the yacc() call if none was provided.
2830# -----------------------------------------------------------------------------
2831
def get_caller_module_dict(levels):
    '''Return a dict of the symbols visible in the stack frame *levels* up.

    Globals are copied first, then shadowed by locals when the frame is not
    executing at module level.  Used to recover the environment of the
    yacc() caller when none is supplied explicitly.
    '''
    frame = sys._getframe(levels)
    symbols = dict(frame.f_globals)
    if frame.f_globals != frame.f_locals:
        symbols.update(frame.f_locals)
    return symbols
2838
2839# -----------------------------------------------------------------------------
2840# parse_grammar()
2841#
2842# This takes a raw grammar rule string and parses it into production data
2843# -----------------------------------------------------------------------------
def parse_grammar(doc, file, line):
    '''Parse a raw grammar docstring into production tuples.

    Returns a list of (file, line, prodname, syms) tuples, one per
    alternative.  A leading '|' continues the most recently named rule.
    Raises SyntaxError on malformed input.
    '''
    grammar = []
    lastp = None
    for offset, ps in enumerate(doc.splitlines(), start=1):
        dline = line + offset
        tokens = ps.split()
        if not tokens:
            continue
        try:
            if tokens[0] == '|':
                # Continuation of the previous rule
                if not lastp:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, dline))
                prodname = lastp
                syms = tokens[1:]
            else:
                prodname = tokens[0]
                lastp = prodname
                syms = tokens[2:]
                assign = tokens[1]
                if assign not in (':', '::='):
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, dline))

            grammar.append((file, dline, prodname, syms))
        except SyntaxError:
            raise
        except Exception:
            # Anything else (e.g. a one-token line) is reported as a
            # syntax error in the offending rule
            raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, dline, ps.strip()))

    return grammar
2877
2878# -----------------------------------------------------------------------------
2879# ParserReflect()
2880#
2881# This class represents information extracted for building a parser including
2882# start symbol, error function, tokens, precedence list, action functions,
2883# etc.
2884# -----------------------------------------------------------------------------
2885class ParserReflect(object):
2886    def __init__(self, pdict, log=None):
2887        self.pdict      = pdict
2888        self.start      = None
2889        self.error_func = None
2890        self.tokens     = None
2891        self.modules    = set()
2892        self.grammar    = []
2893        self.error      = False
2894
2895        if log is None:
2896            self.log = PlyLogger(sys.stderr)
2897        else:
2898            self.log = log
2899
2900    # Get all of the basic information
2901    def get_all(self):
2902        self.get_start()
2903        self.get_error_func()
2904        self.get_tokens()
2905        self.get_precedence()
2906        self.get_pfunctions()
2907
2908    # Validate all of the information
2909    def validate_all(self):
2910        self.validate_start()
2911        self.validate_error_func()
2912        self.validate_tokens()
2913        self.validate_precedence()
2914        self.validate_pfunctions()
2915        self.validate_modules()
2916        return self.error
2917
2918    # Compute a signature over the grammar
2919    def signature(self):
2920        try:
2921            from hashlib import md5
2922        except ImportError:
2923            from md5 import md5
2924        try:
2925            sig = md5()
2926            if self.start:
2927                sig.update(self.start.encode('latin-1'))
2928            if self.prec:
2929                sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1'))
2930            if self.tokens:
2931                sig.update(' '.join(self.tokens).encode('latin-1'))
2932            for f in self.pfuncs:
2933                if f[3]:
2934                    sig.update(f[3].encode('latin-1'))
2935        except (TypeError, ValueError):
2936            pass
2937
2938        digest = base64.b16encode(sig.digest())
2939        if sys.version_info[0] >= 3:
2940            digest = digest.decode('latin-1')
2941        return digest
2942
2943    # -----------------------------------------------------------------------------
2944    # validate_modules()
2945    #
2946    # This method checks to see if there are duplicated p_rulename() functions
2947    # in the parser module file.  Without this function, it is really easy for
2948    # users to make mistakes by cutting and pasting code fragments (and it's a real
2949    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
2950    # we just do a little regular expression pattern matching of def statements
2951    # to try and detect duplicates.
2952    # -----------------------------------------------------------------------------
2953
2954    def validate_modules(self):
2955        # Match def p_funcname(
2956        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
2957
2958        for module in self.modules:
2959            lines, linen = inspect.getsourcelines(module)
2960
2961            counthash = {}
2962            for linen, line in enumerate(lines):
2963                linen += 1
2964                m = fre.match(line)
2965                if m:
2966                    name = m.group(1)
2967                    prev = counthash.get(name)
2968                    if not prev:
2969                        counthash[name] = linen
2970                    else:
2971                        filename = inspect.getsourcefile(module)
2972                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
2973                                         filename, linen, name, prev)
2974
2975    # Get the start symbol
2976    def get_start(self):
2977        self.start = self.pdict.get('start')
2978
2979    # Validate the start symbol
2980    def validate_start(self):
2981        if self.start is not None:
2982            if not isinstance(self.start, string_types):
2983                self.log.error("'start' must be a string")
2984
2985    # Look for error handler
2986    def get_error_func(self):
2987        self.error_func = self.pdict.get('p_error')
2988
2989    # Validate the error function
2990    def validate_error_func(self):
2991        if self.error_func:
2992            if isinstance(self.error_func, types.FunctionType):
2993                ismethod = 0
2994            elif isinstance(self.error_func, types.MethodType):
2995                ismethod = 1
2996            else:
2997                self.log.error("'p_error' defined, but is not a function or method")
2998                self.error = True
2999                return
3000
3001            eline = self.error_func.__code__.co_firstlineno
3002            efile = self.error_func.__code__.co_filename
3003            module = inspect.getmodule(self.error_func)
3004            self.modules.add(module)
3005
3006            argcount = self.error_func.__code__.co_argcount - ismethod
3007            if argcount != 1:
3008                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
3009                self.error = True
3010
3011    # Get the tokens map
3012    def get_tokens(self):
3013        tokens = self.pdict.get('tokens')
3014        if not tokens:
3015            self.log.error('No token list is defined')
3016            self.error = True
3017            return
3018
3019        if not isinstance(tokens, (list, tuple)):
3020            self.log.error('tokens must be a list or tuple')
3021            self.error = True
3022            return
3023
3024        if not tokens:
3025            self.log.error('tokens is empty')
3026            self.error = True
3027            return
3028
3029        self.tokens = tokens
3030
3031    # Validate the tokens
3032    def validate_tokens(self):
3033        # Validate the tokens.
3034        if 'error' in self.tokens:
3035            self.log.error("Illegal token name 'error'. Is a reserved word")
3036            self.error = True
3037            return
3038
3039        terminals = set()
3040        for n in self.tokens:
3041            if n in terminals:
3042                self.log.warning('Token %r multiply defined', n)
3043            terminals.add(n)
3044
3045    # Get the precedence map (if any)
3046    def get_precedence(self):
3047        self.prec = self.pdict.get('precedence')
3048
3049    # Validate and parse the precedence map
3050    def validate_precedence(self):
3051        preclist = []
3052        if self.prec:
3053            if not isinstance(self.prec, (list, tuple)):
3054                self.log.error('precedence must be a list or tuple')
3055                self.error = True
3056                return
3057            for level, p in enumerate(self.prec):
3058                if not isinstance(p, (list, tuple)):
3059                    self.log.error('Bad precedence table')
3060                    self.error = True
3061                    return
3062
3063                if len(p) < 2:
3064                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
3065                    self.error = True
3066                    return
3067                assoc = p[0]
3068                if not isinstance(assoc, string_types):
3069                    self.log.error('precedence associativity must be a string')
3070                    self.error = True
3071                    return
3072                for term in p[1:]:
3073                    if not isinstance(term, string_types):
3074                        self.log.error('precedence items must be strings')
3075                        self.error = True
3076                        return
3077                    preclist.append((term, assoc, level+1))
3078        self.preclist = preclist
3079
3080    # Get all p_functions from the grammar
3081    def get_pfunctions(self):
3082        p_functions = []
3083        for name, item in self.pdict.items():
3084            if not name.startswith('p_') or name == 'p_error':
3085                continue
3086            if isinstance(item, (types.FunctionType, types.MethodType)):
3087                line = item.__code__.co_firstlineno
3088                module = inspect.getmodule(item)
3089                p_functions.append((line, module, name, item.__doc__))
3090
3091        # Sort all of the actions by line number; make sure to stringify
3092        # modules to make them sortable, since `line` may not uniquely sort all
3093        # p functions
3094        p_functions.sort(key=lambda p_function: (
3095            p_function[0],
3096            str(p_function[1]),
3097            p_function[2],
3098            p_function[3]))
3099        self.pfuncs = p_functions
3100
3101    # Validate all of the p_functions
3102    def validate_pfunctions(self):
3103        grammar = []
3104        # Check for non-empty symbols
3105        if len(self.pfuncs) == 0:
3106            self.log.error('no rules of the form p_rulename are defined')
3107            self.error = True
3108            return
3109
3110        for line, module, name, doc in self.pfuncs:
3111            file = inspect.getsourcefile(module)
3112            func = self.pdict[name]
3113            if isinstance(func, types.MethodType):
3114                reqargs = 2
3115            else:
3116                reqargs = 1
3117            if func.__code__.co_argcount > reqargs:
3118                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
3119                self.error = True
3120            elif func.__code__.co_argcount < reqargs:
3121                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
3122                self.error = True
3123            elif not func.__doc__:
3124                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
3125                                 file, line, func.__name__)
3126            else:
3127                try:
3128                    parsed_g = parse_grammar(doc, file, line)
3129                    for g in parsed_g:
3130                        grammar.append((name, g))
3131                except SyntaxError as e:
3132                    self.log.error(str(e))
3133                    self.error = True
3134
3135                # Looks like a valid grammar rule
3136                # Mark the file in which defined.
3137                self.modules.add(module)
3138
3139        # Secondary validation step that looks for p_ definitions that are not functions
3140        # or functions that look like they might be grammar rules.
3141
3142        for n, v in self.pdict.items():
3143            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
3144                continue
3145            if n.startswith('t_'):
3146                continue
3147            if n.startswith('p_') and n != 'p_error':
3148                self.log.warning('%r not defined as a function', n)
3149            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
3150                   (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
3151                if v.__doc__:
3152                    try:
3153                        doc = v.__doc__.split(' ')
3154                        if doc[1] == ':':
3155                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
3156                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
3157                    except IndexError:
3158                        pass
3159
3160        self.grammar = grammar
3161
3162# -----------------------------------------------------------------------------
3163# yacc(module)
3164#
3165# Build a parser
3166# -----------------------------------------------------------------------------
3167
3168def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
3169         check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
3170         outputdir=None, debuglog=None, errorlog=None, picklefile=None):
3171
3172    if tabmodule is None:
3173        tabmodule = tab_module
3174
3175    # Reference to the parsing method of the last built parser
3176    global parse
3177
3178    # If pickling is enabled, table files are not created
3179    if picklefile:
3180        write_tables = 0
3181
3182    if errorlog is None:
3183        errorlog = PlyLogger(sys.stderr)
3184
3185    # Get the module dictionary used for the parser
3186    if module:
3187        _items = [(k, getattr(module, k)) for k in dir(module)]
3188        pdict = dict(_items)
3189        # If no __file__ attribute is available, try to obtain it from the __module__ instead
3190        if '__file__' not in pdict:
3191            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
3192    else:
3193        pdict = get_caller_module_dict(2)
3194
3195    if outputdir is None:
3196        # If no output directory is set, the location of the output files
3197        # is determined according to the following rules:
3198        #     - If tabmodule specifies a package, files go into that package directory
3199        #     - Otherwise, files go in the same directory as the specifying module
3200        if isinstance(tabmodule, types.ModuleType):
3201            srcfile = tabmodule.__file__
3202        else:
3203            if '.' not in tabmodule:
3204                srcfile = pdict['__file__']
3205            else:
3206                parts = tabmodule.split('.')
3207                pkgname = '.'.join(parts[:-1])
3208                exec('import %s' % pkgname)
3209                srcfile = getattr(sys.modules[pkgname], '__file__', '')
3210        outputdir = os.path.dirname(srcfile)
3211
3212    # Determine if the module is package of a package or not.
3213    # If so, fix the tabmodule setting so that tables load correctly
3214    pkg = pdict.get('__package__')
3215    if pkg and isinstance(tabmodule, str):
3216        if '.' not in tabmodule:
3217            tabmodule = pkg + '.' + tabmodule
3218
3219
3220
3221    # Set start symbol if it's specified directly using an argument
3222    if start is not None:
3223        pdict['start'] = start
3224
3225    # Collect parser information from the dictionary
3226    pinfo = ParserReflect(pdict, log=errorlog)
3227    pinfo.get_all()
3228
3229    if pinfo.error:
3230        raise YaccError('Unable to build parser')
3231
3232    # Check signature against table files (if any)
3233    signature = pinfo.signature()
3234
3235    # Read the tables
3236    try:
3237        lr = LRTable()
3238        if picklefile:
3239            read_signature = lr.read_pickle(picklefile)
3240        else:
3241            read_signature = lr.read_table(tabmodule)
3242        if optimize or (read_signature == signature):
3243            try:
3244                lr.bind_callables(pinfo.pdict)
3245                parser = LRParser(lr, pinfo.error_func)
3246                parse = parser.parse
3247                return parser
3248            except Exception as e:
3249                errorlog.warning('There was a problem loading the table file: %r', e)
3250    except VersionError as e:
3251        errorlog.warning(str(e))
3252    except ImportError:
3253        pass
3254
3255    if debuglog is None:
3256        if debug:
3257            try:
3258                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
3259            except IOError as e:
3260                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
3261                debuglog = NullLogger()
3262        else:
3263            debuglog = NullLogger()
3264
3265    debuglog.info('Created by PLY version %s (https://www.dabeaz.com/ply/)', __version__)
3266
3267    errors = False
3268
3269    # Validate the parser information
3270    if pinfo.validate_all():
3271        raise YaccError('Unable to build parser')
3272
3273    if not pinfo.error_func:
3274        errorlog.warning('no p_error() function is defined')
3275
3276    # Create a grammar object
3277    grammar = Grammar(pinfo.tokens)
3278
3279    # Set precedence level for terminals
3280    for term, assoc, level in pinfo.preclist:
3281        try:
3282            grammar.set_precedence(term, assoc, level)
3283        except GrammarError as e:
3284            errorlog.warning('%s', e)
3285
3286    # Add productions to the grammar
3287    for funcname, gram in pinfo.grammar:
3288        file, line, prodname, syms = gram
3289        try:
3290            grammar.add_production(prodname, syms, funcname, file, line)
3291        except GrammarError as e:
3292            errorlog.error('%s', e)
3293            errors = True
3294
3295    # Set the grammar start symbols
3296    try:
3297        if start is None:
3298            grammar.set_start(pinfo.start)
3299        else:
3300            grammar.set_start(start)
3301    except GrammarError as e:
3302        errorlog.error(str(e))
3303        errors = True
3304
3305    if errors:
3306        raise YaccError('Unable to build parser')
3307
3308    # Verify the grammar structure
3309    undefined_symbols = grammar.undefined_symbols()
3310    for sym, prod in undefined_symbols:
3311        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
3312        errors = True
3313
3314    unused_terminals = grammar.unused_terminals()
3315    if unused_terminals:
3316        debuglog.info('')
3317        debuglog.info('Unused terminals:')
3318        debuglog.info('')
3319        for term in unused_terminals:
3320            errorlog.warning('Token %r defined, but not used', term)
3321            debuglog.info('    %s', term)
3322
3323    # Print out all productions to the debug log
3324    if debug:
3325        debuglog.info('')
3326        debuglog.info('Grammar')
3327        debuglog.info('')
3328        for n, p in enumerate(grammar.Productions):
3329            debuglog.info('Rule %-5d %s', n, p)
3330
3331    # Find unused non-terminals
3332    unused_rules = grammar.unused_rules()
3333    for prod in unused_rules:
3334        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)
3335
3336    if len(unused_terminals) == 1:
3337        errorlog.warning('There is 1 unused token')
3338    if len(unused_terminals) > 1:
3339        errorlog.warning('There are %d unused tokens', len(unused_terminals))
3340
3341    if len(unused_rules) == 1:
3342        errorlog.warning('There is 1 unused rule')
3343    if len(unused_rules) > 1:
3344        errorlog.warning('There are %d unused rules', len(unused_rules))
3345
3346    if debug:
3347        debuglog.info('')
3348        debuglog.info('Terminals, with rules where they appear')
3349        debuglog.info('')
3350        terms = list(grammar.Terminals)
3351        terms.sort()
3352        for term in terms:
3353            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))
3354
3355        debuglog.info('')
3356        debuglog.info('Nonterminals, with rules where they appear')
3357        debuglog.info('')
3358        nonterms = list(grammar.Nonterminals)
3359        nonterms.sort()
3360        for nonterm in nonterms:
3361            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
3362        debuglog.info('')
3363
3364    if check_recursion:
3365        unreachable = grammar.find_unreachable()
3366        for u in unreachable:
3367            errorlog.warning('Symbol %r is unreachable', u)
3368
3369        infinite = grammar.infinite_cycles()
3370        for inf in infinite:
3371            errorlog.error('Infinite recursion detected for symbol %r', inf)
3372            errors = True
3373
3374    unused_prec = grammar.unused_precedence()
3375    for term, assoc in unused_prec:
3376        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
3377        errors = True
3378
3379    if errors:
3380        raise YaccError('Unable to build parser')
3381
3382    # Run the LRGeneratedTable on the grammar
3383    if debug:
3384        errorlog.debug('Generating %s tables', method)
3385
3386    lr = LRGeneratedTable(grammar, method, debuglog)
3387
3388    if debug:
3389        num_sr = len(lr.sr_conflicts)
3390
3391        # Report shift/reduce and reduce/reduce conflicts
3392        if num_sr == 1:
3393            errorlog.warning('1 shift/reduce conflict')
3394        elif num_sr > 1:
3395            errorlog.warning('%d shift/reduce conflicts', num_sr)
3396
3397        num_rr = len(lr.rr_conflicts)
3398        if num_rr == 1:
3399            errorlog.warning('1 reduce/reduce conflict')
3400        elif num_rr > 1:
3401            errorlog.warning('%d reduce/reduce conflicts', num_rr)
3402
3403    # Write out conflicts to the output file
3404    if debug and (lr.sr_conflicts or lr.rr_conflicts):
3405        debuglog.warning('')
3406        debuglog.warning('Conflicts:')
3407        debuglog.warning('')
3408
3409        for state, tok, resolution in lr.sr_conflicts:
3410            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s',  tok, state, resolution)
3411
3412        already_reported = set()
3413        for state, rule, rejected in lr.rr_conflicts:
3414            if (state, id(rule), id(rejected)) in already_reported:
3415                continue
3416            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3417            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
3418            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
3419            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
3420            already_reported.add((state, id(rule), id(rejected)))
3421
3422        warned_never = []
3423        for state, rule, rejected in lr.rr_conflicts:
3424            if not rejected.reduced and (rejected not in warned_never):
3425                debuglog.warning('Rule (%s) is never reduced', rejected)
3426                errorlog.warning('Rule (%s) is never reduced', rejected)
3427                warned_never.append(rejected)
3428
3429    # Write the table file if requested
3430    if write_tables:
3431        try:
3432            lr.write_table(tabmodule, outputdir, signature)
3433        except IOError as e:
3434            errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
3435
3436    # Write a pickled version of the tables
3437    if picklefile:
3438        try:
3439            lr.pickle_table(picklefile, signature)
3440        except IOError as e:
3441            errorlog.warning("Couldn't create %r. %s" % (picklefile, e))
3442
3443    # Build the parser
3444    lr.bind_callables(pinfo.pdict)
3445    parser = LRParser(lr, pinfo.error_func)
3446
3447    parse = parser.parse
3448    return parser
3449