1# -----------------------------------------------------------------------------
2# ply: yacc.py
3#
4# Copyright (C) 2001-2018
5# David M. Beazley (Dabeaz LLC)
6# All rights reserved.
7#
8# Redistribution and use in source and binary forms, with or without
9# modification, are permitted provided that the following conditions are
10# met:
11#
12# * Redistributions of source code must retain the above copyright notice,
13#   this list of conditions and the following disclaimer.
14# * Redistributions in binary form must reproduce the above copyright notice,
15#   this list of conditions and the following disclaimer in the documentation
16#   and/or other materials provided with the distribution.
17# * Neither the name of the David Beazley or Dabeaz LLC may be used to
18#   endorse or promote products derived from this software without
19#  specific prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32# -----------------------------------------------------------------------------
33#
34# This implements an LR parser that is constructed from grammar rules defined
35# as Python functions. The grammar is specified by supplying the BNF inside
36# Python documentation strings.  The inspiration for this technique was borrowed
37# from John Aycock's Spark parsing system.  PLY might be viewed as cross between
38# Spark and the GNU bison utility.
39#
40# The current implementation is only somewhat object-oriented. The
41# LR parser itself is defined in terms of an object (which allows multiple
42# parsers to co-exist).  However, most of the variables used during table
43# construction are defined in terms of global variables.  Users shouldn't
44# notice unless they are trying to define multiple parsers at the same
45# time using threads (in which case they should have their head examined).
46#
47# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
48# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
49# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
50# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
51# by the more efficient DeRemer and Pennello algorithm.
52#
53# :::::::: WARNING :::::::
54#
55# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what might be
# considered good Python "coding style."   Modify the code at your
# own risk!
60# ----------------------------------------------------------------------------
61
62import re
63import types
64import sys
65import os.path
66import inspect
67import warnings
68
__version__    = '3.11'
__tabversion__ = '3.10'

#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------

yaccdebug   = True             # Debugging mode.  If set, yacc generates
                               # a 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = False            # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files

# String type-checking compatibility: pick the base string type appropriate
# for the running interpreter (basestring only exists on Python 2).
if sys.version_info[0] < 3:
    string_types = basestring
else:
    string_types = str

# Largest representable integer on this platform.
MAXINT = sys.maxsize
101
102# This object is a stand-in for a logging object created by the
103# logging module.   PLY will use this by default to create things
104# such as the parser.out file.  If a user wants more detailed
105# information, they can create their own logging object and pass
106# it into PLY.
107
class PlyLogger(object):
    """Minimal stand-in for a logger from the ``logging`` module.

    Formats each message with %-style arguments and writes it, newline
    terminated, to the file-like object *f*.  PLY uses this by default to
    produce output such as the parser.out file; users who want richer
    behavior can supply a real logging object instead.
    """

    def __init__(self, f):
        self.f = f

    def debug(self, msg, *args, **kwargs):
        self.f.write('%s\n' % (msg % args))

    info = debug

    def warning(self, msg, *args, **kwargs):
        self.f.write('WARNING: %s\n' % (msg % args))

    def error(self, msg, *args, **kwargs):
        self.f.write('ERROR: %s\n' % (msg % args))

    critical = debug
124
125# Null logger is used when no output is generated. Does nothing.
# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    # Intercept *every* attribute access (including method lookups) and
    # return the logger itself, so any chain of attribute accesses and
    # calls silently collapses to a no-op.
    def __getattribute__(self, name):
        return self

    # Returning self from __call__ keeps chained calls harmless too.
    def __call__(self, *args, **kwargs):
        return self
132
133# Exception raised for yacc-related errors
class YaccError(Exception):
    """Exception raised for yacc-related errors."""
    pass
136
137# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    """Return a short debug description ``<type @ 0xADDR> (repr)`` of *r*.

    A repr containing newlines is re-escaped onto a single line, and any
    repr longer than the module-level ``resultlimit`` is truncated with
    a trailing ' ...'.
    """
    text = repr(r)
    if '\n' in text:
        # Repr the repr so embedded newlines become escape sequences
        text = repr(text)
    if len(text) > resultlimit:
        text = text[:resultlimit] + ' ...'
    return '<%s @ 0x%x> (%s)' % (type(r).__name__, id(r), text)
146
147# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    """Return a compact description of stack entry *r* for debug traces.

    Reprs shorter than 16 characters are shown verbatim (newlines
    re-escaped); anything longer is abbreviated to '<typename @ 0xADDR>'.
    """
    text = repr(r)
    if '\n' in text:
        # Repr the repr so embedded newlines become escape sequences
        text = repr(text)
    if len(text) >= 16:
        return '<%s @ 0x%x>' % (type(r).__name__, id(r))
    return text
156
157# Panic mode error recovery support.   This feature is being reworked--much of the
158# code here is to offer a deprecation/backwards compatible transition
159
# Stand-ins for the active parser's errok()/token()/restart() methods.
# call_errorfunc() installs the real bound methods here for the duration
# of a p_error() call so that legacy global-function callers keep working.
_errok = None
_token = None
_restart = None
# Deprecation warning text shown when the legacy global helpers are used.
_warnmsg = '''PLY: Don't use global functions errok(), token(), and restart() in p_error().
Instead, invoke the methods on the associated parser instance:

    def p_error(p):
        ...
        # Use parser.errok(), parser.token(), parser.restart()
        ...

    parser = yacc.yacc()
'''
173
def errok():
    """Deprecated module-level wrapper for parser.errok()."""
    warnings.warn(_warnmsg)
    return _errok()

def restart():
    """Deprecated module-level wrapper for parser.restart()."""
    warnings.warn(_warnmsg)
    return _restart()

def token():
    """Deprecated module-level wrapper for parser.token()."""
    warnings.warn(_warnmsg)
    return _token()
185
186# Utility function to call the p_error() function with some deprecation hacks
def call_errorfunc(errorfunc, token, parser):
    """Invoke the user's p_error() callback with deprecation support.

    Temporarily publishes *parser*'s errok/token/restart bound methods as
    the module-level globals so legacy p_error() implementations that call
    the deprecated global helpers continue to work, then removes them and
    returns whatever the callback returned.
    """
    global _errok, _token, _restart
    _errok, _token, _restart = parser.errok, parser.token, parser.restart
    result = errorfunc(token)
    try:
        # The callback itself may already have deleted the globals
        del _errok, _token, _restart
    except NameError:
        pass
    return result
198
199#-----------------------------------------------------------------------------
200#                        ===  LR Parsing Engine ===
201#
202# The following classes are used for the LR parser itself.  These are not
203# used during table construction and are independent of the actual LR
204# table generation algorithm
205#-----------------------------------------------------------------------------
206
207# This class is used to hold non-terminal grammar symbols during parsing.
208# It normally has the following attributes set:
209#        .type       = Grammar symbol type
210#        .value      = Symbol value
211#        .lineno     = Starting line number
212#        .endlineno  = Ending line number (optional, set automatically)
213#        .lexpos     = Starting lex position
214#        .endlexpos  = Ending lex position (optional, set automatically)
215
class YaccSymbol:
    """Container for a grammar symbol held on the parsing stack.

    Attributes such as .type, .value, .lineno, .endlineno, .lexpos and
    .endlexpos are assigned externally by the parsing engine; both the
    string and repr forms display only the symbol's type name.
    """

    def __str__(self):
        return self.type

    def __repr__(self):
        return self.type
222
223# This class is a wrapper around the objects actually passed to each
224# grammar rule.   Index lookup and assignment actually assign the
225# .value attribute of the underlying YaccSymbol object.
226# The lineno() method returns the line number of a given
227# item (or 0 if not defined).   The linespan() method returns
228# a tuple of (startline,endline) representing the range of lines
229# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
230# representing the range of positional information for a symbol.
231
class YaccProduction:
    """Wrapper around the symbols matched by a grammar rule.

    Index lookup and assignment operate on the .value attribute of the
    underlying YaccSymbol objects; negative indices reach back into the
    parser's symbol stack.  lineno()/linespan() and lexpos()/lexspan()
    expose line-number and lexer-position information for individual
    items (defaulting to 0 when the symbol carries none).
    """

    def __init__(self, s, stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self, n):
        # Slices and non-negative indices address the production slice;
        # negative indices address the parser's symbol stack instead.
        if isinstance(n, slice):
            return [item.value for item in self.slice[n]]
        if n >= 0:
            return self.slice[n].value
        return self.stack[n].value

    def __setitem__(self, n, v):
        self.slice[n].value = v

    def __getslice__(self, i, j):
        # Python 2 compatibility for p[i:j]
        return [item.value for item in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self, n):
        """Return the starting line number of item *n* (0 if unset)."""
        return getattr(self.slice[n], 'lineno', 0)

    def set_lineno(self, n, lineno):
        """Set the starting line number of item *n*."""
        self.slice[n].lineno = lineno

    def linespan(self, n):
        """Return a (startline, endline) tuple for item *n*."""
        first = getattr(self.slice[n], 'lineno', 0)
        last = getattr(self.slice[n], 'endlineno', first)
        return first, last

    def lexpos(self, n):
        """Return the starting lexer position of item *n* (0 if unset)."""
        return getattr(self.slice[n], 'lexpos', 0)

    def set_lexpos(self, n, lexpos):
        """Set the starting lexer position of item *n*."""
        self.slice[n].lexpos = lexpos

    def lexspan(self, n):
        """Return a (startpos, endpos) tuple for item *n*."""
        begin = getattr(self.slice[n], 'lexpos', 0)
        end = getattr(self.slice[n], 'endlexpos', begin)
        return begin, end

    def error(self):
        """Signal a syntax error from within a grammar rule."""
        raise SyntaxError
280
281# -----------------------------------------------------------------------------
282#                               == LRParser ==
283#
284# The LR Parsing engine.
285# -----------------------------------------------------------------------------
286
class LRParser:
    """The LR parsing engine.

    *lrtab* supplies the parsing tables (lr_productions, lr_action,
    lr_goto); *errorf* is the user's p_error() callback, or None.
    """

    def __init__(self, lrtab, errorf):
        self.productions = lrtab.lr_productions
        self.action = lrtab.lr_action
        self.goto = lrtab.lr_goto
        self.errorfunc = errorf
        self.set_defaulted_states()
        self.errorok = True
295
    def errok(self):
        """Clear the error flag so the parser leaves error-recovery mode.

        Typically invoked from a user's p_error() handler (see _warnmsg).
        """
        self.errorok = True
298
    def restart(self):
        """Reset the parser to its start state, discarding all stacks.

        The stacks are cleared in place (del stack[:]) rather than
        rebound, because the parse methods hold local aliases to the
        same list objects.
        """
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)
306
307    # Defaulted state support.
308    # This method identifies parser states where there is only one possible reduction action.
    # For such states, the parser can choose to perform a rule reduction without consuming
310    # the next look-ahead token.  This delayed invocation of the tokenizer can be useful in
311    # certain kinds of advanced parsing situations where the lexer and parser interact with
312    # each other or change states (i.e., manipulation of scope, lexer states, etc.).
313    #
314    # See:  http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
315    def set_defaulted_states(self):
316        self.defaulted_states = {}
317        for state, actions in self.action.items():
318            rules = list(actions.values())
319            if len(rules) == 1 and rules[0] < 0:
320                self.defaulted_states[state] = rules[0]
321
    def disable_defaulted_states(self):
        """Turn off the defaulted-state optimization entirely."""
        self.defaulted_states = {}
324
325    def parse(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
326        if debug or yaccdevel:
327            if isinstance(debug, int):
328                debug = PlyLogger(sys.stderr)
329            return self.parsedebug(input, lexer, debug, tracking, tokenfunc)
330        elif tracking:
331            return self.parseopt(input, lexer, debug, tracking, tokenfunc)
332        else:
333            return self.parseopt_notrack(input, lexer, debug, tracking, tokenfunc)
334
335
336    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
337    # parsedebug().
338    #
339    # This is the debugging enabled version of parse().  All changes made to the
340    # parsing engine should be made here.   Optimized versions of this function
341    # are automatically created by the ply/ygen.py script.  This script cuts out
342    # sections enclosed in markers such as this:
343    #
344    #      #--! DEBUG
345    #      statements
346    #      #--! DEBUG
347    #
348    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
349
    def parsedebug(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """Debugging version of the LR parsing engine.

        This is the reference implementation: all changes to the engine
        must be made here.  The optimized parseopt()/parseopt_notrack()
        variants are generated mechanically by ply/ygen.py, which strips
        the sections bracketed by #--! DEBUG and #--! TRACKING markers —
        do not remove or reword those marker comments.
        """
        #--! parsedebug-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery

        #--! DEBUG
        debug.info('PLY: PARSE DEBUG START')
        #--! DEBUG

        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack   = []                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            #--! DEBUG
            debug.debug('')
            debug.debug('State  : %s', state)
            #--! DEBUG

            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        # End of input: synthesize the $end marker
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                # Defaulted state: reduce without consuming a lookahead
                t = defaulted_states[state]
                #--! DEBUG
                debug.debug('Defaulted state %s: Reduce using %d', state, -t)
                #--! DEBUG

            #--! DEBUG
            debug.debug('Stack  : %s',
                        ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            #--! DEBUG

            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    #--! DEBUG
                    debug.debug('Action : Shift and goto state %s', t)
                    #--! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    #--! DEBUG
                    if plen:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str,
                                   '['+','.join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+']',
                                   goto[statestack[-1-plen]][pname])
                    else:
                        debug.info('Action : Reduce rule [%s] with %s and goto state %d', p.str, [],
                                   goto[statestack[-1]][pname])

                    #--! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        #--! TRACKING
                        if tracking:
                            # Propagate start position from the first item and
                            # end position from the last item of the production
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
                        #--! TRACKING

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:

                        #--! TRACKING
                        if tracking:
                            # Empty production: position comes from the lexer
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        #--! TRACKING

                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            #--! DEBUG
                            debug.info('Result : %s', format_result(pslice[0]))
                            #--! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the value of the start symbol is the result
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    #--! DEBUG
                    debug.info('Done   : Returning %s', format_result(result))
                    debug.info('PLY: PARSE DEBUG END')
                    #--! DEBUG
                    return result

            if t is None:

                #--! DEBUG
                debug.error('Error  : %s',
                            ('%s . %s' % (' '.join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                #--! DEBUG

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                # NOTE(review): this message lacks a trailing '\n',
                                # unlike the line-number branch above — confirm upstream.
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        #--! TRACKING
                        if tracking:
                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
                        #--! TRACKING
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    # Lookahead is already an error token: pop a state and
                    # a symbol off the stacks and try again
                    sym = symstack.pop()
                    #--! TRACKING
                    if tracking:
                        lookahead.lineno = sym.lineno
                        lookahead.lexpos = sym.lexpos
                    #--! TRACKING
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parsedebug-end
688
689    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
690    # parseopt().
691    #
692    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY!
693    # This code is automatically generated by the ply/ygen.py script. Make
694    # changes to the parsedebug() method instead.
695    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
696
697    def parseopt(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
698        #--! parseopt-start
699        lookahead = None                         # Current lookahead symbol
700        lookaheadstack = []                      # Stack of lookahead symbols
701        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
702        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
703        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
704        defaulted_states = self.defaulted_states # Local reference to defaulted states
705        pslice  = YaccProduction(None)           # Production object passed to grammar rules
706        errorcount = 0                           # Used during error recovery
707
708
709        # If no lexer was given, we will try to use the lex module
710        if not lexer:
711            from . import lex
712            lexer = lex.lexer
713
714        # Set up the lexer and parser objects on pslice
715        pslice.lexer = lexer
716        pslice.parser = self
717
718        # If input was supplied, pass to lexer
719        if input is not None:
720            lexer.input(input)
721
722        if tokenfunc is None:
723            # Tokenize function
724            get_token = lexer.token
725        else:
726            get_token = tokenfunc
727
728        # Set the parser() token method (sometimes used in error recovery)
729        self.token = get_token
730
731        # Set up the state and symbol stacks
732
733        statestack = []                # Stack of parsing states
734        self.statestack = statestack
735        symstack   = []                # Stack of grammar symbols
736        self.symstack = symstack
737
738        pslice.stack = symstack         # Put in the production
739        errtoken   = None               # Err token
740
741        # The start state is assumed to be (0,$end)
742
743        statestack.append(0)
744        sym = YaccSymbol()
745        sym.type = '$end'
746        symstack.append(sym)
747        state = 0
748        while True:
749            # Get the next symbol on the input.  If a lookahead symbol
750            # is already set, we just use that. Otherwise, we'll pull
751            # the next token off of the lookaheadstack or from the lexer
752
753
754            if state not in defaulted_states:
755                if not lookahead:
756                    if not lookaheadstack:
757                        lookahead = get_token()     # Get the next token
758                    else:
759                        lookahead = lookaheadstack.pop()
760                    if not lookahead:
761                        lookahead = YaccSymbol()
762                        lookahead.type = '$end'
763
764                # Check the action table
765                ltype = lookahead.type
766                t = actions[state].get(ltype)
767            else:
768                t = defaulted_states[state]
769
770
771            if t is not None:
772                if t > 0:
773                    # shift a symbol on the stack
774                    statestack.append(t)
775                    state = t
776
777
778                    symstack.append(lookahead)
779                    lookahead = None
780
781                    # Decrease error count on successful shift
782                    if errorcount:
783                        errorcount -= 1
784                    continue
785
786                if t < 0:
787                    # reduce a symbol on the stack, emit a production
788                    p = prod[-t]
789                    pname = p.name
790                    plen  = p.len
791
792                    # Get production function
793                    sym = YaccSymbol()
794                    sym.type = pname       # Production name
795                    sym.value = None
796
797
798                    if plen:
799                        targ = symstack[-plen-1:]
800                        targ[0] = sym
801
802                        #--! TRACKING
803                        if tracking:
804                            t1 = targ[1]
805                            sym.lineno = t1.lineno
806                            sym.lexpos = t1.lexpos
807                            t1 = targ[-1]
808                            sym.endlineno = getattr(t1, 'endlineno', t1.lineno)
809                            sym.endlexpos = getattr(t1, 'endlexpos', t1.lexpos)
810                        #--! TRACKING
811
812                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
813                        # The code enclosed in this section is duplicated
814                        # below as a performance optimization.  Make sure
815                        # changes get made in both locations.
816
817                        pslice.slice = targ
818
819                        try:
820                            # Call the grammar rule with our special slice object
821                            del symstack[-plen:]
822                            self.state = state
823                            p.callable(pslice)
824                            del statestack[-plen:]
825                            symstack.append(sym)
826                            state = goto[statestack[-1]][pname]
827                            statestack.append(state)
828                        except SyntaxError:
829                            # If an error was set. Enter error recovery state
830                            lookaheadstack.append(lookahead)    # Save the current lookahead token
831                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
832                            statestack.pop()                    # Pop back one state (before the reduce)
833                            state = statestack[-1]
834                            sym.type = 'error'
835                            sym.value = 'error'
836                            lookahead = sym
837                            errorcount = error_count
838                            self.errorok = False
839
840                        continue
841                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
842
843                    else:
844
845                        #--! TRACKING
846                        if tracking:
847                            sym.lineno = lexer.lineno
848                            sym.lexpos = lexer.lexpos
849                        #--! TRACKING
850
851                        targ = [sym]
852
853                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
854                        # The code enclosed in this section is duplicated
855                        # above as a performance optimization.  Make sure
856                        # changes get made in both locations.
857
858                        pslice.slice = targ
859
860                        try:
861                            # Call the grammar rule with our special slice object
862                            self.state = state
863                            p.callable(pslice)
864                            symstack.append(sym)
865                            state = goto[statestack[-1]][pname]
866                            statestack.append(state)
867                        except SyntaxError:
868                            # If an error was set. Enter error recovery state
869                            lookaheadstack.append(lookahead)    # Save the current lookahead token
870                            statestack.pop()                    # Pop back one state (before the reduce)
871                            state = statestack[-1]
872                            sym.type = 'error'
873                            sym.value = 'error'
874                            lookahead = sym
875                            errorcount = error_count
876                            self.errorok = False
877
878                        continue
879                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
880
881                if t == 0:
882                    n = symstack[-1]
883                    result = getattr(n, 'value', None)
884                    return result
885
886            if t is None:
887
888
889                # We have some kind of parsing error here.  To handle
890                # this, we are going to push the current token onto
891                # the tokenstack and replace it with an 'error' token.
892                # If there are any synchronization rules, they may
893                # catch it.
894                #
                # In addition to pushing the error token, we call
896                # the user defined p_error() function if this is the
897                # first syntax error.  This function is only called if
898                # errorcount == 0.
899                if errorcount == 0 or self.errorok:
900                    errorcount = error_count
901                    self.errorok = False
902                    errtoken = lookahead
903                    if errtoken.type == '$end':
904                        errtoken = None               # End of file!
905                    if self.errorfunc:
906                        if errtoken and not hasattr(errtoken, 'lexer'):
907                            errtoken.lexer = lexer
908                        self.state = state
909                        tok = call_errorfunc(self.errorfunc, errtoken, self)
910                        if self.errorok:
911                            # User must have done some kind of panic
912                            # mode recovery on their own.  The
913                            # returned token is the next lookahead
914                            lookahead = tok
915                            errtoken = None
916                            continue
917                    else:
918                        if errtoken:
919                            if hasattr(errtoken, 'lineno'):
920                                lineno = lookahead.lineno
921                            else:
922                                lineno = 0
923                            if lineno:
924                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
925                            else:
926                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
927                        else:
928                            sys.stderr.write('yacc: Parse error in input. EOF\n')
929                            return
930
931                else:
932                    errorcount = error_count
933
934                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
935                # entire parse has been rolled back and we're completely hosed.   The token is
936                # discarded and we just keep going.
937
938                if len(statestack) <= 1 and lookahead.type != '$end':
939                    lookahead = None
940                    errtoken = None
941                    state = 0
942                    # Nuke the pushback stack
943                    del lookaheadstack[:]
944                    continue
945
946                # case 2: the statestack has a couple of entries on it, but we're
947                # at the end of the file. nuke the top entry and generate an error token
948
949                # Start nuking entries on the stack
950                if lookahead.type == '$end':
951                    # Whoa. We're really hosed here. Bail out
952                    return
953
954                if lookahead.type != 'error':
955                    sym = symstack[-1]
956                    if sym.type == 'error':
957                        # Hmmm. Error is on top of stack, we'll just nuke input
958                        # symbol and continue
959                        #--! TRACKING
960                        if tracking:
961                            sym.endlineno = getattr(lookahead, 'lineno', sym.lineno)
962                            sym.endlexpos = getattr(lookahead, 'lexpos', sym.lexpos)
963                        #--! TRACKING
964                        lookahead = None
965                        continue
966
967                    # Create the error symbol for the first time and make it the new lookahead symbol
968                    t = YaccSymbol()
969                    t.type = 'error'
970
971                    if hasattr(lookahead, 'lineno'):
972                        t.lineno = t.endlineno = lookahead.lineno
973                    if hasattr(lookahead, 'lexpos'):
974                        t.lexpos = t.endlexpos = lookahead.lexpos
975                    t.value = lookahead
976                    lookaheadstack.append(lookahead)
977                    lookahead = t
978                else:
979                    sym = symstack.pop()
980                    #--! TRACKING
981                    if tracking:
982                        lookahead.lineno = sym.lineno
983                        lookahead.lexpos = sym.lexpos
984                    #--! TRACKING
985                    statestack.pop()
986                    state = statestack[-1]
987
988                continue
989
990            # Call an error function here
991            raise RuntimeError('yacc: internal parser error!!!\n')
992
993        #--! parseopt-end
994
995    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
996    # parseopt_notrack().
997    #
998    # Optimized version of parseopt() with line number tracking removed.
999    # DO NOT EDIT THIS CODE DIRECTLY. This code is automatically generated
1000    # by the ply/ygen.py script. Make changes to the parsedebug() method instead.
1001    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1002
    def parseopt_notrack(self, input=None, lexer=None, debug=False, tracking=False, tokenfunc=None):
        """LR parsing engine: optimized variant with debug logging and
        line/position tracking stripped out.

        Drives the parser's action/goto tables over the token stream and
        returns the value attached to the start symbol, or None if the
        parse fails unrecoverably.  The *debug* and *tracking* arguments
        are accepted for interface compatibility but never referenced in
        this body.

        Generated by ply/ygen.py from parsedebug() -- do not edit directly.
        """
        #--! parseopt-notrack-start
        lookahead = None                         # Current lookahead symbol
        lookaheadstack = []                      # Stack of lookahead symbols
        actions = self.action                    # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto                      # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions               # Local reference to production list (to avoid lookup on self.)
        defaulted_states = self.defaulted_states # Local reference to defaulted states
        pslice  = YaccProduction(None)           # Production object passed to grammar rules
        errorcount = 0                           # Used during error recovery


        # If no lexer was given, we will try to use the lex module
        if not lexer:
            from . import lex
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set the parser() token method (sometimes used in error recovery)
        self.token = get_token

        # Set up the state and symbol stacks

        statestack = []                # Stack of parsing states
        self.statestack = statestack
        symstack   = []                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while True:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer


            # Defaulted states have exactly one possible action, so the
            # lookahead token is neither consulted nor read for them.
            if state not in defaulted_states:
                if not lookahead:
                    if not lookaheadstack:
                        lookahead = get_token()     # Get the next token
                    else:
                        lookahead = lookaheadstack.pop()
                    if not lookahead:
                        lookahead = YaccSymbol()
                        lookahead.type = '$end'

                # Check the action table
                ltype = lookahead.type
                t = actions[state].get(ltype)
            else:
                t = defaulted_states[state]


            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t


                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount:
                        errorcount -= 1
                    continue

                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None


                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym


                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            self.state = state
                            p.callable(pslice)
                            del statestack[-plen:]
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                    else:


                        targ = [sym]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            self.state = state
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)    # Save the current lookahead token
                            statestack.pop()                    # Pop back one state (before the reduce)
                            state = statestack[-1]
                            sym.type = 'error'
                            sym.value = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = False

                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

                if t == 0:
                    # Accept: the parse is complete; return the start symbol's value
                    n = symstack[-1]
                    result = getattr(n, 'value', None)
                    return result

            if t is None:


                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = False
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        if errtoken and not hasattr(errtoken, 'lexer'):
                            errtoken.lexer = lexer
                        self.state = state
                        tok = call_errorfunc(self.errorfunc, errtoken, self)
                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken, 'lineno'):
                                lineno = lookahead.lineno
                            else:
                                lineno = 0
                            if lineno:
                                sys.stderr.write('yacc: Syntax error at line %d, token=%s\n' % (lineno, errtoken.type))
                            else:
                                # NOTE(review): this message lacks a trailing '\n', unlike the
                                # branch above -- confirm and fix in parsedebug() (this body is
                                # generated by ply/ygen.py and must not be edited directly).
                                sys.stderr.write('yacc: Syntax error, token=%s' % errtoken.type)
                        else:
                            sys.stderr.write('yacc: Parse error in input. EOF\n')
                            return

                else:
                    errorcount = error_count

                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.   The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue

                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                # case 3: turn the offending token into an 'error' lookahead token,
                # or (if 'error' was already the lookahead) pop one state/symbol.
                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue

                    # Create the error symbol for the first time and make it the new lookahead symbol
                    t = YaccSymbol()
                    t.type = 'error'

                    if hasattr(lookahead, 'lineno'):
                        t.lineno = t.endlineno = lookahead.lineno
                    if hasattr(lookahead, 'lexpos'):
                        t.lexpos = t.endlexpos = lookahead.lexpos
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    sym = symstack.pop()
                    statestack.pop()
                    state = statestack[-1]

                continue

            # Call an error function here
            raise RuntimeError('yacc: internal parser error!!!\n')

        #--! parseopt-notrack-end
1276
1277# -----------------------------------------------------------------------------
1278#                          === Grammar Representation ===
1279#
1280# The following functions, classes, and variables are used to represent and
1281# manipulate the rules that make up a grammar.
1282# -----------------------------------------------------------------------------
1283
# Regex matching grammar symbol identifiers: letters, digits, '_' and '-'
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
1286
1287# -----------------------------------------------------------------------------
1288# class Production:
1289#
1290# This class stores the raw information about a single production or grammar rule.
1291# A grammar rule refers to a specification such as this:
1292#
1293#       expr : expr PLUS term
1294#
1295# Here are the basic attributes defined on all productions
1296#
1297#       name     - Name of the production.  For example 'expr'
1298#       prod     - A list of symbols on the right side ['expr','PLUS','term']
1299#       prec     - Production precedence level
1300#       number   - Production number.
1301#       func     - Function that executes on reduce
1302#       file     - File where production function is defined
1303#       lineno   - Line number where production function is defined
1304#
1305# The following attributes are defined or optional.
1306#
1307#       len       - Length of the production (number of symbols on right hand side)
1308#       usyms     - Set of unique symbols found in the production
1309# -----------------------------------------------------------------------------
1310
class Production(object):
    """Raw information about a single grammar rule, e.g. ``expr : expr PLUS term``.

    Attributes:
        name     -- left-hand-side nonterminal name (e.g. 'expr')
        prod     -- tuple of right-hand-side symbols, e.g. ('expr', 'PLUS', 'term')
        prec     -- precedence as an (assoc, level) tuple
        number   -- production number
        func     -- name of the function executed on reduce (bound via bind())
        file     -- file where the production function is defined
        line     -- line number where the production function is defined
        len      -- number of symbols on the right-hand side
        usyms    -- unique right-hand-side symbols, in first-seen order
    """

    # Class-level default for the reduction counter (presumably incremented
    # elsewhere when this production is reduced -- not visible in this chunk).
    reduced = 0

    def __init__(self, number, name, prod, precedence=('right', 0), func=None, file='', line=0):
        self.name = name
        self.prod = tuple(prod)
        self.number = number
        self.func = func
        self.callable = None
        self.file = file
        self.line = line
        self.prec = precedence

        # Cached length of the right-hand side
        self.len = len(self.prod)

        # Unique right-hand-side symbols, preserving first-seen order
        unique = []
        for symbol in self.prod:
            if symbol not in unique:
                unique.append(symbol)
        self.usyms = unique

        # LR items derived from this production (populated during table construction)
        self.lr_items = []
        self.lr_next = None

        # Human-readable form, e.g. "expr -> expr PLUS term"
        rhs = ' '.join(self.prod) if self.prod else '<empty>'
        self.str = '%s -> %s' % (self.name, rhs)

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'Production(%s)' % self

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        # Python 2 truth protocol: a Production is always truthy
        return 1

    def __getitem__(self, index):
        return self.prod[index]

    def lr_item(self, n):
        """Return the LRItem with the dot at position *n*, or None past the end."""
        if n > len(self.prod):
            return None
        item = LRItem(self, n)
        # Productions that can start immediately after the dot.
        # NOTE: Prodnames is attached to this object externally -- TODO confirm
        # against the Grammar machinery elsewhere in the file.
        try:
            item.lr_after = self.Prodnames[item.prod[n+1]]
        except (IndexError, KeyError):
            item.lr_after = []
        # Grammar symbol immediately before the dot
        try:
            item.lr_before = item.prod[n-1]
        except IndexError:
            item.lr_before = None
        return item

    def bind(self, pdict):
        """Resolve self.func (a function name) to a callable found in *pdict*."""
        if self.func:
            self.callable = pdict[self.func]
1378
1379# This class serves as a minimal standin for Production objects when
1380# reading table data from files.   It only contains information
1381# actually used by the LR parsing engine, plus some additional
1382# debugging information.
class MiniProduction(object):
    """Lightweight stand-in for Production when tables are read from files.

    Carries only what the LR engine uses at parse time, plus a little
    debugging information.
    """

    def __init__(self, str, name, len, func, file, line):
        # The 'str' and 'len' parameter names shadow builtins; they are kept
        # for interface compatibility with the table-generation code.
        self.str = str
        self.name = name
        self.len = len
        self.file = file
        self.line = line
        self.func = func
        self.callable = None

    def __str__(self):
        return self.str

    def __repr__(self):
        return 'MiniProduction(%s)' % self.str

    def bind(self, pdict):
        """Resolve self.func (a function name) to a callable found in *pdict*."""
        if self.func:
            self.callable = pdict[self.func]
1403
1404
1405# -----------------------------------------------------------------------------
1406# class LRItem
1407#
1408# This class represents a specific stage of parsing a production rule.  For
1409# example:
1410#
1411#       expr : expr . PLUS term
1412#
1413# In the above, the "." represents the current location of the parse.  Here
# are the basic attributes:
1415#
1416#       name       - Name of the production.  For example 'expr'
1417#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
1418#       number     - Production number.
1419#
#       lr_next    - Next LR item. Example, if we are ' expr -> expr . PLUS term'
1421#                    then lr_next refers to 'expr -> expr PLUS . term'
1422#       lr_index   - LR item index (location of the ".") in the prod list.
1423#       lookaheads - LALR lookahead symbols for this item
1424#       len        - Length of the production (number of symbols on right hand side)
1425#       lr_after    - List of all productions that immediately follow
1426#       lr_before   - Grammar symbol immediately before
1427# -----------------------------------------------------------------------------
1428
class LRItem(object):
    """One stage of parsing a production, with '.' marking the parse position.

    For example ``expr : expr . PLUS term``.  Built from a Production *p*
    with the dot placed at index *n* of its right-hand side.
    """

    def __init__(self, p, n):
        # Splice the dot marker into the right-hand side at position n
        rhs = list(p.prod)
        rhs.insert(n, '.')
        self.prod = tuple(rhs)
        self.name = p.name
        self.number = p.number
        self.lr_index = n           # position of the '.' in self.prod
        self.lookaheads = {}        # LALR lookahead symbols, filled in later
        self.len = len(self.prod)
        self.usyms = p.usyms

    def __str__(self):
        if not self.prod:
            return '%s -> <empty>' % self.name
        return '%s -> %s' % (self.name, ' '.join(self.prod))

    def __repr__(self):
        return 'LRItem(%s)' % self
1450
1451# -----------------------------------------------------------------------------
1452# rightmost_terminal()
1453#
1454# Return the rightmost terminal from a list of symbols.  Used in add_production()
1455# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    """Return the rightmost member of *symbols* found in *terminals*, or None.

    Used in add_production() to determine a rule's default precedence.
    """
    for sym in reversed(symbols):
        if sym in terminals:
            return sym
    return None
1463
1464# -----------------------------------------------------------------------------
1465#                           === GRAMMAR CLASS ===
1466#
1467# The following class represents the contents of the specified grammar along
1468# with various computed properties such as first sets, follow sets, LR items, etc.
1469# This data is used for critical parts of the table generation process later.
1470# -----------------------------------------------------------------------------
1471
class GrammarError(YaccError):
    """Raised when a problem is detected in the grammar specification."""
1474
class Grammar(object):
    """Contents of a grammar along with computed properties (FIRST sets,
    FOLLOW sets, LR items, etc.) used during table generation."""

    def __init__(self, terminals):
        self.Productions  = [None]  # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames    = {}      # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap      = {}      # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals    = {}      # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        # 'error' is always a valid terminal (used by error-recovery rules)
        self.Terminals['error'] = []

        self.Nonterminals = {}      # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First        = {}      # A dictionary of precomputed FIRST(x) symbols

        self.Follow       = {}      # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence   = {}      # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = set() # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar


    def __len__(self):
        """Return the number of productions (including the reserved slot 0)."""
        return len(self.Productions)

    def __getitem__(self, index):
        """Return production number index."""
        return self.Productions[index]

    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
    #
    # -----------------------------------------------------------------------------

    def set_precedence(self, term, assoc, level):
        """Record the precedence (assoc, level) for terminal term.

        Must be called before any add_production() call.  Raises
        GrammarError on duplicate or invalid associativity.
        """
        assert self.Productions == [None], 'Must call set_precedence() before add_production()'
        if term in self.Precedence:
            raise GrammarError('Precedence already specified for terminal %r' % term)
        if assoc not in ['left', 'right', 'nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc, level)

    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.   For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most non-terminal
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------

    def add_production(self, prodname, syms, func=None, file='', line=0):
        """Add the production prodname -> syms to the grammar.

        func is the action function for the rule; file/line identify where
        the rule was defined (used in error messages).  Raises GrammarError
        on illegal names, bad %prec usage, or duplicate rules.
        """

        if prodname in self.Terminals:
            raise GrammarError('%s:%d: Illegal rule name %r. Already defined as a token' % (file, line, prodname))
        if prodname == 'error':
            raise GrammarError('%s:%d: Illegal rule name %r. error is a reserved word' % (file, line, prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError('%s:%d: Illegal rule name %r' % (file, line, prodname))

        # Look for literal tokens
        for n, s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    # eval() converts the quoted literal (e.g. "'+'") into its
                    # character value; literals are implicitly declared terminals
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError('%s:%d: Literal token %s in rule %r may only be a single character' %
                                           (file, line, s, prodname))
                    if c not in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError('%s:%d: Illegal name %r in rule %r' % (file, line, s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError('%s:%d: Syntax error. Nothing follows %%prec' % (file, line))
            if syms[-2] != '%prec':
                raise GrammarError('%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule' %
                                   (file, line))
            precname = syms[-1]
            prodprec = self.Precedence.get(precname)
            if not prodprec:
                raise GrammarError('%s:%d: Nothing known about the precedence of %r' % (file, line, precname))
            else:
                self.UsedPrecedence.add(precname)
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms, self.Terminals)
            prodprec = self.Precedence.get(precname, ('right', 0))

        # See if the rule is already in the rulemap
        # NOTE(review): 'map' shadows the builtin; it is only used locally here
        map = '%s -> %s' % (prodname, syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError('%s:%d: Duplicate rule %s. ' % (file, line, m) +
                               'Previous definition at %s:%d' % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber  = len(self.Productions)
        if prodname not in self.Nonterminals:
            self.Nonterminals[prodname] = []

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if t not in self.Nonterminals:
                    self.Nonterminals[t] = []
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber, prodname, syms, prodprec, func, file, line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [p]

    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self, start=None):
        """Set the start symbol (default: LHS of the first rule) and install
        the augmented production 0: S' -> start."""
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError('start symbol %s undefined' % start)
        self.Productions[0] = Production(0, "S'", [start])
        self.Nonterminals[start].append(0)
        self.Start = start

    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):
        """Return a list of nonterminals not reachable from the start symbol."""

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if s in reachable:
                return
            reachable.add(s)
            for p in self.Prodnames.get(s, []):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = set()
        # Production 0 is the augmented rule S' -> start, so prod[0] is the start symbol
        mark_reachable_from(self.Productions[0].prod[0])
        return [s for s in self.Nonterminals if s not in reachable]

    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------

    def infinite_cycles(self):
        """Return a list of symbols that can never derive a terminal string."""
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = True

        terminates['$end'] = True

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = False

        # Then propagate termination until no change:
        while True:
            some_change = False
            for (n, pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = False
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = True

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = True
                            some_change = True
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s, term) in terminates.items():
            if not term:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite

    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        """Return [(sym, prod)] for symbols used but never defined."""
        result = []
        for p in self.Productions:
            if not p:
                # Skip the reserved slot 0 if set_start() has not run yet
                continue

            for s in p.prod:
                if s not in self.Prodnames and s not in self.Terminals and s != 'error':
                    result.append((s, p))
        return result

    # -----------------------------------------------------------------------------
    # unused_terminals()
    #
    # Find all terminals that were defined, but not used by the grammar.  Returns
    # a list of all symbols.
    # -----------------------------------------------------------------------------
    def unused_terminals(self):
        """Return a list of terminals defined but never used in any rule."""
        unused_tok = []
        for s, v in self.Terminals.items():
            if s != 'error' and not v:
                unused_tok.append(s)

        return unused_tok

    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined,  but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------

    def unused_rules(self):
        """Return a list of productions for nonterminals never referenced."""
        unused_prod = []
        for s, v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod

    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term,precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        """Return [(term, assoc)] for precedence rules that were never used."""
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname, self.Precedence[termname][0]))

        return unused

    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first1, the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
    def _first(self, beta):
        """Return FIRST(beta) for a sequence of symbols beta, as a list."""

        # We are computing First(x1,x2,x3,...,xn)
        result = []
        for x in beta:
            x_produces_empty = False

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = True
                else:
                    if f not in result:
                        result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result

    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        """Compute (and cache in self.First) FIRST sets for all symbols."""
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while True:
            some_change = False
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append(f)
                            some_change = True
            if not some_change:
                break

        return self.First

    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
    def compute_follow(self, start=None):
        """Compute (and cache in self.Follow) FOLLOW sets for all nonterminals."""
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = []

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = ['$end']

        while True:
            didadd = False
            for p in self.Productions[1:]:
                # Here is the production set
                for i, B in enumerate(p.prod):
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = False
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = True
                            if f == '<empty>':
                                hasempty = True
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = True
            if not didadd:
                break
        return self.Follow


    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.  For example:
    #
    #   E -> E PLUS E
    #
    # Creates the list
    #
    #  [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------

    def build_lritems(self):
        """Build the LR items for every production, linking them via lr_next."""
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while True:
                if i > len(p):
                    # Dot has moved past the end of the production
                    lri = None
                else:
                    lri = LRItem(p, i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError, KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri:
                    break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
1961
1962# -----------------------------------------------------------------------------
1963#                            == Class LRTable ==
1964#
1965# This basic class represents a basic table of LR parsing information.
1966# Methods for generating the tables are not defined here.  They are defined
1967# in the derived class LRGeneratedTable.
1968# -----------------------------------------------------------------------------
1969
class VersionError(YaccError):
    """Exception raised when a saved parser table's version is out of date."""
    pass
1972
class LRTable(object):
    """A basic table of LR parsing information.

    Methods for generating the tables are not defined here.  They are
    defined in the derived class LRGeneratedTable.
    """
    def __init__(self):
        self.lr_action = None       # Action table
        self.lr_goto = None         # Goto table
        self.lr_productions = None  # List of MiniProduction objects
        self.lr_method = None       # Method used to generate the table

    def read_table(self, module):
        """Load parsing tables from a generated parsetab module.

        module may be an already-imported module object or a module name
        to import.  Returns the grammar signature stored in the tables.
        Raises VersionError if the table version does not match.
        """
        if isinstance(module, types.ModuleType):
            parsetab = module
        else:
            exec('import %s' % module)
            parsetab = sys.modules[module]

        if parsetab._tabversion != __tabversion__:
            raise VersionError('yacc table file version is out of date')

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature

    def read_pickle(self, filename):
        """Load parsing tables from a pickle file.

        Returns the grammar signature stored in the file.  Raises
        ImportError if the file does not exist and VersionError if the
        table version does not match.
        """
        try:
            import cPickle as pickle    # Python 2 fast pickle, if available
        except ImportError:
            import pickle

        if not os.path.exists(filename):
            raise ImportError

        # Use a context manager so the file is closed even if unpickling
        # fails partway through (the original leaked the handle on error)
        with open(filename, 'rb') as in_f:
            tabversion = pickle.load(in_f)
            if tabversion != __tabversion__:
                raise VersionError('yacc table file version is out of date')
            self.lr_method = pickle.load(in_f)
            signature      = pickle.load(in_f)
            self.lr_action = pickle.load(in_f)
            self.lr_goto   = pickle.load(in_f)
            productions    = pickle.load(in_f)

        self.lr_productions = [MiniProduction(*p) for p in productions]
        return signature

    # Bind all production function names to callable objects in pdict
    def bind_callables(self, pdict):
        for p in self.lr_productions:
            p.bind(pdict)
2031
2032
2033# -----------------------------------------------------------------------------
2034#                           === LR Generator ===
2035#
2036# The following classes and functions are used to generate LR parsing tables on
2037# a grammar.
2038# -----------------------------------------------------------------------------
2039
2040# -----------------------------------------------------------------------------
2041# digraph()
2042# traverse()
2043#
2044# The following two functions are used to compute set valued functions
2045# of the form:
2046#
2047#     F(x) = F'(x) U U{F(y) | x R y}
2048#
2049# This is used to compute the values of Read() sets as well as FOLLOW sets
2050# in LALR(1) generation.
2051#
2052# Inputs:  X    - An input set
2053#          R    - A relation
2054#          FP   - Set-valued function
2055# ------------------------------------------------------------------------------
2056
def digraph(X, R, FP):
    """Compute F(x) = F'(x) U U{F(y) | x R y} for every x in X.

    X is the input set, R the relation, and FP the base set-valued
    function F'.  Returns a dictionary mapping each x to its F(x) list.
    """
    N = dict.fromkeys(X, 0)
    stack = []
    F = {}
    for node in X:
        if N[node] == 0:
            traverse(node, N, stack, F, X, R, FP)
    return F
2067
def traverse(x, N, stack, F, X, R, FP):
    """Depth-first traversal helper for digraph().

    Implements the strongly-connected-component based set propagation:
    N[x] records the stack depth at which x was first visited (MAXINT once
    x's component is complete), and F[x] accumulates the result set.
    """
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y, N, stack, F, X, R, FP)
        # Propagate the minimum visit depth (SCC root detection)
        N[x] = min(N[x], N[y])
        # Merge F(y) into F(x)
        for a in F.get(y, []):
            if a not in F[x]:
                F[x].append(a)
    if N[x] == d:
        # x is the root of its component: pop the whole component off the
        # stack, marking each member finished and sharing x's result set
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
2090
class LALRError(YaccError):
    """Exception raised for errors during LALR/SLR table generation."""
    pass
2093
2094# -----------------------------------------------------------------------------
2095#                             == LRGeneratedTable ==
2096#
2097# This class implements the LR table generation algorithm.  There are no
2098# public methods except for write()
2099# -----------------------------------------------------------------------------
2100
2101class LRGeneratedTable(LRTable):
    def __init__(self, grammar, method='LALR', log=None):
        """Build the LR parsing tables for grammar.

        method is 'SLR' or 'LALR' (anything else raises LALRError).
        log, if given, receives diagnostic output; otherwise a NullLogger
        is used.  Table construction runs to completion in the constructor.
        """
        if method not in ['SLR', 'LALR']:
            raise LALRError('Unsupported method %s' % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action     = {}        # Action table
        self.lr_goto       = {}        # Goto table
        self.lr_productions  = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}        # Cache of computed gotos
        self.lr0_cidhash   = {}        # Cache of closures

        self._add_count    = 0         # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict   = 0
        self.rr_conflict   = 0
        self.conflicts     = []        # List of conflicts

        self.sr_conflicts  = []
        self.rr_conflicts  = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
2136
2137    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.
2138
2139    def lr0_closure(self, I):
2140        self._add_count += 1
2141
2142        # Add everything in I to J
2143        J = I[:]
2144        didadd = True
2145        while didadd:
2146            didadd = False
2147            for j in J:
2148                for x in j.lr_after:
2149                    if getattr(x, 'lr0_added', 0) == self._add_count:
2150                        continue
2151                    # Add B --> .G to J
2152                    J.append(x.lr_next)
2153                    x.lr0_added = self._add_count
2154                    didadd = True
2155
2156        return J
2157
2158    # Compute the LR(0) goto function goto(I,X) where I is a set
2159    # of LR(0) items and X is a grammar symbol.   This function is written
2160    # in a way that guarantees uniqueness of the generated goto sets
2161    # (i.e. the same goto set will never be returned as two different Python
2162    # objects).  With uniqueness, we can later do fast set comparisons using
2163    # id(obj) instead of element-wise comparison.
2164
    def lr0_goto(self, I, x):
        """Compute goto(I, x) for an item set I and grammar symbol x.

        Guarantees that the same goto set is always returned as the same
        Python object, so later comparisons can use id() instead of
        element-wise comparison.
        """
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I), x))
        if g:
            return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        # s is a trie keyed by the id()s of the items shifted over x;
        # the '$end' leaf holds the canonical goto set for that item sequence
        s = self.lr_goto_cache.get(x)
        if not s:
            s = {}
            self.lr_goto_cache[x] = s

        gs = []
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n))
                if not s1:
                    s1 = {}
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end')
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                # No items shift over x: cache the empty list itself
                s['$end'] = gs
        self.lr_goto_cache[(id(I), x)] = g
        return g
2198
2199    # Compute the LR(0) sets of item function
2200    def lr0_items(self):
2201        C = [self.lr0_closure([self.grammar.Productions[0].lr_next])]
2202        i = 0
2203        for I in C:
2204            self.lr0_cidhash[id(I)] = i
2205            i += 1
2206
2207        # Loop over the items in C and each grammar symbols
2208        i = 0
2209        while i < len(C):
2210            I = C[i]
2211            i += 1
2212
2213            # Collect all of the symbols that could possibly be in the goto(I,X) sets
2214            asyms = {}
2215            for ii in I:
2216                for s in ii.usyms:
2217                    asyms[s] = None
2218
2219            for x in asyms:
2220                g = self.lr0_goto(I, x)
2221                if not g or id(g) in self.lr0_cidhash:
2222                    continue
2223                self.lr0_cidhash[id(g)] = len(C)
2224                C.append(g)
2225
2226        return C
2227
2228    # -----------------------------------------------------------------------------
2229    #                       ==== LALR(1) Parsing ====
2230    #
2231    # LALR(1) parsing is almost exactly the same as SLR except that instead of
2232    # relying upon Follow() sets when performing reductions, a more selective
2233    # lookahead set that incorporates the state of the LR(0) machine is utilized.
2234    # Thus, we mainly just have to focus on calculating the lookahead sets.
2235    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
2241    #
2242    # Further details can also be found in:
2243    #
2244    #  J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
2245    #      McGraw-Hill Book Company, (1985).
2246    #
2247    # -----------------------------------------------------------------------------
2248
2249    # -----------------------------------------------------------------------------
2250    # compute_nullable_nonterminals()
2251    #
2252    # Creates a dictionary containing all of the non-terminals that might produce
2253    # an empty production.
2254    # -----------------------------------------------------------------------------
2255
2256    def compute_nullable_nonterminals(self):
2257        nullable = set()
2258        num_nullable = 0
2259        while True:
2260            for p in self.grammar.Productions[1:]:
2261                if p.len == 0:
2262                    nullable.add(p.name)
2263                    continue
2264                for t in p.prod:
2265                    if t not in nullable:
2266                        break
2267                else:
2268                    nullable.add(p.name)
2269            if len(nullable) == num_nullable:
2270                break
2271            num_nullable = len(nullable)
2272        return nullable
2273
2274    # -----------------------------------------------------------------------------
    # find_nonterminal_transitions(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions.  These are transitions in which a dot appears immediately before
    # a non-terminal.  Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
2281    #
2282    # The input C is the set of LR(0) items.
2283    # -----------------------------------------------------------------------------
2284
2285    def find_nonterminal_transitions(self, C):
2286        trans = []
2287        for stateno, state in enumerate(C):
2288            for p in state:
2289                if p.lr_index < p.len - 1:
2290                    t = (stateno, p.prod[p.lr_index+1])
2291                    if t[1] in self.grammar.Nonterminals:
2292                        if t not in trans:
2293                            trans.append(t)
2294        return trans
2295
2296    # -----------------------------------------------------------------------------
2297    # dr_relation()
2298    #
2299    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
2300    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
2301    #
2302    # Returns a list of terminals.
2303    # -----------------------------------------------------------------------------
2304
2305    def dr_relation(self, C, trans, nullable):
2306        state, N = trans
2307        terms = []
2308
2309        g = self.lr0_goto(C[state], N)
2310        for p in g:
2311            if p.lr_index < p.len - 1:
2312                a = p.prod[p.lr_index+1]
2313                if a in self.grammar.Terminals:
2314                    if a not in terms:
2315                        terms.append(a)
2316
2317        # This extra bit is to handle the start state
2318        if state == 0 and N == self.grammar.Productions[0].prod[0]:
2319            terms.append('$end')
2320
2321        return terms
2322
2323    # -----------------------------------------------------------------------------
2324    # reads_relation()
2325    #
2326    # Computes the READS() relation (p,A) READS (t,C).
2327    # -----------------------------------------------------------------------------
2328
2329    def reads_relation(self, C, trans, empty):
2330        # Look for empty transitions
2331        rel = []
2332        state, N = trans
2333
2334        g = self.lr0_goto(C[state], N)
2335        j = self.lr0_cidhash.get(id(g), -1)
2336        for p in g:
2337            if p.lr_index < p.len - 1:
2338                a = p.prod[p.lr_index + 1]
2339                if a in empty:
2340                    rel.append((j, a))
2341
2342        return rel
2343
2344    # -----------------------------------------------------------------------------
2345    # compute_lookback_includes()
2346    #
2347    # Determines the lookback and includes relations
2348    #
2349    # LOOKBACK:
2350    #
2351    # This relation is determined by running the LR(0) state machine forward.
2352    # For example, starting with a production "N : . A B C", we run it forward
2353    # to obtain "N : A B C ."   We then build a relationship between this final
2354    # state and the starting state.   These relationships are stored in a dictionary
2355    # lookdict.
2356    #
2357    # INCLUDES:
2358    #
2359    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
2360    #
2361    # This relation is used to determine non-terminal transitions that occur
2362    # inside of other non-terminal transition states.   (p,A) INCLUDES (p', B)
2363    # if the following holds:
2364    #
2365    #       B -> LAT, where T -> epsilon and p' -L-> p
2366    #
2367    # L is essentially a prefix (which may be empty), T is a suffix that must be
2368    # able to derive an empty string.  State p' must lead to state p with the string L.
2369    #
2370    # -----------------------------------------------------------------------------
2371
    def compute_lookback_includes(self, C, trans, nullable):
        """Determine the LOOKBACK and INCLUDES relations.

        C        -- the canonical collection of LR(0) item sets
        trans    -- list of nonterminal transitions (state, N)
        nullable -- set of nonterminal names that can derive the empty string

        Returns a tuple (lookdict, includedict):
          lookdict[(state, N)] -> list of (final_state, production) pairs
              found by running each production "N : . A B C" forward through
              the state machine to "N : A B C ."
          includedict[(j, t)]  -> list of transitions (state, N) such that
              (state, N) INCLUDES (j, t), i.e. the rest of the production
              after t derives empty.
        """
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions for O(1) membership tests
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state, N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N:
                    continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side

                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j, t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation
                        # the only way to know for certain is whether the rest of the
                        # production derives empty

                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals:
                                break      # No forget it
                            if p.prod[li] not in nullable:
                                break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j, t))

                    g = self.lr0_goto(C[j], t)               # Go to next set
                    j = self.lr0_cidhash.get(id(g), -1)      # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name:
                        continue
                    if r.len != p.len:
                        continue
                    i = 0
                    # This loop compares the production ". A B C" with "A B C ."
                    # (same symbols, dot shifted all the way to the right)
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]:
                            break
                        i = i + 1
                    else:
                        lookb.append((j, r))
            # Record the INCLUDES edges discovered for this transition; the
            # dictionary is keyed by the *included* transition.
            for i in includes:
                if i not in includedict:
                    includedict[i] = []
                includedict[i].append((state, N))
            lookdict[(state, N)] = lookb

        return lookdict, includedict
2439
2440    # -----------------------------------------------------------------------------
2441    # compute_read_sets()
2442    #
2443    # Given a set of LR(0) items, this function computes the read sets.
2444    #
2445    # Inputs:  C        =  Set of LR(0) items
2446    #          ntrans   = Set of nonterminal transitions
2447    #          nullable = Set of empty transitions
2448    #
2449    # Returns a set containing the read sets
2450    # -----------------------------------------------------------------------------
2451
2452    def compute_read_sets(self, C, ntrans, nullable):
2453        FP = lambda x: self.dr_relation(C, x, nullable)
2454        R =  lambda x: self.reads_relation(C, x, nullable)
2455        F = digraph(ntrans, R, FP)
2456        return F
2457
2458    # -----------------------------------------------------------------------------
2459    # compute_follow_sets()
2460    #
2461    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
2462    # and an include set, this function computes the follow sets
2463    #
2464    # Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
2465    #
2466    # Inputs:
2467    #            ntrans     = Set of nonterminal transitions
2468    #            readsets   = Readset (previously computed)
2469    #            inclsets   = Include sets (previously computed)
2470    #
2471    # Returns a set containing the follow sets
2472    # -----------------------------------------------------------------------------
2473
2474    def compute_follow_sets(self, ntrans, readsets, inclsets):
2475        FP = lambda x: readsets[x]
2476        R  = lambda x: inclsets.get(x, [])
2477        F = digraph(ntrans, R, FP)
2478        return F
2479
2480    # -----------------------------------------------------------------------------
2481    # add_lookaheads()
2482    #
2483    # Attaches the lookahead symbols to grammar rules.
2484    #
2485    # Inputs:    lookbacks         -  Set of lookback relations
2486    #            followset         -  Computed follow set
2487    #
2488    # This function directly attaches the lookaheads to productions contained
2489    # in the lookbacks set
2490    # -----------------------------------------------------------------------------
2491
2492    def add_lookaheads(self, lookbacks, followset):
2493        for trans, lb in lookbacks.items():
2494            # Loop over productions in lookback
2495            for state, p in lb:
2496                if state not in p.lookaheads:
2497                    p.lookaheads[state] = []
2498                f = followset.get(trans, [])
2499                for a in f:
2500                    if a not in p.lookaheads[state]:
2501                        p.lookaheads[state].append(a)
2502
2503    # -----------------------------------------------------------------------------
2504    # add_lalr_lookaheads()
2505    #
2506    # This function does all of the work of adding lookahead information for use
2507    # with LALR parsing
2508    # -----------------------------------------------------------------------------
2509
2510    def add_lalr_lookaheads(self, C):
2511        # Determine all of the nullable nonterminals
2512        nullable = self.compute_nullable_nonterminals()
2513
2514        # Find all non-terminal transitions
2515        trans = self.find_nonterminal_transitions(C)
2516
2517        # Compute read sets
2518        readsets = self.compute_read_sets(C, trans, nullable)
2519
2520        # Compute lookback/includes relations
2521        lookd, included = self.compute_lookback_includes(C, trans, nullable)
2522
2523        # Compute LALR FOLLOW sets
2524        followsets = self.compute_follow_sets(trans, readsets, included)
2525
2526        # Add all of the lookaheads
2527        self.add_lookaheads(lookd, followsets)
2528
2529    # -----------------------------------------------------------------------------
2530    # lr_parse_table()
2531    #
2532    # This function constructs the parse tables for SLR or LALR
2533    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        """Construct the LR parsing tables for SLR or LALR.

        Populates self.lr_action and self.lr_goto with one dictionary per
        state and records conflicts in self.sr_conflicts / self.rr_conflicts.
        Action entries are encoded as: positive n = shift to state n,
        negative n = reduce by rule -n, 0 = accept, None = error (used to
        implement nonassoc precedence).  All diagnostics go to self.log.
        """
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto         # Goto array
        action = self.lr_action       # Action array
        log    = self.log             # Logger for output

        actionp = {}                  # Action production array (temporary)

        log.info('Parsing method: %s', self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = []              # List of actions
            st_action  = {}
            st_actionp = {}
            st_goto    = {}
            log.info('')
            log.info('state %d', st)
            log.info('')
            for p in I:
                log.info('    (%d) %s', p.number, p)
            log.info('')

            # Compute the action table entries for this state
            for p in I:
                    if p.len == p.lr_index + 1:
                        if p.name == "S'":
                            # Start symbol. Accept!
                            st_action['$end'] = 0
                            st_actionp['$end'] = p
                        else:
                            # We are at the end of a production.  Reduce!
                            # LALR uses the per-state lookaheads computed in
                            # add_lalr_lookaheads(); SLR falls back to FOLLOW sets.
                            if self.lr_method == 'LALR':
                                laheads = p.lookaheads[st]
                            else:
                                laheads = self.grammar.Follow[p.name]
                            for a in laheads:
                                actlist.append((a, p, 'reduce using rule %d (%s)' % (p.number, p)))
                                r = st_action.get(a)
                                if r is not None:
                                    # Whoa. Have a shift/reduce or reduce/reduce conflict
                                    if r > 0:
                                        # Need to decide on shift or reduce here
                                        # By default we favor shifting. Need to add
                                        # some precedence rules here.

                                        # Shift precedence comes from the token
                                        sprec, slevel = Precedence.get(a, ('right', 0))

                                        # Reduce precedence comes from rule being reduced (p)
                                        rprec, rlevel = Productions[p.number].prec

                                        if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                            # We really need to reduce here.
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            if not slevel and not rlevel:
                                                log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                                self.sr_conflicts.append((st, a, 'reduce'))
                                            Productions[p.number].reduced += 1
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            # nonassoc at equal precedence: make it an error
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the shift
                                            if not rlevel:
                                                log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                                self.sr_conflicts.append((st, a, 'shift'))
                                    elif r < 0:
                                        # Reduce/reduce conflict.   In this case, we favor the rule
                                        # that was defined first in the grammar file
                                        oldp = Productions[-r]
                                        pp = Productions[p.number]
                                        if oldp.line > pp.line:
                                            st_action[a] = -p.number
                                            st_actionp[a] = p
                                            chosenp, rejectp = pp, oldp
                                            Productions[p.number].reduced += 1
                                            Productions[oldp.number].reduced -= 1
                                        else:
                                            chosenp, rejectp = oldp, pp
                                        self.rr_conflicts.append((st, chosenp, rejectp))
                                        log.info('  ! reduce/reduce conflict for %s resolved using rule %d (%s)',
                                                 a, st_actionp[a].number, st_actionp[a])
                                    else:
                                        raise LALRError('Unknown conflict in state %d' % st)
                                else:
                                    st_action[a] = -p.number
                                    st_actionp[a] = p
                                    Productions[p.number].reduced += 1
                    else:
                        i = p.lr_index
                        a = p.prod[i+1]       # Get symbol right after the "."
                        if a in self.grammar.Terminals:
                            g = self.lr0_goto(I, a)
                            j = self.lr0_cidhash.get(id(g), -1)
                            if j >= 0:
                                # We are in a shift state
                                actlist.append((a, p, 'shift and go to state %d' % j))
                                r = st_action.get(a)
                                if r is not None:
                                    # Whoa have a shift/reduce or shift/shift conflict
                                    if r > 0:
                                        if r != j:
                                            raise LALRError('Shift/shift conflict in state %d' % st)
                                    elif r < 0:
                                        # Do a precedence check.
                                        #   -  if precedence of reduce rule is higher, we reduce.
                                        #   -  if precedence of reduce is same and left assoc, we reduce.
                                        #   -  otherwise we shift

                                        # Shift precedence comes from the token
                                        sprec, slevel = Precedence.get(a, ('right', 0))

                                        # Reduce precedence comes from the rule that could have been reduced
                                        rprec, rlevel = Productions[st_actionp[a].number].prec

                                        if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                            # We decide to shift here... highest precedence to shift
                                            Productions[st_actionp[a].number].reduced -= 1
                                            st_action[a] = j
                                            st_actionp[a] = p
                                            if not rlevel:
                                                log.info('  ! shift/reduce conflict for %s resolved as shift', a)
                                                self.sr_conflicts.append((st, a, 'shift'))
                                        elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                            st_action[a] = None
                                        else:
                                            # Hmmm. Guess we'll keep the reduce
                                            if not slevel and not rlevel:
                                                log.info('  ! shift/reduce conflict for %s resolved as reduce', a)
                                                self.sr_conflicts.append((st, a, 'reduce'))

                                    else:
                                        raise LALRError('Unknown conflict in state %d' % st)
                                else:
                                    st_action[a] = j
                                    st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = {}
            for a, p, m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info('    %-15s %s', a, m)
                        _actprint[(a, m)] = 1
            log.info('')
            # Print the actions that were not used. (debugging)
            not_used = 0
            for a, p, m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a, m) in _actprint:
                            log.debug('  ! %-15s [ %s ]', a, m)
                            not_used = 1
                            _actprint[(a, m)] = 1
            if not_used:
                log.debug('')

            # Construct the goto table for this state

            nkeys = {}
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I, n)
                j = self.lr0_cidhash.get(id(g), -1)
                if j >= 0:
                    st_goto[n] = j
                    log.info('    %-30s shift and go to state %d', n, j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
2720
2721    # -----------------------------------------------------------------------------
2722    # write()
2723    #
2724    # This function writes the LR parsing tables to a file
2725    # -----------------------------------------------------------------------------
2726
2727    def write_table(self, tabmodule, outputdir='', signature=''):
2728        if isinstance(tabmodule, types.ModuleType):
2729            raise IOError("Won't overwrite existing tabmodule")
2730
2731        basemodulename = tabmodule.split('.')[-1]
2732        filename = os.path.join(outputdir, basemodulename) + '.py'
2733        try:
2734            f = open(filename, 'w')
2735
2736            f.write('''
2737# %s
2738# This file is automatically generated. Do not edit.
2739# pylint: disable=W,C,R
2740_tabversion = %r
2741
2742_lr_method = %r
2743
2744_lr_signature = %r
2745    ''' % (os.path.basename(filename), __tabversion__, self.lr_method, signature))
2746
2747            # Change smaller to 0 to go back to original tables
2748            smaller = 1
2749
2750            # Factor out names to try and make smaller
2751            if smaller:
2752                items = {}
2753
2754                for s, nd in self.lr_action.items():
2755                    for name, v in nd.items():
2756                        i = items.get(name)
2757                        if not i:
2758                            i = ([], [])
2759                            items[name] = i
2760                        i[0].append(s)
2761                        i[1].append(v)
2762
2763                f.write('\n_lr_action_items = {')
2764                for k, v in items.items():
2765                    f.write('%r:([' % k)
2766                    for i in v[0]:
2767                        f.write('%r,' % i)
2768                    f.write('],[')
2769                    for i in v[1]:
2770                        f.write('%r,' % i)
2771
2772                    f.write(']),')
2773                f.write('}\n')
2774
2775                f.write('''
2776_lr_action = {}
2777for _k, _v in _lr_action_items.items():
2778   for _x,_y in zip(_v[0],_v[1]):
2779      if not _x in _lr_action:  _lr_action[_x] = {}
2780      _lr_action[_x][_k] = _y
2781del _lr_action_items
2782''')
2783
2784            else:
2785                f.write('\n_lr_action = { ')
2786                for k, v in self.lr_action.items():
2787                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
2788                f.write('}\n')
2789
2790            if smaller:
2791                # Factor out names to try and make smaller
2792                items = {}
2793
2794                for s, nd in self.lr_goto.items():
2795                    for name, v in nd.items():
2796                        i = items.get(name)
2797                        if not i:
2798                            i = ([], [])
2799                            items[name] = i
2800                        i[0].append(s)
2801                        i[1].append(v)
2802
2803                f.write('\n_lr_goto_items = {')
2804                for k, v in items.items():
2805                    f.write('%r:([' % k)
2806                    for i in v[0]:
2807                        f.write('%r,' % i)
2808                    f.write('],[')
2809                    for i in v[1]:
2810                        f.write('%r,' % i)
2811
2812                    f.write(']),')
2813                f.write('}\n')
2814
2815                f.write('''
2816_lr_goto = {}
2817for _k, _v in _lr_goto_items.items():
2818   for _x, _y in zip(_v[0], _v[1]):
2819       if not _x in _lr_goto: _lr_goto[_x] = {}
2820       _lr_goto[_x][_k] = _y
2821del _lr_goto_items
2822''')
2823            else:
2824                f.write('\n_lr_goto = { ')
2825                for k, v in self.lr_goto.items():
2826                    f.write('(%r,%r):%r,' % (k[0], k[1], v))
2827                f.write('}\n')
2828
2829            # Write production table
2830            f.write('_lr_productions = [\n')
2831            for p in self.lr_productions:
2832                if p.func:
2833                    f.write('  (%r,%r,%d,%r,%r,%d),\n' % (p.str, p.name, p.len,
2834                                                          p.func, os.path.basename(p.file), p.line))
2835                else:
2836                    f.write('  (%r,%r,%d,None,None,None),\n' % (str(p), p.name, p.len))
2837            f.write(']\n')
2838            f.close()
2839
2840        except IOError as e:
2841            raise
2842
2843
2844    # -----------------------------------------------------------------------------
2845    # pickle_table()
2846    #
2847    # This function pickles the LR parsing tables to a supplied file object
2848    # -----------------------------------------------------------------------------
2849
2850    def pickle_table(self, filename, signature=''):
2851        try:
2852            import cPickle as pickle
2853        except ImportError:
2854            import pickle
2855        with open(filename, 'wb') as outf:
2856            pickle.dump(__tabversion__, outf, pickle_protocol)
2857            pickle.dump(self.lr_method, outf, pickle_protocol)
2858            pickle.dump(signature, outf, pickle_protocol)
2859            pickle.dump(self.lr_action, outf, pickle_protocol)
2860            pickle.dump(self.lr_goto, outf, pickle_protocol)
2861
2862            outp = []
2863            for p in self.lr_productions:
2864                if p.func:
2865                    outp.append((p.str, p.name, p.len, p.func, os.path.basename(p.file), p.line))
2866                else:
2867                    outp.append((str(p), p.name, p.len, None, None, None))
2868            pickle.dump(outp, outf, pickle_protocol)
2869
2870# -----------------------------------------------------------------------------
2871#                            === INTROSPECTION ===
2872#
2873# The following functions and classes are used to implement the PLY
2874# introspection features followed by the yacc() function itself.
2875# -----------------------------------------------------------------------------
2876
2877# -----------------------------------------------------------------------------
2878# get_caller_module_dict()
2879#
2880# This function returns a dictionary containing all of the symbols defined within
2881# a caller further down the call stack.  This is used to get the environment
2882# associated with the yacc() call if none was provided.
2883# -----------------------------------------------------------------------------
2884
def get_caller_module_dict(levels):
    """Return the symbol table of the caller *levels* frames up the stack.

    Globals are copied first and then overlaid with locals (locals win),
    so grammar definitions are found whether yacc() was invoked from
    module scope or from inside a function.
    """
    frame = sys._getframe(levels)
    symbols = dict(frame.f_globals)
    if frame.f_globals != frame.f_locals:
        symbols.update(frame.f_locals)
    return symbols
2891
2892# -----------------------------------------------------------------------------
2893# parse_grammar()
2894#
2895# This takes a raw grammar rule string and parses it into production data
2896# -----------------------------------------------------------------------------
def parse_grammar(doc, file, line):
    """Parse a raw grammar-rule string into production data.

    doc  -- docstring containing one or more grammar rules
    file -- filename used in error messages
    line -- line number at which the docstring starts

    Returns a list of (file, lineno, prodname, syms) tuples, one per
    alternative.  A leading '|' continues the previous production.
    Raises SyntaxError for malformed rules.
    """
    productions = []
    current = None            # name of the most recent production, for '|'
    lineno = line
    for raw in doc.splitlines():
        lineno += 1
        tokens = raw.split()
        if not tokens:
            continue
        try:
            if tokens[0] == '|':
                # Continuation of the previous rule
                if current is None:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file, lineno))
                name, syms = current, tokens[1:]
            else:
                name = tokens[0]
                current = name
                # An IndexError here (missing separator) falls through to
                # the generic SyntaxError below.
                if tokens[1] not in (':', '::='):
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file, lineno))
                syms = tokens[2:]
            productions.append((file, lineno, name, syms))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError('%s:%d: Syntax error in rule %r' % (file, lineno, raw.strip()))
    return productions
2930
2931# -----------------------------------------------------------------------------
2932# ParserReflect()
2933#
2934# This class represents information extracted for building a parser including
2935# start symbol, error function, tokens, precedence list, action functions,
2936# etc.
2937# -----------------------------------------------------------------------------
class ParserReflect(object):
    """Collect and validate the parser specification found in a dictionary.

    Given *pdict* (normally a module's ``__dict__`` or the caller's
    globals), this class harvests the pieces PLY needs to build a parser:
    the ``start`` symbol, the ``p_error`` handler, the ``tokens`` list,
    the ``precedence`` table, and every ``p_*`` rule function.

    Typical usage: call :meth:`get_all` to harvest, :meth:`validate_all`
    to check, and :meth:`signature` to produce a fingerprint used to
    decide whether cached parser tables are still valid.
    """
    def __init__(self, pdict, log=None):
        self.pdict      = pdict      # source dictionary being reflected over
        self.start      = None       # start symbol (value of pdict['start'])
        self.error_func = None       # user-supplied p_error() handler
        self.tokens     = None       # sorted list of terminal names
        self.modules    = set()      # modules that define grammar functions
        self.grammar    = []         # [(funcname, (file, line, prodname, syms))]
        self.error      = False      # set True by any validation failure

        # Fall back to logging on stderr if no logger was supplied
        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        """Harvest every piece of parser information from pdict."""
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()

    # Validate all of the information
    def validate_all(self):
        """Run every validation step.  Returns True if any error occurred."""
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_modules()
        return self.error

    # Compute a signature over the grammar
    def signature(self):
        """Return a string fingerprint of the grammar specification.

        The fingerprint concatenates the start symbol, flattened
        precedence entries, the token list, and each rule docstring.
        It is compared against the signature stored in previously
        generated table files to detect grammar changes.  Malformed
        pieces (wrong types) are simply skipped here; full diagnostics
        are produced later by the validate_* methods.
        """
        parts = []
        try:
            if self.start:
                parts.append(self.start)
            if self.prec:
                parts.append(''.join([''.join(p) for p in self.prec]))
            if self.tokens:
                parts.append(' '.join(self.tokens))
            for f in self.pfuncs:
                # f is (line, module, name, docstring); only the docstring
                # contributes to the signature
                if f[3]:
                    parts.append(f[3])
        except (TypeError, ValueError):
            pass
        return ''.join(parts)

    # -----------------------------------------------------------------------------
    # validate_modules()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------

    def validate_modules(self):
        """Warn about duplicated ``def p_name(`` definitions in source text."""
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for module in self.modules:
            try:
                lines, linen = inspect.getsourcelines(module)
            except IOError:
                # Source not available (e.g. interactive session); skip check
                continue

            # Map each p_ function name to the first line it was seen on
            counthash = {}
            for linen, line in enumerate(lines):
                linen += 1
                m = fre.match(line)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        # Same name defined twice: the later def silently
                        # shadows the earlier one, so warn (not error)
                        filename = inspect.getsourcefile(module)
                        self.log.warning('%s:%d: Function %s redefined. Previously defined on line %d',
                                         filename, linen, name, prev)

    # Get the start symbol
    def get_start(self):
        """Read the optional 'start' symbol from pdict."""
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        """Check that 'start', if given, is a string."""
        if self.start is not None:
            if not isinstance(self.start, string_types):
                self.log.error("'start' must be a string")

    # Look for error handler
    def get_error_func(self):
        """Read the optional p_error() handler from pdict."""
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        """Check that p_error(), if given, is a 1-argument callable."""
        if self.error_func:
            # ismethod adjusts the expected argcount for the implicit 'self'
            if isinstance(self.error_func, types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = True
                return

            eline = self.error_func.__code__.co_firstlineno
            efile = self.error_func.__code__.co_filename
            module = inspect.getmodule(self.error_func)
            # Remember the defining module for validate_modules()
            self.modules.add(module)

            argcount = self.error_func.__code__.co_argcount - ismethod
            if argcount != 1:
                self.log.error('%s:%d: p_error() requires 1 argument', efile, eline)
                self.error = True

    # Get the tokens map
    def get_tokens(self):
        """Read and sort the required 'tokens' list from pdict."""
        tokens = self.pdict.get('tokens')
        if not tokens:
            self.log.error('No token list is defined')
            self.error = True
            return

        if not isinstance(tokens, (list, tuple)):
            self.log.error('tokens must be a list or tuple')
            self.error = True
            return

        if not tokens:
            self.log.error('tokens is empty')
            self.error = True
            return

        # Sorted copy gives a stable ordering for signature() comparisons
        self.tokens = sorted(tokens)

    # Validate the tokens
    def validate_tokens(self):
        # Validate the tokens.
        if 'error' in self.tokens:
            # 'error' is the built-in error-recovery symbol in yacc grammars
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = True
            return

        terminals = set()
        for n in self.tokens:
            if n in terminals:
                self.log.warning('Token %r multiply defined', n)
            terminals.add(n)

    # Get the precedence map (if any)
    def get_precedence(self):
        """Read the optional 'precedence' specification from pdict."""
        self.prec = self.pdict.get('precedence')

    # Validate and parse the precedence map
    def validate_precedence(self):
        """Check 'precedence' and flatten it into self.preclist.

        Input form: a sequence of (assoc, term, ..., term) tuples, lowest
        precedence first.  Output form: [(term, assoc, level), ...] with
        levels numbered from 1.
        """
        preclist = []
        if self.prec:
            if not isinstance(self.prec, (list, tuple)):
                self.log.error('precedence must be a list or tuple')
                self.error = True
                return
            for level, p in enumerate(self.prec):
                if not isinstance(p, (list, tuple)):
                    self.log.error('Bad precedence table')
                    self.error = True
                    return

                if len(p) < 2:
                    self.log.error('Malformed precedence entry %s. Must be (assoc, term, ..., term)', p)
                    self.error = True
                    return
                assoc = p[0]
                if not isinstance(assoc, string_types):
                    self.log.error('precedence associativity must be a string')
                    self.error = True
                    return
                for term in p[1:]:
                    if not isinstance(term, string_types):
                        self.log.error('precedence items must be strings')
                        self.error = True
                        return
                    # level+1 so precedence levels start at 1, not 0
                    preclist.append((term, assoc, level+1))
        self.preclist = preclist

    # Get all p_functions from the grammar
    def get_pfunctions(self):
        """Collect every p_* rule function as (line, module, name, doc)."""
        p_functions = []
        for name, item in self.pdict.items():
            # p_error is the error handler, not a grammar rule
            if not name.startswith('p_') or name == 'p_error':
                continue
            if isinstance(item, (types.FunctionType, types.MethodType)):
                # Prefer a co_firstlineno attribute on the object itself
                # (if one was attached); otherwise read it from __code__
                line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
                module = inspect.getmodule(item)
                p_functions.append((line, module, name, item.__doc__))

        # Sort all of the actions by line number; make sure to stringify
        # modules to make them sortable, since `line` may not uniquely sort all
        # p functions
        p_functions.sort(key=lambda p_function: (
            p_function[0],
            str(p_function[1]),
            p_function[2],
            p_function[3]))
        self.pfuncs = p_functions

    # Validate all of the p_functions
    def validate_pfunctions(self):
        """Check each rule function's signature and parse its docstring.

        Populates self.grammar with (funcname, (file, line, prodname,
        syms)) entries and self.modules with the defining modules.
        """
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error('no rules of the form p_rulename are defined')
            self.error = True
            return

        for line, module, name, doc in self.pfuncs:
            file = inspect.getsourcefile(module)
            func = self.pdict[name]
            # Bound methods take (self, p); plain functions take (p)
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func.__code__.co_argcount > reqargs:
                self.log.error('%s:%d: Rule %r has too many arguments', file, line, func.__name__)
                self.error = True
            elif func.__code__.co_argcount < reqargs:
                self.log.error('%s:%d: Rule %r requires an argument', file, line, func.__name__)
                self.error = True
            elif not func.__doc__:
                # No docstring means no grammar rule; ignored with a warning
                self.log.warning('%s:%d: No documentation string specified in function %r (ignored)',
                                 file, line, func.__name__)
            else:
                try:
                    # The docstring holds the BNF for this rule
                    parsed_g = parse_grammar(doc, file, line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError as e:
                    self.log.error(str(e))
                    self.error = True

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.modules.add(module)

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.

        for n, v in self.pdict.items():
            if n.startswith('p_') and isinstance(v, (types.FunctionType, types.MethodType)):
                continue
            if n.startswith('t_'):
                # t_ names belong to the lexer; not our concern here
                continue
            if n.startswith('p_') and n != 'p_error':
                self.log.warning('%r not defined as a function', n)
            # Heuristic: a 1-arg function (2-arg method) whose docstring's
            # second space-separated word is ':' looks like a grammar rule
            # that is missing the p_ prefix
            if ((isinstance(v, types.FunctionType) and v.__code__.co_argcount == 1) or
                   (isinstance(v, types.MethodType) and v.__func__.__code__.co_argcount == 2)):
                if v.__doc__:
                    try:
                        doc = v.__doc__.split(' ')
                        if doc[1] == ':':
                            self.log.warning('%s:%d: Possible grammar rule %r defined without p_ prefix',
                                             v.__code__.co_filename, v.__code__.co_firstlineno, n)
                    except IndexError:
                        pass

        self.grammar = grammar
3209
3210# -----------------------------------------------------------------------------
3211# yacc(module)
3212#
3213# Build a parser
3214# -----------------------------------------------------------------------------
3215
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=True, optimize=False, write_tables=True, debugfile=debug_file,
         outputdir=None, debuglog=None, errorlog=None, picklefile=None):
    """Build and return an LRParser from grammar rules found in *module*
    (or, if *module* is None, in the caller's namespace).

    Parameters
    ----------
    method : str
        Table construction algorithm, e.g. 'LALR' (passed to LRGeneratedTable).
    debug : bool
        Emit a debugging log (the parser.out-style file and extra messages).
    module : module or object, optional
        Where to look for tokens/precedence/p_* functions; defaults to the
        caller's module dictionary.
    tabmodule : str or module
        Name (or module) used for the generated parser table file.
    start : str, optional
        Overrides the grammar's 'start' symbol.
    check_recursion : bool
        Check for unreachable symbols and infinite recursion.
    optimize : bool
        Accept cached tables without verifying the grammar signature.
    write_tables : bool
        Write the generated tables out to *tabmodule* (disabled when
        *picklefile* is given).
    debugfile, outputdir : str
        Name and directory of the debug log file.
    debuglog, errorlog : logger, optional
        PlyLogger-style objects for debug and error output.
    picklefile : str, optional
        If given, tables are pickled to this file instead of written as
        a module.

    Raises
    ------
    YaccError
        If the grammar specification is invalid or the tables cannot be
        built.
    """

    if tabmodule is None:
        tabmodule = tab_module

    # Reference to the parsing method of the last built parser
    global parse

    # If pickling is enabled, table files are not created
    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k, getattr(module, k)) for k in dir(module)]
        pdict = dict(_items)
        # If no __file__ or __package__ attributes are available, try to obtain them
        # from the __module__ instead
        if '__file__' not in pdict:
            pdict['__file__'] = sys.modules[pdict['__module__']].__file__
        if '__package__' not in pdict and '__module__' in pdict:
            if hasattr(sys.modules[pdict['__module__']], '__package__'):
                pdict['__package__'] = sys.modules[pdict['__module__']].__package__
    else:
        # No module given: reflect over the caller's namespace
        pdict = get_caller_module_dict(2)

    if outputdir is None:
        # If no output directory is set, the location of the output files
        # is determined according to the following rules:
        #     - If tabmodule specifies a package, files go into that package directory
        #     - Otherwise, files go in the same directory as the specifying module
        if isinstance(tabmodule, types.ModuleType):
            srcfile = tabmodule.__file__
        else:
            if '.' not in tabmodule:
                srcfile = pdict['__file__']
            else:
                # Dotted table module name: import its package to find
                # where on disk the tables should live
                parts = tabmodule.split('.')
                pkgname = '.'.join(parts[:-1])
                exec('import %s' % pkgname)
                srcfile = getattr(sys.modules[pkgname], '__file__', '')
        outputdir = os.path.dirname(srcfile)

    # Determine whether the specifying module lives inside a package.
    # If so, qualify the tabmodule name so that the tables import correctly
    pkg = pdict.get('__package__')
    if pkg and isinstance(tabmodule, str):
        if '.' not in tabmodule:
            tabmodule = pkg + '.' + tabmodule



    # Set start symbol if it's specified directly using an argument
    if start is not None:
        pdict['start'] = start

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict, log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError('Unable to build parser')

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables.  If cached tables exist and either optimization is
    # on or the stored signature matches, reuse them and return early.
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr, pinfo.error_func)
                # Expose module-level parse() shortcut for the last parser
                parse = parser.parse
                return parser
            except Exception as e:
                errorlog.warning('There was a problem loading the table file: %r', e)
    except VersionError as e:
        errorlog.warning(str(e))
    except ImportError:
        # No cached tables; fall through and regenerate them below
        pass

    if debuglog is None:
        if debug:
            try:
                debuglog = PlyLogger(open(os.path.join(outputdir, debugfile), 'w'))
            except IOError as e:
                errorlog.warning("Couldn't open %r. %s" % (debugfile, e))
                debuglog = NullLogger()
        else:
            debuglog = NullLogger()

    debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)

    errors = False

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError('Unable to build parser')

    if not pinfo.error_func:
        errorlog.warning('no p_error() function is defined')

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term, assoc, level)
        except GrammarError as e:
            errorlog.warning('%s', e)

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname, syms, funcname, file, line)
        except GrammarError as e:
            errorlog.error('%s', e)
            errors = True

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError as e:
        errorlog.error(str(e))
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error('%s:%d: Symbol %r used, but not defined as a token or a rule', prod.file, prod.line, sym)
        errors = True

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info('')
        debuglog.info('Unused terminals:')
        debuglog.info('')
        for term in unused_terminals:
            errorlog.warning('Token %r defined, but not used', term)
            debuglog.info('    %s', term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info('')
        debuglog.info('Grammar')
        debuglog.info('')
        for n, p in enumerate(grammar.Productions):
            debuglog.info('Rule %-5d %s', n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning('%s:%d: Rule %r defined, but not used', prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning('There is 1 unused token')
    if len(unused_terminals) > 1:
        errorlog.warning('There are %d unused tokens', len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning('There is 1 unused rule')
    if len(unused_rules) > 1:
        errorlog.warning('There are %d unused rules', len(unused_rules))

    if debug:
        debuglog.info('')
        debuglog.info('Terminals, with rules where they appear')
        debuglog.info('')
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info('%-20s : %s', term, ' '.join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info('')
        debuglog.info('Nonterminals, with rules where they appear')
        debuglog.info('')
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info('%-20s : %s', nonterm, ' '.join([str(s) for s in grammar.Nonterminals[nonterm]]))
        debuglog.info('')

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning('Symbol %r is unreachable', u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error('Infinite recursion detected for symbol %r', inf)
            errors = True

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error('Precedence rule %r defined for unknown symbol %r', assoc, term)
        errors = True

    if errors:
        raise YaccError('Unable to build parser')

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug('Generating %s tables', method)

    lr = LRGeneratedTable(grammar, method, debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning('1 shift/reduce conflict')
        elif num_sr > 1:
            errorlog.warning('%d shift/reduce conflicts', num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning('1 reduce/reduce conflict')
        elif num_rr > 1:
            errorlog.warning('%d reduce/reduce conflicts', num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning('')
        debuglog.warning('Conflicts:')
        debuglog.warning('')

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning('shift/reduce conflict for %s in state %d resolved as %s',  tok, state, resolution)

        # Report each distinct (state, rule, rejected) combination once
        already_reported = set()
        for state, rule, rejected in lr.rr_conflicts:
            if (state, id(rule), id(rejected)) in already_reported:
                continue
            debuglog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            debuglog.warning('rejected rule (%s) in state %d', rejected, state)
            errorlog.warning('reduce/reduce conflict in state %d resolved using rule (%s)', state, rule)
            errorlog.warning('rejected rule (%s) in state %d', rejected, state)
            already_reported.add((state, id(rule), id(rejected)))

        # Warn (once per rule) about rules that lose every conflict
        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning('Rule (%s) is never reduced', rejected)
                errorlog.warning('Rule (%s) is never reduced', rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        try:
            lr.write_table(tabmodule, outputdir, signature)
            # Drop any stale in-memory copy so a later import re-reads
            # the freshly written tables
            if tabmodule in sys.modules:
                del sys.modules[tabmodule]
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))

    # Write a pickled version of the tables
    if picklefile:
        try:
            lr.pickle_table(picklefile, signature)
        except IOError as e:
            errorlog.warning("Couldn't create %r. %s" % (picklefile, e))

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr, pinfo.error_func)

    # Expose module-level parse() shortcut for the last parser built
    parse = parser.parse
    return parser
3503