1# pythonGrammarParser.py
2#
3# Copyright, 2006, by Paul McGuire
4#
5
6from pyparsing import *
7
# BNF text of the Python grammar, taken from Python 2.4.1.  Ideally this would
# be read from the Grammar file provided with the Python source; this script
# skips that step and inlines the text directly.
#
# The literal is a raw string, so the backslashes in the "#diagram:" lines are
# kept as-is.  Lines starting with "#" inside the literal are part of the
# string's content, not Python comments; the BNF parser built further down
# ignores them through its "#"-to-end-of-line comment expression.
grammar = r"""
# Grammar for Python

# Note:  Changing the grammar specified in this file will most likely
#        require corresponding changes in the parser module
#        (../Modules/parsermodule.c).  If you can't make the changes to
#        that module yourself, please co-ordinate the required changes
#        with someone who can; ask around on python-dev for help.  Fred
#        Drake <fdrake@acm.org> will probably be listening there.

# Commands for Kees Blom's railroad program
#diagram:token NAME
#diagram:token NUMBER
#diagram:token STRING
#diagram:token NEWLINE
#diagram:token ENDMARKER
#diagram:token INDENT
#diagram:output\input python.bla
#diagram:token DEDENT
#diagram:output\textwidth 20.04cm\oddsidemargin  0.0cm\evensidemargin 0.0cm
#diagram:rules

# Start symbols for the grammar:
#	single_input is a single interactive statement;
#	file_input is a module or sequence of commands read from an input file;
#	eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
funcdef: [decorators] 'def' NAME parameters ':' suite
parameters: '(' [varargslist] ')'
varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
fpdef: NAME | '(' fplist ')'
fplist: fpdef (',' fpdef)* [',']

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: expr_stmt | print_stmt  | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | exec_stmt | assert_stmt
expr_stmt: testlist (augassign testlist | ('=' testlist)*)
augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//='
# For normal assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: 'yield' testlist
raise_stmt: 'raise' [test [',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: 'from' dotted_name 'import' ('*' | '(' import_as_names ')' | import_as_names)
import_as_name: NAME [NAME NAME]
dotted_as_name: dotted_name [NAME NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]
#35
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite (except_clause ':' suite)+ #diagram:break
           ['else' ':' suite] | 'try' ':' suite 'finally' ':' suite)
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [',' test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

test: and_test ('or' and_test)* | lambdef
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: '(' [testlist_gexp] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}' | '`' testlist1 '`' | NAME | NUMBER | STRING+
listmaker: test ( list_for | (',' test)* [','] )
testlist_gexp: test ( gen_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
testlist_safe: test [(',' test)+ [',']]
dictmaker: test ':' test (',' test ':' test)* [',']

classdef: 'class' NAME ['(' testlist ')'] ':' suite

arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
argument: [test '='] test [gen_for] # Really [keyword '='] test

list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' test [list_iter]

gen_iter: gen_for | gen_if
gen_for: 'for' exprlist 'in' test [gen_iter]
gen_if: 'if' test [gen_iter]

testlist1: test (',' test)*

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
"""
132
class SemanticGroup(object):
    """Base class for a labelled group of parsed BNF elements.

    ``__str__`` reads a class attribute ``label`` for the group's display
    name.  This base class does not define one, so subclasses supply it.
    """

    def __init__(self, contents):
        """Store *contents*, splicing in a trailing group of the same class.

        While the last element is an instance of exactly this group's class,
        it is replaced by its own contents, so ``G([a, G([b, c])])`` ends up
        holding ``[a, b, c]``.  Only the final element is ever examined.

        An empty *contents* is stored unchanged rather than raising
        ``IndexError``.
        """
        self.contents = contents
        # Exact class match (not isinstance): a trailing group of a different
        # class stays nested.  The truthiness guard stops on empty contents.
        while self.contents and type(self.contents[-1]) is type(self):
            self.contents = self.contents[:-1] + self.contents[-1].contents

    def __str__(self):
        """Return ``LABEL(item item ...)`` with every item rendered by ``str``."""
        return "{0}({1})".format(
            self.label, " ".join(str(c) for c in self.contents))
142
class OrList(SemanticGroup):
    """Group of alternatives (factors separated by ``|``), labelled ``OR``."""

    label = "OR"
146
class AndList(SemanticGroup):
    """Group of two or more consecutive terms, labelled ``AND``."""

    label = "AND"
150
class OptionalGroup(SemanticGroup):
    """Group for an expression enclosed in ``[`` ``]``, labelled ``OPT``."""

    label = "OPT"
154
class Atom(SemanticGroup):
    """A single BNF term plus the repetition symbol that followed it, if any.

    ``contents`` holds the term and ``rep`` the repetition symbol, or ``""``
    when there is none.  ``SemanticGroup.__init__`` is not called, so no
    flattening of nested groups takes place here.
    """

    def __init__(self, contents):
        """Build the atom from a bare string or an indexable token sequence.

        A string is taken whole as the term, with no repetition symbol.  For
        any other input, element 0 is the term and element 1, when present,
        is the repetition symbol.
        """
        if isinstance(contents, str):
            # Test for a string first: indexing a multi-character string
            # would otherwise take its second character as the repetition
            # symbol (e.g. "NAME" would yield rep "A").
            self.rep = ""
            self.contents = contents
        else:
            self.rep = contents[1] if len(contents) > 1 else ""
            self.contents = contents[0]

    def __str__(self):
        """Return the repetition symbol immediately followed by the term."""
        return "{0}{1}".format(self.rep, self.contents)
168
def makeGroupObject(cls):
    """Return a parse action that wraps the matched tokens in a *cls* instance.

    The returned callable takes the ``(s, l, t)`` parse-action arguments and
    uses only ``t``.  When ``t[0]`` offers ``asList()``, *cls* is called with
    that list; otherwise *cls* is called with ``t`` itself.
    """
    def groupAction(s, l, t):
        # Guard only the unwrapping step.  An exception raised by the
        # constructor now propagates instead of being swallowed and followed
        # by a second constructor call on different input.
        try:
            contents = t[0].asList()
        except (AttributeError, IndexError):
            contents = t
        return cls(contents)
    return groupAction
176
177
# Punctuation used by the BNF notation.  Every symbol is wrapped in Suppress,
# so it is matched during parsing but left out of the resulting tokens.
LPAREN, RPAREN = Suppress("("), Suppress(")")
LBRACK, RBRACK = Suppress("["), Suppress("]")
COLON, ALT_OP = Suppress(":"), Suppress("|")
185
# Parser for the BNF notation itself.

# Name on the left-hand side of a rule.
ident = Word(alphanums + "_")

# A word used inside a rule body; it must not be followed by ":".
bnfToken = Word(alphanums + "_") + ~FollowedBy(":")

# Repetition markers that may trail a term.
repSymbol = oneOf("* +")

# bnfExpr is recursive, so it is declared here and filled in further down.
bnfExpr = Forward()

# "[ expr ]" becomes an OptionalGroup.
optionalTerm = Group(LBRACK + bnfExpr + RBRACK)
optionalTerm.setParseAction(makeGroupObject(OptionalGroup))

# A term is a token, a quoted literal, an optional group or a parenthesized
# expression, with an optional repetition marker; it becomes an Atom.
bnfTerm = (
    (bnfToken | quotedString | optionalTerm | (LPAREN + bnfExpr + RPAREN))
    + Optional(repSymbol)
)
bnfTerm.setParseAction(makeGroupObject(Atom))

# Two or more terms in a row become an AndList.
andList = Group(bnfTerm + OneOrMore(bnfTerm))
andList.setParseAction(makeGroupObject(AndList))

# The longer form is tried before the single term.
bnfFactor = andList | bnfTerm

# Two or more factors separated by "|" become an OrList.
orList = Group(bnfFactor + OneOrMore(ALT_OP + bnfFactor))
orList.setParseAction(makeGroupObject(OrList))

bnfExpr << (orList | bnfFactor)

# One complete rule: "name: expression".
bnfLine = ident + COLON + bnfExpr

# "#" through end of line.
bnfComment = "#" + restOfLine

# The whole grammar: one group per rule, collected in a Dict so that rules
# can also be looked up by name.  Comments are skipped wherever they occur.
bnf = Dict(OneOrMore(Group(bnfLine)))
bnf.ignore(bnfComment)
204
# Run the BNF parser over the inlined grammar text.
bnfDefs = bnf.parseString(grammar)

# Sanity check: the grammar text is expected to yield 78 rule definitions.
expected = 78
assert len(bnfDefs) == expected, (
    "Error, found %d BNF defns, expected %d" % (len(bnfDefs), expected)
)

# First listing: walk the results in parse order, which makes it easy to
# compare the output against the grammar text.
for k, v in bnfDefs:
    print(k, "=", v)
print()

# Second listing: fetch each definition by rule name, showing the
# dictionary-style access to the parsed tokens.
for k in bnfDefs.keys():
    print(k, "=", bnfDefs[k])
221