# pythonGrammarParser.py
#
# Copyright, 2006, by Paul McGuire
#
# Parses the BNF text of the Python 2.4.1 grammar with pyparsing and builds a
# small tree of semantic objects (OR / AND / OPT groups and atoms) for each
# grammar rule, then prints every rule twice: once in parse order and once by
# dictionary-style key lookup.

from pyparsing import *

# should probably read this from the Grammar file provided with the Python source, but
# this just skips that step and inlines the bnf text directly - this grammar was taken from
# Python 2.4.1
#
grammar = r"""
# Grammar for Python

# Note: Changing the grammar specified in this file will most likely
# require corresponding changes in the parser module
# (../Modules/parsermodule.c). If you can't make the changes to
# that module yourself, please co-ordinate the required changes
# with someone who can; ask around on python-dev for help. Fred
# Drake <fdrake@acm.org> will probably be listening there.

# Commands for Kees Blom's railroad program
#diagram:token NAME
#diagram:token NUMBER
#diagram:token STRING
#diagram:token NEWLINE
#diagram:token ENDMARKER
#diagram:token INDENT
#diagram:output\input python.bla
#diagram:token DEDENT
#diagram:output\textwidth 20.04cm\oddsidemargin 0.0cm\evensidemargin 0.0cm
#diagram:rules

# Start symbols for the grammar:
# single_input is a single interactive statement;
# file_input is a module or sequence of commands read from an input file;
# eval_input is the input for the eval() and input() functions.
# NB: compound_stmt in single_input is followed by extra NEWLINE!
single_input: NEWLINE | simple_stmt | compound_stmt NEWLINE
file_input: (NEWLINE | stmt)* ENDMARKER
eval_input: testlist NEWLINE* ENDMARKER

decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE
decorators: decorator+
funcdef: [decorators] 'def' NAME parameters ':' suite
parameters: '(' [varargslist] ')'
varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
fpdef: NAME | '(' fplist ')'
fplist: fpdef (',' fpdef)* [',']

stmt: simple_stmt | compound_stmt
simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt | flow_stmt | import_stmt | global_stmt | exec_stmt | assert_stmt
expr_stmt: testlist (augassign testlist | ('=' testlist)*)
augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^=' | '<<=' | '>>=' | '**=' | '//='
# For normal assignments, additional restrictions enforced by the interpreter
print_stmt: 'print' ( [ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ] )
del_stmt: 'del' exprlist
pass_stmt: 'pass'
flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt | yield_stmt
break_stmt: 'break'
continue_stmt: 'continue'
return_stmt: 'return' [testlist]
yield_stmt: 'yield' testlist
raise_stmt: 'raise' [test [',' test [',' test]]]
import_stmt: import_name | import_from
import_name: 'import' dotted_as_names
import_from: 'from' dotted_name 'import' ('*' | '(' import_as_names ')' | import_as_names)
import_as_name: NAME [NAME NAME]
dotted_as_name: dotted_name [NAME NAME]
import_as_names: import_as_name (',' import_as_name)* [',']
dotted_as_names: dotted_as_name (',' dotted_as_name)*
dotted_name: NAME ('.' NAME)*
global_stmt: 'global' NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]
#35
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | funcdef | classdef
if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
while_stmt: 'while' test ':' suite ['else' ':' suite]
for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
try_stmt: ('try' ':' suite (except_clause ':' suite)+ #diagram:break
['else' ':' suite] | 'try' ':' suite 'finally' ':' suite)
# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [',' test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT

test: and_test ('or' and_test)* | lambdef
and_test: not_test ('and' not_test)*
not_test: 'not' not_test | comparison
comparison: expr (comp_op expr)*
comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
expr: xor_expr ('|' xor_expr)*
xor_expr: and_expr ('^' and_expr)*
and_expr: shift_expr ('&' shift_expr)*
shift_expr: arith_expr (('<<'|'>>') arith_expr)*
arith_expr: term (('+'|'-') term)*
term: factor (('*'|'/'|'%'|'//') factor)*
factor: ('+'|'-'|'~') factor | power
power: atom trailer* ['**' factor]
atom: '(' [testlist_gexp] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}' | '`' testlist1 '`' | NAME | NUMBER | STRING+
listmaker: test ( list_for | (',' test)* [','] )
testlist_gexp: test ( gen_for | (',' test)* [','] )
lambdef: 'lambda' [varargslist] ':' test
trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
subscriptlist: subscript (',' subscript)* [',']
subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
sliceop: ':' [test]
exprlist: expr (',' expr)* [',']
testlist: test (',' test)* [',']
testlist_safe: test [(',' test)+ [',']]
dictmaker: test ':' test (',' test ':' test)* [',']

classdef: 'class' NAME ['(' testlist ')'] ':' suite

arglist: (argument ',')* (argument [',']| '*' test [',' '**' test] | '**' test)
argument: [test '='] test [gen_for] # Really [keyword '='] test

list_iter: list_for | list_if
list_for: 'for' exprlist 'in' testlist_safe [list_iter]
list_if: 'if' test [list_iter]

gen_iter: gen_for | gen_if
gen_for: 'for' exprlist 'in' test [gen_iter]
gen_if: 'if' test [gen_iter]

testlist1: test (',' test)*

# not used in grammar, but may appear in "node" passed from Parser to Compiler
encoding_decl: NAME
"""


class SemanticGroup(object):
    """Base class for a labelled sequence of parsed BNF elements.

    Subclasses provide a class-level ``label`` string, which ``__str__``
    uses as the prefix of the rendered group.
    """

    def __init__(self, contents):
        """Store *contents*, splicing in a trailing group of the same class.

        While the last element is an instance of exactly this class, it is
        replaced by its own contents, so OR(a, OR(b, c)) becomes OR(a, b, c).
        """
        self.contents = contents
        # Exact-class comparison on purpose: only a nested group of the very
        # same kind is merged.  The emptiness guard avoids an IndexError when
        # a group is built from an empty sequence.
        while self.contents and type(self.contents[-1]) is type(self):
            self.contents = self.contents[:-1] + self.contents[-1].contents

    def __str__(self):
        """Return ``LABEL(elem elem ...)`` for this group."""
        return "{0}({1})".format(
            self.label, " ".join(str(c) for c in self.contents)
        )


class OrList(SemanticGroup):
    """Alternatives separated by ``|`` in the BNF."""

    label = "OR"


class AndList(SemanticGroup):
    """Two or more terms that appear in sequence in the BNF."""

    label = "AND"


class OptionalGroup(SemanticGroup):
    """A bracketed ``[...]`` group in the BNF."""

    label = "OPT"


class Atom(SemanticGroup):
    """A single BNF term with an optional repetition symbol (``*`` or ``+``)."""

    def __init__(self, contents):
        """Build an atom from a bare string or a ``[term]`` / ``[term, rep]`` sequence.

        A string is taken as the term itself, with no repetition symbol.
        Otherwise element 0 is the term and element 1, when present, is the
        repetition symbol.
        """
        # The string case must be tested first: indexing a string of length
        # greater than one would treat its second character as the
        # repetition symbol.
        if isinstance(contents, str):
            self.rep = ""
            self.contents = contents
            return
        self.rep = contents[1] if len(contents) > 1 else ""
        self.contents = contents[0]

    def __str__(self):
        """Return the repetition symbol (if any) followed by the term."""
        return "{0}{1}".format(self.rep, self.contents)


def makeGroupObject(cls):
    """Return a parse action that wraps the matched tokens in *cls*.

    When the first token has an ``asList`` method (the tokens came from a
    ``Group``), the resulting list is passed to *cls*; otherwise the token
    collection itself is passed.
    """
    def groupAction(s, loc, toks):
        # Only the asList() probe is guarded, so that errors raised by the
        # constructor itself are not silently masked.
        try:
            contents = toks[0].asList()
        except AttributeError:
            contents = toks
        return cls(contents)
    return groupAction


# bnf punctuation
LPAREN = Suppress("(")
RPAREN = Suppress(")")
LBRACK = Suppress("[")
RBRACK = Suppress("]")
COLON = Suppress(":")
ALT_OP = Suppress("|")

# bnf grammar
ident = Word(alphanums + "_")
# a token is an identifier that is NOT followed by ':' (that would make it
# the name that starts the next rule)
bnfToken = Word(alphanums + "_") + ~FollowedBy(":")
repSymbol = oneOf("* +")
bnfExpr = Forward()
optionalTerm = Group(LBRACK + bnfExpr + RBRACK).setParseAction(makeGroupObject(OptionalGroup))
bnfTerm = (
    (bnfToken | quotedString | optionalTerm | (LPAREN + bnfExpr + RPAREN))
    + Optional(repSymbol)
).setParseAction(makeGroupObject(Atom))
andList = Group(bnfTerm + OneOrMore(bnfTerm)).setParseAction(makeGroupObject(AndList))
bnfFactor = andList | bnfTerm
orList = Group(bnfFactor + OneOrMore(ALT_OP + bnfFactor)).setParseAction(makeGroupObject(OrList))
bnfExpr << (orList | bnfFactor)
bnfLine = ident + COLON + bnfExpr

bnfComment = "#" + restOfLine

# build return tokens as a dictionary
bnf = Dict(OneOrMore(Group(bnfLine)))
bnf.ignore(bnfComment)

# bnf is defined, parse the grammar text
bnfDefs = bnf.parseString(grammar)

# correct answer is 78
expected = 78
assert len(bnfDefs) == expected, \
    "Error, found %d BNF defns, expected %d" % (len(bnfDefs), expected)

# list out defns in order they were parsed (to verify accuracy of parsing)
for k, v in bnfDefs:
    print(k, "=", v)
print()

# list out parsed grammar defns (demonstrates dictionary access to parsed tokens)
# NOTE: iterate .keys() explicitly - iterating bnfDefs itself yields the
# parsed groups, not the rule names.
for k in bnfDefs.keys():
    print(k, "=", bnfDefs[k])