#------------------------------------------------------------------------------
# pycparser: c_parser.py
#
# CParser class: Parser and AST builder for the C language
#
# Eli Bendersky [https://eli.thegreenplace.net/]
# License: BSD
#------------------------------------------------------------------------------
from .ply import yacc

from . import c_ast
from .c_lexer import CLexer
from .plyparser import PLYParser, ParseError, parameterized, template
from .ast_transforms import fix_switch_cases, fix_atomic_specifiers


@template
class CParser(PLYParser):
    def __init__(
            self,
            lex_optimize=True,
            lexer=CLexer,
            lextab='pycparser.lextab',
            yacc_optimize=True,
            yacctab='pycparser.yacctab',
            yacc_debug=False,
            taboutputdir=''):
        """ Create a new CParser.

            Some arguments for controlling the debug/optimization
            level of the parser are provided. The defaults are
            tuned for release/performance mode.
            The simple rules for using them are:
            *) When tweaking CParser/CLexer, set these to False
            *) When releasing a stable parser, set to True

            lex_optimize:
                Set to False when you're modifying the lexer.
                Otherwise, changes in the lexer won't be used, if
                some lextab.py file exists.
                When releasing with a stable lexer, set to True
                to save the re-generation of the lexer table on
                each run.

            lexer:
                Set this parameter to define the lexer to use if
                you're not using the default CLexer.

            lextab:
                Points to the lex table that's used for optimized
                mode. Only if you're modifying the lexer and want
                some tests to avoid re-generating the table, make
                this point to a local lex table file (that's been
                earlier generated with lex_optimize=True)

            yacc_optimize:
                Set to False when you're modifying the parser.
                Otherwise, changes in the parser won't be used, if
                some parsetab.py file exists.
                When releasing with a stable parser, set to True
                to save the re-generation of the parser table on
                each run.

            yacctab:
                Points to the yacc table that's used for optimized
                mode. Only if you're modifying the parser, make
                this point to a local yacc table file

            yacc_debug:
                Generate a parser.out file that explains how yacc
                built the parsing table from the grammar.

            taboutputdir:
                Set this parameter to control the location of generated
                lextab and yacctab files.
        """
        # The lexer calls back into the parser: for error reporting, for
        # scope tracking on braces, and to decide whether an identifier
        # is a typedef name in the current scope (needed to resolve the
        # classic C typedef-name ambiguity).
        self.clex = lexer(
            error_func=self._lex_error_func,
            on_lbrace_func=self._lex_on_lbrace_func,
            on_rbrace_func=self._lex_on_rbrace_func,
            type_lookup_func=self._lex_type_lookup_func)

        self.clex.build(
            optimize=lex_optimize,
            lextab=lextab,
            outputdir=taboutputdir)
        self.tokens = self.clex.tokens

        # For each rule listed here an extra "<rule>_opt" production
        # (matching the rule or empty) is generated via
        # PLYParser._create_opt_rule before the parser tables are built.
        rules_with_opt = [
            'abstract_declarator',
            'assignment_expression',
            'declaration_list',
            'declaration_specifiers_no_type',
            'designation',
            'expression',
            'identifier_list',
            'init_declarator_list',
            'id_init_declarator_list',
            'initializer_list',
            'parameter_type_list',
            'block_item_list',
            'type_qualifier_list',
            'struct_declarator_list'
        ]

        for rule in rules_with_opt:
            self._create_opt_rule(rule)

        self.cparser = yacc.yacc(
            module=self,
            start='translation_unit_or_empty',
            debug=yacc_debug,
            optimize=yacc_optimize,
            tabmodule=yacctab,
            outputdir=taboutputdir)

        # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
        # the current (topmost) scope. Each scope is a dictionary that
        # specifies whether a name is a type. If _scope_stack[n][name] is
        # True, 'name' is currently a type in the scope. If it's False,
        # 'name' is used in the scope but not as a type (for instance, if we
        # saw: int name;
        # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
        # in this scope at all.
        self._scope_stack = [dict()]

        # Keeps track of the last token given to yacc (the lookahead token)
        self._last_yielded_token = None

    def parse(self, text, filename='', debug=False):
        """ Parses C code and returns an AST.

            text:
                A string containing the C source code

            filename:
                Name of the file being parsed (for meaningful
                error messages)

            debug:
                Debug flag to YACC
        """
        self.clex.filename = filename
        self.clex.reset_lineno()
        # Each parse starts from a fresh global scope; state from a
        # previous parse() call must not leak into this one.
        self._scope_stack = [dict()]
        self._last_yielded_token = None
        return self.cparser.parse(
            input=text,
            lexer=self.clex,
            debug=debug)

    ######################--   PRIVATE   --######################

    def _push_scope(self):
        # Called (via the lexer) when a '{' is seen.
        self._scope_stack.append(dict())

    def _pop_scope(self):
        # Called (via the lexer) when a '}' is seen. The global scope
        # (bottom of the stack) is never popped.
        assert len(self._scope_stack) > 1
        self._scope_stack.pop()

    def _add_typedef_name(self, name, coord):
        """ Add a new typedef name (ie a TYPEID) to the current scope
        """
        # A name already recorded as a non-typedef (value False) in this
        # scope cannot be redeclared as a typedef.
        if not self._scope_stack[-1].get(name, True):
            self._parse_error(
                "Typedef %r previously declared as non-typedef "
                "in this scope" % name, coord)
        self._scope_stack[-1][name] = True

    def _add_identifier(self, name, coord):
        """ Add a new object, function, or enum member name (ie an ID) to the
            current scope
        """
        # Symmetric check: a typedef name (value True) cannot be
        # redeclared as a plain identifier in the same scope.
        if self._scope_stack[-1].get(name, False):
            self._parse_error(
                "Non-typedef %r previously declared as typedef "
                "in this scope" % name, coord)
        self._scope_stack[-1][name] = False

    def _is_type_in_scope(self, name):
        """ Is *name* a typedef-name in the current scope?
        """
        # Search innermost-to-outermost scope.
        for scope in reversed(self._scope_stack):
            # If name is an identifier in this scope it shadows typedefs in
            # higher scopes.
            in_scope = scope.get(name)
            if in_scope is not None: return in_scope
        return False

    def _lex_error_func(self, msg, line, column):
        self._parse_error(msg, self._coord(line, column))

    def _lex_on_lbrace_func(self):
        self._push_scope()

    def _lex_on_rbrace_func(self):
        self._pop_scope()

    def _lex_type_lookup_func(self, name):
        """ Looks up types that were previously defined with
            typedef.
            Passed to the lexer for recognizing identifiers that
            are types.
        """
        is_type = self._is_type_in_scope(name)
        return is_type

    def _get_yacc_lookahead_token(self):
        """ We need access to yacc's lookahead token in certain cases.
            This is the last token yacc requested from the lexer, so we
            ask the lexer.
        """
        return self.clex.last_token

    # To understand what's going on here, read sections A.8.5 and
    # A.8.6 of K&R2 very carefully.
    #
    # A C type consists of a basic type declaration, with a list
    # of modifiers. For example:
    #
    # int *c[5];
    #
    # The basic declaration here is 'int c', and the pointer and
    # the array are the modifiers.
    #
    # Basic declarations are represented by TypeDecl (from module c_ast) and the
    # modifiers are FuncDecl, PtrDecl and ArrayDecl.
    #
    # The standard states that whenever a new modifier is parsed, it should be
    # added to the end of the list of modifiers. For example:
    #
    # K&R2 A.8.6.2: Array Declarators
    #
    # In a declaration T D where D has the form
    #   D1 [constant-expression-opt]
    # and the type of the identifier in the declaration T D1 is
    # "type-modifier T", the type of the
    # identifier of D is "type-modifier array of T"
    #
    # This is what this method does. The declarator it receives
    # can be a list of declarators ending with TypeDecl. It
    # tacks the modifier to the end of this list, just before
    # the TypeDecl.
244 # 245 # Additionally, the modifier may be a list itself. This is 246 # useful for pointers, that can come as a chain from the rule 247 # p_pointer. In this case, the whole modifier list is spliced 248 # into the new location. 249 def _type_modify_decl(self, decl, modifier): 250 """ Tacks a type modifier on a declarator, and returns 251 the modified declarator. 252 253 Note: the declarator and modifier may be modified 254 """ 255 #~ print '****' 256 #~ decl.show(offset=3) 257 #~ modifier.show(offset=3) 258 #~ print '****' 259 260 modifier_head = modifier 261 modifier_tail = modifier 262 263 # The modifier may be a nested list. Reach its tail. 264 while modifier_tail.type: 265 modifier_tail = modifier_tail.type 266 267 # If the decl is a basic type, just tack the modifier onto it. 268 if isinstance(decl, c_ast.TypeDecl): 269 modifier_tail.type = decl 270 return modifier 271 else: 272 # Otherwise, the decl is a list of modifiers. Reach 273 # its tail and splice the modifier onto the tail, 274 # pointing to the underlying basic type. 275 decl_tail = decl 276 277 while not isinstance(decl_tail.type, c_ast.TypeDecl): 278 decl_tail = decl_tail.type 279 280 modifier_tail.type = decl_tail.type 281 decl_tail.type = modifier_head 282 return decl 283 284 # Due to the order in which declarators are constructed, 285 # they have to be fixed in order to look like a normal AST. 286 # 287 # When a declaration arrives from syntax construction, it has 288 # these problems: 289 # * The innermost TypeDecl has no type (because the basic 290 # type is only known at the uppermost declaration level) 291 # * The declaration has no variable name, since that is saved 292 # in the innermost TypeDecl 293 # * The typename of the declaration is a list of type 294 # specifiers, and not a node. Here, basic identifier types 295 # should be separated from more complex types like enums 296 # and structs. 297 # 298 # This method fixes these problems. 
299 def _fix_decl_name_type(self, decl, typename): 300 """ Fixes a declaration. Modifies decl. 301 """ 302 # Reach the underlying basic type 303 # 304 type = decl 305 while not isinstance(type, c_ast.TypeDecl): 306 type = type.type 307 308 decl.name = type.declname 309 type.quals = decl.quals[:] 310 311 # The typename is a list of types. If any type in this 312 # list isn't an IdentifierType, it must be the only 313 # type in the list (it's illegal to declare "int enum ..") 314 # If all the types are basic, they're collected in the 315 # IdentifierType holder. 316 for tn in typename: 317 if not isinstance(tn, c_ast.IdentifierType): 318 if len(typename) > 1: 319 self._parse_error( 320 "Invalid multiple types specified", tn.coord) 321 else: 322 type.type = tn 323 return decl 324 325 if not typename: 326 # Functions default to returning int 327 # 328 if not isinstance(decl.type, c_ast.FuncDecl): 329 self._parse_error( 330 "Missing type in declaration", decl.coord) 331 type.type = c_ast.IdentifierType( 332 ['int'], 333 coord=decl.coord) 334 else: 335 # At this point, we know that typename is a list of IdentifierType 336 # nodes. Concatenate all the names into a single list. 337 # 338 type.type = c_ast.IdentifierType( 339 [name for id in typename for name in id.names], 340 coord=typename[0].coord) 341 return decl 342 343 def _add_declaration_specifier(self, declspec, newspec, kind, append=False): 344 """ Declaration specifiers are represented by a dictionary 345 with the entries: 346 * qual: a list of type qualifiers 347 * storage: a list of storage type qualifiers 348 * type: a list of type specifiers 349 * function: a list of function specifiers 350 * alignment: a list of alignment specifiers 351 352 This method is given a declaration specifier, and a 353 new specifier of a given kind. 354 If `append` is True, the new specifier is added to the end of 355 the specifiers list, otherwise it's added at the beginning. 
356 Returns the declaration specifier, with the new 357 specifier incorporated. 358 """ 359 spec = declspec or dict(qual=[], storage=[], type=[], function=[], alignment=[]) 360 361 if append: 362 spec[kind].append(newspec) 363 else: 364 spec[kind].insert(0, newspec) 365 366 return spec 367 368 def _build_declarations(self, spec, decls, typedef_namespace=False): 369 """ Builds a list of declarations all sharing the given specifiers. 370 If typedef_namespace is true, each declared name is added 371 to the "typedef namespace", which also includes objects, 372 functions, and enum constants. 373 """ 374 is_typedef = 'typedef' in spec['storage'] 375 declarations = [] 376 377 # Bit-fields are allowed to be unnamed. 378 if decls[0].get('bitsize') is not None: 379 pass 380 381 # When redeclaring typedef names as identifiers in inner scopes, a 382 # problem can occur where the identifier gets grouped into 383 # spec['type'], leaving decl as None. This can only occur for the 384 # first declarator. 385 elif decls[0]['decl'] is None: 386 if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \ 387 not self._is_type_in_scope(spec['type'][-1].names[0]): 388 coord = '?' 389 for t in spec['type']: 390 if hasattr(t, 'coord'): 391 coord = t.coord 392 break 393 self._parse_error('Invalid declaration', coord) 394 395 # Make this look as if it came from "direct_declarator:ID" 396 decls[0]['decl'] = c_ast.TypeDecl( 397 declname=spec['type'][-1].names[0], 398 type=None, 399 quals=None, 400 align=spec['alignment'], 401 coord=spec['type'][-1].coord) 402 # Remove the "new" type's name from the end of spec['type'] 403 del spec['type'][-1] 404 405 # A similar problem can occur where the declaration ends up looking 406 # like an abstract declarator. Give it a name if this is the case. 
407 elif not isinstance(decls[0]['decl'], ( 408 c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType)): 409 decls_0_tail = decls[0]['decl'] 410 while not isinstance(decls_0_tail, c_ast.TypeDecl): 411 decls_0_tail = decls_0_tail.type 412 if decls_0_tail.declname is None: 413 decls_0_tail.declname = spec['type'][-1].names[0] 414 del spec['type'][-1] 415 416 for decl in decls: 417 assert decl['decl'] is not None 418 if is_typedef: 419 declaration = c_ast.Typedef( 420 name=None, 421 quals=spec['qual'], 422 storage=spec['storage'], 423 type=decl['decl'], 424 coord=decl['decl'].coord) 425 else: 426 declaration = c_ast.Decl( 427 name=None, 428 quals=spec['qual'], 429 align=spec['alignment'], 430 storage=spec['storage'], 431 funcspec=spec['function'], 432 type=decl['decl'], 433 init=decl.get('init'), 434 bitsize=decl.get('bitsize'), 435 coord=decl['decl'].coord) 436 437 if isinstance(declaration.type, ( 438 c_ast.Enum, c_ast.Struct, c_ast.Union, 439 c_ast.IdentifierType)): 440 fixed_decl = declaration 441 else: 442 fixed_decl = self._fix_decl_name_type(declaration, spec['type']) 443 444 # Add the type name defined by typedef to a 445 # symbol table (for usage in the lexer) 446 if typedef_namespace: 447 if is_typedef: 448 self._add_typedef_name(fixed_decl.name, fixed_decl.coord) 449 else: 450 self._add_identifier(fixed_decl.name, fixed_decl.coord) 451 452 fixed_decl = fix_atomic_specifiers(fixed_decl) 453 declarations.append(fixed_decl) 454 455 return declarations 456 457 def _build_function_definition(self, spec, decl, param_decls, body): 458 """ Builds a function definition. 
459 """ 460 if 'typedef' in spec['storage']: 461 self._parse_error("Invalid typedef", decl.coord) 462 463 declaration = self._build_declarations( 464 spec=spec, 465 decls=[dict(decl=decl, init=None)], 466 typedef_namespace=True)[0] 467 468 return c_ast.FuncDef( 469 decl=declaration, 470 param_decls=param_decls, 471 body=body, 472 coord=decl.coord) 473 474 def _select_struct_union_class(self, token): 475 """ Given a token (either STRUCT or UNION), selects the 476 appropriate AST class. 477 """ 478 if token == 'struct': 479 return c_ast.Struct 480 else: 481 return c_ast.Union 482 483 ## 484 ## Precedence and associativity of operators 485 ## 486 # If this changes, c_generator.CGenerator.precedence_map needs to change as 487 # well 488 precedence = ( 489 ('left', 'LOR'), 490 ('left', 'LAND'), 491 ('left', 'OR'), 492 ('left', 'XOR'), 493 ('left', 'AND'), 494 ('left', 'EQ', 'NE'), 495 ('left', 'GT', 'GE', 'LT', 'LE'), 496 ('left', 'RSHIFT', 'LSHIFT'), 497 ('left', 'PLUS', 'MINUS'), 498 ('left', 'TIMES', 'DIVIDE', 'MOD') 499 ) 500 501 ## 502 ## Grammar productions 503 ## Implementation of the BNF defined in K&R2 A.13 504 ## 505 506 # Wrapper around a translation unit, to allow for empty input. 507 # Not strictly part of the C99 Grammar, but useful in practice. 
    def p_translation_unit_or_empty(self, p):
        """ translation_unit_or_empty   : translation_unit
                                        | empty
        """
        # Empty input still yields a (childless) FileAST.
        if p[1] is None:
            p[0] = c_ast.FileAST([])
        else:
            p[0] = c_ast.FileAST(p[1])

    def p_translation_unit_1(self, p):
        """ translation_unit    : external_declaration
        """
        # Note: external_declaration is already a list
        p[0] = p[1]

    def p_translation_unit_2(self, p):
        """ translation_unit    : translation_unit external_declaration
        """
        p[1].extend(p[2])
        p[0] = p[1]

    # Declarations always come as lists (because they can be
    # several in one line), so we wrap the function definition
    # into a list as well, to make the return value of
    # external_declaration homogeneous.
    def p_external_declaration_1(self, p):
        """ external_declaration    : function_definition
        """
        p[0] = [p[1]]

    def p_external_declaration_2(self, p):
        """ external_declaration    : declaration
        """
        p[0] = p[1]

    def p_external_declaration_3(self, p):
        """ external_declaration    : pp_directive
                                    | pppragma_directive
        """
        p[0] = [p[1]]

    def p_external_declaration_4(self, p):
        """ external_declaration    : SEMI
        """
        # A stray semicolon at file scope contributes nothing.
        p[0] = []

    def p_external_declaration_5(self, p):
        """ external_declaration    : static_assert
        """
        p[0] = p[1]

    def p_static_assert_declaration(self, p):
        """ static_assert           : _STATIC_ASSERT LPAREN constant_expression COMMA unified_string_literal RPAREN
                                    | _STATIC_ASSERT LPAREN constant_expression RPAREN
        """
        # len(p) == 5 is the message-less form (4 tokens + p[0]).
        if len(p) == 5:
            p[0] = [c_ast.StaticAssert(p[3], None, self._token_coord(p, 1))]
        else:
            p[0] = [c_ast.StaticAssert(p[3], p[5], self._token_coord(p, 1))]

    def p_pp_directive(self, p):
        """ pp_directive  : PPHASH
        """
        # pycparser expects preprocessed input; a raw '#' is an error.
        self._parse_error('Directives not supported yet',
                          self._token_coord(p, 1))

    def p_pppragma_directive(self, p):
        """ pppragma_directive      : PPPRAGMA
                                    | PPPRAGMA PPPRAGMASTR
        """
        if len(p) == 3:
            p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2))
        else:
            p[0] = c_ast.Pragma("", self._token_coord(p, 1))

    # In function definitions, the declarator can be followed by
    # a declaration list, for old "K&R style" function definitions.
    def p_function_definition_1(self, p):
        """ function_definition : id_declarator declaration_list_opt compound_statement
        """
        # no declaration specifiers - 'int' becomes the default type
        spec = dict(
            qual=[],
            alignment=[],
            storage=[],
            type=[c_ast.IdentifierType(['int'],
                                       coord=self._token_coord(p, 1))],
            function=[])

        p[0] = self._build_function_definition(
            spec=spec,
            decl=p[1],
            param_decls=p[2],
            body=p[3])

    def p_function_definition_2(self, p):
        """ function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement
        """
        spec = p[1]

        p[0] = self._build_function_definition(
            spec=spec,
            decl=p[2],
            param_decls=p[3],
            body=p[4])

    # Note, according to C18 A.2.2 6.7.10 static_assert-declaration _Static_assert
    # is a declaration, not a statement. We additionally recognise it as a statement
    # to fix parsing of _Static_assert inside the functions.
    #
    def p_statement(self, p):
        """ statement   : labeled_statement
                        | expression_statement
                        | compound_statement
                        | selection_statement
                        | iteration_statement
                        | jump_statement
                        | pppragma_directive
                        | static_assert
        """
        p[0] = p[1]

    # A pragma is generally considered a decorator rather than an actual
    # statement. Still, for the purposes of analyzing an abstract syntax tree of
    # C code, pragma's should not be ignored and were previously treated as a
    # statement. This presents a problem for constructs that take a statement
    # such as labeled_statements, selection_statements, and
    # iteration_statements, causing a misleading structure in the AST. For
    # example, consider the following C code.
    #
    #   for (int i = 0; i < 3; i++)
    #       #pragma omp critical
    #       sum += 1;
    #
    # This code will compile and execute "sum += 1;" as the body of the for
    # loop. Previous implementations of PyCParser would render the AST for this
    # block of code as follows:
    #
    #   For:
    #     DeclList:
    #       Decl: i, [], [], []
    #         TypeDecl: i, []
    #           IdentifierType: ['int']
    #       Constant: int, 0
    #     BinaryOp: <
    #       ID: i
    #       Constant: int, 3
    #     UnaryOp: p++
    #       ID: i
    #     Pragma: omp critical
    #   Assignment: +=
    #     ID: sum
    #     Constant: int, 1
    #
    # This AST misleadingly takes the Pragma as the body of the loop and the
    # assignment then becomes a sibling of the loop.
    #
    # To solve edge cases like these, the pragmacomp_or_statement rule groups
    # a pragma and its following statement (which would otherwise be orphaned)
    # using a compound block, effectively turning the above code into:
    #
    #   for (int i = 0; i < 3; i++) {
    #       #pragma omp critical
    #       sum += 1;
    #   }
    def p_pragmacomp_or_statement(self, p):
        """ pragmacomp_or_statement     : pppragma_directive statement
                                        | statement
        """
        if isinstance(p[1], c_ast.Pragma) and len(p) == 3:
            p[0] = c_ast.Compound(
                block_items=[p[1], p[2]],
                coord=self._token_coord(p, 1))
        else:
            p[0] = p[1]

    # In C, declarations can come several in a line:
    #   int x, *px, romulo = 5;
    #
    # However, for the AST, we will split them to separate Decl
    # nodes.
    #
    # This rule splits its declarations and always returns a list
    # of Decl nodes, even if it's one element long.
692 # 693 def p_decl_body(self, p): 694 """ decl_body : declaration_specifiers init_declarator_list_opt 695 | declaration_specifiers_no_type id_init_declarator_list_opt 696 """ 697 spec = p[1] 698 699 # p[2] (init_declarator_list_opt) is either a list or None 700 # 701 if p[2] is None: 702 # By the standard, you must have at least one declarator unless 703 # declaring a structure tag, a union tag, or the members of an 704 # enumeration. 705 # 706 ty = spec['type'] 707 s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum) 708 if len(ty) == 1 and isinstance(ty[0], s_u_or_e): 709 decls = [c_ast.Decl( 710 name=None, 711 quals=spec['qual'], 712 align=spec['alignment'], 713 storage=spec['storage'], 714 funcspec=spec['function'], 715 type=ty[0], 716 init=None, 717 bitsize=None, 718 coord=ty[0].coord)] 719 720 # However, this case can also occur on redeclared identifiers in 721 # an inner scope. The trouble is that the redeclared type's name 722 # gets grouped into declaration_specifiers; _build_declarations 723 # compensates for this. 724 # 725 else: 726 decls = self._build_declarations( 727 spec=spec, 728 decls=[dict(decl=None, init=None)], 729 typedef_namespace=True) 730 731 else: 732 decls = self._build_declarations( 733 spec=spec, 734 decls=p[2], 735 typedef_namespace=True) 736 737 p[0] = decls 738 739 # The declaration has been split to a decl_body sub-rule and 740 # SEMI, because having them in a single rule created a problem 741 # for defining typedefs. 742 # 743 # If a typedef line was directly followed by a line using the 744 # type defined with the typedef, the type would not be 745 # recognized. This is because to reduce the declaration rule, 746 # the parser's lookahead asked for the token after SEMI, which 747 # was the type from the next line, and the lexer had no chance 748 # to see the updated type symbol table. 
749 # 750 # Splitting solves this problem, because after seeing SEMI, 751 # the parser reduces decl_body, which actually adds the new 752 # type into the table to be seen by the lexer before the next 753 # line is reached. 754 def p_declaration(self, p): 755 """ declaration : decl_body SEMI 756 """ 757 p[0] = p[1] 758 759 # Since each declaration is a list of declarations, this 760 # rule will combine all the declarations and return a single 761 # list 762 # 763 def p_declaration_list(self, p): 764 """ declaration_list : declaration 765 | declaration_list declaration 766 """ 767 p[0] = p[1] if len(p) == 2 else p[1] + p[2] 768 769 # To know when declaration-specifiers end and declarators begin, 770 # we require declaration-specifiers to have at least one 771 # type-specifier, and disallow typedef-names after we've seen any 772 # type-specifier. These are both required by the spec. 773 # 774 def p_declaration_specifiers_no_type_1(self, p): 775 """ declaration_specifiers_no_type : type_qualifier declaration_specifiers_no_type_opt 776 """ 777 p[0] = self._add_declaration_specifier(p[2], p[1], 'qual') 778 779 def p_declaration_specifiers_no_type_2(self, p): 780 """ declaration_specifiers_no_type : storage_class_specifier declaration_specifiers_no_type_opt 781 """ 782 p[0] = self._add_declaration_specifier(p[2], p[1], 'storage') 783 784 def p_declaration_specifiers_no_type_3(self, p): 785 """ declaration_specifiers_no_type : function_specifier declaration_specifiers_no_type_opt 786 """ 787 p[0] = self._add_declaration_specifier(p[2], p[1], 'function') 788 789 # Without this, `typedef _Atomic(T) U` will parse incorrectly because the 790 # _Atomic qualifier will match, instead of the specifier. 
    def p_declaration_specifiers_no_type_4(self, p):
        """ declaration_specifiers_no_type  : atomic_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'type')

    def p_declaration_specifiers_no_type_5(self, p):
        """ declaration_specifiers_no_type  : alignment_specifier declaration_specifiers_no_type_opt
        """
        p[0] = self._add_declaration_specifier(p[2], p[1], 'alignment')

    # Left recursion from here on: new specifiers are appended, preserving
    # source order in the specifier lists.
    def p_declaration_specifiers_1(self, p):
        """ declaration_specifiers  : declaration_specifiers type_qualifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)

    def p_declaration_specifiers_2(self, p):
        """ declaration_specifiers  : declaration_specifiers storage_class_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'storage', append=True)

    def p_declaration_specifiers_3(self, p):
        """ declaration_specifiers  : declaration_specifiers function_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'function', append=True)

    def p_declaration_specifiers_4(self, p):
        """ declaration_specifiers  : declaration_specifiers type_specifier_no_typeid
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_declaration_specifiers_5(self, p):
        """ declaration_specifiers  : type_specifier
        """
        p[0] = self._add_declaration_specifier(None, p[1], 'type')

    def p_declaration_specifiers_6(self, p):
        """ declaration_specifiers  : declaration_specifiers_no_type type_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_declaration_specifiers_7(self, p):
        """ declaration_specifiers  : declaration_specifiers alignment_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment', append=True)

    def p_storage_class_specifier(self, p):
        """ storage_class_specifier : AUTO
                                    | REGISTER
                                    | STATIC
                                    | EXTERN
                                    | TYPEDEF
                                    | _THREAD_LOCAL
        """
        p[0] = p[1]

    def p_function_specifier(self, p):
        """ function_specifier  : INLINE
                                | _NORETURN
        """
        p[0] = p[1]

    def p_type_specifier_no_typeid(self, p):
        """ type_specifier_no_typeid  : VOID
                                      | _BOOL
                                      | CHAR
                                      | SHORT
                                      | INT
                                      | LONG
                                      | FLOAT
                                      | DOUBLE
                                      | _COMPLEX
                                      | SIGNED
                                      | UNSIGNED
                                      | __INT128
        """
        p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))

    def p_type_specifier(self, p):
        """ type_specifier  : typedef_name
                            | enum_specifier
                            | struct_or_union_specifier
                            | type_specifier_no_typeid
                            | atomic_specifier
        """
        p[0] = p[1]

    # See section 6.7.2.4 of the C11 standard.
    def p_atomic_specifier(self, p):
        """ atomic_specifier  : _ATOMIC LPAREN type_name RPAREN
        """
        # _Atomic(T) is folded into T's qualifiers; fix_atomic_specifiers
        # normalizes this later.
        typ = p[3]
        typ.quals.append('_Atomic')
        p[0] = typ

    def p_type_qualifier(self, p):
        """ type_qualifier  : CONST
                            | RESTRICT
                            | VOLATILE
                            | _ATOMIC
        """
        p[0] = p[1]

    def p_init_declarator_list(self, p):
        """ init_declarator_list    : init_declarator
                                    | init_declarator_list COMMA init_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    # Returns a {decl=<declarator> : init=<initializer>} dictionary
    # If there's no initializer, uses None
    #
    def p_init_declarator(self, p):
        """ init_declarator : declarator
                            | declarator EQUALS initializer
        """
        p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))

    def p_id_init_declarator_list(self, p):
        """ id_init_declarator_list    : id_init_declarator
                                       | id_init_declarator_list COMMA init_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    def p_id_init_declarator(self, p):
        """ id_init_declarator : id_declarator
                               | id_declarator EQUALS initializer
        """
        p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))

    # Require at least one type specifier in a specifier-qualifier-list
    # (a specifier-qualifier-list must contain at least one type specifier)
    #
    def p_specifier_qualifier_list_1(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list type_specifier_no_typeid
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)

    def p_specifier_qualifier_list_2(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list type_qualifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)

    def p_specifier_qualifier_list_3(self, p):
        """ specifier_qualifier_list  : type_specifier
        """
        p[0] = self._add_declaration_specifier(None, p[1], 'type')

    def p_specifier_qualifier_list_4(self, p):
        """ specifier_qualifier_list  : type_qualifier_list type_specifier
        """
        # Build the specifier dict directly; qualifiers came as a plain list.
        p[0] = dict(qual=p[1], alignment=[], storage=[], type=[p[2]], function=[])

    def p_specifier_qualifier_list_5(self, p):
        """ specifier_qualifier_list  : alignment_specifier
        """
        p[0] = dict(qual=[], alignment=[p[1]], storage=[], type=[], function=[])

    def p_specifier_qualifier_list_6(self, p):
        """ specifier_qualifier_list  : specifier_qualifier_list alignment_specifier
        """
        p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment')

    # TYPEID is allowed here (and in other struct/enum related tag names), because
    # struct/enum tags reside in their own namespace and can be named the same as types
    #
    def p_struct_or_union_specifier_1(self, p):
        """ struct_or_union_specifier   : struct_or_union ID
                                        | struct_or_union TYPEID
        """
        klass = self._select_struct_union_class(p[1])
        # None means no list of members
        p[0] = klass(
            name=p[2],
            decls=None,
            coord=self._token_coord(p, 2))

    def p_struct_or_union_specifier_2(self, p):
        """ struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
                                      | struct_or_union brace_open brace_close
        """
        klass = self._select_struct_union_class(p[1])
        if len(p) == 4:
            # Empty sequence means an empty list of members
            p[0] = klass(
                name=None,
                decls=[],
                coord=self._token_coord(p, 2))
        else:
            p[0] = klass(
                name=None,
                decls=p[3],
                coord=self._token_coord(p, 2))

    def p_struct_or_union_specifier_3(self, p):
        """ struct_or_union_specifier   : struct_or_union ID brace_open struct_declaration_list brace_close
                                        | struct_or_union ID brace_open brace_close
                                        | struct_or_union TYPEID brace_open struct_declaration_list brace_close
                                        | struct_or_union TYPEID brace_open brace_close
        """
        klass = self._select_struct_union_class(p[1])
        if len(p) == 5:
            # Empty sequence means an empty list of members
            p[0] = klass(
                name=p[2],
                decls=[],
                coord=self._token_coord(p, 2))
        else:
            p[0] = klass(
                name=p[2],
                decls=p[4],
                coord=self._token_coord(p, 2))

    def p_struct_or_union(self, p):
        """ struct_or_union : STRUCT
                            | UNION
        """
        p[0] = p[1]

    # Combine all declarations into a single list
    #
    def p_struct_declaration_list(self, p):
        """ struct_declaration_list     : struct_declaration
                                        | struct_declaration_list struct_declaration
        """
        # struct_declaration may be None (lone SEMI); treat it as empty.
        if len(p) == 2:
            p[0] = p[1] or []
        else:
            p[0] = p[1] + (p[2] or [])

    def p_struct_declaration_1(self, p):
        """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
        """
        spec = p[1]
        assert 'typedef' not in spec['storage']

        if p[2] is not None:
            decls = self._build_declarations(
                spec=spec,
                decls=p[2])

        elif len(spec['type']) == 1:
            # Anonymous struct/union, gcc extension, C1x feature.
            # Although the standard only allows structs/unions here, I see no
            # reason to disallow other types since some compilers have typedefs
            # here, and pycparser isn't about rejecting all invalid code.
            #
            node = spec['type'][0]
            if isinstance(node, c_ast.Node):
                decl_type = node
            else:
                decl_type = c_ast.IdentifierType(node)

            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=decl_type)])

        else:
            # Structure/union members can have the same names as typedefs.
            # The trouble is that the member's name gets grouped into
            # specifier_qualifier_list; _build_declarations compensates.
            #
            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=None, init=None)])

        p[0] = decls

    def p_struct_declaration_2(self, p):
        """ struct_declaration : SEMI
        """
        p[0] = None

    def p_struct_declaration_3(self, p):
        """ struct_declaration : pppragma_directive
        """
        p[0] = [p[1]]

    def p_struct_declarator_list(self, p):
        """ struct_declarator_list  : struct_declarator
                                    | struct_declarator_list COMMA struct_declarator
        """
        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]

    # struct_declarator passes up a dict with the keys: decl (for
    # the underlying declarator) and bitsize (for the bitsize)
    #
    def p_struct_declarator_1(self, p):
        """ struct_declarator : declarator
        """
        p[0] = {'decl': p[1], 'bitsize': None}

    def p_struct_declarator_2(self, p):
        """ struct_declarator   : declarator COLON constant_expression
                                | COLON constant_expression
        """
        if len(p) > 3:
            p[0] = {'decl': p[1], 'bitsize': p[3]}
        else:
            # Unnamed bit-field: synthesize an anonymous TypeDecl.
            p[0] = {'decl': c_ast.TypeDecl(None, None, None, None), 'bitsize': p[2]}

    def p_enum_specifier_1(self, p):
        """ enum_specifier  : ENUM ID
                            | ENUM TYPEID
        """
        p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1))

    def p_enum_specifier_2(self, p):
        """ enum_specifier  : ENUM brace_open enumerator_list brace_close
        """
        p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1))

    # NOTE(review): the next definition continues in the following chunk of
    # this file; only its 'def' keyword falls inside this span.
    def
p_enum_specifier_3(self, p): 1103 """ enum_specifier : ENUM ID brace_open enumerator_list brace_close 1104 | ENUM TYPEID brace_open enumerator_list brace_close 1105 """ 1106 p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1)) 1107 1108 def p_enumerator_list(self, p): 1109 """ enumerator_list : enumerator 1110 | enumerator_list COMMA 1111 | enumerator_list COMMA enumerator 1112 """ 1113 if len(p) == 2: 1114 p[0] = c_ast.EnumeratorList([p[1]], p[1].coord) 1115 elif len(p) == 3: 1116 p[0] = p[1] 1117 else: 1118 p[1].enumerators.append(p[3]) 1119 p[0] = p[1] 1120 1121 def p_alignment_specifier(self, p): 1122 """ alignment_specifier : _ALIGNAS LPAREN type_name RPAREN 1123 | _ALIGNAS LPAREN constant_expression RPAREN 1124 """ 1125 p[0] = c_ast.Alignas(p[3], self._token_coord(p, 1)) 1126 1127 def p_enumerator(self, p): 1128 """ enumerator : ID 1129 | ID EQUALS constant_expression 1130 """ 1131 if len(p) == 2: 1132 enumerator = c_ast.Enumerator( 1133 p[1], None, 1134 self._token_coord(p, 1)) 1135 else: 1136 enumerator = c_ast.Enumerator( 1137 p[1], p[3], 1138 self._token_coord(p, 1)) 1139 self._add_identifier(enumerator.name, enumerator.coord) 1140 1141 p[0] = enumerator 1142 1143 def p_declarator(self, p): 1144 """ declarator : id_declarator 1145 | typeid_declarator 1146 """ 1147 p[0] = p[1] 1148 1149 @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1150 def p_xxx_declarator_1(self, p): 1151 """ xxx_declarator : direct_xxx_declarator 1152 """ 1153 p[0] = p[1] 1154 1155 @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1156 def p_xxx_declarator_2(self, p): 1157 """ xxx_declarator : pointer direct_xxx_declarator 1158 """ 1159 p[0] = self._type_modify_decl(p[2], p[1]) 1160 1161 @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1162 def p_direct_xxx_declarator_1(self, p): 1163 """ direct_xxx_declarator : yyy 1164 """ 1165 p[0] = c_ast.TypeDecl( 1166 declname=p[1], 1167 type=None, 
1168 quals=None, 1169 align=None, 1170 coord=self._token_coord(p, 1)) 1171 1172 @parameterized(('id', 'ID'), ('typeid', 'TYPEID')) 1173 def p_direct_xxx_declarator_2(self, p): 1174 """ direct_xxx_declarator : LPAREN xxx_declarator RPAREN 1175 """ 1176 p[0] = p[2] 1177 1178 @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1179 def p_direct_xxx_declarator_3(self, p): 1180 """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET 1181 """ 1182 quals = (p[3] if len(p) > 5 else []) or [] 1183 # Accept dimension qualifiers 1184 # Per C99 6.7.5.3 p7 1185 arr = c_ast.ArrayDecl( 1186 type=None, 1187 dim=p[4] if len(p) > 5 else p[3], 1188 dim_quals=quals, 1189 coord=p[1].coord) 1190 1191 p[0] = self._type_modify_decl(decl=p[1], modifier=arr) 1192 1193 @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1194 def p_direct_xxx_declarator_4(self, p): 1195 """ direct_xxx_declarator : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET 1196 | direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET 1197 """ 1198 # Using slice notation for PLY objects doesn't work in Python 3 for the 1199 # version of PLY embedded with pycparser; see PLY Google Code issue 30. 1200 # Work around that here by listing the two elements separately. 
1201 listed_quals = [item if isinstance(item, list) else [item] 1202 for item in [p[3],p[4]]] 1203 dim_quals = [qual for sublist in listed_quals for qual in sublist 1204 if qual is not None] 1205 arr = c_ast.ArrayDecl( 1206 type=None, 1207 dim=p[5], 1208 dim_quals=dim_quals, 1209 coord=p[1].coord) 1210 1211 p[0] = self._type_modify_decl(decl=p[1], modifier=arr) 1212 1213 # Special for VLAs 1214 # 1215 @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1216 def p_direct_xxx_declarator_5(self, p): 1217 """ direct_xxx_declarator : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET 1218 """ 1219 arr = c_ast.ArrayDecl( 1220 type=None, 1221 dim=c_ast.ID(p[4], self._token_coord(p, 4)), 1222 dim_quals=p[3] if p[3] is not None else [], 1223 coord=p[1].coord) 1224 1225 p[0] = self._type_modify_decl(decl=p[1], modifier=arr) 1226 1227 @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID')) 1228 def p_direct_xxx_declarator_6(self, p): 1229 """ direct_xxx_declarator : direct_xxx_declarator LPAREN parameter_type_list RPAREN 1230 | direct_xxx_declarator LPAREN identifier_list_opt RPAREN 1231 """ 1232 func = c_ast.FuncDecl( 1233 args=p[3], 1234 type=None, 1235 coord=p[1].coord) 1236 1237 # To see why _get_yacc_lookahead_token is needed, consider: 1238 # typedef char TT; 1239 # void foo(int TT) { TT = 10; } 1240 # Outside the function, TT is a typedef, but inside (starting and 1241 # ending with the braces) it's a parameter. The trouble begins with 1242 # yacc's lookahead token. We don't know if we're declaring or 1243 # defining a function until we see LBRACE, but if we wait for yacc to 1244 # trigger a rule on that token, then TT will have already been read 1245 # and incorrectly interpreted as TYPEID. We need to add the 1246 # parameters to the scope the moment the lexer sees LBRACE. 
1247 # 1248 if self._get_yacc_lookahead_token().type == "LBRACE": 1249 if func.args is not None: 1250 for param in func.args.params: 1251 if isinstance(param, c_ast.EllipsisParam): break 1252 self._add_identifier(param.name, param.coord) 1253 1254 p[0] = self._type_modify_decl(decl=p[1], modifier=func) 1255 1256 def p_pointer(self, p): 1257 """ pointer : TIMES type_qualifier_list_opt 1258 | TIMES type_qualifier_list_opt pointer 1259 """ 1260 coord = self._token_coord(p, 1) 1261 # Pointer decls nest from inside out. This is important when different 1262 # levels have different qualifiers. For example: 1263 # 1264 # char * const * p; 1265 # 1266 # Means "pointer to const pointer to char" 1267 # 1268 # While: 1269 # 1270 # char ** const p; 1271 # 1272 # Means "const pointer to pointer to char" 1273 # 1274 # So when we construct PtrDecl nestings, the leftmost pointer goes in 1275 # as the most nested type. 1276 nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord) 1277 if len(p) > 3: 1278 tail_type = p[3] 1279 while tail_type.type is not None: 1280 tail_type = tail_type.type 1281 tail_type.type = nested_type 1282 p[0] = p[3] 1283 else: 1284 p[0] = nested_type 1285 1286 def p_type_qualifier_list(self, p): 1287 """ type_qualifier_list : type_qualifier 1288 | type_qualifier_list type_qualifier 1289 """ 1290 p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]] 1291 1292 def p_parameter_type_list(self, p): 1293 """ parameter_type_list : parameter_list 1294 | parameter_list COMMA ELLIPSIS 1295 """ 1296 if len(p) > 2: 1297 p[1].params.append(c_ast.EllipsisParam(self._token_coord(p, 3))) 1298 1299 p[0] = p[1] 1300 1301 def p_parameter_list(self, p): 1302 """ parameter_list : parameter_declaration 1303 | parameter_list COMMA parameter_declaration 1304 """ 1305 if len(p) == 2: # single parameter 1306 p[0] = c_ast.ParamList([p[1]], p[1].coord) 1307 else: 1308 p[1].params.append(p[3]) 1309 p[0] = p[1] 1310 1311 # From ISO/IEC 9899:TC2, 6.7.5.3.11: 1312 # "If, in a 
    # parameter declaration, an identifier can be treated either
    # as a typedef name or as a parameter name, it shall be taken as a
    # typedef name."
    #
    # Inside a parameter declaration, once we've reduced declaration specifiers,
    # if we shift in an LPAREN and see a TYPEID, it could be either an abstract
    # declarator or a declarator nested inside parens. This rule tells us to
    # always treat it as an abstract declarator. Therefore, we only accept
    # `id_declarator`s and `typeid_noparen_declarator`s.
    def p_parameter_declaration_1(self, p):
        """ parameter_declaration : declaration_specifiers id_declarator
                                  | declaration_specifiers typeid_noparen_declarator
        """
        spec = p[1]
        if not spec['type']:
            # Implicit int for parameters without a type specifier (K&R C).
            spec['type'] = [c_ast.IdentifierType(['int'],
                coord=self._token_coord(p, 1))]
        p[0] = self._build_declarations(
            spec=spec,
            decls=[dict(decl=p[2])])[0]

    def p_parameter_declaration_2(self, p):
        """ parameter_declaration : declaration_specifiers abstract_declarator_opt
        """
        spec = p[1]
        if not spec['type']:
            # Implicit int, as above.
            spec['type'] = [c_ast.IdentifierType(['int'],
                coord=self._token_coord(p, 1))]

        # Parameters can have the same names as typedefs. The trouble is that
        # the parameter's name gets grouped into declaration_specifiers, making
        # it look like an old-style declaration; compensate.
        #
        if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \
                self._is_type_in_scope(spec['type'][-1].names[0]):
            decl = self._build_declarations(
                spec=spec,
                decls=[dict(decl=p[2], init=None)])[0]

        # This truly is an old-style parameter declaration
        #
        else:
            decl = c_ast.Typename(
                name='',
                quals=spec['qual'],
                align=None,
                type=p[2] or c_ast.TypeDecl(None, None, None, None),
                coord=self._token_coord(p, 2))
            typename = spec['type']
            decl = self._fix_decl_name_type(decl, typename)

        p[0] = decl

    def p_identifier_list(self, p):
        """ identifier_list : identifier
                            | identifier_list COMMA identifier
        """
        # Old-style (K&R) parameter name lists also build a ParamList.
        if len(p) == 2: # single parameter
            p[0] = c_ast.ParamList([p[1]], p[1].coord)
        else:
            p[1].params.append(p[3])
            p[0] = p[1]

    def p_initializer_1(self, p):
        """ initializer : assignment_expression
        """
        p[0] = p[1]

    def p_initializer_2(self, p):
        """ initializer : brace_open initializer_list_opt brace_close
                        | brace_open initializer_list COMMA brace_close
        """
        if p[2] is None:
            # Empty braces: represent as an empty InitList.
            p[0] = c_ast.InitList([], self._token_coord(p, 1))
        else:
            p[0] = p[2]

    def p_initializer_list(self, p):
        """ initializer_list : designation_opt initializer
                             | initializer_list COMMA designation_opt initializer
        """
        if len(p) == 3: # single initializer
            # Wrap in NamedInitializer only when a designation is present.
            init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2])
            p[0] = c_ast.InitList([init], p[2].coord)
        else:
            init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4])
            p[1].exprs.append(init)
            p[0] = p[1]

    def p_designation(self, p):
        """ designation : designator_list EQUALS
        """
        p[0] = p[1]

    # Designators are represented as a list of nodes, in the order in which
    # they're written in the code.
1408 # 1409 def p_designator_list(self, p): 1410 """ designator_list : designator 1411 | designator_list designator 1412 """ 1413 p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]] 1414 1415 def p_designator(self, p): 1416 """ designator : LBRACKET constant_expression RBRACKET 1417 | PERIOD identifier 1418 """ 1419 p[0] = p[2] 1420 1421 def p_type_name(self, p): 1422 """ type_name : specifier_qualifier_list abstract_declarator_opt 1423 """ 1424 typename = c_ast.Typename( 1425 name='', 1426 quals=p[1]['qual'][:], 1427 align=None, 1428 type=p[2] or c_ast.TypeDecl(None, None, None, None), 1429 coord=self._token_coord(p, 2)) 1430 1431 p[0] = self._fix_decl_name_type(typename, p[1]['type']) 1432 1433 def p_abstract_declarator_1(self, p): 1434 """ abstract_declarator : pointer 1435 """ 1436 dummytype = c_ast.TypeDecl(None, None, None, None) 1437 p[0] = self._type_modify_decl( 1438 decl=dummytype, 1439 modifier=p[1]) 1440 1441 def p_abstract_declarator_2(self, p): 1442 """ abstract_declarator : pointer direct_abstract_declarator 1443 """ 1444 p[0] = self._type_modify_decl(p[2], p[1]) 1445 1446 def p_abstract_declarator_3(self, p): 1447 """ abstract_declarator : direct_abstract_declarator 1448 """ 1449 p[0] = p[1] 1450 1451 # Creating and using direct_abstract_declarator_opt here 1452 # instead of listing both direct_abstract_declarator and the 1453 # lack of it in the beginning of _1 and _2 caused two 1454 # shift/reduce errors. 
1455 # 1456 def p_direct_abstract_declarator_1(self, p): 1457 """ direct_abstract_declarator : LPAREN abstract_declarator RPAREN """ 1458 p[0] = p[2] 1459 1460 def p_direct_abstract_declarator_2(self, p): 1461 """ direct_abstract_declarator : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET 1462 """ 1463 arr = c_ast.ArrayDecl( 1464 type=None, 1465 dim=p[3], 1466 dim_quals=[], 1467 coord=p[1].coord) 1468 1469 p[0] = self._type_modify_decl(decl=p[1], modifier=arr) 1470 1471 def p_direct_abstract_declarator_3(self, p): 1472 """ direct_abstract_declarator : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET 1473 """ 1474 quals = (p[2] if len(p) > 4 else []) or [] 1475 p[0] = c_ast.ArrayDecl( 1476 type=c_ast.TypeDecl(None, None, None, None), 1477 dim=p[3] if len(p) > 4 else p[2], 1478 dim_quals=quals, 1479 coord=self._token_coord(p, 1)) 1480 1481 def p_direct_abstract_declarator_4(self, p): 1482 """ direct_abstract_declarator : direct_abstract_declarator LBRACKET TIMES RBRACKET 1483 """ 1484 arr = c_ast.ArrayDecl( 1485 type=None, 1486 dim=c_ast.ID(p[3], self._token_coord(p, 3)), 1487 dim_quals=[], 1488 coord=p[1].coord) 1489 1490 p[0] = self._type_modify_decl(decl=p[1], modifier=arr) 1491 1492 def p_direct_abstract_declarator_5(self, p): 1493 """ direct_abstract_declarator : LBRACKET TIMES RBRACKET 1494 """ 1495 p[0] = c_ast.ArrayDecl( 1496 type=c_ast.TypeDecl(None, None, None, None), 1497 dim=c_ast.ID(p[3], self._token_coord(p, 3)), 1498 dim_quals=[], 1499 coord=self._token_coord(p, 1)) 1500 1501 def p_direct_abstract_declarator_6(self, p): 1502 """ direct_abstract_declarator : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN 1503 """ 1504 func = c_ast.FuncDecl( 1505 args=p[3], 1506 type=None, 1507 coord=p[1].coord) 1508 1509 p[0] = self._type_modify_decl(decl=p[1], modifier=func) 1510 1511 def p_direct_abstract_declarator_7(self, p): 1512 """ direct_abstract_declarator : LPAREN parameter_type_list_opt RPAREN 1513 
""" 1514 p[0] = c_ast.FuncDecl( 1515 args=p[2], 1516 type=c_ast.TypeDecl(None, None, None, None), 1517 coord=self._token_coord(p, 1)) 1518 1519 # declaration is a list, statement isn't. To make it consistent, block_item 1520 # will always be a list 1521 # 1522 def p_block_item(self, p): 1523 """ block_item : declaration 1524 | statement 1525 """ 1526 p[0] = p[1] if isinstance(p[1], list) else [p[1]] 1527 1528 # Since we made block_item a list, this just combines lists 1529 # 1530 def p_block_item_list(self, p): 1531 """ block_item_list : block_item 1532 | block_item_list block_item 1533 """ 1534 # Empty block items (plain ';') produce [None], so ignore them 1535 p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2] 1536 1537 def p_compound_statement_1(self, p): 1538 """ compound_statement : brace_open block_item_list_opt brace_close """ 1539 p[0] = c_ast.Compound( 1540 block_items=p[2], 1541 coord=self._token_coord(p, 1)) 1542 1543 def p_labeled_statement_1(self, p): 1544 """ labeled_statement : ID COLON pragmacomp_or_statement """ 1545 p[0] = c_ast.Label(p[1], p[3], self._token_coord(p, 1)) 1546 1547 def p_labeled_statement_2(self, p): 1548 """ labeled_statement : CASE constant_expression COLON pragmacomp_or_statement """ 1549 p[0] = c_ast.Case(p[2], [p[4]], self._token_coord(p, 1)) 1550 1551 def p_labeled_statement_3(self, p): 1552 """ labeled_statement : DEFAULT COLON pragmacomp_or_statement """ 1553 p[0] = c_ast.Default([p[3]], self._token_coord(p, 1)) 1554 1555 def p_selection_statement_1(self, p): 1556 """ selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement """ 1557 p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1)) 1558 1559 def p_selection_statement_2(self, p): 1560 """ selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement """ 1561 p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1)) 1562 1563 def p_selection_statement_3(self, p): 1564 """ selection_statement : SWITCH LPAREN 
expression RPAREN pragmacomp_or_statement """ 1565 p[0] = fix_switch_cases( 1566 c_ast.Switch(p[3], p[5], self._token_coord(p, 1))) 1567 1568 def p_iteration_statement_1(self, p): 1569 """ iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement """ 1570 p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1)) 1571 1572 def p_iteration_statement_2(self, p): 1573 """ iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI """ 1574 p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1)) 1575 1576 def p_iteration_statement_3(self, p): 1577 """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """ 1578 p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1)) 1579 1580 def p_iteration_statement_4(self, p): 1581 """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """ 1582 p[0] = c_ast.For(c_ast.DeclList(p[3], self._token_coord(p, 1)), 1583 p[4], p[6], p[8], self._token_coord(p, 1)) 1584 1585 def p_jump_statement_1(self, p): 1586 """ jump_statement : GOTO ID SEMI """ 1587 p[0] = c_ast.Goto(p[2], self._token_coord(p, 1)) 1588 1589 def p_jump_statement_2(self, p): 1590 """ jump_statement : BREAK SEMI """ 1591 p[0] = c_ast.Break(self._token_coord(p, 1)) 1592 1593 def p_jump_statement_3(self, p): 1594 """ jump_statement : CONTINUE SEMI """ 1595 p[0] = c_ast.Continue(self._token_coord(p, 1)) 1596 1597 def p_jump_statement_4(self, p): 1598 """ jump_statement : RETURN expression SEMI 1599 | RETURN SEMI 1600 """ 1601 p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1)) 1602 1603 def p_expression_statement(self, p): 1604 """ expression_statement : expression_opt SEMI """ 1605 if p[1] is None: 1606 p[0] = c_ast.EmptyStatement(self._token_coord(p, 2)) 1607 else: 1608 p[0] = p[1] 1609 1610 def p_expression(self, p): 1611 """ expression : assignment_expression 1612 | 
expression COMMA assignment_expression 1613 """ 1614 if len(p) == 2: 1615 p[0] = p[1] 1616 else: 1617 if not isinstance(p[1], c_ast.ExprList): 1618 p[1] = c_ast.ExprList([p[1]], p[1].coord) 1619 1620 p[1].exprs.append(p[3]) 1621 p[0] = p[1] 1622 1623 def p_parenthesized_compound_expression(self, p): 1624 """ assignment_expression : LPAREN compound_statement RPAREN """ 1625 p[0] = p[2] 1626 1627 def p_typedef_name(self, p): 1628 """ typedef_name : TYPEID """ 1629 p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1)) 1630 1631 def p_assignment_expression(self, p): 1632 """ assignment_expression : conditional_expression 1633 | unary_expression assignment_operator assignment_expression 1634 """ 1635 if len(p) == 2: 1636 p[0] = p[1] 1637 else: 1638 p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord) 1639 1640 # K&R2 defines these as many separate rules, to encode 1641 # precedence and associativity. Why work hard ? I'll just use 1642 # the built in precedence/associativity specification feature 1643 # of PLY. 
(see precedence declaration above) 1644 # 1645 def p_assignment_operator(self, p): 1646 """ assignment_operator : EQUALS 1647 | XOREQUAL 1648 | TIMESEQUAL 1649 | DIVEQUAL 1650 | MODEQUAL 1651 | PLUSEQUAL 1652 | MINUSEQUAL 1653 | LSHIFTEQUAL 1654 | RSHIFTEQUAL 1655 | ANDEQUAL 1656 | OREQUAL 1657 """ 1658 p[0] = p[1] 1659 1660 def p_constant_expression(self, p): 1661 """ constant_expression : conditional_expression """ 1662 p[0] = p[1] 1663 1664 def p_conditional_expression(self, p): 1665 """ conditional_expression : binary_expression 1666 | binary_expression CONDOP expression COLON conditional_expression 1667 """ 1668 if len(p) == 2: 1669 p[0] = p[1] 1670 else: 1671 p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord) 1672 1673 def p_binary_expression(self, p): 1674 """ binary_expression : cast_expression 1675 | binary_expression TIMES binary_expression 1676 | binary_expression DIVIDE binary_expression 1677 | binary_expression MOD binary_expression 1678 | binary_expression PLUS binary_expression 1679 | binary_expression MINUS binary_expression 1680 | binary_expression RSHIFT binary_expression 1681 | binary_expression LSHIFT binary_expression 1682 | binary_expression LT binary_expression 1683 | binary_expression LE binary_expression 1684 | binary_expression GE binary_expression 1685 | binary_expression GT binary_expression 1686 | binary_expression EQ binary_expression 1687 | binary_expression NE binary_expression 1688 | binary_expression AND binary_expression 1689 | binary_expression OR binary_expression 1690 | binary_expression XOR binary_expression 1691 | binary_expression LAND binary_expression 1692 | binary_expression LOR binary_expression 1693 """ 1694 if len(p) == 2: 1695 p[0] = p[1] 1696 else: 1697 p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord) 1698 1699 def p_cast_expression_1(self, p): 1700 """ cast_expression : unary_expression """ 1701 p[0] = p[1] 1702 1703 def p_cast_expression_2(self, p): 1704 """ cast_expression : LPAREN type_name RPAREN 
cast_expression """ 1705 p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1)) 1706 1707 def p_unary_expression_1(self, p): 1708 """ unary_expression : postfix_expression """ 1709 p[0] = p[1] 1710 1711 def p_unary_expression_2(self, p): 1712 """ unary_expression : PLUSPLUS unary_expression 1713 | MINUSMINUS unary_expression 1714 | unary_operator cast_expression 1715 """ 1716 p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord) 1717 1718 def p_unary_expression_3(self, p): 1719 """ unary_expression : SIZEOF unary_expression 1720 | SIZEOF LPAREN type_name RPAREN 1721 | _ALIGNOF LPAREN type_name RPAREN 1722 """ 1723 p[0] = c_ast.UnaryOp( 1724 p[1], 1725 p[2] if len(p) == 3 else p[3], 1726 self._token_coord(p, 1)) 1727 1728 def p_unary_operator(self, p): 1729 """ unary_operator : AND 1730 | TIMES 1731 | PLUS 1732 | MINUS 1733 | NOT 1734 | LNOT 1735 """ 1736 p[0] = p[1] 1737 1738 def p_postfix_expression_1(self, p): 1739 """ postfix_expression : primary_expression """ 1740 p[0] = p[1] 1741 1742 def p_postfix_expression_2(self, p): 1743 """ postfix_expression : postfix_expression LBRACKET expression RBRACKET """ 1744 p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord) 1745 1746 def p_postfix_expression_3(self, p): 1747 """ postfix_expression : postfix_expression LPAREN argument_expression_list RPAREN 1748 | postfix_expression LPAREN RPAREN 1749 """ 1750 p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord) 1751 1752 def p_postfix_expression_4(self, p): 1753 """ postfix_expression : postfix_expression PERIOD ID 1754 | postfix_expression PERIOD TYPEID 1755 | postfix_expression ARROW ID 1756 | postfix_expression ARROW TYPEID 1757 """ 1758 field = c_ast.ID(p[3], self._token_coord(p, 3)) 1759 p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord) 1760 1761 def p_postfix_expression_5(self, p): 1762 """ postfix_expression : postfix_expression PLUSPLUS 1763 | postfix_expression MINUSMINUS 1764 """ 1765 p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord) 1766 1767 def 
p_postfix_expression_6(self, p): 1768 """ postfix_expression : LPAREN type_name RPAREN brace_open initializer_list brace_close 1769 | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close 1770 """ 1771 p[0] = c_ast.CompoundLiteral(p[2], p[5]) 1772 1773 def p_primary_expression_1(self, p): 1774 """ primary_expression : identifier """ 1775 p[0] = p[1] 1776 1777 def p_primary_expression_2(self, p): 1778 """ primary_expression : constant """ 1779 p[0] = p[1] 1780 1781 def p_primary_expression_3(self, p): 1782 """ primary_expression : unified_string_literal 1783 | unified_wstring_literal 1784 """ 1785 p[0] = p[1] 1786 1787 def p_primary_expression_4(self, p): 1788 """ primary_expression : LPAREN expression RPAREN """ 1789 p[0] = p[2] 1790 1791 def p_primary_expression_5(self, p): 1792 """ primary_expression : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN 1793 """ 1794 coord = self._token_coord(p, 1) 1795 p[0] = c_ast.FuncCall(c_ast.ID(p[1], coord), 1796 c_ast.ExprList([p[3], p[5]], coord), 1797 coord) 1798 1799 def p_offsetof_member_designator(self, p): 1800 """ offsetof_member_designator : identifier 1801 | offsetof_member_designator PERIOD identifier 1802 | offsetof_member_designator LBRACKET expression RBRACKET 1803 """ 1804 if len(p) == 2: 1805 p[0] = p[1] 1806 elif len(p) == 4: 1807 p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord) 1808 elif len(p) == 5: 1809 p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord) 1810 else: 1811 raise NotImplementedError("Unexpected parsing state. 
len(p): %u" % len(p)) 1812 1813 def p_argument_expression_list(self, p): 1814 """ argument_expression_list : assignment_expression 1815 | argument_expression_list COMMA assignment_expression 1816 """ 1817 if len(p) == 2: # single expr 1818 p[0] = c_ast.ExprList([p[1]], p[1].coord) 1819 else: 1820 p[1].exprs.append(p[3]) 1821 p[0] = p[1] 1822 1823 def p_identifier(self, p): 1824 """ identifier : ID """ 1825 p[0] = c_ast.ID(p[1], self._token_coord(p, 1)) 1826 1827 def p_constant_1(self, p): 1828 """ constant : INT_CONST_DEC 1829 | INT_CONST_OCT 1830 | INT_CONST_HEX 1831 | INT_CONST_BIN 1832 | INT_CONST_CHAR 1833 """ 1834 uCount = 0 1835 lCount = 0 1836 for x in p[1][-3:]: 1837 if x in ('l', 'L'): 1838 lCount += 1 1839 elif x in ('u', 'U'): 1840 uCount += 1 1841 t = '' 1842 if uCount > 1: 1843 raise ValueError('Constant cannot have more than one u/U suffix.') 1844 elif lCount > 2: 1845 raise ValueError('Constant cannot have more than two l/L suffix.') 1846 prefix = 'unsigned ' * uCount + 'long ' * lCount 1847 p[0] = c_ast.Constant( 1848 prefix + 'int', p[1], self._token_coord(p, 1)) 1849 1850 def p_constant_2(self, p): 1851 """ constant : FLOAT_CONST 1852 | HEX_FLOAT_CONST 1853 """ 1854 if 'x' in p[1].lower(): 1855 t = 'float' 1856 else: 1857 if p[1][-1] in ('f', 'F'): 1858 t = 'float' 1859 elif p[1][-1] in ('l', 'L'): 1860 t = 'long double' 1861 else: 1862 t = 'double' 1863 1864 p[0] = c_ast.Constant( 1865 t, p[1], self._token_coord(p, 1)) 1866 1867 def p_constant_3(self, p): 1868 """ constant : CHAR_CONST 1869 | WCHAR_CONST 1870 | U8CHAR_CONST 1871 | U16CHAR_CONST 1872 | U32CHAR_CONST 1873 """ 1874 p[0] = c_ast.Constant( 1875 'char', p[1], self._token_coord(p, 1)) 1876 1877 # The "unified" string and wstring literal rules are for supporting 1878 # concatenation of adjacent string literals. 1879 # I.e. 
"hello " "world" is seen by the C compiler as a single string literal 1880 # with the value "hello world" 1881 # 1882 def p_unified_string_literal(self, p): 1883 """ unified_string_literal : STRING_LITERAL 1884 | unified_string_literal STRING_LITERAL 1885 """ 1886 if len(p) == 2: # single literal 1887 p[0] = c_ast.Constant( 1888 'string', p[1], self._token_coord(p, 1)) 1889 else: 1890 p[1].value = p[1].value[:-1] + p[2][1:] 1891 p[0] = p[1] 1892 1893 def p_unified_wstring_literal(self, p): 1894 """ unified_wstring_literal : WSTRING_LITERAL 1895 | U8STRING_LITERAL 1896 | U16STRING_LITERAL 1897 | U32STRING_LITERAL 1898 | unified_wstring_literal WSTRING_LITERAL 1899 | unified_wstring_literal U8STRING_LITERAL 1900 | unified_wstring_literal U16STRING_LITERAL 1901 | unified_wstring_literal U32STRING_LITERAL 1902 """ 1903 if len(p) == 2: # single literal 1904 p[0] = c_ast.Constant( 1905 'string', p[1], self._token_coord(p, 1)) 1906 else: 1907 p[1].value = p[1].value.rstrip()[:-1] + p[2][2:] 1908 p[0] = p[1] 1909 1910 def p_brace_open(self, p): 1911 """ brace_open : LBRACE 1912 """ 1913 p[0] = p[1] 1914 p.set_lineno(0, p.lineno(1)) 1915 1916 def p_brace_close(self, p): 1917 """ brace_close : RBRACE 1918 """ 1919 p[0] = p[1] 1920 p.set_lineno(0, p.lineno(1)) 1921 1922 def p_empty(self, p): 1923 'empty : ' 1924 p[0] = None 1925 1926 def p_error(self, p): 1927 # If error recovery is added here in the future, make sure 1928 # _get_yacc_lookahead_token still works! 1929 # 1930 if p: 1931 self._parse_error( 1932 'before: %s' % p.value, 1933 self._coord(lineno=p.lineno, 1934 column=self.clex.find_tok_column(p))) 1935 else: 1936 self._parse_error('At end of input', self.clex.filename) 1937