1#------------------------------------------------------------------------------
2# pycparser: c_parser.py
3#
4# CParser class: Parser and AST builder for the C language
5#
6# Eli Bendersky [https://eli.thegreenplace.net/]
7# License: BSD
8#------------------------------------------------------------------------------
9from .ply import yacc
10
11from . import c_ast
12from .c_lexer import CLexer
13from .plyparser import PLYParser, ParseError, parameterized, template
14from .ast_transforms import fix_switch_cases, fix_atomic_specifiers
15
16
17@template
18class CParser(PLYParser):
19    def __init__(
20            self,
21            lex_optimize=True,
22            lexer=CLexer,
23            lextab='pycparser.lextab',
24            yacc_optimize=True,
25            yacctab='pycparser.yacctab',
26            yacc_debug=False,
27            taboutputdir=''):
28        """ Create a new CParser.
29
30            Some arguments for controlling the debug/optimization
31            level of the parser are provided. The defaults are
32            tuned for release/performance mode.
33            The simple rules for using them are:
34            *) When tweaking CParser/CLexer, set these to False
35            *) When releasing a stable parser, set to True
36
37            lex_optimize:
38                Set to False when you're modifying the lexer.
39                Otherwise, changes in the lexer won't be used, if
40                some lextab.py file exists.
41                When releasing with a stable lexer, set to True
42                to save the re-generation of the lexer table on
43                each run.
44
45            lexer:
46                Set this parameter to define the lexer to use if
47                you're not using the default CLexer.
48
49            lextab:
50                Points to the lex table that's used for optimized
51                mode. Only if you're modifying the lexer and want
52                some tests to avoid re-generating the table, make
53                this point to a local lex table file (that's been
54                earlier generated with lex_optimize=True)
55
56            yacc_optimize:
57                Set to False when you're modifying the parser.
58                Otherwise, changes in the parser won't be used, if
59                some parsetab.py file exists.
60                When releasing with a stable parser, set to True
61                to save the re-generation of the parser table on
62                each run.
63
64            yacctab:
65                Points to the yacc table that's used for optimized
66                mode. Only if you're modifying the parser, make
67                this point to a local yacc table file
68
69            yacc_debug:
70                Generate a parser.out file that explains how yacc
71                built the parsing table from the grammar.
72
73            taboutputdir:
74                Set this parameter to control the location of generated
75                lextab and yacctab files.
76        """
77        self.clex = lexer(
78            error_func=self._lex_error_func,
79            on_lbrace_func=self._lex_on_lbrace_func,
80            on_rbrace_func=self._lex_on_rbrace_func,
81            type_lookup_func=self._lex_type_lookup_func)
82
83        self.clex.build(
84            optimize=lex_optimize,
85            lextab=lextab,
86            outputdir=taboutputdir)
87        self.tokens = self.clex.tokens
88
89        rules_with_opt = [
90            'abstract_declarator',
91            'assignment_expression',
92            'declaration_list',
93            'declaration_specifiers_no_type',
94            'designation',
95            'expression',
96            'identifier_list',
97            'init_declarator_list',
98            'id_init_declarator_list',
99            'initializer_list',
100            'parameter_type_list',
101            'block_item_list',
102            'type_qualifier_list',
103            'struct_declarator_list'
104        ]
105
106        for rule in rules_with_opt:
107            self._create_opt_rule(rule)
108
109        self.cparser = yacc.yacc(
110            module=self,
111            start='translation_unit_or_empty',
112            debug=yacc_debug,
113            optimize=yacc_optimize,
114            tabmodule=yacctab,
115            outputdir=taboutputdir)
116
117        # Stack of scopes for keeping track of symbols. _scope_stack[-1] is
118        # the current (topmost) scope. Each scope is a dictionary that
119        # specifies whether a name is a type. If _scope_stack[n][name] is
120        # True, 'name' is currently a type in the scope. If it's False,
121        # 'name' is used in the scope but not as a type (for instance, if we
122        # saw: int name;
123        # If 'name' is not a key in _scope_stack[n] then 'name' was not defined
124        # in this scope at all.
125        self._scope_stack = [dict()]
126
127        # Keeps track of the last token given to yacc (the lookahead token)
128        self._last_yielded_token = None
129
130    def parse(self, text, filename='', debug=False):
131        """ Parses C code and returns an AST.
132
133            text:
134                A string containing the C source code
135
136            filename:
137                Name of the file being parsed (for meaningful
138                error messages)
139
140            debug:
141                Debug flag to YACC
142        """
143        self.clex.filename = filename
144        self.clex.reset_lineno()
145        self._scope_stack = [dict()]
146        self._last_yielded_token = None
147        return self.cparser.parse(
148                input=text,
149                lexer=self.clex,
150                debug=debug)
151
152    ######################--   PRIVATE   --######################
153
154    def _push_scope(self):
155        self._scope_stack.append(dict())
156
157    def _pop_scope(self):
158        assert len(self._scope_stack) > 1
159        self._scope_stack.pop()
160
161    def _add_typedef_name(self, name, coord):
162        """ Add a new typedef name (ie a TYPEID) to the current scope
163        """
164        if not self._scope_stack[-1].get(name, True):
165            self._parse_error(
166                "Typedef %r previously declared as non-typedef "
167                "in this scope" % name, coord)
168        self._scope_stack[-1][name] = True
169
170    def _add_identifier(self, name, coord):
171        """ Add a new object, function, or enum member name (ie an ID) to the
172            current scope
173        """
174        if self._scope_stack[-1].get(name, False):
175            self._parse_error(
176                "Non-typedef %r previously declared as typedef "
177                "in this scope" % name, coord)
178        self._scope_stack[-1][name] = False
179
180    def _is_type_in_scope(self, name):
181        """ Is *name* a typedef-name in the current scope?
182        """
183        for scope in reversed(self._scope_stack):
184            # If name is an identifier in this scope it shadows typedefs in
185            # higher scopes.
186            in_scope = scope.get(name)
187            if in_scope is not None: return in_scope
188        return False
189
    def _lex_error_func(self, msg, line, column):
        # Lexer error callback: surface lexer errors as parse errors
        # with a source coordinate attached.
        self._parse_error(msg, self._coord(line, column))

    def _lex_on_lbrace_func(self):
        # Lexer callback on '{': a new scope begins.
        self._push_scope()

    def _lex_on_rbrace_func(self):
        # Lexer callback on '}': the innermost scope ends.
        self._pop_scope()

    def _lex_type_lookup_func(self, name):
        """ Looks up types that were previously defined with
            typedef.
            Passed to the lexer for recognizing identifiers that
            are types.
        """
        is_type = self._is_type_in_scope(name)
        return is_type

    def _get_yacc_lookahead_token(self):
        """ We need access to yacc's lookahead token in certain cases.
            This is the last token yacc requested from the lexer, so we
            ask the lexer.
        """
        return self.clex.last_token
214
215    # To understand what's going on here, read sections A.8.5 and
216    # A.8.6 of K&R2 very carefully.
217    #
218    # A C type consists of a basic type declaration, with a list
219    # of modifiers. For example:
220    #
221    # int *c[5];
222    #
223    # The basic declaration here is 'int c', and the pointer and
224    # the array are the modifiers.
225    #
226    # Basic declarations are represented by TypeDecl (from module c_ast) and the
227    # modifiers are FuncDecl, PtrDecl and ArrayDecl.
228    #
229    # The standard states that whenever a new modifier is parsed, it should be
230    # added to the end of the list of modifiers. For example:
231    #
232    # K&R2 A.8.6.2: Array Declarators
233    #
234    # In a declaration T D where D has the form
235    #   D1 [constant-expression-opt]
236    # and the type of the identifier in the declaration T D1 is
237    # "type-modifier T", the type of the
238    # identifier of D is "type-modifier array of T"
239    #
240    # This is what this method does. The declarator it receives
241    # can be a list of declarators ending with TypeDecl. It
242    # tacks the modifier to the end of this list, just before
243    # the TypeDecl.
244    #
245    # Additionally, the modifier may be a list itself. This is
246    # useful for pointers, that can come as a chain from the rule
247    # p_pointer. In this case, the whole modifier list is spliced
248    # into the new location.
249    def _type_modify_decl(self, decl, modifier):
250        """ Tacks a type modifier on a declarator, and returns
251            the modified declarator.
252
253            Note: the declarator and modifier may be modified
254        """
255        #~ print '****'
256        #~ decl.show(offset=3)
257        #~ modifier.show(offset=3)
258        #~ print '****'
259
260        modifier_head = modifier
261        modifier_tail = modifier
262
263        # The modifier may be a nested list. Reach its tail.
264        while modifier_tail.type:
265            modifier_tail = modifier_tail.type
266
267        # If the decl is a basic type, just tack the modifier onto it.
268        if isinstance(decl, c_ast.TypeDecl):
269            modifier_tail.type = decl
270            return modifier
271        else:
272            # Otherwise, the decl is a list of modifiers. Reach
273            # its tail and splice the modifier onto the tail,
274            # pointing to the underlying basic type.
275            decl_tail = decl
276
277            while not isinstance(decl_tail.type, c_ast.TypeDecl):
278                decl_tail = decl_tail.type
279
280            modifier_tail.type = decl_tail.type
281            decl_tail.type = modifier_head
282            return decl
283
284    # Due to the order in which declarators are constructed,
285    # they have to be fixed in order to look like a normal AST.
286    #
287    # When a declaration arrives from syntax construction, it has
288    # these problems:
289    # * The innermost TypeDecl has no type (because the basic
290    #   type is only known at the uppermost declaration level)
291    # * The declaration has no variable name, since that is saved
292    #   in the innermost TypeDecl
293    # * The typename of the declaration is a list of type
294    #   specifiers, and not a node. Here, basic identifier types
295    #   should be separated from more complex types like enums
296    #   and structs.
297    #
298    # This method fixes these problems.
299    def _fix_decl_name_type(self, decl, typename):
300        """ Fixes a declaration. Modifies decl.
301        """
302        # Reach the underlying basic type
303        #
304        type = decl
305        while not isinstance(type, c_ast.TypeDecl):
306            type = type.type
307
308        decl.name = type.declname
309        type.quals = decl.quals[:]
310
311        # The typename is a list of types. If any type in this
312        # list isn't an IdentifierType, it must be the only
313        # type in the list (it's illegal to declare "int enum ..")
314        # If all the types are basic, they're collected in the
315        # IdentifierType holder.
316        for tn in typename:
317            if not isinstance(tn, c_ast.IdentifierType):
318                if len(typename) > 1:
319                    self._parse_error(
320                        "Invalid multiple types specified", tn.coord)
321                else:
322                    type.type = tn
323                    return decl
324
325        if not typename:
326            # Functions default to returning int
327            #
328            if not isinstance(decl.type, c_ast.FuncDecl):
329                self._parse_error(
330                        "Missing type in declaration", decl.coord)
331            type.type = c_ast.IdentifierType(
332                    ['int'],
333                    coord=decl.coord)
334        else:
335            # At this point, we know that typename is a list of IdentifierType
336            # nodes. Concatenate all the names into a single list.
337            #
338            type.type = c_ast.IdentifierType(
339                [name for id in typename for name in id.names],
340                coord=typename[0].coord)
341        return decl
342
343    def _add_declaration_specifier(self, declspec, newspec, kind, append=False):
344        """ Declaration specifiers are represented by a dictionary
345            with the entries:
346            * qual: a list of type qualifiers
347            * storage: a list of storage type qualifiers
348            * type: a list of type specifiers
349            * function: a list of function specifiers
350            * alignment: a list of alignment specifiers
351
352            This method is given a declaration specifier, and a
353            new specifier of a given kind.
354            If `append` is True, the new specifier is added to the end of
355            the specifiers list, otherwise it's added at the beginning.
356            Returns the declaration specifier, with the new
357            specifier incorporated.
358        """
359        spec = declspec or dict(qual=[], storage=[], type=[], function=[], alignment=[])
360
361        if append:
362            spec[kind].append(newspec)
363        else:
364            spec[kind].insert(0, newspec)
365
366        return spec
367
    def _build_declarations(self, spec, decls, typedef_namespace=False):
        """ Builds a list of declarations all sharing the given specifiers.
            If typedef_namespace is true, each declared name is added
            to the "typedef namespace", which also includes objects,
            functions, and enum constants.

            spec:
                Declaration-specifiers dictionary (see
                _add_declaration_specifier). May be mutated: the last
                entry of spec['type'] can be consumed as a declarator
                name in the repair cases below.

            decls:
                List of dicts, each with a 'decl' key and optional
                'init' and 'bitsize' keys, one per declarator.
        """
        is_typedef = 'typedef' in spec['storage']
        declarations = []

        # Bit-fields are allowed to be unnamed.
        if decls[0].get('bitsize') is not None:
            pass

        # When redeclaring typedef names as identifiers in inner scopes, a
        # problem can occur where the identifier gets grouped into
        # spec['type'], leaving decl as None.  This can only occur for the
        # first declarator.
        elif decls[0]['decl'] is None:
            # Repair is only legal when the last type specifier is a
            # single name that is a typedef in some visible scope.
            if len(spec['type']) < 2 or len(spec['type'][-1].names) != 1 or \
                    not self._is_type_in_scope(spec['type'][-1].names[0]):
                coord = '?'
                for t in spec['type']:
                    if hasattr(t, 'coord'):
                        coord = t.coord
                        break
                self._parse_error('Invalid declaration', coord)

            # Make this look as if it came from "direct_declarator:ID"
            decls[0]['decl'] = c_ast.TypeDecl(
                declname=spec['type'][-1].names[0],
                type=None,
                quals=None,
                align=spec['alignment'],
                coord=spec['type'][-1].coord)
            # Remove the "new" type's name from the end of spec['type']
            del spec['type'][-1]

        # A similar problem can occur where the declaration ends up looking
        # like an abstract declarator.  Give it a name if this is the case.
        elif not isinstance(decls[0]['decl'], (
                c_ast.Enum, c_ast.Struct, c_ast.Union, c_ast.IdentifierType)):
            decls_0_tail = decls[0]['decl']
            while not isinstance(decls_0_tail, c_ast.TypeDecl):
                decls_0_tail = decls_0_tail.type
            if decls_0_tail.declname is None:
                decls_0_tail.declname = spec['type'][-1].names[0]
                del spec['type'][-1]

        # Build one Typedef or Decl node per declarator; all of them
        # share the qualifier/storage/type lists from spec.
        for decl in decls:
            assert decl['decl'] is not None
            if is_typedef:
                declaration = c_ast.Typedef(
                    name=None,
                    quals=spec['qual'],
                    storage=spec['storage'],
                    type=decl['decl'],
                    coord=decl['decl'].coord)
            else:
                declaration = c_ast.Decl(
                    name=None,
                    quals=spec['qual'],
                    align=spec['alignment'],
                    storage=spec['storage'],
                    funcspec=spec['function'],
                    type=decl['decl'],
                    init=decl.get('init'),
                    bitsize=decl.get('bitsize'),
                    coord=decl['decl'].coord)

            # Tag types (enum/struct/union) and plain identifier types
            # already carry their name/type; others need fixing up.
            if isinstance(declaration.type, (
                    c_ast.Enum, c_ast.Struct, c_ast.Union,
                    c_ast.IdentifierType)):
                fixed_decl = declaration
            else:
                fixed_decl = self._fix_decl_name_type(declaration, spec['type'])

            # Add the type name defined by typedef to a
            # symbol table (for usage in the lexer)
            if typedef_namespace:
                if is_typedef:
                    self._add_typedef_name(fixed_decl.name, fixed_decl.coord)
                else:
                    self._add_identifier(fixed_decl.name, fixed_decl.coord)

            fixed_decl = fix_atomic_specifiers(fixed_decl)
            declarations.append(fixed_decl)

        return declarations
456
457    def _build_function_definition(self, spec, decl, param_decls, body):
458        """ Builds a function definition.
459        """
460        if 'typedef' in spec['storage']:
461            self._parse_error("Invalid typedef", decl.coord)
462
463        declaration = self._build_declarations(
464            spec=spec,
465            decls=[dict(decl=decl, init=None)],
466            typedef_namespace=True)[0]
467
468        return c_ast.FuncDef(
469            decl=declaration,
470            param_decls=param_decls,
471            body=body,
472            coord=decl.coord)
473
474    def _select_struct_union_class(self, token):
475        """ Given a token (either STRUCT or UNION), selects the
476            appropriate AST class.
477        """
478        if token == 'struct':
479            return c_ast.Struct
480        else:
481            return c_ast.Union
482
483    ##
484    ## Precedence and associativity of operators
485    ##
486    # If this changes, c_generator.CGenerator.precedence_map needs to change as
487    # well
    precedence = (
        # Listed lowest-precedence first; all binary operators here are
        # left-associative, mirroring C's expression grammar.
        ('left', 'LOR'),
        ('left', 'LAND'),
        ('left', 'OR'),
        ('left', 'XOR'),
        ('left', 'AND'),
        ('left', 'EQ', 'NE'),
        ('left', 'GT', 'GE', 'LT', 'LE'),
        ('left', 'RSHIFT', 'LSHIFT'),
        ('left', 'PLUS', 'MINUS'),
        ('left', 'TIMES', 'DIVIDE', 'MOD')
    )
500
501    ##
502    ## Grammar productions
503    ## Implementation of the BNF defined in K&R2 A.13
504    ##
505
506    # Wrapper around a translation unit, to allow for empty input.
507    # Not strictly part of the C99 Grammar, but useful in practice.
    def p_translation_unit_or_empty(self, p):
        """ translation_unit_or_empty   : translation_unit
                                        | empty
        """
        # p[1] is None only for the 'empty' production; either way the
        # result is a FileAST holding a (possibly empty) list.
        if p[1] is None:
            p[0] = c_ast.FileAST([])
        else:
            p[0] = c_ast.FileAST(p[1])

    def p_translation_unit_1(self, p):
        """ translation_unit    : external_declaration
        """
        # Note: external_declaration is already a list
        p[0] = p[1]

    def p_translation_unit_2(self, p):
        """ translation_unit    : translation_unit external_declaration
        """
        # Accumulate all external declarations into one flat list.
        p[1].extend(p[2])
        p[0] = p[1]
528
529    # Declarations always come as lists (because they can be
530    # several in one line), so we wrap the function definition
531    # into a list as well, to make the return value of
532    # external_declaration homogeneous.
    def p_external_declaration_1(self, p):
        """ external_declaration    : function_definition
        """
        # Wrap the single FuncDef in a list for homogeneity (see the
        # comment above).
        p[0] = [p[1]]

    def p_external_declaration_2(self, p):
        """ external_declaration    : declaration
        """
        # 'declaration' already produces a list of Decl nodes.
        p[0] = p[1]

    def p_external_declaration_3(self, p):
        """ external_declaration    : pp_directive
                                    | pppragma_directive
        """
        p[0] = [p[1]]

    def p_external_declaration_4(self, p):
        """ external_declaration    : SEMI
        """
        # A stray semicolon at file scope contributes nothing to the AST.
        p[0] = []

    def p_external_declaration_5(self, p):
        """ external_declaration    : static_assert
        """
        # 'static_assert' already produces a list.
        p[0] = p[1]
558
    def p_static_assert_declaration(self, p):
        """ static_assert           : _STATIC_ASSERT LPAREN constant_expression COMMA unified_string_literal RPAREN
                                    | _STATIC_ASSERT LPAREN constant_expression RPAREN
        """
        # len(p) == 5 is the message-less form; otherwise p[5] is the
        # string-literal message.
        if len(p) == 5:
            p[0] = [c_ast.StaticAssert(p[3], None, self._token_coord(p, 1))]
        else:
            p[0] = [c_ast.StaticAssert(p[3], p[5], self._token_coord(p, 1))]
567
    def p_pp_directive(self, p):
        """ pp_directive  : PPHASH
        """
        # pycparser expects already-preprocessed input; a raw '#'
        # directive is therefore an error.
        self._parse_error('Directives not supported yet',
                          self._token_coord(p, 1))

    def p_pppragma_directive(self, p):
        """ pppragma_directive      : PPPRAGMA
                                    | PPPRAGMA PPPRAGMASTR
        """
        # len(p) == 3 means the pragma carries a string payload (p[2]);
        # a bare '#pragma' produces a Pragma with an empty string.
        if len(p) == 3:
            p[0] = c_ast.Pragma(p[2], self._token_coord(p, 2))
        else:
            p[0] = c_ast.Pragma("", self._token_coord(p, 1))
582
    # In function definitions, the declarator can be followed by
    # a declaration list, for old "K&R style" function definitions.
    def p_function_definition_1(self, p):
        """ function_definition : id_declarator declaration_list_opt compound_statement
        """
        # no declaration specifiers - 'int' becomes the default type
        # (pre-C99 implicit-int rule).
        spec = dict(
            qual=[],
            alignment=[],
            storage=[],
            type=[c_ast.IdentifierType(['int'],
                                       coord=self._token_coord(p, 1))],
            function=[])

        p[0] = self._build_function_definition(
            spec=spec,
            decl=p[1],
            param_decls=p[2],
            body=p[3])

    def p_function_definition_2(self, p):
        """ function_definition : declaration_specifiers id_declarator declaration_list_opt compound_statement
        """
        # Explicit declaration specifiers: use them as-is.
        spec = p[1]

        p[0] = self._build_function_definition(
            spec=spec,
            decl=p[2],
            param_decls=p[3],
            body=p[4])
613
614    # Note, according to C18 A.2.2 6.7.10 static_assert-declaration _Static_assert
615    # is a declaration, not a statement. We additionally recognise it as a statement
616    # to fix parsing of _Static_assert inside the functions.
617    #
    def p_statement(self, p):
        """ statement   : labeled_statement
                        | expression_statement
                        | compound_statement
                        | selection_statement
                        | iteration_statement
                        | jump_statement
                        | pppragma_directive
                        | static_assert
        """
        # Every statement kind passes through unchanged.
        p[0] = p[1]
629
630    # A pragma is generally considered a decorator rather than an actual
631    # statement. Still, for the purposes of analyzing an abstract syntax tree of
632    # C code, pragma's should not be ignored and were previously treated as a
633    # statement. This presents a problem for constructs that take a statement
634    # such as labeled_statements, selection_statements, and
635    # iteration_statements, causing a misleading structure in the AST. For
636    # example, consider the following C code.
637    #
638    #   for (int i = 0; i < 3; i++)
639    #       #pragma omp critical
640    #       sum += 1;
641    #
642    # This code will compile and execute "sum += 1;" as the body of the for
643    # loop. Previous implementations of PyCParser would render the AST for this
644    # block of code as follows:
645    #
646    #   For:
647    #     DeclList:
648    #       Decl: i, [], [], []
649    #         TypeDecl: i, []
650    #           IdentifierType: ['int']
651    #         Constant: int, 0
652    #     BinaryOp: <
653    #       ID: i
654    #       Constant: int, 3
655    #     UnaryOp: p++
656    #       ID: i
657    #     Pragma: omp critical
658    #   Assignment: +=
659    #     ID: sum
660    #     Constant: int, 1
661    #
662    # This AST misleadingly takes the Pragma as the body of the loop and the
663    # assignment then becomes a sibling of the loop.
664    #
665    # To solve edge cases like these, the pragmacomp_or_statement rule groups
666    # a pragma and its following statement (which would otherwise be orphaned)
667    # using a compound block, effectively turning the above code into:
668    #
669    #   for (int i = 0; i < 3; i++) {
670    #       #pragma omp critical
671    #       sum += 1;
672    #   }
    def p_pragmacomp_or_statement(self, p):
        """ pragmacomp_or_statement     : pppragma_directive statement
                                        | statement
        """
        # A pragma immediately followed by a statement is grouped into a
        # synthetic Compound so the statement is not orphaned (see the
        # comment block above for the motivating example).
        if isinstance(p[1], c_ast.Pragma) and len(p) == 3:
            p[0] = c_ast.Compound(
                block_items=[p[1], p[2]],
                coord=self._token_coord(p, 1))
        else:
            p[0] = p[1]
683
684    # In C, declarations can come several in a line:
685    #   int x, *px, romulo = 5;
686    #
687    # However, for the AST, we will split them to separate Decl
688    # nodes.
689    #
690    # This rule splits its declarations and always returns a list
691    # of Decl nodes, even if it's one element long.
692    #
    def p_decl_body(self, p):
        """ decl_body : declaration_specifiers init_declarator_list_opt
                      | declaration_specifiers_no_type id_init_declarator_list_opt
        """
        spec = p[1]

        # p[2] (init_declarator_list_opt) is either a list or None
        #
        if p[2] is None:
            # By the standard, you must have at least one declarator unless
            # declaring a structure tag, a union tag, or the members of an
            # enumeration.
            #
            ty = spec['type']
            s_u_or_e = (c_ast.Struct, c_ast.Union, c_ast.Enum)
            if len(ty) == 1 and isinstance(ty[0], s_u_or_e):
                # Tag-only declaration (e.g. "struct foo;"): build a
                # nameless Decl wrapping the tag type directly.
                decls = [c_ast.Decl(
                    name=None,
                    quals=spec['qual'],
                    align=spec['alignment'],
                    storage=spec['storage'],
                    funcspec=spec['function'],
                    type=ty[0],
                    init=None,
                    bitsize=None,
                    coord=ty[0].coord)]

            # However, this case can also occur on redeclared identifiers in
            # an inner scope.  The trouble is that the redeclared type's name
            # gets grouped into declaration_specifiers; _build_declarations
            # compensates for this.
            #
            else:
                decls = self._build_declarations(
                    spec=spec,
                    decls=[dict(decl=None, init=None)],
                    typedef_namespace=True)

        else:
            decls = self._build_declarations(
                spec=spec,
                decls=p[2],
                typedef_namespace=True)

        p[0] = decls
738
739    # The declaration has been split to a decl_body sub-rule and
740    # SEMI, because having them in a single rule created a problem
741    # for defining typedefs.
742    #
743    # If a typedef line was directly followed by a line using the
744    # type defined with the typedef, the type would not be
745    # recognized. This is because to reduce the declaration rule,
746    # the parser's lookahead asked for the token after SEMI, which
747    # was the type from the next line, and the lexer had no chance
748    # to see the updated type symbol table.
749    #
750    # Splitting solves this problem, because after seeing SEMI,
751    # the parser reduces decl_body, which actually adds the new
752    # type into the table to be seen by the lexer before the next
753    # line is reached.
    def p_declaration(self, p):
        """ declaration : decl_body SEMI
        """
        # decl_body already built the Decl list; SEMI only terminates it
        # (split out so typedef names reach the lexer in time — see the
        # comment above).
        p[0] = p[1]

    # Since each declaration is a list of declarations, this
    # rule will combine all the declarations and return a single
    # list
    #
    def p_declaration_list(self, p):
        """ declaration_list    : declaration
                                | declaration_list declaration
        """
        p[0] = p[1] if len(p) == 2 else p[1] + p[2]
768
769    # To know when declaration-specifiers end and declarators begin,
770    # we require declaration-specifiers to have at least one
771    # type-specifier, and disallow typedef-names after we've seen any
772    # type-specifier. These are both required by the spec.
773    #
774    def p_declaration_specifiers_no_type_1(self, p):
775        """ declaration_specifiers_no_type  : type_qualifier declaration_specifiers_no_type_opt
776        """
777        p[0] = self._add_declaration_specifier(p[2], p[1], 'qual')
778
779    def p_declaration_specifiers_no_type_2(self, p):
780        """ declaration_specifiers_no_type  : storage_class_specifier declaration_specifiers_no_type_opt
781        """
782        p[0] = self._add_declaration_specifier(p[2], p[1], 'storage')
783
784    def p_declaration_specifiers_no_type_3(self, p):
785        """ declaration_specifiers_no_type  : function_specifier declaration_specifiers_no_type_opt
786        """
787        p[0] = self._add_declaration_specifier(p[2], p[1], 'function')
788
789    # Without this, `typedef _Atomic(T) U` will parse incorrectly because the
790    # _Atomic qualifier will match, instead of the specifier.
791    def p_declaration_specifiers_no_type_4(self, p):
792        """ declaration_specifiers_no_type  : atomic_specifier declaration_specifiers_no_type_opt
793        """
794        p[0] = self._add_declaration_specifier(p[2], p[1], 'type')
795
    def p_declaration_specifiers_no_type_5(self, p):
        """ declaration_specifiers_no_type  : alignment_specifier declaration_specifiers_no_type_opt
        """
        # Fold the _Alignas specifier in under 'alignment'.
        p[0] = self._add_declaration_specifier(p[2], p[1], 'alignment')
800
    def p_declaration_specifiers_1(self, p):
        """ declaration_specifiers  : declaration_specifiers type_qualifier
        """
        # The qualifier follows the accumulated specifiers, hence append=True.
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)
805
    def p_declaration_specifiers_2(self, p):
        """ declaration_specifiers  : declaration_specifiers storage_class_specifier
        """
        # The storage class follows the accumulated specifiers, hence append=True.
        p[0] = self._add_declaration_specifier(p[1], p[2], 'storage', append=True)
810
    def p_declaration_specifiers_3(self, p):
        """ declaration_specifiers  : declaration_specifiers function_specifier
        """
        # The function specifier follows the accumulated specifiers.
        p[0] = self._add_declaration_specifier(p[1], p[2], 'function', append=True)
815
    def p_declaration_specifiers_4(self, p):
        """ declaration_specifiers  : declaration_specifiers type_specifier_no_typeid
        """
        # Additional basic-type keywords (e.g. 'unsigned long') accumulate in order.
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
820
    def p_declaration_specifiers_5(self, p):
        """ declaration_specifiers  : type_specifier
        """
        # First (and possibly only) specifier: start a fresh specifier dict.
        p[0] = self._add_declaration_specifier(None, p[1], 'type')
825
    def p_declaration_specifiers_6(self, p):
        """ declaration_specifiers  : declaration_specifiers_no_type type_specifier
        """
        # The first type specifier after qualifiers/storage-class keywords.
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
830
    def p_declaration_specifiers_7(self, p):
        """ declaration_specifiers  : declaration_specifiers alignment_specifier
        """
        # The _Alignas specifier follows the accumulated specifiers.
        p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment', append=True)
835
    def p_storage_class_specifier(self, p):
        """ storage_class_specifier : AUTO
                                    | REGISTER
                                    | STATIC
                                    | EXTERN
                                    | TYPEDEF
                                    | _THREAD_LOCAL
        """
        # Pass the keyword's text up as-is.
        p[0] = p[1]
845
    def p_function_specifier(self, p):
        """ function_specifier  : INLINE
                                | _NORETURN
        """
        # Pass the keyword's text up as-is.
        p[0] = p[1]
851
    def p_type_specifier_no_typeid(self, p):
        """ type_specifier_no_typeid  : VOID
                                      | _BOOL
                                      | CHAR
                                      | SHORT
                                      | INT
                                      | LONG
                                      | FLOAT
                                      | DOUBLE
                                      | _COMPLEX
                                      | SIGNED
                                      | UNSIGNED
                                      | __INT128
        """
        # A basic type keyword becomes a single-name IdentifierType node;
        # multi-keyword types ('unsigned long') are merged later via the
        # specifier dict's 'type' list.
        p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))
867
    def p_type_specifier(self, p):
        """ type_specifier  : typedef_name
                            | enum_specifier
                            | struct_or_union_specifier
                            | type_specifier_no_typeid
                            | atomic_specifier
        """
        # All alternatives already produce an AST node; pass it through.
        p[0] = p[1]
876
    # See section 6.7.2.4 of the C11 standard.
    def p_atomic_specifier(self, p):
        """ atomic_specifier  : _ATOMIC LPAREN type_name RPAREN
        """
        # _Atomic(T) is represented as T with '_Atomic' added to its qualifiers.
        typ = p[3]
        typ.quals.append('_Atomic')
        p[0] = typ
884
    def p_type_qualifier(self, p):
        """ type_qualifier  : CONST
                            | RESTRICT
                            | VOLATILE
                            | _ATOMIC
        """
        # Pass the qualifier keyword's text up as-is.
        p[0] = p[1]
892
893    def p_init_declarator_list(self, p):
894        """ init_declarator_list    : init_declarator
895                                    | init_declarator_list COMMA init_declarator
896        """
897        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
898
899    # Returns a {decl=<declarator> : init=<initializer>} dictionary
900    # If there's no initializer, uses None
901    #
902    def p_init_declarator(self, p):
903        """ init_declarator : declarator
904                            | declarator EQUALS initializer
905        """
906        p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))
907
908    def p_id_init_declarator_list(self, p):
909        """ id_init_declarator_list    : id_init_declarator
910                                       | id_init_declarator_list COMMA init_declarator
911        """
912        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
913
914    def p_id_init_declarator(self, p):
915        """ id_init_declarator : id_declarator
916                               | id_declarator EQUALS initializer
917        """
918        p[0] = dict(decl=p[1], init=(p[3] if len(p) > 2 else None))
919
    # Require at least one type specifier in a specifier-qualifier-list
    #
    def p_specifier_qualifier_list_1(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list type_specifier_no_typeid
        """
        # Further basic-type keywords accumulate in source order.
        p[0] = self._add_declaration_specifier(p[1], p[2], 'type', append=True)
926
    def p_specifier_qualifier_list_2(self, p):
        """ specifier_qualifier_list    : specifier_qualifier_list type_qualifier
        """
        # A qualifier after the list goes into the 'qual' slot, in order.
        p[0] = self._add_declaration_specifier(p[1], p[2], 'qual', append=True)
931
    def p_specifier_qualifier_list_3(self, p):
        """ specifier_qualifier_list  : type_specifier
        """
        # First specifier: start a fresh specifier dict.
        p[0] = self._add_declaration_specifier(None, p[1], 'type')
936
    def p_specifier_qualifier_list_4(self, p):
        """ specifier_qualifier_list  : type_qualifier_list type_specifier
        """
        # Qualifiers came first as a plain list; build the dict directly.
        p[0] = dict(qual=p[1], alignment=[], storage=[], type=[p[2]], function=[])
941
    def p_specifier_qualifier_list_5(self, p):
        """ specifier_qualifier_list  : alignment_specifier
        """
        # A lone _Alignas starts the dict with only an alignment entry.
        p[0] = dict(qual=[], alignment=[p[1]], storage=[], type=[], function=[])
946
    def p_specifier_qualifier_list_6(self, p):
        """ specifier_qualifier_list  : specifier_qualifier_list alignment_specifier
        """
        # NOTE(review): unlike _1/_2 above, no append=True here even though the
        # alignment specifier follows the list — confirm the resulting ordering
        # of 'alignment' entries is intentional.
        p[0] = self._add_declaration_specifier(p[1], p[2], 'alignment')
951
    # TYPEID is allowed here (and in other struct/enum related tag names), because
    # struct/enum tags reside in their own namespace and can be named the same as types
    #
    def p_struct_or_union_specifier_1(self, p):
        """ struct_or_union_specifier   : struct_or_union ID
                                        | struct_or_union TYPEID
        """
        # A bare 'struct X' / 'union X' reference without a member list.
        klass = self._select_struct_union_class(p[1])
        # None means no list of members
        p[0] = klass(
            name=p[2],
            decls=None,
            coord=self._token_coord(p, 2))
965
966    def p_struct_or_union_specifier_2(self, p):
967        """ struct_or_union_specifier : struct_or_union brace_open struct_declaration_list brace_close
968                                      | struct_or_union brace_open brace_close
969        """
970        klass = self._select_struct_union_class(p[1])
971        if len(p) == 4:
972            # Empty sequence means an empty list of members
973            p[0] = klass(
974                name=None,
975                decls=[],
976                coord=self._token_coord(p, 2))
977        else:
978            p[0] = klass(
979                name=None,
980                decls=p[3],
981                coord=self._token_coord(p, 2))
982
983
984    def p_struct_or_union_specifier_3(self, p):
985        """ struct_or_union_specifier   : struct_or_union ID brace_open struct_declaration_list brace_close
986                                        | struct_or_union ID brace_open brace_close
987                                        | struct_or_union TYPEID brace_open struct_declaration_list brace_close
988                                        | struct_or_union TYPEID brace_open brace_close
989        """
990        klass = self._select_struct_union_class(p[1])
991        if len(p) == 5:
992            # Empty sequence means an empty list of members
993            p[0] = klass(
994                name=p[2],
995                decls=[],
996                coord=self._token_coord(p, 2))
997        else:
998            p[0] = klass(
999                name=p[2],
1000                decls=p[4],
1001                coord=self._token_coord(p, 2))
1002
    def p_struct_or_union(self, p):
        """ struct_or_union : STRUCT
                            | UNION
        """
        # Pass the keyword up; _select_struct_union_class dispatches on it.
        p[0] = p[1]
1008
1009    # Combine all declarations into a single list
1010    #
1011    def p_struct_declaration_list(self, p):
1012        """ struct_declaration_list     : struct_declaration
1013                                        | struct_declaration_list struct_declaration
1014        """
1015        if len(p) == 2:
1016            p[0] = p[1] or []
1017        else:
1018            p[0] = p[1] + (p[2] or [])
1019
    def p_struct_declaration_1(self, p):
        """ struct_declaration : specifier_qualifier_list struct_declarator_list_opt SEMI
        """
        spec = p[1]
        # Sanity check: the specifier_qualifier_list grammar cannot carry a
        # storage class, so 'typedef' must never appear here.
        assert 'typedef' not in spec['storage']

        # Common case: one or more declarators were given.
        if p[2] is not None:
            decls = self._build_declarations(
                spec=spec,
                decls=p[2])

        elif len(spec['type']) == 1:
            # Anonymous struct/union, gcc extension, C1x feature.
            # Although the standard only allows structs/unions here, I see no
            # reason to disallow other types since some compilers have typedefs
            # here, and pycparser isn't about rejecting all invalid code.
            #
            node = spec['type'][0]
            if isinstance(node, c_ast.Node):
                decl_type = node
            else:
                decl_type = c_ast.IdentifierType(node)

            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=decl_type)])

        else:
            # Structure/union members can have the same names as typedefs.
            # The trouble is that the member's name gets grouped into
            # specifier_qualifier_list; _build_declarations compensates.
            #
            decls = self._build_declarations(
                spec=spec,
                decls=[dict(decl=None, init=None)])

        p[0] = decls
1057
    def p_struct_declaration_2(self, p):
        """ struct_declaration : SEMI
        """
        # A stray semicolon contributes no members; the list rule drops None.
        p[0] = None
1062
    def p_struct_declaration_3(self, p):
        """ struct_declaration : pppragma_directive
        """
        # A #pragma inside a struct body is kept as a single-element list.
        p[0] = [p[1]]
1067
1068    def p_struct_declarator_list(self, p):
1069        """ struct_declarator_list  : struct_declarator
1070                                    | struct_declarator_list COMMA struct_declarator
1071        """
1072        p[0] = p[1] + [p[3]] if len(p) == 4 else [p[1]]
1073
1074    # struct_declarator passes up a dict with the keys: decl (for
1075    # the underlying declarator) and bitsize (for the bitsize)
1076    #
1077    def p_struct_declarator_1(self, p):
1078        """ struct_declarator : declarator
1079        """
1080        p[0] = {'decl': p[1], 'bitsize': None}
1081
    def p_struct_declarator_2(self, p):
        """ struct_declarator   : declarator COLON constant_expression
                                | COLON constant_expression
        """
        if len(p) > 3:
            # Named bitfield: 'decl : width'.
            p[0] = {'decl': p[1], 'bitsize': p[3]}
        else:
            # Anonymous bitfield (': width') — use an empty TypeDecl placeholder.
            p[0] = {'decl': c_ast.TypeDecl(None, None, None, None), 'bitsize': p[2]}
1090
    def p_enum_specifier_1(self, p):
        """ enum_specifier  : ENUM ID
                            | ENUM TYPEID
        """
        # A bare 'enum X' reference: named, but with no enumerator list.
        p[0] = c_ast.Enum(p[2], None, self._token_coord(p, 1))
1096
    def p_enum_specifier_2(self, p):
        """ enum_specifier  : ENUM brace_open enumerator_list brace_close
        """
        # Anonymous enum with an enumerator list.
        p[0] = c_ast.Enum(None, p[3], self._token_coord(p, 1))
1101
    def p_enum_specifier_3(self, p):
        """ enum_specifier  : ENUM ID brace_open enumerator_list brace_close
                            | ENUM TYPEID brace_open enumerator_list brace_close
        """
        # Named enum with an enumerator list.
        p[0] = c_ast.Enum(p[2], p[4], self._token_coord(p, 1))
1107
    def p_enumerator_list(self, p):
        """ enumerator_list : enumerator
                            | enumerator_list COMMA
                            | enumerator_list COMMA enumerator
        """
        if len(p) == 2:
            # First enumerator starts the EnumeratorList node.
            p[0] = c_ast.EnumeratorList([p[1]], p[1].coord)
        elif len(p) == 3:
            # Trailing comma (allowed by the grammar): nothing to add.
            p[0] = p[1]
        else:
            # Append in place and pass the same node up.
            p[1].enumerators.append(p[3])
            p[0] = p[1]
1120
    def p_alignment_specifier(self, p):
        """ alignment_specifier  : _ALIGNAS LPAREN type_name RPAREN
                                 | _ALIGNAS LPAREN constant_expression RPAREN
        """
        # Both forms (_Alignas(type) and _Alignas(expr)) wrap p[3] in Alignas.
        p[0] = c_ast.Alignas(p[3], self._token_coord(p, 1))
1126
1127    def p_enumerator(self, p):
1128        """ enumerator  : ID
1129                        | ID EQUALS constant_expression
1130        """
1131        if len(p) == 2:
1132            enumerator = c_ast.Enumerator(
1133                        p[1], None,
1134                        self._token_coord(p, 1))
1135        else:
1136            enumerator = c_ast.Enumerator(
1137                        p[1], p[3],
1138                        self._token_coord(p, 1))
1139        self._add_identifier(enumerator.name, enumerator.coord)
1140
1141        p[0] = enumerator
1142
    def p_declarator(self, p):
        """ declarator  : id_declarator
                        | typeid_declarator
        """
        # Both parameterized variants produce a declarator node; pass it up.
        p[0] = p[1]
1148
    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_xxx_declarator_1(self, p):
        """ xxx_declarator  : direct_xxx_declarator
        """
        # @parameterized expands this template into one concrete rule per
        # pair, substituting 'xxx' in the method name and docstring.
        p[0] = p[1]
1154
    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_xxx_declarator_2(self, p):
        """ xxx_declarator  : pointer direct_xxx_declarator
        """
        # Wrap the direct declarator in the pointer modifier chain.
        p[0] = self._type_modify_decl(p[2], p[1])
1160
    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_1(self, p):
        """ direct_xxx_declarator   : yyy
        """
        # 'yyy' is replaced by the token name (ID/TYPEID) by @parameterized.
        # The innermost declarator: just the declared name.
        p[0] = c_ast.TypeDecl(
            declname=p[1],
            type=None,
            quals=None,
            align=None,
            coord=self._token_coord(p, 1))
1171
    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'))
    def p_direct_xxx_declarator_2(self, p):
        """ direct_xxx_declarator   : LPAREN xxx_declarator RPAREN
        """
        # Parenthesized declarator: grouping only, no new node.
        p[0] = p[2]
1177
    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_3(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
        """
        quals = (p[3] if len(p) > 5 else []) or []
        # Accept dimension qualifiers
        # Per C99 6.7.5.3 p7
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[4] if len(p) > 5 else p[3],
            dim_quals=quals,
            coord=p[1].coord)

        # Hook the ArrayDecl into the accumulated declarator's modifier chain.
        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
1192
1193    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
1194    def p_direct_xxx_declarator_4(self, p):
1195        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET STATIC type_qualifier_list_opt assignment_expression RBRACKET
1196                                    | direct_xxx_declarator LBRACKET type_qualifier_list STATIC assignment_expression RBRACKET
1197        """
1198        # Using slice notation for PLY objects doesn't work in Python 3 for the
1199        # version of PLY embedded with pycparser; see PLY Google Code issue 30.
1200        # Work around that here by listing the two elements separately.
1201        listed_quals = [item if isinstance(item, list) else [item]
1202            for item in [p[3],p[4]]]
1203        dim_quals = [qual for sublist in listed_quals for qual in sublist
1204            if qual is not None]
1205        arr = c_ast.ArrayDecl(
1206            type=None,
1207            dim=p[5],
1208            dim_quals=dim_quals,
1209            coord=p[1].coord)
1210
1211        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
1212
    # Special for VLAs
    #
    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_5(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LBRACKET type_qualifier_list_opt TIMES RBRACKET
        """
        # The '*' dimension of a VLA is kept as an ID node holding '*'.
        arr = c_ast.ArrayDecl(
            type=None,
            dim=c_ast.ID(p[4], self._token_coord(p, 4)),
            dim_quals=p[3] if p[3] is not None else [],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
1226
    @parameterized(('id', 'ID'), ('typeid', 'TYPEID'), ('typeid_noparen', 'TYPEID'))
    def p_direct_xxx_declarator_6(self, p):
        """ direct_xxx_declarator   : direct_xxx_declarator LPAREN parameter_type_list RPAREN
                                    | direct_xxx_declarator LPAREN identifier_list_opt RPAREN
        """
        # Wrap the accumulated declarator in a FuncDecl carrying the params.
        func = c_ast.FuncDecl(
            args=p[3],
            type=None,
            coord=p[1].coord)

        # To see why _get_yacc_lookahead_token is needed, consider:
        #   typedef char TT;
        #   void foo(int TT) { TT = 10; }
        # Outside the function, TT is a typedef, but inside (starting and
        # ending with the braces) it's a parameter.  The trouble begins with
        # yacc's lookahead token.  We don't know if we're declaring or
        # defining a function until we see LBRACE, but if we wait for yacc to
        # trigger a rule on that token, then TT will have already been read
        # and incorrectly interpreted as TYPEID.  We need to add the
        # parameters to the scope the moment the lexer sees LBRACE.
        #
        if self._get_yacc_lookahead_token().type == "LBRACE":
            if func.args is not None:
                for param in func.args.params:
                    # '...' has no name to register; nothing can follow it.
                    if isinstance(param, c_ast.EllipsisParam): break
                    self._add_identifier(param.name, param.coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=func)
1255
    def p_pointer(self, p):
        """ pointer : TIMES type_qualifier_list_opt
                    | TIMES type_qualifier_list_opt pointer
        """
        coord = self._token_coord(p, 1)
        # Pointer decls nest from inside out. This is important when different
        # levels have different qualifiers. For example:
        #
        #  char * const * p;
        #
        # Means "pointer to const pointer to char"
        #
        # While:
        #
        #  char ** const p;
        #
        # Means "const pointer to pointer to char"
        #
        # So when we construct PtrDecl nestings, the leftmost pointer goes in
        # as the most nested type.
        nested_type = c_ast.PtrDecl(quals=p[2] or [], type=None, coord=coord)
        if len(p) > 3:
            # Walk to the innermost (type is None) PtrDecl of the tail chain
            # and hook the new pointer in there.
            tail_type = p[3]
            while tail_type.type is not None:
                tail_type = tail_type.type
            tail_type.type = nested_type
            p[0] = p[3]
        else:
            p[0] = nested_type
1285
1286    def p_type_qualifier_list(self, p):
1287        """ type_qualifier_list : type_qualifier
1288                                | type_qualifier_list type_qualifier
1289        """
1290        p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]
1291
    def p_parameter_type_list(self, p):
        """ parameter_type_list : parameter_list
                                | parameter_list COMMA ELLIPSIS
        """
        # A trailing '...' becomes an EllipsisParam appended to the ParamList.
        if len(p) > 2:
            p[1].params.append(c_ast.EllipsisParam(self._token_coord(p, 3)))

        p[0] = p[1]
1300
    def p_parameter_list(self, p):
        """ parameter_list  : parameter_declaration
                            | parameter_list COMMA parameter_declaration
        """
        if len(p) == 2: # single parameter
            # First parameter starts the ParamList node.
            p[0] = c_ast.ParamList([p[1]], p[1].coord)
        else:
            # Append in place and pass the same ParamList up.
            p[1].params.append(p[3])
            p[0] = p[1]
1310
    # From ISO/IEC 9899:TC2, 6.7.5.3.11:
    # "If, in a parameter declaration, an identifier can be treated either
    #  as a typedef name or as a parameter name, it shall be taken as a
    #  typedef name."
    #
    # Inside a parameter declaration, once we've reduced declaration specifiers,
    # if we shift in an LPAREN and see a TYPEID, it could be either an abstract
    # declarator or a declarator nested inside parens. This rule tells us to
    # always treat it as an abstract declarator. Therefore, we only accept
    # `id_declarator`s and `typeid_noparen_declarator`s.
    def p_parameter_declaration_1(self, p):
        """ parameter_declaration   : declaration_specifiers id_declarator
                                    | declaration_specifiers typeid_noparen_declarator
        """
        spec = p[1]
        # No type specifier given: default to 'int' (implicit int).
        if not spec['type']:
            spec['type'] = [c_ast.IdentifierType(['int'],
                coord=self._token_coord(p, 1))]
        p[0] = self._build_declarations(
            spec=spec,
            decls=[dict(decl=p[2])])[0]
1332
    def p_parameter_declaration_2(self, p):
        """ parameter_declaration   : declaration_specifiers abstract_declarator_opt
        """
        spec = p[1]
        # No type specifier given: default to 'int' (implicit int).
        if not spec['type']:
            spec['type'] = [c_ast.IdentifierType(['int'],
                coord=self._token_coord(p, 1))]

        # Parameters can have the same names as typedefs.  The trouble is that
        # the parameter's name gets grouped into declaration_specifiers, making
        # it look like an old-style declaration; compensate.
        #
        # Heuristic: if the last 'type' entry is a single name that is a known
        # type in scope, it is really the parameter's name, misparsed.
        if len(spec['type']) > 1 and len(spec['type'][-1].names) == 1 and \
                self._is_type_in_scope(spec['type'][-1].names[0]):
            decl = self._build_declarations(
                    spec=spec,
                    decls=[dict(decl=p[2], init=None)])[0]

        # This truly is an old-style parameter declaration
        #
        else:
            decl = c_ast.Typename(
                name='',
                quals=spec['qual'],
                align=None,
                type=p[2] or c_ast.TypeDecl(None, None, None, None),
                coord=self._token_coord(p, 2))
            typename = spec['type']
            decl = self._fix_decl_name_type(decl, typename)

        p[0] = decl
1364
    def p_identifier_list(self, p):
        """ identifier_list : identifier
                            | identifier_list COMMA identifier
        """
        if len(p) == 2: # single parameter
            # First identifier starts a ParamList (K&R-style parameter names).
            p[0] = c_ast.ParamList([p[1]], p[1].coord)
        else:
            # Append in place and pass the same ParamList up.
            p[1].params.append(p[3])
            p[0] = p[1]
1374
    def p_initializer_1(self, p):
        """ initializer : assignment_expression
        """
        # A scalar initializer is just the expression node itself.
        p[0] = p[1]
1379
1380    def p_initializer_2(self, p):
1381        """ initializer : brace_open initializer_list_opt brace_close
1382                        | brace_open initializer_list COMMA brace_close
1383        """
1384        if p[2] is None:
1385            p[0] = c_ast.InitList([], self._token_coord(p, 1))
1386        else:
1387            p[0] = p[2]
1388
    def p_initializer_list(self, p):
        """ initializer_list    : designation_opt initializer
                                | initializer_list COMMA designation_opt initializer
        """
        if len(p) == 3: # single initializer
            # A designation (e.g. '.field =') wraps the initializer in a
            # NamedInitializer; otherwise use the initializer directly.
            init = p[2] if p[1] is None else c_ast.NamedInitializer(p[1], p[2])
            p[0] = c_ast.InitList([init], p[2].coord)
        else:
            init = p[4] if p[3] is None else c_ast.NamedInitializer(p[3], p[4])
            # Append in place and pass the same InitList up.
            p[1].exprs.append(init)
            p[0] = p[1]
1400
    def p_designation(self, p):
        """ designation : designator_list EQUALS
        """
        # Pass the designator list up; the EQUALS is dropped.
        p[0] = p[1]
1405
1406    # Designators are represented as a list of nodes, in the order in which
1407    # they're written in the code.
1408    #
1409    def p_designator_list(self, p):
1410        """ designator_list : designator
1411                            | designator_list designator
1412        """
1413        p[0] = [p[1]] if len(p) == 2 else p[1] + [p[2]]
1414
    def p_designator(self, p):
        """ designator  : LBRACKET constant_expression RBRACKET
                        | PERIOD identifier
        """
        # In both forms p[2] is the designator payload (index expr or field).
        p[0] = p[2]
1420
    def p_type_name(self, p):
        """ type_name   : specifier_qualifier_list abstract_declarator_opt
        """
        # Copy the quals list so later mutation of the spec dict can't alias it.
        typename = c_ast.Typename(
            name='',
            quals=p[1]['qual'][:],
            align=None,
            type=p[2] or c_ast.TypeDecl(None, None, None, None),
            coord=self._token_coord(p, 2))

        p[0] = self._fix_decl_name_type(typename, p[1]['type'])
1432
    def p_abstract_declarator_1(self, p):
        """ abstract_declarator     : pointer
        """
        # A bare pointer: hang the pointer chain off an empty TypeDecl.
        dummytype = c_ast.TypeDecl(None, None, None, None)
        p[0] = self._type_modify_decl(
            decl=dummytype,
            modifier=p[1])
1440
    def p_abstract_declarator_2(self, p):
        """ abstract_declarator     : pointer direct_abstract_declarator
        """
        # Wrap the direct abstract declarator in the pointer modifier chain.
        p[0] = self._type_modify_decl(p[2], p[1])
1445
    def p_abstract_declarator_3(self, p):
        """ abstract_declarator     : direct_abstract_declarator
        """
        # No pointer prefix; pass the declarator through.
        p[0] = p[1]
1450
    # Creating and using direct_abstract_declarator_opt here
    # instead of listing both direct_abstract_declarator and the
    # lack of it in the beginning of _1 and _2 caused two
    # shift/reduce errors.
    #
    def p_direct_abstract_declarator_1(self, p):
        """ direct_abstract_declarator  : LPAREN abstract_declarator RPAREN """
        # Parenthesized grouping: no new node.
        p[0] = p[2]
1459
    def p_direct_abstract_declarator_2(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LBRACKET assignment_expression_opt RBRACKET
        """
        # Array suffix on an existing abstract declarator.
        arr = c_ast.ArrayDecl(
            type=None,
            dim=p[3],
            dim_quals=[],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
1470
    def p_direct_abstract_declarator_3(self, p):
        """ direct_abstract_declarator  : LBRACKET type_qualifier_list_opt assignment_expression_opt RBRACKET
        """
        # Array suffix with nothing before it: anchor on an empty TypeDecl.
        quals = (p[2] if len(p) > 4 else []) or []
        p[0] = c_ast.ArrayDecl(
            type=c_ast.TypeDecl(None, None, None, None),
            dim=p[3] if len(p) > 4 else p[2],
            dim_quals=quals,
            coord=self._token_coord(p, 1))
1480
    def p_direct_abstract_declarator_4(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LBRACKET TIMES RBRACKET
        """
        # VLA '[*]' suffix: the dimension is an ID node holding '*'.
        arr = c_ast.ArrayDecl(
            type=None,
            dim=c_ast.ID(p[3], self._token_coord(p, 3)),
            dim_quals=[],
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=arr)
1491
    def p_direct_abstract_declarator_5(self, p):
        """ direct_abstract_declarator  : LBRACKET TIMES RBRACKET
        """
        # Bare '[*]': anchor the VLA ArrayDecl on an empty TypeDecl.
        p[0] = c_ast.ArrayDecl(
            type=c_ast.TypeDecl(None, None, None, None),
            dim=c_ast.ID(p[3], self._token_coord(p, 3)),
            dim_quals=[],
            coord=self._token_coord(p, 1))
1500
    def p_direct_abstract_declarator_6(self, p):
        """ direct_abstract_declarator  : direct_abstract_declarator LPAREN parameter_type_list_opt RPAREN
        """
        # Function suffix on an existing abstract declarator.
        func = c_ast.FuncDecl(
            args=p[3],
            type=None,
            coord=p[1].coord)

        p[0] = self._type_modify_decl(decl=p[1], modifier=func)
1510
    def p_direct_abstract_declarator_7(self, p):
        """ direct_abstract_declarator  : LPAREN parameter_type_list_opt RPAREN
        """
        # Function suffix with nothing before it: anchor on an empty TypeDecl.
        p[0] = c_ast.FuncDecl(
            args=p[2],
            type=c_ast.TypeDecl(None, None, None, None),
            coord=self._token_coord(p, 1))
1518
1519    # declaration is a list, statement isn't. To make it consistent, block_item
1520    # will always be a list
1521    #
1522    def p_block_item(self, p):
1523        """ block_item  : declaration
1524                        | statement
1525        """
1526        p[0] = p[1] if isinstance(p[1], list) else [p[1]]
1527
1528    # Since we made block_item a list, this just combines lists
1529    #
1530    def p_block_item_list(self, p):
1531        """ block_item_list : block_item
1532                            | block_item_list block_item
1533        """
1534        # Empty block items (plain ';') produce [None], so ignore them
1535        p[0] = p[1] if (len(p) == 2 or p[2] == [None]) else p[1] + p[2]
1536
    def p_compound_statement_1(self, p):
        """ compound_statement : brace_open block_item_list_opt brace_close """
        # block_items is None for an empty '{}' body.
        p[0] = c_ast.Compound(
            block_items=p[2],
            coord=self._token_coord(p, 1))
1542
    def p_labeled_statement_1(self, p):
        """ labeled_statement : ID COLON pragmacomp_or_statement """
        # 'label: stmt' — coord points at the label token.
        p[0] = c_ast.Label(p[1], p[3], self._token_coord(p, 1))
1546
    def p_labeled_statement_2(self, p):
        """ labeled_statement : CASE constant_expression COLON pragmacomp_or_statement """
        # The statement goes in as a one-element list; fix_switch_cases
        # regroups case bodies later.
        p[0] = c_ast.Case(p[2], [p[4]], self._token_coord(p, 1))
1550
    def p_labeled_statement_3(self, p):
        """ labeled_statement : DEFAULT COLON pragmacomp_or_statement """
        # Same one-element-list convention as Case.
        p[0] = c_ast.Default([p[3]], self._token_coord(p, 1))
1554
    def p_selection_statement_1(self, p):
        """ selection_statement : IF LPAREN expression RPAREN pragmacomp_or_statement """
        # If without else: iffalse is None.
        p[0] = c_ast.If(p[3], p[5], None, self._token_coord(p, 1))
1558
    def p_selection_statement_2(self, p):
        """ selection_statement : IF LPAREN expression RPAREN statement ELSE pragmacomp_or_statement """
        # If with else: p[5] is the true branch, p[7] the else branch.
        p[0] = c_ast.If(p[3], p[5], p[7], self._token_coord(p, 1))
1562
1563    def p_selection_statement_3(self, p):
1564        """ selection_statement : SWITCH LPAREN expression RPAREN pragmacomp_or_statement """
1565        p[0] = fix_switch_cases(
1566                c_ast.Switch(p[3], p[5], self._token_coord(p, 1)))
1567
1568    def p_iteration_statement_1(self, p):
1569        """ iteration_statement : WHILE LPAREN expression RPAREN pragmacomp_or_statement """
1570        p[0] = c_ast.While(p[3], p[5], self._token_coord(p, 1))
1571
1572    def p_iteration_statement_2(self, p):
1573        """ iteration_statement : DO pragmacomp_or_statement WHILE LPAREN expression RPAREN SEMI """
1574        p[0] = c_ast.DoWhile(p[5], p[2], self._token_coord(p, 1))
1575
1576    def p_iteration_statement_3(self, p):
1577        """ iteration_statement : FOR LPAREN expression_opt SEMI expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
1578        p[0] = c_ast.For(p[3], p[5], p[7], p[9], self._token_coord(p, 1))
1579
1580    def p_iteration_statement_4(self, p):
1581        """ iteration_statement : FOR LPAREN declaration expression_opt SEMI expression_opt RPAREN pragmacomp_or_statement """
1582        p[0] = c_ast.For(c_ast.DeclList(p[3], self._token_coord(p, 1)),
1583                         p[4], p[6], p[8], self._token_coord(p, 1))
1584
1585    def p_jump_statement_1(self, p):
1586        """ jump_statement  : GOTO ID SEMI """
1587        p[0] = c_ast.Goto(p[2], self._token_coord(p, 1))
1588
1589    def p_jump_statement_2(self, p):
1590        """ jump_statement  : BREAK SEMI """
1591        p[0] = c_ast.Break(self._token_coord(p, 1))
1592
1593    def p_jump_statement_3(self, p):
1594        """ jump_statement  : CONTINUE SEMI """
1595        p[0] = c_ast.Continue(self._token_coord(p, 1))
1596
1597    def p_jump_statement_4(self, p):
1598        """ jump_statement  : RETURN expression SEMI
1599                            | RETURN SEMI
1600        """
1601        p[0] = c_ast.Return(p[2] if len(p) == 4 else None, self._token_coord(p, 1))
1602
1603    def p_expression_statement(self, p):
1604        """ expression_statement : expression_opt SEMI """
1605        if p[1] is None:
1606            p[0] = c_ast.EmptyStatement(self._token_coord(p, 2))
1607        else:
1608            p[0] = p[1]
1609
1610    def p_expression(self, p):
1611        """ expression  : assignment_expression
1612                        | expression COMMA assignment_expression
1613        """
1614        if len(p) == 2:
1615            p[0] = p[1]
1616        else:
1617            if not isinstance(p[1], c_ast.ExprList):
1618                p[1] = c_ast.ExprList([p[1]], p[1].coord)
1619
1620            p[1].exprs.append(p[3])
1621            p[0] = p[1]
1622
1623    def p_parenthesized_compound_expression(self, p):
1624        """ assignment_expression : LPAREN compound_statement RPAREN """
1625        p[0] = p[2]
1626
1627    def p_typedef_name(self, p):
1628        """ typedef_name : TYPEID """
1629        p[0] = c_ast.IdentifierType([p[1]], coord=self._token_coord(p, 1))
1630
1631    def p_assignment_expression(self, p):
1632        """ assignment_expression   : conditional_expression
1633                                    | unary_expression assignment_operator assignment_expression
1634        """
1635        if len(p) == 2:
1636            p[0] = p[1]
1637        else:
1638            p[0] = c_ast.Assignment(p[2], p[1], p[3], p[1].coord)
1639
1640    # K&R2 defines these as many separate rules, to encode
    # precedence and associativity. Why work hard? I'll just use
1642    # the built in precedence/associativity specification feature
1643    # of PLY. (see precedence declaration above)
1644    #
1645    def p_assignment_operator(self, p):
1646        """ assignment_operator : EQUALS
1647                                | XOREQUAL
1648                                | TIMESEQUAL
1649                                | DIVEQUAL
1650                                | MODEQUAL
1651                                | PLUSEQUAL
1652                                | MINUSEQUAL
1653                                | LSHIFTEQUAL
1654                                | RSHIFTEQUAL
1655                                | ANDEQUAL
1656                                | OREQUAL
1657        """
1658        p[0] = p[1]
1659
1660    def p_constant_expression(self, p):
1661        """ constant_expression : conditional_expression """
1662        p[0] = p[1]
1663
1664    def p_conditional_expression(self, p):
1665        """ conditional_expression  : binary_expression
1666                                    | binary_expression CONDOP expression COLON conditional_expression
1667        """
1668        if len(p) == 2:
1669            p[0] = p[1]
1670        else:
1671            p[0] = c_ast.TernaryOp(p[1], p[3], p[5], p[1].coord)
1672
1673    def p_binary_expression(self, p):
1674        """ binary_expression   : cast_expression
1675                                | binary_expression TIMES binary_expression
1676                                | binary_expression DIVIDE binary_expression
1677                                | binary_expression MOD binary_expression
1678                                | binary_expression PLUS binary_expression
1679                                | binary_expression MINUS binary_expression
1680                                | binary_expression RSHIFT binary_expression
1681                                | binary_expression LSHIFT binary_expression
1682                                | binary_expression LT binary_expression
1683                                | binary_expression LE binary_expression
1684                                | binary_expression GE binary_expression
1685                                | binary_expression GT binary_expression
1686                                | binary_expression EQ binary_expression
1687                                | binary_expression NE binary_expression
1688                                | binary_expression AND binary_expression
1689                                | binary_expression OR binary_expression
1690                                | binary_expression XOR binary_expression
1691                                | binary_expression LAND binary_expression
1692                                | binary_expression LOR binary_expression
1693        """
1694        if len(p) == 2:
1695            p[0] = p[1]
1696        else:
1697            p[0] = c_ast.BinaryOp(p[2], p[1], p[3], p[1].coord)
1698
1699    def p_cast_expression_1(self, p):
1700        """ cast_expression : unary_expression """
1701        p[0] = p[1]
1702
1703    def p_cast_expression_2(self, p):
1704        """ cast_expression : LPAREN type_name RPAREN cast_expression """
1705        p[0] = c_ast.Cast(p[2], p[4], self._token_coord(p, 1))
1706
1707    def p_unary_expression_1(self, p):
1708        """ unary_expression    : postfix_expression """
1709        p[0] = p[1]
1710
1711    def p_unary_expression_2(self, p):
1712        """ unary_expression    : PLUSPLUS unary_expression
1713                                | MINUSMINUS unary_expression
1714                                | unary_operator cast_expression
1715        """
1716        p[0] = c_ast.UnaryOp(p[1], p[2], p[2].coord)
1717
1718    def p_unary_expression_3(self, p):
1719        """ unary_expression    : SIZEOF unary_expression
1720                                | SIZEOF LPAREN type_name RPAREN
1721                                | _ALIGNOF LPAREN type_name RPAREN
1722        """
1723        p[0] = c_ast.UnaryOp(
1724            p[1],
1725            p[2] if len(p) == 3 else p[3],
1726            self._token_coord(p, 1))
1727
1728    def p_unary_operator(self, p):
1729        """ unary_operator  : AND
1730                            | TIMES
1731                            | PLUS
1732                            | MINUS
1733                            | NOT
1734                            | LNOT
1735        """
1736        p[0] = p[1]
1737
1738    def p_postfix_expression_1(self, p):
1739        """ postfix_expression  : primary_expression """
1740        p[0] = p[1]
1741
1742    def p_postfix_expression_2(self, p):
1743        """ postfix_expression  : postfix_expression LBRACKET expression RBRACKET """
1744        p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
1745
1746    def p_postfix_expression_3(self, p):
1747        """ postfix_expression  : postfix_expression LPAREN argument_expression_list RPAREN
1748                                | postfix_expression LPAREN RPAREN
1749        """
1750        p[0] = c_ast.FuncCall(p[1], p[3] if len(p) == 5 else None, p[1].coord)
1751
1752    def p_postfix_expression_4(self, p):
1753        """ postfix_expression  : postfix_expression PERIOD ID
1754                                | postfix_expression PERIOD TYPEID
1755                                | postfix_expression ARROW ID
1756                                | postfix_expression ARROW TYPEID
1757        """
1758        field = c_ast.ID(p[3], self._token_coord(p, 3))
1759        p[0] = c_ast.StructRef(p[1], p[2], field, p[1].coord)
1760
1761    def p_postfix_expression_5(self, p):
1762        """ postfix_expression  : postfix_expression PLUSPLUS
1763                                | postfix_expression MINUSMINUS
1764        """
1765        p[0] = c_ast.UnaryOp('p' + p[2], p[1], p[1].coord)
1766
1767    def p_postfix_expression_6(self, p):
1768        """ postfix_expression  : LPAREN type_name RPAREN brace_open initializer_list brace_close
1769                                | LPAREN type_name RPAREN brace_open initializer_list COMMA brace_close
1770        """
1771        p[0] = c_ast.CompoundLiteral(p[2], p[5])
1772
1773    def p_primary_expression_1(self, p):
1774        """ primary_expression  : identifier """
1775        p[0] = p[1]
1776
1777    def p_primary_expression_2(self, p):
1778        """ primary_expression  : constant """
1779        p[0] = p[1]
1780
1781    def p_primary_expression_3(self, p):
1782        """ primary_expression  : unified_string_literal
1783                                | unified_wstring_literal
1784        """
1785        p[0] = p[1]
1786
1787    def p_primary_expression_4(self, p):
1788        """ primary_expression  : LPAREN expression RPAREN """
1789        p[0] = p[2]
1790
1791    def p_primary_expression_5(self, p):
1792        """ primary_expression  : OFFSETOF LPAREN type_name COMMA offsetof_member_designator RPAREN
1793        """
1794        coord = self._token_coord(p, 1)
1795        p[0] = c_ast.FuncCall(c_ast.ID(p[1], coord),
1796                              c_ast.ExprList([p[3], p[5]], coord),
1797                              coord)
1798
1799    def p_offsetof_member_designator(self, p):
1800        """ offsetof_member_designator : identifier
1801                                         | offsetof_member_designator PERIOD identifier
1802                                         | offsetof_member_designator LBRACKET expression RBRACKET
1803        """
1804        if len(p) == 2:
1805            p[0] = p[1]
1806        elif len(p) == 4:
1807            p[0] = c_ast.StructRef(p[1], p[2], p[3], p[1].coord)
1808        elif len(p) == 5:
1809            p[0] = c_ast.ArrayRef(p[1], p[3], p[1].coord)
1810        else:
1811            raise NotImplementedError("Unexpected parsing state. len(p): %u" % len(p))
1812
1813    def p_argument_expression_list(self, p):
1814        """ argument_expression_list    : assignment_expression
1815                                        | argument_expression_list COMMA assignment_expression
1816        """
1817        if len(p) == 2: # single expr
1818            p[0] = c_ast.ExprList([p[1]], p[1].coord)
1819        else:
1820            p[1].exprs.append(p[3])
1821            p[0] = p[1]
1822
1823    def p_identifier(self, p):
1824        """ identifier  : ID """
1825        p[0] = c_ast.ID(p[1], self._token_coord(p, 1))
1826
1827    def p_constant_1(self, p):
1828        """ constant    : INT_CONST_DEC
1829                        | INT_CONST_OCT
1830                        | INT_CONST_HEX
1831                        | INT_CONST_BIN
1832                        | INT_CONST_CHAR
1833        """
1834        uCount = 0
1835        lCount = 0
1836        for x in p[1][-3:]:
1837            if x in ('l', 'L'):
1838                lCount += 1
1839            elif x in ('u', 'U'):
1840                uCount += 1
1841        t = ''
1842        if uCount > 1:
1843             raise ValueError('Constant cannot have more than one u/U suffix.')
1844        elif lCount > 2:
1845             raise ValueError('Constant cannot have more than two l/L suffix.')
1846        prefix = 'unsigned ' * uCount + 'long ' * lCount
1847        p[0] = c_ast.Constant(
1848            prefix + 'int', p[1], self._token_coord(p, 1))
1849
1850    def p_constant_2(self, p):
1851        """ constant    : FLOAT_CONST
1852                        | HEX_FLOAT_CONST
1853        """
1854        if 'x' in p[1].lower():
1855            t = 'float'
1856        else:
1857            if p[1][-1] in ('f', 'F'):
1858                t = 'float'
1859            elif p[1][-1] in ('l', 'L'):
1860                t = 'long double'
1861            else:
1862                t = 'double'
1863
1864        p[0] = c_ast.Constant(
1865            t, p[1], self._token_coord(p, 1))
1866
1867    def p_constant_3(self, p):
1868        """ constant    : CHAR_CONST
1869                        | WCHAR_CONST
1870                        | U8CHAR_CONST
1871                        | U16CHAR_CONST
1872                        | U32CHAR_CONST
1873        """
1874        p[0] = c_ast.Constant(
1875            'char', p[1], self._token_coord(p, 1))
1876
1877    # The "unified" string and wstring literal rules are for supporting
1878    # concatenation of adjacent string literals.
1879    # I.e. "hello " "world" is seen by the C compiler as a single string literal
1880    # with the value "hello world"
1881    #
1882    def p_unified_string_literal(self, p):
1883        """ unified_string_literal  : STRING_LITERAL
1884                                    | unified_string_literal STRING_LITERAL
1885        """
1886        if len(p) == 2: # single literal
1887            p[0] = c_ast.Constant(
1888                'string', p[1], self._token_coord(p, 1))
1889        else:
1890            p[1].value = p[1].value[:-1] + p[2][1:]
1891            p[0] = p[1]
1892
1893    def p_unified_wstring_literal(self, p):
1894        """ unified_wstring_literal : WSTRING_LITERAL
1895                                    | U8STRING_LITERAL
1896                                    | U16STRING_LITERAL
1897                                    | U32STRING_LITERAL
1898                                    | unified_wstring_literal WSTRING_LITERAL
1899                                    | unified_wstring_literal U8STRING_LITERAL
1900                                    | unified_wstring_literal U16STRING_LITERAL
1901                                    | unified_wstring_literal U32STRING_LITERAL
1902        """
1903        if len(p) == 2: # single literal
1904            p[0] = c_ast.Constant(
1905                'string', p[1], self._token_coord(p, 1))
1906        else:
1907            p[1].value = p[1].value.rstrip()[:-1] + p[2][2:]
1908            p[0] = p[1]
1909
1910    def p_brace_open(self, p):
1911        """ brace_open  :   LBRACE
1912        """
1913        p[0] = p[1]
1914        p.set_lineno(0, p.lineno(1))
1915
1916    def p_brace_close(self, p):
1917        """ brace_close :   RBRACE
1918        """
1919        p[0] = p[1]
1920        p.set_lineno(0, p.lineno(1))
1921
    def p_empty(self, p):
        # The empty production; used via the *_opt rules. Yields None
        # so optional elements show up as None in the AST.
        'empty : '
        p[0] = None
1925
1926    def p_error(self, p):
1927        # If error recovery is added here in the future, make sure
1928        # _get_yacc_lookahead_token still works!
1929        #
1930        if p:
1931            self._parse_error(
1932                'before: %s' % p.value,
1933                self._coord(lineno=p.lineno,
1934                            column=self.clex.find_tok_column(p)))
1935        else:
1936            self._parse_error('At end of input', self.clex.filename)
1937