1# The C Parser.
2# Copyright (C) 2019-2021 Free Software Foundation, Inc.
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17from enum import Enum
18import re
19from vcstocl.misc_util import *
20
21class block_flags(Enum):
22    ''' Flags for the code block.
23    '''
24    else_block = 1
25    macro_defined = 2
26    macro_redefined = 3
27
28
29class block_type(Enum):
30    ''' Type of code block.
31    '''
32    file = 1
33    macro_cond = 2
34    macro_def = 3
35    macro_undef = 4
36    macro_include = 5
37    macro_info = 6
38    decl = 7
39    func = 8
40    composite = 9
41    macrocall = 10
42    fndecl = 11
43    assign = 12
44    struct = 13
45    union = 14
46    enum = 15
47
48# A dictionary describing what each action (add, modify, delete) show up as in
49# the ChangeLog output.
50actions = {0:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'},
51           block_type.file:{'new': 'New file', 'mod': 'Modified file',
52                            'del': 'Remove file'},
53           block_type.macro_cond:{'new': 'New', 'mod': 'Modified',
54                                  'del': 'Remove'},
55           block_type.macro_def:{'new': 'New', 'mod': 'Modified',
56                                 'del': 'Remove'},
57           block_type.macro_include:{'new': 'Include file', 'mod': 'Modified',
58                                     'del': 'Remove include'},
59           block_type.macro_info:{'new': 'New preprocessor message',
60                                  'mod': 'Modified', 'del': 'Remove'},
61           block_type.decl:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'},
62           block_type.func:{'new': 'New function', 'mod': 'Modified function',
63                 'del': 'Remove function'},
64           block_type.composite:{'new': 'New', 'mod': 'Modified',
65                                 'del': 'Remove'},
66           block_type.struct:{'new': 'New struct', 'mod': 'Modified struct',
67                                 'del': 'Remove struct'},
68           block_type.union:{'new': 'New union', 'mod': 'Modified union',
69                                 'del': 'Remove union'},
70           block_type.enum:{'new': 'New enum', 'mod': 'Modified enum',
71                                 'del': 'Remove enum'},
72           block_type.macrocall:{'new': 'New', 'mod': 'Modified',
73                                 'del': 'Remove'},
74           block_type.fndecl:{'new': 'New function', 'mod': 'Modified',
75                              'del': 'Remove'},
76           block_type.assign:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'}}
77
78def new_block(name, type, contents, parent, flags = 0):
79    '''  Create a new code block with the parent as PARENT.
80
81    The code block is a basic structure around which the tree representation of
82    the source code is built.  It has the following attributes:
83
84    - name: A name to refer it by in the ChangeLog
85    - type: Any one of the following types in BLOCK_TYPE.
86    - contents: The contents of the block.  For a block of types file or
87      macro_cond, this would be a list of blocks that it nests.  For other types
88      it is a list with a single string specifying its contents.
89    - parent: This is the parent of the current block, useful in setting up
90      #elif or #else blocks in the tree.
91    - flags: A special field to indicate some properties of the block. See
92      BLOCK_FLAGS for values.
93    '''
94    block = {}
95    block['matched'] = False
96    block['name'] = name
97    block['type'] = type
98    block['contents'] = contents
99    block['parent'] = parent
100    if parent:
101        parent['contents'].append(block)
102
103    block['flags'] = flags
104    block['actions'] = actions[type]
105
106    return block
107
108
109class ExprParser:
110    ''' Parent class of all of the C expression parsers.
111
112    It is necessary that the children override the parse_line() method.
113    '''
114    ATTRIBUTE = r'(((__attribute__\s*\(\([^;]+\)\))|(asm\s*\([?)]+\)))\s*)*'
115
116    def __init__(self, project_quirks, debug):
117        self.project_quirks = project_quirks
118        self.debug = debug
119
120    def fast_forward_scope(self, cur, op, loc):
121        ''' Consume lines in a code block.
122
123        Consume all lines of a block of code such as a composite type declaration or
124        a function declaration.
125
126        - CUR is the string to consume this expression from
127        - OP is the string array for the file
128        - LOC is the first unread location in CUR
129
130        - Returns: The next location to be read in the array as well as the updated
131          value of CUR, which will now have the body of the function or composite
132          type.
133        '''
134        nesting = cur.count('{') - cur.count('}')
135        while nesting > 0 and loc < len(op):
136            cur = cur + ' ' + op[loc]
137
138            nesting = nesting + op[loc].count('{')
139            nesting = nesting - op[loc].count('}')
140            loc = loc + 1
141
142        return (cur, loc)
143
144    def parse_line(self, cur, op, loc, code, macros):
145        ''' The parse method should always be overridden by the child.
146        '''
147        raise
148
149
150class FuncParser(ExprParser):
151    REGEX = re.compile(ExprParser.ATTRIBUTE + r'\s*(\w+)\s*\([^(][^{]+\)\s*{')
152
153    def parse_line(self, cur, op, loc, code, macros):
154        ''' Parse a function.
155
156        Match a function definition.
157
158        - CUR is the string to consume this expression from
159        - OP is the string array for the file
160        - LOC is the first unread location in CUR
161        - CODE is the block to which we add this
162
163        - Returns: The next location to be read in the array.
164        '''
165        found = re.search(self.REGEX, cur)
166        if not found:
167            return cur, loc
168
169        name = found.group(5)
170        self.debug.print('FOUND FUNC: %s' % name)
171
172        # Consume everything up to the ending brace of the function.
173        (cur, loc) = self.fast_forward_scope(cur, op, loc)
174
175        new_block(name, block_type.func, [cur], code)
176
177        return '', loc
178
179
180class CompositeParser(ExprParser):
181    # Composite types such as structs and unions.
182    REGEX = re.compile(r'(struct|union|enum)\s*(\w*)\s*{')
183
184    def parse_line(self, cur, op, loc, code, macros):
185        ''' Parse a composite type.
186
187        Match declaration of a composite type such as a sruct or a union..
188
189        - CUR is the string to consume this expression from
190        - OP is the string array for the file
191        - LOC is the first unread location in CUR
192        - CODE is the block to which we add this
193
194        - Returns: The next location to be read in the array.
195        '''
196        found = re.search(self.REGEX, cur)
197        if not found:
198            return cur, loc
199
200        # Lap up all of the struct definition.
201        (cur, loc) = self.fast_forward_scope(cur, op, loc)
202
203        name = found.group(2)
204
205        if not name:
206            if 'typedef' in cur:
207                name = re.sub(r'.*}\s*(\w+);$', r'\1', cur)
208            else:
209                name= '<anoymous>'
210
211        ctype = found.group(1)
212
213        if ctype == 'struct':
214            blocktype = block_type.struct
215        if ctype == 'enum':
216            blocktype = block_type.enum
217        if ctype == 'union':
218            blocktype = block_type.union
219
220        new_block(name, block_type.composite, [cur], code)
221
222        return '', loc
223
224
225class AssignParser(ExprParser):
226    # Static assignments.
227    REGEX = re.compile(r'(\w+)\s*(\[[^\]]*\])*\s*([^\s]*attribute[\s\w()]+)?\s*=')
228
229    def parse_line(self, cur, op, loc, code, macros):
230        ''' Parse an assignment statement.
231
232        This includes array assignments.
233
234        - CUR is the string to consume this expression from
235        - OP is the string array for the file
236        - LOC is the first unread location in CUR
237        - CODE is the block to which we add this
238
239        - Returns: The next location to be read in the array.
240        '''
241        found = re.search(self.REGEX, cur)
242        if not found:
243            return cur, loc
244
245        name = found.group(1)
246        self.debug.print('FOUND ASSIGN: %s' % name)
247        # Lap up everything up to semicolon.
248        while ';' not in cur and loc < len(op):
249            cur = op[loc]
250            loc = loc + 1
251
252        new_block(name, block_type.assign, [cur], code)
253
254        return '', loc
255
256
257class DeclParser(ExprParser):
258    # Function pointer typedefs.
259    TYPEDEF_FN_RE = re.compile(r'\(\*(\w+)\)\s*\([^)]+\);')
260
261    # Simple decls.
262    DECL_RE = re.compile(r'(\w+)(\[\w*\])*\s*' + ExprParser.ATTRIBUTE + ';')
263
264    # __typeof decls.
265    TYPEOF_RE = re.compile(r'__typeof\s*\([\w\s]+\)\s*(\w+)\s*' + \
266                           ExprParser.ATTRIBUTE + ';')
267
268    # Function Declarations.
269    FNDECL_RE = re.compile(r'\s*(\w+)\s*\(([^\(][^;]*)?\)\s*' +
270                           ExprParser.ATTRIBUTE + ';')
271
272    def __init__(self, regex, blocktype, project_quirks, debug):
273        # The regex for the current instance.
274        self.REGEX = regex
275        self.blocktype = blocktype
276        super().__init__(project_quirks, debug)
277
278    def parse_line(self, cur, op, loc, code, macros):
279        ''' Parse a top level declaration.
280
281        All types of declarations except function declarations.
282
283        - CUR is the string to consume this expression from
284        - OP is the string array for the file
285        - LOC is the first unread location in CUR
286        - CODE is the block to which we add this function
287
288        - Returns: The next location to be read in the array.
289        '''
290        found = re.search(self.REGEX, cur)
291        if not found:
292            return cur, loc
293
294        # The name is the first group for all of the above regexes.  This is a
295        # coincidence, so care must be taken if regexes are added or changed to
296        # ensure that this is true.
297        name = found.group(1)
298
299        self.debug.print('FOUND DECL: %s' % name)
300        new_block(name, self.blocktype, [cur], code)
301
302        return '', loc
303
304
305class MacroParser(ExprParser):
306    # The macrocall_re peeks into the next line to ensure that it doesn't
307    # eat up a FUNC by accident.  The func_re regex is also quite crude and
308    # only intends to ensure that the function name gets picked up
309    # correctly.
310    MACROCALL_RE = re.compile(r'(\w+)\s*(\(.*\))*$')
311
312    def parse_line(self, cur, op, loc, code, macros):
313        ''' Parse a macro call.
314
315        Match a symbol hack macro calls that get added without semicolons.
316
317        - CUR is the string to consume this expression from
318        - OP is the string array for the file
319        - LOC is the first unread location in CUR
320        - CODE is the block to which we add this
321        - MACROS is the regex match object.
322
323        - Returns: The next location to be read in the array.
324        '''
325
326        # First we have the macros for symbol hacks and all macros we identified so
327        # far.
328        if cur.count('(') != cur.count(')'):
329            return cur, loc
330        if loc < len(op) and '{' in op[loc]:
331            return cur, loc
332
333        found = re.search(self.MACROCALL_RE, cur)
334        if found:
335            sym = found.group(1)
336            name = found.group(2)
337            if sym in macros or self.project_quirks and \
338                    sym in self.project_quirks.C_MACROS:
339                self.debug.print('FOUND MACROCALL: %s (%s)' % (sym, name))
340                new_block(sym, block_type.macrocall, [cur], code)
341                return '', loc
342
343        # Next, there could be macros that get called right inside their #ifdef, but
344        # without the semi-colon.
345        if cur.strip() == code['name'].strip():
346            self.debug.print('FOUND MACROCALL (without brackets): %s' % (cur))
347            new_block(cur, block_type.macrocall, [cur], code)
348            return '',loc
349
350        return cur, loc
351
352
353class Frontend:
354    ''' The C Frontend implementation.
355    '''
356    KNOWN_MACROS = []
357
358    def __init__(self, project_quirks, debug):
359        self.op = []
360        self.debug = debug
361        self.project_quirks = project_quirks
362
363        self.c_expr_parsers = [
364                CompositeParser(project_quirks, debug),
365                AssignParser(project_quirks, debug),
366                DeclParser(DeclParser.TYPEOF_RE, block_type.decl,
367                           project_quirks, debug),
368                DeclParser(DeclParser.TYPEDEF_FN_RE, block_type.decl,
369                           project_quirks, debug),
370                DeclParser(DeclParser.FNDECL_RE, block_type.fndecl,
371                           project_quirks, debug),
372                FuncParser(project_quirks, debug),
373                DeclParser(DeclParser.DECL_RE, block_type.decl, project_quirks,
374                           debug),
375                MacroParser(project_quirks, debug)]
376
377
378    def remove_extern_c(self):
379        ''' Process extern "C"/"C++" block nesting.
380
381        The extern "C" nesting does not add much value so it's safe to almost always
382        drop it.  Also drop extern "C++"
383        '''
384        new_op = []
385        nesting = 0
386        extern_nesting = 0
387        for l in self.op:
388            if '{' in l:
389                nesting = nesting + 1
390            if re.match(r'extern\s*"C"\s*{', l):
391                extern_nesting = nesting
392                continue
393            if '}' in l:
394                nesting = nesting - 1
395                if nesting < extern_nesting:
396                    extern_nesting = 0
397                    continue
398            new_op.append(l)
399
400        # Now drop all extern C++ blocks.
401        self.op = new_op
402        new_op = []
403        nesting = 0
404        extern_nesting = 0
405        in_cpp = False
406        for l in self.op:
407            if re.match(r'extern\s*"C\+\+"\s*{', l):
408                nesting = nesting + 1
409                in_cpp = True
410
411            if in_cpp:
412                if '{' in l:
413                    nesting = nesting + 1
414                if '}' in l:
415                    nesting = nesting - 1
416            if nesting == 0:
417                new_op.append(l)
418
419        self.op = new_op
420
421
422    def remove_comments(self, op):
423        ''' Remove comments.
424
425        Return OP by removing all comments from it.
426        '''
427        self.debug.print('REMOVE COMMENTS')
428
429        sep='\n'
430        opstr = sep.join(op)
431        opstr = re.sub(r'/\*.*?\*/', r'', opstr, flags=re.MULTILINE | re.DOTALL)
432        opstr = re.sub(r'\\\n', r' ', opstr, flags=re.MULTILINE | re.DOTALL)
433        new_op = list(filter(None, opstr.split(sep)))
434
435        return new_op
436
437
438    def normalize_condition(self, name):
439        ''' Make some minor transformations on macro conditions to make them more
440        readable.
441        '''
442        # Negation with a redundant bracket.
443        name = re.sub(r'!\s*\(\s*(\w+)\s*\)', r'! \1', name)
444        # Pull in negation of equality.
445        name = re.sub(r'!\s*\(\s*(\w+)\s*==\s*(\w+)\)', r'\1 != \2', name)
446        # Pull in negation of inequality.
447        name = re.sub(r'!\s*\(\s*(\w+)\s*!=\s*(\w+)\)', r'\1 == \2', name)
448        # Fix simple double negation.
449        name = re.sub(r'!\s*\(\s*!\s*(\w+)\s*\)', r'\1', name)
450        # Similar, but nesting a complex expression.  Because of the greedy match,
451        # this matches only the outermost brackets.
452        name = re.sub(r'!\s*\(\s*!\s*\((.*)\)\s*\)$', r'\1', name)
453        return name
454
455
456    def parse_preprocessor(self, loc, code, start = ''):
457        ''' Parse a preprocessor directive.
458
459        In case a preprocessor condition (i.e. if/elif/else), create a new code
460        block to nest code into and in other cases, identify and add entities suchas
461        include files, defines, etc.
462
463        - OP is the string array for the file
464        - LOC is the first unread location in CUR
465        - CODE is the block to which we add this function
466        - START is the string that should continue to be expanded in case we step
467          into a new macro scope.
468
469        - Returns: The next location to be read in the array.
470        '''
471        cur = self.op[loc]
472        loc = loc + 1
473        endblock = False
474
475        self.debug.print('PARSE_MACRO: %s' % cur)
476
477        # Remove the # and strip spaces again.
478        cur = cur[1:].strip()
479
480        # Include file.
481        if cur.find('include') == 0:
482            m = re.search(r'include\s*["<]?([^">]+)[">]?', cur)
483            new_block(m.group(1), block_type.macro_include, [cur], code)
484
485        # Macro definition.
486        if cur.find('define') == 0:
487            m = re.search(r'define\s+([a-zA-Z0-9_]+)', cur)
488            name = m.group(1)
489            exists = False
490            # Find out if this is a redefinition.
491            for c in code['contents']:
492                if c['name'] == name and c['type'] == block_type.macro_def:
493                    c['flags'] = block_flags.macro_redefined
494                    exists = True
495                    break
496            if not exists:
497                new_block(m.group(1), block_type.macro_def, [cur], code,
498                          block_flags.macro_defined)
499                # Add macros as we encounter them.
500                self.KNOWN_MACROS.append(m.group(1))
501
502        # Macro undef.
503        if cur.find('undef') == 0:
504            m = re.search(r'undef\s+([a-zA-Z0-9_]+)', cur)
505            new_block(m.group(1), block_type.macro_def, [cur], code)
506
507        # #error and #warning macros.
508        if cur.find('error') == 0 or cur.find('warning') == 0:
509            m = re.search(r'(error|warning)\s+"?(.*)"?', cur)
510            if m:
511                name = m.group(2)
512            else:
513                name = '<blank>'
514            new_block(name, block_type.macro_info, [cur], code)
515
516        # Start of an #if or #ifdef block.
517        elif cur.find('if') == 0:
518            rem = re.sub(r'ifndef', r'!', cur).strip()
519            rem = re.sub(r'(ifdef|defined|if)', r'', rem).strip()
520            rem = self.normalize_condition(rem)
521            ifdef = new_block(rem, block_type.macro_cond, [], code)
522            ifdef['headcond'] = ifdef
523            ifdef['start'] = start
524            loc = self.parse_line(loc, ifdef, start)
525
526        # End the previous #if/#elif and begin a new block.
527        elif cur.find('elif') == 0 and code['parent']:
528            rem = self.normalize_condition(re.sub(r'(elif|defined)', r'', cur).strip())
529            # The #else and #elif blocks should go into the current block's parent.
530            ifdef = new_block(rem, block_type.macro_cond, [], code['parent'])
531            ifdef['headcond'] = code['headcond']
532            loc = self.parse_line(loc, ifdef, code['headcond']['start'])
533            endblock = True
534
535        # End the previous #if/#elif and begin a new block.
536        elif cur.find('else') == 0 and code['parent']:
537            name = self.normalize_condition('!(' + code['name'] + ')')
538            ifdef = new_block(name, block_type.macro_cond, [], code['parent'],
539                              block_flags.else_block)
540            ifdef['headcond'] = code['headcond']
541            loc = self.parse_line(loc, ifdef, code['headcond']['start'])
542            endblock = True
543
544        elif cur.find('endif') == 0 and code['parent']:
545            # Insert an empty else block if there isn't one.
546            if code['flags'] != block_flags.else_block:
547                name = self.normalize_condition('!(' + code['name'] + ')')
548                ifdef = new_block(name, block_type.macro_cond, [], code['parent'],
549                                  block_flags.else_block)
550                ifdef['headcond'] = code['headcond']
551                loc = self.parse_line(loc - 1, ifdef, code['headcond']['start'])
552            endblock = True
553
554        return (loc, endblock)
555
556
557    def parse_c_expr(self, cur, loc, code):
558        ''' Parse a C expression.
559
560        CUR is the string to be parsed, which continues to grow until a match is
561        found.  OP is the string array and LOC is the first unread location in the
562        string array.  CODE is the block in which any identified expressions should
563        be added.
564        '''
565        self.debug.print('PARSING: %s' % cur)
566
567        for p in self.c_expr_parsers:
568            cur, loc = p.parse_line(cur, self.op, loc, code, self.KNOWN_MACROS)
569            if not cur:
570                break
571
572        return cur, loc
573
574
575    def expand_problematic_macros(self, cur):
576        ''' Replace problem macros with their substitutes in CUR.
577        '''
578        for p in self.project_quirks.MACRO_QUIRKS:
579            cur = re.sub(p['orig'], p['sub'], cur)
580
581        return cur
582
583
584    def parse_line(self, loc, code, start = ''):
585        '''
586        Parse the file line by line.  The function assumes a mostly GNU coding
587        standard compliant input so it might barf with anything that is eligible for
588        the Obfuscated C code contest.
589
590        The basic idea of the parser is to identify macro conditional scopes and
591        definitions, includes, etc. and then parse the remaining C code in the
592        context of those macro scopes.  The parser does not try to understand the
593        semantics of the code or even validate its syntax.  It only records high
594        level symbols in the source and makes a tree structure to indicate the
595        declaration/definition of those symbols and their scope in the macro
596        definitions.
597
598        OP is the string array.
599        LOC is the first unparsed line.
600        CODE is the block scope within which the parsing is currently going on.
601        START is the string with which this parsing should start.
602        '''
603        cur = start
604        endblock = False
605        saved_cur = ''
606        saved_loc = 0
607        endblock_loc = loc
608
609        while loc < len(self.op):
610            nextline = self.op[loc]
611
612            # Macros.
613            if nextline[0] == '#':
614                (loc, endblock) = self.parse_preprocessor(loc, code, cur)
615                if endblock:
616                    endblock_loc = loc
617            # Rest of C Code.
618            else:
619                cur = cur + ' ' + nextline
620                cur = self.expand_problematic_macros(cur).strip()
621                cur, loc = self.parse_c_expr(cur, loc + 1, code)
622
623            if endblock and not cur:
624                # If we are returning from the first #if block, we want to proceed
625                # beyond the current block, not repeat it for any preceding blocks.
626                if code['headcond'] == code:
627                    return loc
628                else:
629                    return endblock_loc
630
631        return loc
632
633    def drop_empty_blocks(self, tree):
634        ''' Drop empty macro conditional blocks.
635        '''
636        newcontents = []
637
638        for x in tree['contents']:
639            if x['type'] != block_type.macro_cond or len(x['contents']) > 0:
640                newcontents.append(x)
641
642        for t in newcontents:
643            if t['type'] == block_type.macro_cond:
644                self.drop_empty_blocks(t)
645
646        tree['contents'] = newcontents
647
648
649    def consolidate_tree_blocks(self, tree):
650        ''' Consolidate common macro conditional blocks.
651
652        Get macro conditional blocks at the same level but scatterred across the
653        file together into a single common block to allow for better comparison.
654        '''
655        # Nothing to do for non-nesting blocks.
656        if tree['type'] != block_type.macro_cond \
657                and tree['type'] != block_type.file:
658            return
659
660        # Now for nesting blocks, get the list of unique condition names and
661        # consolidate code under them.  The result also bunches up all the
662        # conditions at the top.
663        newcontents = []
664
665        macros = [x for x in tree['contents'] \
666                  if x['type'] == block_type.macro_cond]
667        macro_names = sorted(set([x['name'] for x in macros]))
668        for m in macro_names:
669            nc = [x['contents'] for x in tree['contents'] if x['name'] == m \
670                    and x['type'] == block_type.macro_cond]
671            b = new_block(m, block_type.macro_cond, sum(nc, []), tree)
672            self.consolidate_tree_blocks(b)
673            newcontents.append(b)
674
675        newcontents.extend([x for x in tree['contents'] \
676                            if x['type'] != block_type.macro_cond])
677
678        tree['contents'] = newcontents
679
680
681    def compact_tree(self, tree):
682        ''' Try to reduce the tree to its minimal form.
683
684        A source code tree in its simplest form may have a lot of duplicated
685        information that may be difficult to compare and come up with a minimal
686        difference.
687        '''
688
689        # First, drop all empty blocks.
690        self.drop_empty_blocks(tree)
691
692        # Macro conditions that nest the entire file aren't very interesting.  This
693        # should take care of the header guards.
694        if tree['type'] == block_type.file \
695                and len(tree['contents']) == 1 \
696                and tree['contents'][0]['type'] == block_type.macro_cond:
697            tree['contents'] = tree['contents'][0]['contents']
698
699        # Finally consolidate all macro conditional blocks.
700        self.consolidate_tree_blocks(tree)
701
702
703    def parse(self, op):
704        ''' File parser.
705
706        Parse the input array of lines OP and generate a tree structure to
707        represent the file.  This tree structure is then used for comparison between
708        the old and new file.
709        '''
710        self.KNOWN_MACROS = []
711        tree = new_block('', block_type.file, [], None)
712        self.op = self.remove_comments(op)
713        self.remove_extern_c()
714        self.op = [re.sub(r'#\s+', '#', x) for x in self.op]
715        self.parse_line(0, tree)
716        self.compact_tree(tree)
717        self.dump_tree(tree, 0)
718
719        return tree
720
721
722    def print_change(self, tree, action, prologue = ''):
723        ''' Print the nature of the differences found in the tree compared to the
724        other tree.  TREE is the tree that changed, action is what the change was
725        (Added, Removed, Modified) and prologue specifies the macro scope the change
726        is in.  The function calls itself recursively for all macro condition tree
727        nodes.
728        '''
729
730        if tree['type'] != block_type.macro_cond:
731            print('\t%s(%s): %s.' % (prologue, tree['name'], action))
732            return
733
734        prologue = '%s[%s]' % (prologue, tree['name'])
735        for t in tree['contents']:
736            if t['type'] == block_type.macro_cond:
737                self.print_change(t, action, prologue)
738            else:
739                print('\t%s(%s): %s.' % (prologue, t['name'], action))
740
741
742    def compare_trees(self, left, right, prologue = ''):
743        ''' Compare two trees and print the difference.
744
745        This routine is the entry point to compare two trees and print out their
746        differences.  LEFT and RIGHT will always have the same name and type,
747        starting with block_type.file and '' at the top level.
748        '''
749
750        if left['type'] == block_type.macro_cond or left['type'] == block_type.file:
751
752            if left['type'] == block_type.macro_cond:
753                prologue = '%s[%s]' % (prologue, left['name'])
754
755            # Make sure that everything in the left tree exists in the right tree.
756            for cl in left['contents']:
757                found = False
758                for cr in right['contents']:
759                    if not cl['matched'] and not cr['matched'] and \
760                            cl['name'] == cr['name'] and cl['type'] == cr['type']:
761                        cl['matched'] = cr['matched'] = True
762                        self.compare_trees(cl, cr, prologue)
763                        found = True
764                        break
765                if not found:
766                    self.print_change(cl, cl['actions']['del'], prologue)
767
768            # ... and vice versa.  This time we only need to look at unmatched
769            # contents.
770            for cr in right['contents']:
771                if not cr['matched']:
772                    self.print_change(cr, cr['actions']['new'], prologue)
773        else:
774            if left['contents'] != right['contents']:
775                self.print_change(left, left['actions']['mod'], prologue)
776
777
778    def dump_tree(self, tree, indent):
779        ''' Print the entire tree.
780        '''
781        if not self.debug.debug:
782            return
783
784        if tree['type'] == block_type.macro_cond or tree['type'] == block_type.file:
785            print('%sScope: %s' % (' ' * indent, tree['name']))
786            for c in tree['contents']:
787                self.dump_tree(c, indent + 4)
788            print('%sEndScope: %s' % (' ' * indent, tree['name']))
789        else:
790            if tree['type'] == block_type.func:
791                print('%sFUNC: %s' % (' ' * indent, tree['name']))
792            elif tree['type'] == block_type.composite:
793                print('%sCOMPOSITE: %s' % (' ' * indent, tree['name']))
794            elif tree['type'] == block_type.assign:
795                print('%sASSIGN: %s' % (' ' * indent, tree['name']))
796            elif tree['type'] == block_type.fndecl:
797                print('%sFNDECL: %s' % (' ' * indent, tree['name']))
798            elif tree['type'] == block_type.decl:
799                print('%sDECL: %s' % (' ' * indent, tree['name']))
800            elif tree['type'] == block_type.macrocall:
801                print('%sMACROCALL: %s' % (' ' * indent, tree['name']))
802            elif tree['type'] == block_type.macro_def:
803                print('%sDEFINE: %s' % (' ' * indent, tree['name']))
804            elif tree['type'] == block_type.macro_include:
805                print('%sINCLUDE: %s' % (' ' * indent, tree['name']))
806            elif tree['type'] == block_type.macro_undef:
807                print('%sUNDEF: %s' % (' ' * indent, tree['name']))
808            else:
809                print('%sMACRO LEAF: %s' % (' ' * indent, tree['name']))
810
811
812    def compare(self, oldfile, newfile):
813        ''' Entry point for the C backend.
814
815        Parse the two files into trees and compare them.  Print the result of the
816        comparison in the ChangeLog-like format.
817        '''
818        self.debug.print('LEFT TREE')
819        self.debug.print('-' * 80)
820        left = self.parse(oldfile)
821
822        self.debug.print('RIGHT TREE')
823        self.debug.print('-' * 80)
824        right = self.parse(newfile)
825
826        self.compare_trees(left, right)
827