1# The C Parser. 2# Copyright (C) 2019-2021 Free Software Foundation, Inc. 3# 4# This program is free software: you can redistribute it and/or modify 5# it under the terms of the GNU General Public License as published by 6# the Free Software Foundation; either version 3 of the License, or 7# (at your option) any later version. 8# 9# This program is distributed in the hope that it will be useful, 10# but WITHOUT ANY WARRANTY; without even the implied warranty of 11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12# GNU General Public License for more details. 13# 14# You should have received a copy of the GNU General Public License 15# along with this program. If not, see <https://www.gnu.org/licenses/>. 16 17from enum import Enum 18import re 19from vcstocl.misc_util import * 20 21class block_flags(Enum): 22 ''' Flags for the code block. 23 ''' 24 else_block = 1 25 macro_defined = 2 26 macro_redefined = 3 27 28 29class block_type(Enum): 30 ''' Type of code block. 31 ''' 32 file = 1 33 macro_cond = 2 34 macro_def = 3 35 macro_undef = 4 36 macro_include = 5 37 macro_info = 6 38 decl = 7 39 func = 8 40 composite = 9 41 macrocall = 10 42 fndecl = 11 43 assign = 12 44 struct = 13 45 union = 14 46 enum = 15 47 48# A dictionary describing what each action (add, modify, delete) show up as in 49# the ChangeLog output. 50actions = {0:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'}, 51 block_type.file:{'new': 'New file', 'mod': 'Modified file', 52 'del': 'Remove file'}, 53 block_type.macro_cond:{'new': 'New', 'mod': 'Modified', 54 'del': 'Remove'}, 55 block_type.macro_def:{'new': 'New', 'mod': 'Modified', 56 'del': 'Remove'}, 57 block_type.macro_include:{'new': 'Include file', 'mod': 'Modified', 58 'del': 'Remove include'}, 59 block_type.macro_info:{'new': 'New preprocessor message', 60 'mod': 'Modified', 'del': 'Remove'}, 61 block_type.decl:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'}, 62 block_type.func:{'new': 'New function', 'mod': 'Modified function', 63 'del': 'Remove function'}, 64 block_type.composite:{'new': 'New', 'mod': 'Modified', 65 'del': 'Remove'}, 66 block_type.struct:{'new': 'New struct', 'mod': 'Modified struct', 67 'del': 'Remove struct'}, 68 block_type.union:{'new': 'New union', 'mod': 'Modified union', 69 'del': 'Remove union'}, 70 block_type.enum:{'new': 'New enum', 'mod': 'Modified enum', 71 'del': 'Remove enum'}, 72 block_type.macrocall:{'new': 'New', 'mod': 'Modified', 73 'del': 'Remove'}, 74 block_type.fndecl:{'new': 'New function', 'mod': 'Modified', 75 'del': 'Remove'}, 76 block_type.assign:{'new': 'New', 'mod': 'Modified', 'del': 'Remove'}} 77 78def new_block(name, type, contents, parent, flags = 0): 79 ''' Create a new code block with the parent as PARENT. 80 81 The code block is a basic structure around which the tree representation of 82 the source code is built. It has the following attributes: 83 84 - name: A name to refer it by in the ChangeLog 85 - type: Any one of the following types in BLOCK_TYPE. 86 - contents: The contents of the block. For a block of types file or 87 macro_cond, this would be a list of blocks that it nests. For other types 88 it is a list with a single string specifying its contents. 89 - parent: This is the parent of the current block, useful in setting up 90 #elif or #else blocks in the tree. 91 - flags: A special field to indicate some properties of the block. See 92 BLOCK_FLAGS for values. 93 ''' 94 block = {} 95 block['matched'] = False 96 block['name'] = name 97 block['type'] = type 98 block['contents'] = contents 99 block['parent'] = parent 100 if parent: 101 parent['contents'].append(block) 102 103 block['flags'] = flags 104 block['actions'] = actions[type] 105 106 return block 107 108 109class ExprParser: 110 ''' Parent class of all of the C expression parsers. 111 112 It is necessary that the children override the parse_line() method. 113 ''' 114 ATTRIBUTE = r'(((__attribute__\s*\(\([^;]+\)\))|(asm\s*\([?)]+\)))\s*)*' 115 116 def __init__(self, project_quirks, debug): 117 self.project_quirks = project_quirks 118 self.debug = debug 119 120 def fast_forward_scope(self, cur, op, loc): 121 ''' Consume lines in a code block. 122 123 Consume all lines of a block of code such as a composite type declaration or 124 a function declaration. 125 126 - CUR is the string to consume this expression from 127 - OP is the string array for the file 128 - LOC is the first unread location in CUR 129 130 - Returns: The next location to be read in the array as well as the updated 131 value of CUR, which will now have the body of the function or composite 132 type. 133 ''' 134 nesting = cur.count('{') - cur.count('}') 135 while nesting > 0 and loc < len(op): 136 cur = cur + ' ' + op[loc] 137 138 nesting = nesting + op[loc].count('{') 139 nesting = nesting - op[loc].count('}') 140 loc = loc + 1 141 142 return (cur, loc) 143 144 def parse_line(self, cur, op, loc, code, macros): 145 ''' The parse method should always be overridden by the child. 146 ''' 147 raise 148 149 150class FuncParser(ExprParser): 151 REGEX = re.compile(ExprParser.ATTRIBUTE + r'\s*(\w+)\s*\([^(][^{]+\)\s*{') 152 153 def parse_line(self, cur, op, loc, code, macros): 154 ''' Parse a function. 155 156 Match a function definition. 157 158 - CUR is the string to consume this expression from 159 - OP is the string array for the file 160 - LOC is the first unread location in CUR 161 - CODE is the block to which we add this 162 163 - Returns: The next location to be read in the array. 164 ''' 165 found = re.search(self.REGEX, cur) 166 if not found: 167 return cur, loc 168 169 name = found.group(5) 170 self.debug.print('FOUND FUNC: %s' % name) 171 172 # Consume everything up to the ending brace of the function. 173 (cur, loc) = self.fast_forward_scope(cur, op, loc) 174 175 new_block(name, block_type.func, [cur], code) 176 177 return '', loc 178 179 180class CompositeParser(ExprParser): 181 # Composite types such as structs and unions. 182 REGEX = re.compile(r'(struct|union|enum)\s*(\w*)\s*{') 183 184 def parse_line(self, cur, op, loc, code, macros): 185 ''' Parse a composite type. 186 187 Match declaration of a composite type such as a sruct or a union.. 188 189 - CUR is the string to consume this expression from 190 - OP is the string array for the file 191 - LOC is the first unread location in CUR 192 - CODE is the block to which we add this 193 194 - Returns: The next location to be read in the array. 195 ''' 196 found = re.search(self.REGEX, cur) 197 if not found: 198 return cur, loc 199 200 # Lap up all of the struct definition. 201 (cur, loc) = self.fast_forward_scope(cur, op, loc) 202 203 name = found.group(2) 204 205 if not name: 206 if 'typedef' in cur: 207 name = re.sub(r'.*}\s*(\w+);$', r'\1', cur) 208 else: 209 name= '<anoymous>' 210 211 ctype = found.group(1) 212 213 if ctype == 'struct': 214 blocktype = block_type.struct 215 if ctype == 'enum': 216 blocktype = block_type.enum 217 if ctype == 'union': 218 blocktype = block_type.union 219 220 new_block(name, block_type.composite, [cur], code) 221 222 return '', loc 223 224 225class AssignParser(ExprParser): 226 # Static assignments. 227 REGEX = re.compile(r'(\w+)\s*(\[[^\]]*\])*\s*([^\s]*attribute[\s\w()]+)?\s*=') 228 229 def parse_line(self, cur, op, loc, code, macros): 230 ''' Parse an assignment statement. 231 232 This includes array assignments. 233 234 - CUR is the string to consume this expression from 235 - OP is the string array for the file 236 - LOC is the first unread location in CUR 237 - CODE is the block to which we add this 238 239 - Returns: The next location to be read in the array. 240 ''' 241 found = re.search(self.REGEX, cur) 242 if not found: 243 return cur, loc 244 245 name = found.group(1) 246 self.debug.print('FOUND ASSIGN: %s' % name) 247 # Lap up everything up to semicolon. 248 while ';' not in cur and loc < len(op): 249 cur = op[loc] 250 loc = loc + 1 251 252 new_block(name, block_type.assign, [cur], code) 253 254 return '', loc 255 256 257class DeclParser(ExprParser): 258 # Function pointer typedefs. 259 TYPEDEF_FN_RE = re.compile(r'\(\*(\w+)\)\s*\([^)]+\);') 260 261 # Simple decls. 262 DECL_RE = re.compile(r'(\w+)(\[\w*\])*\s*' + ExprParser.ATTRIBUTE + ';') 263 264 # __typeof decls. 265 TYPEOF_RE = re.compile(r'__typeof\s*\([\w\s]+\)\s*(\w+)\s*' + \ 266 ExprParser.ATTRIBUTE + ';') 267 268 # Function Declarations. 269 FNDECL_RE = re.compile(r'\s*(\w+)\s*\(([^\(][^;]*)?\)\s*' + 270 ExprParser.ATTRIBUTE + ';') 271 272 def __init__(self, regex, blocktype, project_quirks, debug): 273 # The regex for the current instance. 274 self.REGEX = regex 275 self.blocktype = blocktype 276 super().__init__(project_quirks, debug) 277 278 def parse_line(self, cur, op, loc, code, macros): 279 ''' Parse a top level declaration. 280 281 All types of declarations except function declarations. 282 283 - CUR is the string to consume this expression from 284 - OP is the string array for the file 285 - LOC is the first unread location in CUR 286 - CODE is the block to which we add this function 287 288 - Returns: The next location to be read in the array. 289 ''' 290 found = re.search(self.REGEX, cur) 291 if not found: 292 return cur, loc 293 294 # The name is the first group for all of the above regexes. This is a 295 # coincidence, so care must be taken if regexes are added or changed to 296 # ensure that this is true. 297 name = found.group(1) 298 299 self.debug.print('FOUND DECL: %s' % name) 300 new_block(name, self.blocktype, [cur], code) 301 302 return '', loc 303 304 305class MacroParser(ExprParser): 306 # The macrocall_re peeks into the next line to ensure that it doesn't 307 # eat up a FUNC by accident. The func_re regex is also quite crude and 308 # only intends to ensure that the function name gets picked up 309 # correctly. 310 MACROCALL_RE = re.compile(r'(\w+)\s*(\(.*\))*$') 311 312 def parse_line(self, cur, op, loc, code, macros): 313 ''' Parse a macro call. 314 315 Match a symbol hack macro calls that get added without semicolons. 316 317 - CUR is the string to consume this expression from 318 - OP is the string array for the file 319 - LOC is the first unread location in CUR 320 - CODE is the block to which we add this 321 - MACROS is the regex match object. 322 323 - Returns: The next location to be read in the array. 324 ''' 325 326 # First we have the macros for symbol hacks and all macros we identified so 327 # far. 328 if cur.count('(') != cur.count(')'): 329 return cur, loc 330 if loc < len(op) and '{' in op[loc]: 331 return cur, loc 332 333 found = re.search(self.MACROCALL_RE, cur) 334 if found: 335 sym = found.group(1) 336 name = found.group(2) 337 if sym in macros or self.project_quirks and \ 338 sym in self.project_quirks.C_MACROS: 339 self.debug.print('FOUND MACROCALL: %s (%s)' % (sym, name)) 340 new_block(sym, block_type.macrocall, [cur], code) 341 return '', loc 342 343 # Next, there could be macros that get called right inside their #ifdef, but 344 # without the semi-colon. 345 if cur.strip() == code['name'].strip(): 346 self.debug.print('FOUND MACROCALL (without brackets): %s' % (cur)) 347 new_block(cur, block_type.macrocall, [cur], code) 348 return '',loc 349 350 return cur, loc 351 352 353class Frontend: 354 ''' The C Frontend implementation. 355 ''' 356 KNOWN_MACROS = [] 357 358 def __init__(self, project_quirks, debug): 359 self.op = [] 360 self.debug = debug 361 self.project_quirks = project_quirks 362 363 self.c_expr_parsers = [ 364 CompositeParser(project_quirks, debug), 365 AssignParser(project_quirks, debug), 366 DeclParser(DeclParser.TYPEOF_RE, block_type.decl, 367 project_quirks, debug), 368 DeclParser(DeclParser.TYPEDEF_FN_RE, block_type.decl, 369 project_quirks, debug), 370 DeclParser(DeclParser.FNDECL_RE, block_type.fndecl, 371 project_quirks, debug), 372 FuncParser(project_quirks, debug), 373 DeclParser(DeclParser.DECL_RE, block_type.decl, project_quirks, 374 debug), 375 MacroParser(project_quirks, debug)] 376 377 378 def remove_extern_c(self): 379 ''' Process extern "C"/"C++" block nesting. 380 381 The extern "C" nesting does not add much value so it's safe to almost always 382 drop it. Also drop extern "C++" 383 ''' 384 new_op = [] 385 nesting = 0 386 extern_nesting = 0 387 for l in self.op: 388 if '{' in l: 389 nesting = nesting + 1 390 if re.match(r'extern\s*"C"\s*{', l): 391 extern_nesting = nesting 392 continue 393 if '}' in l: 394 nesting = nesting - 1 395 if nesting < extern_nesting: 396 extern_nesting = 0 397 continue 398 new_op.append(l) 399 400 # Now drop all extern C++ blocks. 401 self.op = new_op 402 new_op = [] 403 nesting = 0 404 extern_nesting = 0 405 in_cpp = False 406 for l in self.op: 407 if re.match(r'extern\s*"C\+\+"\s*{', l): 408 nesting = nesting + 1 409 in_cpp = True 410 411 if in_cpp: 412 if '{' in l: 413 nesting = nesting + 1 414 if '}' in l: 415 nesting = nesting - 1 416 if nesting == 0: 417 new_op.append(l) 418 419 self.op = new_op 420 421 422 def remove_comments(self, op): 423 ''' Remove comments. 424 425 Return OP by removing all comments from it. 426 ''' 427 self.debug.print('REMOVE COMMENTS') 428 429 sep='\n' 430 opstr = sep.join(op) 431 opstr = re.sub(r'/\*.*?\*/', r'', opstr, flags=re.MULTILINE | re.DOTALL) 432 opstr = re.sub(r'\\\n', r' ', opstr, flags=re.MULTILINE | re.DOTALL) 433 new_op = list(filter(None, opstr.split(sep))) 434 435 return new_op 436 437 438 def normalize_condition(self, name): 439 ''' Make some minor transformations on macro conditions to make them more 440 readable. 441 ''' 442 # Negation with a redundant bracket. 443 name = re.sub(r'!\s*\(\s*(\w+)\s*\)', r'! \1', name) 444 # Pull in negation of equality. 445 name = re.sub(r'!\s*\(\s*(\w+)\s*==\s*(\w+)\)', r'\1 != \2', name) 446 # Pull in negation of inequality. 447 name = re.sub(r'!\s*\(\s*(\w+)\s*!=\s*(\w+)\)', r'\1 == \2', name) 448 # Fix simple double negation. 449 name = re.sub(r'!\s*\(\s*!\s*(\w+)\s*\)', r'\1', name) 450 # Similar, but nesting a complex expression. Because of the greedy match, 451 # this matches only the outermost brackets. 452 name = re.sub(r'!\s*\(\s*!\s*\((.*)\)\s*\)$', r'\1', name) 453 return name 454 455 456 def parse_preprocessor(self, loc, code, start = ''): 457 ''' Parse a preprocessor directive. 458 459 In case a preprocessor condition (i.e. if/elif/else), create a new code 460 block to nest code into and in other cases, identify and add entities suchas 461 include files, defines, etc. 462 463 - OP is the string array for the file 464 - LOC is the first unread location in CUR 465 - CODE is the block to which we add this function 466 - START is the string that should continue to be expanded in case we step 467 into a new macro scope. 468 469 - Returns: The next location to be read in the array. 470 ''' 471 cur = self.op[loc] 472 loc = loc + 1 473 endblock = False 474 475 self.debug.print('PARSE_MACRO: %s' % cur) 476 477 # Remove the # and strip spaces again. 478 cur = cur[1:].strip() 479 480 # Include file. 481 if cur.find('include') == 0: 482 m = re.search(r'include\s*["<]?([^">]+)[">]?', cur) 483 new_block(m.group(1), block_type.macro_include, [cur], code) 484 485 # Macro definition. 486 if cur.find('define') == 0: 487 m = re.search(r'define\s+([a-zA-Z0-9_]+)', cur) 488 name = m.group(1) 489 exists = False 490 # Find out if this is a redefinition. 491 for c in code['contents']: 492 if c['name'] == name and c['type'] == block_type.macro_def: 493 c['flags'] = block_flags.macro_redefined 494 exists = True 495 break 496 if not exists: 497 new_block(m.group(1), block_type.macro_def, [cur], code, 498 block_flags.macro_defined) 499 # Add macros as we encounter them. 500 self.KNOWN_MACROS.append(m.group(1)) 501 502 # Macro undef. 503 if cur.find('undef') == 0: 504 m = re.search(r'undef\s+([a-zA-Z0-9_]+)', cur) 505 new_block(m.group(1), block_type.macro_def, [cur], code) 506 507 # #error and #warning macros. 508 if cur.find('error') == 0 or cur.find('warning') == 0: 509 m = re.search(r'(error|warning)\s+"?(.*)"?', cur) 510 if m: 511 name = m.group(2) 512 else: 513 name = '<blank>' 514 new_block(name, block_type.macro_info, [cur], code) 515 516 # Start of an #if or #ifdef block. 517 elif cur.find('if') == 0: 518 rem = re.sub(r'ifndef', r'!', cur).strip() 519 rem = re.sub(r'(ifdef|defined|if)', r'', rem).strip() 520 rem = self.normalize_condition(rem) 521 ifdef = new_block(rem, block_type.macro_cond, [], code) 522 ifdef['headcond'] = ifdef 523 ifdef['start'] = start 524 loc = self.parse_line(loc, ifdef, start) 525 526 # End the previous #if/#elif and begin a new block. 527 elif cur.find('elif') == 0 and code['parent']: 528 rem = self.normalize_condition(re.sub(r'(elif|defined)', r'', cur).strip()) 529 # The #else and #elif blocks should go into the current block's parent. 530 ifdef = new_block(rem, block_type.macro_cond, [], code['parent']) 531 ifdef['headcond'] = code['headcond'] 532 loc = self.parse_line(loc, ifdef, code['headcond']['start']) 533 endblock = True 534 535 # End the previous #if/#elif and begin a new block. 536 elif cur.find('else') == 0 and code['parent']: 537 name = self.normalize_condition('!(' + code['name'] + ')') 538 ifdef = new_block(name, block_type.macro_cond, [], code['parent'], 539 block_flags.else_block) 540 ifdef['headcond'] = code['headcond'] 541 loc = self.parse_line(loc, ifdef, code['headcond']['start']) 542 endblock = True 543 544 elif cur.find('endif') == 0 and code['parent']: 545 # Insert an empty else block if there isn't one. 546 if code['flags'] != block_flags.else_block: 547 name = self.normalize_condition('!(' + code['name'] + ')') 548 ifdef = new_block(name, block_type.macro_cond, [], code['parent'], 549 block_flags.else_block) 550 ifdef['headcond'] = code['headcond'] 551 loc = self.parse_line(loc - 1, ifdef, code['headcond']['start']) 552 endblock = True 553 554 return (loc, endblock) 555 556 557 def parse_c_expr(self, cur, loc, code): 558 ''' Parse a C expression. 559 560 CUR is the string to be parsed, which continues to grow until a match is 561 found. OP is the string array and LOC is the first unread location in the 562 string array. CODE is the block in which any identified expressions should 563 be added. 564 ''' 565 self.debug.print('PARSING: %s' % cur) 566 567 for p in self.c_expr_parsers: 568 cur, loc = p.parse_line(cur, self.op, loc, code, self.KNOWN_MACROS) 569 if not cur: 570 break 571 572 return cur, loc 573 574 575 def expand_problematic_macros(self, cur): 576 ''' Replace problem macros with their substitutes in CUR. 577 ''' 578 for p in self.project_quirks.MACRO_QUIRKS: 579 cur = re.sub(p['orig'], p['sub'], cur) 580 581 return cur 582 583 584 def parse_line(self, loc, code, start = ''): 585 ''' 586 Parse the file line by line. The function assumes a mostly GNU coding 587 standard compliant input so it might barf with anything that is eligible for 588 the Obfuscated C code contest. 589 590 The basic idea of the parser is to identify macro conditional scopes and 591 definitions, includes, etc. and then parse the remaining C code in the 592 context of those macro scopes. The parser does not try to understand the 593 semantics of the code or even validate its syntax. It only records high 594 level symbols in the source and makes a tree structure to indicate the 595 declaration/definition of those symbols and their scope in the macro 596 definitions. 597 598 OP is the string array. 599 LOC is the first unparsed line. 600 CODE is the block scope within which the parsing is currently going on. 601 START is the string with which this parsing should start. 602 ''' 603 cur = start 604 endblock = False 605 saved_cur = '' 606 saved_loc = 0 607 endblock_loc = loc 608 609 while loc < len(self.op): 610 nextline = self.op[loc] 611 612 # Macros. 613 if nextline[0] == '#': 614 (loc, endblock) = self.parse_preprocessor(loc, code, cur) 615 if endblock: 616 endblock_loc = loc 617 # Rest of C Code. 618 else: 619 cur = cur + ' ' + nextline 620 cur = self.expand_problematic_macros(cur).strip() 621 cur, loc = self.parse_c_expr(cur, loc + 1, code) 622 623 if endblock and not cur: 624 # If we are returning from the first #if block, we want to proceed 625 # beyond the current block, not repeat it for any preceding blocks. 626 if code['headcond'] == code: 627 return loc 628 else: 629 return endblock_loc 630 631 return loc 632 633 def drop_empty_blocks(self, tree): 634 ''' Drop empty macro conditional blocks. 635 ''' 636 newcontents = [] 637 638 for x in tree['contents']: 639 if x['type'] != block_type.macro_cond or len(x['contents']) > 0: 640 newcontents.append(x) 641 642 for t in newcontents: 643 if t['type'] == block_type.macro_cond: 644 self.drop_empty_blocks(t) 645 646 tree['contents'] = newcontents 647 648 649 def consolidate_tree_blocks(self, tree): 650 ''' Consolidate common macro conditional blocks. 651 652 Get macro conditional blocks at the same level but scatterred across the 653 file together into a single common block to allow for better comparison. 654 ''' 655 # Nothing to do for non-nesting blocks. 656 if tree['type'] != block_type.macro_cond \ 657 and tree['type'] != block_type.file: 658 return 659 660 # Now for nesting blocks, get the list of unique condition names and 661 # consolidate code under them. The result also bunches up all the 662 # conditions at the top. 663 newcontents = [] 664 665 macros = [x for x in tree['contents'] \ 666 if x['type'] == block_type.macro_cond] 667 macro_names = sorted(set([x['name'] for x in macros])) 668 for m in macro_names: 669 nc = [x['contents'] for x in tree['contents'] if x['name'] == m \ 670 and x['type'] == block_type.macro_cond] 671 b = new_block(m, block_type.macro_cond, sum(nc, []), tree) 672 self.consolidate_tree_blocks(b) 673 newcontents.append(b) 674 675 newcontents.extend([x for x in tree['contents'] \ 676 if x['type'] != block_type.macro_cond]) 677 678 tree['contents'] = newcontents 679 680 681 def compact_tree(self, tree): 682 ''' Try to reduce the tree to its minimal form. 683 684 A source code tree in its simplest form may have a lot of duplicated 685 information that may be difficult to compare and come up with a minimal 686 difference. 687 ''' 688 689 # First, drop all empty blocks. 690 self.drop_empty_blocks(tree) 691 692 # Macro conditions that nest the entire file aren't very interesting. This 693 # should take care of the header guards. 694 if tree['type'] == block_type.file \ 695 and len(tree['contents']) == 1 \ 696 and tree['contents'][0]['type'] == block_type.macro_cond: 697 tree['contents'] = tree['contents'][0]['contents'] 698 699 # Finally consolidate all macro conditional blocks. 700 self.consolidate_tree_blocks(tree) 701 702 703 def parse(self, op): 704 ''' File parser. 705 706 Parse the input array of lines OP and generate a tree structure to 707 represent the file. This tree structure is then used for comparison between 708 the old and new file. 709 ''' 710 self.KNOWN_MACROS = [] 711 tree = new_block('', block_type.file, [], None) 712 self.op = self.remove_comments(op) 713 self.remove_extern_c() 714 self.op = [re.sub(r'#\s+', '#', x) for x in self.op] 715 self.parse_line(0, tree) 716 self.compact_tree(tree) 717 self.dump_tree(tree, 0) 718 719 return tree 720 721 722 def print_change(self, tree, action, prologue = ''): 723 ''' Print the nature of the differences found in the tree compared to the 724 other tree. TREE is the tree that changed, action is what the change was 725 (Added, Removed, Modified) and prologue specifies the macro scope the change 726 is in. The function calls itself recursively for all macro condition tree 727 nodes. 728 ''' 729 730 if tree['type'] != block_type.macro_cond: 731 print('\t%s(%s): %s.' % (prologue, tree['name'], action)) 732 return 733 734 prologue = '%s[%s]' % (prologue, tree['name']) 735 for t in tree['contents']: 736 if t['type'] == block_type.macro_cond: 737 self.print_change(t, action, prologue) 738 else: 739 print('\t%s(%s): %s.' % (prologue, t['name'], action)) 740 741 742 def compare_trees(self, left, right, prologue = ''): 743 ''' Compare two trees and print the difference. 744 745 This routine is the entry point to compare two trees and print out their 746 differences. LEFT and RIGHT will always have the same name and type, 747 starting with block_type.file and '' at the top level. 748 ''' 749 750 if left['type'] == block_type.macro_cond or left['type'] == block_type.file: 751 752 if left['type'] == block_type.macro_cond: 753 prologue = '%s[%s]' % (prologue, left['name']) 754 755 # Make sure that everything in the left tree exists in the right tree. 756 for cl in left['contents']: 757 found = False 758 for cr in right['contents']: 759 if not cl['matched'] and not cr['matched'] and \ 760 cl['name'] == cr['name'] and cl['type'] == cr['type']: 761 cl['matched'] = cr['matched'] = True 762 self.compare_trees(cl, cr, prologue) 763 found = True 764 break 765 if not found: 766 self.print_change(cl, cl['actions']['del'], prologue) 767 768 # ... and vice versa. This time we only need to look at unmatched 769 # contents. 770 for cr in right['contents']: 771 if not cr['matched']: 772 self.print_change(cr, cr['actions']['new'], prologue) 773 else: 774 if left['contents'] != right['contents']: 775 self.print_change(left, left['actions']['mod'], prologue) 776 777 778 def dump_tree(self, tree, indent): 779 ''' Print the entire tree. 780 ''' 781 if not self.debug.debug: 782 return 783 784 if tree['type'] == block_type.macro_cond or tree['type'] == block_type.file: 785 print('%sScope: %s' % (' ' * indent, tree['name'])) 786 for c in tree['contents']: 787 self.dump_tree(c, indent + 4) 788 print('%sEndScope: %s' % (' ' * indent, tree['name'])) 789 else: 790 if tree['type'] == block_type.func: 791 print('%sFUNC: %s' % (' ' * indent, tree['name'])) 792 elif tree['type'] == block_type.composite: 793 print('%sCOMPOSITE: %s' % (' ' * indent, tree['name'])) 794 elif tree['type'] == block_type.assign: 795 print('%sASSIGN: %s' % (' ' * indent, tree['name'])) 796 elif tree['type'] == block_type.fndecl: 797 print('%sFNDECL: %s' % (' ' * indent, tree['name'])) 798 elif tree['type'] == block_type.decl: 799 print('%sDECL: %s' % (' ' * indent, tree['name'])) 800 elif tree['type'] == block_type.macrocall: 801 print('%sMACROCALL: %s' % (' ' * indent, tree['name'])) 802 elif tree['type'] == block_type.macro_def: 803 print('%sDEFINE: %s' % (' ' * indent, tree['name'])) 804 elif tree['type'] == block_type.macro_include: 805 print('%sINCLUDE: %s' % (' ' * indent, tree['name'])) 806 elif tree['type'] == block_type.macro_undef: 807 print('%sUNDEF: %s' % (' ' * indent, tree['name'])) 808 else: 809 print('%sMACRO LEAF: %s' % (' ' * indent, tree['name'])) 810 811 812 def compare(self, oldfile, newfile): 813 ''' Entry point for the C backend. 814 815 Parse the two files into trees and compare them. Print the result of the 816 comparison in the ChangeLog-like format. 817 ''' 818 self.debug.print('LEFT TREE') 819 self.debug.print('-' * 80) 820 left = self.parse(oldfile) 821 822 self.debug.print('RIGHT TREE') 823 self.debug.print('-' * 80) 824 right = self.parse(newfile) 825 826 self.compare_trees(left, right) 827