#!/usr/bin/env python
#
# Cppcheck - A tool for static C/C++ code analysis
# Copyright (C) 2007-2019 Cppcheck team.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import io
import os
import sys
import re
import glob
import argparse
import errno


class MatchCompiler:
    """Rewrite Token::Match-style calls in C++ source into generated functions.

    Calls to Token::Match, Token::simpleMatch, Token::findmatch and
    Token::findsimplematch whose pattern is a string literal are replaced
    by calls to generated ``matchN`` / ``findmatchN`` functions; the
    generated functions are written in front of the converted source.
    """

    # Pattern commands that translate to a fixed C++ expression on 'tok'.
    _SIMPLE_COMMANDS = {
        '%any%': 'true',
        '%assign%': 'tok->isAssignmentOp()',
        '%bool%': 'tok->isBoolean()',
        '%char%': '(tok->tokType() == Token::eChar)',
        '%comp%': 'tok->isComparisonOp()',
        '%num%': 'tok->isNumber()',
        '%cop%': 'tok->isConstOp()',
        '%op%': 'tok->isOp()',
        '%or%': '(tok->tokType() == Token::eBitOp && tok->str() == MatchCompiler::makeConstString("|") )',
        '%oror%': '(tok->tokType() == Token::eLogicalOp && tok->str() == MatchCompiler::makeConstString("||"))',
        '%str%': '(tok->tokType() == Token::eString)',
        '%type%': '(tok->isName() && tok->varId() == 0U && (tok->str() != "delete" || !tok->isKeyword()))',
        '%name%': 'tok->isName()',
        '%var%': '(tok->varId() != 0)',
        '%varid%': '(tok->isName() && tok->varId() == varid)',
    }

    def __init__(self, verify_mode=False, show_skipped=False):
        """Create a compiler.

        verify_mode  -- additionally emit wrappers that compare the compiled
                        result with the original Token:: call.
        show_skipped -- print a message for every call whose pattern is not
                        a plain string literal and is therefore left alone.
        """
        self._verifyMode = verify_mode
        self._showSkipped = show_skipped
        self._reset()

    def _reset(self):
        """Forget all functions generated so far (called once per file)."""
        self._rawMatchFunctions = []
        self._matchFunctionCache = {}

    @staticmethod
    def _generateCacheSignature(
            pattern, endToken=None, varId=None, isFindMatch=False):
        """Return the cache key for a pattern and the kind of call it is used in.

        Only the presence of endToken / varId matters, not their values.
        """
        sig = pattern
        sig += '|ENDTOKEN' if endToken else '|NO-ENDTOKEN'
        sig += '|VARID' if varId else '|NO-VARID'
        sig += '|ISFINDMATCH' if isFindMatch else '|NORMALMATCH'
        return sig

    def _lookupMatchFunctionId(
            self, pattern, endToken=None, varId=None, isFindMatch=False):
        """Return the number of an already generated function, or None."""
        signature = self._generateCacheSignature(
            pattern, endToken, varId, isFindMatch)
        return self._matchFunctionCache.get(signature)

    def _insertMatchFunctionId(
            self, id, pattern, endToken=None, varId=None, isFindMatch=False):
        """Remember that function number 'id' implements the given signature."""
        signature = self._generateCacheSignature(
            pattern, endToken, varId, isFindMatch)

        # function signature should not be in the cache
        assert self._lookupMatchFunctionId(
            pattern, endToken, varId, isFindMatch) is None

        self._matchFunctionCache[signature] = id

    @staticmethod
    def _compileCmd(tok):
        """Return the C++ condition that is true when 'tok' matches one pattern word."""
        cmd = MatchCompiler._SIMPLE_COMMANDS.get(tok)
        if cmd is not None:
            return cmd

        if (len(tok) > 2) and (tok[0] == "%"):
            # Unknown %command%: report it, then fall back to a literal compare.
            print("unhandled:" + tok)

        return (
            '(tok->str() == MatchCompiler::makeConstString("' + tok + '"))'
        )

    def _compilePattern(self, pattern, nr, varid,
                        isFindMatch=False, tokenType="const Token"):
        """Generate the C++ code that matches 'pattern' starting at 'tok'.

        With isFindMatch False a complete 'static bool match<nr>(...)'
        function is returned. With isFindMatch True only a loop body is
        returned: it 'continue's on mismatch, returns 'start_tok' on match,
        and the enclosing function is closed by the caller.
        """
        if isFindMatch:
            ret = '\n ' + tokenType + ' * tok = start_tok;\n'
            returnStatement = 'continue;\n'
        else:
            arg2 = ''
            if varid:
                arg2 = ', const int varid'

            ret = '// pattern: ' + pattern + '\n'
            ret += 'static bool match' + \
                str(nr) + '(' + tokenType + '* tok' + arg2 + ') {\n'
            returnStatement = 'return false;\n'

        gotoNextToken = ''
        checked_varid = False
        for tok in pattern.split(' '):
            if tok == '':
                continue
            # Advance to the next token before every word except the first.
            ret += gotoNextToken
            gotoNextToken = ' tok = tok->next();\n'

            # if varid is provided, check that it's non-zero on first use
            if varid and '%varid%' in tok and not checked_varid:
                ret += ' if (varid==0U)\n'
                ret += ' throw InternalError(tok, "Internal error. Token::Match called with varid 0. ' +\
                    'Please report this to Cppcheck developers");\n'
                checked_varid = True

            # [abc]
            if (len(tok) > 2) and (tok[0] == '[') and (tok[-1] == ']'):
                ret += ' if (!tok || tok->str().size() != 1U || !strchr("' + tok[1:-1] + '", tok->str()[0]))\n'
                ret += ' ' + returnStatement

            # a|b|c
            elif tok.find('|') > 0:
                tokens2 = tok.split('|')
                # An empty alternative ("a|b|") makes the whole word optional.
                optional = "" in tokens2
                if optional:
                    ret += ' if (tok && ('
                else:
                    ret += ' if (!tok || !('
                ret += ' || '.join(
                    self._compileCmd(tok2) for tok2 in tokens2 if tok2 != '')
                ret += '))\n'
                if optional:
                    # Token is consumed only when it matched, so the generic
                    # advance before the next word must be suppressed.
                    ret += ' tok = tok->next();\n'
                    gotoNextToken = ''
                else:
                    ret += ' ' + returnStatement

            # !!a
            elif tok[0:2] == "!!":
                ret += ' if (tok && tok->str() == MatchCompiler::makeConstString("' + tok[2:] + '"))\n'
                ret += ' ' + returnStatement
                # A missing token satisfies "!!a", so tok may be null here.
                gotoNextToken = ' tok = tok ? tok->next() : nullptr;\n'

            else:
                negatedTok = "!" + self._compileCmd(tok)
                # fold !true => false ; !false => true
                # this avoids cppcheck warnings about condition always being true/false
                if negatedTok == "!false":
                    negatedTok = "true"
                elif negatedTok == "!true":
                    negatedTok = "false"
                ret += ' if (!tok || ' + negatedTok + ')\n'
                ret += ' ' + returnStatement

        if isFindMatch:
            ret += ' return start_tok;\n'
        else:
            ret += ' return true;\n'
            ret += '}\n'

        return ret

    def _compileFindPattern(self, pattern, findmatchnr, endToken, varId):
        """Generate a 'findmatch<nr>' template function searching for 'pattern'."""
        more_args = ''
        endCondition = ''
        if endToken:
            more_args += ', const Token * end'
            endCondition = ' && start_tok != end'
        if varId:
            more_args += ', int varid'

        ret = '// pattern: ' + pattern + '\n'
        ret += 'template<class T> static T * findmatch' + \
            str(findmatchnr) + '(T * start_tok' + more_args + ') {\n'
        ret += ' for (; start_tok' + endCondition + \
            '; start_tok = start_tok->next()) {\n'

        ret += self._compilePattern(pattern, -1, varId, True, 'T')
        ret += ' }\n'
        ret += ' return nullptr;\n}\n'

        return ret

    @staticmethod
    def parseMatch(line, pos1):
        """Split the call expression starting at line[pos1] into its arguments.

        Returns [whole_call_text, arg1, arg2, ...] (arguments are not
        stripped), or None when the closing parenthesis is not on this line.
        Parentheses and commas inside string literals are ignored.
        """
        parlevel = 0
        args = []
        argstart = 0
        pos = pos1
        inString = False
        while pos < len(line):
            ch = line[pos]
            if inString:
                if ch == '\\':
                    pos += 1  # skip the escaped character
                elif ch == '"':
                    inString = False
            elif ch == '"':
                inString = True
            elif ch == '(':
                parlevel += 1
                if parlevel == 1:
                    argstart = pos + 1
            elif ch == ')':
                parlevel -= 1
                if parlevel == 0:
                    ret = [line[pos1:pos + 1]]
                    ret.extend(args)
                    ret.append(line[argstart:pos])
                    return ret
            elif ch == ',' and parlevel == 1:
                args.append(line[argstart:pos])
                argstart = pos + 1
            pos += 1

        return None

    @staticmethod
    def _isInString(line, pos1):
        """Return True if position pos1 of 'line' lies inside a string literal."""
        pos = 0
        inString = False
        # '<' rather than '!=': a backslash advances pos by two, so pos can
        # step over pos1; with '!=' the scan would then run off the line.
        while pos < pos1:
            if line[pos] == '\\':
                pos += 1
            elif line[pos] == '"':
                inString = not inString
            pos += 1
        return inString

    @staticmethod
    def _parseStringComparison(line, pos1):
        """Locate the first string literal at or after pos1.

        Returns (start, end) with line[start] the opening quote and
        line[end - 1] the closing quote, or None if no complete literal
        is found.
        """
        startPos = 0
        pos = pos1
        inString = False
        while pos < len(line):
            if inString:
                if line[pos] == '\\':
                    pos += 1
                elif line[pos] == '"':
                    return startPos, pos + 1
            elif line[pos] == '"':
                startPos = pos
                inString = True
            pos += 1

        return None

    @staticmethod
    def _compileVerifyTokenMatch(
            is_simplematch, verifyNumber, pattern, patternNumber, varId):
        """Generate 'match_verify<N>', which cross-checks match<patternNumber>
        against the original Token::Match / Token::simpleMatch call."""
        more_args = ''
        if varId:
            more_args = ', const int varid'

        ret = 'static bool match_verify' + \
            str(verifyNumber) + '(const Token *tok' + more_args + ') {\n'

        origMatchName = 'Match'
        if is_simplematch:
            origMatchName = 'simpleMatch'
            assert varId is None

        ret += ' bool res_compiled_match = match' + \
            str(patternNumber) + '(tok'
        if varId:
            ret += ', varid'
        ret += ');\n'

        ret += ' bool res_parsed_match = Token::' + \
            origMatchName + '(tok, "' + pattern + '"'
        if varId:
            ret += ', varid'
        ret += ');\n'

        ret += '\n'
        # Don't use assert() here, it's disabled for optimized builds.
        # We also need to verify builds in 'release' mode
        ret += ' if (res_parsed_match != res_compiled_match) {\n'
        ret += ' throw InternalError(tok, "Internal error. ' +\
            'Compiled match returned different result than parsed match: ' + pattern + '");\n'
        ret += ' }\n'
        ret += ' return res_compiled_match;\n'
        ret += '}\n'

        return ret

    def _replaceSpecificTokenMatch(
            self, is_simplematch, line, start_pos, end_pos, pattern, tok, varId):
        """Replace one Token::Match/simpleMatch call in 'line'.

        start_pos is where the call starts, end_pos is the length of the
        call text. Returns the modified line.
        """
        more_args = ''
        if varId:
            more_args = ',' + varId

        # Compile function or use previously compiled one
        patternNumber = self._lookupMatchFunctionId(
            pattern, None, varId, False)

        if patternNumber is None:
            patternNumber = len(self._rawMatchFunctions) + 1
            self._insertMatchFunctionId(
                patternNumber, pattern, None, varId, False)
            self._rawMatchFunctions.append(
                self._compilePattern(pattern, patternNumber, varId))

        functionName = "match"
        if self._verifyMode:
            verifyNumber = len(self._rawMatchFunctions) + 1
            self._rawMatchFunctions.append(
                self._compileVerifyTokenMatch(
                    is_simplematch,
                    verifyNumber,
                    pattern,
                    patternNumber,
                    varId))

            # inject verify function
            functionName = "match_verify"
            patternNumber = verifyNumber

        return (
            line[:start_pos] + functionName + str(patternNumber) +
            '(' + tok + more_args + ')' + line[start_pos + end_pos:]
        )

    def _replaceTokenMatch(self, line, linenr, filename):
        """Replace every Token::Match / Token::simpleMatch call in 'line'.

        Calls whose pattern is not a plain string literal are skipped.
        linenr and filename are only used for the 'skipping' message.
        """
        for func in ('Match', 'simpleMatch'):
            is_simplematch = func == 'simpleMatch'
            pattern_start = 0
            while True:
                pos1 = line.find('Token::' + func + '(', pattern_start)
                if pos1 == -1:
                    break

                res = self.parseMatch(line, pos1)
                if res is None:
                    break

                # assert that Token::Match has either 2 or 3 arguments
                assert len(res) in (3, 4)

                end_pos = len(res[0])
                tok = res[1]
                raw_pattern = res[2]
                varId = None
                if len(res) == 4:
                    varId = res[3]

                # Continue the search behind this call on the next round.
                pattern_start = pos1 + end_pos
                res = re.match(r'\s*"((?:.|\\")*?)"\s*$', raw_pattern)
                if res is None:
                    if self._showSkipped:
                        print(filename + ":" + str(linenr) + " skipping match pattern:" + raw_pattern)
                    continue  # Non-const pattern - bailout

                pattern = res.group(1)
                orig_len = len(line)
                line = self._replaceSpecificTokenMatch(
                    is_simplematch,
                    line,
                    pos1,
                    end_pos,
                    pattern,
                    tok,
                    varId)
                # The replacement changed the line length; shift the cursor.
                pattern_start += len(line) - orig_len

        return line

    @staticmethod
    def _compileVerifyTokenFindMatch(
            is_findsimplematch, verifyNumber, pattern, patternNumber, endToken, varId):
        """Generate 'findmatch_verify<N>', which cross-checks
        findmatch<patternNumber> against the original Token::find* call."""
        more_args = ''
        if endToken:
            more_args += ', const Token * endToken'
        if varId:
            more_args += ', const int varid'

        ret = 'template < class T > static T * findmatch_verify' + \
            str(verifyNumber) + '(T * tok' + more_args + ') {\n'

        origFindMatchName = 'findmatch'
        if is_findsimplematch:
            origFindMatchName = 'findsimplematch'
            assert varId is None

        ret += ' T * res_compiled_findmatch = findmatch' + \
            str(patternNumber) + '(tok'
        if endToken:
            ret += ', endToken'
        if varId:
            ret += ', varid'
        ret += ');\n'

        ret += ' T * res_parsed_findmatch = Token::' + \
            origFindMatchName + '(tok, "' + pattern + '"'
        if endToken:
            ret += ', endToken'
        if varId:
            ret += ', varid'
        ret += ');\n'

        ret += '\n'
        # Don't use assert() here, it's disabled for optimized builds.
        # We also need to verify builds in 'release' mode
        ret += ' if (res_parsed_findmatch != res_compiled_findmatch) {\n'
        ret += ' throw InternalError(tok, "Internal error. ' +\
            'Compiled findmatch returned different result than parsed findmatch: ' + pattern + '");\n'
        ret += ' }\n'
        ret += ' return res_compiled_findmatch;\n'
        ret += '}\n'

        return ret

    def _replaceSpecificFindTokenMatch(
            self, is_findsimplematch, line, start_pos, end_pos, pattern, tok, endToken, varId):
        """Replace one Token::findmatch/findsimplematch call in 'line'.

        start_pos is where the call starts, end_pos is the length of the
        call text. Returns the modified line.
        """
        more_args = ''
        if endToken:
            more_args += ',' + endToken
        if varId:
            more_args += ',' + varId

        # Compile function or use previously compiled one
        findMatchNumber = self._lookupMatchFunctionId(
            pattern, endToken, varId, True)

        if findMatchNumber is None:
            findMatchNumber = len(self._rawMatchFunctions) + 1
            self._insertMatchFunctionId(
                findMatchNumber, pattern, endToken, varId, True)
            self._rawMatchFunctions.append(
                self._compileFindPattern(
                    pattern, findMatchNumber, endToken, varId))

        functionName = "findmatch"
        if self._verifyMode:
            verifyNumber = len(self._rawMatchFunctions) + 1
            self._rawMatchFunctions.append(
                self._compileVerifyTokenFindMatch(
                    is_findsimplematch,
                    verifyNumber,
                    pattern,
                    findMatchNumber,
                    endToken,
                    varId))

            # inject verify function
            functionName = "findmatch_verify"
            findMatchNumber = verifyNumber

        return (
            line[:start_pos] + functionName + str(findMatchNumber) +
            '(' + tok + more_args + ') ' + line[start_pos + end_pos:]
        )

    def _replaceTokenFindMatch(self, line, linenr, filename):
        """Replace Token::findsimplematch / Token::findmatch calls in 'line'.

        Processing of the line stops at the first call that cannot be
        compiled (pattern is not a plain string literal, or the call is not
        closed on this line). linenr and filename are only used for the
        'skipping' message.
        """
        while True:
            is_findsimplematch = True
            pos1 = line.find('Token::findsimplematch(')
            if pos1 == -1:
                is_findsimplematch = False
                pos1 = line.find('Token::findmatch(')
            if pos1 == -1:
                break

            res = self.parseMatch(line, pos1)
            if res is None:
                break

            # assert that Token::find(simple)match has either 2, 3 or 4 arguments
            # (res additionally holds the whole call text at index 0)
            assert 3 <= len(res) < 6

            g0 = res[0]
            tok = res[1]
            pattern = res[2]

            # Check for varId
            varId = None
            if not is_findsimplematch and "%varid%" in g0:
                if len(res) == 5:
                    varId = res[4]
                else:
                    varId = res[3]

            # endToken support. We resolve the overloaded type by checking if varId is used or not.
            # Function prototypes:
            #     Token *findsimplematch(const Token *tok, const char pattern[]);
            #     Token *findsimplematch(const Token *tok, const char pattern[], const Token *end);
            #     Token *findmatch(const Token *tok, const char pattern[], int varId = 0);
            #     Token *findmatch(const Token *tok, const char pattern[], const Token *end, int varId = 0);
            endToken = None
            if ((is_findsimplematch and len(res) == 4) or
                    (not is_findsimplematch and varId and (len(res) == 5)) or
                    (not is_findsimplematch and varId is None and len(res) == 4)):
                endToken = res[3]

            res = re.match(r'\s*"((?:.|\\")*?)"\s*$', pattern)
            if res is None:
                if self._showSkipped:
                    print(filename + ":" + str(linenr) + " skipping findmatch pattern:" + pattern)
                break  # Non-const pattern - bailout

            pattern = res.group(1)
            line = self._replaceSpecificFindTokenMatch(
                is_findsimplematch,
                line,
                pos1,
                len(g0),
                pattern,
                tok,
                endToken,
                varId)

        return line

    def _replaceCStrings(self, line):
        """Wrap string literals compared with == / != in MatchCompiler::makeConstString().

        Stops at the first comparison that is itself inside a string literal.
        """
        while True:
            match = re.search('(==|!=) *"', line)
            if not match:
                break

            if self._isInString(line, match.start()):
                break

            res = self._parseStringComparison(line, match.start())
            if res is None:
                break

            startPos, endPos = res
            text = line[startPos + 1:endPos - 1]
            # Use quote-free placeholders first so that the rewritten text
            # is not matched again by the search above.
            line = line[:startPos] + 'MatchCompiler::makeConstStringBegin' +\
                text + 'MatchCompiler::makeConstStringEnd' + line[endPos:]
        line = line.replace('MatchCompiler::makeConstStringBegin', 'MatchCompiler::makeConstString("')
        line = line.replace('MatchCompiler::makeConstStringEnd', '")')
        return line

    def convertFile(self, srcname, destname, line_directive):
        """Convert the C++ file 'srcname' and write the result to 'destname'.

        Both files are read/written as UTF-8. If line_directive is true a
        '#line 1 "<srcname>"' directive is written in front of the converted
        source. The include header and generated functions are only written
        when something was changed or generated.
        """
        self._reset()

        with io.open(srcname, "rt", encoding="utf-8") as fin:
            srclines = fin.readlines()

        modified = False
        converted = []
        for linenr, line_orig in enumerate(srclines, 1):
            # Compile Token::Match and Token::simpleMatch
            line = self._replaceTokenMatch(line_orig, linenr, srcname)

            # Compile Token::findsimplematch
            line = self._replaceTokenFindMatch(line, linenr, srcname)

            # Cache plain C-strings in C++ strings
            line = self._replaceCStrings(line)

            if line != line_orig:
                modified = True

            converted.append(line)

        code = u''.join(converted)

        # Compute matchFunctions
        strFunctions = u''.join(self._rawMatchFunctions)

        lineno = u''
        if line_directive:
            lineno = u'#line 1 "' + srcname + '"\n'

        header = u'#include "matchcompiler.h"\n'
        header += u'#include <string>\n'
        header += u'#include <cstring>\n'
        if self._rawMatchFunctions:
            header += u'#include "errorlogger.h"\n'
            header += u'#include "token.h"\n'

        with io.open(destname, 'wt', encoding="utf-8") as fout:
            if modified or self._rawMatchFunctions:
                fout.write(header)
                fout.write(strFunctions)
            fout.write(lineno)
            fout.write(code)


def main():
    """Command line entry point: convert the given files (or all *.cpp files
    in the read directory) and write the results to the write directory."""
    # Argument handling
    parser = argparse.ArgumentParser(
        description='Compile Token::Match() calls into native C++ code')
    parser.add_argument('--verify', action='store_true', default=False,
                        help='verify compiled matches against on-the-fly parser. Slow!')
    parser.add_argument('--show-skipped', action='store_true', default=False,
                        help='show skipped (non-static) patterns')
    parser.add_argument('--read-dir', default="lib",
                        help='directory from which files are read')
    parser.add_argument('--write-dir', default="build",
                        help='directory into which files are written')
    parser.add_argument('--prefix', default="",
                        help='prefix for build files')
    parser.add_argument('--line', action='store_true', default=False,
                        help='add line directive to input files into build files')
    parser.add_argument('file', nargs='*',
                        help='file to compile')
    args = parser.parse_args()
    lib_dir = args.read_dir
    build_dir = args.write_dir
    line_directive = args.line
    files = args.file

    # Check if we are invoked from the right place
    if not os.path.exists(lib_dir):
        print('Directory "' + lib_dir + '" not found.')
        sys.exit(-1)

    # Create build directory if needed
    try:
        os.makedirs(build_dir)
    except OSError as e:
        # due to race condition in case of parallel build,
        # makedirs may fail. Ignore that; if there's actual
        # problem with directory creation, it'll be caught
        # by the following isdir check
        if e.errno != errno.EEXIST:
            raise

    if not os.path.isdir(build_dir):
        raise Exception(build_dir + ' is not a directory')

    mc = MatchCompiler(verify_mode=args.verify,
                       show_skipped=args.show_skipped)

    if not files:
        # select all *.cpp files in lib_dir
        for f in glob.glob(lib_dir + '/*.cpp'):
            files.append(f[len(lib_dir) + 1:])

    # convert files
    for fi in files:
        pi = lib_dir + '/' + fi
        fo = args.prefix + fi
        po = build_dir + '/' + fo
        print(pi + ' => ' + po)
        mc.convertFile(pi, po, line_directive)


if __name__ == '__main__':
    main()