1#!/usr/bin/env python
2#
3# Cppcheck - A tool for static C/C++ code analysis
4# Copyright (C) 2007-2019 Cppcheck team.
5#
6# This program is free software: you can redistribute it and/or modify
7# it under the terms of the GNU General Public License as published by
8# the Free Software Foundation, either version 3 of the License, or
9# (at your option) any later version.
10#
11# This program is distributed in the hope that it will be useful,
12# but WITHOUT ANY WARRANTY; without even the implied warranty of
13# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14# GNU General Public License for more details.
15#
16# You should have received a copy of the GNU General Public License
17# along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19import io
20import os
21import sys
22import re
23import glob
24import argparse
25import errno
26
27
class MatchCompiler:
    """Rewrite Token::Match()-style calls in C++ source into generated C++ code.

    Calls to ``Token::Match``, ``Token::simpleMatch``, ``Token::findmatch``
    and ``Token::findsimplematch`` whose pattern argument is a string literal
    are replaced by calls to generated static functions (``matchN`` /
    ``findmatchN``).  Comparisons against string literals (``== "..."`` and
    ``!= "..."``) are wrapped in ``MatchCompiler::makeConstString("...")``.

    NOTE(review): the use of ``io.open`` and ``u''`` literals suggests the
    script is meant to run on Python 2 as well as Python 3 - confirm before
    introducing newer syntax.
    """

    # Pattern commands that translate to a fixed C++ expression on 'tok'.
    _PATTERN_COMMANDS = {
        '%any%': 'true',
        '%assign%': 'tok->isAssignmentOp()',
        '%bool%': 'tok->isBoolean()',
        '%char%': '(tok->tokType() == Token::eChar)',
        '%comp%': 'tok->isComparisonOp()',
        '%num%': 'tok->isNumber()',
        '%cop%': 'tok->isConstOp()',
        '%op%': 'tok->isOp()',
        '%or%': '(tok->tokType() == Token::eBitOp && tok->str() == MatchCompiler::makeConstString("|") )',
        '%oror%': '(tok->tokType() == Token::eLogicalOp && tok->str() == MatchCompiler::makeConstString("||"))',
        '%str%': '(tok->tokType() == Token::eString)',
        '%type%': '(tok->isName() && tok->varId() == 0U && (tok->str() != "delete" || !tok->isKeyword()))',
        '%name%': 'tok->isName()',
        '%var%': '(tok->varId() != 0)',
        '%varid%': '(tok->isName() && tok->varId() == varid)',
    }

    # A pattern argument that consists of exactly one string literal.
    _CONST_PATTERN_RE = re.compile(r'\s*"((?:.|\\")*?)"\s*$')

    # Start of a comparison against a string literal.
    _CSTRING_COMPARE_RE = re.compile('(==|!=) *"')

    def __init__(self, verify_mode=False, show_skipped=False):
        """Create a compiler.

        verify_mode  -- when true, every replaced call goes through a generated
                        wrapper that also runs the original Token:: function
                        and throws InternalError if the results differ.
        show_skipped -- when true, print a message for every call whose
                        pattern is not a plain string literal.
        """
        self._verifyMode = verify_mode
        self._showSkipped = show_skipped
        self._reset()

    def _reset(self):
        """Forget all generated functions and the signature cache."""
        self._rawMatchFunctions = []
        self._matchFunctionCache = {}

    @staticmethod
    def _generateCacheSignature(
            pattern, endToken=None, varId=None, isFindMatch=False):
        """Return the cache key for a pattern and the kind of function needed.

        Only the truthiness of endToken and varId is encoded, not their text.
        """
        return '|'.join((
            pattern,
            'ENDTOKEN' if endToken else 'NO-ENDTOKEN',
            'VARID' if varId else 'NO-VARID',
            'ISFINDMATCH' if isFindMatch else 'NORMALMATCH',
        ))

    def _lookupMatchFunctionId(
            self, pattern, endToken=None, varId=None, isFindMatch=False):
        """Return the number of an already generated function, or None."""
        signature = self._generateCacheSignature(
            pattern, endToken, varId, isFindMatch)
        return self._matchFunctionCache.get(signature)

    def _insertMatchFunctionId(
            self, id, pattern, endToken=None, varId=None, isFindMatch=False):
        """Remember that function number 'id' implements the given signature."""
        signature = self._generateCacheSignature(
            pattern, endToken, varId, isFindMatch)

        # function signature should not be in the cache
        assert signature not in self._matchFunctionCache

        self._matchFunctionCache[signature] = id

    @staticmethod
    def _compileCmd(tok):
        """Return the C++ condition that is true when 'tok' matches one
        pattern element.

        Known %...% commands use a fixed expression; anything else is compared
        literally against tok->str().  An unknown %...% command is reported on
        stdout and then also compared literally.
        """
        command = MatchCompiler._PATTERN_COMMANDS.get(tok)
        if command is not None:
            return command

        if (len(tok) > 2) and (tok[0] == "%"):
            print("unhandled:" + tok)

        return (
            '(tok->str() == MatchCompiler::makeConstString("' + tok + '"))'
        )

    def _compilePattern(self, pattern, nr, varid,
                        isFindMatch=False, tokenType="const Token"):
        """Generate the C++ code that checks 'pattern' token by token.

        With isFindMatch false a complete ``static bool match<nr>(...)``
        function is returned.  With isFindMatch true only a loop body is
        returned: it starts at ``start_tok``, uses ``continue`` on mismatch and
        ``return start_tok`` on success ('nr' is not used in that case).
        'varid' only needs to be truthy to add the varid parameter and the
        non-zero check.
        """
        if isFindMatch:
            ret = '\n    ' + tokenType + ' * tok = start_tok;\n'
            returnStatement = 'continue;\n'
        else:
            arg2 = ', const int varid' if varid else ''
            ret = '// pattern: ' + pattern + '\n'
            ret += 'static bool match' + \
                str(nr) + '(' + tokenType + '* tok' + arg2 + ') {\n'
            returnStatement = 'return false;\n'

        # statement emitted when the current pattern element does not match
        mismatch = '        ' + returnStatement

        gotoNextToken = ''
        checked_varid = False
        for tok in pattern.split(' '):
            if tok == '':
                continue
            ret += gotoNextToken
            gotoNextToken = '    tok = tok->next();\n'

            # if varid is provided, check that it's non-zero on first use
            if varid and '%varid%' in tok and not checked_varid:
                ret += '    if (varid==0U)\n'
                ret += '        throw InternalError(tok, "Internal error. Token::Match called with varid 0. ' +\
                    'Please report this to Cppcheck developers");\n'
                checked_varid = True

            # [abc]
            if (len(tok) > 2) and (tok[0] == '[') and (tok[-1] == ']'):
                ret += '    if (!tok || tok->str().size() != 1U || !strchr("' + tok[1:-1] + '", tok->str()[0]))\n'
                ret += mismatch

            # a|b|c
            elif tok.find('|') > 0:
                alternatives = tok.split('|')
                # An empty alternative (e.g. "a|b|") makes the element
                # optional: advance when it matches, never fail on it.
                optional = "" in alternatives
                if optional:
                    ret += '    if (tok && ('
                else:
                    ret += '    if (!tok || !('
                ret += ' || '.join(
                    self._compileCmd(alt) for alt in alternatives if alt != '')
                ret += '))\n'
                if optional:
                    ret += '        tok = tok->next();\n'
                    # the advance already happened conditionally above
                    gotoNextToken = ''
                else:
                    ret += mismatch

            # !!a
            elif tok[0:2] == "!!":
                ret += '    if (tok && tok->str() == MatchCompiler::makeConstString("' + tok[2:] + '"))\n'
                ret += mismatch
                # a null token also satisfies "!!a", so guard the advance
                gotoNextToken = '    tok = tok ? tok->next() : nullptr;\n'

            else:
                negatedTok = "!" + self._compileCmd(tok)
                # fold !true => false ; !false => true
                # this avoids cppcheck warnings about condition always being true/false
                if negatedTok == "!false":
                    negatedTok = "true"
                elif negatedTok == "!true":
                    negatedTok = "false"
                ret += '    if (!tok || ' + negatedTok + ')\n'
                ret += mismatch

        if isFindMatch:
            ret += '    return start_tok;\n'
        else:
            ret += '    return true;\n'
            ret += '}\n'

        return ret

    def _compileFindPattern(self, pattern, findmatchnr, endToken, varId):
        """Generate the ``findmatch<findmatchnr>`` C++ function template.

        The function walks from start_tok (up to, but not including, 'end' if
        endToken is truthy) and returns the first token at which 'pattern'
        matches, or nullptr.
        """
        more_args = ''
        endCondition = ''
        if endToken:
            more_args += ', const Token * end'
            endCondition = ' && start_tok != end'
        if varId:
            more_args += ', int varid'

        ret = '// pattern: ' + pattern + '\n'
        ret += 'template<class T> static T * findmatch' + \
            str(findmatchnr) + '(T * start_tok' + more_args + ') {\n'
        ret += '    for (; start_tok' + endCondition + \
            '; start_tok = start_tok->next()) {\n'

        ret += self._compilePattern(pattern, -1, varId, True, 'T')
        ret += '    }\n'
        ret += '    return nullptr;\n}\n'

        return ret

    @staticmethod
    def parseMatch(line, pos1):
        """Split the call expression starting at line[pos1] into its arguments.

        Returns ``[call_text, arg1, ..., argN]`` where call_text runs from
        pos1 up to and including the closing parenthesis and every argument is
        the raw (unstripped) source text.  Parentheses and commas inside
        string literals are ignored.  Returns None if the call is not closed
        within 'line'.
        """
        parlevel = 0
        args = []
        argstart = 0
        pos = pos1
        inString = False
        while pos < len(line):
            ch = line[pos]
            if inString:
                if ch == '\\':
                    # skip the escaped character
                    pos += 1
                elif ch == '"':
                    inString = False
            elif ch == '"':
                inString = True
            elif ch == '(':
                parlevel += 1
                if parlevel == 1:
                    argstart = pos + 1
            elif ch == ')':
                parlevel -= 1
                if parlevel == 0:
                    ret = [line[pos1:pos + 1]]
                    ret.extend(args)
                    ret.append(line[argstart:pos])
                    return ret
            elif ch == ',' and parlevel == 1:
                args.append(line[argstart:pos])
                argstart = pos + 1
            pos += 1

        return None

    @staticmethod
    def _isInString(line, pos1):
        """Return True if position pos1 of 'line' lies inside a "..." literal.

        The scan stops as soon as pos1 is reached or passed; a backslash
        directly in front of pos1 makes the scan step over pos1, which must
        not run past the end of the line.
        """
        pos = 0
        inString = False
        while pos < pos1:
            if line[pos] == '\\':
                # skip the escaped character
                pos += 1
            elif line[pos] == '"':
                inString = not inString
            pos += 1
        return inString

    @staticmethod
    def _parseStringComparison(line, pos1):
        """Locate the first complete string literal at or after pos1.

        Returns ``(startPos, endPos)`` where startPos is the index of the
        opening quote and endPos the index just behind the closing quote, or
        None if no complete literal is found.
        """
        startPos = 0
        pos = pos1
        inString = False
        while pos < len(line):
            if inString:
                if line[pos] == '\\':
                    # skip the escaped character
                    pos += 1
                elif line[pos] == '"':
                    return startPos, pos + 1
            elif line[pos] == '"':
                startPos = pos
                inString = True
            pos += 1

        return None

    @staticmethod
    def _compileVerifyTokenMatch(
            is_simplematch, verifyNumber, pattern, patternNumber, varId):
        """Generate ``match_verify<verifyNumber>``.

        The generated function runs both the compiled ``match<patternNumber>``
        and the original Token::Match / Token::simpleMatch and throws
        InternalError when their results differ.
        """
        more_args = ''
        if varId:
            more_args = ', const int varid'

        ret = 'static bool match_verify' + \
            str(verifyNumber) + '(const Token *tok' + more_args + ') {\n'

        origMatchName = 'Match'
        if is_simplematch:
            origMatchName = 'simpleMatch'
            assert varId is None

        ret += '    bool res_compiled_match = match' + \
            str(patternNumber) + '(tok'
        if varId:
            ret += ', varid'
        ret += ');\n'

        ret += '    bool res_parsed_match = Token::' + \
            origMatchName + '(tok, "' + pattern + '"'
        if varId:
            ret += ', varid'
        ret += ');\n'

        ret += '\n'
        # Don't use assert() here, it's disabled for optimized builds.
        # We also need to verify builds in 'release' mode
        ret += '    if (res_parsed_match != res_compiled_match) {\n'
        ret += '        throw InternalError(tok, "Internal error. ' +\
            'Compiled match returned different result than parsed match: ' + pattern + '");\n'
        ret += '    }\n'
        ret += '    return res_compiled_match;\n'
        ret += '}\n'

        return ret

    def _replaceSpecificTokenMatch(
            self, is_simplematch, line, start_pos, end_pos, pattern, tok, varId):
        """Replace one Token::Match / Token::simpleMatch call in 'line'.

        start_pos is the index of the call in 'line', end_pos the length of
        the call text.  'tok' and 'varId' are the raw argument texts.  The
        match function is generated on first use of a pattern and reused
        afterwards.  Returns the rewritten line.
        """
        more_args = ''
        if varId:
            more_args = ',' + varId

        # Compile function or use previously compiled one
        patternNumber = self._lookupMatchFunctionId(
            pattern, None, varId, False)

        if patternNumber is None:
            patternNumber = len(self._rawMatchFunctions) + 1
            self._insertMatchFunctionId(
                patternNumber,
                pattern,
                None,
                varId,
                False)
            self._rawMatchFunctions.append(
                self._compilePattern(pattern, patternNumber, varId))

        functionName = "match"
        if self._verifyMode:
            verifyNumber = len(self._rawMatchFunctions) + 1
            self._rawMatchFunctions.append(
                self._compileVerifyTokenMatch(
                    is_simplematch,
                    verifyNumber,
                    pattern,
                    patternNumber,
                    varId))

            # inject verify function
            functionName = "match_verify"
            patternNumber = verifyNumber

        return (
            line[:start_pos] + functionName + str(
                patternNumber) + '(' + tok + more_args + ')' + line[start_pos + end_pos:]
        )

    def _replaceTokenMatch(self, line, linenr, filename):
        """Replace all Token::Match and Token::simpleMatch calls in 'line'.

        Calls whose pattern is not a single string literal are left untouched
        (and reported when show_skipped is set).  Processing of a call kind
        stops at the first call that is not closed on this line.
        """
        for func in ('Match', 'simpleMatch'):
            is_simplematch = func == 'simpleMatch'
            pattern_start = 0
            while True:
                pos1 = line.find('Token::' + func + '(', pattern_start)
                if pos1 == -1:
                    break

                res = self.parseMatch(line, pos1)
                if res is None:
                    break

                # assert that Token::Match has either 2 or 3 arguments
                assert len(res) in (3, 4)

                end_pos = len(res[0])
                tok = res[1]
                raw_pattern = res[2]
                varId = res[3] if len(res) == 4 else None

                # continue searching behind this call
                pattern_start = pos1 + end_pos
                res = self._CONST_PATTERN_RE.match(raw_pattern)
                if res is None:
                    if self._showSkipped:
                        print(filename + ":" + str(linenr) + " skipping match pattern:" + raw_pattern)
                    continue  # Non-const pattern - bailout

                pattern = res.group(1)
                orig_len = len(line)
                line = self._replaceSpecificTokenMatch(
                    is_simplematch,
                    line,
                    pos1,
                    end_pos,
                    pattern,
                    tok,
                    varId)
                # the replacement changed the line length; keep the search
                # position just behind the rewritten call
                pattern_start += len(line) - orig_len

        return line

    @staticmethod
    def _compileVerifyTokenFindMatch(
            is_findsimplematch, verifyNumber, pattern, patternNumber, endToken, varId):
        """Generate ``findmatch_verify<verifyNumber>``.

        The generated function runs both the compiled
        ``findmatch<patternNumber>`` and the original Token::findmatch /
        Token::findsimplematch and throws InternalError when they differ.
        """
        more_args = ''
        if endToken:
            more_args += ', const Token * endToken'
        if varId:
            more_args += ', const int varid'

        ret = 'template < class T > static T * findmatch_verify' + \
            str(verifyNumber) + '(T * tok' + more_args + ') {\n'

        origFindMatchName = 'findmatch'
        if is_findsimplematch:
            origFindMatchName = 'findsimplematch'
            assert varId is None

        ret += '    T * res_compiled_findmatch = findmatch' + \
            str(patternNumber) + '(tok'
        if endToken:
            ret += ', endToken'
        if varId:
            ret += ', varid'
        ret += ');\n'

        ret += '    T * res_parsed_findmatch = Token::' + \
            origFindMatchName + '(tok, "' + pattern + '"'
        if endToken:
            ret += ', endToken'
        if varId:
            ret += ', varid'
        ret += ');\n'

        ret += '\n'
        # Don't use assert() here, it's disabled for optimized builds.
        # We also need to verify builds in 'release' mode
        ret += '    if (res_parsed_findmatch != res_compiled_findmatch) {\n'
        ret += '        throw InternalError(tok, "Internal error. ' +\
            'Compiled findmatch returned different result than parsed findmatch: ' + pattern + '");\n'
        ret += '    }\n'
        ret += '    return res_compiled_findmatch;\n'
        ret += '}\n'

        return ret

    def _replaceSpecificFindTokenMatch(
            self, is_findsimplematch, line, start_pos, end_pos, pattern, tok, endToken, varId):
        """Replace one Token::findmatch / Token::findsimplematch call in 'line'.

        start_pos is the index of the call in 'line', end_pos the length of
        the call text.  'tok', 'endToken' and 'varId' are raw argument texts.
        Returns the rewritten line (the replaced call is followed by a space).
        """
        more_args = ''
        if endToken:
            more_args += ',' + endToken
        if varId:
            more_args += ',' + varId

        # Compile function or use previously compiled one
        findMatchNumber = self._lookupMatchFunctionId(
            pattern, endToken, varId, True)

        if findMatchNumber is None:
            findMatchNumber = len(self._rawMatchFunctions) + 1
            self._insertMatchFunctionId(
                findMatchNumber,
                pattern,
                endToken,
                varId,
                True)
            self._rawMatchFunctions.append(
                self._compileFindPattern(
                    pattern,
                    findMatchNumber,
                    endToken,
                    varId))

        functionName = "findmatch"
        if self._verifyMode:
            verifyNumber = len(self._rawMatchFunctions) + 1
            self._rawMatchFunctions.append(
                self._compileVerifyTokenFindMatch(
                    is_findsimplematch,
                    verifyNumber,
                    pattern,
                    findMatchNumber,
                    endToken,
                    varId))

            # inject verify function
            functionName = "findmatch_verify"
            findMatchNumber = verifyNumber

        return (
            line[:start_pos] + functionName + str(
                findMatchNumber) + '(' + tok + more_args + ') ' + line[start_pos + end_pos:]
        )

    def _replaceTokenFindMatch(self, line, linenr, filename):
        """Replace Token::findsimplematch and Token::findmatch calls in 'line'.

        Each iteration handles the first remaining call (findsimplematch
        first).  Processing stops at the first call that is not closed on this
        line or whose pattern is not a single string literal.
        """
        while True:
            is_findsimplematch = True
            pos1 = line.find('Token::findsimplematch(')
            if pos1 == -1:
                is_findsimplematch = False
                pos1 = line.find('Token::findmatch(')
            if pos1 == -1:
                break

            res = self.parseMatch(line, pos1)
            if res is None:
                break

            # assert that Token::find(simple)match has either 2, 3 or 4 arguments
            # (res additionally holds the call text in front of the arguments)
            assert 3 <= len(res) <= 5

            g0 = res[0]
            tok = res[1]
            pattern = res[2]

            # Check for varId
            varId = None
            if not is_findsimplematch and "%varid%" in g0:
                if len(res) == 5:
                    varId = res[4]
                else:
                    varId = res[3]

            # endToken support. We resolve the overloaded type by checking if varId is used or not.
            # Function prototypes:
            #     Token *findsimplematch(const Token *tok, const char pattern[]);
            #     Token *findsimplematch(const Token *tok, const char pattern[], const Token *end);
            #     Token *findmatch(const Token *tok, const char pattern[], int varId = 0);
            #     Token *findmatch(const Token *tok, const char pattern[], const Token *end, int varId = 0);
            endToken = None
            if ((is_findsimplematch and len(res) == 4) or
               (not is_findsimplematch and varId and (len(res) == 5)) or
               (not is_findsimplematch and varId is None and len(res) == 4)):
                endToken = res[3]

            res = self._CONST_PATTERN_RE.match(pattern)
            if res is None:
                if self._showSkipped:
                    print(filename + ":" + str(linenr) + " skipping findmatch pattern:" + pattern)
                break  # Non-const pattern - bailout

            pattern = res.group(1)
            line = self._replaceSpecificFindTokenMatch(
                is_findsimplematch,
                line,
                pos1,
                len(g0),
                pattern,
                tok,
                endToken,
                varId)

        return line

    def _replaceCStrings(self, line):
        """Wrap string literals used with == or != in
        ``MatchCompiler::makeConstString("...")``.

        Processing stops at the first comparison operator that itself lies
        inside a string literal, or when no complete literal follows it.
        """
        while True:
            match = self._CSTRING_COMPARE_RE.search(line)
            if not match:
                break

            if self._isInString(line, match.start()):
                break

            res = self._parseStringComparison(line, match.start())
            if res is None:
                break

            startPos, endPos = res
            text = line[startPos + 1:endPos - 1]
            # Use quote-free placeholders first so the rewritten comparison is
            # not found again by the search above.
            line = line[:startPos] + 'MatchCompiler::makeConstStringBegin' +\
                text + 'MatchCompiler::makeConstStringEnd' + line[endPos:]
        line = line.replace('MatchCompiler::makeConstStringBegin', 'MatchCompiler::makeConstString("')
        line = line.replace('MatchCompiler::makeConstStringEnd', '")')
        return line

    def convertFile(self, srcname, destname, line_directive):
        """Compile the file 'srcname' and write the result to 'destname'.

        Both files are read/written as UTF-8.  The include header and the
        generated functions are only written when a line was changed or a
        function was generated.  If line_directive is true, a
        ``#line 1 "<srcname>"`` directive is written in front of the code.
        """
        self._reset()

        with io.open(srcname, "rt", encoding="utf-8") as fin:
            srclines = fin.readlines()

        modified = False
        converted = []
        for linenr, line_orig in enumerate(srclines, 1):
            # Compile Token::Match and Token::simpleMatch
            line = self._replaceTokenMatch(line_orig, linenr, srcname)

            # Compile Token::findsimplematch
            line = self._replaceTokenFindMatch(line, linenr, srcname)

            # Cache plain C-strings in C++ strings
            line = self._replaceCStrings(line)

            if line != line_orig:
                modified = True

            converted.append(line)

        code = u''.join(converted)

        # Compute matchFunctions
        strFunctions = u''.join(self._rawMatchFunctions)

        lineno = u''
        if line_directive:
            lineno = u'#line 1 "' + srcname + '"\n'

        header = u'#include "matchcompiler.h"\n'
        header += u'#include <string>\n'
        header += u'#include <cstring>\n'
        if self._rawMatchFunctions:
            header += u'#include "errorlogger.h"\n'
            header += u'#include "token.h"\n'

        with io.open(destname, 'wt', encoding="utf-8") as fout:
            if modified or self._rawMatchFunctions:
                fout.write(header)
                fout.write(strFunctions)
            fout.write(lineno)
            fout.write(code)
656
657
def main():
    """Command-line entry point.

    Parses the arguments, makes sure the read directory exists and the write
    directory can be used, then converts every requested file (or, when none
    is given, every ``*.cpp`` file in the read directory) with MatchCompiler.
    Exits with status -1 if the read directory does not exist.
    """
    # Argument handling
    parser = argparse.ArgumentParser(
        description='Compile Token::Match() calls into native C++ code')
    parser.add_argument('--verify', action='store_true', default=False,
                        help='verify compiled matches against on-the-fly parser. Slow!')
    parser.add_argument('--show-skipped', action='store_true', default=False,
                        help='show skipped (non-static) patterns')
    parser.add_argument('--read-dir', default="lib",
                        help='directory from which files are read')
    parser.add_argument('--write-dir', default="build",
                        help='directory into which files are written')
    parser.add_argument('--prefix', default="",
                        help='prefix for build files')
    parser.add_argument('--line', action='store_true', default=False,
                        help='add line directive to input files into build files')
    parser.add_argument('file', nargs='*',
                        help='file to compile')
    args = parser.parse_args()
    lib_dir = args.read_dir
    build_dir = args.write_dir
    line_directive = args.line
    files = args.file

    # Check if we are invoked from the right place
    if not os.path.exists(lib_dir):
        print('Directory "' + lib_dir + '" not found.')
        sys.exit(-1)

    # Create build directory if needed
    try:
        os.makedirs(build_dir)
    except OSError as e:
        # due to race condition in case of parallel build,
        # makedirs may fail. Ignore that; if there's actual
        # problem with directory creation, it'll be caught
        # by the following isdir check
        if e.errno != errno.EEXIST:
            raise

    if not os.path.isdir(build_dir):
        raise Exception(build_dir + ' is not a directory')

    mc = MatchCompiler(verify_mode=args.verify,
                       show_skipped=args.show_skipped)

    if not files:
        # select all *.cpp files in lib_dir; strip the directory and the
        # separator so only the file name relative to lib_dir remains
        for f in glob.glob(lib_dir + '/*.cpp'):
            files.append(f[len(lib_dir) + 1:])

    # convert files
    for fi in files:
        pi = lib_dir + '/' + fi
        fo = args.prefix + fi
        po = build_dir + '/' + fo
        print(pi + ' => ' + po)
        mc.convertFile(pi, po, line_directive)
718
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
721