1#!/usr/bin/env python
2
3"""Produces a report of all assertions in the MongoDB server codebase.
4
5Parses .cpp files for assertions and verifies assertion codes are distinct.
6Optionally replaces zero codes in source code with new distinct values.
7"""
8
9import bisect
10import os
11import sys
12from . import utils
13from collections import defaultdict, namedtuple
14from optparse import OptionParser
15
16try:
17    import regex as re
18except ImportError:
19    print("*** Run 'pip2 install --user regex' to speed up error code checking")
20    import re
21
# Base names of the assertion macros that assignErrorCodes() rewrites.
ASSERT_NAMES = [
    "uassert",
    "massert",
    "fassert",
    "fassertFailed",
]

# First code that assignErrorCodes() hands out.
MINIMUM_CODE = 10000

# Module-wide list of every AssertLocation collected by readErrorCodes().
codes = []

# Each AssertLocation identifies the C++ source location of an assertion
AssertLocation = namedtuple(
    "AssertLocation",
    ["sourceFile", "byteOffset", "lines", "code"],
)

# When true, parseSourceFiles() prints the name of every file it scans.
list_files = False
31
# Of historical interest only
def assignErrorCodes():
    """Insert a fresh numeric code into every assertion call in the sources.

    For each name in ASSERT_NAMES, every line of every file returned by
    utils.getAllSourceFiles() that contains ``<name>(`` (and does not start
    with the matching ``#define``) gets a new code inserted as the first
    argument. Codes count up from MINIMUM_CODE. A file is only rewritten
    when at least one of its lines changed.
    """
    cur = MINIMUM_CODE
    for root in ASSERT_NAMES:
        marker = root + "("
        for x in utils.getAllSourceFiles():
            print( x )
            didAnything = False
            # Collect output lines in a list and join once at the end.
            fixed = []
            # Close the read handle before the same path is reopened for writing.
            with open( x ) as src:
                for line in src:
                    s = line.partition( marker )
                    if s[1] == "" or line.startswith( "#define " + root ):
                        # No call on this line, or it is the macro definition itself.
                        fixed.append( line )
                        continue
                    fixed.append( s[0] + root + "( " + str( cur ) + " , " + s[2] )
                    cur += 1
                    didAnything = True
            if didAnything:
                with open( x , 'w' ) as out:
                    out.write( "".join( fixed ) )
52
53
def parseSourceFiles( callback ):
    """Scan MongoDB source files and report each numeric assertion code found.

    Every file returned by utils.getAllSourceFiles(prefix='src/mongo/') is read
    in full. For each regex match, callback is invoked with one AssertLocation
    whose fields are: the file name, the offset of the code digits within the
    file text, the full matched text, and the code as a string.
    """

    # Cheap substring test used to skip files that cannot contain a match.
    keywords = ["assert", "Exception", "ErrorCodes::Error"]

    codePatterns = [
        re.compile( r"(?:u|m(?:sg)?)asser(?:t|ted)(?:NoTrace)?\s*\(\s*(\d+)", re.MULTILINE ) ,
        re.compile( r"(?:DB|Assertion)Exception\s*[({]\s*(\d+)", re.MULTILINE ),
        re.compile( r"fassert(?:Failed)?(?:WithStatus)?(?:NoTrace)?(?:StatusOK)?\s*\(\s*(\d+)",
                    re.MULTILINE ),
        re.compile( r"ErrorCodes::Error\s*[({]\s*(\d+)", re.MULTILINE )
    ]

    for sourceFile in utils.getAllSourceFiles(prefix='src/mongo/'):
        if list_files:
            print ('scanning file: ' + sourceFile)

        with open(sourceFile) as f:
            text = f.read()

            if any(keyword in text for keyword in keywords):
                for pattern in codePatterns:
                    for found in pattern.finditer(text):
                        # The location carries the whole matched text but points at
                        # the first digit of the code, so that editors land on the
                        # spot that needs to change.
                        callback( AssertLocation(sourceFile=sourceFile,
                                                 byteOffset=found.start(1),
                                                 lines=found.group(0),
                                                 code=found.group(1)) )
92
# Converts an absolute position in a file into a (line, column) pair.
def getLineAndColumnForPosition(loc, _file_cache={}):
    """Return the 1-based (line, column) of loc.byteOffset within loc.sourceFile.

    Args:
        loc: object with sourceFile and byteOffset attributes.
        _file_cache: memo of line-start offsets per file. The default dict is
            shared between calls, so each file is read only once.
    """
    path = loc.sourceFile
    if path not in _file_cache:
        with open(path) as f:
            contents = f.read()
        # starts[i] is the offset at which line i+1 begins; the final entry is
        # the total length of the text.
        starts = [0]
        running = 0
        for piece in contents.splitlines(True):
            running += len(piece)
            starts.append(running)
        _file_cache[path] = starts

    offsets = _file_cache[path]
    # Because the list begins with 0, bisect's result is already a 1-based line.
    line = bisect.bisect(offsets, loc.byteOffset)
    column = loc.byteOffset - offsets[line - 1] + 1
    return (line, column)
107
def isTerminated( lines ):
    """Report whether the given .cpp/.h source lines contain a finished assert.

    The lines count as terminated when they contain a semicolon, or when they
    hold at least as many closing parentheses as opening ones.
    """
    joined = " ".join(lines)
    if ';' in joined:
        return True
    unclosed = joined.count('(') - joined.count(')')
    return unclosed <= 0
113
114
def getNextCode():
    """Finds next unused assertion code.

    Called by: SConstruct and main()
    Since SConstruct calls us, codes[] must be global OR WE REPARSE EVERYTHING

    Returns:
        int, one more than the numerically highest code in the global codes list.

    Raises:
        ValueError: if no assertion codes are known even after scanning.
    """
    if not codes:
        readErrorCodes()

    # Codes are stored as strings, so convert before comparing. max() is used
    # instead of reduce(), which is not a builtin on Python 3.
    highest = max( int(loc.code) for loc in codes )
    return highest + 1
127
128
def checkErrorCodes():
    """Run the scan and return True only when it reported no errors.

    SConstruct expects a boolean response from this function.
    """
    _allCodes, problems = readErrorCodes()
    return not problems
134
135
def readErrorCodes():
    """Scan the sources, record every assertion location and report problems.

    Defines a callback, calls parseSourceFiles() with it, and saves all matches
    to the global codes list. Zero codes and duplicated codes are printed and
    collected as errors.

    Returns:
        (codes, errors): the global list of every AssertLocation found, and the
        list of AssertLocations that are duplicates or use code "0".
    """
    # Empty the shared list in place so that calling this function more than
    # once does not accumulate repeated entries. Existing references to the
    # list stay valid.
    del codes[:]

    seen = {}
    errors = []
    dups = defaultdict(list)

    # define callback
    def checkDups( assertLoc ):
        codes.append( assertLoc )
        code = assertLoc.code

        if code not in seen:
            seen[code] = assertLoc
            return

        if code not in dups:
            # on first duplicate, add original to dups, errors
            dups[code].append( seen[code] )
            errors.append( seen[code] )

        dups[code].append( assertLoc )
        errors.append( assertLoc )

    parseSourceFiles( checkDups )

    if "0" in seen:
        bad = seen["0"]
        # A zero code that is also duplicated was already added to errors by
        # the callback; do not list it a second time.
        if "0" not in dups:
            errors.append( bad )
        line, col = getLineAndColumnForPosition(bad)
        print( "ZERO_CODE:" )
        print( "  %s:%d:%d:%s" % (bad.sourceFile, line, col, bad.lines) )

    for code, locations in dups.items():
        print( "DUPLICATE IDS: %s" % code )
        for loc in locations:
            line, col = getLineAndColumnForPosition(loc)
            print( "  %s:%d:%d:%s" % (loc.sourceFile, line, col, loc.lines) )

    return (codes, errors)
177
178
def replaceBadCodes( errors, nextCode ):
    """Rewrites C++ source files so that zero assertion codes get real values.

    Only zero codes are replaced; every other error is reported as skipped.

    Args:
        errors: list of AssertLocation
        nextCode: int, next non-conflicting assertion code
    """
    zeroLocs = []
    otherLocs = []
    for err in errors:
        if int(err.code) == 0:
            zeroLocs.append(err)
        else:
            otherLocs.append(err)

    for skipped in otherLocs:
        line, col = getLineAndColumnForPosition(skipped)
        print ("SKIPPING NONZERO code=%s: %s:%d:%d"
                % (skipped.code, skipped.sourceFile, line, col))

    # Work on unique locations from the highest offset down, so that an edit
    # never shifts the offset of a location that is still to be processed.
    for target in sorted(set(zeroLocs), reverse=True):
        sourceFile = target.sourceFile
        byteOffset = target.byteOffset
        lineNum = getLineAndColumnForPosition(target)[0]
        print ("UPDATING_FILE: %s:%s" % (sourceFile, lineNum))

        lineIndex = lineNum - 1

        with open(sourceFile, 'r+') as handle:
            before = handle.readlines()[lineIndex].rstrip()
            print ("LINE_%d_BEFORE:%s" % (lineNum, before))

            handle.seek(0)
            contents = handle.read()
            assert contents[byteOffset] == '0'

            # Swap the single '0' character for the new code and write the
            # whole text back from the start of the file.
            patched = contents[:byteOffset] + str(nextCode) + contents[byteOffset+1:]
            handle.seek(0)
            handle.write(patched)
            handle.seek(0)

            after = handle.readlines()[lineIndex].rstrip()
            print ("LINE_%d_AFTER :%s" % (lineNum, after))
        nextCode += 1
217
218
def getBestMessage( lines , codeStr ):
    """Pulls the human-readable message out of one AssertionLocation.lines entry

    Args:
        lines: a string, or a list of contiguous C++ source lines
        codeStr: assertion code found in first line

    Returns:
        The cleaned-up message text, or "" when nothing quoted follows the code.
    """
    if isinstance(lines, str):
        joined = lines
    else:
        joined = " ".join(lines)

    _, _, tail = joined.partition( codeStr )
    if not tail:
        return ""

    # Keep everything between the first and the last double quote.
    quoted = re.search(r'"(.*)"', tail)
    if quoted is None:
        return ""

    # Applied in order: each step assumes the previous ones already ran.
    rewrites = (
        (r'" +"', ''),                    # adjacent string literals
        (r'" *<< *"', ''),                # literals joined directly by <<
        (r'" *<<[^<]+<< *"', '<X>'),      # an expression streamed between literals
        (r'" *\+[^+]+\+ *"', '<X>'),      # an expression concatenated between literals
        (r'\\"', ''),                     # escaped quotes
        (r'".*$', ''),                    # any leftover quote and the text after it
    )

    message = quoted.group(1)
    for pattern, replacement in rewrites:
        message = re.sub(pattern, replacement, message)

    return message.strip()
251
def main():
    """Command-line entry point: scan the sources, report, and optionally fix.

    Exits with status 0 when no errors were found and with status 1 when errors
    were found and --fix was not requested. With --fix, zero codes are replaced
    in the source files.
    """
    parser = OptionParser(description=__doc__.strip())

    # (flags, destination attribute, help text) for each boolean switch.
    switches = (
        (("--fix",), "replace",
         "Fix zero codes in source files [default: %default]"),
        (("-q", "--quiet"), "quiet",
         "Suppress output on success [default: %default]"),
        (("--list-files",), "list_files",
         "Print the name of each file as it is scanned [default: %default]"),
    )
    for flags, dest, helpText in switches:
        parser.add_option(*flags, dest=dest,
                          action="store_true", default=False,
                          help=helpText)
    (options, args) = parser.parse_args()

    global list_files
    list_files = options.list_files

    (_allCodes, errors) = readErrorCodes()
    ok = not errors

    if ok and options.quiet:
        return

    nextCode = getNextCode()

    print("ok: %s" % ok)
    print("next: %s" % nextCode)

    if ok:
        sys.exit(0)

    if options.replace:
        replaceBadCodes(errors, nextCode)
        return

    print (ERROR_HELP)
    sys.exit(1)
286
287
# Help text printed by main() when errors were found and --fix was not given.
ERROR_HELP = """
ERRORS DETECTED. To correct, run "buildscripts/errorcodes.py --fix" to replace zero codes.
Other errors require manual correction.
"""

# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
    main()
295