#!/usr/bin/env python

"""Produces a report of all assertions in the MongoDB server codebase.

Parses .cpp files for assertions and verifies assertion codes are distinct.
Optionally replaces zero codes in source code with new distinct values.
"""

import bisect
import os
import sys
from collections import defaultdict, namedtuple
from optparse import OptionParser

try:
    from . import utils
except (ImportError, ValueError):
    # A relative import fails when this file is executed directly as a script
    # (ValueError on Python 2, ImportError on Python 3), which is how
    # ERROR_HELP below tells users to invoke it.  Fall back to importing the
    # module by its plain name; this assumes utils.py sits next to this
    # script so that it is reachable through sys.path[0].
    import utils

try:
    import regex as re
except ImportError:
    print("*** Run 'pip2 install --user regex' to speed up error code checking")
    import re

ASSERT_NAMES = ["uassert", "massert", "fassert", "fassertFailed"]
MINIMUM_CODE = 10000

# Every AssertLocation found so far; filled in by readErrorCodes().
codes = []

# Each AssertLocation identifies the C++ source location of an assertion
AssertLocation = namedtuple("AssertLocation", ['sourceFile', 'byteOffset', 'lines', 'code'])

# When true, parseSourceFiles() prints the name of every file it scans.
list_files = False


# Of historical interest only
def assignErrorCodes():
    """Rewrite source files, inserting a fresh numeric code into assertion calls.

    For each name in ASSERT_NAMES and each file yielded by
    utils.getAllSourceFiles(), every line containing ``<name>(`` (other than a
    ``#define <name>`` line) has ``<name>( <code> , `` spliced in at the first
    occurrence.  Codes start at MINIMUM_CODE and increase by one per rewritten
    line.  A file is written back only if at least one line changed.
    """
    cur = MINIMUM_CODE
    for root in ASSERT_NAMES:
        for x in utils.getAllSourceFiles():
            print(x)
            didAnything = False
            pieces = []
            # Use a context manager so the input handle is closed before the
            # same path is reopened for writing below.
            with open(x) as src:
                for line in src:
                    before, sep, after = line.partition(root + "(")
                    if sep == "" or line.startswith("#define " + root):
                        pieces.append(line)
                        continue
                    pieces.append(before + root + "( " + str(cur) + " , " + after)
                    cur = cur + 1
                    didAnything = True
            if didAnything:
                with open(x, 'w') as out:
                    out.write("".join(pieces))


def parseSourceFiles(callback):
    """Walks MongoDB sourcefiles and invokes callback for each AssertLocation found.

    Args:
        callback: callable taking one AssertLocation; called once per regex
            match, pattern by pattern, within each scanned file.
    """

    # Cheap substring pre-filter: a file containing none of these cannot match
    # any of the patterns below, so the regex scan is skipped for it.
    quick = ["assert", "Exception", "ErrorCodes::Error"]

    patterns = [
        re.compile(r"(?:u|m(?:sg)?)asser(?:t|ted)(?:NoTrace)?\s*\(\s*(\d+)", re.MULTILINE),
        re.compile(r"(?:DB|Assertion)Exception\s*[({]\s*(\d+)", re.MULTILINE),
        re.compile(r"fassert(?:Failed)?(?:WithStatus)?(?:NoTrace)?(?:StatusOK)?\s*\(\s*(\d+)",
                   re.MULTILINE),
        re.compile(r"ErrorCodes::Error\s*[({]\s*(\d+)", re.MULTILINE),
    ]

    for sourceFile in utils.getAllSourceFiles(prefix='src/mongo/'):
        if list_files:
            print('scanning file: ' + sourceFile)

        with open(sourceFile) as f:
            text = f.read()

        # Generator (not a list) so any() stops at the first hit.
        if not any(zz in text for zz in quick):
            continue

        for pattern in patterns:
            for match in pattern.finditer(text):
                code = match.group(1)
                codeOffset = match.start(1)

                # Note that this will include the text of the full match but will report the
                # position of the beginning of the code portion rather than the beginning of the
                # match. This is to position editors on the spot that needs to change.
                thisLoc = AssertLocation(sourceFile,
                                         codeOffset,
                                         text[match.start():match.end()],
                                         code)

                callback(thisLoc)


# Converts an absolute position in a file into a line number.
def getLineAndColumnForPosition(loc, _file_cache={}):
    """Return the 1-based (line, column) of ``loc.byteOffset`` in ``loc.sourceFile``.

    Args:
        loc: an AssertLocation (only sourceFile and byteOffset are read).
        _file_cache: deliberately a shared mutable default.  It memoizes, per
            file name, the list of offsets at which each line starts, so each
            file is read at most once per process.  The cache is never
            invalidated, so offsets go stale if the file is rewritten later.
    """
    if loc.sourceFile not in _file_cache:
        with open(loc.sourceFile) as f:
            text = f.read()
        line_offsets = [0]
        for line in text.splitlines(True):
            line_offsets.append(line_offsets[-1] + len(line))
        _file_cache[loc.sourceFile] = line_offsets

    offsets = _file_cache[loc.sourceFile]

    # These are both 1-based, but line is handled by starting the list with 0.
    line = bisect.bisect(offsets, loc.byteOffset)
    column = loc.byteOffset - offsets[line - 1] + 1
    return (line, column)


def isTerminated(lines):
    """Given .cpp/.h source lines as text, determine if assert is terminated.

    The lines are joined with spaces; the result is true when the joined text
    contains a ';' or has at least as many ')' as '('.
    """
    x = " ".join(lines)
    return ';' in x or x.count('(') - x.count(')') <= 0


def getNextCode():
    """Finds next unused assertion code.

    Called by: SConstruct and main()
    Since SConstruct calls us, codes[] must be global OR WE REPARSE EVERYTHING

    Returns:
        int, one more than the largest code currently recorded in ``codes``.

    Raises:
        ValueError: if no assertion codes were found at all.
    """
    if not codes:
        readErrorCodes()

    # max() over a generator replaces the Python-2-only builtin reduce(),
    # which is a NameError on Python 3.
    highest = max(int(loc.code) for loc in codes)
    return highest + 1


def checkErrorCodes():
    """SConstruct expects a boolean response from this function.

    Returns:
        True when readErrorCodes() reported no errors, False otherwise.
    """
    _, errors = readErrorCodes()
    return len(errors) == 0


def readErrorCodes():
    """Defines callback, calls parseSourceFiles() with callback,
    and saves matches to global codes list.

    Prints a ZERO_CODE report for the first location seen with code "0" and a
    DUPLICATE IDS report for every code that occurs more than once.

    Returns:
        (codes, errors): the global list of every AssertLocation found, and
        the list of AssertLocations that are duplicates or zero codes.  A
        location may appear in ``errors`` more than once (for example a zero
        code that is also duplicated).
    """
    seen = {}
    errors = []
    dups = defaultdict(list)

    # define callback
    def checkDups(assertLoc):
        codes.append(assertLoc)
        code = assertLoc.code

        if code not in seen:
            seen[code] = assertLoc
            return

        if code not in dups:
            # on first duplicate, add original to dups, errors
            dups[code].append(seen[code])
            errors.append(seen[code])

        dups[code].append(assertLoc)
        errors.append(assertLoc)

    parseSourceFiles(checkDups)

    if "0" in seen:
        bad = seen["0"]
        errors.append(bad)
        line, col = getLineAndColumnForPosition(bad)
        print("ZERO_CODE:")
        print(" %s:%d:%d:%s" % (bad.sourceFile, line, col, bad.lines))

    for code, locations in dups.items():
        print("DUPLICATE IDS: %s" % code)
        for loc in locations:
            line, col = getLineAndColumnForPosition(loc)
            print(" %s:%d:%d:%s" % (loc.sourceFile, line, col, loc.lines))

    return (codes, errors)


def replaceBadCodes(errors, nextCode):
    """Modifies C++ source files to replace invalid assertion codes.
    For now, we only modify zero codes.

    Args:
        errors: list of AssertLocation
        nextCode: int, next non-conflicting assertion code

    Raises:
        AssertionError: if the character at a recorded offset is not '0',
            i.e. the file no longer matches what was scanned.
    """
    zero_errors = [e for e in errors if int(e.code) == 0]
    skip_errors = [e for e in errors if int(e.code) != 0]

    for loc in skip_errors:
        line, col = getLineAndColumnForPosition(loc)
        print("SKIPPING NONZERO code=%s: %s:%d:%d"
              % (loc.code, loc.sourceFile, line, col))

    # Dedupe, sort, and reverse so we don't have to update offsets as we go.
    # (Editing from the end of each file backwards leaves every earlier offset,
    # and the cached line table for those offsets, still valid.)
    for assertLoc in reversed(sorted(set(zero_errors))):
        (sourceFile, byteOffset, lines, code) = assertLoc
        lineNum, _ = getLineAndColumnForPosition(assertLoc)
        print("UPDATING_FILE: %s:%s" % (sourceFile, lineNum))

        ln = lineNum - 1

        with open(sourceFile, 'r+') as f:
            text = f.read()

            # Index lines with the same splitting that produced lineNum.
            print("LINE_%d_BEFORE:%s" % (lineNum, text.splitlines(True)[ln].rstrip()))

            # Explicit check rather than `assert`: it must not be stripped by
            # `python -O`, since it guards against corrupting a source file.
            if text[byteOffset] != '0':
                raise AssertionError(
                    "expected '0' at offset %d of %s" % (byteOffset, sourceFile))

            updated = text[:byteOffset] + str(nextCode) + text[byteOffset + 1:]
            f.seek(0)
            f.write(updated)
            f.truncate()

            print("LINE_%d_AFTER :%s" % (lineNum, updated.splitlines(True)[ln].rstrip()))

        nextCode += 1


def getBestMessage(lines, codeStr):
    """Extracts message from one AssertionLocation.lines entry

    Args:
        lines: list of contiguous C++ source lines
        codeStr: assertion code found in first line

    Returns:
        The text found between the outermost double quotes after the first
        occurrence of codeStr, with adjacent literals merged, streamed or
        concatenated expressions replaced by "<X>", and escaped quotes removed;
        "" when codeStr is the last thing in the text or no quoted text follows.
    """
    line = lines if isinstance(lines, str) else " ".join(lines)

    err = line.partition(codeStr)[2]
    if not err:
        return ""

    # Trim to outer quotes
    m = re.search(r'"(.*)"', err)
    if not m:
        return ""
    err = m.group(1)

    # Trim inner quote pairs
    err = re.sub(r'" +"', '', err)
    err = re.sub(r'" *<< *"', '', err)
    err = re.sub(r'" *<<[^<]+<< *"', '<X>', err)
    err = re.sub(r'" *\+[^+]+\+ *"', '<X>', err)

    # Trim escaped quotes
    err = re.sub(r'\\"', '', err)

    # Iff doublequote still present, trim that and any trailing text
    err = re.sub(r'".*$', '', err)

    return err.strip()


def main():
    """Command-line entry point: scan, report, and optionally fix zero codes.

    Exits with status 0 when no errors were found and status 1 when errors
    were found and --fix was not given.  With --quiet and no errors, returns
    without printing the summary.
    """
    parser = OptionParser(description=__doc__.strip())
    parser.add_option("--fix", dest="replace",
                      action="store_true", default=False,
                      help="Fix zero codes in source files [default: %default]")
    parser.add_option("-q", "--quiet", dest="quiet",
                      action="store_true", default=False,
                      help="Suppress output on success [default: %default]")
    parser.add_option("--list-files", dest="list_files",
                      action="store_true", default=False,
                      help="Print the name of each file as it is scanned [default: %default]")
    (options, args) = parser.parse_args()

    global list_files
    list_files = options.list_files

    _, errors = readErrorCodes()
    ok = len(errors) == 0

    if ok and options.quiet:
        return

    # Named next_code rather than `next` to avoid shadowing the builtin.
    next_code = getNextCode()

    print("ok: %s" % ok)
    print("next: %s" % next_code)

    if ok:
        sys.exit(0)
    elif options.replace:
        replaceBadCodes(errors, next_code)
    else:
        print(ERROR_HELP)
        sys.exit(1)


ERROR_HELP = """
ERRORS DETECTED. To correct, run "buildscripts/errorcodes.py --fix" to replace zero codes.
Other errors require manual correction.
"""

if __name__ == "__main__":
    main()