#!/usr/bin/env python3
"""Calls C-Reduce to create a minimal reproducer for clang crashes.

Output files:
  *.reduced.sh -- crash reproducer with minimal arguments
  *.reduced.cpp -- the reduced file
  *.test.sh -- interestingness test for C-Reduce
"""

from __future__ import print_function
from argparse import ArgumentParser, RawTextHelpFormatter
import os
import re
import stat
import sys
import subprocess
import shlex
import tempfile
import shutil
import multiprocessing

# `pipes` (removed in Python 3.13) and `distutils` (removed in Python 3.12)
# are only used as fallbacks for interpreters that lack the modern
# replacements `shlex.quote` and `shutil.which`.
try:
    from shlex import quote as shell_quote
except ImportError:  # Python 2
    from pipes import quote as shell_quote

try:
    from shutil import which as find_executable
except ImportError:  # Python 2
    from distutils.spawn import find_executable

# Module-level settings filled in by main().
verbose = False
creduce_cmd = None
clang_cmd = None


def verbose_print(*args, **kwargs):
    """Forward the arguments to print(), but only when --verbose was given."""
    if verbose:
        print(*args, **kwargs)


def check_file(fname):
    """Return the normalized path of `fname`, exiting if it is not a file."""
    fname = os.path.normpath(fname)
    if not os.path.isfile(fname):
        sys.exit("ERROR: %s does not exist" % (fname))
    return fname


def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """
    Returns absolute path to cmd_path if it is given,
    or the path of cmd_name found in cmd_dir (in $PATH when cmd_dir is
    None). Exits the program when the executable cannot be found.
    """
    if cmd_path:
        # Make the path absolute so the creduce test can be run from any
        # directory.
        cmd_path = os.path.abspath(cmd_path)
        cmd = find_executable(cmd_path)
        if cmd:
            return cmd
        sys.exit("ERROR: executable `%s` not found" % (cmd_path))

    cmd = find_executable(cmd_name, path=cmd_dir)
    if cmd:
        return cmd

    if not cmd_dir:
        cmd_dir = "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))


def quote_cmd(cmd):
    """Join the argument list `cmd` into one shell-quoted command string."""
    return ' '.join(shell_quote(arg) for arg in cmd)


def write_to_script(text, filename):
    """Write `text` to `filename` and mark the file as owner-executable."""
    with open(filename, 'w') as f:
        f.write(text)
    os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)


class Reduce(object):
    """Drives the reduction of one clang crash.

    Constructing an instance copies the input file to `<name>.reduced<ext>`,
    reads the clang arguments from the crash script and runs clang once to
    record the output that identifies the crash.
    """

    def __init__(self, crash_script, file_to_reduce, core_number):
        """
        crash_script   -- script whose first command line is the clang call
        file_to_reduce -- source file that triggers the crash
        core_number    -- value passed to C-Reduce's `--n` option
        """
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + '.test.sh'
        self.crash_script = crash_script_name + '.reduced' + crash_script_ext
        self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
        # Work on a copy so the original input is left untouched.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        # Both flags must be passed; a second plain assignment here used to
        # silently drop "--tidy".
        self.creduce_flags = ["--tidy", "--n", str(core_number)]

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the command list `[cmd] + args + [filename]`.

        Omitted values default to the instance's clang path, clang arguments
        and reduced file. An explicitly passed empty `args` list is honoured
        (it means "no arguments"), it does not fall back to the default.
        """
        if not cmd:
            cmd = self.clang
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + list(args) + [filename]

    def read_clang_args(self, crash_script, filename):
        """Extract the clang arguments from `crash_script` into
        self.clang_args, dropping the compiler and the input file name."""
        print("\nReading arguments from crash script...")
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            # Blank lines are skipped as well; they cannot be the command.
            cmd = []
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command once and store in self.expected_output the
        strings that identify the crash: a known error message if one is
        present, otherwise up to five stack trace function names. Exits when
        neither is found."""
        print("\nGetting expected crash output...")
        p = subprocess.Popen(self.get_crash_cmd(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        result = []

        # Remove color codes
        ansi_escape = r'\x1b\[[0-?]*m'
        # Compiler output is not guaranteed to be valid UTF-8; replace
        # undecodable bytes instead of raising.
        crash_output = re.sub(ansi_escape, '',
                              crash_output.decode('utf-8', 'replace'))

        # Look for specific error messages
        regexes = [r"Assertion .+ failed",  # Linux assert()
                   r"Assertion failed: .+,",  # FreeBSD/Mac assert()
                   r"fatal error: error in backend: .+",
                   r"LLVM ERROR: .+",
                   r"UNREACHABLE executed at .+?!",
                   r"LLVM IR generation of declaration '.+'",
                   r"Generating code for declaration '.+'",
                   r"\*\*\* Bad machine code: .+ \*\*\*",
                   r"ERROR: .*Sanitizer: [^ ]+ "]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small
        # number of identifying functions with some leeway for common
        # functions
        if not result:
            self.needs_stack_trace = True
            stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
            filters = ["PrintStackTrace", "RunSignalHandlers",
                       "CleanupOnSignal", "HandleCrash", "SignalHandler",
                       "__restore_rt", "gsignal", "abort"]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" %
                  crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Run clang with `args` on `filename` and return True when every
        string in self.expected_output appears in the combined output.

        `args` defaults to self.clang_args only when it is None, so an empty
        list really tests the command without any arguments.
        """
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT)
        crash_output, _ = p.communicate()
        decoded_output = crash_output.decode('utf-8', 'replace')
        return all(msg in decoded_output for msg in self.expected_output)

    def write_interestingness_test(self):
        """Write the bash interestingness test to self.testfile and verify
        it with check_interestingness()."""
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow
        # symbolization.
        disable_symbolization = ''
        if not self.needs_stack_trace:
            disable_symbolization = 'export LLVM_DISABLE_SYMBOLIZATION=1'

        # The test fails (exit 1) when clang succeeds or when any expected
        # string is missing from the log.
        output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (disable_symbolization, quote_cmd(self.get_crash_cmd()))

        for msg in self.expected_output:
            output += 'grep -F %s t.log || exit 1\n' % shell_quote(msg)

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit unless the test script passes for the current file and the
        crash does not reproduce on an empty file."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        with open(os.devnull, 'w') as devnull:
            returncode = subprocess.call(testfile, stdout=devnull)
        if returncode:
            sys.exit("The interestingness test does not pass for the "
                     "original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the
        # command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(
                filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the file to reduce with its preprocessed form when the
        crash still reproduces on it; first without line markers (-P), then
        with them. Otherwise keep the current file."""
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ['-P']
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers "
                          "removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing "
                              "line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print("No longer crashes after preprocessing -- "
                              "using original source")
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(args, opts_equal=(), opts_startswith=(),
                    opts_one_arg_startswith=()):
        """Return a new list with the selected options removed from `args`.

        opts_equal              -- drop arguments equal to one of these
        opts_startswith         -- drop arguments starting with one of these
        opts_one_arg_startswith -- drop arguments starting with one of these
                                   together with the argument that follows
        """
        starts = tuple(opts_startswith)
        starts_one_arg = tuple(opts_one_arg_startswith)
        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if any(arg == a for a in opts_equal):
                continue
            if arg.startswith(starts):
                continue
            if arg.startswith(starts_one_arg):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Filter `args` with filter_args(**kwargs), optionally append
        `extra_arg`, and return the new list if it differs from `args` and
        still reproduces the crash; otherwise return `args` unchanged."""
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            # Move an already present extra_arg to the end rather than
            # duplicating it.
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if (new_args != args and
                self.check_expected_output(args=new_args)):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping the argument at `index` (and its apparent value).

        Returns (new_args, index) when the crash still reproduces, so the
        caller retries the same position, or (args, index + 1) otherwise.
        """
        new_args = args[:index] + args[index + 1:]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith('-'):
            del new_args[index]
            removed_arg += ' ' + args[index + 1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index + 1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the
        time the interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.clang_args
        new_args = self.try_remove_args(new_args,
                                        msg="Removed debug info options",
                                        opts_startswith=["-gcodeview",
                                                         "-debug-info-kind=",
                                                         "-debugger-tuning="])

        new_args = self.try_remove_args(new_args,
                                        msg="Removed --show-includes",
                                        opts_startswith=["--show-includes"])
        # Not suppressing warnings (-w) sometimes prevents the crash from
        # occurring after preprocessing
        new_args = self.try_remove_args(new_args,
                                        msg="Replaced -W options with -w",
                                        extra_arg='-w',
                                        opts_startswith=["-W"])
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"])

        # Try to remove compilation steps
        new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                        extra_arg="-emit-llvm")
        new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                        extra_arg="-fsyntax-only")

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(new_args,
                                        msg="Added -Werror=implicit-int",
                                        opts_equal=["-w"],
                                        extra_arg="-Werror=implicit-int")

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the
        smallest command that reproduces the crash on the reduced file.
        The resulting command is written to self.crash_script.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Remove some often occurring args
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                        opts_one_arg_startswith=["-D"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                        opts_one_arg_startswith=["-I"])
        new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                        opts_startswith=["-W"])

        # Remove other cases that aren't covered by the heuristic
        new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                        opts_one_arg_startswith=["-mllvm"])

        # Then try every remaining argument one position at a time.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run C-Reduce on the reduced file with the interestingness test,
        killing it when the user presses ctrl-c."""
        print("\nRunning C-Reduce...")
        # Started outside the try block so the handler below never sees an
        # unbound process object.
        p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                             [self.testfile, self.file_to_reduce])
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print('\n\nctrl-c detected, killed creduce')
            p.kill()


def main():
    """Parse the command line, then simplify the command, create the
    interestingness test, preprocess, run C-Reduce and reduce the
    arguments."""
    global verbose
    global creduce_cmd
    global clang_cmd

    parser = ArgumentParser(description=__doc__,
                            formatter_class=RawTextHelpFormatter)
    parser.add_argument('crash_script', type=str, nargs=1,
                        help="Name of the script that generates the crash.")
    parser.add_argument('file_to_reduce', type=str, nargs=1,
                        help="Name of the file to be reduced.")
    parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                        help="Path to the LLVM bin directory.")
    parser.add_argument('--clang', dest='clang', type=str,
                        help="The path to the `clang` executable. "
                        "By default uses the llvm-bin directory.")
    parser.add_argument('--creduce', dest='creduce', type=str,
                        help="The path to the `creduce` executable. "
                        "Required if `creduce` is not in PATH environment.")
    parser.add_argument('--n', dest='core_number', type=int,
                        default=max(4, multiprocessing.cpu_count() // 2),
                        help="Number of cores to use.")
    parser.add_argument('-v', '--verbose', action='store_true')
    args = parser.parse_args()

    verbose = args.verbose
    llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
    creduce_cmd = check_cmd('creduce', None, args.creduce)
    clang_cmd = check_cmd('clang', llvm_bin, args.clang)
    core_number = args.core_number

    crash_script = check_file(args.crash_script[0])
    file_to_reduce = check_file(args.file_to_reduce[0])

    r = Reduce(crash_script, file_to_reduce, core_number)

    r.simplify_clang_args()
    r.write_interestingness_test()
    r.clang_preprocess()
    r.run_creduce()
    r.reduce_clang_args()


if __name__ == '__main__':
    main()