1#!/usr/bin/env python
2"""Calls C-Reduce to create a minimal reproducer for clang crashes.
3
4Output files:
5  *.reduced.sh -- crash reproducer with minimal arguments
6  *.reduced.cpp -- the reduced file
7  *.test.sh -- interestingness test for C-Reduce
8"""
9
10from __future__ import print_function
11from argparse import ArgumentParser, RawTextHelpFormatter
12import os
13import re
14import stat
15import sys
16import subprocess
17import pipes
18import shlex
19import tempfile
20import shutil
21from distutils.spawn import find_executable
22import multiprocessing
23
# Module-wide settings. main() rebinds all three via `global` after parsing
# the command line.

# When true, verbose_print() forwards its arguments to print().
verbose = False
# Path of the creduce executable launched by Reduce.run_creduce().
creduce_cmd = None
# Path of the clang executable; Reduce.__init__ copies it into self.clang.
clang_cmd = None
27
def verbose_print(*args, **kwargs):
  """Forwards all arguments to print(), but only when the module-level
  `verbose` flag is set; otherwise does nothing.
  """
  if not verbose:
    return
  print(*args, **kwargs)
31
def check_file(fname):
  """
  Returns the normalized form of fname if it names an existing regular file.
  Otherwise terminates the program with an error message.
  """
  normalized = os.path.normpath(fname)
  if os.path.isfile(normalized):
    return normalized
  sys.exit("ERROR: %s does not exist" % (normalized))
37
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
  """
  Locates an executable and returns the path reported by find_executable.

  If cmd_path is given it is made absolute and looked up directly; cmd_name
  and cmd_dir are then ignored. Otherwise cmd_name is searched for in cmd_dir
  (find_executable's default search path is used when cmd_dir is empty).
  Terminates the program with an error message if nothing is found.
  """
  if cmd_path:
    # An absolute path lets the creduce test be run from any directory.
    absolute = os.path.abspath(cmd_path)
    resolved = find_executable(absolute)
    if not resolved:
      sys.exit("ERROR: executable `%s` not found" % (absolute))
    return resolved

  resolved = find_executable(cmd_name, path=cmd_dir)
  if resolved:
    return resolved
  sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir or "$PATH"))
58
def quote_cmd(cmd):
  """
  Returns the argument list cmd as a single shell-escaped command line,
  with the arguments separated by single spaces.
  """
  # shlex.quote is the documented API on Python 3; pipes.quote is only needed
  # on Python 2, and the pipes module is gone from newer Python 3 releases.
  try:
    from shlex import quote
  except ImportError:
    from pipes import quote
  return ' '.join(quote(arg) for arg in cmd)
61
def write_to_script(text, filename):
  """
  Writes text to filename (overwriting it) and then adds the owner-execute
  bit to the file's existing permissions.
  """
  with open(filename, 'w') as script:
    script.write(text)
  current_mode = os.stat(filename).st_mode
  os.chmod(filename, current_mode | stat.S_IEXEC)
66
67class Reduce(object):
68  def __init__(self, crash_script, file_to_reduce, core_number):
69    crash_script_name, crash_script_ext = os.path.splitext(crash_script)
70    file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)
71
72    self.testfile = file_reduce_name + '.test.sh'
73    self.crash_script = crash_script_name + '.reduced' + crash_script_ext
74    self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
75    shutil.copy(file_to_reduce, self.file_to_reduce)
76
77    self.clang = clang_cmd
78    self.clang_args = []
79    self.expected_output = []
80    self.needs_stack_trace = False
81    self.creduce_flags = ["--tidy"]
82    self.creduce_flags = ["--n", str(core_number)]
83
84    self.read_clang_args(crash_script, file_to_reduce)
85    self.read_expected_output()
86
87  def get_crash_cmd(self, cmd=None, args=None, filename=None):
88    if not cmd:
89      cmd = self.clang
90    if not args:
91      args = self.clang_args
92    if not filename:
93      filename = self.file_to_reduce
94
95    return [cmd] + args + [filename]
96
97  def read_clang_args(self, crash_script, filename):
98    print("\nReading arguments from crash script...")
99    with open(crash_script) as f:
100      # Assume clang call is the first non comment line.
101      cmd = []
102      for line in f:
103        if not line.lstrip().startswith('#'):
104          cmd = shlex.split(line)
105          break
106    if not cmd:
107      sys.exit("Could not find command in the crash script.");
108
109    # Remove clang and filename from the command
110    # Assume the last occurrence of the filename is the clang input file
111    del cmd[0]
112    for i in range(len(cmd)-1, -1, -1):
113      if cmd[i] == filename:
114        del cmd[i]
115        break
116    self.clang_args = cmd
117    verbose_print("Clang arguments:", quote_cmd(self.clang_args))
118
119  def read_expected_output(self):
120    print("\nGetting expected crash output...")
121    p = subprocess.Popen(self.get_crash_cmd(),
122                         stdout=subprocess.PIPE,
123                         stderr=subprocess.STDOUT)
124    crash_output, _ = p.communicate()
125    result = []
126
127    # Remove color codes
128    ansi_escape = r'\x1b\[[0-?]*m'
129    crash_output = re.sub(ansi_escape, '', crash_output.decode('utf-8'))
130
131    # Look for specific error messages
132    regexes = [r"Assertion .+ failed", # Linux assert()
133               r"Assertion failed: .+,", # FreeBSD/Mac assert()
134               r"fatal error: error in backend: .+",
135               r"LLVM ERROR: .+",
136               r"UNREACHABLE executed at .+?!",
137               r"LLVM IR generation of declaration '.+'",
138               r"Generating code for declaration '.+'",
139               r"\*\*\* Bad machine code: .+ \*\*\*",
140               r"ERROR: .*Sanitizer: [^ ]+ "]
141    for msg_re in regexes:
142      match = re.search(msg_re, crash_output)
143      if match:
144        msg = match.group(0)
145        result = [msg]
146        print("Found message:", msg)
147        break
148
149    # If no message was found, use the top five stack trace functions,
150    # ignoring some common functions
151    # Five is a somewhat arbitrary number; the goal is to get a small number
152    # of identifying functions with some leeway for common functions
153    if not result:
154      self.needs_stack_trace = True
155      stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
156      filters = ["PrintStackTrace", "RunSignalHandlers", "CleanupOnSignal",
157                 "HandleCrash", "SignalHandler", "__restore_rt", "gsignal", "abort"]
158      def skip_function(func_name):
159        return any(name in func_name for name in filters)
160
161      matches = re.findall(stacktrace_re, crash_output)
162      result = [x for x in matches if x and not skip_function(x)][:5]
163      for msg in result:
164        print("Found stack trace function:", msg)
165
166    if not result:
167      print("ERROR: no crash was found")
168      print("The crash output was:\n========\n%s========" % crash_output)
169      sys.exit(1)
170
171    self.expected_output = result
172
173  def check_expected_output(self, args=None, filename=None):
174    if not args:
175      args = self.clang_args
176    if not filename:
177      filename = self.file_to_reduce
178
179    p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
180                         stdout=subprocess.PIPE,
181                         stderr=subprocess.STDOUT)
182    crash_output, _ = p.communicate()
183    return all(msg in crash_output.decode('utf-8') for msg in
184               self.expected_output)
185
186  def write_interestingness_test(self):
187    print("\nCreating the interestingness test...")
188
189    # Disable symbolization if it's not required to avoid slow symbolization.
190    disable_symbolization = ''
191    if not self.needs_stack_trace:
192      disable_symbolization = 'export LLVM_DISABLE_SYMBOLIZATION=1'
193
194    output = """#!/bin/bash
195%s
196if %s >& t.log ; then
197  exit 1
198fi
199""" % (disable_symbolization, quote_cmd(self.get_crash_cmd()))
200
201    for msg in self.expected_output:
202      output += 'grep -F %s t.log || exit 1\n' % pipes.quote(msg)
203
204    write_to_script(output, self.testfile)
205    self.check_interestingness()
206
207  def check_interestingness(self):
208    testfile = os.path.abspath(self.testfile)
209
210    # Check that the test considers the original file interesting
211    with open(os.devnull, 'w') as devnull:
212      returncode = subprocess.call(testfile, stdout=devnull)
213    if returncode:
214      sys.exit("The interestingness test does not pass for the original file.")
215
216    # Check that an empty file is not interesting
217    # Instead of modifying the filename in the test file, just run the command
218    with tempfile.NamedTemporaryFile() as empty_file:
219      is_interesting = self.check_expected_output(filename=empty_file.name)
220    if is_interesting:
221      sys.exit("The interestingness test passes for an empty file.")
222
223  def clang_preprocess(self):
224    print("\nTrying to preprocess the source file...")
225    with tempfile.NamedTemporaryFile() as tmpfile:
226      cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
227      cmd_preprocess_no_lines = cmd_preprocess + ['-P']
228      try:
229        subprocess.check_call(cmd_preprocess_no_lines)
230        if self.check_expected_output(filename=tmpfile.name):
231          print("Successfully preprocessed with line markers removed")
232          shutil.copy(tmpfile.name, self.file_to_reduce)
233        else:
234          subprocess.check_call(cmd_preprocess)
235          if self.check_expected_output(filename=tmpfile.name):
236            print("Successfully preprocessed without removing line markers")
237            shutil.copy(tmpfile.name, self.file_to_reduce)
238          else:
239            print("No longer crashes after preprocessing -- "
240                  "using original source")
241      except subprocess.CalledProcessError:
242        print("Preprocessing failed")
243
244  @staticmethod
245  def filter_args(args, opts_equal=[], opts_startswith=[],
246                  opts_one_arg_startswith=[]):
247    result = []
248    skip_next = False
249    for arg in args:
250      if skip_next:
251        skip_next = False
252        continue
253      if any(arg == a for a in opts_equal):
254        continue
255      if any(arg.startswith(a) for a in opts_startswith):
256        continue
257      if any(arg.startswith(a) for a in opts_one_arg_startswith):
258        skip_next = True
259        continue
260      result.append(arg)
261    return result
262
263  def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
264    new_args = self.filter_args(args, **kwargs)
265
266    if extra_arg:
267      if extra_arg in new_args:
268        new_args.remove(extra_arg)
269      new_args.append(extra_arg)
270
271    if (new_args != args and
272        self.check_expected_output(args=new_args)):
273      if msg:
274        verbose_print(msg)
275      return new_args
276    return args
277
278  def try_remove_arg_by_index(self, args, index):
279    new_args = args[:index] + args[index+1:]
280    removed_arg = args[index]
281
282    # Heuristic for grouping arguments:
283    # remove next argument if it doesn't start with "-"
284    if index < len(new_args) and not new_args[index].startswith('-'):
285      del new_args[index]
286      removed_arg += ' ' + args[index+1]
287
288    if self.check_expected_output(args=new_args):
289      verbose_print("Removed", removed_arg)
290      return new_args, index
291    return args, index+1
292
293  def simplify_clang_args(self):
294    """Simplify clang arguments before running C-Reduce to reduce the time the
295    interestingness test takes to run.
296    """
297    print("\nSimplifying the clang command...")
298
299    # Remove some clang arguments to speed up the interestingness test
300    new_args = self.clang_args
301    new_args = self.try_remove_args(new_args,
302                                    msg="Removed debug info options",
303                                    opts_startswith=["-gcodeview",
304                                                     "-debug-info-kind=",
305                                                     "-debugger-tuning="])
306
307    new_args = self.try_remove_args(new_args,
308                                    msg="Removed --show-includes",
309                                    opts_startswith=["--show-includes"])
310    # Not suppressing warnings (-w) sometimes prevents the crash from occurring
311    # after preprocessing
312    new_args = self.try_remove_args(new_args,
313                                    msg="Replaced -W options with -w",
314                                    extra_arg='-w',
315                                    opts_startswith=["-W"])
316    new_args = self.try_remove_args(new_args,
317                                    msg="Replaced optimization level with -O0",
318                                    extra_arg="-O0",
319                                    opts_startswith=["-O"])
320
321    # Try to remove compilation steps
322    new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
323                                    extra_arg="-emit-llvm")
324    new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
325                                    extra_arg="-fsyntax-only")
326
327    # Try to make implicit int an error for more sensible test output
328    new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
329                                    opts_equal=["-w"],
330                                    extra_arg="-Werror=implicit-int")
331
332    self.clang_args = new_args
333    verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))
334
335  def reduce_clang_args(self):
336    """Minimize the clang arguments after running C-Reduce, to get the smallest
337    command that reproduces the crash on the reduced file.
338    """
339    print("\nReducing the clang crash command...")
340
341    new_args = self.clang_args
342
343    # Remove some often occurring args
344    new_args = self.try_remove_args(new_args, msg="Removed -D options",
345                                    opts_startswith=["-D"])
346    new_args = self.try_remove_args(new_args, msg="Removed -D options",
347                                    opts_one_arg_startswith=["-D"])
348    new_args = self.try_remove_args(new_args, msg="Removed -I options",
349                                    opts_startswith=["-I"])
350    new_args = self.try_remove_args(new_args, msg="Removed -I options",
351                                    opts_one_arg_startswith=["-I"])
352    new_args = self.try_remove_args(new_args, msg="Removed -W options",
353                                    opts_startswith=["-W"])
354
355    # Remove other cases that aren't covered by the heuristic
356    new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
357                                    opts_one_arg_startswith=["-mllvm"])
358
359    i = 0
360    while i < len(new_args):
361      new_args, i = self.try_remove_arg_by_index(new_args, i)
362
363    self.clang_args = new_args
364
365    reduced_cmd = quote_cmd(self.get_crash_cmd())
366    write_to_script(reduced_cmd, self.crash_script)
367    print("Reduced command:", reduced_cmd)
368
369  def run_creduce(self):
370    print("\nRunning C-Reduce...")
371    try:
372      p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
373                           [self.testfile, self.file_to_reduce])
374      p.communicate()
375    except KeyboardInterrupt:
376      # Hack to kill C-Reduce because it jumps into its own pgid
377      print('\n\nctrl-c detected, killed creduce')
378      p.kill()
379
def main():
  """Parses the command line and runs the whole reduction.

  Sets the module-level `verbose`, `creduce_cmd` and `clang_cmd`, validates
  the two input files, then simplifies the clang arguments, writes the
  interestingness test, preprocesses the source, runs C-Reduce and finally
  minimizes the clang command.
  """
  global verbose
  global creduce_cmd
  global clang_cmd

  parser = ArgumentParser(description=__doc__,
                          formatter_class=RawTextHelpFormatter)
  parser.add_argument('crash_script', type=str, nargs=1,
                      help="Name of the script that generates the crash.")
  parser.add_argument('file_to_reduce', type=str, nargs=1,
                      help="Name of the file to be reduced.")
  parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
                      help="Path to the LLVM bin directory.")
  parser.add_argument('--clang', dest='clang', type=str,
                      help="The path to the `clang` executable. "
                      "By default uses the llvm-bin directory.")
  parser.add_argument('--creduce', dest='creduce', type=str,
                      help="The path to the `creduce` executable. "
                      "Required if `creduce` is not in PATH environment.")
  # Floor division keeps the default an int; with `/` Python 3 would produce
  # a float and C-Reduce would be started with e.g. `--n 8.0`.
  parser.add_argument('--n', dest='core_number', type=int,
                      default=max(4, multiprocessing.cpu_count() // 2),
                      help="Number of cores to use.")
  parser.add_argument('-v', '--verbose', action='store_true')
  args = parser.parse_args()

  verbose = args.verbose
  llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
  creduce_cmd = check_cmd('creduce', None, args.creduce)
  clang_cmd = check_cmd('clang', llvm_bin, args.clang)
  core_number = args.core_number

  # nargs=1 makes argparse store each positional as a one-element list.
  crash_script = check_file(args.crash_script[0])
  file_to_reduce = check_file(args.file_to_reduce[0])

  r = Reduce(crash_script, file_to_reduce, core_number)

  r.simplify_clang_args()
  r.write_interestingness_test()
  r.clang_preprocess()
  r.run_creduce()
  r.reduce_clang_args()
421
# Run the reduction only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
  main()
424