416 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
			
		
		
	
	
			416 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
| #!/usr/bin/env python
 | |
| """Calls C-Reduce to create a minimal reproducer for clang crashes.
 | |
| 
 | |
| Output files:
 | |
|   *.reduced.sh -- crash reproducer with minimal arguments
 | |
|   *.reduced.cpp -- the reduced file
 | |
|   *.test.sh -- interestingness test for C-Reduce
 | |
| """
 | |
| 
 | |
| from __future__ import print_function
 | |
| from argparse import ArgumentParser, RawTextHelpFormatter
 | |
| import os
 | |
| import re
 | |
| import stat
 | |
| import sys
 | |
| import subprocess
 | |
| import pipes
 | |
| import shlex
 | |
| import tempfile
 | |
| import shutil
 | |
| from distutils.spawn import find_executable
 | |
| 
 | |
| verbose = False
 | |
| creduce_cmd = None
 | |
| clang_cmd = None
 | |
| not_cmd = None
 | |
| 
 | |
| def verbose_print(*args, **kwargs):
 | |
|   if verbose:
 | |
|     print(*args, **kwargs)
 | |
| 
 | |
| def check_file(fname):
 | |
|   fname = os.path.normpath(fname)
 | |
|   if not os.path.isfile(fname):
 | |
|     sys.exit("ERROR: %s does not exist" % (fname))
 | |
|   return fname
 | |
| 
 | |
| def check_cmd(cmd_name, cmd_dir, cmd_path=None):
 | |
|   """
 | |
|   Returns absolute path to cmd_path if it is given,
 | |
|   or absolute path to cmd_dir/cmd_name.
 | |
|   """
 | |
|   if cmd_path:
 | |
|     # Make the path absolute so the creduce test can be run from any directory.
 | |
|     cmd_path = os.path.abspath(cmd_path)
 | |
|     cmd = find_executable(cmd_path)
 | |
|     if cmd:
 | |
|       return cmd
 | |
|     sys.exit("ERROR: executable `%s` not found" % (cmd_path))
 | |
| 
 | |
|   cmd = find_executable(cmd_name, path=cmd_dir)
 | |
|   if cmd:
 | |
|     return cmd
 | |
| 
 | |
|   if not cmd_dir:
 | |
|     cmd_dir = "$PATH"
 | |
|   sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
 | |
| 
 | |
| def quote_cmd(cmd):
 | |
|   return ' '.join(pipes.quote(arg) for arg in cmd)
 | |
| 
 | |
| def write_to_script(text, filename):
 | |
|   with open(filename, 'w') as f:
 | |
|     f.write(text)
 | |
|   os.chmod(filename, os.stat(filename).st_mode | stat.S_IEXEC)
 | |
| 
 | |
| class Reduce(object):
 | |
|   def __init__(self, crash_script, file_to_reduce):
 | |
|     crash_script_name, crash_script_ext = os.path.splitext(crash_script)
 | |
|     file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)
 | |
| 
 | |
|     self.testfile = file_reduce_name + '.test.sh'
 | |
|     self.crash_script = crash_script_name + '.reduced' + crash_script_ext
 | |
|     self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
 | |
|     shutil.copy(file_to_reduce, self.file_to_reduce)
 | |
| 
 | |
|     self.clang = clang_cmd
 | |
|     self.clang_args = []
 | |
|     self.expected_output = []
 | |
|     self.is_crash = True
 | |
|     self.creduce_flags = ["--tidy"]
 | |
| 
 | |
|     self.read_clang_args(crash_script, file_to_reduce)
 | |
|     self.read_expected_output()
 | |
| 
 | |
|   def get_crash_cmd(self, cmd=None, args=None, filename=None):
 | |
|     if not cmd:
 | |
|       cmd = self.clang
 | |
|     if not args:
 | |
|       args = self.clang_args
 | |
|     if not filename:
 | |
|       filename = self.file_to_reduce
 | |
| 
 | |
|     return [cmd] + args + [filename]
 | |
| 
 | |
|   def read_clang_args(self, crash_script, filename):
 | |
|     print("\nReading arguments from crash script...")
 | |
|     with open(crash_script) as f:
 | |
|       # Assume clang call is the first non comment line.
 | |
|       cmd = []
 | |
|       for line in f:
 | |
|         if not line.lstrip().startswith('#'):
 | |
|           cmd = shlex.split(line)
 | |
|           break
 | |
|     if not cmd:
 | |
|       sys.exit("Could not find command in the crash script.");
 | |
| 
 | |
|     # Remove clang and filename from the command
 | |
|     # Assume the last occurrence of the filename is the clang input file
 | |
|     del cmd[0]
 | |
|     for i in range(len(cmd)-1, -1, -1):
 | |
|       if cmd[i] == filename:
 | |
|         del cmd[i]
 | |
|         break
 | |
|     self.clang_args = cmd
 | |
|     verbose_print("Clang arguments:", quote_cmd(self.clang_args))
 | |
| 
 | |
|   def read_expected_output(self):
 | |
|     print("\nGetting expected crash output...")
 | |
|     p = subprocess.Popen(self.get_crash_cmd(),
 | |
|                          stdout=subprocess.PIPE,
 | |
|                          stderr=subprocess.STDOUT)
 | |
|     crash_output, _ = p.communicate()
 | |
|     result = []
 | |
| 
 | |
|     # Remove color codes
 | |
|     ansi_escape = r'\x1b\[[0-?]*m'
 | |
|     crash_output = re.sub(ansi_escape, '', crash_output.decode('utf-8'))
 | |
| 
 | |
|     # Look for specific error messages
 | |
|     regexes = [r"Assertion `(.+)' failed", # Linux assert()
 | |
|                r"Assertion failed: (.+),", # FreeBSD/Mac assert()
 | |
|                r"fatal error: error in backend: (.+)",
 | |
|                r"LLVM ERROR: (.+)",
 | |
|                r"UNREACHABLE executed (at .+)?!",
 | |
|                r"LLVM IR generation of declaration '(.+)'",
 | |
|                r"Generating code for declaration '(.+)'",
 | |
|                r"\*\*\* Bad machine code: (.+) \*\*\*"]
 | |
|     for msg_re in regexes:
 | |
|       match = re.search(msg_re, crash_output)
 | |
|       if match:
 | |
|         msg = match.group(1)
 | |
|         result = [msg]
 | |
|         print("Found message:", msg)
 | |
| 
 | |
|         if "fatal error:" in msg_re:
 | |
|           self.is_crash = False
 | |
|         break
 | |
| 
 | |
|     # If no message was found, use the top five stack trace functions,
 | |
|     # ignoring some common functions
 | |
|     # Five is a somewhat arbitrary number; the goal is to get a small number
 | |
|     # of identifying functions with some leeway for common functions
 | |
|     if not result:
 | |
|       stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
 | |
|       filters = ["PrintStackTraceSignalHandler",
 | |
|                  "llvm::sys::RunSignalHandlers",
 | |
|                  "SignalHandler", "__restore_rt", "gsignal", "abort"]
 | |
|       matches = re.findall(stacktrace_re, crash_output)
 | |
|       result = [x for x in matches if x and x.strip() not in filters][:5]
 | |
|       for msg in result:
 | |
|         print("Found stack trace function:", msg)
 | |
| 
 | |
|     if not result:
 | |
|       print("ERROR: no crash was found")
 | |
|       print("The crash output was:\n========\n%s========" % crash_output)
 | |
|       sys.exit(1)
 | |
| 
 | |
|     self.expected_output = result
 | |
| 
 | |
|   def check_expected_output(self, args=None, filename=None):
 | |
|     if not args:
 | |
|       args = self.clang_args
 | |
|     if not filename:
 | |
|       filename = self.file_to_reduce
 | |
| 
 | |
|     p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
 | |
|                          stdout=subprocess.PIPE,
 | |
|                          stderr=subprocess.STDOUT)
 | |
|     crash_output, _ = p.communicate()
 | |
|     return all(msg in crash_output.decode('utf-8') for msg in
 | |
|                self.expected_output)
 | |
| 
 | |
|   def write_interestingness_test(self):
 | |
|     print("\nCreating the interestingness test...")
 | |
| 
 | |
|     crash_flag = "--crash" if self.is_crash else ""
 | |
| 
 | |
|     output = "#!/bin/bash\n%s %s %s >& t.log || exit 1\n" % \
 | |
|         (pipes.quote(not_cmd), crash_flag, quote_cmd(self.get_crash_cmd()))
 | |
| 
 | |
|     for msg in self.expected_output:
 | |
|       output += 'grep -F %s t.log || exit 1\n' % pipes.quote(msg)
 | |
| 
 | |
|     write_to_script(output, self.testfile)
 | |
|     self.check_interestingness()
 | |
| 
 | |
|   def check_interestingness(self):
 | |
|     testfile = os.path.abspath(self.testfile)
 | |
| 
 | |
|     # Check that the test considers the original file interesting
 | |
|     with open(os.devnull, 'w') as devnull:
 | |
|       returncode = subprocess.call(testfile, stdout=devnull)
 | |
|     if returncode:
 | |
|       sys.exit("The interestingness test does not pass for the original file.")
 | |
| 
 | |
|     # Check that an empty file is not interesting
 | |
|     # Instead of modifying the filename in the test file, just run the command
 | |
|     with tempfile.NamedTemporaryFile() as empty_file:
 | |
|       is_interesting = self.check_expected_output(filename=empty_file.name)
 | |
|     if is_interesting:
 | |
|       sys.exit("The interestingness test passes for an empty file.")
 | |
| 
 | |
|   def clang_preprocess(self):
 | |
|     print("\nTrying to preprocess the source file...")
 | |
|     with tempfile.NamedTemporaryFile() as tmpfile:
 | |
|       cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
 | |
|       cmd_preprocess_no_lines = cmd_preprocess + ['-P']
 | |
|       try:
 | |
|         subprocess.check_call(cmd_preprocess_no_lines)
 | |
|         if self.check_expected_output(filename=tmpfile.name):
 | |
|           print("Successfully preprocessed with line markers removed")
 | |
|           shutil.copy(tmpfile.name, self.file_to_reduce)
 | |
|         else:
 | |
|           subprocess.check_call(cmd_preprocess)
 | |
|           if self.check_expected_output(filename=tmpfile.name):
 | |
|             print("Successfully preprocessed without removing line markers")
 | |
|             shutil.copy(tmpfile.name, self.file_to_reduce)
 | |
|           else:
 | |
|             print("No longer crashes after preprocessing -- "
 | |
|                   "using original source")
 | |
|       except subprocess.CalledProcessError:
 | |
|         print("Preprocessing failed")
 | |
| 
 | |
|   @staticmethod
 | |
|   def filter_args(args, opts_equal=[], opts_startswith=[],
 | |
|                   opts_one_arg_startswith=[]):
 | |
|     result = []
 | |
|     skip_next = False
 | |
|     for arg in args:
 | |
|       if skip_next:
 | |
|         skip_next = False
 | |
|         continue
 | |
|       if any(arg == a for a in opts_equal):
 | |
|         continue
 | |
|       if any(arg.startswith(a) for a in opts_startswith):
 | |
|         continue
 | |
|       if any(arg.startswith(a) for a in opts_one_arg_startswith):
 | |
|         skip_next = True
 | |
|         continue
 | |
|       result.append(arg)
 | |
|     return result
 | |
| 
 | |
|   def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
 | |
|     new_args = self.filter_args(args, **kwargs)
 | |
| 
 | |
|     if extra_arg:
 | |
|       if extra_arg in new_args:
 | |
|         new_args.remove(extra_arg)
 | |
|       new_args.append(extra_arg)
 | |
| 
 | |
|     if (new_args != args and
 | |
|         self.check_expected_output(args=new_args)):
 | |
|       if msg:
 | |
|         verbose_print(msg)
 | |
|       return new_args
 | |
|     return args
 | |
| 
 | |
|   def try_remove_arg_by_index(self, args, index):
 | |
|     new_args = args[:index] + args[index+1:]
 | |
|     removed_arg = args[index]
 | |
| 
 | |
|     # Heuristic for grouping arguments:
 | |
|     # remove next argument if it doesn't start with "-"
 | |
|     if index < len(new_args) and not new_args[index].startswith('-'):
 | |
|       del new_args[index]
 | |
|       removed_arg += ' ' + args[index+1]
 | |
| 
 | |
|     if self.check_expected_output(args=new_args):
 | |
|       verbose_print("Removed", removed_arg)
 | |
|       return new_args, index
 | |
|     return args, index+1
 | |
| 
 | |
|   def simplify_clang_args(self):
 | |
|     """Simplify clang arguments before running C-Reduce to reduce the time the
 | |
|     interestingness test takes to run.
 | |
|     """
 | |
|     print("\nSimplifying the clang command...")
 | |
| 
 | |
|     # Remove some clang arguments to speed up the interestingness test
 | |
|     new_args = self.clang_args
 | |
|     new_args = self.try_remove_args(new_args,
 | |
|                                     msg="Removed debug info options",
 | |
|                                     opts_startswith=["-gcodeview",
 | |
|                                                      "-debug-info-kind=",
 | |
|                                                      "-debugger-tuning="])
 | |
| 
 | |
|     new_args = self.try_remove_args(new_args,
 | |
|                                     msg="Removed --show-includes",
 | |
|                                     opts_startswith=["--show-includes"])
 | |
|     # Not suppressing warnings (-w) sometimes prevents the crash from occurring
 | |
|     # after preprocessing
 | |
|     new_args = self.try_remove_args(new_args,
 | |
|                                     msg="Replaced -W options with -w",
 | |
|                                     extra_arg='-w',
 | |
|                                     opts_startswith=["-W"])
 | |
|     new_args = self.try_remove_args(new_args,
 | |
|                                     msg="Replaced optimization level with -O0",
 | |
|                                     extra_arg="-O0",
 | |
|                                     opts_startswith=["-O"])
 | |
| 
 | |
|     # Try to remove compilation steps
 | |
|     new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
 | |
|                                     extra_arg="-emit-llvm")
 | |
|     new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
 | |
|                                     extra_arg="-fsyntax-only")
 | |
| 
 | |
|     # Try to make implicit int an error for more sensible test output
 | |
|     new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
 | |
|                                     opts_equal=["-w"],
 | |
|                                     extra_arg="-Werror=implicit-int")
 | |
| 
 | |
|     self.clang_args = new_args
 | |
|     verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))
 | |
| 
 | |
|   def reduce_clang_args(self):
 | |
|     """Minimize the clang arguments after running C-Reduce, to get the smallest
 | |
|     command that reproduces the crash on the reduced file.
 | |
|     """
 | |
|     print("\nReducing the clang crash command...")
 | |
| 
 | |
|     new_args = self.clang_args
 | |
| 
 | |
|     # Remove some often occurring args
 | |
|     new_args = self.try_remove_args(new_args, msg="Removed -D options",
 | |
|                                     opts_startswith=["-D"])
 | |
|     new_args = self.try_remove_args(new_args, msg="Removed -D options",
 | |
|                                     opts_one_arg_startswith=["-D"])
 | |
|     new_args = self.try_remove_args(new_args, msg="Removed -I options",
 | |
|                                     opts_startswith=["-I"])
 | |
|     new_args = self.try_remove_args(new_args, msg="Removed -I options",
 | |
|                                     opts_one_arg_startswith=["-I"])
 | |
|     new_args = self.try_remove_args(new_args, msg="Removed -W options",
 | |
|                                     opts_startswith=["-W"])
 | |
| 
 | |
|     # Remove other cases that aren't covered by the heuristic
 | |
|     new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
 | |
|                                     opts_one_arg_startswith=["-mllvm"])
 | |
| 
 | |
|     i = 0
 | |
|     while i < len(new_args):
 | |
|       new_args, i = self.try_remove_arg_by_index(new_args, i)
 | |
| 
 | |
|     self.clang_args = new_args
 | |
| 
 | |
|     reduced_cmd = quote_cmd(self.get_crash_cmd())
 | |
|     write_to_script(reduced_cmd, self.crash_script)
 | |
|     print("Reduced command:", reduced_cmd)
 | |
| 
 | |
|   def run_creduce(self):
 | |
|     print("\nRunning C-Reduce...")
 | |
|     try:
 | |
|       p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
 | |
|                            [self.testfile, self.file_to_reduce])
 | |
|       p.communicate()
 | |
|     except KeyboardInterrupt:
 | |
|       # Hack to kill C-Reduce because it jumps into its own pgid
 | |
|       print('\n\nctrl-c detected, killed creduce')
 | |
|       p.kill()
 | |
| 
 | |
| def main():
 | |
|   global verbose
 | |
|   global creduce_cmd
 | |
|   global clang_cmd
 | |
|   global not_cmd
 | |
| 
 | |
|   parser = ArgumentParser(description=__doc__,
 | |
|                           formatter_class=RawTextHelpFormatter)
 | |
|   parser.add_argument('crash_script', type=str, nargs=1,
 | |
|                       help="Name of the script that generates the crash.")
 | |
|   parser.add_argument('file_to_reduce', type=str, nargs=1,
 | |
|                       help="Name of the file to be reduced.")
 | |
|   parser.add_argument('--llvm-bin', dest='llvm_bin', type=str,
 | |
|                       help="Path to the LLVM bin directory.")
 | |
|   parser.add_argument('--llvm-not', dest='llvm_not', type=str,
 | |
|                       help="The path to the `not` executable. "
 | |
|                       "By default uses the llvm-bin directory.")
 | |
|   parser.add_argument('--clang', dest='clang', type=str,
 | |
|                       help="The path to the `clang` executable. "
 | |
|                       "By default uses the llvm-bin directory.")
 | |
|   parser.add_argument('--creduce', dest='creduce', type=str,
 | |
|                       help="The path to the `creduce` executable. "
 | |
|                       "Required if `creduce` is not in PATH environment.")
 | |
|   parser.add_argument('-v', '--verbose', action='store_true')
 | |
|   args = parser.parse_args()
 | |
| 
 | |
|   verbose = args.verbose
 | |
|   llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
 | |
|   creduce_cmd = check_cmd('creduce', None, args.creduce)
 | |
|   clang_cmd = check_cmd('clang', llvm_bin, args.clang)
 | |
|   not_cmd = check_cmd('not', llvm_bin, args.llvm_not)
 | |
| 
 | |
|   crash_script = check_file(args.crash_script[0])
 | |
|   file_to_reduce = check_file(args.file_to_reduce[0])
 | |
| 
 | |
|   r = Reduce(crash_script, file_to_reduce)
 | |
| 
 | |
|   r.simplify_clang_args()
 | |
|   r.write_interestingness_test()
 | |
|   r.clang_preprocess()
 | |
|   r.run_creduce()
 | |
|   r.reduce_clang_args()
 | |
| 
 | |
| if __name__ == '__main__':
 | |
|   main()
 |