llvm-project
484 строки · 16.3 Кб
#!/usr/bin/env python3
"""Calls C-Reduce to create a minimal reproducer for clang crashes.
Unknown arguments are treated as creduce options.

Output files:
*.reduced.sh -- crash reproducer with minimal arguments
*.reduced.cpp -- the reduced file
*.test.sh -- interestingness test for C-Reduce
"""
10
11from __future__ import print_function12from argparse import ArgumentParser, RawTextHelpFormatter13import os14import re15import shutil16import stat17import sys18import subprocess19import shlex20import tempfile21import shutil22import multiprocessing23
# Process-wide settings. main() overwrites all three once the command line
# has been parsed; until then they hold these placeholder values.
verbose = False  # when true, verbose_print() forwards its arguments to print()
clang_cmd = None  # path of the clang executable, resolved by check_cmd()
creduce_cmd = None  # path of the creduce executable, resolved by check_cmd()
28
def verbose_print(*args, **kwargs):
    """Forward the arguments to print() only when the global `verbose` is set."""
    if not verbose:
        return
    print(*args, **kwargs)
33
def check_file(fname):
    """Return the normalized form of `fname`, exiting if it is not a regular file.

    The path is normalized with os.path.normpath before the existence check,
    and it is the normalized path that appears in the error message.
    """
    normalized = os.path.normpath(fname)
    if os.path.isfile(normalized):
        return normalized
    sys.exit("ERROR: %s does not exist" % (normalized))
40
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
    """Resolve an executable and return its path, exiting if it cannot be found.

    When `cmd_path` is given it is made absolute and must itself be an
    executable; `cmd_name` and `cmd_dir` are then ignored.  Otherwise
    `cmd_name` is looked up in `cmd_dir`, or in the PATH environment when
    `cmd_dir` is empty or None.
    """
    if cmd_path:
        # Make the path absolute so the creduce test can be run from any
        # directory.
        absolute = os.path.abspath(cmd_path)
        resolved = shutil.which(absolute)
        if not resolved:
            sys.exit("ERROR: executable `%s` not found" % (absolute))
        return resolved

    resolved = shutil.which(cmd_name, path=cmd_dir)
    if resolved:
        return resolved

    searched = cmd_dir if cmd_dir else "$PATH"
    sys.exit("ERROR: `%s` not found in %s" % (cmd_name, searched))
62
def quote_cmd(cmd):
    """Render an argv list as one shell-safe, space-separated string."""
    quoted_words = map(shlex.quote, cmd)
    return " ".join(quoted_words)
66
def write_to_script(text, filename):
    """Write `text` to `filename` and add the owner-execute bit to its mode."""
    with open(filename, "w") as script:
        script.write(text)
    current_mode = os.stat(filename).st_mode
    os.chmod(filename, current_mode | stat.S_IEXEC)
72
class Reduce(object):
    """State and steps for reducing one clang crash with C-Reduce.

    Constructing an instance copies the input file to `<name>.reduced<ext>`,
    reads the clang arguments from the crash script and runs clang once to
    learn which output identifies the crash.  The remaining public methods are
    the individual pipeline steps; main() calls them in order.

    Attributes set by the constructor:
      testfile          -- path of the generated interestingness test
      crash_script      -- path the reduced crash command is written to
      file_to_reduce    -- working copy of the source that gets reduced
      clang             -- clang executable (taken from the global clang_cmd)
      clang_args        -- current clang arguments, without binary or input
      expected_output   -- substrings that must all appear in a crash's output
      needs_stack_trace -- True when expected_output came from a stack trace
      creduce_flags     -- flags passed to creduce ("--tidy" is always added)
    """

    def __init__(self, crash_script, file_to_reduce, creduce_flags):
        crash_script_name, crash_script_ext = os.path.splitext(crash_script)
        file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

        self.testfile = file_reduce_name + ".test.sh"
        self.crash_script = crash_script_name + ".reduced" + crash_script_ext
        self.file_to_reduce = file_reduce_name + ".reduced" + file_reduce_ext
        # All later steps operate on the copy; the original input is only read.
        shutil.copy(file_to_reduce, self.file_to_reduce)

        self.clang = clang_cmd
        self.clang_args = []
        self.expected_output = []
        self.needs_stack_trace = False
        self.creduce_flags = ["--tidy"] + creduce_flags

        self.read_clang_args(crash_script, file_to_reduce)
        self.read_expected_output()

    @staticmethod
    def _run_and_capture(cmd):
        """Run `cmd` and return its combined stdout/stderr as text.

        Undecodable bytes are replaced instead of raising, because the output
        of a crashing compiler is not guaranteed to be valid UTF-8.
        """
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        raw_output, _ = p.communicate()
        return raw_output.decode("utf-8", errors="replace")

    def get_crash_cmd(self, cmd=None, args=None, filename=None):
        """Return the argv list `[cmd] + args + [filename]`.

        Each omitted piece defaults to the instance's current clang binary,
        argument list and file to reduce respectively.
        """
        if not cmd:
            cmd = self.clang
        # Compare with None rather than truthiness: an empty list is a valid
        # candidate ("no arguments at all") and must not be replaced by the
        # stored arguments.
        if args is None:
            args = self.clang_args
        if not filename:
            filename = self.file_to_reduce

        return [cmd] + args + [filename]

    def read_clang_args(self, crash_script, filename):
        """Load the clang arguments from `crash_script` into self.clang_args.

        The first line that is neither blank nor a comment is taken to be the
        clang invocation.  Its first word (the binary) and the last argument
        equal to `filename` are dropped.  Exits if no command line is found.
        """
        print("\nReading arguments from crash script...")
        with open(crash_script) as f:
            # Assume clang call is the first non comment line.
            cmd = []
            for line in f:
                stripped = line.strip()
                if not stripped or stripped.startswith("#"):
                    continue
                cmd = shlex.split(line)
                break
        if not cmd:
            sys.exit("Could not find command in the crash script.")

        # Remove clang and filename from the command
        # Assume the last occurrence of the filename is the clang input file
        del cmd[0]
        for i in range(len(cmd) - 1, -1, -1):
            if cmd[i] == filename:
                del cmd[i]
                break
        self.clang_args = cmd
        verbose_print("Clang arguments:", quote_cmd(self.clang_args))

    def read_expected_output(self):
        """Run the crash command once and record what identifies the crash.

        A known error-message pattern is preferred.  If none matches, up to
        five stack-trace function names are used instead and
        self.needs_stack_trace is set.  Exits with status 1 when neither is
        found in the output.
        """
        print("\nGetting expected crash output...")
        crash_output = self._run_and_capture(self.get_crash_cmd())
        result = []

        # Remove color codes
        ansi_escape = r"\x1b\[[0-?]*m"
        crash_output = re.sub(ansi_escape, "", crash_output)

        # Look for specific error messages
        regexes = [
            r"Assertion .+ failed",  # Linux assert()
            r"Assertion failed: .+,",  # FreeBSD/Mac assert()
            r"fatal error: error in backend: .+",
            r"LLVM ERROR: .+",
            r"UNREACHABLE executed at .+?!",
            r"LLVM IR generation of declaration '.+'",
            r"Generating code for declaration '.+'",
            r"\*\*\* Bad machine code: .+ \*\*\*",
            r"ERROR: .*Sanitizer: [^ ]+ ",
        ]
        for msg_re in regexes:
            match = re.search(msg_re, crash_output)
            if match:
                msg = match.group(0)
                result = [msg]
                print("Found message:", msg)
                break

        # If no message was found, use the top five stack trace functions,
        # ignoring some common functions
        # Five is a somewhat arbitrary number; the goal is to get a small number
        # of identifying functions with some leeway for common functions
        if not result:
            self.needs_stack_trace = True
            stacktrace_re = r"[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\("
            filters = [
                "PrintStackTrace",
                "RunSignalHandlers",
                "CleanupOnSignal",
                "HandleCrash",
                "SignalHandler",
                "__restore_rt",
                "gsignal",
                "abort",
            ]

            def skip_function(func_name):
                return any(name in func_name for name in filters)

            matches = re.findall(stacktrace_re, crash_output)
            result = [x for x in matches if x and not skip_function(x)][:5]
            for msg in result:
                print("Found stack trace function:", msg)

        if not result:
            print("ERROR: no crash was found")
            print("The crash output was:\n========\n%s========" % crash_output)
            sys.exit(1)

        self.expected_output = result

    def check_expected_output(self, args=None, filename=None):
        """Return True if running clang still shows every expected message.

        `args` and `filename` default to the instance's current values; an
        explicitly passed empty `args` list is honoured as "no arguments".
        """
        crash_output = self._run_and_capture(
            self.get_crash_cmd(args=args, filename=filename)
        )
        return all(msg in crash_output for msg in self.expected_output)

    def write_interestingness_test(self):
        """Generate the bash interestingness test and sanity-check it.

        The script fails (exit 1) when the command succeeds or when any
        expected message is missing from its output.
        """
        print("\nCreating the interestingness test...")

        # Disable symbolization if it's not required to avoid slow symbolization.
        disable_symbolization = ""
        if not self.needs_stack_trace:
            disable_symbolization = "export LLVM_DISABLE_SYMBOLIZATION=1"

        output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (
            disable_symbolization,
            quote_cmd(self.get_crash_cmd()),
        )

        output += "".join(
            "grep -F %s t.log || exit 1\n" % shlex.quote(msg)
            for msg in self.expected_output
        )

        write_to_script(output, self.testfile)
        self.check_interestingness()

    def check_interestingness(self):
        """Exit unless the test accepts the original file and rejects an empty one."""
        testfile = os.path.abspath(self.testfile)

        # Check that the test considers the original file interesting
        returncode = subprocess.call(testfile, stdout=subprocess.DEVNULL)
        if returncode:
            sys.exit("The interestingness test does not pass for the original file.")

        # Check that an empty file is not interesting
        # Instead of modifying the filename in the test file, just run the command
        with tempfile.NamedTemporaryFile() as empty_file:
            is_interesting = self.check_expected_output(filename=empty_file.name)
        if is_interesting:
            sys.exit("The interestingness test passes for an empty file.")

    def clang_preprocess(self):
        """Replace the working file with its preprocessed form if it still crashes.

        Preprocessing without line markers (-P) is tried first, then with
        them.  If neither output reproduces the crash, or the preprocessor
        itself fails, the working file is left untouched.
        """
        print("\nTrying to preprocess the source file...")
        with tempfile.NamedTemporaryFile() as tmpfile:
            cmd_preprocess = self.get_crash_cmd() + ["-E", "-o", tmpfile.name]
            cmd_preprocess_no_lines = cmd_preprocess + ["-P"]
            try:
                subprocess.check_call(cmd_preprocess_no_lines)
                if self.check_expected_output(filename=tmpfile.name):
                    print("Successfully preprocessed with line markers removed")
                    shutil.copy(tmpfile.name, self.file_to_reduce)
                else:
                    subprocess.check_call(cmd_preprocess)
                    if self.check_expected_output(filename=tmpfile.name):
                        print("Successfully preprocessed without removing line markers")
                        shutil.copy(tmpfile.name, self.file_to_reduce)
                    else:
                        print(
                            "No longer crashes after preprocessing -- "
                            "using original source"
                        )
            except subprocess.CalledProcessError:
                print("Preprocessing failed")

    @staticmethod
    def filter_args(
        args, opts_equal=(), opts_startswith=(), opts_one_arg_startswith=()
    ):
        """Return a copy of `args` with the matching options removed.

        opts_equal              -- drop arguments equal to one of these
        opts_startswith         -- drop arguments starting with one of these
        opts_one_arg_startswith -- drop arguments starting with one of these
                                   together with the argument that follows
        """
        result = []
        skip_next = False
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if any(arg == a for a in opts_equal):
                continue
            if any(arg.startswith(a) for a in opts_startswith):
                continue
            if any(arg.startswith(a) for a in opts_one_arg_startswith):
                skip_next = True
                continue
            result.append(arg)
        return result

    def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
        """Return a filtered argument list if it still crashes, else `args`.

        `kwargs` are passed on to filter_args().  When `extra_arg` is given it
        is moved to (or added at) the end of the candidate list.  `msg` is
        printed in verbose mode when the candidate is accepted.
        """
        new_args = self.filter_args(args, **kwargs)

        if extra_arg:
            if extra_arg in new_args:
                new_args.remove(extra_arg)
            new_args.append(extra_arg)

        if new_args != args and self.check_expected_output(args=new_args):
            if msg:
                verbose_print(msg)
            return new_args
        return args

    def try_remove_arg_by_index(self, args, index):
        """Try dropping the argument at `index`; return (args, next_index).

        On success the shortened list is returned with the same index, since
        the following argument has shifted into that position.  On failure
        the original list is returned with index + 1.
        """
        new_args = args[:index] + args[index + 1 :]
        removed_arg = args[index]

        # Heuristic for grouping arguments:
        # remove next argument if it doesn't start with "-"
        if index < len(new_args) and not new_args[index].startswith("-"):
            del new_args[index]
            removed_arg += " " + args[index + 1]

        if self.check_expected_output(args=new_args):
            verbose_print("Removed", removed_arg)
            return new_args, index
        return args, index + 1

    def simplify_clang_args(self):
        """Simplify clang arguments before running C-Reduce to reduce the time the
        interestingness test takes to run.
        """
        print("\nSimplifying the clang command...")
        new_args = self.clang_args

        # Remove the color diagnostics flag to make it easier to match error
        # text.
        new_args = self.try_remove_args(
            new_args,
            msg="Removed -fcolor-diagnostics",
            opts_equal=["-fcolor-diagnostics"],
        )

        # Remove some clang arguments to speed up the interestingness test
        new_args = self.try_remove_args(
            new_args,
            msg="Removed debug info options",
            opts_startswith=["-gcodeview", "-debug-info-kind=", "-debugger-tuning="],
        )

        new_args = self.try_remove_args(
            new_args, msg="Removed --show-includes", opts_startswith=["--show-includes"]
        )
        # Not suppressing warnings (-w) sometimes prevents the crash from occurring
        # after preprocessing
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced -W options with -w",
            extra_arg="-w",
            opts_startswith=["-W"],
        )
        new_args = self.try_remove_args(
            new_args,
            msg="Replaced optimization level with -O0",
            extra_arg="-O0",
            opts_startswith=["-O"],
        )

        # Try to remove compilation steps
        new_args = self.try_remove_args(
            new_args, msg="Added -emit-llvm", extra_arg="-emit-llvm"
        )
        new_args = self.try_remove_args(
            new_args, msg="Added -fsyntax-only", extra_arg="-fsyntax-only"
        )

        # Try to make implicit int an error for more sensible test output
        new_args = self.try_remove_args(
            new_args,
            msg="Added -Werror=implicit-int",
            opts_equal=["-w"],
            extra_arg="-Werror=implicit-int",
        )

        self.clang_args = new_args
        verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

    def reduce_clang_args(self):
        """Minimize the clang arguments after running C-Reduce, to get the smallest
        command that reproduces the crash on the reduced file.
        """
        print("\nReducing the clang crash command...")

        new_args = self.clang_args

        # Bulk removals, tried in this order.  The first five cover often
        # occurring args; -mllvm is a case the per-index heuristic below does
        # not cover.
        bulk_removals = [
            ("Removed -D options", {"opts_startswith": ["-D"]}),
            ("Removed -D options", {"opts_one_arg_startswith": ["-D"]}),
            ("Removed -I options", {"opts_startswith": ["-I"]}),
            ("Removed -I options", {"opts_one_arg_startswith": ["-I"]}),
            ("Removed -W options", {"opts_startswith": ["-W"]}),
            ("Removed -mllvm", {"opts_one_arg_startswith": ["-mllvm"]}),
        ]
        for removal_msg, filter_kwargs in bulk_removals:
            new_args = self.try_remove_args(new_args, msg=removal_msg, **filter_kwargs)

        # Then try every remaining argument individually.
        i = 0
        while i < len(new_args):
            new_args, i = self.try_remove_arg_by_index(new_args, i)

        self.clang_args = new_args

        reduced_cmd = quote_cmd(self.get_crash_cmd())
        write_to_script(reduced_cmd, self.crash_script)
        print("Reduced command:", reduced_cmd)

    def run_creduce(self):
        """Run creduce on the working file; Ctrl-C stops it and returns normally."""
        full_creduce_cmd = (
            [creduce_cmd] + self.creduce_flags + [self.testfile, self.file_to_reduce]
        )
        print("\nRunning C-Reduce...")
        verbose_print(quote_cmd(full_creduce_cmd))
        # Start the process outside the try block so the handler below never
        # refers to an unbound `p`.
        p = subprocess.Popen(full_creduce_cmd)
        try:
            p.communicate()
        except KeyboardInterrupt:
            # Hack to kill C-Reduce because it jumps into its own pgid
            print("\n\nctrl-c detected, killed creduce")
            p.kill()
            # Reap the killed child so it does not linger as a zombie.
            p.wait()
428
def main():
    """Parse the command line and run the reduction pipeline end to end.

    Options the parser does not recognise are handed to creduce.  Unless the
    caller already supplied `--n`, creduce is told to use half of the CPUs,
    with a minimum of four jobs.
    """
    global verbose, creduce_cmd, clang_cmd

    parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter)

    positionals = (
        ("crash_script", "Name of the script that generates the crash."),
        ("file_to_reduce", "Name of the file to be reduced."),
    )
    for positional_name, help_text in positionals:
        parser.add_argument(positional_name, type=str, nargs=1, help=help_text)

    parser.add_argument(
        "--llvm-bin", dest="llvm_bin", type=str, help="Path to the LLVM bin directory."
    )
    parser.add_argument(
        "--clang",
        dest="clang",
        type=str,
        help="The path to the `clang` executable. "
        "By default uses the llvm-bin directory.",
    )
    parser.add_argument(
        "--creduce",
        dest="creduce",
        type=str,
        help="The path to the `creduce` executable. "
        "Required if `creduce` is not in PATH environment.",
    )
    parser.add_argument("-v", "--verbose", action="store_true")

    opts, creduce_flags = parser.parse_known_args()
    verbose = opts.verbose

    if opts.llvm_bin:
        llvm_bin = os.path.abspath(opts.llvm_bin)
    else:
        llvm_bin = None

    # Tools are resolved before the input files are checked, so a missing
    # tool is reported first.
    creduce_cmd = check_cmd("creduce", None, opts.creduce)
    clang_cmd = check_cmd("clang", llvm_bin, opts.clang)

    crash_script = check_file(opts.crash_script[0])
    file_to_reduce = check_file(opts.file_to_reduce[0])

    if "--n" not in creduce_flags:
        job_count = max(4, multiprocessing.cpu_count() // 2)
        creduce_flags.extend(["--n", str(job_count)])

    reducer = Reduce(crash_script, file_to_reduce, creduce_flags)

    reducer.simplify_clang_args()
    reducer.write_interestingness_test()
    reducer.clang_preprocess()
    reducer.run_creduce()
    reducer.reduce_clang_args()
482
# Run the reducer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()