#!/usr/bin/env python3
"""
This script:
- Builds clang with user-defined flags
- Uses that clang to build an instrumented clang, which can be used to collect
  PGO samples
- Builds a user-defined set of sources (default: clang) to act as a
  "benchmark" to generate a PGO profile
- Builds clang once more with the PGO profile generated above

This is a total of four clean builds of clang (by default). This may take a
while. :)

This script duplicates https://llvm.org/docs/AdvancedBuilds.html#multi-stage-pgo
Eventually, it will be updated to instead call the cmake cache mentioned there.
"""
17
import argparse
import collections
import multiprocessing
import os
import shlex
import shutil
import subprocess
import sys

### User configuration


# If you want to use a different 'benchmark' than building clang, make this
# function do what you want. out_dir is the build directory for clang, so all
# of the clang binaries will live under "${out_dir}/bin/". Using clang in
# ${out_dir} will magically have the profiles go to the right place.
#
# You may assume that out_dir is a freshly-built directory that you can reach
# in to build more things, if you'd like.
def _run_benchmark(env, out_dir, include_debug_info):
    """Run the 'benchmark' workload that generates PGO profile data.

    Args:
        env: the Env for this run; supplies output directories and runs the
            build commands.
        out_dir: build directory of the instrumented clang. Its test suites
            are run in place, and its clang is used to configure a second
            build under the "instrumentation_run" output subdirectory.
        include_debug_info: if true, the second build is configured with
            CMAKE_BUILD_TYPE=RelWithDebInfo instead of whatever build type
            the default cmake args specify.
    """
    target_dir = env.output_subdir("instrumentation_run")

    # `check-llvm` and `check-clang` are cheap ways to increase coverage. The
    # former lets us touch on the non-x86 backends a bit if configured, and the
    # latter gives us more C to chew on (and will send us through diagnostic
    # paths a fair amount, though the `if (stuff_is_broken) { diag() ... }`
    # branches should still heavily be weighted in the not-taken direction,
    # since we built all of LLVM/etc).
    _build_things_in(env, out_dir, what=["check-llvm", "check-clang"])

    # Building tblgen gets us coverage; don't skip it. (out_dir may also not
    # have them anyway, but that's less of an issue)
    cmake = _get_cmake_invocation_for_bootstrap_from(
        env, out_dir, skip_tablegens=False
    )

    if include_debug_info:
        # add_flag (not add_new_flag): this deliberately overrides the
        # CMAKE_BUILD_TYPE that came in with the default cmake args.
        cmake.add_flag("CMAKE_BUILD_TYPE", "RelWithDebInfo")

    _run_fresh_cmake(env, cmake, target_dir)

    # Just build all the things. The more data we have, the better.
    _build_things_in(env, target_dir, what=["all"])
61
### Script
63
64
class CmakeInvocation:
    """Accumulates -D flags for one cmake command line.

    Compiler and linker flag variables are pre-registered as lists so that
    values added to them accumulate (and are space-joined when rendered);
    every other variable holds a single string value.
    """

    _cflags = ["CMAKE_C_FLAGS", "CMAKE_CXX_FLAGS"]
    _ldflags = [
        "CMAKE_EXE_LINKER_FLAGS",
        "CMAKE_MODULE_LINKER_FLAGS",
        "CMAKE_SHARED_LINKER_FLAGS",
    ]

    def __init__(self, cmake, maker, cmake_dir):
        """Start an invocation of `cmake -G <maker> <cmake_dir>`."""
        self._prefix = [cmake, "-G", maker, cmake_dir]

        # Map of str -> (list|str).
        self._flags = {}
        for flag in CmakeInvocation._cflags + CmakeInvocation._ldflags:
            self._flags[flag] = []

    def add_new_flag(self, key, value):
        """Set `key`, refusing to replace an existing scalar value.

        Raises:
            ValueError: if `key` already holds a non-list value.
        """
        self.add_flag(key, value, allow_overwrites=False)

    def add_flag(self, key, value, allow_overwrites=True):
        """Set `key` to `value`, or append `value` if `key` is list-valued.

        Raises:
            ValueError: if `key` already holds a non-list value and
                `allow_overwrites` is false.
        """
        if key not in self._flags:
            self._flags[key] = value
            return

        existing_value = self._flags[key]
        if isinstance(existing_value, list):
            existing_value.append(value)
            return

        if not allow_overwrites:
            raise ValueError("Invalid overwrite of %s requested" % key)

        self._flags[key] = value

    def _extend_list_flags(self, keys, flags):
        """Append every entry of `flags` to each list-valued key in `keys`."""
        # A bare string would be split into characters, i.e. "-O2" would be
        # added as '-', 'O', '2'. Raise rather than assert so the check is
        # still active under `python -O`.
        if isinstance(flags, str):
            raise TypeError("flags must be a sequence of strings, not a str")
        for key in keys:
            self._flags[key].extend(flags)

    def add_cflags(self, flags):
        """Append `flags` (a sequence of strings) to the C and C++ flags.

        Raises:
            TypeError: if `flags` is a single string.
        """
        self._extend_list_flags(CmakeInvocation._cflags, flags)

    def add_ldflags(self, flags):
        """Append `flags` (a sequence of strings) to all linker flag sets.

        Raises:
            TypeError: if `flags` is a single string.
        """
        self._extend_list_flags(CmakeInvocation._ldflags, flags)

    def to_args(self):
        """Return the argv list: the prefix, then -D flags sorted by key."""
        args = self._prefix.copy()
        for key, value in sorted(self._flags.items()):
            if isinstance(value, list):
                # We preload all of the list-y values (cflags, ...). If we've
                # nothing to add, don't.
                if not value:
                    continue
                value = " ".join(value)

            arg = "-D" + key
            # An empty value renders as a bare `-DKEY`.
            if value != "":
                arg += "=" + value
            args.append(arg)
        return args
126
class Env:
    """Settings shared by every build step, plus the command runner."""

    # Location of each known subproject, relative to the LLVM directory.
    _SUBPROJECT_DIRS = {
        "compiler-rt": "../compiler-rt",
        "clang": "../clang",
    }

    def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args, dry_run):
        """Store the run configuration.

        Args:
            llvm_dir: path to the LLVM source directory handed to cmake.
            use_make: use "Unix Makefiles"/make instead of Ninja/ninja.
            output_dir: directory under which all build subdirs are placed.
            default_cmake_args: dict of cmake variable -> value applied to
                every invocation. It is copied, so later changes by the
                caller are not seen.
            dry_run: if true, commands are printed but never executed.
        """
        self.llvm_dir = llvm_dir
        self.use_make = use_make
        self.output_dir = output_dir
        self.default_cmake_args = default_cmake_args.copy()
        self.dry_run = dry_run

    def get_default_cmake_args_kv(self):
        """Return the (key, value) pairs of the default cmake args."""
        return self.default_cmake_args.items()

    def get_cmake_maker(self):
        """Return the cmake generator name for the chosen build tool."""
        return "Ninja" if not self.use_make else "Unix Makefiles"

    def get_make_command(self):
        """Return the argv prefix of the build tool (targets get appended)."""
        if self.use_make:
            return ["make", "-j{}".format(multiprocessing.cpu_count())]
        return ["ninja"]

    def output_subdir(self, name):
        """Return the path of `name` inside the output directory."""
        return os.path.join(self.output_dir, name)

    def has_llvm_subproject(self, name):
        """Return whether subproject `name` is checked out beside llvm_dir.

        Raises:
            ValueError: if `name` is not "clang" or "compiler-rt".
        """
        try:
            subdir = Env._SUBPROJECT_DIRS[name]
        except KeyError:
            raise ValueError("Unknown subproject: %s" % name) from None

        return os.path.isdir(os.path.join(self.llvm_dir, subdir))

    # Note that we don't allow capturing stdout/stderr. This works quite nicely
    # with dry_run.
    def run_command(self, cmd, cwd=None, check=False, silent_unless_error=False):
        """Print `cmd`, then run it unless this is a dry run.

        Args:
            cmd: argv list of the command.
            cwd: directory to run in; None means the current directory.
            check: raise if the command exits with a non-zero status.
            silent_unless_error: capture the command's combined output and
                only print it when the command fails.

        Returns:
            None, always.

        Raises:
            subprocess.CalledProcessError: if `check` is true and the command
                exits with a non-zero status.
        """
        print("Running `%s` in %s" % (cmd, shlex.quote(cwd or os.getcwd())))

        if self.dry_run:
            return

        if silent_unless_error:
            stdout, stderr = subprocess.PIPE, subprocess.STDOUT
        else:
            stdout, stderr = None, None

        # Don't use subprocess.run because it's >= py3.5 only, and it's not too
        # much extra effort to get what it gives us anyway.
        popen = subprocess.Popen(
            cmd, stdin=subprocess.DEVNULL, stdout=stdout, stderr=stderr, cwd=cwd
        )
        # communicate() waits for the process to exit, so returncode is set
        # once it returns. stdout is None unless output was captured.
        stdout, _ = popen.communicate()
        return_code = popen.returncode

        if not return_code:
            return

        if silent_unless_error:
            print(stdout.decode("utf-8", "ignore"))

        if check:
            raise subprocess.CalledProcessError(
                returncode=return_code, cmd=cmd, output=stdout, stderr=None
            )
191
def _get_default_cmake_invocation(env):
    """Return a CmakeInvocation for env.llvm_dir with env's default flags.

    Raises:
        ValueError: propagated from add_new_flag if a default flag would
            overwrite an existing scalar value.
    """
    inv = CmakeInvocation(
        cmake="cmake", maker=env.get_cmake_maker(), cmake_dir=env.llvm_dir
    )
    for key, value in env.get_default_cmake_args_kv():
        inv.add_new_flag(key, value)
    return inv
200
def _get_cmake_invocation_for_bootstrap_from(env, out_dir, skip_tablegens=True):
    """Return a cmake invocation that compiles with the clang in `out_dir`.

    Args:
        env: the Env for this run.
        out_dir: an existing build directory; its bin/clang and bin/clang++
            become the C and C++ compilers.
        skip_tablegens: if true, point LLVM_TABLEGEN / CLANG_TABLEGEN at the
            tablegen binaries in `out_dir` (when present) so the new build
            does not rebuild them.
    """
    bin_dir = os.path.join(out_dir, "bin")
    clang = os.path.join(bin_dir, "clang")
    cmake = _get_default_cmake_invocation(env)
    cmake.add_new_flag("CMAKE_C_COMPILER", clang)
    cmake.add_new_flag("CMAKE_CXX_COMPILER", clang + "++")

    # We often get no value out of building new tblgens; the previous build
    # should have them. It's still correct to build them, just slower.
    if skip_tablegens:
        tablegens = (
            ("LLVM_TABLEGEN", "llvm-tblgen"),
            ("CLANG_TABLEGEN", "clang-tblgen"),
        )
        for key, binary in tablegens:
            path = os.path.join(bin_dir, binary)

            # Check that this exists, since the user's allowed to specify
            # their own stage1 directory (which is generally where we'll
            # source everything from). Dry runs should hope for the best from
            # our user, as well.
            if env.dry_run or os.path.exists(path):
                cmake.add_new_flag(key, path)

    return cmake
224
def _build_things_in(env, target_dir, what):
    """Build the targets `what` in the build directory `target_dir`.

    Args:
        env: the Env for this run; supplies the build tool and runs it.
        target_dir: an already-configured build directory.
        what: iterable of target names, appended to the build tool's argv.

    Raises:
        subprocess.CalledProcessError: if the build command fails.
    """
    # list() lets callers pass a tuple or other iterable, not only a list.
    cmd = env.get_make_command() + list(what)
    env.run_command(cmd, cwd=target_dir, check=True)
229
def _run_fresh_cmake(env, cmake, target_dir):
    """Recreate `target_dir` as an empty directory and run cmake in it.

    Args:
        env: the Env for this run. In a dry run the filesystem is left
            untouched and the cmake command is only printed.
        cmake: the CmakeInvocation to render and run.
        target_dir: build directory; any existing contents are deleted.

    Raises:
        subprocess.CalledProcessError: if cmake fails.
    """
    if not env.dry_run:
        # Start from a clean slate; a directory that doesn't exist yet is
        # fine.
        try:
            shutil.rmtree(target_dir)
        except FileNotFoundError:
            pass

        os.makedirs(target_dir, mode=0o755)

    cmake_args = cmake.to_args()
    # cmake's output is only shown if configuration fails.
    env.run_command(cmake_args, cwd=target_dir, check=True, silent_unless_error=True)
242
def _build_stage1_clang(env):
    """Configure and build the stage1 toolchain with the default flags.

    Builds the `clang`, `llvm-profdata` and `profile` targets in the
    "stage1" output subdirectory.

    Returns:
        The path of the stage1 build directory.
    """
    target_dir = env.output_subdir("stage1")
    cmake = _get_default_cmake_invocation(env)
    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir, what=["clang", "llvm-profdata", "profile"])
    return target_dir
250
def _generate_instrumented_clang_profile(env, stage1_dir, profile_dir, output_file):
    """Merge the raw profiles in `profile_dir` into `output_file`.

    Args:
        env: the Env for this run.
        stage1_dir: build directory whose bin/llvm-profdata is used.
        profile_dir: directory containing the *.profraw files to merge.
        output_file: path of the merged profile to write.

    Raises:
        ValueError: if `profile_dir` contains no .profraw files (not checked
            in a dry run).
        subprocess.CalledProcessError: if llvm-profdata fails.
    """
    llvm_profdata = os.path.join(stage1_dir, "bin", "llvm-profdata")
    if env.dry_run:
        # Nothing was actually run, so show a glob in place of real files.
        profiles = [os.path.join(profile_dir, "*.profraw")]
    else:
        # Sorted so the command line is the same from run to run; listdir
        # order is arbitrary.
        profiles = sorted(
            os.path.join(profile_dir, f)
            for f in os.listdir(profile_dir)
            if f.endswith(".profraw")
        )
        if not profiles:
            # Fail here with the directory name rather than letting
            # llvm-profdata be invoked without any inputs.
            raise ValueError("No .profraw files found in %s" % profile_dir)

    cmd = [llvm_profdata, "merge", "-output=" + output_file] + profiles
    env.run_command(cmd, check=True)
264
def _build_instrumented_clang(env, stage1_dir):
    """Build an IR-instrumented clang and lld using the stage1 compiler.

    Args:
        env: the Env for this run.
        stage1_dir: absolute path of the stage1 build directory.

    Returns:
        A (build_dir, profiles_dir) tuple: the instrumented build directory
        and its "profiles" subdirectory.
    """
    # cmake runs from inside the new build directory, so a relative compiler
    # path would not resolve there.
    assert os.path.isabs(stage1_dir)

    target_dir = os.path.join(env.output_dir, "instrumented")
    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    cmake.add_new_flag("LLVM_BUILD_INSTRUMENTED", "IR")

    # libcxx's configure step messes with our link order: we'll link
    # libclang_rt.profile after libgcc, and the former requires atexit from the
    # latter. So, configure checks fail.
    #
    # Since we don't need libcxx or compiler-rt anyway, just disable them.
    cmake.add_new_flag("LLVM_BUILD_RUNTIME", "No")

    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir, what=["clang", "lld"])

    profiles_dir = os.path.join(target_dir, "profiles")
    return target_dir, profiles_dir
285
def _build_optimized_clang(env, stage1_dir, profdata_file):
    """Build clang with the stage1 compiler, optimized using `profdata_file`.

    Args:
        env: the Env for this run.
        stage1_dir: build directory providing the compiler to use.
        profdata_file: merged profile passed to cmake as LLVM_PROFDATA_FILE.

    Returns:
        The path of the optimized build directory.

    Raises:
        ValueError: if `profdata_file` does not exist (not checked in a dry
            run, where no profile was produced).
    """
    if not env.dry_run and not os.path.exists(profdata_file):
        raise ValueError(
            "Looks like the profdata file at %s doesn't exist" % profdata_file
        )

    target_dir = os.path.join(env.output_dir, "optimized")
    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    # Absolute, because cmake runs from inside target_dir.
    cmake.add_new_flag("LLVM_PROFDATA_FILE", os.path.abspath(profdata_file))

    # We'll get complaints about hash mismatches in `main` in tools/etc. Ignore
    # it.
    cmake.add_cflags(["-Wno-backend-plugin"])
    _run_fresh_cmake(env, cmake, target_dir)
    _build_things_in(env, target_dir, what=["clang"])
    return target_dir
303
# Options parsed from the command line that are not part of Env:
#   do_optimized_build: whether to run the final PGO-optimized build.
#   include_debug_info: whether the benchmark build uses RelWithDebInfo.
#   profile_location: path the merged profile is written to.
#   stage1_dir: existing stage1 build directory, or None to build one.
Args = collections.namedtuple(
    "Args",
    [
        "do_optimized_build",
        "include_debug_info",
        "profile_location",
        "stage1_dir",
    ],
)
313
314
def _parse_args():
    """Parse the command line (sys.argv).

    Returns:
        An (env, args) tuple: the Env holding settings shared by all build
        steps, and an Args with the remaining options.

    Raises:
        ValueError: if a --cmake-extra-arg value starts with "-" but is not
            a -D argument.
    """
    parser = argparse.ArgumentParser(
        description="Builds LLVM and Clang with instrumentation, collects "
        "instrumentation profiles for them, and (optionally) builds things "
        "with these PGO profiles. By default, it's assumed that you're "
        "running this from your LLVM root, and all build artifacts will be "
        "saved to $PWD/out."
    )
    parser.add_argument(
        "--cmake-extra-arg",
        action="append",
        default=[],
        help="an extra arg to pass to all cmake invocations. Note that this "
        "is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will "
        "be passed as -DFOO=BAR. This may be specified multiple times.",
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="print commands instead of running them"
    )
    parser.add_argument(
        "--llvm-dir",
        default=".",
        help="directory containing an LLVM checkout (default: $PWD)",
    )
    parser.add_argument(
        "--no-optimized-build",
        action="store_true",
        help="disable the final, PGO-optimized build",
    )
    parser.add_argument(
        "--out-dir", help="directory to write artifacts to (default: $llvm_dir/out)"
    )
    parser.add_argument(
        "--profile-output",
        help="where to output the profile (default is $out/pgo_profile.prof)",
    )
    parser.add_argument(
        "--stage1-dir",
        help="instead of having an initial build of everything, use the given "
        "directory. It is expected that this directory will have clang, "
        "llvm-profdata, and the appropriate libclang_rt.profile already built",
    )
    # The help text used to describe the opposite of what the flag does:
    # passing it switches the benchmark build *to* RelWithDebInfo.
    parser.add_argument(
        "--use-debug-info-in-benchmark",
        action="store_true",
        help="use a RelWithDebInfo build instead of a regular build in the "
        "benchmark. "
        "This increases benchmark execution time and disk space requirements, "
        "but gives more coverage over debuginfo bits in LLVM and clang.",
    )
    # Falls back to make automatically when ninja is not on PATH.
    parser.add_argument(
        "--use-make",
        action="store_true",
        default=shutil.which("ninja") is None,
        help="use Makefiles instead of ninja",
    )

    args = parser.parse_args()

    llvm_dir = os.path.abspath(args.llvm_dir)
    if args.out_dir is None:
        output_dir = os.path.join(llvm_dir, "out")
    else:
        output_dir = os.path.abspath(args.out_dir)

    # Defaults; any of these may be overridden by --cmake-extra-arg.
    extra_args = {
        "CMAKE_BUILD_TYPE": "Release",
        "LLVM_ENABLE_PROJECTS": "clang;compiler-rt;lld",
    }
    for arg in args.cmake_extra_arg:
        if arg.startswith("-D"):
            arg = arg[2:]
        elif arg.startswith("-"):
            raise ValueError(
                "Unknown not- -D arg encountered; you may need "
                "to tweak the source..."
            )
        # "KEY" with no "=" becomes a bare -DKEY (empty value).
        key, _, val = arg.partition("=")
        extra_args[key] = val

    env = Env(
        default_cmake_args=extra_args,
        dry_run=args.dry_run,
        llvm_dir=llvm_dir,
        output_dir=output_dir,
        use_make=args.use_make,
    )

    if args.profile_output is not None:
        profile_location = args.profile_output
    else:
        profile_location = os.path.join(env.output_dir, "pgo_profile.prof")

    # The stage1 compiler path is handed to cmake, which runs from inside
    # each build directory, so it has to be absolute, like llvm_dir and
    # output_dir above.
    stage1_dir = args.stage1_dir
    if stage1_dir is not None:
        stage1_dir = os.path.abspath(stage1_dir)

    result_args = Args(
        do_optimized_build=not args.no_optimized_build,
        include_debug_info=args.use_debug_info_in_benchmark,
        profile_location=profile_location,
        stage1_dir=stage1_dir,
    )

    return env, result_args
420
def _looks_like_llvm_dir(directory):
    """Arbitrary set of heuristics to determine if `directory` is an llvm dir.

    Errs on the side of false-positives.

    Returns:
        True if `directory` contains the expected top-level entries and an
        include/llvm entry; False otherwise, including when `directory` or
        its include/ entry is missing or not a directory.
    """
    try:
        contents = set(os.listdir(directory))
    except (FileNotFoundError, NotADirectoryError):
        # A bad --llvm-dir should produce the caller's friendly error, not a
        # traceback.
        return False

    expected_contents = [
        "CODE_OWNERS.TXT",
        "cmake",
        "docs",
        "include",
        "utils",
    ]

    if not all(c in contents for c in expected_contents):
        return False

    try:
        include_listing = os.listdir(os.path.join(directory, "include"))
    except (FileNotFoundError, NotADirectoryError):
        return False

    return "llvm" in include_listing
445
def _die(*args, **kwargs):
    """Print a message to stderr and exit with status 1.

    Accepts the same arguments as print(); any `file` argument is replaced
    with sys.stderr.

    Raises:
        SystemExit: always, with exit code 1.
    """
    kwargs["file"] = sys.stderr
    print(*args, **kwargs)
    sys.exit(1)
451
def _main():
    """Run the whole pipeline: stage1, instrumented build, benchmark, merge,
    and (unless disabled) the final PGO-optimized build.

    Progress messages go to stderr; the final profile path and build
    directory go to stdout. Exits with status 1 if the LLVM checkout does not
    look usable.
    """
    env, args = _parse_args()

    if not _looks_like_llvm_dir(env.llvm_dir):
        _die("Looks like %s isn't an LLVM directory; please see --help" % env.llvm_dir)

    # has_llvm_subproject looks for these as siblings of the LLVM directory,
    # so report that location (the old messages named tools/clang and
    # projects/compiler-rt, which is not where the check looks).
    for subproject in ("clang", "compiler-rt"):
        if not env.has_llvm_subproject(subproject):
            expected = os.path.normpath(os.path.join(env.llvm_dir, "..", subproject))
            _die("Need a %s checkout at %s" % (subproject, expected))

    def status(*args):
        print(*args, file=sys.stderr)

    if args.stage1_dir is None:
        status("*** Building stage1 clang...")
        stage1_out = _build_stage1_clang(env)
    else:
        # The user supplied a prebuilt stage1; use it as-is.
        stage1_out = args.stage1_dir

    status("*** Building instrumented clang...")
    instrumented_out, profile_dir = _build_instrumented_clang(env, stage1_out)
    status("*** Running profdata benchmarks...")
    _run_benchmark(env, instrumented_out, args.include_debug_info)
    status("*** Generating profile...")
    _generate_instrumented_clang_profile(
        env, stage1_out, profile_dir, args.profile_location
    )

    print("Final profile:", args.profile_location)
    if args.do_optimized_build:
        status("*** Building PGO-optimized binaries...")
        optimized_out = _build_optimized_clang(env, stage1_out, args.profile_location)
        print("Final build directory:", optimized_out)
486
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    _main()