llvm-project

Форк
0
/
collect_and_build_with_pgo.py 
488 строк · 15.8 Кб
1
#!/usr/bin/env python3
2
"""
3
This script:
4
- Builds clang with user-defined flags
5
- Uses that clang to build an instrumented clang, which can be used to collect
6
  PGO samples
7
- Builds a user-defined set of sources (default: clang) to act as a
8
  "benchmark" to generate a PGO profile
9
- Builds clang once more with the PGO profile generated above
10

11
This is a total of four clean builds of clang (by default). This may take a
12
while. :)
13

14
This script duplicates https://llvm.org/docs/AdvancedBuilds.html#multi-stage-pgo
15
Eventually, it will be updated to instead call the cmake cache mentioned there.
16
"""
17

18
import argparse
19
import collections
20
import multiprocessing
21
import os
22
import shlex
23
import shutil
24
import subprocess
25
import sys
26

27
### User configuration
28

29

30
# If you want to use a different 'benchmark' than building clang, make this
31
# function do what you want. out_dir is the build directory for clang, so all
32
# of the clang binaries will live under "${out_dir}/bin/". Using clang in
33
# ${out_dir} will magically have the profiles go to the right place.
34
#
35
# You may assume that out_dir is a freshly-built directory that you can reach
36
# in to build more things, if you'd like.
37
def _run_benchmark(env, out_dir, include_debug_info):
    """Exercise the clang in `out_dir` so that it emits profile data.

    Two workloads are run, in order:

    1. The `check-llvm` and `check-clang` targets inside `out_dir` itself.
    2. A freshly configured bootstrap build (using the compilers found in
       `out_dir`, tablegens included) in the "instrumentation_run" output
       subdirectory, building the `all` target.

    When `include_debug_info` is true, the second build is configured with
    CMAKE_BUILD_TYPE=RelWithDebInfo.
    """
    # `check-llvm` and `check-clang` are cheap ways to increase coverage. The
    # former lets us touch on the non-x86 backends a bit if configured, and the
    # latter gives us more C to chew on (and will send us through diagnostic
    # paths a fair amount, though the `if (stuff_is_broken) { diag() ... }`
    # branches should still heavily be weighted in the not-taken direction,
    # since we built all of LLVM/etc).
    _build_things_in(env, out_dir, what=["check-llvm", "check-clang"])

    # Building tblgen gets us coverage; don't skip it. (out_dir may also not
    # have them anyway, but that's less of an issue)
    bootstrap = _get_cmake_invocation_for_bootstrap_from(
        env, out_dir, skip_tablegens=False
    )
    if include_debug_info:
        bootstrap.add_flag("CMAKE_BUILD_TYPE", "RelWithDebInfo")

    benchmark_dir = env.output_subdir("instrumentation_run")
    _run_fresh_cmake(env, bootstrap, benchmark_dir)

    # Just build all the things. The more data we have, the better.
    _build_things_in(env, benchmark_dir, what=["all"])
60

61

62
### Script
63

64

65
class CmakeInvocation:
    """Accumulates the pieces of a single cmake command line.

    Flags are stored as a mapping from cmake variable name to either a string
    (a plain `-DKEY=VALUE` setting) or a list of strings (space-joined when
    the command line is rendered). The compiler- and linker-flag variables
    named in `_cflags` and `_ldflags` are always list-valued.
    """

    _cflags = ["CMAKE_C_FLAGS", "CMAKE_CXX_FLAGS"]
    _ldflags = [
        "CMAKE_EXE_LINKER_FLAGS",
        "CMAKE_MODULE_LINKER_FLAGS",
        "CMAKE_SHARED_LINKER_FLAGS",
    ]

    def __init__(self, cmake, maker, cmake_dir):
        """Start an invocation of `cmake -G <maker> <cmake_dir>`."""
        self._prefix = [cmake, "-G", maker, cmake_dir]

        # Map of str -> (list|str). The list-valued entries are preloaded so
        # that add_cflags/add_ldflags can extend them unconditionally.
        self._flags = {
            name: [] for name in CmakeInvocation._cflags + CmakeInvocation._ldflags
        }

    def add_new_flag(self, key, value):
        """Like add_flag, but refuse to replace an existing string value."""
        self.add_flag(key, value, allow_overwrites=False)

    def add_flag(self, key, value, allow_overwrites=True):
        """Set `key` to `value`, or append `value` if `key` is list-valued.

        Raises ValueError when `key` already holds a non-list value and
        `allow_overwrites` is false.
        """
        current = self._flags.get(key)
        if isinstance(current, list):
            current.append(value)
        elif key in self._flags and not allow_overwrites:
            raise ValueError("Invalid overwrite of %s requested" % key)
        else:
            self._flags[key] = value

    def _extend_each(self, names, flags):
        """Append every item of `flags` to each list-valued flag in `names`."""
        # No, I didn't intend to append ['-', 'O', '2'] to my flags, thanks :)
        assert not isinstance(flags, str)
        for name in names:
            self._flags[name].extend(flags)

    def add_cflags(self, flags):
        """Append `flags` (an iterable of strings) to the C and C++ flags."""
        self._extend_each(CmakeInvocation._cflags, flags)

    def add_ldflags(self, flags):
        """Append `flags` (an iterable of strings) to all linker flag sets."""
        self._extend_each(CmakeInvocation._ldflags, flags)

    def to_args(self):
        """Render the invocation as an argv list, with flags sorted by name."""
        args = list(self._prefix)
        for key in sorted(self._flags):
            value = self._flags[key]
            if isinstance(value, list):
                # We preload all of the list-y values (cflags, ...). If we've
                # nothing to add, don't.
                if not value:
                    continue
                value = " ".join(value)

            # An empty value renders as a bare `-DKEY`.
            args.append("-D" + key if value == "" else "-D" + key + "=" + value)
        return args
125

126

127
class Env:
    """Settings shared by every build step, plus a command runner.

    Attributes:
        llvm_dir: directory handed to cmake as the source directory.
        use_make: generate Unix Makefiles and build with make when true;
            otherwise use Ninja.
        output_dir: directory under which the build subdirectories live.
        default_cmake_args: private copy of the cmake -D key/value mapping
            applied to every cmake invocation.
        dry_run: when true, run_command only prints what it would run.
    """

    def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args, dry_run):
        self.llvm_dir = llvm_dir
        self.use_make = use_make
        self.output_dir = output_dir
        # Copy so later changes to the caller's mapping don't leak in here.
        self.default_cmake_args = default_cmake_args.copy()
        self.dry_run = dry_run

    def get_default_cmake_args_kv(self):
        """Return the (key, value) view of the default cmake arguments."""
        return self.default_cmake_args.items()

    def get_cmake_maker(self):
        """Return the cmake generator name to pass to `-G`."""
        return "Unix Makefiles" if self.use_make else "Ninja"

    def get_make_command(self):
        """Return the argv prefix of the build tool for the chosen generator."""
        if not self.use_make:
            return ["ninja"]
        return ["make", "-j{}".format(multiprocessing.cpu_count())]

    def output_subdir(self, name):
        """Return the path of `name` inside the output directory."""
        return os.path.join(self.output_dir, name)

    def has_llvm_subproject(self, name):
        """Report whether the named subproject directory sits next to llvm_dir.

        Only "compiler-rt" and "clang" are recognised; any other name raises
        ValueError.
        """
        if name == "compiler-rt":
            relative = "../compiler-rt"
        elif name == "clang":
            relative = "../clang"
        else:
            raise ValueError("Unknown subproject: %s" % name)

        candidate = os.path.join(self.llvm_dir, relative)
        return os.path.isdir(candidate)

    # Note that we don't allow capturing stdout/stderr. This works quite nicely
    # with dry_run.
    def run_command(self, cmd, cwd=None, check=False, silent_unless_error=False):
        """Announce `cmd` and, unless this is a dry run, execute it.

        Args:
            cmd: argv list passed to subprocess.Popen.
            cwd: working directory for the command; None means the current
                directory.
            check: raise subprocess.CalledProcessError on a non-zero exit.
            silent_unless_error: capture combined stdout/stderr and print it
                only if the command exits non-zero.
        """
        where = shlex.quote(cwd or os.getcwd())
        print("Running `%s` in %s" % (cmd, where))

        if self.dry_run:
            return

        # None lets the child inherit our stdout/stderr.
        out_stream = err_stream = None
        if silent_unless_error:
            out_stream, err_stream = subprocess.PIPE, subprocess.STDOUT

        # Don't use subprocess.run because it's >= py3.5 only, and it's not too
        # much extra effort to get what it gives us anyway.
        proc = subprocess.Popen(
            cmd,
            stdin=subprocess.DEVNULL,
            stdout=out_stream,
            stderr=err_stream,
            cwd=cwd,
        )
        captured, _ = proc.communicate()
        return_code = proc.wait(timeout=0)

        if return_code:
            if silent_unless_error:
                print(captured.decode("utf-8", "ignore"))

            if check:
                raise subprocess.CalledProcessError(
                    returncode=return_code, cmd=cmd, output=captured, stderr=None
                )
190

191

192
def _get_default_cmake_invocation(env):
    """Build a CmakeInvocation for `env` seeded with its default -D arguments.

    The invocation targets `env.llvm_dir` with the generator chosen by `env`.
    Defaults are added via add_new_flag, so they append to list-valued flags
    and may not overwrite an already-set string flag.
    """
    invocation = CmakeInvocation(
        cmake="cmake",
        maker=env.get_cmake_maker(),
        cmake_dir=env.llvm_dir,
    )
    for name, setting in env.get_default_cmake_args_kv():
        invocation.add_new_flag(name, setting)
    return invocation
199

200

201
def _get_cmake_invocation_for_bootstrap_from(env, out_dir, skip_tablegens=True):
    """Return a cmake invocation that compiles with the clang in `out_dir`.

    CMAKE_C_COMPILER and CMAKE_CXX_COMPILER point at `out_dir`/bin/clang and
    clang++. When `skip_tablegens` is true, LLVM_TABLEGEN and CLANG_TABLEGEN
    are also pointed at the binaries in `out_dir`/bin, provided they exist
    (or unconditionally in a dry run), so the new build can reuse them.
    """
    bin_dir = os.path.join(out_dir, "bin")
    clang_path = os.path.join(bin_dir, "clang")

    cmake = _get_default_cmake_invocation(env)
    cmake.add_new_flag("CMAKE_C_COMPILER", clang_path)
    cmake.add_new_flag("CMAKE_CXX_COMPILER", clang_path + "++")

    if not skip_tablegens:
        return cmake

    # We often get no value out of building new tblgens; the previous build
    # should have them. It's still correct to build them, just slower.
    prebuilt_tablegens = (
        ("LLVM_TABLEGEN", "llvm-tblgen"),
        ("CLANG_TABLEGEN", "clang-tblgen"),
    )
    for cmake_var, binary in prebuilt_tablegens:
        tablegen_path = os.path.join(bin_dir, binary)

        # Check that this exists, since the user's allowed to specify their own
        # stage1 directory (which is generally where we'll source everything
        # from). Dry runs should hope for the best from our user, as well.
        if env.dry_run or os.path.exists(tablegen_path):
            cmake.add_new_flag(cmake_var, tablegen_path)

    return cmake
223

224

225
def _build_things_in(env, target_dir, what):
    """Build the targets listed in `what` inside `target_dir`.

    The build tool comes from `env.get_make_command()`; the command is run
    with check=True, so a failing build raises.
    """
    env.run_command(env.get_make_command() + what, cwd=target_dir, check=True)
228

229

230
def _run_fresh_cmake(env, cmake, target_dir):
    """Configure `cmake` in a brand-new, empty `target_dir`.

    Unless this is a dry run, any existing `target_dir` is deleted and then
    recreated with mode 0o755 first. The cmake command itself runs with
    check=True and with its output shown only on failure.
    """

    def recreate(path):
        # A directory that is already absent is fine; anything else that
        # stops the removal is a real error and propagates.
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            pass
        os.makedirs(path, mode=0o755)

    if not env.dry_run:
        recreate(target_dir)

    env.run_command(
        cmake.to_args(), cwd=target_dir, check=True, silent_unless_error=True
    )
241

242

243
def _build_stage1_clang(env):
    """Configure and build the stage1 toolchain; return its build directory.

    Uses the default cmake invocation in the "stage1" output subdirectory and
    builds the `clang`, `llvm-profdata` and `profile` targets.
    """
    stage1_dir = env.output_subdir("stage1")
    _run_fresh_cmake(env, _get_default_cmake_invocation(env), stage1_dir)

    stage1_targets = ["clang", "llvm-profdata", "profile"]
    _build_things_in(env, stage1_dir, what=stage1_targets)
    return stage1_dir
249

250

251
def _generate_instrumented_clang_profile(env, stage1_dir, profile_dir, output_file):
    """Merge the raw profiles in `profile_dir` into `output_file`.

    Runs `llvm-profdata merge` from `stage1_dir`/bin over every `*.profraw`
    file directly inside `profile_dir`. In a dry run the directory is not
    read; a literal `*.profraw` pattern stands in for the inputs so the
    printed command is still informative.

    Raises:
        ValueError: if (outside of a dry run) `profile_dir` contains no
            `.profraw` files, since merging nothing cannot produce a profile.
        subprocess.CalledProcessError: propagated from `env.run_command` when
            the merge itself fails.
    """
    llvm_profdata = os.path.join(stage1_dir, "bin", "llvm-profdata")
    if env.dry_run:
        profiles = [os.path.join(profile_dir, "*.profraw")]
    else:
        # os.listdir returns entries in arbitrary order; sort so the merge
        # command line is reproducible from run to run.
        profiles = sorted(
            os.path.join(profile_dir, entry)
            for entry in os.listdir(profile_dir)
            if entry.endswith(".profraw")
        )
        if not profiles:
            raise ValueError(
                "No .profraw files found in %s; did the benchmark run with "
                "the instrumented clang?" % profile_dir
            )

    cmd = [llvm_profdata, "merge", "-output=" + output_file] + profiles
    env.run_command(cmd, check=True)
263

264

265
def _build_instrumented_clang(env, stage1_dir):
    """Build an IR-instrumented clang and lld using the stage1 toolchain.

    `stage1_dir` must be an absolute path. Returns a pair
    `(build_dir, profiles_dir)`, where `build_dir` is the "instrumented"
    output subdirectory and `profiles_dir` is its "profiles" subdirectory.
    """
    assert os.path.isabs(stage1_dir)

    instrumented_dir = os.path.join(env.output_dir, "instrumented")

    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    cmake.add_new_flag("LLVM_BUILD_INSTRUMENTED", "IR")
    # libcxx's configure step messes with our link order: we'll link
    # libclang_rt.profile after libgcc, and the former requires atexit from the
    # latter. So, configure checks fail.
    #
    # Since we don't need libcxx or compiler-rt anyway, just disable them.
    cmake.add_new_flag("LLVM_BUILD_RUNTIME", "No")

    _run_fresh_cmake(env, cmake, instrumented_dir)
    _build_things_in(env, instrumented_dir, what=["clang", "lld"])

    return instrumented_dir, os.path.join(instrumented_dir, "profiles")
284

285

286
def _build_optimized_clang(env, stage1_dir, profdata_file):
    """Build clang with the PGO profile applied; return the build directory.

    The build is configured in the "optimized" output subdirectory, compiled
    by the toolchain in `stage1_dir`, with LLVM_PROFDATA_FILE set to the
    absolute path of `profdata_file`.

    Raises ValueError if `profdata_file` does not exist (skipped in a dry
    run, where nothing has been generated).
    """
    profile_missing = not os.path.exists(profdata_file)
    if profile_missing and not env.dry_run:
        raise ValueError(
            "Looks like the profdata file at %s doesn't exist" % profdata_file
        )

    optimized_dir = os.path.join(env.output_dir, "optimized")

    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)
    cmake.add_new_flag("LLVM_PROFDATA_FILE", os.path.abspath(profdata_file))
    # We'll get complaints about hash mismatches in `main` in tools/etc. Ignore
    # it.
    cmake.add_cflags(["-Wno-backend-plugin"])

    _run_fresh_cmake(env, cmake, optimized_dir)
    _build_things_in(env, optimized_dir, what=["clang"])
    return optimized_dir
302

303

304
# Script-level options that are not carried by Env.
Args = collections.namedtuple(
    "Args",
    (
        # Whether to finish with the PGO-optimized build.
        "do_optimized_build",
        # Whether the benchmark build should be RelWithDebInfo.
        "include_debug_info",
        # Path the merged profile is written to.
        "profile_location",
        # Pre-built stage1 directory, or None to build stage1 here.
        "stage1_dir",
    ),
)
313

314

315
def _parse_args():
    """Parse the command line into an `(Env, Args)` pair.

    The Env carries what every build step needs (LLVM source directory,
    output directory, build tool, default cmake -D arguments, dry-run flag);
    the Args tuple carries the remaining script-level choices.

    The LLVM directory, the output directory and a user-supplied stage1
    directory are all converted to absolute paths here. The output directory
    defaults to `<llvm_dir>/out` and the profile location to
    `<output_dir>/pgo_profile.prof`.

    Raises:
        ValueError: if a --cmake-extra-arg starts with "-" but is not a -D
            argument.
    """
    parser = argparse.ArgumentParser(
        description="Builds LLVM and Clang with instrumentation, collects "
        "instrumentation profiles for them, and (optionally) builds things "
        "with these PGO profiles. By default, it's assumed that you're "
        "running this from your LLVM root, and all build artifacts will be "
        "saved to $PWD/out."
    )
    parser.add_argument(
        "--cmake-extra-arg",
        action="append",
        default=[],
        help="an extra arg to pass to all cmake invocations. Note that this "
        "is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will "
        "be passed as -DFOO=BAR. This may be specified multiple times.",
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="print commands instead of running them"
    )
    parser.add_argument(
        "--llvm-dir",
        default=".",
        help="directory containing an LLVM checkout (default: $PWD)",
    )
    parser.add_argument(
        "--no-optimized-build",
        action="store_true",
        help="disable the final, PGO-optimized build",
    )
    parser.add_argument(
        "--out-dir", help="directory to write artifacts to (default: $llvm_dir/out)"
    )
    parser.add_argument(
        "--profile-output",
        help="where to output the profile (default is $out/pgo_profile.prof)",
    )
    parser.add_argument(
        "--stage1-dir",
        help="instead of having an initial build of everything, use the given "
        "directory. It is expected that this directory will have clang, "
        "llvm-profdata, and the appropriate libclang_rt.profile already built",
    )
    # The flag switches the benchmark build TO RelWithDebInfo (see
    # _run_benchmark), so the help text must say so.
    parser.add_argument(
        "--use-debug-info-in-benchmark",
        action="store_true",
        help="use RelWithDebInfo instead of a regular build in the benchmark. "
        "This increases benchmark execution time and disk space requirements, "
        "but gives more coverage over debuginfo bits in LLVM and clang.",
    )
    parser.add_argument(
        "--use-make",
        action="store_true",
        # Fall back to make automatically when ninja isn't on PATH.
        default=shutil.which("ninja") is None,
        help="use Makefiles instead of ninja",
    )

    args = parser.parse_args()

    llvm_dir = os.path.abspath(args.llvm_dir)
    if args.out_dir is None:
        output_dir = os.path.join(llvm_dir, "out")
    else:
        output_dir = os.path.abspath(args.out_dir)

    # Defaults first; user-supplied args with the same key replace them.
    extra_args = {
        "CMAKE_BUILD_TYPE": "Release",
        "LLVM_ENABLE_PROJECTS": "clang;compiler-rt;lld",
    }
    for arg in args.cmake_extra_arg:
        if arg.startswith("-D"):
            arg = arg[2:]
        elif arg.startswith("-"):
            raise ValueError(
                "Unknown not- -D arg encountered; you may need "
                "to tweak the source..."
            )
        # "KEY" alone becomes an empty value; only the first "=" separates
        # the key from the value.
        key, _, val = arg.partition("=")
        extra_args[key] = val

    env = Env(
        default_cmake_args=extra_args,
        dry_run=args.dry_run,
        llvm_dir=llvm_dir,
        output_dir=output_dir,
        use_make=args.use_make,
    )

    if args.profile_output is not None:
        profile_location = args.profile_output
    else:
        profile_location = os.path.join(env.output_dir, "pgo_profile.prof")

    # _build_instrumented_clang asserts that the stage1 directory is absolute,
    # so a relative --stage1-dir has to be resolved before it gets there.
    stage1_dir = args.stage1_dir
    if stage1_dir is not None:
        stage1_dir = os.path.abspath(stage1_dir)

    result_args = Args(
        do_optimized_build=not args.no_optimized_build,
        include_debug_info=args.use_debug_info_in_benchmark,
        profile_location=profile_location,
        stage1_dir=stage1_dir,
    )

    return env, result_args
419

420

421
def _looks_like_llvm_dir(directory):
    """Arbitrary set of heuristics to determine if `directory` is an llvm dir.

    Returns True when `directory` contains every entry in a short list of
    expected names and its `include` subdirectory contains `llvm`. Returns
    False otherwise, including when `directory` does not exist or is not a
    directory.

    Errs on the side of false-positives."""

    expected_contents = [
        "CODE_OWNERS.TXT",
        "cmake",
        "docs",
        "include",
        "utils",
    ]

    # A missing or non-directory path certainly isn't an LLVM checkout; report
    # that as False so the caller can print its own message instead of a
    # traceback.
    try:
        contents = set(os.listdir(directory))
    except (FileNotFoundError, NotADirectoryError):
        return False

    if not all(c in contents for c in expected_contents):
        return False

    # `include` exists at this point, but may be a plain file.
    try:
        include_listing = os.listdir(os.path.join(directory, "include"))
    except NotADirectoryError:
        return False

    return "llvm" in include_listing
444

445

446
def _die(*args, **kwargs):
    """Print a message to stderr and exit with status 1.

    Arguments are forwarded to print(); any `file` keyword supplied by the
    caller is replaced by sys.stderr.
    """
    print(*args, **dict(kwargs, file=sys.stderr))
    sys.exit(1)
450

451

452
def _main():
    """Run the whole pipeline: stage1, instrumented build, benchmark, profile
    merge and (unless disabled) the final PGO-optimized build.

    Exits with status 1, after printing a message to stderr, if the LLVM
    directory does not look like an LLVM checkout or the clang / compiler-rt
    sources are not found next to it. Progress messages go to stderr; the
    final profile path and build directory go to stdout.
    """
    env, args = _parse_args()

    if not _looks_like_llvm_dir(env.llvm_dir):
        _die("Looks like %s isn't an LLVM directory; please see --help" % env.llvm_dir)
    # Env.has_llvm_subproject looks for these as siblings of the LLVM
    # directory, so report that location rather than an in-tree one.
    if not env.has_llvm_subproject("clang"):
        _die(
            "Need a clang checkout at %s"
            % os.path.normpath(os.path.join(env.llvm_dir, "../clang"))
        )
    if not env.has_llvm_subproject("compiler-rt"):
        _die(
            "Need a compiler-rt checkout at %s"
            % os.path.normpath(os.path.join(env.llvm_dir, "../compiler-rt"))
        )

    def status(*args):
        print(*args, file=sys.stderr)

    if args.stage1_dir is None:
        status("*** Building stage1 clang...")
        stage1_out = _build_stage1_clang(env)
    else:
        # _build_instrumented_clang asserts an absolute path; a stage1
        # directory given relative to the current directory must still work.
        stage1_out = os.path.abspath(args.stage1_dir)

    status("*** Building instrumented clang...")
    instrumented_out, profile_dir = _build_instrumented_clang(env, stage1_out)
    status("*** Running profdata benchmarks...")
    _run_benchmark(env, instrumented_out, args.include_debug_info)
    status("*** Generating profile...")
    _generate_instrumented_clang_profile(
        env, stage1_out, profile_dir, args.profile_location
    )

    print("Final profile:", args.profile_location)
    if args.do_optimized_build:
        status("*** Building PGO-optimized binaries...")
        optimized_out = _build_optimized_clang(env, stage1_out, args.profile_location)
        print("Final build directory:", optimized_out)
485

486

487
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    _main()
489

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.