#!/usr/bin/env python3

# type: ignore

"""
compare.py - versatile benchmark output compare tool
"""

import argparse
import json
import os
import sys
import unittest
from argparse import ArgumentParser

import gbench
from gbench import report, util


def check_inputs(in1, in2, flags):
    """
    Check the user-provided inputs and diagnose any abnormalities.
    """
    in1_kind, in1_err = util.classify_input_file(in1)
    in2_kind, in2_err = util.classify_input_file(in2)
    output_file = util.find_benchmark_flag("--benchmark_out=", flags)
    output_type = util.find_benchmark_flag("--benchmark_out_format=", flags)
    if (
        in1_kind == util.IT_Executable
        and in2_kind == util.IT_Executable
        and output_file
    ):
        print(
            (
                "WARNING: '--benchmark_out=%s' will be passed to both "
                "benchmarks, causing it to be overwritten"
            )
            % output_file
        )
    if in1_kind == util.IT_JSON and in2_kind == util.IT_JSON:
        # When both sides are JSON the only supported flag is
        # --benchmark_filter=
        for flag in util.remove_benchmark_flags("--benchmark_filter=", flags):
            print(
                "WARNING: passing %s has no effect since both "
                "inputs are JSON" % flag
            )
    if output_type is not None and output_type != "json":
        print(
            (
                "ERROR: passing '--benchmark_out_format=%s' to 'compare.py'"
                " is not supported."
            )
            % output_type
        )
        sys.exit(1)


def create_parser():
    parser = ArgumentParser(
        description="versatile benchmark output compare tool"
    )

    parser.add_argument(
        "-a",
        "--display_aggregates_only",
        dest="display_aggregates_only",
        action="store_true",
        help="If there are repetitions, by default we display everything: the"
        " actual runs and the computed aggregates. Sometimes it is "
        "desirable to only view the aggregates, e.g. when there are many "
        "repetitions. Do note that only the display is affected; "
        "internally, all the actual runs are still used, e.g. for the U test.",
    )

    parser.add_argument(
        "--no-color",
        dest="color",
        default=True,
        action="store_false",
        help="Do not use colors in the terminal output",
    )

    parser.add_argument(
        "-d",
        "--dump_to_json",
        dest="dump_to_json",
        help="Additionally, dump benchmark comparison output to this file in JSON format.",
    )

    utest = parser.add_argument_group()
    utest.add_argument(
        "--no-utest",
        dest="utest",
        default=True,
        action="store_false",
        help="The tool can do a two-tailed Mann-Whitney U test with the null hypothesis that it is equally likely that a randomly selected value from one sample will be less than or greater than a randomly selected value from a second sample.\nWARNING: requires a **LARGE** (no less than {}) number of repetitions to be meaningful!\nThe test is run by default if at least {} repetitions were done.\nThis option can disable the U Test.".format(
            report.UTEST_OPTIMAL_REPETITIONS, report.UTEST_MIN_REPETITIONS
        ),
    )
    alpha_default = 0.05
    utest.add_argument(
        "--alpha",
        dest="utest_alpha",
        default=alpha_default,
        type=float,
        help=(
            "Significance level alpha. If the calculated p-value is below this value, the result is said to be statistically significant and the null hypothesis is rejected.\n(default: %0.4f)"
        )
        % alpha_default,
    )
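    # A minimal sketch of that decision rule, assuming SciPy were used directly
    # (the actual test is implemented in gbench.report, not here):
    #
    #   from scipy.stats import mannwhitneyu
    #   _, p_value = mannwhitneyu(baseline_times, contender_times,
    #                             alternative="two-sided")
    #   reject_null = p_value < utest_alpha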

    subparsers = parser.add_subparsers(
        help="This tool has multiple modes of operation:", dest="mode"
    )

    parser_a = subparsers.add_parser(
        "benchmarks",
        help="The simplest use-case: compare all the output of these two benchmarks",
    )
    baseline = parser_a.add_argument_group("baseline", "The benchmark baseline")
    baseline.add_argument(
        "test_baseline",
        metavar="test_baseline",
        type=argparse.FileType("r"),
        nargs=1,
        help="A benchmark executable or JSON output file",
    )
    contender = parser_a.add_argument_group(
        "contender", "The benchmark that will be compared against the baseline"
    )
    contender.add_argument(
        "test_contender",
        metavar="test_contender",
        type=argparse.FileType("r"),
        nargs=1,
        help="A benchmark executable or JSON output file",
    )
    parser_a.add_argument(
        "benchmark_options",
        metavar="benchmark_options",
        nargs=argparse.REMAINDER,
        help="Arguments to pass when running benchmark executables",
    )

    parser_b = subparsers.add_parser(
        "filters", help="Compare filter one with filter two of the same benchmark"
    )
    baseline = parser_b.add_argument_group("baseline", "The benchmark baseline")
    baseline.add_argument(
        "test",
        metavar="test",
        type=argparse.FileType("r"),
        nargs=1,
        help="A benchmark executable or JSON output file",
    )
    baseline.add_argument(
        "filter_baseline",
        metavar="filter_baseline",
        type=str,
        nargs=1,
        help="The first filter, which will be used as the baseline",
    )
    contender = parser_b.add_argument_group(
        "contender", "The benchmark that will be compared against the baseline"
    )
    contender.add_argument(
        "filter_contender",
        metavar="filter_contender",
        type=str,
        nargs=1,
        help="The second filter, which will be compared against the baseline",
    )
    parser_b.add_argument(
        "benchmark_options",
        metavar="benchmark_options",
        nargs=argparse.REMAINDER,
        help="Arguments to pass when running benchmark executables",
    )

    parser_c = subparsers.add_parser(
        "benchmarksfiltered",
        help="Compare filter one of the first benchmark with filter two of the second benchmark",
    )
    baseline = parser_c.add_argument_group("baseline", "The benchmark baseline")
    baseline.add_argument(
        "test_baseline",
        metavar="test_baseline",
        type=argparse.FileType("r"),
        nargs=1,
        help="A benchmark executable or JSON output file",
    )
    baseline.add_argument(
        "filter_baseline",
        metavar="filter_baseline",
        type=str,
        nargs=1,
        help="The first filter, which will be used as the baseline",
    )
    contender = parser_c.add_argument_group(
        "contender", "The benchmark that will be compared against the baseline"
    )
    contender.add_argument(
        "test_contender",
        metavar="test_contender",
        type=argparse.FileType("r"),
        nargs=1,
        help="The second benchmark executable or JSON output file, which will be compared against the baseline",
    )
    contender.add_argument(
        "filter_contender",
        metavar="filter_contender",
        type=str,
        nargs=1,
        help="The second filter, which will be compared against the baseline",
    )
    parser_c.add_argument(
        "benchmark_options",
        metavar="benchmark_options",
        nargs=argparse.REMAINDER,
        help="Arguments to pass when running benchmark executables",
    )

    return parser


def main():
    # Parse the command line flags
    parser = create_parser()
    args, unknown_args = parser.parse_known_args()
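    # Anything after the sub-command's positionals is captured by the REMAINDER
    # argument "benchmark_options", so parse_known_args() is not expected to
    # leave anything behind in unknown_args (asserted below).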
    if args.mode is None:
        parser.print_help()
        sys.exit(1)
    assert not unknown_args
    benchmark_options = args.benchmark_options

    if args.mode == "benchmarks":
        test_baseline = args.test_baseline[0].name
        test_contender = args.test_contender[0].name
        filter_baseline = ""
        filter_contender = ""

        # NOTE: if test_baseline == test_contender, you are analyzing the stdev

        description = "Comparing %s to %s" % (test_baseline, test_contender)
    elif args.mode == "filters":
        test_baseline = args.test[0].name
        test_contender = args.test[0].name
        filter_baseline = args.filter_baseline[0]
        filter_contender = args.filter_contender[0]

        # NOTE: if filter_baseline == filter_contender, you are analyzing the
        # stdev

        description = "Comparing %s to %s (from %s)" % (
            filter_baseline,
            filter_contender,
            args.test[0].name,
        )
    elif args.mode == "benchmarksfiltered":
        test_baseline = args.test_baseline[0].name
        test_contender = args.test_contender[0].name
        filter_baseline = args.filter_baseline[0]
        filter_contender = args.filter_contender[0]

        # NOTE: if test_baseline == test_contender and
        # filter_baseline == filter_contender, you are analyzing the stdev

        description = "Comparing %s (from %s) to %s (from %s)" % (
            filter_baseline,
            test_baseline,
            filter_contender,
            test_contender,
        )
    else:
        # should never happen
        print("Unrecognized mode of operation: '%s'" % args.mode)
        parser.print_help()
        sys.exit(1)

    check_inputs(test_baseline, test_contender, benchmark_options)

    if args.display_aggregates_only:
        benchmark_options += ["--benchmark_display_aggregates_only=true"]

    options_baseline = []
    options_contender = []

    if filter_baseline and filter_contender:
        options_baseline = ["--benchmark_filter=%s" % filter_baseline]
        options_contender = ["--benchmark_filter=%s" % filter_contender]

    # Run the benchmarks and report the results
    json1 = json1_orig = gbench.util.sort_benchmark_results(
        gbench.util.run_or_load_benchmark(
            test_baseline, benchmark_options + options_baseline
        )
    )
    json2 = json2_orig = gbench.util.sort_benchmark_results(
        gbench.util.run_or_load_benchmark(
            test_contender, benchmark_options + options_contender
        )
    )

    # Now, filter the benchmarks so that the difference report can work
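    # (the same "[baseline vs. contender]" replacement label is applied to both
    # sides so that the renamed baseline and contender entries can be matched up)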
    if filter_baseline and filter_contender:
        replacement = "[%s vs. %s]" % (filter_baseline, filter_contender)
        json1 = gbench.report.filter_benchmark(
            json1_orig, filter_baseline, replacement
        )
        json2 = gbench.report.filter_benchmark(
            json2_orig, filter_contender, replacement
        )

    diff_report = gbench.report.get_difference_report(json1, json2, args.utest)
    output_lines = gbench.report.print_difference_report(
        diff_report,
        args.display_aggregates_only,
        args.utest,
        args.utest_alpha,
        args.color,
    )
    print(description)
    for ln in output_lines:
        print(ln)

    # Optionally, dump the computed difference report to a JSON file
    if args.dump_to_json is not None:
        with open(args.dump_to_json, "w") as f_json:
            json.dump(diff_report, f_json, indent=1)


class TestParser(unittest.TestCase):
    def setUp(self):
        self.parser = create_parser()
        testInputs = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "gbench", "Inputs"
        )
        self.testInput0 = os.path.join(testInputs, "test1_run1.json")
        self.testInput1 = os.path.join(testInputs, "test1_run2.json")

    def test_benchmarks_basic(self):
        parsed = self.parser.parse_args(
            ["benchmarks", self.testInput0, self.testInput1]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_without_utest(self):
        parsed = self.parser.parse_args(
            ["--no-utest", "benchmarks", self.testInput0, self.testInput1]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertFalse(parsed.utest)
        self.assertEqual(parsed.utest_alpha, 0.05)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_display_aggregates_only(self):
        parsed = self.parser.parse_args(
            ["-a", "benchmarks", self.testInput0, self.testInput1]
        )
        self.assertTrue(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_with_utest_alpha(self):
        parsed = self.parser.parse_args(
            ["--alpha=0.314", "benchmarks", self.testInput0, self.testInput1]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.utest_alpha, 0.314)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_basic_without_utest_with_utest_alpha(self):
        parsed = self.parser.parse_args(
            [
                "--no-utest",
                "--alpha=0.314",
                "benchmarks",
                self.testInput0,
                self.testInput1,
            ]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertFalse(parsed.utest)
        self.assertEqual(parsed.utest_alpha, 0.314)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarks_with_remainder(self):
        parsed = self.parser.parse_args(
            ["benchmarks", self.testInput0, self.testInput1, "d"]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.benchmark_options, ["d"])

    def test_benchmarks_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            ["benchmarks", self.testInput0, self.testInput1, "--", "e"]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarks")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.benchmark_options, ["e"])

    def test_filters_basic(self):
        parsed = self.parser.parse_args(["filters", self.testInput0, "c", "d"])
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "filters")
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.filter_contender[0], "d")
        self.assertFalse(parsed.benchmark_options)

    def test_filters_with_remainder(self):
        parsed = self.parser.parse_args(
            ["filters", self.testInput0, "c", "d", "e"]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "filters")
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.filter_contender[0], "d")
        self.assertEqual(parsed.benchmark_options, ["e"])

    def test_filters_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            ["filters", self.testInput0, "c", "d", "--", "f"]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "filters")
        self.assertEqual(parsed.test[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.filter_contender[0], "d")
        self.assertEqual(parsed.benchmark_options, ["f"])

    def test_benchmarksfiltered_basic(self):
        parsed = self.parser.parse_args(
            ["benchmarksfiltered", self.testInput0, "c", self.testInput1, "e"]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarksfiltered")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], "e")
        self.assertFalse(parsed.benchmark_options)

    def test_benchmarksfiltered_with_remainder(self):
        parsed = self.parser.parse_args(
            [
                "benchmarksfiltered",
                self.testInput0,
                "c",
                self.testInput1,
                "e",
                "f",
            ]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarksfiltered")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], "e")
        self.assertEqual(parsed.benchmark_options[0], "f")

    def test_benchmarksfiltered_with_remainder_after_doubleminus(self):
        parsed = self.parser.parse_args(
            [
                "benchmarksfiltered",
                self.testInput0,
                "c",
                self.testInput1,
                "e",
                "--",
                "g",
            ]
        )
        self.assertFalse(parsed.display_aggregates_only)
        self.assertTrue(parsed.utest)
        self.assertEqual(parsed.mode, "benchmarksfiltered")
        self.assertEqual(parsed.test_baseline[0].name, self.testInput0)
        self.assertEqual(parsed.filter_baseline[0], "c")
        self.assertEqual(parsed.test_contender[0].name, self.testInput1)
        self.assertEqual(parsed.filter_contender[0], "e")
        self.assertEqual(parsed.benchmark_options[0], "g")


if __name__ == "__main__":
    # unittest.main()
    main()
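    # To run the TestParser suite instead, swap the two calls above, or invoke
    # e.g. `python3 -m unittest compare.TestParser` (the module name "compare"
    # is assumed; it depends on how the file is laid out on disk).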

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;
