pytorch

profiler_bench.py
114 строк · 3.4 Кб
Перенос по словам
1
import argparse
2
import sys
3
import timeit
4

5
import torch
6
from torch.utils.benchmark import Timer
7

8

9
# Number of tasks forked by parallel_workload; INTERNAL_ITER is divided
# evenly between them.
PARALLEL_TASKS_NUM = 4
# Number of torch.mm calls per workload invocation. Stays None until the
# __main__ section assigns it from the --internal-iter argument.
INTERNAL_ITER = None
11

12

13
def loop_workload(x):
    """Multiply ``x`` by itself ``INTERNAL_ITER`` times and return the result.

    Each step replaces ``x`` with ``torch.mm(x, x)``, so ``x`` must be a
    square 2-D tensor. ``INTERNAL_ITER`` is a module-level global and must be
    set to an int before this function is called.
    """
    for _ in range(INTERNAL_ITER):
        x = torch.mm(x, x)
    return x
17

18

19
def parallel_workload(x):
    """Run the matmul chain as ``PARALLEL_TASKS_NUM`` forked tasks.

    Every task starts from the same input ``x`` and performs
    ``INTERNAL_ITER // PARALLEL_TASKS_NUM`` ``torch.mm(x, x)`` steps, so ``x``
    must be a square 2-D tensor. All tasks are awaited, but their results
    are discarded and the input ``x`` is returned unchanged.

    ``INTERNAL_ITER`` and ``PARALLEL_TASKS_NUM`` are module-level globals;
    ``INTERNAL_ITER`` must be set to an int before this function is called.
    """
    iters_per_task = INTERNAL_ITER // PARALLEL_TASKS_NUM

    def parallel_task(x):
        for _ in range(iters_per_task):
            x = torch.mm(x, x)
        return x

    # torch.jit.fork / torch.jit.wait are the public names of the
    # _fork / _wait backward-compatibility aliases.
    futs = [torch.jit.fork(parallel_task, x) for _ in range(PARALLEL_TASKS_NUM)]
    for fut in futs:
        torch.jit.wait(fut)
    return x
31

32

33
if __name__ == "__main__":
    # Benchmark entry point: parse the options, then time the selected
    # workload twice with torch.utils.benchmark.Timer -- first with the
    # autograd profiler disabled, then with it enabled -- printing each result.
    torch._C._set_graph_executor_optimize(False)

    # Maps each accepted --workload value to the function implementing it.
    workloads = {"loop": loop_workload, "parallel": parallel_workload}

    parser = argparse.ArgumentParser(description="Profiler benchmark")

    parser.add_argument("--with-cuda", "--with_cuda", action="store_true")
    parser.add_argument("--with-stack", "--with_stack", action="store_true")
    parser.add_argument("--use-script", "--use_script", action="store_true")
    parser.add_argument("--use-kineto", "--use_kineto", action="store_true")
    parser.add_argument(
        "--profiling-tensor-size", "--profiling_tensor_size", default=1, type=int
    )
    # `choices` makes argparse reject unknown workloads with a usage error,
    # which (unlike an assert) still works when Python runs with -O.
    parser.add_argument(
        "--workload", default="loop", type=str, choices=sorted(workloads)
    )
    parser.add_argument("--internal-iter", "--internal_iter", default=256, type=int)
    parser.add_argument(
        "--timer-min-run-time", "--timer_min_run_time", default=10, type=int
    )
    parser.add_argument("--cuda-only", "--cuda_only", action="store_true")

    args = parser.parse_args()

    if args.with_cuda and not torch.cuda.is_available():
        print("No CUDA available")
        sys.exit()

    print(
        f"Payload: {args.workload}, {args.internal_iter} iterations; timer min. runtime = {args.timer_min_run_time}\n"
    )
    # The workload functions read their iteration count from this module global.
    INTERNAL_ITER = args.internal_iter

    # The input tensor and the (optionally traced) workload do not depend on
    # whether profiling is on, so they are built once and shared by both runs.
    input_x = torch.rand(args.profiling_tensor_size, args.profiling_tensor_size)
    if args.with_cuda:
        input_x = input_x.cuda()

    workload = workloads[args.workload]
    if args.use_script:
        workload = torch.jit.trace(workload, (input_x,))

    def plain_payload():
        """Run the workload once without the profiler."""
        return workload(input_x)

    def profiled_payload():
        """Run the workload once inside an autograd profiler context."""
        with torch.autograd.profiler.profile(
            use_cuda=args.with_cuda,
            with_stack=args.with_stack,
            use_kineto=args.use_kineto,
            use_cpu=not args.cuda_only,
        ):
            return workload(input_x)

    for profiling_enabled in (False, True):
        state = "enabled" if profiling_enabled else "disabled"
        print(
            f"Profiling {state}, "
            f"tensor size {args.profiling_tensor_size}x{args.profiling_tensor_size}, "
            f"use cuda: {args.with_cuda}, use kineto: {args.use_kineto}, "
            f"with stacks: {args.with_stack}, use script: {args.use_script}"
        )

        payload = profiled_payload if profiling_enabled else plain_payload

        t = Timer(
            "payload()",
            globals={"payload": payload},
            timer=timeit.default_timer,
        ).blocked_autorange(min_run_time=args.timer_min_run_time)
        print(t)
115
pytorch

Использование cookies