# pytorch
# 197 lines · 5.7 KB
1import csv
2import os
3import re
4import sys
5
6# This script takes the logs produced by the benchmark scripts (e.g.,
7# torchbench.py) and parses it into a CSV file that summarizes what
8# is failing and why. It is kept separate from the benchmark script
9# emitting a more structured output as it is often more convenient
10# to iterate quickly on log files offline instead of having to make
11# a change to the benchmark script and then do a full sweep to see
12# the updates.
13#
14# This script is not very well written, feel free to rewrite it as necessary
15
# Exactly one argument: the path to the benchmark log file.
assert len(sys.argv) == 2

# Read the whole log up front; use a context manager so the file handle is
# closed promptly (the original bare open().read() leaked the handle).
with open(sys.argv[1]) as log_file:
    full_log = log_file.read()

# If the log contains a gist URL, extract it so we can include it in the CSV
gist_url = ""
m = re.search(r"https://gist.github.com/[a-f0-9]+", full_log)
if m is not None:
    gist_url = m.group(0)

# Split the log into an entry per benchmark.  The two capture groups mean
# each benchmark contributes (name, name2, payload) — one of the two name
# slots is always None depending on which alternative matched.
entries = re.split(
    r"(?:cuda (?:train|eval) +([^ ]+)|WARNING:root:([^ ]+) failed to load)", full_log
)[1:]
# Entries schema example:
# `['hf_Bert', None, '
# PASS\nTIMING: entire_frame_compile:1.80925 backend_compile:6e-05\nDynamo produced 1 graph(s) covering 367 ops\n']`
33
34
def chunker(seq, size):
    """Yield successive slices of *seq* of length *size*.

    The final chunk is shorter when ``len(seq)`` is not a multiple of
    ``size``.  An empty sequence yields nothing.
    """
    for start in range(0, len(seq), size):
        yield seq[start : start + size]
37
38
# Columns of the summary CSV emitted on stdout.
_FIELDNAMES = [
    "bench",
    "name",
    "result",
    "component",
    "context",
    "explain",
    "frame_time",
    "backend_time",
    "graph_count",
    "op_count",
    "graph_breaks",
    "unique_graph_breaks",
]

c = 0  # entries whose result we failed to classify
i = 0  # entries processed

out = csv.DictWriter(sys.stdout, _FIELDNAMES, dialect="excel")
out.writeheader()
# First data row carries only the gist URL (provenance for this sweep).
out.writerow({"explain": gist_url})
62
63
# Sometimes backtraces will be in third party code, which results
# in very long file names. Delete the absolute path in this case.
def normalize_file(f):
    """Shorten a traceback file path for the CSV.

    Paths inside an installed package are reduced to everything after the
    first ``site-packages/`` marker; any other path is made relative to the
    current working directory.
    """
    if "site-packages/" in f:
        # partition() keeps the full tail after the *first* marker.  The
        # original f.split("site-packages/", 2)[1] dropped everything after
        # a second occurrence of the marker in nested installs.
        return f.partition("site-packages/")[2]
    else:
        return os.path.relpath(f)
71
72
# Assume we run torchbench, huggingface, timm_models in that order
# (as output doesn't say which suite the benchmark is part of)
# TODO: make this more robust

bench = "torchbench"

# 3 = 1 + number of matches in the entries split regex
for name, name2, log in chunker(entries, 3):
    # Exactly one of name/name2 is set per entry, depending on which
    # alternative of the split regex matched (ran vs. failed to load).
    if name is None:
        name = name2
    # NOTE(review): assumes "Albert*" is the first huggingface model and
    # "adv_inc*" the first timm model in the sweep -- confirm against the
    # benchmark suites' alphabetical ordering.
    if name.startswith("Albert"):
        bench = "huggingface"
    elif name.startswith("adv_inc"):
        bench = "timm_models"

    # Payload that will go into the csv
    r = "UNKNOWN"
    explain = ""
    component = ""
    context = ""

    # Coarse classification by marker strings; later matches override
    # earlier ones (so TIMEOUT/ACCURACY beat a stray PASS).
    if "PASS" in log:
        r = "PASS"
    if "TIMEOUT" in log:
        r = "FAIL TIMEOUT"
    if "Accuracy failed" in log:
        r = "FAIL ACCURACY"

    # Attempt to extract out useful information from the traceback

    # Narrow `log` down to the innermost traceback: drop chained-exception
    # context, everything before the "Traceback" header, and any
    # "Original traceback" tail.
    log = log.split(
        "The above exception was the direct cause of the following exception"
    )[0]
    split = log.split("Traceback (most recent call last)", maxsplit=1)
    if len(split) == 2:
        log = split[1]
    log = log.split("Original traceback:")[0]
    # Match the last frame (file, line, source line) plus the exception
    # message line of the traceback.
    m = re.search(
        r'File "([^"]+)", line ([0-9]+), in .+\n +(.+)\n([A-Za-z]+(?:Error|Exception|NotImplementedError): ?.*)',
        log,
    )

    if m is not None:
        r = "FAIL"
        component = f"{normalize_file(m.group(1))}:{m.group(2)}"
        context = m.group(3)
        explain = f"{m.group(4)}"
    else:
        # A bare AssertionError has no ": message" suffix, so the first
        # pattern misses it; retry with a dedicated pattern.
        m = re.search(
            r'File "([^"]+)", line ([0-9]+), in .+\n +(.+)\nAssertionError', log
        )
        if m is not None:
            r = "FAIL"
            component = f"{normalize_file(m.group(1))}:{m.group(2)}"
            context = m.group(3)
            explain = "AssertionError"

    # Sometimes, the benchmark will say FAIL without any useful info
    # See https://github.com/pytorch/torchdynamo/issues/1910
    if "FAIL" in log:
        r = "FAIL"

    if r == "UNKNOWN":
        c += 1

    # Parse "TIMING: entire_frame_compile:X backend_compile:Y" if present.
    backend_time = None
    frame_time = None
    if "TIMING:" in log:
        result = re.search("TIMING:(.*)\n", log).group(1)
        split_str = result.split("backend_compile:")
        if len(split_str) == 2:
            backend_time = float(split_str[1])
            frame_time = float(split_str[0].split("entire_frame_compile:")[1])

    # STATS lines are split but the pieces are not used further yet.
    if "STATS:" in log:
        result = re.search("STATS:(.*)\n", log).group(1)
        # call_* op count: 970 | FakeTensor.__torch_dispatch__:35285 | ProxyTorchDispatchMode.__torch_dispatch__:13339
        split_all = result.split("|")
        # TODO: rewrite this to work with arbitrarily many stats

    # Graph statistics from Dynamo's summary line (kept as strings; they go
    # straight into the CSV).
    graph_count = None
    op_count = None
    graph_breaks = None
    unique_graph_breaks = None
    if m := re.search(
        r"Dynamo produced (\d+) graphs covering (\d+) ops with (\d+) graph breaks \((\d+) unique\)",
        log,
    ):
        graph_count = m.group(1)
        op_count = m.group(2)
        graph_breaks = m.group(3)
        unique_graph_breaks = m.group(4)

    # If the context string is too long, don't put it in the CSV.
    # This is a hack to try to make it more likely that Google Sheets will
    # offer to split columns
    if len(context) > 78:
        context = ""

    # Temporary file names are meaningless, report it's generated code in this
    # case
    if "/tmp/" in component:
        component = "generated code"
        context = ""

    out.writerow(
        {
            "bench": bench,
            "name": name,
            "result": r,
            "component": component,
            "context": context,
            "explain": explain,
            "frame_time": frame_time,
            "backend_time": backend_time,
            "graph_count": graph_count,
            "op_count": op_count,
            "graph_breaks": graph_breaks,
            "unique_graph_breaks": unique_graph_breaks,
        }
    )
    i += 1
195
# Report unclassified entries on stderr so the CSV on stdout stays clean.
if c != 0:
    print(f"failed to classify {c} entries", file=sys.stderr)
198