# optimum-habana
# 171 lines · 6.6 KB
1# coding=utf-8
2# Copyright 2022 the HuggingFace Inc. team.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Tool to create or update a diff file between transformers and optimum examples."""
16
17import re
18import subprocess
19import tempfile
20from argparse import ArgumentParser
21from pathlib import Path
22
23from git import Repo
24
25
# Directory named "example_diff", located next to this script, in which the
# generated diff files are written.
DIFF_DIRECTORY = Path(__file__).parent.resolve() / "example_diff"
27
28
29def _ask_yes_or_no_question(message: str) -> str:
30if message[-1] == "?":
31message = message[:-1]
32message = f"{message} (y/n) ? "
33continue_ = True
34while continue_:
35res = input(message)
36if res not in ["y", "n"]:
37print(f"You must answer by either y (yes) or n (no), but {res} was provided.\n")
38else:
39continue_ = False
40return res
41
42
def diff(filename1: Path, filename2: Path) -> str:
    """Run the system `diff` command on two files and return what it prints.

    Args:
        filename1: First file, passed as the left operand of `diff`.
        filename2: Second file, passed as the right operand of `diff`.

    Returns:
        The standard output of `diff`, decoded as UTF-8.

    Raises:
        FileNotFoundError: If at least one of the two paths does not exist.
    """
    both_exist = filename1.exists() and filename2.exists()
    if not both_exist:
        raise FileNotFoundError(
            f"Cannot compute the diff because at least one of the files does not exist: {filename1} and/or {filename2}."
        )

    # The exit status is deliberately not checked: only stdout is of interest.
    completed = subprocess.run(["diff", str(filename1), str(filename2)], stdout=subprocess.PIPE)
    return completed.stdout.decode("utf-8")
52
53
54def _colorize_lines(content):
55lines = content.split("\n")
56color_mapping = {
57"<": "\033[0;31m", # Red
58">": "\033[0;32m", # Green
59"-": "",
60"default": "\033[0;36m", # Blue
61}
62end_color = "\033[0;0m"
63for i, line in enumerate(lines):
64if not line:
65continue
66start_char = color_mapping.get(line[0], color_mapping["default"])
67lines[i] = "".join([start_char, line, end_color])
68return "\n".join(lines)
69
70
def create_diff_content(raw_diff: str, keep_all_diffs: bool = False) -> str:
    """Split a raw diff into chunks and keep the selected ones.

    A chunk starts at every line whose first character is none of "<", ">"
    or "-", and runs until the next such line (or the end of the text).

    Args:
        raw_diff: The text to split, e.g. the output of `diff`.
        keep_all_diffs: If True, every chunk is kept without asking. Otherwise
            each chunk is printed in color and the user is asked whether to
            keep it.

    Returns:
        The kept chunks joined by "\\n".
    """
    chunk_starts = [m.start() for m in re.finditer(r"^[^><-]+", raw_diff, flags=re.MULTILINE)]
    # Each chunk ends where the next one begins; the last one runs to the end.
    chunk_ends = chunk_starts[1:] + [None]

    kept = []
    for begin, stop in zip(chunk_starts, chunk_ends):
        # Drop the newline separating this chunk from the next one: it is
        # re-inserted by the final join.
        if stop is not None and raw_diff[stop - 1] == "\n":
            stop -= 1
        chunk = raw_diff[begin:stop]

        if keep_all_diffs:
            kept.append(chunk)
            continue

        print(_colorize_lines(chunk))
        if _ask_yes_or_no_question("Keep this diff") != "n":
            kept.append(chunk)

    return "\n".join(kept)
86
87
def auto_diff():
    """Regenerate the diff files for all Optimum examples that exist in Transformers.

    The Transformers repository is cloned into a temporary directory. For every
    sub-folder of `../examples/` (relative to this script) that has a same-named
    folder under `examples/pytorch/` in Transformers, each file whose name
    starts with "run_" and that also exists on the Transformers side is diffed
    against its counterpart. All chunks are accepted and the result is written
    to `DIFF_DIRECTORY / "<file stem>.txt"`.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Clone the Transformers GH repo. Only the current snapshot of the
        # examples is read, so a shallow clone avoids fetching the full history.
        Repo.clone_from("https://github.com/huggingface/transformers.git", tmpdirname, depth=1)

        # Get paths to Optimum and Transformers examples
        path_to_optimum_examples = Path(__file__).resolve().parent / "../examples/"
        path_to_transformers_examples = Path(f"{tmpdirname}/examples/pytorch/")
        # Built once as a set so the membership test below is not recomputed
        # for every Optimum example.
        transformers_example_names = {
            directory.name for directory in path_to_transformers_examples.iterdir() if directory.is_dir()
        }

        # Make sure the output directory exists before writing into it.
        DIFF_DIRECTORY.mkdir(parents=True, exist_ok=True)

        # Loop over Optimum examples to compare them with their Transformers counterpart
        for directory in path_to_optimum_examples.iterdir():
            if not directory.is_dir() or directory.name not in transformers_example_names:
                continue
            path_to_transformers = path_to_transformers_examples / directory.name

            # Loop over all the files starting with "run_" in the example folder
            for file in directory.iterdir():
                if not (file.is_file() and file.name.startswith("run_")):
                    continue
                transformers_file = path_to_transformers / file.name
                if not transformers_file.is_file():
                    continue

                final_diff = create_diff_content(diff(transformers_file, file), keep_all_diffs=True)
                diff_filename = DIFF_DIRECTORY / f"{file.stem}.txt"
                # The diff text was decoded as UTF-8, so write it back as UTF-8
                # regardless of the platform's default encoding.
                with open(diff_filename, "w", encoding="utf-8") as fp:
                    fp.write(final_diff)
121
122
def parse_args():
    """Parse the command-line arguments of the tool.

    Returns:
        The parsed namespace with `transformers` and `optimum` (paths, or None
        when not given) and `auto` (bool, False by default).
    """
    cli = ArgumentParser(
        description="Tool to create or update a diff file between transformers and optimum examples."
    )

    # Both path options share the same shape; only the library name differs.
    for library in ("transformers", "optimum"):
        cli.add_argument(f"--{library}", type=Path, help=f"The path to the {library} example")

    cli.add_argument(
        "--auto",
        action="store_true",
        help="Whether to automatically write diff files or not. If true, all diffs will be accepted.",
    )

    namespace = cli.parse_args()
    return namespace
135
136
def main():
    """Entry point: create diff files automatically or interactively.

    With `--auto`, all diff files are regenerated through `auto_diff()`.
    Otherwise the diff between `--transformers` and `--optimum` is computed,
    the user chooses which chunks to keep, and is then asked where (and
    whether) to save the result.

    Raises:
        ValueError: If `--auto` is not set and `--transformers` or `--optimum`
            is missing.
    """
    args = parse_args()

    if args.auto:
        auto_diff()
        return

    # Both paths are required in interactive mode: with only one of them, the
    # diff computation would be attempted on a `None` path.
    if args.transformers is None or args.optimum is None:
        raise ValueError("`--transformers` and `--optimum` must be both set if `--auto` is not set.")

    raw_diff = diff(args.transformers, args.optimum)
    print(f"Creating the diff file between {args.transformers} and {args.optimum}:\n")
    final_diff = create_diff_content(raw_diff)
    print(f"Difference between {args.transformers} and {args.optimum}:\n")
    print(_colorize_lines(final_diff))
    print("\n")

    default_filename = DIFF_DIRECTORY / f"{args.transformers.stem}.txt"
    filename = input(f"Would you like to save this file at {default_filename} (y/n/other path)? ")
    if filename == "y":
        filename = default_filename
    if filename == "n":
        return

    # Any answer other than "y" or "n" is treated as a custom output path.
    filename = Path(filename)
    should_override = True
    if filename.exists():
        answer = _ask_yes_or_no_question("This file already exists, do you want to overwrite it")
        should_override = answer == "y"

    if should_override:
        # The diff text was decoded as UTF-8, so write it back as UTF-8.
        with open(filename, "w", encoding="utf-8") as fp:
            fp.write(final_diff)

        # Only report a save when the file was actually written.
        print(f"Content saved at: {filename}")
168
169
# Run the tool only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
172