optimum-habana

Форк
0
/
create_diff_file_for_example.py 
171 строка · 6.6 Кб
1
# coding=utf-8
2
# Copyright 2022 the HuggingFace Inc. team.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15
"""Tool to create or update a diff file between transformers and optimum examples."""
16

17
import re
18
import subprocess
19
import tempfile
20
from argparse import ArgumentParser
21
from pathlib import Path
22

23
from git import Repo
24

25

26
# Directory where the diff files are stored: the "example_diff" folder located next to this script.
DIFF_DIRECTORY = Path(__file__).parent.resolve() / "example_diff"
27

28

29
def _ask_yes_or_no_question(message: str) -> str:
    """Prompt the user on standard input until the answer is exactly "y" or "n".

    Args:
        message: The question to display. One trailing "?" is removed because the
            " (y/n) ? " suffix appended to the prompt already ends with one. An empty
            message is accepted.

    Returns:
        The answer typed by the user, either "y" or "n".
    """
    # `endswith` is safe on an empty string, whereas `message[-1]` raises IndexError.
    if message.endswith("?"):
        message = message[:-1]
    prompt = f"{message} (y/n) ? "
    while True:
        res = input(prompt)
        if res in ("y", "n"):
            return res
        print(f"You must answer by either y (yes) or n (no), but {res} was provided.\n")
41

42

43
def diff(filename1: Path, filename2: Path) -> str:
    """Run the system `diff` command on two files and return what it prints.

    Args:
        filename1: Path passed as the first argument of `diff`.
        filename2: Path passed as the second argument of `diff`.

    Returns:
        The standard output of `diff filename1 filename2`, decoded as UTF-8.

    Raises:
        FileNotFoundError: If at least one of the two paths does not exist.
    """
    both_present = filename1.exists() and filename2.exists()
    if not both_present:
        raise FileNotFoundError(
            f"Cannot compute the diff because at least one of the files does not exist: {filename1} and/or {filename2}."
        )
    # The exit status is deliberately not checked: only the captured stdout is used.
    completed = subprocess.run(["diff", str(filename1), str(filename2)], stdout=subprocess.PIPE)
    return completed.stdout.decode("utf-8")
52

53

54
def _colorize_lines(content):
    """Wrap each non-empty line of `content` in ANSI color codes for terminal display.

    The color is chosen from the first character of the line: red for "<", green for ">",
    no color prefix for "-", and cyan for anything else. The reset sequence is appended to
    every non-empty line; empty lines are left untouched.

    Args:
        content: Text to colorize, with lines separated by "\\n".

    Returns:
        The colorized text, with the same number of lines as the input.
    """
    reset = "\033[0;0m"
    fallback = "\033[0;36m"  # Cyan
    prefix_by_marker = {
        "<": "\033[0;31m",  # Red
        ">": "\033[0;32m",  # Green
        "-": "",  # No color prefix
    }

    def paint(line):
        # Empty lines have no first character to inspect and stay as they are.
        if not line:
            return line
        return prefix_by_marker.get(line[0], fallback) + line + reset

    return "\n".join(paint(line) for line in content.split("\n"))
69

70

71
def create_diff_content(raw_diff: str, keep_all_diffs: bool = False) -> str:
    """Split a raw diff into chunks and return the chunks that are kept.

    A chunk starts at every line whose first character is not ">", "<" or "-" and runs up
    to the next such line. Note that this means any line starting with another character
    opens a new chunk.

    Args:
        raw_diff: The text to split, as returned by `diff`.
        keep_all_diffs: If True, every chunk is kept without asking. Otherwise each chunk is
            printed with colors and the user is asked whether to keep it.

    Returns:
        The kept chunks joined with "\\n" (an empty string if there is none).
    """
    chunk_starts = [match.start() for match in re.finditer(r"^[^><-]+", raw_diff, flags=re.MULTILINE)]
    last_index = len(chunk_starts) - 1
    kept_chunks = []
    for index, begin in enumerate(chunk_starts):
        if index == last_index:
            # The last chunk extends to the end of the text.
            chunk = raw_diff[begin:]
        else:
            stop = chunk_starts[index + 1]
            # Drop the newline separating this chunk from the next one: it is restored by the final join.
            if raw_diff[stop - 1] == "\n":
                stop -= 1
            chunk = raw_diff[begin:stop]

        if not keep_all_diffs:
            print(_colorize_lines(chunk))
            if _ask_yes_or_no_question("Keep this diff") == "n":
                continue
        kept_chunks.append(chunk)
    return "\n".join(kept_chunks)
86

87

88
def auto_diff():
    """Regenerate the diff files of all Optimum examples that also exist in Transformers.

    The Transformers GitHub repository is cloned into a temporary directory. Then, for each
    sub-directory of `../examples/` (relative to this script) whose name is also a sub-directory
    of `examples/pytorch/` in the clone, every file whose name starts with "run_" is diffed
    against the file of the same name in Transformers. All diffs are kept without prompting and
    each result is written to `DIFF_DIRECTORY / "<script stem>.txt"`.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Clone the Transformers GH repo
        Repo.clone_from("https://github.com/huggingface/transformers.git", tmpdirname)

        # Get paths to Optimum and Transformers examples
        path_to_optimum_examples = Path(__file__).resolve().parent / "../examples/"
        optimum_example_dirs = [directory for directory in path_to_optimum_examples.iterdir() if directory.is_dir()]
        path_to_transformers_examples = Path(f"{tmpdirname}/examples/pytorch/")
        # Built once as a set so that the membership test in the loop does not rebuild a list per example
        transformers_example_names = {
            directory.name for directory in path_to_transformers_examples.iterdir() if directory.is_dir()
        }

        # Loop over Optimum examples to compare them with their Transformers counterpart
        for directory in optimum_example_dirs:
            # Skip the examples that are not in Transformers
            if directory.name not in transformers_example_names:
                continue
            path_to_transformers = path_to_transformers_examples / directory.name
            # Loop over all the "run_*" files in the example folder
            for file in directory.iterdir():
                if not (file.is_file() and file.name.startswith("run_")):
                    continue
                transformers_file = path_to_transformers / file.name
                if not transformers_file.is_file():
                    continue
                final_diff = create_diff_content(diff(transformers_file, file), keep_all_diffs=True)
                diff_filename = DIFF_DIRECTORY / f"{file.stem}.txt"
                # The diff text is decoded as UTF-8, so it is written back with the same encoding
                # instead of the platform default one
                with open(diff_filename, "w", encoding="utf-8") as fp:
                    fp.write(final_diff)
121

122

123
def parse_args():
    """Parse the command-line arguments of the tool.

    Returns:
        The namespace returned by `ArgumentParser.parse_args()`. It has the attributes
        `transformers` and `optimum` (a `Path`, or `None` when the option is omitted) and
        `auto` (`True` only when `--auto` is passed).
    """
    cli = ArgumentParser(description="Tool to create or update a diff file between transformers and optimum examples.")
    # The two path options only differ by the library they refer to.
    for library in ("transformers", "optimum"):
        cli.add_argument(f"--{library}", type=Path, help=f"The path to the {library} example")
    cli.add_argument(
        "--auto",
        help="Whether to automatically write diff files or not. If true, all diffs will be accepted.",
        action="store_true",
    )
    return cli.parse_args()
135

136

137
def main():
    """Entry point of the tool.

    With `--auto`, all diff files are regenerated by `auto_diff()`. Otherwise, the diff between
    the files given by `--transformers` and `--optimum` is computed, the user chooses which
    chunks to keep, the result is printed and the user is asked where to save it (default
    location, another path, or nowhere).

    Raises:
        ValueError: If `--auto` is not set and `--transformers` or `--optimum` is missing.
    """
    args = parse_args()

    if args.auto:
        auto_diff()
        return

    # Both paths are needed to compute a diff, so the check must fail as soon as one is missing
    # (`or`, not `and`); otherwise a missing path only surfaces later as an AttributeError.
    if args.transformers is None or args.optimum is None:
        raise ValueError("`--transformers` and `--optimum` must be both set if `--auto` is not set.")
    raw_diff = diff(args.transformers, args.optimum)
    print(f"Creating the diff file between {args.transformers} and {args.optimum}:\n")
    final_diff = create_diff_content(raw_diff)
    print(f"Difference between {args.transformers} and {args.optimum}:\n")
    print(_colorize_lines(final_diff))
    print("\n")

    default_filename = DIFF_DIRECTORY / f"{args.transformers.stem}.txt"
    filename = input(f"Would you like to save this file at {default_filename} (y/n/other path)? ")
    if filename == "y":
        filename = default_filename
    # Any answer other than "n" is used as the destination path
    if filename != "n":
        filename = Path(filename)
        should_override = True
        if filename.exists():
            should_override = _ask_yes_or_no_question("This file already exists, do you want to overwrite it")
            should_override = should_override == "y"

        if should_override:
            # The diff text is decoded as UTF-8, so it is written back with the same encoding
            # instead of the platform default one
            with open(filename, "w", encoding="utf-8") as fp:
                fp.write(final_diff)

            print(f"Content saved at: {filename}")
168

169

170
# Run the command-line tool only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
172

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.