DeepSpeed
Mirror of https://github.com/microsoft/DeepSpeed
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team
"""
USAGE:
$ python3 script/replace_copyright.py --repo_dir ./
"""

import os
import argparse

# Lines of the copyright notice written at the top of every processed file.
# The empty string is emitted as a blank separator line (no comment marker).
NEW_COPYRIGHT = ("Copyright (c) Microsoft Corporation.", "SPDX-License-Identifier: Apache-2.0", "", "DeepSpeed Team")

# Python comment markers: single-line marker first, then both triple-quote styles.
PY_SL_COMMENT = "#"
PY_ML_SINGLE = "'''"
PY_ML_DOUBLE = '"""'
PY_COMMENTS = (PY_SL_COMMENT, PY_ML_SINGLE, PY_ML_DOUBLE)

# C-family comment markers: single-line marker, block opener, block closer.
C_SL_COMMENT = "//"
C_ML_OPEN = "/*"
C_ML_CLOSE = "*/"
C_COMMENTS = (C_SL_COMMENT, C_ML_OPEN, C_ML_CLOSE)

# Shell scripts only have single-line comments.
BASH_SL_COMMENT = "#"
BASH_COMMENTS = (BASH_SL_COMMENT, )

# Visual separator printed around a header when asking the user what to keep.
# Written as a raw string: "\|" is not a valid escape sequence, so the plain
# literal triggers an invalid-escape warning; the value itself is unchanged.
DELIM = r"|/-\|/-\|BARRIER|/-\|/-\|"


def parser_args():
    """Parse the command-line options of the copyright replacement script.

    Returns:
        argparse.Namespace with ``repo_dir`` (str) and three lists of file
        extensions: ``python_style_ext``, ``bash_style_ext`` and
        ``c_style_ext``.
    """
    parser = argparse.ArgumentParser()
    # Required: when omitted the value would be None and the directory walk
    # would be handed None instead of the user getting a usage message.
    parser.add_argument("--repo_dir", type=str, required=True, help="Repository directory")
    parser.add_argument("--python_style_ext",
                        type=str,
                        nargs="+",
                        default=[".py"],
                        help="File types to process with python-style comments")
    parser.add_argument("--bash_style_ext",
                        type=str,
                        nargs="+",
                        default=[".sh"],
                        help="File types to process with bash-style comments")
    parser.add_argument("--c_style_ext",
                        type=str,
                        nargs="+",
                        default=[
                            ".c",
                            ".cpp",
                            ".cu",
                            ".h",
                            ".hpp",
                            ".cuh",
                            ".cc",
                            ".hip",
                            ".tr",
                        ],
                        help="File types to process with C-style comments")
    args = parser.parse_args()
    return args


# These get_header_* functions are ugly, but they work :)
def get_header_py(fp):
    """Split a Python file into its leading comment header and the remainder.

    The header is every leading line that is blank, starts with ``#``, or
    belongs to a triple-quoted block.

    Args:
        fp: Path of the file to read.

    Returns:
        Tuple ``(header, rest)``. ``header`` is a list of whitespace-stripped
        lines. ``rest`` is a list whose first entry is the first non-header
        line (stripped, with a newline re-appended), followed by the untouched
        remaining lines of the file.
    """
    with open(fp, "r") as src:
        remaining = iter(src.readlines())

    header, rest = [], []
    inside_block = False
    block_delim = None

    for raw in remaining:
        text = raw.strip()
        if text.startswith((PY_ML_SINGLE, PY_ML_DOUBLE)):
            opener = text[:3]
            if inside_block and block_delim == opener:
                # Same delimiter seen again: the open block ends here.
                inside_block = False
            else:
                block_delim = opener
                # A line such as """text""" opens and closes the block at
                # once; a bare delimiter (shorter than 6 chars) only opens it.
                inside_block = not (text.endswith(block_delim) and len(text) >= 6)
        elif inside_block and text.endswith(block_delim):
            # Closing delimiter at the end of a text line.
            inside_block = False
        elif not inside_block and not text.startswith(PY_SL_COMMENT) and text != "":
            # First line that is neither comment nor blank: header is over.
            rest.append(text + "\n")
            break
        header.append(text)

    # Whatever the loop did not consume belongs to the body of the file.
    rest.extend(remaining)

    return header, rest


def get_header_c(fp):
    """Split a C-style file into its leading comment header and the remainder.

    The header is every leading line that is blank, starts with ``//``, or
    belongs to a ``/* ... */`` block.

    Args:
        fp: Path of the file to read.

    Returns:
        Tuple ``(header, rest)``. ``header`` is a list of whitespace-stripped
        lines. ``rest`` is a list whose first entry is the first non-header
        line (stripped, with a newline re-appended), followed by the untouched
        remaining lines of the file.
    """
    with open(fp, "r") as f:
        lines = iter(f.readlines())

    header = []
    rest = []
    in_multiline = False

    for line in lines:
        line = line.strip()
        if line.startswith(C_ML_OPEN):
            # Block comment opened; it stays open unless closed on this line.
            if not line.endswith(C_ML_CLOSE):
                in_multiline = True
        elif in_multiline and line.endswith(C_ML_CLOSE):
            # Only an open block can be closed; a code line that merely ends
            # with a trailing comment must not be swallowed into the header.
            in_multiline = False
        elif not (in_multiline or line.startswith(C_SL_COMMENT) or line == ""):
            # The whole disjunction must be negated: without the parentheses
            # every "//" line and every blank line ended the header.
            rest.append(line + "\n")
            break
        header.append(line)

    # Whatever the loop did not consume belongs to the body of the file.
    rest.extend(lines)

    return header, rest


def get_header_bash(fp):
    """Split a shell script into its leading comment header and the remainder.

    The header is every leading line that is blank or starts with ``#``
    (which includes a shebang line).

    Args:
        fp: Path of the file to read.

    Returns:
        Tuple ``(header, rest)``. ``header`` is a list of whitespace-stripped
        lines. ``rest`` is a list whose first entry is the first non-header
        line (stripped, with a newline re-appended), followed by the untouched
        remaining lines of the file.
    """
    with open(fp, "r") as f:
        lines = iter(f.readlines())

    header = []
    rest = []

    for line in lines:
        line = line.strip()
        # A stripped line is never whitespace-only, so blank lines have to be
        # recognised as "". They stay part of the header, matching the
        # Python-style parser, because the copyright notice itself contains a
        # blank separator line.
        if not (line.startswith(BASH_SL_COMMENT) or line == ""):
            rest.append(line + "\n")
            break
        header.append(line)

    # Whatever the loop did not consume belongs to the body of the file.
    rest.extend(lines)

    return header, rest


def remove_comments(line, comment_strs):
    """Strip comment markers from the start and end of a header line.

    Only leading and trailing markers are removed. Markers in the middle of
    the text are left alone so that content such as ``http://...`` or
    ``issue #12`` survives intact.

    Args:
        line: One header line.
        comment_strs: Iterable of comment marker strings to strip.

    Returns:
        The line without its leading/trailing markers. Whitespace that
        followed a leading marker is kept.
    """
    for cstr in comment_strs:
        if not cstr:
            # An empty marker matches everywhere and would never terminate.
            continue
        # Repeated markers ("##", "////") are removed completely.
        while line.startswith(cstr):
            line = line[len(cstr):]
        while line.endswith(cstr):
            line = line[:-len(cstr)]
    return line


def format_multiline_comment(text, comment_type):
    """Wrap preserved header lines in a comment of the given style.

    Args:
        text: List of header lines with their comment markers already removed.
        comment_type: One of PY_COMMENTS, C_COMMENTS or BASH_COMMENTS.

    Returns:
        A string that starts with a newline, so it can be appended directly
        after the copyright notice. If ``comment_type`` is none of the known
        styles, ``text`` is returned unchanged.
    """
    if comment_type == PY_COMMENTS:
        # Triple-double-quoted block.
        return f"\n{comment_type[2]}\n" + "\n".join(text) + f"{comment_type[2]}"
    if comment_type == C_COMMENTS:
        # /* ... */ block.
        return f"\n{comment_type[1]}\n" + "\n".join(text) + f"{comment_type[2]}"
    if comment_type == BASH_COMMENTS:
        # One "#" per line. The leading newline keeps the first preserved line
        # from being glued onto the last line of the copyright notice, as the
        # other two styles already do.
        return "\n" + "\n".join(f"{comment_type[0]}{l}" for l in text)
    return text


def modify_file_header(fp, file_header, rest_of_file, preserve_text_store, comment_type):
    """Rewrite a file with the new copyright notice in place of its old header.

    For a header that has not been seen before, the user is shown the
    numbered header lines and asked from which line on the text should be
    kept. The answer is remembered per distinct header text.

    Args:
        fp: Path of the file to overwrite.
        file_header: List of header lines of the file.
        rest_of_file: List of the remaining lines of the file.
        preserve_text_store: Dict mapping header text to the lines to keep
            for that header. Updated in place.
        comment_type: Tuple of comment markers for the file's language.

    Returns:
        ``preserve_text_store``, so it can be reused for later files.
    """
    header_text = "\n".join(file_header)
    if not (header_text.strip() == "" or header_text in preserve_text_store):
        # Unique header, need to get user input
        print("\n", DELIM, "\n")
        for idx, line in enumerate(file_header):
            print(f"{idx}: {line}")
        print("\n", DELIM, "\n")
        print("\nIndicate the FIRST line of the Header to KEEP")
        print("(shebang #! lines will be automatically processed and should not be included).")
        # Ask again on a typo instead of aborting the whole interactive run
        # with a ValueError.
        while True:
            keep_idx = input("Enter number (or leave blank if no lines should be preserved): ").strip()
            if keep_idx == "":
                preserve_text_store[header_text] = ""
                break
            try:
                preserve_text_store[header_text] = file_header[int(keep_idx):]
            except ValueError:
                print(f"'{keep_idx}' is not a valid line number, please try again.")
            else:
                break

    # Identify any shebang lines in the file
    shebang = "\n".join([l for l in file_header if l.startswith("#!")])
    if shebang != "":
        shebang += "\n"

    # Get the text we should preserve in this file and process to remove comment characters
    text_to_preserve = preserve_text_store.get(header_text, [""])
    text_to_preserve = [remove_comments(l, comment_type) for l in text_to_preserve]

    # Format the text we want to keep into a new multiline comment
    if "".join(text_to_preserve) == "":
        text_to_preserve = ""
    else:
        text_to_preserve = format_multiline_comment(text_to_preserve, comment_type)

    # Generate the copyright text we will be adding; empty notice lines stay
    # empty rather than becoming a bare comment marker
    copyright_text = "\n".join([f"{comment_type[0]} {l}" if l != "" else l for l in NEW_COPYRIGHT])

    # Assemble the new header
    new_header = shebang + copyright_text + text_to_preserve

    # Write out the new file
    new_file_contents = new_header + "\n" + "".join(rest_of_file)
    with open(fp, "w") as f:
        f.write(new_file_contents)

    return preserve_text_store  # Return so we can reuse for future files


def main(args):
    """Walk the repository and rewrite the header of every matching file.

    Args:
        args: Parsed options providing ``repo_dir`` and the extension lists
            ``python_style_ext``, ``c_style_ext`` and ``bash_style_ext``.
    """
    # Used to track header comments we should preserve
    preserve_text_store = {}

    # (name of the extension option, header splitter, comment markers), in the
    # order the file families are handled within each directory.
    families = (
        ("python_style_ext", get_header_py, PY_COMMENTS),
        ("c_style_ext", get_header_c, C_COMMENTS),
        ("bash_style_ext", get_header_bash, BASH_COMMENTS),
    )

    # Walk across directory looking for all files with extensions we want to modify
    for root, _dirs, fnames in os.walk(args.repo_dir):
        for option_name, split_header, comment_type in families:
            for ext in getattr(args, option_name):
                matching = [os.path.join(root, fn) for fn in fnames if fn.endswith(ext)]
                for fp in matching:
                    file_header, rest_of_file = split_header(fp)
                    preserve_text_store = modify_file_header(fp, file_header, rest_of_file, preserve_text_store,
                                                             comment_type)


# Script entry point: parse the command line, then process the repository.
if __name__ == "__main__":
    main(parser_args())