pytorch

Форк
0
/
upload_dynamo_perf_stats.py 
118 строк · 3.7 Кб
1
import argparse
2
import csv
3
import os
4
import re
5
from pathlib import Path
6
from tempfile import TemporaryDirectory
7
from typing import Any, Dict, List
8

9
from tools.stats.upload_stats_lib import download_s3_artifacts, unzip, upload_to_rockset
10

11

12
# Artifact names requested from S3, one download call per entry.
ARTIFACTS = [
    "test-reports",
]
# Splits a test-report archive file name into the test name, the runner label
# and the job id, e.g.
#   test-reports-test-inductor-1-2-linux.g5.4xlarge_123.zip
#   -> name="inductor", runner="linux.g5.4xlarge", job="123"
# The dot in front of "zip" is escaped so that only a literal ".zip" suffix is
# accepted rather than any character followed by "zip".
ARTIFACT_REGEX = re.compile(
    r"test-reports-test-(?P<name>\w+)-\d+-\d+-(?P<runner>[\w\.]+)_(?P<job>\d+)\.zip"
)
18

19

20
def _read_dynamo_perf_rows(
    csv_file: Path, metadata: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """Return the rows of *csv_file*, each merged with *metadata*.

    Reading stops at the first row that lacks a ``dev`` or a ``name`` column:
    such a file is not a torch dynamo perf stats csv file.
    """
    rows: List[Dict[str, Any]] = []
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation recommends for files passed to a reader.
    with open(csv_file, newline="") as csvfile:
        for row in csv.DictReader(csvfile, delimiter=","):
            if "dev" not in row or "name" not in row:
                break
            # Metadata is applied last, so it overrides same-named csv columns.
            rows.append({**row, **metadata})
    return rows


def upload_dynamo_perf_stats_to_rockset(
    repo: str,
    workflow_run_id: int,
    workflow_run_attempt: int,
    head_branch: str,
) -> List[Dict[str, Any]]:
    """Collect dynamo perf stats rows from the test-report artifacts of a run.

    Despite its name, this function does not upload anything itself; it
    returns the collected rows and leaves the upload to the caller.

    Args:
        repo: Accepted for interface compatibility; not used by this function.
        workflow_run_id: Workflow run whose artifacts are downloaded; stored
            in every row as ``workflow_id``.
        workflow_run_attempt: Attempt of the workflow run; stored in every row
            as ``run_attempt``.
        head_branch: Stored in every row as ``head_branch``.

    Returns:
        One dict per csv row, holding the csv columns plus ``workflow_id``,
        ``run_attempt``, ``test_name``, ``runner``, ``job_id``, ``filename``
        and ``head_branch``.
    """
    perf_stats: List[Dict[str, Any]] = []
    original_cwd = os.getcwd()
    with TemporaryDirectory() as temp_dir:
        print("Using temporary directory:", temp_dir)
        # The csv lookup below globs relative to the current directory.
        # NOTE(review): presumably download_s3_artifacts/unzip also write
        # relative to the current directory - confirm in upload_stats_lib.
        os.chdir(temp_dir)
        try:
            for artifact in ARTIFACTS:
                artifact_paths = download_s3_artifacts(
                    artifact, workflow_run_id, workflow_run_attempt
                )

                # Unzip to get perf stats csv files
                for path in artifact_paths:
                    m = ARTIFACT_REGEX.match(str(path))
                    if not m:
                        print(f"Test report {path} has an invalid name. Skipping")
                        continue

                    # Extract all files
                    unzip(path)

                    for csv_file in Path(".").glob("**/*.csv"):
                        filename = csv_file.stem
                        print(f"Processing {filename} from {path}")

                        perf_stats.extend(
                            _read_dynamo_perf_rows(
                                csv_file,
                                {
                                    "workflow_id": workflow_run_id,
                                    "run_attempt": workflow_run_attempt,
                                    "test_name": m.group("name"),
                                    "runner": m.group("runner"),
                                    "job_id": m.group("job"),
                                    "filename": filename,
                                    "head_branch": head_branch,
                                },
                            )
                        )

                        # Done processing the file, removing it so that it is
                        # not picked up again after the next archive is unzipped
                        os.remove(csv_file)
        finally:
            # Leave the temporary directory before it is deleted, so the
            # process does not end up in a directory that no longer exists.
            os.chdir(original_cwd)

    return perf_stats
80

81

82
if __name__ == "__main__":
    # Command-line options as (flag, value type, help text); all are required.
    cli_options = (
        ("--workflow-run-id", int, "id of the workflow to get perf stats from"),
        ("--workflow-run-attempt", int, "which retry of the workflow this is"),
        ("--repo", str, "which GitHub repo this workflow run belongs to"),
        ("--head-branch", str, "Head branch of the workflow"),
    )

    parser = argparse.ArgumentParser(
        description="Upload dynamo perf stats from S3 to Rockset"
    )
    for flag, value_type, help_text in cli_options:
        parser.add_argument(flag, type=value_type, required=True, help=help_text)
    args = parser.parse_args()

    # Gather the rows first, then push them to the Rockset collection.
    collected_stats = upload_dynamo_perf_stats_to_rockset(
        repo=args.repo,
        workflow_run_id=args.workflow_run_id,
        workflow_run_attempt=args.workflow_run_attempt,
        head_branch=args.head_branch,
    )
    upload_to_rockset(
        collection="torch_dynamo_perf_stats",
        docs=collected_stats,
        workspace="inductor",
    )
119

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.