pytorch

Форк
0
/
generate_binary_build_matrix.py 
371 строка · 13.7 Кб
1
#!/usr/bin/env python3
2

3
"""Generates a matrix to be utilized through github actions
4

5
Will output a condensed version of the matrix if on a pull request that only
6
includes the latest version of python we support built on three different
7
architectures:
8
    * CPU
9
    * Latest CUDA
10
    * Latest ROCM
11
"""
12

13
import os
14
from typing import Dict, List, Optional, Tuple
15

16
# CUDA toolkit versions binaries are built against.
CUDA_ARCHES = ["11.8", "12.1"]


# Short CUDA version -> full toolkit version used in container/image tags.
CUDA_ARCHES_FULL_VERSION = {"11.8": "11.8.0", "12.1": "12.1.1"}


# Short CUDA version -> cuDNN major version bundled with it.
CUDA_ARCHES_CUDNN_VERSION = {"11.8": "8", "12.1": "8"}


# ROCm versions binaries are built against.
ROCM_ARCHES = ["5.7", "6.0"]


# Pseudo-arch for the CPU build using the C++11 ABI.
CPU_CXX11_ABI_ARCH = ["cpu-cxx11-abi"]


# Pseudo-arch for the aarch64 CPU build.
CPU_AARCH64_ARCH = ["cpu-aarch64"]


# Extra pip requirements injected into CUDA wheel builds so the nvidia-*
# dependency wheels are installed alongside torch on x86_64 Linux.
# Format: entries are separated by " | "; each entry is a
# "pkg==version; PEP 508 environment marker" requirement string.
PYTORCH_EXTRA_INSTALL_REQUIREMENTS = {
    "11.8": (
        "nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | "  # noqa: B950
        "nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cudnn-cu11==8.7.0.84; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nccl-cu11==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'"
    ),
    "12.1": (
        "nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "  # noqa: B950
        "nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cudnn-cu12==8.9.2.26; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nccl-cu12==2.19.3; platform_system == 'Linux' and platform_machine == 'x86_64' | "
        "nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'"
    ),
}
62

63

64
def get_nccl_submodule_version() -> str:
    """Return the NCCL version recorded in the checked-out nccl submodule.

    Parses ``third_party/nccl/nccl/makefiles/version.mk`` for the
    ``NCCL_MAJOR``/``NCCL_MINOR``/``NCCL_PATCH`` make variables and joins
    them as "major.minor.patch".

    Raises:
        RuntimeError: if the nccl submodule (and thus version.mk) is not
            checked out.
        KeyError: if version.mk lacks one of the expected NCCL_* variables.
    """
    from pathlib import Path

    nccl_version_mk = (
        Path(__file__).absolute().parent.parent.parent
        / "third_party"
        / "nccl"
        / "nccl"
        / "makefiles"
        / "version.mk"
    )
    if not nccl_version_mk.exists():
        raise RuntimeError(
            "Please make sure that nccl submodule is checked out when importing this script"
        )
    # version.mk is a makefile with assignments such as "NCCL_MAJOR := 2";
    # collect every NCCL_* variable into a dict.  partition() is tolerant of
    # values that themselves contain ":=", unlike split().
    nccl_vars: Dict[str, str] = {}
    for line in nccl_version_mk.read_text().split("\n"):
        if not line.startswith("NCCL_"):
            continue
        key, _, value = line.partition(":=")
        nccl_vars[key.strip()] = value.strip()
    return f"{nccl_vars['NCCL_MAJOR']}.{nccl_vars['NCCL_MINOR']}.{nccl_vars['NCCL_PATCH']}"
88

89

90
def get_nccl_wheel_version(arch_version: str) -> str:
    """Return the pinned version of the nvidia-nccl-cu* wheel for *arch_version*.

    Looks the pin up in PYTORCH_EXTRA_INSTALL_REQUIREMENTS; raises
    StopIteration if no nccl requirement is present and KeyError for an
    unknown *arch_version*.
    """
    import re

    # Requirement entries are separated by "|" and each carries a ";"-delimited
    # environment marker; splitting on both isolates bare "pkg==ver" specifiers.
    specifiers = [
        fragment.strip()
        for fragment in re.split(
            "[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]
        )
    ]
    nccl_spec = next(s for s in specifiers if s.startswith("nvidia-nccl-cu"))
    return nccl_spec.split("==")[1]
99

100

101
def validate_nccl_dep_consistency(arch_version: str) -> None:
    """Fail loudly if the pinned NCCL wheel disagrees with the nccl submodule.

    Raises:
        RuntimeError: when the wheel pin and the submodule's version.mk differ.
    """
    submodule_ver = get_nccl_submodule_version()
    wheel_ver = get_nccl_wheel_version(arch_version)
    if submodule_ver == wheel_ver:
        return
    raise RuntimeError(
        f"NCCL submodule version {submodule_ver} differs from wheel version {wheel_ver}"
    )
108

109

110
def arch_type(arch_version: str) -> str:
    """Classify an accelerator/arch version string into its build family.

    Returns one of "cuda", "rocm", "cpu-cxx11-abi", "cpu-aarch64" or "cpu".
    """
    families = (
        (CUDA_ARCHES, "cuda"),
        (ROCM_ARCHES, "rocm"),
        (CPU_CXX11_ABI_ARCH, "cpu-cxx11-abi"),
        (CPU_AARCH64_ARCH, "cpu-aarch64"),
    )
    for known_versions, family in families:
        if arch_version in known_versions:
            return family
    # Any unrecognized value is expected to be the plain "cpu" arch.
    return "cpu"
121

122

123
# This can be updated to the release version when cutting release branch, i.e. 2.1
124
DEFAULT_TAG = os.getenv("RELEASE_VERSION_TAG", "main")
125

126
# arch version string -> docker image used to build (many)wheel packages.
WHEEL_CONTAINER_IMAGES = {
    **{
        gpu_arch: f"pytorch/manylinux-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
        gpu_arch: f"pytorch/manylinux-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in ROCM_ARCHES
    },
    "cpu": f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
    "cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}",
    "cpu-aarch64": f"pytorch/manylinuxaarch64-builder:cpu-aarch64-{DEFAULT_TAG}",
}
139

140
# arch version string -> docker image used to build conda packages.
# Note: conda packages are only built for CPU and CUDA (no ROCm).
CONDA_CONTAINER_IMAGES = {
    **{
        gpu_arch: f"pytorch/conda-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in CUDA_ARCHES
    },
    "cpu": f"pytorch/conda-builder:cpu-{DEFAULT_TAG}",
}
147

148
PRE_CXX11_ABI = "pre-cxx11"
149
CXX11_ABI = "cxx11-abi"
150
RELEASE = "release"
151
DEBUG = "debug"
152

153
# (arch version, ABI flavor) -> docker image used to build libtorch packages.
LIBTORCH_CONTAINER_IMAGES: Dict[Tuple[str, str], str] = {
    **{
        (
            gpu_arch,
            PRE_CXX11_ABI,
        ): f"pytorch/manylinux-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
        (
            gpu_arch,
            CXX11_ABI,
        ): f"pytorch/libtorch-cxx11-builder:cuda{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in CUDA_ARCHES
    },
    **{
        (
            gpu_arch,
            PRE_CXX11_ABI,
        ): f"pytorch/manylinux-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in ROCM_ARCHES
    },
    **{
        (
            gpu_arch,
            CXX11_ABI,
        ): f"pytorch/libtorch-cxx11-builder:rocm{gpu_arch}-{DEFAULT_TAG}"
        for gpu_arch in ROCM_ARCHES
    },
    ("cpu", PRE_CXX11_ABI): f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}",
    ("cpu", CXX11_ABI): f"pytorch/libtorch-cxx11-builder:cpu-{DEFAULT_TAG}",
}
185

186
FULL_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11", "3.12"]
187

188

189
def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str:
    """Translate an arch family + version into the DESIRED_CUDA identifier.

    E.g. ("cuda", "12.1") -> "cu121", ("rocm", "6.0") -> "rocm6.0",
    any cpu family -> "cpu"/"cpu-cxx11-abi".  Unknown families fall back
    to the raw version string.
    """
    if gpu_arch_type in ("cpu", "cpu-aarch64"):
        return "cpu"
    if gpu_arch_type == "cpu-cxx11-abi":
        return "cpu-cxx11-abi"
    if gpu_arch_type == "cuda":
        # "12.1" -> "cu121"
        return "cu" + gpu_arch_version.replace(".", "")
    if gpu_arch_type == "rocm":
        return "rocm" + gpu_arch_version
    return gpu_arch_version
197

198

199
def list_without(in_list: List[str], without: List[str]) -> List[str]:
    """Return a copy of *in_list* with every element of *without* removed.

    The relative order of the surviving items is preserved.
    """
    # Hoist the exclusions into a set so each membership test is O(1)
    # instead of scanning `without` once per item.
    excluded = set(without)
    return [item for item in in_list if item not in excluded]
201

202

203
def generate_conda_matrix(os: str) -> List[Dict[str, str]]:
    """Build the conda-package build matrix for the given OS.

    Conda packages are produced for CPU on every OS and additionally for
    every CUDA arch on linux/windows.  We don't currently build conda
    packages for rocm.
    """
    arches = ["cpu"]
    if os in ("linux", "windows"):
        arches += CUDA_ARCHES

    matrix: List[Dict[str, str]] = []
    for python_version in FULL_PYTHON_VERSIONS:
        for arch_version in arches:
            gpu_arch_type = arch_type(arch_version)
            gpu_arch_version = "" if arch_version == "cpu" else arch_version
            build_name = (
                f"conda-py{python_version}-{gpu_arch_type}{gpu_arch_version}"
            ).replace(".", "_")
            matrix.append(
                {
                    "python_version": python_version,
                    "gpu_arch_type": gpu_arch_type,
                    "gpu_arch_version": gpu_arch_version,
                    "desired_cuda": translate_desired_cuda(
                        gpu_arch_type, gpu_arch_version
                    ),
                    "container_image": CONDA_CONTAINER_IMAGES[arch_version],
                    "package_type": "conda",
                    "build_name": build_name,
                }
            )
    return matrix
230

231

232
def generate_libtorch_matrix(
    os: str,
    abi_version: str,
    arches: Optional[List[str]] = None,
    libtorch_variants: Optional[List[str]] = None,
) -> List[Dict[str, str]]:
    """Build the libtorch build matrix for the given OS and ABI flavor.

    When *arches* / *libtorch_variants* are not supplied, defaults are
    derived per OS: linux gets CPU + CUDA + ROCm, windows gets CPU + CUDA,
    and all four shared/static with/without-deps variants are used.
    """
    if arches is None:
        arches = ["cpu"]
        if os == "linux":
            arches += CUDA_ARCHES + ROCM_ARCHES
        elif os == "windows":
            arches += CUDA_ARCHES

    if libtorch_variants is None:
        libtorch_variants = [
            "shared-with-deps",
            "shared-without-deps",
            "static-with-deps",
            "static-without-deps",
        ]

    matrix: List[Dict[str, str]] = []
    for arch_version in arches:
        # one of the values in the following list must be exactly
        # CXX11_ABI, but the precise value of the other one doesn't
        # matter
        gpu_arch_type = arch_type(arch_version)
        gpu_arch_version = "" if arch_version == "cpu" else arch_version
        for libtorch_variant in libtorch_variants:
            # ROCm builds without-deps failed even in ROCm runners; skip for now
            if gpu_arch_type == "rocm" and "without-deps" in libtorch_variant:
                continue
            # Windows builds don't run inside a docker container image.
            container_image = (
                LIBTORCH_CONTAINER_IMAGES[(arch_version, abi_version)]
                if os != "windows"
                else ""
            )
            build_name = (
                f"libtorch-{gpu_arch_type}{gpu_arch_version}"
                f"-{libtorch_variant}-{abi_version}"
            ).replace(".", "_")
            matrix.append(
                {
                    "gpu_arch_type": gpu_arch_type,
                    "gpu_arch_version": gpu_arch_version,
                    "desired_cuda": translate_desired_cuda(
                        gpu_arch_type, gpu_arch_version
                    ),
                    "libtorch_variant": libtorch_variant,
                    "libtorch_config": abi_version if os == "windows" else "",
                    "devtoolset": abi_version if os != "windows" else "",
                    "container_image": container_image,
                    "package_type": "libtorch",
                    "build_name": build_name,
                }
            )
    return matrix
287

288

289
def generate_wheels_matrix(
    os: str,
    arches: Optional[List[str]] = None,
    python_versions: Optional[List[str]] = None,
) -> List[Dict[str, str]]:
    """Build the wheel build matrix for the given OS.

    One entry is produced per (python_version, arch_version) pair.  On
    linux/linux-aarch64 the package type is "manywheel"; everywhere else
    it is "wheel".  Linux CUDA builds carry the per-CUDA-version
    PYTORCH_EXTRA_INSTALL_REQUIREMENTS pins so the nvidia-* dependency
    wheels are installed alongside torch.
    """
    package_type = "wheel"
    if os == "linux" or os == "linux-aarch64":
        # NOTE: We only build manywheel packages for x86_64 and aarch64 linux
        package_type = "manywheel"

    if python_versions is None:
        python_versions = FULL_PYTHON_VERSIONS

    if arches is None:
        # Define default compute architectures
        arches = ["cpu"]
        if os == "linux":
            arches += CPU_CXX11_ABI_ARCH + CUDA_ARCHES + ROCM_ARCHES
        elif os == "windows":
            arches += CUDA_ARCHES
        elif os == "linux-aarch64":
            # Only want the one arch as the CPU type is different and
            # uses different build/test scripts
            arches = ["cpu-aarch64"]

    ret: List[Dict[str, str]] = []
    for python_version in python_versions:
        for arch_version in arches:
            gpu_arch_type = arch_type(arch_version)
            # All cpu flavors report an empty gpu_arch_version.
            gpu_arch_version = (
                ""
                if arch_version == "cpu"
                or arch_version == "cpu-cxx11-abi"
                or arch_version == "cpu-aarch64"
                else arch_version
            )

            # 12.1 linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install
            if arch_version in ["12.1", "11.8"] and os == "linux":
                ret.append(
                    {
                        "python_version": python_version,
                        "gpu_arch_type": gpu_arch_type,
                        "gpu_arch_version": gpu_arch_version,
                        "desired_cuda": translate_desired_cuda(
                            gpu_arch_type, gpu_arch_version
                        ),
                        "devtoolset": "",
                        "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                        "package_type": package_type,
                        "pytorch_extra_install_requirements": PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version],  # fmt: skip
                        "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(  # noqa: B950
                            ".", "_"
                        ),
                    }
                )
            else:
                ret.append(
                    {
                        "python_version": python_version,
                        "gpu_arch_type": gpu_arch_type,
                        "gpu_arch_version": gpu_arch_version,
                        "desired_cuda": translate_desired_cuda(
                            gpu_arch_type, gpu_arch_version
                        ),
                        "devtoolset": "cxx11-abi"
                        if arch_version == "cpu-cxx11-abi"
                        else "",
                        "container_image": WHEEL_CONTAINER_IMAGES[arch_version],
                        "package_type": package_type,
                        "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}".replace(
                            ".", "_"
                        ),
                        # Non-linux builds reuse the 12.1 pins; linux builds in
                        # this branch (cpu/rocm) carry no extra requirements.
                        "pytorch_extra_install_requirements":
                        PYTORCH_EXTRA_INSTALL_REQUIREMENTS["12.1"]  # fmt: skip
                        if os != "linux" else "",
                    }
                )
    return ret
368

369

370
# Sanity-check at import time that the pinned NCCL wheel versions agree
# with the nccl submodule for every CUDA arch we build.
for _cuda_arch in ("12.1", "11.8"):
    validate_nccl_dep_consistency(_cuda_arch)
372

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.