deepspeed
/
setup.py
327 строк · 11.8 Кб
1# Copyright (c) Microsoft Corporation.
2# SPDX-License-Identifier: Apache-2.0
3
4# DeepSpeed Team
5"""
DeepSpeed library

To build a wheel on Windows:
1. Install PyTorch, such as PyTorch 1.12 + CUDA 11.6.
2. Install the Visual C++ build tools.
3. Include the CUDA toolkit.
4. Launch a cmd console with Administrator privileges so the required symlink folders can be created.


Create a new wheel via the following command:
build_win.bat

The wheel will be located at: dist/*.whl
19"""
20
21import os
22import sys
23import subprocess
24from setuptools import setup, find_packages
25from setuptools.command import egg_info
26import time
27import typing
28
# torch is optional at this point: when it cannot be imported the script
# keeps going with torch_available set to False and only prints a warning.
try:
    import torch
except ImportError:
    torch_available = False
    print('[WARNING] Unable to import torch, pre-compiling ops will be disabled. '
          'Please visit https://pytorch.org/ to see how to properly install torch on your system.')
else:
    torch_available = True
36
37from op_builder import get_default_compute_capabilities, OpBuilder
38from op_builder.all_ops import ALL_OPS
39from op_builder.builder import installed_cuda_version
40
# Fetch rocm state from the op builder (queried in the same order as before).
is_rocm_pytorch, rocm_version = OpBuilder.is_rocm_pytorch(), OpBuilder.installed_rocm_version()
44
# ANSI escape sequences wrapped around the "[ERROR]" tag printed by abort().
RED_START = '\033[31m'
RED_END = '\033[0m'
ERROR = f"{RED_START} [ERROR] {RED_END}"


def abort(msg):
    """
    Print ``msg`` prefixed with the ``ERROR`` tag and stop the build.

    Args:
        msg: Human-readable reason for aborting.

    Raises:
        AssertionError: always, carrying ``msg``. The exception is raised
            explicitly instead of through ``assert False`` so the build
            still stops when Python runs with ``-O``, which strips
            ``assert`` statements.
    """
    print(f"{ERROR} {msg}")
    raise AssertionError(msg)
53
54
def fetch_requirements(path):
    """
    Return the lines of the file at ``path``, in file order, each with
    leading and trailing whitespace removed.

    Blank lines are kept (as empty strings); nothing is filtered out.
    """
    with open(path, 'r') as requirements_file:
        return list(map(str.strip, requirements_file))
58
59
def is_env_set(key):
    """
    Tell whether the environment variable ``key`` exists with a
    non-empty value.
    """
    value = os.environ.get(key)
    return value is not None and value != ""
65
66
def get_env_if_set(key, default: typing.Any = ""):
    """
    Look up the environment variable ``key`` and return its value when it
    is set to a non-empty string; in every other case return ``default``.

    Unlike the fallback argument of os.environ.get(), ``default`` is also
    used when the variable exists but is set to "".
    """
    value = os.environ.get(key)
    if value:
        return value
    return default
74
75
# Requirements that are always installed.
install_requires = fetch_requirements('requirements/requirements.txt')

# Optional dependency groups, keyed by extra name.
# '1bit' starts out empty: add cupy based on cuda/rocm version.
extras_require = {'1bit': []}
for _extra_name, _requirements_file in (
    ('1bit_mpi', 'requirements/requirements-1bit-mpi.txt'),
    ('readthedocs', 'requirements/requirements-readthedocs.txt'),
    ('dev', 'requirements/requirements-dev.txt'),
    ('autotuning', 'requirements/requirements-autotuning.txt'),
    ('autotuning_ml', 'requirements/requirements-autotuning-ml.txt'),
    ('sparse_attn', 'requirements/requirements-sparse_attn.txt'),
    ('sparse', 'requirements/requirements-sparse_pruning.txt'),
    ('inf', 'requirements/requirements-inf.txt'),
    ('sd', 'requirements/requirements-sd.txt'),
    ('triton', 'requirements/requirements-triton.txt'),
):
    extras_require[_extra_name] = fetch_requirements(_requirements_file)
90
# Add specific cupy version to both onebit extension variants.
if torch_available and torch.cuda.is_available():
    if is_rocm_pytorch:
        rocm_major, rocm_minor = rocm_version
        # XXX cupy support for rocm 5 is not available yet.
        cupy = f"cupy-rocm-{rocm_major}-{rocm_minor}" if rocm_major <= 4 else None
    else:
        cuda_major_ver, cuda_minor_ver = installed_cuda_version()
        # Versions below 11.3 get a "cupy-cuda<major><minor>" name,
        # everything else gets "cupy-cuda<major>x".
        if (cuda_major_ver, cuda_minor_ver) < (11, 3):
            cupy = f"cupy-cuda{cuda_major_ver}{cuda_minor_ver}"
        else:
            cupy = f"cupy-cuda{cuda_major_ver}x"

    if cupy:
        for onebit_extra in ('1bit', '1bit_mpi'):
            extras_require[onebit_extra].append(cupy)
109
# Make an [all] extra that installs all needed dependencies.
# Requirements are de-duplicated across every extra and then sorted: the
# iteration order of a set of strings changes with hash randomization, so an
# unsorted list would differ from one build to the next.
all_extras = {req for reqs in extras_require.values() for req in reqs}
extras_require['all'] = sorted(all_extras)
116
cmdclass = {}

if torch_available:
    # Register the accelerator's build_ext command; ninja is turned on only
    # when DS_ENABLE_NINJA is set to a non-empty value.
    from accelerator import get_accelerator
    use_ninja = is_env_set("DS_ENABLE_NINJA")
    cmdclass['build_ext'] = get_accelerator().build_extension().with_options(use_ninja=use_ninja)

    # First two dot-separated components of the torch version string.
    torch_version_parts = torch.__version__.split('.')
    TORCH_MAJOR = torch_version_parts[0]
    TORCH_MINOR = torch_version_parts[1]

    if not torch.cuda.is_available():
        # Fix to allow docker builds, similar to https://github.com/NVIDIA/apex/issues/486.
        print("[WARNING] Torch did not find cuda available, if cross-compiling or running with cpu only "
              "you can ignore this message. Adding compute capability for Pascal, Volta, and Turing "
              "(compute capabilities 6.0, 6.1, 6.2)")
        # A TORCH_CUDA_ARCH_LIST provided by the user is left untouched.
        if not is_env_set("TORCH_CUDA_ARCH_LIST"):
            os.environ["TORCH_CUDA_ARCH_LIST"] = get_default_compute_capabilities()
else:
    TORCH_MAJOR, TORCH_MINOR = "0", "0"
139
ext_modules = []

# Default to pre-install kernels to false so we rely on JIT on Linux, opposite on Windows.
BUILD_OP_PLATFORM = int(sys.platform == "win32")
# DS_BUILD_OPS, when set and non-empty, overrides the platform default.
BUILD_OP_DEFAULT = int(get_env_if_set('DS_BUILD_OPS', BUILD_OP_PLATFORM))
print(f"DS_BUILD_OPS={BUILD_OP_DEFAULT}")

# Only complain about a missing torch when ops are pre-compiled by default.
assert torch_available or not BUILD_OP_DEFAULT, "Unable to pre-compile ops without torch installed. Please install torch before attempting to pre-compile ops."
149
150
def command_exists(cmd):
    """
    Report whether ``cmd`` resolves to an executable.

    The lookup is delegated to ``shutil.which``, which searches ``PATH``
    (honouring ``PATHEXT`` on Windows) without running anything. This
    replaces spawning a shell with a stdout pipe that was never read or
    closed, and, on Windows, executing ``cmd`` itself and interpreting exit
    status 1 as "the command exists".

    Note: only executables count; shell builtins and aliases are not
    reported as existing.

    Args:
        cmd: Command name (or path to an executable) to look up.

    Returns:
        True if an executable was found, False otherwise.
    """
    # Imported locally so this function does not depend on the file's import block.
    import shutil

    return shutil.which(cmd) is not None
158
159
def op_envvar(op_name):
    """
    Return the ``BUILD_VAR`` attribute of the ``ALL_OPS`` entry registered
    under ``op_name``; the entry is asserted to have that attribute.
    """
    registered = ALL_OPS[op_name]
    assert hasattr(registered, 'BUILD_VAR'), f"{op_name} is missing BUILD_VAR field"
    return registered.BUILD_VAR
164
165
def op_enabled(op_name):
    """
    Return, as an int, the build switch for ``op_name``: the value of the
    op's build environment variable when it is set and non-empty, otherwise
    ``BUILD_OP_DEFAULT``.
    """
    return int(get_env_if_set(op_envvar(op_name), BUILD_OP_DEFAULT))
169
170
# Per-op bookkeeping, every op starting out as False:
#   compatible_ops[name] - result of the builder's is_compatible()
#   install_ops[name]    - set to the op's build switch when it is pre-compiled
compatible_ops = {name: False for name in ALL_OPS}
install_ops = {name: False for name in ALL_OPS}
for op_name, builder in ALL_OPS.items():
    op_compatible = builder.is_compatible()
    compatible_ops[op_name] = op_compatible
    compatible_ops["deepspeed_not_implemented"] = False

    # The build switch is read once per op and reused below.
    op_requested = op_enabled(op_name)

    if not op_compatible:
        # If op is requested but not available, throw an error.
        if op_requested:
            env_var = op_envvar(op_name)
            if not is_env_set(env_var):
                builder.warning(f"One can disable {op_name} with {env_var}=0")
            abort(f"Unable to pre-compile {op_name}")
        continue

    if op_requested:
        # If op install enabled, add builder to extensions.
        assert torch_available, f"Unable to pre-compile {op_name}, please first install torch"
        install_ops[op_name] = op_requested
        ext_modules.append(builder.builder())
    elif is_rocm_pytorch:
        # If op is compatible but install is not enabled (JIT mode).
        builder.hipify_extension()

print(f'Install Ops={install_ops}')
196
# Write out version/git info.
git_hash_cmd = "git rev-parse --short HEAD"
git_branch_cmd = "git rev-parse --abbrev-ref HEAD"

# Both values stay "unknown" unless git is usable, DS_BUILD_STRING is unset
# and both git commands succeed.
git_hash = "unknown"
git_branch = "unknown"
if command_exists('git') and not is_env_set('DS_BUILD_STRING'):
    try:
        hash_output = subprocess.check_output(git_hash_cmd, shell=True)
        branch_output = subprocess.check_output(git_branch_cmd, shell=True)
    except subprocess.CalledProcessError:
        pass
    else:
        git_hash = hash_output.decode('utf-8').strip()
        git_branch = branch_output.decode('utf-8').strip()
212
213
def create_dir_symlink(src, dest):
    """
    Make ``dest`` a symbolic link pointing at ``src``.

    Nothing happens when ``dest`` is already a symlink. Anything else found
    at ``dest`` is removed with ``os.remove`` before the link is created.
    """
    if os.path.islink(dest):
        return
    if os.path.exists(dest):
        os.remove(dest)
    assert not os.path.exists(dest)
    os.symlink(src, dest)
220
221
if sys.platform == "win32":
    # This creates symbolic links on Windows.
    # It needs Administrator privilege to create symlinks on Windows.
    for link_target, link_path in (
        ('..\\..\\csrc', '.\\deepspeed\\ops\\csrc'),
        ('..\\..\\op_builder', '.\\deepspeed\\ops\\op_builder'),
        ('..\\accelerator', '.\\deepspeed\\accelerator'),
    ):
        create_dir_symlink(link_target, link_path)
    # Use the Windows-specific manifest template for egg_info.
    egg_info.manifest_maker.template = 'MANIFEST_win.in'
229
# Parse the DeepSpeed version string from version.txt.
# The file is opened in a context manager so its handle is closed right away
# instead of being left to the garbage collector.
with open('version.txt', 'r') as fd:
    version_str = fd.read().strip()

# Build specifiers like .devX can be added at install time. Otherwise, add the git hash.
# Example: DS_BUILD_STRING=".dev20201022" python setup.py sdist bdist_wheel.

# Building wheel for distribution, update version file.
if is_env_set('DS_BUILD_STRING'):
    # Build string env specified, probably building for distribution.
    # The build string is also saved to build.txt, which the next branch reads.
    with open('build.txt', 'w') as fd:
        fd.write(os.environ['DS_BUILD_STRING'])
    version_str += os.environ['DS_BUILD_STRING']
elif os.path.isfile('build.txt'):
    # build.txt exists, probably installing from distribution.
    with open('build.txt', 'r') as fd:
        version_str += fd.read().strip()
else:
    # None of the above, probably installing from source.
    version_str += f'+{git_hash}'
249
torch_version = f"{TORCH_MAJOR}.{TORCH_MINOR}"

# Defaults used when torch is missing or reports no cuda/hip version
# (e.g. cpu-only): no bf16 support and every version set to 0.0.
bf16_support = False
cuda_version = nccl_version = hip_version = "0.0"

if torch_available:
    if torch.version.cuda is not None:
        cuda_version = ".".join(torch.version.cuda.split('.')[:2])
        if sys.platform != "win32":
            raw_nccl_version = torch.cuda.nccl.version()
            if isinstance(raw_nccl_version, int):
                # This will break if minor version > 9.
                nccl_version = ".".join(str(raw_nccl_version)[:2])
            else:
                nccl_version = ".".join(map(str, raw_nccl_version[:2]))
        if hasattr(torch.cuda, 'is_bf16_supported') and torch.cuda.is_available():
            bf16_support = torch.cuda.is_bf16_supported()
    if getattr(torch.version, 'hip', None) is not None:
        hip_version = ".".join(torch.version.hip.split('.')[:2])

# Summary written into the generated version info module.
torch_info = dict(
    version=torch_version,
    bf16_support=bf16_support,
    cuda_version=cuda_version,
    nccl_version=nccl_version,
    hip_version=hip_version,
)
276
print(f"version={version_str}, git_hash={git_hash}, git_branch={git_branch}")

# Generate deepspeed/git_version_info_installed.py: one Python assignment per
# line recording the version, git state, op tables and torch info.
with open('deepspeed/git_version_info_installed.py', 'w') as fd:
    fd.writelines((
        f"version='{version_str}'\n",
        f"git_hash='{git_hash}'\n",
        f"git_branch='{git_branch}'\n",
        f"installed_ops={install_ops}\n",
        f"compatible_ops={compatible_ops}\n",
        f"torch_info={torch_info}\n",
    ))

print(f'install_requires={install_requires}')
print(f'compatible_ops={compatible_ops}')
print(f'ext_modules={ext_modules}')
289
# Parse README.md to make long_description for PyPI page.
thisdir = os.path.abspath(os.path.dirname(__file__))
readme_path = os.path.join(thisdir, 'README.md')
with open(readme_path, encoding='utf-8') as fin:
    readme_text = fin.read()

# The timed window covers package discovery as well as the setup() call.
start_time = time.time()

setup_kwargs = dict(
    name='deepspeed',
    version=version_str,
    description='DeepSpeed library',
    long_description=readme_text,
    long_description_content_type='text/markdown',
    author='DeepSpeed Team',
    author_email='deepspeed-info@microsoft.com',
    url='http://deepspeed.ai',
    project_urls={
        'Documentation': 'https://deepspeed.readthedocs.io',
        'Source': 'https://github.com/microsoft/DeepSpeed',
    },
    install_requires=install_requires,
    extras_require=extras_require,
    packages=find_packages(include=['deepspeed', 'deepspeed.*']),
    include_package_data=True,
    scripts=[
        'bin/deepspeed',
        'bin/deepspeed.pt',
        'bin/ds',
        'bin/ds_ssh',
        'bin/ds_report',
        'bin/ds_bench',
        'bin/dsr',
        'bin/ds_elastic',
    ],
    classifiers=[
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
    ],
    license='Apache Software License 2.0',
    ext_modules=ext_modules,
    cmdclass=cmdclass,
)
setup(**setup_kwargs)

end_time = time.time()
print(f'deepspeed build time = {end_time - start_time} secs')
328