deepspeed
37 строк · 1.0 Кб
1# Copyright (c) Microsoft Corporation.
2# SPDX-License-Identifier: Apache-2.0
3
4# DeepSpeed Team
5
6from .builder import CUDAOpBuilder
7
8import sys
9
10
class FusedLionBuilder(CUDAOpBuilder):
    """Op builder describing how to compile the ``fused_lion`` CUDA op.

    The class only declares build metadata (module name, source files,
    include directories and compiler flags); the compilation logic itself
    comes from ``CUDAOpBuilder``.
    """

    # NOTE(review): presumably the environment variable the base builder
    # consults to decide whether to pre-build this op -- confirm in builder.py.
    BUILD_VAR = "DS_BUILD_FUSED_LION"
    # Op name; passed to the base constructor and used to form the module path.
    NAME = "fused_lion"

    def __init__(self):
        """Initialise the base builder with this op's ``NAME``."""
        super().__init__(name=self.NAME)

    def absolute_name(self):
        """Return the dotted module path of the built op.

        Returns:
            The string ``'deepspeed.ops.lion.<NAME>_op'``.
        """
        return f'deepspeed.ops.lion.{self.NAME}_op'

    def sources(self):
        """Return the list of C++/CUDA source file paths for this op."""
        return ['csrc/lion/fused_lion_frontend.cpp', 'csrc/lion/multi_tensor_lion.cu']

    def include_paths(self):
        """Return the list of include directories for this op."""
        return ['csrc/includes', 'csrc/lion']

    def cxx_args(self):
        """Return the host compiler flags.

        Returns:
            The base class's ``cxx_args()`` followed by
            ``version_dependent_macros()``.
        """
        args = super().cxx_args()
        return args + self.version_dependent_macros()

    def nvcc_args(self):
        """Return the device compiler flags.

        The list always starts with ``-O3`` and the version-dependent macros.
        When ``is_rocm_pytorch()`` is false, the nvcc-specific flags and the
        compute-capability arguments are appended as well.

        Returns:
            A list of flag strings. It never contains an empty string.
        """
        nvcc_flags = ['-O3'] + self.version_dependent_macros()
        if not self.is_rocm_pytorch():
            # Add the Windows-only flag conditionally instead of substituting
            # '' on other platforms, so no empty argument is handed to the
            # compiler.
            if sys.platform == "win32":
                nvcc_flags.append('-allow-unsupported-compiler')
            nvcc_flags.extend(['-lineinfo', '--use_fast_math'])
            nvcc_flags.extend(self.compute_capability_args())
        return nvcc_flags
38