deepspeed

Форк
0
/
transformer_inference.py 
74 строки · 2.7 Кб
1
# Copyright (c) Microsoft Corporation.
2
# SPDX-License-Identifier: Apache-2.0
3

4
# DeepSpeed Team
5

6
from .builder import CUDAOpBuilder, installed_cuda_version
7

8

9
class InferenceBuilder(CUDAOpBuilder):
    """Op builder for the ``transformer_inference`` extension.

    Declares the sources, include directories and linker flags of the op and
    layers inference-specific compatibility checks on top of
    ``CUDAOpBuilder``.
    """

    # Read by the base builder machinery, which is not visible in this file.
    # NOTE(review): presumably the environment variable that switches
    # pre-compilation of this op on or off -- confirm against the base class.
    BUILD_VAR = "DS_BUILD_TRANSFORMER_INFERENCE"
    NAME = "transformer_inference"

    # Lowest compute-capability major version accepted by this builder
    # (the warning text below names it as "Pascal").
    _MIN_CC_MAJOR = 6

    def __init__(self, name=None):
        """Create the builder, falling back to ``NAME`` when *name* is None."""
        name = self.NAME if name is None else name
        super().__init__(name=name)

    def absolute_name(self):
        """Return the dotted module path under which the built op is exposed."""
        return f'deepspeed.ops.transformer.inference.{self.NAME}_op'

    def is_compatible(self, verbose=True):
        """Report whether the op can be built/used in this environment.

        Args:
            verbose: When true (the default) a warning is emitted for every
                failed check. When false the checks still run but stay
                silent; the flag is also forwarded to the base class.

        Returns:
            ``False`` if torch cannot be imported. Otherwise the base class
            verdict combined with the local CUDA checks.
        """
        try:
            import torch
        except ImportError:
            if verbose:
                self.warning("Please install torch if trying to pre-compile inference kernels")
            return False

        cuda_okay = True
        # The CUDA checks only apply to non-ROCm builds with a visible GPU.
        if not self.is_rocm_pytorch() and torch.cuda.is_available():
            sys_cuda_major, _ = installed_cuda_version()
            torch_cuda_major = int(torch.version.cuda.split('.')[0])
            # Only device 0 is inspected.
            cuda_capability = torch.cuda.get_device_properties(0).major
            if cuda_capability < self._MIN_CC_MAJOR:
                if verbose:
                    self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
                cuda_okay = False
            if cuda_capability >= 8:
                # Both the torch build and the system toolkit must be CUDA 11+.
                if torch_cuda_major < 11 or sys_cuda_major < 11:
                    if verbose:
                        self.warning("On Ampere and higher architectures please use CUDA 11+")
                    cuda_okay = False
        # The base check is evaluated first, exactly as before.
        return super().is_compatible(verbose) and cuda_okay

    def filter_ccs(self, ccs):
        """Drop compute capabilities whose major version is below the minimum.

        Args:
            ccs: Iterable of compute capabilities. Each entry is either a
                string such as ``"8.6"`` / ``"8.6+PTX"`` or an already split
                sequence such as ``["8", "6"]``.

        Returns:
            A list with the retained entries, unchanged and in input order.
            A warning listing the pruned entries is emitted if any were
            removed.
        """
        ccs_retained = []
        ccs_pruned = []
        for cc in ccs:
            # Take everything before the first dot as the major version.
            # Indexing a string with [0] would yield only its first
            # character and misread two-digit majors ("10.0" -> 1).
            major = cc.split('.')[0] if isinstance(cc, str) else cc[0]
            if int(major) >= self._MIN_CC_MAJOR:
                ccs_retained.append(cc)
            else:
                ccs_pruned.append(cc)
        if ccs_pruned:
            self.warning(f"Filtered compute capabilities {ccs_pruned}")
        return ccs_retained

    def sources(self):
        """Return the C++/CUDA source files that make up the op."""
        return [
            'csrc/transformer/inference/csrc/pt_binding.cpp',
            'csrc/transformer/inference/csrc/gelu.cu',
            'csrc/transformer/inference/csrc/relu.cu',
            'csrc/transformer/inference/csrc/layer_norm.cu',
            'csrc/transformer/inference/csrc/rms_norm.cu',
            'csrc/transformer/inference/csrc/softmax.cu',
            'csrc/transformer/inference/csrc/dequantize.cu',
            'csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu',
            'csrc/transformer/inference/csrc/transform.cu',
            'csrc/transformer/inference/csrc/pointwise_ops.cu',
        ]

    def extra_ldflags(self):
        """Return extra linker flags: ``-lcurand`` unless building for ROCm."""
        if not self.is_rocm_pytorch():
            return ['-lcurand']
        else:
            return []

    def include_paths(self):
        """Return the include directories needed to compile the sources."""
        return ['csrc/transformer/inference/includes', 'csrc/includes']
75

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.