intel-extension-for-pytorch

64 lines · 2.3 KB
from transformers import PretrainedConfig


class ChatGLMConfig(PretrainedConfig):
    """Configuration class for ChatGLM models."""

    # Identifier under which transformers recognizes this configuration.
    model_type = "chatglm"

    def __init__(
        self,
        num_layers=28,
        padded_vocab_size=65024,
        hidden_size=4096,
        ffn_hidden_size=13696,
        kv_channels=128,
        num_attention_heads=32,
        seq_length=2048,
        hidden_dropout=0.0,
        classifier_dropout=None,
        attention_dropout=0.0,
        layernorm_epsilon=1e-5,
        rmsnorm=True,
        apply_residual_connection_post_layernorm=False,
        post_layer_norm=True,
        add_bias_linear=False,
        add_qkv_bias=False,
        bias_dropout_fusion=True,
        multi_query_attention=False,
        multi_query_group_num=1,
        apply_query_key_layer_scaling=True,
        attention_softmax_in_fp32=True,
        fp32_residual_connection=False,
        quantization_bit=0,
        pre_seq_len=None,
        prefix_projection=False,
        **kwargs
    ):
        self.num_layers = num_layers
        # Expose the padded vocabulary size under both names; vocab_size is
        # the attribute most downstream transformers code expects.
        self.vocab_size = padded_vocab_size
        self.padded_vocab_size = padded_vocab_size
        self.hidden_size = hidden_size
        self.ffn_hidden_size = ffn_hidden_size
        self.kv_channels = kv_channels
        self.num_attention_heads = num_attention_heads
        self.seq_length = seq_length
        self.hidden_dropout = hidden_dropout
        self.classifier_dropout = classifier_dropout
        self.attention_dropout = attention_dropout
        self.layernorm_epsilon = layernorm_epsilon
        self.rmsnorm = rmsnorm
        self.apply_residual_connection_post_layernorm = (
            apply_residual_connection_post_layernorm
        )
        self.post_layer_norm = post_layer_norm
        self.add_bias_linear = add_bias_linear
        self.add_qkv_bias = add_qkv_bias
        self.bias_dropout_fusion = bias_dropout_fusion
        self.multi_query_attention = multi_query_attention
        self.multi_query_group_num = multi_query_group_num
        self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
        self.attention_softmax_in_fp32 = attention_softmax_in_fp32
        self.fp32_residual_connection = fp32_residual_connection
        self.quantization_bit = quantization_bit
        self.pre_seq_len = pre_seq_len
        self.prefix_projection = prefix_projection
        # Forward any remaining keyword arguments to PretrainedConfig.
        super().__init__(**kwargs)
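For reference, a minimal usage sketch (not part of the original file): it constructs the config with a couple of illustrative overrides and round-trips it through the standard PretrainedConfig serialization. The override values and the output directory name are assumptions, not values from the repository.

config = ChatGLMConfig(num_layers=2, seq_length=1024)  # illustrative overrides
print(config.model_type)  # "chatglm"
print(config.vocab_size)  # 65024, mirrored from padded_vocab_size
config.save_pretrained("./chatglm-config")  # writes config.json to the directory
restored = ChatGLMConfig.from_pretrained("./chatglm-config")
assert restored.num_layers == 2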
