# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""RED-ACE config class."""
from official.legacy.bert import configs


class RedAceConfig(configs.BertConfig):
  """Model configuration for RED-ACE."""

  def __init__(
      self,
      vocab_size=30522,
      hidden_size=768,
      num_hidden_layers=12,
      num_attention_heads=12,
      intermediate_size=3072,
      hidden_act="gelu",
      hidden_dropout_prob=0.1,
      attention_probs_dropout_prob=0.1,
      max_position_embeddings=512,
      type_vocab_size=2,
      initializer_range=0.02,
      num_classes=2,
      enable_async_checkpoint=True,
  ):
    """Initializes an instance of RED-ACE configuration.

    This initializer expects both the BERT-specific arguments and the
    RED-ACE-specific arguments listed below.

    Args:
      vocab_size: Vocabulary size of `input_ids` in `BertModel`.
      hidden_size: Size of the encoder layers and the pooler layer.
      num_hidden_layers: Number of hidden layers in the Transformer encoder.
      num_attention_heads: Number of attention heads for each attention layer
        in the Transformer encoder.
      intermediate_size: The size of the "intermediate" (i.e., feed-forward)
        layer in the Transformer encoder.
      hidden_act: The non-linear activation function (function or string) in
        the encoder and pooler.
      hidden_dropout_prob: The dropout probability for all fully connected
        layers in the embeddings, encoder, and pooler.
      attention_probs_dropout_prob: The dropout ratio for the attention
        probabilities.
      max_position_embeddings: The maximum sequence length that this model
        might ever be used with. Typically set this to something large just
        in case (e.g., 512 or 1024 or 2048).
      type_vocab_size: The vocabulary size of the `token_type_ids` passed
        into `BertModel`.
      initializer_range: The stdev of the truncated_normal_initializer for
        initializing all weight matrices.
      num_classes: Number of tags.
      enable_async_checkpoint: Whether saving the model should happen
        asynchronously.
    """
    super(RedAceConfig, self).__init__(
        vocab_size=vocab_size,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )
    self.num_classes = num_classes
    self.enable_async_checkpoint = enable_async_checkpoint
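

# A minimal usage sketch (an illustrative addition, not part of the original
# module): builds a config with the defaults above, overriding only the
# RED-ACE specific `num_classes` field. The `to_dict()` call assumes the
# serialization helper inherited from `configs.BertConfig`.
if __name__ == "__main__":
  config = RedAceConfig(num_classes=4)
  print(config.hidden_size)  # 768, forwarded to the BERT superclass.
  print(config.num_classes)  # 4, stored directly on RedAceConfig.
  print(config.to_dict())  # Includes both the BERT and RED-ACE fields.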