CSS-LM
558 lines · 34.5 KB
from dataclasses import dataclass
from typing import List, Optional, Tuple

import torch

from .file_utils import ModelOutput
8
@dataclass
class BaseModelOutput(ModelOutput):
    """
    Base class for model's outputs, with potential hidden states and attentions.

    Args:
        last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    last_hidden_state: torch.FloatTensor
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
33
34
@dataclass
class BaseModelOutputWithPooling(ModelOutput):
    """
    Base class for model's outputs that also contains a pooling of the last hidden states.

    Args:
        last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, hidden_size)`):
            Last layer hidden-state of the first token of the sequence (classification token) further processed by a
            Linear layer and a Tanh activation function. The Linear layer weights are trained from the next sentence
            prediction (classification) objective during pretraining.

            This output is usually *not* a good summary of the semantic content of the input, you're often better with
            averaging or pooling the sequence of hidden-states for the whole input sequence.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    last_hidden_state: torch.FloatTensor
    pooler_output: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
69
70
@dataclass
class BaseModelOutputWithPast(ModelOutput):
    """
    Base class for model's outputs that may also contain a past key/values (to speed up sequential decoding).

    Args:
        last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.

            If ``past_key_values`` is used only the last hidden-state of the sequences of shape
            :obj:`(batch_size, 1, hidden_size)` is output.
        past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
            ``past_key_values`` input) to speed up sequential decoding.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    last_hidden_state: torch.FloatTensor
    past_key_values: Optional[List[torch.FloatTensor]] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
104
105
@dataclass
class Seq2SeqModelOutput(ModelOutput):
    """
    Base class for model encoder's outputs that also contains: pre-computed hidden states that can speed up
    sequential decoding.

    Args:
        last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the decoder of the model.

            If ``decoder_past_key_values`` is used only the last hidden-state of the sequences of shape
            :obj:`(batch_size, 1, hidden_size)` is output.
        decoder_past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
            used (see ``decoder_past_key_values`` input) to speed up sequential decoding.
        decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
        encoder_last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    """

    last_hidden_state: torch.FloatTensor
    decoder_past_key_values: Optional[List[torch.FloatTensor]] = None
    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    encoder_last_hidden_state: Optional[torch.FloatTensor] = None
    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
156
157
@dataclass
class CausalLMOutput(ModelOutput):
    """
    Base class for causal language model (or autoregressive) outputs.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Language modeling loss (for next-token prediction).
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    # ``= None`` default added for consistency with every other *Output class in this
    # module: ``loss`` is optional (only returned when labels are provided), so it must
    # be constructible without it, e.g. ``CausalLMOutput(logits=...)``.
    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
185
186
@dataclass
class CausalLMOutputWithPast(ModelOutput):
    """
    Base class for causal language model (or autoregressive) outputs, with pre-computed key/value states.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Language modeling loss (for next-token prediction).
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
            ``past_key_values`` input) to speed up sequential decoding.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    past_key_values: Optional[List[torch.FloatTensor]] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
221
222
@dataclass
class MaskedLMOutput(ModelOutput):
    """
    Base class for masked language models outputs.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Masked language modeling (MLM) loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
250
251
@dataclass
class Seq2SeqLMOutput(ModelOutput):
    """
    Base class for sequence-to-sequence language models outputs.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Language modeling loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        decoder_past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
            used (see ``decoder_past_key_values`` input) to speed up sequential decoding.
        decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
        encoder_last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    decoder_past_key_values: Optional[List[torch.FloatTensor]] = None
    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    encoder_last_hidden_state: Optional[torch.FloatTensor] = None
    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
302
303
@dataclass
class NextSentencePredictorOutput(ModelOutput):
    """
    Base class for outputs of models predicting if two sentences are consecutive or not.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`next_sentence_label` is provided):
            Next sequence prediction (classification) loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False
            continuation before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
331
332
@dataclass
class SequenceClassifierOutput(ModelOutput):
    """
    Base class for outputs of sentence classification models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
360
361
@dataclass
class Seq2SeqSequenceClassifierOutput(ModelOutput):
    """
    Base class for outputs of sequence-to-sequence sentence classification models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`label` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        decoder_past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
            used (see ``decoder_past_key_values`` input) to speed up sequential decoding.
        decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
        encoder_last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    decoder_past_key_values: Optional[List[torch.FloatTensor]] = None
    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    encoder_last_hidden_state: Optional[torch.FloatTensor] = None
    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
412
413
@dataclass
class MultipleChoiceModelOutput(ModelOutput):
    """
    Base class for outputs of multiple choice models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Classification loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, num_choices)`):
            `num_choices` is the second dimension of the input tensors. (see `input_ids` above).

            Classification scores (before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
443
444
@dataclass
class TokenClassifierOutput(ModelOutput):
    """
    Base class for outputs of token classification models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided):
            Classification loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`):
            Classification scores (before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
472
473
@dataclass
class QuestionAnsweringModelOutput(ModelOutput):
    """
    Base class for outputs of question answering models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
        start_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`):
            Span-start scores (before SoftMax).
        end_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`):
            Span-end scores (before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    start_logits: torch.FloatTensor = None
    end_logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
504
505
@dataclass
class Seq2SeqQuestionAnsweringModelOutput(ModelOutput):
    """
    Base class for outputs of sequence-to-sequence question answering models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
        start_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`):
            Span-start scores (before SoftMax).
        end_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`):
            Span-end scores (before SoftMax).
        decoder_past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
            used (see ``decoder_past_key_values`` input) to speed up sequential decoding.
        decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
        encoder_last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    start_logits: torch.FloatTensor = None
    end_logits: torch.FloatTensor = None
    decoder_past_key_values: Optional[List[torch.FloatTensor]] = None
    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    encoder_last_hidden_state: Optional[torch.FloatTensor] = None
    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
559