CSS-LM
558 lines · 34.5 KB
from dataclasses import dataclass
from typing import List, Optional, Tuple

import torch

from .file_utils import ModelOutput
8
@dataclass
class BaseModelOutput(ModelOutput):
    """
    Base class for model's outputs, with potential hidden states and attentions.

    Args:
        last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    last_hidden_state: torch.FloatTensor
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
33
34
@dataclass
class BaseModelOutputWithPooling(ModelOutput):
    """
    Base class for model's outputs that also contains a pooling of the last hidden states.

    Args:
        last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        pooler_output (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, hidden_size)`):
            Last layer hidden-state of the first token of the sequence (classification token) further processed by a
            Linear layer and a Tanh activation function. The Linear layer weights are trained from the next sentence
            prediction (classification) objective during pretraining.

            This output is usually *not* a good summary of the semantic content of the input, you're often better with
            averaging or pooling the sequence of hidden-states for the whole input sequence.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    last_hidden_state: torch.FloatTensor
    pooler_output: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
69
70
@dataclass
class BaseModelOutputWithPast(ModelOutput):
    """
    Base class for model's outputs that may also contain a past key/values (to speed up sequential decoding).

    Args:
        last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.

            If ``past_key_values`` is used only the last hidden-state of the sequences of shape
            :obj:`(batch_size, 1, hidden_size)` is output.
        past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
            ``past_key_values`` input) to speed up sequential decoding.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    last_hidden_state: torch.FloatTensor
    past_key_values: Optional[List[torch.FloatTensor]] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
104
105
@dataclass
class Seq2SeqModelOutput(ModelOutput):
    """
    Base class for model encoder's outputs that also contains: pre-computed hidden states that can speed up
    sequential decoding.

    Args:
        last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the decoder of the model.

            If ``decoder_past_key_values`` is used only the last hidden-state of the sequences of shape
            :obj:`(batch_size, 1, hidden_size)` is output.
        decoder_past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
            used (see ``decoder_past_key_values`` input) to speed up sequential decoding.
        decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
        encoder_last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    """

    last_hidden_state: torch.FloatTensor
    decoder_past_key_values: Optional[List[torch.FloatTensor]] = None
    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    encoder_last_hidden_state: Optional[torch.FloatTensor] = None
    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
156
157
@dataclass
class CausalLMOutput(ModelOutput):
    """
    Base class for causal language model (or autoregressive) outputs.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Language modeling loss (for next-token prediction).
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    # ``= None`` default added for consistency with every other *Output class in this
    # module: ``loss`` is optional (only returned when labels are provided), so it must
    # be constructible without it, e.g. ``CausalLMOutput(logits=...)``.
    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
185
186
@dataclass
class CausalLMOutputWithPast(ModelOutput):
    """
    Base class for causal language model (or autoregressive) outputs, with pre-computed key/value states.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Language modeling loss (for next-token prediction).
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see
            ``past_key_values`` input) to speed up sequential decoding.
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    past_key_values: Optional[List[torch.FloatTensor]] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
221
222
@dataclass
class MaskedLMOutput(ModelOutput):
    """
    Base class for masked language models outputs.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Masked language modeling (MLM) loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
250
251
@dataclass
class Seq2SeqLMOutput(ModelOutput):
    """
    Base class for sequence-to-sequence language models outputs.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Language modeling loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.vocab_size)`):
            Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
        decoder_past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
            used (see ``decoder_past_key_values`` input) to speed up sequential decoding.
        decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
        encoder_last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    decoder_past_key_values: Optional[List[torch.FloatTensor]] = None
    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    encoder_last_hidden_state: Optional[torch.FloatTensor] = None
    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
302
303
@dataclass
class NextSentencePredictorOutput(ModelOutput):
    """
    Base class for outputs of models predicting if two sentences are consecutive or not.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`next_sentence_label` is provided):
            Next sequence prediction (classification) loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, 2)`):
            Prediction scores of the next sequence prediction (classification) head (scores of True/False
            continuation before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
331
332
@dataclass
class SequenceClassifierOutput(ModelOutput):
    """
    Base class for outputs of sentence classification models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
360
361
@dataclass
class Seq2SeqSequenceClassifierOutput(ModelOutput):
    """
    Base class for outputs of sequence-to-sequence sentence classification models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`label` is provided):
            Classification (or regression if config.num_labels==1) loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`):
            Classification (or regression if config.num_labels==1) scores (before SoftMax).
        decoder_past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
            used (see ``decoder_past_key_values`` input) to speed up sequential decoding.
        decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
        encoder_last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    decoder_past_key_values: Optional[List[torch.FloatTensor]] = None
    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    encoder_last_hidden_state: Optional[torch.FloatTensor] = None
    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
412
413
@dataclass
class MultipleChoiceModelOutput(ModelOutput):
    """
    Base class for outputs of multiple choice models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Classification loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, num_choices)`):
            `num_choices` is the second dimension of the input tensors. (see `input_ids` above).

            Classification scores (before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
443
444
@dataclass
class TokenClassifierOutput(ModelOutput):
    """
    Base class for outputs of token classification models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when ``labels`` is provided):
            Classification loss.
        logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, config.num_labels)`):
            Classification scores (before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
472
473
@dataclass
class QuestionAnsweringModelOutput(ModelOutput):
    """
    Base class for outputs of question answering models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
        start_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`):
            Span-start scores (before SoftMax).
        end_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`):
            Span-end scores (before SoftMax).
        hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the initial embedding outputs.
        attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the
            self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    start_logits: torch.FloatTensor = None
    end_logits: torch.FloatTensor = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None
504
505
@dataclass
class Seq2SeqQuestionAnsweringModelOutput(ModelOutput):
    """
    Base class for outputs of sequence-to-sequence question answering models.

    Args:
        loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided):
            Total span extraction loss is the sum of a Cross-Entropy for the start and end positions.
        start_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`):
            Span-start scores (before SoftMax).
        end_logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length,)`):
            Span-end scores (before SoftMax).
        decoder_past_key_values (:obj:`List[torch.FloatTensor]`, `optional`, returned when ``use_cache=True`` is passed or when ``config.use_cache=True``):
            List of :obj:`torch.FloatTensor` of length :obj:`config.n_layers`, with each tensor of shape
            :obj:`(2, batch_size, num_heads, sequence_length, embed_size_per_head)`).

            Contains pre-computed hidden-states (key and values in the attention blocks) of the decoder that can be
            used (see ``decoder_past_key_values`` input) to speed up sequential decoding.
        decoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the decoder at the output of each layer plus the initial embedding outputs.
        decoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the decoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
        encoder_last_hidden_state (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder of the model.
        encoder_hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``):
            Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each
            layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the encoder at the output of each layer plus the initial embedding outputs.
        encoder_attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``):
            Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape
            :obj:`(batch_size, num_heads, sequence_length, sequence_length)`.

            Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in
            the self-attention heads.
    """

    loss: Optional[torch.FloatTensor] = None
    start_logits: torch.FloatTensor = None
    end_logits: torch.FloatTensor = None
    decoder_past_key_values: Optional[List[torch.FloatTensor]] = None
    decoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    decoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
    encoder_last_hidden_state: Optional[torch.FloatTensor] = None
    encoder_hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    encoder_attentions: Optional[Tuple[torch.FloatTensor]] = None
559