# coding=utf-8
# Copyright 2019 Facebook AI Research and the HuggingFace Inc. team.
# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" TF 2.0 XLM-RoBERTa model. """


import logging

from .configuration_xlm_roberta import XLMRobertaConfig
from .file_utils import add_start_docstrings
from .modeling_tf_roberta import (
    TFRobertaForMaskedLM,
    TFRobertaForMultipleChoice,
    TFRobertaForQuestionAnswering,
    TFRobertaForSequenceClassification,
    TFRobertaForTokenClassification,
    TFRobertaModel,
)


logger = logging.getLogger(__name__)

TF_XLM_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST = [
    # See all XLM-RoBERTa models at https://huggingface.co/models?filter=xlm-roberta
]


XLM_ROBERTA_START_DOCSTRING = r"""

    .. note::

        TF 2.0 models accept two formats as inputs:

        - having all inputs as keyword arguments (like PyTorch models), or
        - having all inputs as a list, tuple or dict in the first positional argument.

        This second option is useful when using the :obj:`tf.keras.Model.fit()` method, which currently requires
        having all the tensors in the first argument of the model call function: :obj:`model(inputs)`.

        If you choose this second option, there are three possibilities you can use to gather all the input
        Tensors in the first positional argument:

        - a single Tensor with ``input_ids`` only and nothing else: :obj:`model(input_ids)`
        - a list of varying length with one or several input Tensors IN THE ORDER given in the docstring:
          :obj:`model([input_ids, attention_mask])` or :obj:`model([input_ids, attention_mask, token_type_ids])`
        - a dictionary with one or several input Tensors associated with the input names given in the docstring:
          :obj:`model({'input_ids': input_ids, 'token_type_ids': token_type_ids})`
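
        For illustration, a minimal sketch of the three call styles described above (assuming
        ``input_ids`` and ``attention_mask`` are :obj:`tf.Tensor` inputs produced by a tokenizer;
        the variable names here are placeholders, not part of the API)::

            outputs = model(input_ids)                                    # single tensor
            outputs = model([input_ids, attention_mask])                  # list, in docstring order
            outputs = model({"input_ids": input_ids, "attention_mask": attention_mask})  # dict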

    Parameters:
        config (:class:`~transformers.XLMRobertaConfig`): Model configuration class with all the parameters of the
            model. Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the :meth:`~transformers.PreTrainedModel.from_pretrained` method to load the
            model weights.
        output_attentions (:obj:`bool`, `optional`, defaults to :obj:`None`):
            If set to ``True``, the attention tensors of all attention layers are returned. See ``attentions``
            under returned tensors for more detail.
"""


@add_start_docstrings(
    "The bare XLM-RoBERTa Model transformer outputting raw hidden-states without any specific head on top.",
    XLM_ROBERTA_START_DOCSTRING,
)
class TFXLMRobertaModel(TFRobertaModel):
    """
    This class overrides :class:`~transformers.TFRobertaModel`. Please check the
    superclass for the appropriate documentation alongside usage examples.
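
    A minimal usage sketch (``xlm-roberta-base`` is an assumed checkpoint name; depending on which
    weights are hosted for it, ``from_pt=True`` may be needed in :meth:`from_pretrained`)::

        import tensorflow as tf
        from transformers import TFXLMRobertaModel, XLMRobertaTokenizer

        tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
        model = TFXLMRobertaModel.from_pretrained("xlm-roberta-base")
        input_ids = tf.constant([tokenizer.encode("Hello, world!")])
        outputs = model(input_ids)
        last_hidden_states = outputs[0]  # shape: (batch_size, sequence_length, hidden_size)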
78"""
79
80config_class = XLMRobertaConfig81


@add_start_docstrings(
    """XLM-RoBERTa Model with a `language modeling` head on top. """, XLM_ROBERTA_START_DOCSTRING,
)
class TFXLMRobertaForMaskedLM(TFRobertaForMaskedLM):
    """
    This class overrides :class:`~transformers.TFRobertaForMaskedLM`. Please check the
    superclass for the appropriate documentation alongside usage examples.
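
    A minimal sketch of masked-token scoring (``xlm-roberta-base`` is an assumed checkpoint name;
    ``<mask>`` is the XLM-RoBERTa tokenizer's mask token)::

        import tensorflow as tf
        from transformers import TFXLMRobertaForMaskedLM, XLMRobertaTokenizer

        tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
        model = TFXLMRobertaForMaskedLM.from_pretrained("xlm-roberta-base")
        input_ids = tf.constant([tokenizer.encode("The capital of France is <mask>.")])
        outputs = model(input_ids)
        prediction_scores = outputs[0]  # shape: (batch_size, sequence_length, vocab_size)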
90"""
91
92config_class = XLMRobertaConfig93


@add_start_docstrings(
    """XLM-RoBERTa Model transformer with a sequence classification/regression head on top (a linear layer
    on top of the pooled output) e.g. for GLUE tasks. """,
    XLM_ROBERTA_START_DOCSTRING,
)
class TFXLMRobertaForSequenceClassification(TFRobertaForSequenceClassification):
    """
    This class overrides :class:`~transformers.TFRobertaForSequenceClassification`. Please check the
    superclass for the appropriate documentation alongside usage examples.
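
    A minimal sketch (the classification head is newly initialized unless the checkpoint already
    contains one, so the logits below are untrained)::

        import tensorflow as tf
        from transformers import TFXLMRobertaForSequenceClassification, XLMRobertaTokenizer

        tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
        model = TFXLMRobertaForSequenceClassification.from_pretrained("xlm-roberta-base")
        input_ids = tf.constant([tokenizer.encode("Hello, world!")])
        outputs = model(input_ids)
        logits = outputs[0]  # shape: (batch_size, num_labels)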
104"""
105
106config_class = XLMRobertaConfig107


@add_start_docstrings(
    """XLM-RoBERTa Model with a token classification head on top (a linear layer on top of
    the hidden-states output) e.g. for Named-Entity-Recognition (NER) tasks. """,
    XLM_ROBERTA_START_DOCSTRING,
)
class TFXLMRobertaForTokenClassification(TFRobertaForTokenClassification):
    """
    This class overrides :class:`~transformers.TFRobertaForTokenClassification`. Please check the
    superclass for the appropriate documentation alongside usage examples.
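
    A minimal sketch (per-token logits, e.g. for NER; the head is untrained unless fine-tuned)::

        import tensorflow as tf
        from transformers import TFXLMRobertaForTokenClassification, XLMRobertaTokenizer

        tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
        model = TFXLMRobertaForTokenClassification.from_pretrained("xlm-roberta-base")
        input_ids = tf.constant([tokenizer.encode("Hello, world!")])
        outputs = model(input_ids)
        logits = outputs[0]  # shape: (batch_size, sequence_length, num_labels)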
118"""
119
120config_class = XLMRobertaConfig121


@add_start_docstrings(
    """XLM-RoBERTa Model with a span classification head on top for extractive question-answering tasks like SQuAD
    (a linear layer on top of the hidden-states output to compute `span start logits` and `span end logits`). """,
    XLM_ROBERTA_START_DOCSTRING,
)
class TFXLMRobertaForQuestionAnswering(TFRobertaForQuestionAnswering):
    """
    This class overrides :class:`~transformers.TFRobertaForQuestionAnswering`. Please check the
    superclass for the appropriate documentation alongside usage examples.
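
    A minimal sketch (start/end span logits over the input tokens; the QA head is untrained
    unless fine-tuned)::

        import tensorflow as tf
        from transformers import TFXLMRobertaForQuestionAnswering, XLMRobertaTokenizer

        tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
        model = TFXLMRobertaForQuestionAnswering.from_pretrained("xlm-roberta-base")
        input_ids = tf.constant([tokenizer.encode("Who wrote it?", "Victor Hugo wrote it.")])
        outputs = model(input_ids)
        start_logits, end_logits = outputs[0], outputs[1]  # each: (batch_size, sequence_length)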
131"""
132
133config_class = XLMRobertaConfig134


@add_start_docstrings(
    """XLM-RoBERTa Model with a multiple choice classification head on top (a linear layer on top of
    the pooled output and a softmax) e.g. for RocStories/SWAG tasks. """,
    XLM_ROBERTA_START_DOCSTRING,
)
class TFXLMRobertaForMultipleChoice(TFRobertaForMultipleChoice):
    """
    This class overrides :class:`~transformers.TFRobertaForMultipleChoice`. Please check the
    superclass for the appropriate documentation alongside usage examples.
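
    A minimal sketch (inputs are shaped ``(batch_size, num_choices, sequence_length)``; the
    head is untrained unless fine-tuned)::

        import tensorflow as tf
        from transformers import TFXLMRobertaForMultipleChoice, XLMRobertaTokenizer

        tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")
        model = TFXLMRobertaForMultipleChoice.from_pretrained("xlm-roberta-base")
        prompt = "Where is the Eiffel Tower?"
        choices = [tokenizer.encode(prompt, ending) for ending in ["In Paris.", "In Rome."]]
        # pad both encodings to a common length before stacking them into one tensor
        max_len = max(len(c) for c in choices)
        choices = [c + [tokenizer.pad_token_id] * (max_len - len(c)) for c in choices]
        input_ids = tf.constant([choices])  # shape: (1, num_choices, seq_len)
        outputs = model(input_ids)
        logits = outputs[0]  # shape: (batch_size, num_choices)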
145"""
146
147config_class = XLMRobertaConfig148