transformers / test_modeling_tf_roberta.py · 702 lines · 27.4 KB
# coding=utf-8
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from __future__ import annotations

import unittest

from transformers import RobertaConfig, is_tf_available
from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow

from ...test_configuration_common import ConfigTester
from ...test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
from ...test_pipeline_mixin import PipelineTesterMixin


if is_tf_available():
    import numpy
    import tensorflow as tf

    from transformers.models.roberta.modeling_tf_roberta import (
        TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST,
        TFRobertaForCausalLM,
        TFRobertaForMaskedLM,
        TFRobertaForMultipleChoice,
        TFRobertaForQuestionAnswering,
        TFRobertaForSequenceClassification,
        TFRobertaForTokenClassification,
        TFRobertaModel,
    )


class TFRobertaModelTester:
    def __init__(
        self,
        parent,
    ):
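        # Deliberately small dimensions (tiny hidden size, two layers, short sequences) so the
        # shape and equivalence checks below run quickly; they do not match any released checkpoint.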
        self.parent = parent
        self.batch_size = 13
        self.seq_length = 7
        self.is_training = True
        self.use_input_mask = True
        self.use_token_type_ids = True
        self.use_labels = True
        self.vocab_size = 99
        self.hidden_size = 32
        self.num_hidden_layers = 2
        self.num_attention_heads = 4
        self.intermediate_size = 37
        self.hidden_act = "gelu"
        self.hidden_dropout_prob = 0.1
        self.attention_probs_dropout_prob = 0.1
        self.max_position_embeddings = 512
        self.type_vocab_size = 16
        self.type_sequence_label_size = 2
        self.initializer_range = 0.02
        self.num_labels = 3
        self.num_choices = 4
        self.scope = None

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = random_attention_mask([self.batch_size, self.seq_length])

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = RobertaConfig(
            vocab_size=self.vocab_size,
            hidden_size=self.hidden_size,
            num_hidden_layers=self.num_hidden_layers,
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self.intermediate_size,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
        )

        return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

    def prepare_config_and_inputs_for_decoder(self):
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = self.prepare_config_and_inputs()

        config.is_decoder = True
        encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size])
        encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        return (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            encoder_hidden_states,
            encoder_attention_mask,
        )

    def create_and_check_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFRobertaModel(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        result = model(inputs)

        inputs = [input_ids, input_mask]
        result = model(inputs)

        result = model(input_ids)

        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))

    def create_and_check_causal_lm_base_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.is_decoder = True

        model = TFRobertaModel(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        result = model(inputs)

        inputs = [input_ids, input_mask]
        result = model(inputs)

        result = model(input_ids)

        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))

    def create_and_check_model_as_decoder(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        encoder_hidden_states,
        encoder_attention_mask,
    ):
        config.add_cross_attention = True

        model = TFRobertaModel(config=config)
        inputs = {
            "input_ids": input_ids,
            "attention_mask": input_mask,
            "token_type_ids": token_type_ids,
            "encoder_hidden_states": encoder_hidden_states,
            "encoder_attention_mask": encoder_attention_mask,
        }
        result = model(inputs)

        inputs = [input_ids, input_mask]
        result = model(inputs, token_type_ids=token_type_ids, encoder_hidden_states=encoder_hidden_states)

        # Also check the case where encoder outputs are not passed
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)

        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))

    def create_and_check_causal_lm_model(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.is_decoder = True

        model = TFRobertaForCausalLM(config=config)
        inputs = {
            "input_ids": input_ids,
            "attention_mask": input_mask,
            "token_type_ids": token_type_ids,
        }
        prediction_scores = model(inputs)["logits"]
        self.parent.assertListEqual(
            list(prediction_scores.numpy().shape), [self.batch_size, self.seq_length, self.vocab_size]
        )

    def create_and_check_causal_lm_model_as_decoder(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        encoder_hidden_states,
        encoder_attention_mask,
    ):
        config.add_cross_attention = True

        model = TFRobertaForCausalLM(config=config)
        inputs = {
            "input_ids": input_ids,
            "attention_mask": input_mask,
            "token_type_ids": token_type_ids,
            "encoder_hidden_states": encoder_hidden_states,
            "encoder_attention_mask": encoder_attention_mask,
        }
        result = model(inputs)

        inputs = [input_ids, input_mask]
        result = model(inputs, token_type_ids=token_type_ids, encoder_hidden_states=encoder_hidden_states)

        prediction_scores = result["logits"]
        self.parent.assertListEqual(
            list(prediction_scores.numpy().shape), [self.batch_size, self.seq_length, self.vocab_size]
        )

    def create_and_check_causal_lm_model_past(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
    ):
        config.is_decoder = True

        model = TFRobertaForCausalLM(config=config)

        # special to `RobertaEmbeddings` in `Roberta`:
        #   - its `padding_idx` and its effect on `position_ids`
        #     (TFRobertaEmbeddings.create_position_ids_from_input_ids)
        #   - `1` here is `TFRobertaEmbeddings.padding_idx`
        input_ids = tf.where(input_ids == 1, 2, input_ids)
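        # For reference, position ids are roughly derived as (a sketch of the idea, not the
        # exact implementation):
        #   mask = tf.cast(input_ids != padding_idx, input_ids.dtype)
        #   position_ids = tf.cumsum(mask, axis=1) * mask + padding_idx
        # A token equal to `padding_idx` would therefore get the padding position instead of a
        # real one, which is why the ids are remapped above before comparing cached and
        # uncached runs.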

        # first forward pass
        outputs = model(input_ids, use_cache=True)
        outputs_use_cache_conf = model(input_ids)
        outputs_no_past = model(input_ids, use_cache=False)

        self.parent.assertTrue(len(outputs) == len(outputs_use_cache_conf))
        self.parent.assertTrue(len(outputs) == len(outputs_no_past) + 1)
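        # `use_cache` falls back to `config.use_cache` (True by default for RobertaConfig), so
        # the first two outputs have the same structure, while the `use_cache=False` run lacks
        # the `past_key_values` entry, hence the `+ 1`.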

        past_key_values = outputs.past_key_values

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

        # append to next input_ids
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)

        output_from_no_past = model(next_input_ids, output_hidden_states=True).hidden_states[0]
        output_from_past = model(
            next_tokens, past_key_values=past_key_values, output_hidden_states=True
        ).hidden_states[0]

        # select random slice
        random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
        output_from_past_slice = output_from_past[:, 0, random_slice_idx]

        # test that outputs are equal for slice
        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-6)

    def create_and_check_causal_lm_model_past_with_attn_mask(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
    ):
        config.is_decoder = True

        model = TFRobertaForCausalLM(config=config)

        # special to `RobertaEmbeddings` in `Roberta`:
        #   - its `padding_idx` and its effect on `position_ids`
        #     (TFRobertaEmbeddings.create_position_ids_from_input_ids)
        #   - `1` here is `TFRobertaEmbeddings.padding_idx`
        # avoid `padding_idx` in the past
        input_ids = tf.where(input_ids == 1, 2, input_ids)

        # create attention mask
        half_seq_length = self.seq_length // 2
        attn_mask_begin = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32)
        attn_mask_end = tf.zeros((self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32)
        attn_mask = tf.concat([attn_mask_begin, attn_mask_end], axis=1)

        # first forward pass
        outputs = model(input_ids, attention_mask=attn_mask, use_cache=True)

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

        past_key_values = outputs.past_key_values

        # change a random masked slice from input_ids
        random_seq_idx_to_change = ids_tensor((1,), half_seq_length).numpy() + 1
        random_other_next_tokens = ids_tensor((self.batch_size, self.seq_length), config.vocab_size)
        vector_condition = tf.range(self.seq_length) == (self.seq_length - random_seq_idx_to_change)
        condition = tf.transpose(
            tf.broadcast_to(tf.expand_dims(vector_condition, -1), (self.seq_length, self.batch_size))
        )
        input_ids = tf.where(condition, random_other_next_tokens, input_ids)
        # avoid `padding_idx` in the past
        input_ids = tf.where(input_ids == 1, 2, input_ids)

        # append to next input_ids and attn_mask
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
        attn_mask = tf.concat(
            [attn_mask, tf.ones((attn_mask.shape[0], 1), dtype=tf.int32)],
            axis=1,
        )

        output_from_no_past = model(
            next_input_ids,
            attention_mask=attn_mask,
            output_hidden_states=True,
        ).hidden_states[0]
        output_from_past = model(
            next_tokens, past_key_values=past_key_values, attention_mask=attn_mask, output_hidden_states=True
        ).hidden_states[0]

        # select random slice
        random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
        output_from_past_slice = output_from_past[:, 0, random_slice_idx]

        # test that outputs are equal for slice
        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-6)

    def create_and_check_causal_lm_model_past_large_inputs(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
    ):
        config.is_decoder = True

        model = TFRobertaForCausalLM(config=config)

        # special to `RobertaEmbeddings` in `Roberta`:
        #   - its `padding_idx` and its effect on `position_ids`
        #     (TFRobertaEmbeddings.create_position_ids_from_input_ids)
        #   - `1` here is `TFRobertaEmbeddings.padding_idx`
        # avoid `padding_idx` in the past
        input_ids = tf.where(input_ids == 1, 2, input_ids)

        input_ids = input_ids[:1, :]
        input_mask = input_mask[:1, :]
        self.batch_size = 1

        # first forward pass
        outputs = model(input_ids, attention_mask=input_mask, use_cache=True)
        past_key_values = outputs.past_key_values

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
        next_attn_mask = ids_tensor((self.batch_size, 3), 2)

        # append to next input_ids and attention mask
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
        next_attention_mask = tf.concat([input_mask, next_attn_mask], axis=-1)

        output_from_no_past = model(
            next_input_ids,
            attention_mask=next_attention_mask,
            output_hidden_states=True,
        ).hidden_states[0]
        output_from_past = model(
            next_tokens,
            attention_mask=next_attention_mask,
            past_key_values=past_key_values,
            output_hidden_states=True,
        ).hidden_states[0]

        self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])

        # select random slice
        random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
        output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
        output_from_past_slice = output_from_past[:, :, random_slice_idx]

        # test that outputs are equal for slice
        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)

    def create_and_check_decoder_model_past_large_inputs(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        encoder_hidden_states,
        encoder_attention_mask,
    ):
        config.add_cross_attention = True

        model = TFRobertaForCausalLM(config=config)

        # special to `RobertaEmbeddings` in `Roberta`:
        #   - its `padding_idx` and its effect on `position_ids`
        #     (TFRobertaEmbeddings.create_position_ids_from_input_ids)
        #   - `1` here is `TFRobertaEmbeddings.padding_idx`
        # avoid `padding_idx` in the past
        input_ids = tf.where(input_ids == 1, 2, input_ids)

        input_ids = input_ids[:1, :]
        input_mask = input_mask[:1, :]
        encoder_hidden_states = encoder_hidden_states[:1, :, :]
        encoder_attention_mask = encoder_attention_mask[:1, :]
        self.batch_size = 1

        # first forward pass
        outputs = model(
            input_ids,
            attention_mask=input_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            use_cache=True,
        )
        past_key_values = outputs.past_key_values

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
        next_attn_mask = ids_tensor((self.batch_size, 3), 2)

        # append to next input_ids and attention mask
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
        next_attention_mask = tf.concat([input_mask, next_attn_mask], axis=-1)

        output_from_no_past = model(
            next_input_ids,
            attention_mask=next_attention_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            output_hidden_states=True,
        ).hidden_states[0]
        output_from_past = model(
            next_tokens,
            attention_mask=next_attention_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_attention_mask,
            past_key_values=past_key_values,
            output_hidden_states=True,
        ).hidden_states[0]

        self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])

        # select random slice
        random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
        output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
        output_from_past_slice = output_from_past[:, :, random_slice_idx]

        # test that outputs are equal for slice
        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)

    def create_and_check_for_masked_lm(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFRobertaForMaskedLM(config=config)
        result = model([input_ids, input_mask, token_type_ids])
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

    def create_and_check_for_token_classification(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.num_labels = self.num_labels
        model = TFRobertaForTokenClassification(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        result = model(inputs)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

    def create_and_check_for_question_answering(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        model = TFRobertaForQuestionAnswering(config=config)
        inputs = {"input_ids": input_ids, "attention_mask": input_mask, "token_type_ids": token_type_ids}
        result = model(inputs)
        self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
        self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

    def create_and_check_for_multiple_choice(
        self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
    ):
        config.num_choices = self.num_choices
        model = TFRobertaForMultipleChoice(config=config)
        multiple_choice_inputs_ids = tf.tile(tf.expand_dims(input_ids, 1), (1, self.num_choices, 1))
        multiple_choice_input_mask = tf.tile(tf.expand_dims(input_mask, 1), (1, self.num_choices, 1))
        multiple_choice_token_type_ids = tf.tile(tf.expand_dims(token_type_ids, 1), (1, self.num_choices, 1))
        inputs = {
            "input_ids": multiple_choice_inputs_ids,
            "attention_mask": multiple_choice_input_mask,
            "token_type_ids": multiple_choice_token_type_ids,
        }
        result = model(inputs)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict


@require_tf
class TFRobertaModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    all_model_classes = (
        (
            TFRobertaModel,
            TFRobertaForCausalLM,
            TFRobertaForMaskedLM,
            TFRobertaForSequenceClassification,
            TFRobertaForTokenClassification,
            TFRobertaForQuestionAnswering,
        )
        if is_tf_available()
        else ()
    )
    pipeline_model_mapping = (
        {
            "feature-extraction": TFRobertaModel,
            "fill-mask": TFRobertaForMaskedLM,
            "question-answering": TFRobertaForQuestionAnswering,
            "text-classification": TFRobertaForSequenceClassification,
            "text-generation": TFRobertaForCausalLM,
            "token-classification": TFRobertaForTokenClassification,
            "zero-shot": TFRobertaForSequenceClassification,
        }
        if is_tf_available()
        else {}
    )
    test_head_masking = False
    test_onnx = False

    def setUp(self):
        self.model_tester = TFRobertaModelTester(self)
        self.config_tester = ConfigTester(self, config_class=RobertaConfig, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        """Test the base model"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_causal_lm_base_model(self):
        """Test the base model of the causal LM model

        is_decoder=True, no cross_attention, no encoder outputs
        """
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_causal_lm_base_model(*config_and_inputs)

    def test_model_as_decoder(self):
        """Test the base model as a decoder (of an encoder-decoder architecture)

        is_decoder=True + cross_attention + pass encoder outputs
        """
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_causal_lm(self):
        """Test the causal LM model"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_causal_lm_model(*config_and_inputs)

    def test_causal_lm_model_as_decoder(self):
        """Test the causal LM model as a decoder"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
        self.model_tester.create_and_check_causal_lm_model_as_decoder(*config_and_inputs)

    def test_causal_lm_model_past(self):
        """Test causal LM model with `past_key_values`"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_causal_lm_model_past(*config_and_inputs)

    def test_causal_lm_model_past_with_attn_mask(self):
        """Test the causal LM model with `past_key_values` and `attention_mask`"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_causal_lm_model_past_with_attn_mask(*config_and_inputs)

    def test_causal_lm_model_past_with_large_inputs(self):
        """Test the causal LM model with `past_key_values` and a longer decoder sequence length"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_causal_lm_model_past_large_inputs(*config_and_inputs)

    def test_decoder_model_past_with_large_inputs(self):
        """Similar to `test_causal_lm_model_past_with_large_inputs` but with cross-attention"""
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
        self.model_tester.create_and_check_decoder_model_past_large_inputs(*config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(*config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_ROBERTA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFRobertaModel.from_pretrained(model_name)
            self.assertIsNotNone(model)


@require_tf
@require_sentencepiece
@require_tokenizers
class TFRobertaModelIntegrationTest(unittest.TestCase):
    @slow
    def test_inference_masked_lm(self):
        model = TFRobertaForMaskedLM.from_pretrained("FacebookAI/roberta-base")

        input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
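        # 11 token ids from the RoBERTa tokenizer; 0 and 2 are RoBERTa's <s> and </s> special tokens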
        output = model(input_ids)[0]
        expected_shape = [1, 11, 50265]
        self.assertEqual(list(output.numpy().shape), expected_shape)
        # compare the actual values for a slice.
        expected_slice = tf.constant(
            [[[33.8802, -4.3103, 22.7761], [4.6539, -2.8098, 13.6253], [1.8228, -3.6898, 8.8600]]]
        )
        self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-4))

    @slow
    def test_inference_no_head(self):
        model = TFRobertaModel.from_pretrained("FacebookAI/roberta-base")

        input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
        output = model(input_ids)[0]
        # compare the actual values for a slice.
        expected_slice = tf.constant(
            [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0540, -0.0175], [0.0548, 0.0799, 0.1687]]]
        )
        self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-4))

    @slow
    def test_inference_classification_head(self):
        model = TFRobertaForSequenceClassification.from_pretrained("FacebookAI/roberta-large-mnli")

        input_ids = tf.constant([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
        output = model(input_ids)[0]
        expected_shape = [1, 3]
        self.assertEqual(list(output.numpy().shape), expected_shape)
        expected_tensor = tf.constant([[-0.9469, 0.3913, 0.5118]])
        self.assertTrue(numpy.allclose(output.numpy(), expected_tensor.numpy(), atol=1e-4))
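

# A typical way to run this module locally (path and environment variable assumed from the
# usual transformers test setup; the @slow integration tests only run when RUN_SLOW is set):
#   RUN_SLOW=1 python -m pytest tests/models/roberta/test_modeling_tf_roberta.py -v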
