# coding=utf-8
# Copyright 2020 HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import unittest

from transformers import FunnelConfig, FunnelTokenizer, is_torch_available
from transformers.models.auto import get_values
from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device

from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin


if is_torch_available():
    import torch

    from transformers import (
        MODEL_FOR_PRETRAINING_MAPPING,
        FunnelBaseModel,
        FunnelForMaskedLM,
        FunnelForMultipleChoice,
        FunnelForPreTraining,
        FunnelForQuestionAnswering,
        FunnelForSequenceClassification,
        FunnelForTokenClassification,
        FunnelModel,
    )


class FunnelModelTester:
    """You can also import this e.g. from .test_modeling_funnel import FunnelModelTester"""

    def __init__(
        self,
        parent,
        batch_size=13,
        seq_length=7,
        is_training=True,
        use_input_mask=True,
        use_token_type_ids=True,
        use_labels=True,
        vocab_size=99,
        block_sizes=[1, 1, 2],
        num_decoder_layers=1,
        d_model=32,
        n_head=4,
        d_head=8,
        d_inner=37,
        hidden_act="gelu_new",
        hidden_dropout=0.1,
        attention_dropout=0.1,
        activation_dropout=0.0,
        max_position_embeddings=512,
        type_vocab_size=3,
        initializer_std=0.02,  # Set to a smaller value, so we can keep the small error threshold (1e-5) in the test
        num_labels=3,
        num_choices=4,
        scope=None,
        base=False,
    ):
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.is_training = is_training
        self.use_input_mask = use_input_mask
        self.use_token_type_ids = use_token_type_ids
        self.use_labels = use_labels
        self.vocab_size = vocab_size
        self.block_sizes = block_sizes
        self.num_decoder_layers = num_decoder_layers
        self.d_model = d_model
        self.n_head = n_head
        self.d_head = d_head
        self.d_inner = d_inner
        self.hidden_act = hidden_act
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout
        self.activation_dropout = activation_dropout
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.type_sequence_label_size = 2
        self.num_labels = num_labels
        self.num_choices = num_choices
        self.scope = scope
        self.initializer_std = initializer_std

        # Used in the tests to check the size of the first attention layer
        self.num_attention_heads = n_head
        # Used in the tests to check the size of the first hidden state
        self.hidden_size = self.d_model
        # Used in the tests to check the number of output hidden states/attentions
        self.num_hidden_layers = sum(self.block_sizes) + (0 if base else self.num_decoder_layers)
        # FunnelModel adds two hidden layers: input embeddings and the sum of the upsampled encoder hidden state with
        # the last hidden state of the first block (which is the first hidden state of the decoder).
        if not base:
            self.expected_num_hidden_layers = self.num_hidden_layers + 2

    def prepare_config_and_inputs(self):
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)
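        # Per-token binary labels for FunnelForPreTraining (vocab_size=1, so all zeros); that head
        # emits one logit per token rather than a distribution over the vocabulary.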
        fake_token_labels = ids_tensor([self.batch_size, self.seq_length], 1)

        config = self.get_config()

        return (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            fake_token_labels,
        )

    def get_config(self):
        return FunnelConfig(
            vocab_size=self.vocab_size,
            block_sizes=self.block_sizes,
            num_decoder_layers=self.num_decoder_layers,
            d_model=self.d_model,
            n_head=self.n_head,
            d_head=self.d_head,
            d_inner=self.d_inner,
            hidden_act=self.hidden_act,
            hidden_dropout=self.hidden_dropout,
            attention_dropout=self.attention_dropout,
            activation_dropout=self.activation_dropout,
            max_position_embeddings=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_std=self.initializer_std,
        )

    def create_and_check_model(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        fake_token_labels,
    ):
        model = FunnelModel(config=config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
        result = model(input_ids, token_type_ids=token_type_ids)
        result = model(input_ids)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.d_model))

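        # The decoder up-samples the pooled sequence back to the input length, so the output stays
        # (batch, seq_length, d_model) whichever pooling options are toggled below.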
        model.config.truncate_seq = False
        result = model(input_ids)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.d_model))

        model.config.separate_cls = False
        result = model(input_ids)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.d_model))

    def create_and_check_base_model(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        fake_token_labels,
    ):
        model = FunnelBaseModel(config=config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
        result = model(input_ids, token_type_ids=token_type_ids)
        result = model(input_ids)
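        # The assertions below encode Funnel's pooling: with block_sizes=[1, 1, 2] the sequence is
        # pooled twice, shrinking the 7 input tokens to 2 under the default separate_cls +
        # truncate_seq settings, to 3 when the sequence is not truncated before pooling, and back
        # to 2 when the cls token is pooled along with the rest.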
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, 2, self.d_model))

        model.config.truncate_seq = False
        result = model(input_ids)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, 3, self.d_model))

        model.config.separate_cls = False
        result = model(input_ids)
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, 2, self.d_model))

    def create_and_check_for_pretraining(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        fake_token_labels,
    ):
        config.num_labels = self.num_labels
        model = FunnelForPreTraining(config=config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=fake_token_labels)
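        # One logit per token: the pretraining head is an ELECTRA-style discriminator over
        # original vs. replaced tokens, hence no vocabulary dimension in the logits.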
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length))

    def create_and_check_for_masked_lm(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        fake_token_labels,
    ):
        model = FunnelForMaskedLM(config=config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

    def create_and_check_for_sequence_classification(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        fake_token_labels,
    ):
        config.num_labels = self.num_labels
        model = FunnelForSequenceClassification(config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))

    def create_and_check_for_multiple_choice(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        fake_token_labels,
    ):
        config.num_choices = self.num_choices
        model = FunnelForMultipleChoice(config=config)
        model.to(torch_device)
        model.eval()
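        # Tile each input across a new choices dimension: (batch, seq) -> (batch, num_choices, seq).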
        multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
        multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
        multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
        result = model(
            multiple_choice_inputs_ids,
            attention_mask=multiple_choice_input_mask,
            token_type_ids=multiple_choice_token_type_ids,
            labels=choice_labels,
        )
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_choices))

    def create_and_check_for_token_classification(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        fake_token_labels,
    ):
        config.num_labels = self.num_labels
        model = FunnelForTokenClassification(config=config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

    def create_and_check_for_question_answering(
        self,
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        fake_token_labels,
    ):
        model = FunnelForQuestionAnswering(config=config)
        model.to(torch_device)
        model.eval()
        result = model(
            input_ids,
            attention_mask=input_mask,
            token_type_ids=token_type_ids,
            start_positions=sequence_labels,
            end_positions=sequence_labels,
        )
        self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
        self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

    def prepare_config_and_inputs_for_common(self):
        config_and_inputs = self.prepare_config_and_inputs()
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            fake_token_labels,
        ) = config_and_inputs
        inputs_dict = {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": input_mask}
        return config, inputs_dict


@require_torch
class FunnelModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    test_head_masking = False
    test_pruning = False
    all_model_classes = (
        (
            FunnelModel,
            FunnelForMaskedLM,
            FunnelForPreTraining,
            FunnelForQuestionAnswering,
            FunnelForTokenClassification,
        )
        if is_torch_available()
        else ()
    )
    pipeline_model_mapping = (
        {
            "feature-extraction": (FunnelBaseModel, FunnelModel),
            "fill-mask": FunnelForMaskedLM,
            "question-answering": FunnelForQuestionAnswering,
            "text-classification": FunnelForSequenceClassification,
            "token-classification": FunnelForTokenClassification,
            "zero-shot": FunnelForSequenceClassification,
        }
        if is_torch_available()
        else {}
    )
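    # Both feature extractors are legitimate: FunnelBaseModel returns the shortened (pooled)
    # sequence, while FunnelModel returns the decoder's full-length output.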

    # special case for ForPreTraining model
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)

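        # FunnelForPreTraining expects per-token binary labels, so build them explicitly here
        # instead of relying on the generic defaults.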
        if return_labels:
            if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size, self.model_tester.seq_length), dtype=torch.long, device=torch_device
                )
        return inputs_dict

    def setUp(self):
        self.model_tester = FunnelModelTester(self)
        self.config_tester = ConfigTester(self, config_class=FunnelConfig)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_pretraining(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(*config_and_inputs)

    # overwrite from test_modeling_common
    def _mock_init_weights(self, module):
        if hasattr(module, "weight") and module.weight is not None:
            module.weight.data.fill_(3)
        if hasattr(module, "bias") and module.bias is not None:
            module.bias.data.fill_(3)

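        # Funnel's relative attention keeps extra learned parameters that are not plain
        # weight/bias attributes, so they are filled explicitly as well.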
        for param in ["r_w_bias", "r_r_bias", "r_kernel", "r_s_bias", "seg_embed"]:
            if hasattr(module, param) and getattr(module, param) is not None:
                weight = getattr(module, param)
                weight.data.fill_(3)


@require_torch
class FunnelBaseModelTest(ModelTesterMixin, unittest.TestCase):
    test_head_masking = False
    test_pruning = False
    all_model_classes = (
        (FunnelBaseModel, FunnelForMultipleChoice, FunnelForSequenceClassification) if is_torch_available() else ()
    )

    def setUp(self):
        self.model_tester = FunnelModelTester(self, base=True)
        self.config_tester = ConfigTester(self, config_class=FunnelConfig)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_base_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_base_model(*config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)

    # overwrite from test_modeling_common
    def test_training(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        for model_class in self.all_model_classes:
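            # FunnelBaseModel has no task head and returns no loss, so it cannot be trained here.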
            if model_class.__name__ == "FunnelBaseModel":
                continue
            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    # overwrite from test_modeling_common
    def _mock_init_weights(self, module):
        if hasattr(module, "weight") and module.weight is not None:
            module.weight.data.fill_(3)
        if hasattr(module, "bias") and module.bias is not None:
            module.bias.data.fill_(3)

        for param in ["r_w_bias", "r_r_bias", "r_kernel", "r_s_bias", "seg_embed"]:
            if hasattr(module, param) and getattr(module, param) is not None:
                weight = getattr(module, param)
                weight.data.fill_(3)


@require_torch
@require_sentencepiece
@require_tokenizers
class FunnelModelIntegrationTest(unittest.TestCase):
    def test_inference_tiny_model(self):
        batch_size = 13
        sequence_length = 7
        input_ids = torch.arange(0, batch_size * sequence_length).long().reshape(batch_size, sequence_length)
        lengths = [0, 1, 2, 3, 4, 5, 6, 4, 1, 3, 5, 0, 1]
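        # Token type 2 marks the first (cls) position, followed by `a` tokens of type 0 and the
        # remaining positions of type 1.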
        token_type_ids = torch.tensor([[2] + [0] * a + [1] * (sequence_length - a - 1) for a in lengths])

        model = FunnelModel.from_pretrained("sgugger/funnel-random-tiny")
        output = model(input_ids, token_type_ids=token_type_ids)[0].abs()

        expected_output_sum = torch.tensor(2344.8352)
        expected_output_mean = torch.tensor(0.8052)
        self.assertTrue(torch.allclose(output.sum(), expected_output_sum, atol=1e-4))
        self.assertTrue(torch.allclose(output.mean(), expected_output_mean, atol=1e-4))

        attention_mask = torch.tensor([[1] * 7, [1] * 4 + [0] * 3] * 6 + [[0, 1, 1, 0, 0, 1, 1]])
        output = model(input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)[0].abs()

        expected_output_sum = torch.tensor(2343.8425)
        expected_output_mean = torch.tensor(0.8049)
        self.assertTrue(torch.allclose(output.sum(), expected_output_sum, atol=1e-4))
        self.assertTrue(torch.allclose(output.mean(), expected_output_mean, atol=1e-4))

    @slow
    def test_inference_model(self):
        tokenizer = FunnelTokenizer.from_pretrained("huggingface/funnel-small")
        model = FunnelModel.from_pretrained("huggingface/funnel-small")
        inputs = tokenizer("Hello! I am the Funnel Transformer model.", return_tensors="pt")
        output = model(**inputs)[0]

        expected_output_sum = torch.tensor(235.7246)
        expected_output_mean = torch.tensor(0.0256)
        self.assertTrue(torch.allclose(output.sum(), expected_output_sum, atol=1e-4))
        self.assertTrue(torch.allclose(output.mean(), expected_output_mean, atol=1e-4))