transformers
861 lines · 37.1 KB
1# coding=utf-8
2# Copyright 2020 The HuggingFace Team. All rights reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16
17import datetime
18import gc
19import math
20import unittest
21
22from transformers import GPT2Config, is_torch_available
23from transformers.testing_utils import backend_empty_cache, require_torch, slow, torch_device
24
25from ...generation.test_utils import GenerationTesterMixin
26from ...test_configuration_common import ConfigTester
27from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, random_attention_mask
28from ...test_pipeline_mixin import PipelineTesterMixin
29
30
31if is_torch_available():
32import torch
33
34from transformers import (
35GPT2_PRETRAINED_MODEL_ARCHIVE_LIST,
36GPT2DoubleHeadsModel,
37GPT2ForQuestionAnswering,
38GPT2ForSequenceClassification,
39GPT2ForTokenClassification,
40GPT2LMHeadModel,
41GPT2Model,
42GPT2Tokenizer,
43)
44
45
class GPT2ModelTester:
    """Builds tiny GPT-2 configurations and random inputs, and runs shape /
    consistency checks used by :class:`GPT2ModelTest`.

    The ``create_and_check_*`` methods instantiate a model head on
    ``torch_device``, run a forward pass, and assert output shapes or
    past-key-values cache consistency via ``self.parent`` (the TestCase).
    """

    def __init__(
        self,
        parent,
        batch_size=14,
        seq_length=7,
        is_training=True,
        use_token_type_ids=True,
        use_input_mask=True,
        use_labels=True,
        use_mc_token_ids=True,
        vocab_size=99,
        hidden_size=32,
        num_hidden_layers=2,
        num_attention_heads=4,
        intermediate_size=37,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=16,
        type_sequence_label_size=2,
        initializer_range=0.02,
        num_labels=3,
        num_choices=4,
        scope=None,
    ):
        self.parent = parent
        self.batch_size = batch_size
        self.seq_length = seq_length
        self.is_training = is_training
        self.use_token_type_ids = use_token_type_ids
        self.use_input_mask = use_input_mask
        self.use_labels = use_labels
        self.use_mc_token_ids = use_mc_token_ids
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.type_sequence_label_size = type_sequence_label_size
        self.initializer_range = initializer_range
        self.num_labels = num_labels
        self.num_choices = num_choices
        # FIX: was hard-coded to ``None``, silently discarding the ``scope``
        # argument; store the value actually passed in (default is still None).
        self.scope = scope
        # bos/eos/pad all share the last vocab id in these tiny test configs
        self.bos_token_id = vocab_size - 1
        self.eos_token_id = vocab_size - 1
        self.pad_token_id = vocab_size - 1

    def get_large_model_config(self):
        """Return the full-size pretrained GPT-2 config (used by generation tests)."""
        return GPT2Config.from_pretrained("openai-community/gpt2")

    def prepare_config_and_inputs(
        self, gradient_checkpointing=False, scale_attn_by_inverse_layer_idx=False, reorder_and_upcast_attn=False
    ):
        """Create a tiny config plus random ids/masks/labels for one forward pass."""
        input_ids = ids_tensor([self.batch_size, self.seq_length], self.vocab_size)

        input_mask = None
        if self.use_input_mask:
            input_mask = random_attention_mask([self.batch_size, self.seq_length])

        token_type_ids = None
        if self.use_token_type_ids:
            token_type_ids = ids_tensor([self.batch_size, self.seq_length], self.type_vocab_size)

        mc_token_ids = None
        if self.use_mc_token_ids:
            mc_token_ids = ids_tensor([self.batch_size, self.num_choices], self.seq_length)

        sequence_labels = None
        token_labels = None
        choice_labels = None
        if self.use_labels:
            sequence_labels = ids_tensor([self.batch_size], self.type_sequence_label_size)
            token_labels = ids_tensor([self.batch_size, self.seq_length], self.num_labels)
            choice_labels = ids_tensor([self.batch_size], self.num_choices)

        config = self.get_config(
            gradient_checkpointing=gradient_checkpointing,
            scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
            reorder_and_upcast_attn=reorder_and_upcast_attn,
        )

        head_mask = ids_tensor([self.num_hidden_layers, self.num_attention_heads], 2)

        return (
            config,
            input_ids,
            input_mask,
            head_mask,
            token_type_ids,
            mc_token_ids,
            sequence_labels,
            token_labels,
            choice_labels,
        )

    def get_config(
        self, gradient_checkpointing=False, scale_attn_by_inverse_layer_idx=False, reorder_and_upcast_attn=False
    ):
        """Map the tester's generic hyper-parameters onto GPT-2 config names (n_embd, n_layer, ...)."""
        return GPT2Config(
            vocab_size=self.vocab_size,
            n_embd=self.hidden_size,
            n_layer=self.num_hidden_layers,
            n_head=self.num_attention_heads,
            n_inner=self.intermediate_size,
            activation_function=self.hidden_act,
            resid_pdrop=self.hidden_dropout_prob,
            attn_pdrop=self.attention_probs_dropout_prob,
            n_positions=self.max_position_embeddings,
            type_vocab_size=self.type_vocab_size,
            initializer_range=self.initializer_range,
            use_cache=True,
            bos_token_id=self.bos_token_id,
            eos_token_id=self.eos_token_id,
            pad_token_id=self.pad_token_id,
            gradient_checkpointing=gradient_checkpointing,
            scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
            reorder_and_upcast_attn=reorder_and_upcast_attn,
        )

    def get_pipeline_config(self):
        """Config variant with a larger vocab so pipeline tokenizers fit."""
        config = self.get_config()
        config.vocab_size = 300
        return config

    def prepare_config_and_inputs_for_decoder(self):
        """Like prepare_config_and_inputs, plus encoder states/mask for cross-attention tests.

        Note: ``mc_token_ids`` is intentionally dropped from the returned tuple.
        """
        (
            config,
            input_ids,
            input_mask,
            head_mask,
            token_type_ids,
            mc_token_ids,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = self.prepare_config_and_inputs()

        encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size])
        encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)

        return (
            config,
            input_ids,
            input_mask,
            head_mask,
            token_type_ids,
            sequence_labels,
            token_labels,
            choice_labels,
            encoder_hidden_states,
            encoder_attention_mask,
        )

    def create_and_check_gpt2_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        """Check base-model output shape and past_key_values length across call signatures."""
        model = GPT2Model(config=config)
        model.to(torch_device)
        model.eval()

        result = model(input_ids, token_type_ids=token_type_ids, head_mask=head_mask)
        result = model(input_ids, token_type_ids=token_type_ids)
        result = model(input_ids)

        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
        self.parent.assertEqual(len(result.past_key_values), config.n_layer)

    def create_and_check_gpt2_model_past(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        """Verify that a one-token forward with cached past matches a full forward."""
        model = GPT2Model(config=config)
        model.to(torch_device)
        model.eval()

        # first forward pass
        outputs = model(input_ids, token_type_ids=token_type_ids, use_cache=True)
        outputs_use_cache_conf = model(input_ids, token_type_ids=token_type_ids)
        outputs_no_past = model(input_ids, token_type_ids=token_type_ids, use_cache=False)

        self.parent.assertTrue(len(outputs) == len(outputs_use_cache_conf))
        self.parent.assertTrue(len(outputs) == len(outputs_no_past) + 1)

        output, past = outputs.to_tuple()

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)
        next_token_types = ids_tensor([self.batch_size, 1], self.type_vocab_size)

        # append to next input_ids and token_type_ids
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
        next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)

        output_from_no_past = model(next_input_ids, token_type_ids=next_token_type_ids)["last_hidden_state"]
        output_from_past = model(next_tokens, token_type_ids=next_token_types, past_key_values=past)[
            "last_hidden_state"
        ]

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach()

        # test that outputs are equal for slice
        self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

    def create_and_check_gpt2_model_attention_mask_past(
        self, config, input_ids, input_mask, head_mask, token_type_ids, *args
    ):
        """Verify cached decoding agrees with full decoding when half the sequence is masked."""
        model = GPT2Model(config=config)
        model.to(torch_device)
        model.eval()

        # create attention mask
        attn_mask = torch.ones(input_ids.shape, dtype=torch.long, device=torch_device)
        half_seq_length = self.seq_length // 2
        attn_mask[:, half_seq_length:] = 0

        # first forward pass
        output, past = model(input_ids, attention_mask=attn_mask).to_tuple()

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

        # change a random masked slice from input_ids: masked positions must not
        # influence the output, so perturbing them should keep results equal
        random_seq_idx_to_change = ids_tensor((1,), half_seq_length).item() + 1
        random_other_next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size).squeeze(-1)
        input_ids[:, -random_seq_idx_to_change] = random_other_next_tokens

        # append to next input_ids and attn_mask
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
        attn_mask = torch.cat(
            [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)],
            dim=1,
        )

        # get two different outputs
        output_from_no_past = model(next_input_ids, attention_mask=attn_mask)["last_hidden_state"]
        output_from_past = model(next_tokens, past_key_values=past, attention_mask=attn_mask)["last_hidden_state"]

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach()

        # test that outputs are equal for slice
        self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

    def create_and_check_gpt2_model_past_large_inputs(
        self, config, input_ids, input_mask, head_mask, token_type_ids, *args
    ):
        """Same cache-consistency check, but appending three tokens at once."""
        model = GPT2Model(config=config)
        model.to(torch_device)
        model.eval()

        # first forward pass
        outputs = model(input_ids, token_type_ids=token_type_ids, attention_mask=input_mask, use_cache=True)

        output, past = outputs.to_tuple()

        # create hypothetical next token and extend to next_input_ids
        next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
        next_token_types = ids_tensor([self.batch_size, 3], self.type_vocab_size)
        next_mask = ids_tensor((self.batch_size, 3), vocab_size=2)

        # append to next input_ids and token_type_ids
        next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
        next_token_type_ids = torch.cat([token_type_ids, next_token_types], dim=-1)
        next_attention_mask = torch.cat([input_mask, next_mask], dim=-1)

        output_from_no_past = model(
            next_input_ids, token_type_ids=next_token_type_ids, attention_mask=next_attention_mask
        )["last_hidden_state"]
        output_from_past = model(
            next_tokens, token_type_ids=next_token_types, attention_mask=next_attention_mask, past_key_values=past
        )["last_hidden_state"]
        self.parent.assertTrue(output_from_past.shape[1] == next_tokens.shape[1])

        # select random slice
        random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
        output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach()
        output_from_past_slice = output_from_past[:, :, random_slice_idx].detach()

        # test that outputs are equal for slice
        self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

    def create_and_check_lm_head_model(self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
        """Check LM-head loss is a scalar and logits have (batch, seq, vocab) shape."""
        model = GPT2LMHeadModel(config)
        model.to(torch_device)
        model.eval()

        result = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)
        self.parent.assertEqual(result.loss.shape, ())
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))

    def create_and_check_forward_and_backwards(
        self, config, input_ids, input_mask, head_mask, token_type_ids, *args, gradient_checkpointing=False
    ):
        """Run a forward pass and backprop through the loss (optionally with gradient checkpointing)."""
        model = GPT2LMHeadModel(config)
        model.to(torch_device)
        if gradient_checkpointing:
            model.gradient_checkpointing_enable()

        result = model(input_ids, token_type_ids=token_type_ids, labels=input_ids)
        self.parent.assertEqual(result.loss.shape, ())
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
        result.loss.backward()

    def create_and_check_double_lm_head_model(
        self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, *args
    ):
        """Check the double-heads model on multiple-choice-shaped inputs."""
        model = GPT2DoubleHeadsModel(config)
        model.to(torch_device)
        model.eval()

        # replicate each sequence num_choices times: (batch, num_choices, seq)
        multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
        multiple_choice_input_mask = input_mask.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()
        multiple_choice_token_type_ids = token_type_ids.unsqueeze(1).expand(-1, self.num_choices, -1).contiguous()

        inputs = {
            "input_ids": multiple_choice_inputs_ids,
            "mc_token_ids": mc_token_ids,
            "attention_mask": multiple_choice_input_mask,
            "token_type_ids": multiple_choice_token_type_ids,
            "labels": multiple_choice_inputs_ids,
        }

        result = model(**inputs)
        self.parent.assertEqual(result.loss.shape, ())
        self.parent.assertEqual(
            result.logits.shape, (self.batch_size, self.num_choices, self.seq_length, self.vocab_size)
        )
        self.parent.assertEqual(result.mc_logits.shape, (self.batch_size, self.num_choices))

    def create_and_check_gpt2_for_question_answering(
        self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, *args
    ):
        """Check QA-head start/end logits have (batch, seq) shape."""
        config.num_labels = self.num_labels
        model = GPT2ForQuestionAnswering(config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
        self.parent.assertEqual(result.start_logits.shape, (self.batch_size, self.seq_length))
        self.parent.assertEqual(result.end_logits.shape, (self.batch_size, self.seq_length))

    def create_and_check_gpt2_for_sequence_classification(
        self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, *args
    ):
        """Check sequence-classification logits have (batch, num_labels) shape."""
        config.num_labels = self.num_labels
        model = GPT2ForSequenceClassification(config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.num_labels))

    def create_and_check_gpt2_for_token_classification(
        self, config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, *args
    ):
        """Check token-classification logits have (batch, seq, num_labels) shape."""
        config.num_labels = self.num_labels
        model = GPT2ForTokenClassification(config)
        model.to(torch_device)
        model.eval()
        result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
        self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.num_labels))

    def create_and_check_gpt2_weight_initialization(self, config, *args):
        """Check c_proj weights follow GPT-2's scaled init: std = initializer_range / sqrt(2 * n_layer)."""
        model = GPT2Model(config)
        model_std = model.config.initializer_range / math.sqrt(2 * model.config.n_layer)
        for key in model.state_dict().keys():
            if "c_proj" in key and "weight" in key:
                self.parent.assertLessEqual(abs(torch.std(model.state_dict()[key]) - model_std), 0.001)
                self.parent.assertLessEqual(abs(torch.mean(model.state_dict()[key]) - 0.0), 0.01)

    def prepare_config_and_inputs_for_common(self):
        """Return (config, inputs_dict) in the shape ModelTesterMixin expects."""
        config_and_inputs = self.prepare_config_and_inputs()

        (
            config,
            input_ids,
            input_mask,
            head_mask,
            token_type_ids,
            mc_token_ids,
            sequence_labels,
            token_labels,
            choice_labels,
        ) = config_and_inputs

        inputs_dict = {
            "input_ids": input_ids,
            "token_type_ids": token_type_ids,
            "head_mask": head_mask,
        }

        return config, inputs_dict
443
444
@require_torch
class GPT2ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin, unittest.TestCase):
    """Runs the common model / generation / pipeline test suites over the GPT-2 family,
    plus GPT-2-specific fast tests and slow golden-output generation tests."""

    # Heads exercised by the common tests; empty when torch is unavailable so the
    # class can still be imported in a torch-less environment.
    all_model_classes = (
        (
            GPT2Model,
            GPT2LMHeadModel,
            GPT2DoubleHeadsModel,
            GPT2ForQuestionAnswering,
            GPT2ForSequenceClassification,
            GPT2ForTokenClassification,
        )
        if is_torch_available()
        else ()
    )
    all_generative_model_classes = (GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()
    pipeline_model_mapping = (
        {
            "feature-extraction": GPT2Model,
            "question-answering": GPT2ForQuestionAnswering,
            "text-classification": GPT2ForSequenceClassification,
            "text-generation": GPT2LMHeadModel,
            "token-classification": GPT2ForTokenClassification,
            "zero-shot": GPT2ForSequenceClassification,
        }
        if is_torch_available()
        else {}
    )
    all_parallelizable_model_classes = (GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()
    fx_compatible = True
    test_missing_keys = False
    test_model_parallel = True

    # special case for DoubleHeads model
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
        """Extend the mixin's input prep: GPT2DoubleHeadsModel needs multiple-choice
        shaped (batch, num_choices, seq) inputs plus mc_token_ids / mc_labels."""
        inputs_dict = super()._prepare_for_class(inputs_dict, model_class, return_labels=return_labels)

        if return_labels:
            if model_class.__name__ == "GPT2DoubleHeadsModel":
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size, self.model_tester.num_choices, self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device,
                )
                inputs_dict["input_ids"] = inputs_dict["labels"]
                inputs_dict["token_type_ids"] = inputs_dict["labels"]
                inputs_dict["mc_token_ids"] = torch.zeros(
                    (self.model_tester.batch_size, self.model_tester.num_choices),
                    dtype=torch.long,
                    device=torch_device,
                )
                inputs_dict["mc_labels"] = torch.zeros(
                    self.model_tester.batch_size, dtype=torch.long, device=torch_device
                )
        return inputs_dict

    def setUp(self):
        self.model_tester = GPT2ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=GPT2Config, n_embd=37)

    def tearDown(self):
        super().tearDown()
        # clean-up as much as possible GPU memory occupied by PyTorch
        gc.collect()
        backend_empty_cache(torch_device)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_gpt2_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model(*config_and_inputs)

    def test_gpt2_model_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_past(*config_and_inputs)

    def test_gpt2_model_att_mask_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_attention_mask_past(*config_and_inputs)

    def test_gpt2_model_past_large_inputs(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_past_large_inputs(*config_and_inputs)

    def test_gpt2_lm_head_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_lm_head_model(*config_and_inputs)

    def test_gpt2_double_lm_head_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_double_lm_head_model(*config_and_inputs)

    def test_gpt2_question_answering_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_for_question_answering(*config_and_inputs)

    def test_gpt2_sequence_classification_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_for_sequence_classification(*config_and_inputs)

    def test_gpt2_token_classification_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_for_token_classification(*config_and_inputs)

    def test_gpt2_gradient_checkpointing(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_forward_and_backwards(*config_and_inputs, gradient_checkpointing=True)

    def test_gpt2_scale_attn_by_inverse_layer_idx(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs(scale_attn_by_inverse_layer_idx=True)
        self.model_tester.create_and_check_forward_and_backwards(*config_and_inputs)

    def test_gpt2_reorder_and_upcast_attn(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs(reorder_and_upcast_attn=True)
        self.model_tester.create_and_check_forward_and_backwards(*config_and_inputs)

    def test_gpt2_weight_initialization(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_weight_initialization(*config_and_inputs)

    @unittest.skip(
        reason="This architecure seem to not compute gradients properly when using GC, check: https://github.com/huggingface/transformers/pull/27124"
    )
    def test_training_gradient_checkpointing(self):
        pass

    @unittest.skip(
        reason="This architecure seem to not compute gradients properly when using GC, check: https://github.com/huggingface/transformers/pull/27124"
    )
    def test_training_gradient_checkpointing_use_reentrant(self):
        pass

    @unittest.skip(
        reason="This architecure seem to not compute gradients properly when using GC, check: https://github.com/huggingface/transformers/pull/27124"
    )
    def test_training_gradient_checkpointing_use_reentrant_false(self):
        pass

    @slow
    def test_batch_generation(self):
        """Left-padded batched generation must match unbatched generation, and
        token_type_ids must actually influence the output."""
        model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
        model.to(torch_device)
        tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")

        tokenizer.padding_side = "left"

        # Define PAD Token = EOS Token = 50256
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = model.config.eos_token_id

        # use different length sentences to test batching
        sentences = [
            "Hello, my dog is a little",
            "Today, I",
        ]

        inputs = tokenizer(sentences, return_tensors="pt", padding=True)
        input_ids = inputs["input_ids"].to(torch_device)
        # all-zero token types except an arbitrary distinct id (500) on the last
        # position, so the token_type_ids run produces a different output
        token_type_ids = torch.cat(
            [
                input_ids.new_full((input_ids.shape[0], input_ids.shape[1] - 1), 0),
                input_ids.new_full((input_ids.shape[0], 1), 500),
            ],
            dim=-1,
        )

        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"].to(torch_device),
        )

        outputs_tt = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"].to(torch_device),
            token_type_ids=token_type_ids,
        )

        inputs_non_padded = tokenizer(sentences[0], return_tensors="pt").input_ids.to(torch_device)
        output_non_padded = model.generate(input_ids=inputs_non_padded)

        # shorten max_length by the pad count so both sequences generate the
        # same number of new tokens
        num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][-1].long().sum().cpu().item()
        inputs_padded = tokenizer(sentences[1], return_tensors="pt").input_ids.to(torch_device)
        output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings)

        batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        batch_out_sentence_tt = tokenizer.batch_decode(outputs_tt, skip_special_tokens=True)
        non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
        padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)

        expected_output_sentence = [
            "Hello, my dog is a little bit of a mess. I'm not sure if he's going",
            "Today, I'm going to be doing a lot of research on this. I",
        ]
        self.assertListEqual(expected_output_sentence, batch_out_sentence)
        self.assertTrue(batch_out_sentence_tt != batch_out_sentence)  # token_type_ids should change output
        self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])

    @slow
    def test_batch_generation_2heads(self):
        """Same batched-generation check as above, but through GPT2DoubleHeadsModel."""
        model = GPT2DoubleHeadsModel.from_pretrained("openai-community/gpt2")
        model.to(torch_device)
        tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")

        tokenizer.padding_side = "left"

        # This tokenizer has no pad token, so we have to set it in some way
        # Define PAD Token = EOS Token = 50256
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = model.config.eos_token_id

        # use different length sentences to test batching
        sentences = [
            "Hello, my dog is a little",
            "Today, I",
        ]

        inputs = tokenizer(sentences, return_tensors="pt", padding=True)
        input_ids = inputs["input_ids"].to(torch_device)
        token_type_ids = torch.cat(
            [
                input_ids.new_full((input_ids.shape[0], input_ids.shape[1] - 1), 0),
                input_ids.new_full((input_ids.shape[0], 1), 500),
            ],
            dim=-1,
        )

        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"].to(torch_device),
        )

        outputs_tt = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"].to(torch_device),
            token_type_ids=token_type_ids,
        )

        inputs_non_padded = tokenizer(sentences[0], return_tensors="pt").input_ids.to(torch_device)
        output_non_padded = model.generate(input_ids=inputs_non_padded)

        num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][-1].long().sum().cpu().item()
        inputs_padded = tokenizer(sentences[1], return_tensors="pt").input_ids.to(torch_device)
        output_padded = model.generate(input_ids=inputs_padded, max_length=model.config.max_length - num_paddings)

        batch_out_sentence = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        batch_out_sentence_tt = tokenizer.batch_decode(outputs_tt, skip_special_tokens=True)
        non_padded_sentence = tokenizer.decode(output_non_padded[0], skip_special_tokens=True)
        padded_sentence = tokenizer.decode(output_padded[0], skip_special_tokens=True)

        expected_output_sentence = [
            "Hello, my dog is a little bit of a mess. I'm not sure if he's going",
            "Today, I'm going to be doing a lot of research on this. I",
        ]
        self.assertListEqual(expected_output_sentence, batch_out_sentence)
        self.assertTrue(batch_out_sentence_tt != batch_out_sentence)  # token_type_ids should change output
        self.assertListEqual(expected_output_sentence, [non_padded_sentence, padded_sentence])

    @slow
    def test_model_from_pretrained(self):
        for model_name in GPT2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = GPT2Model.from_pretrained(model_name)
            self.assertIsNotNone(model)
707
708
@require_torch
class GPT2ModelLanguageGenerationTest(unittest.TestCase):
    """Slow integration tests comparing real pretrained GPT-2 generation against
    golden (pre-recorded) token ids and strings."""

    def tearDown(self):
        super().tearDown()
        # clean-up as much as possible GPU memory occupied by PyTorch
        gc.collect()
        backend_empty_cache(torch_device)

    def _test_lm_generate_gpt2_helper(
        self,
        gradient_checkpointing=False,
        reorder_and_upcast_attn=False,
        scale_attn_by_inverse_layer_idx=False,
        verify_outputs=True,
    ):
        """Greedy-generate from the prompt "The dog" under the given attention/GC
        options and (optionally) compare against the recorded golden token ids."""
        model = GPT2LMHeadModel.from_pretrained(
            "openai-community/gpt2",
            reorder_and_upcast_attn=reorder_and_upcast_attn,
            scale_attn_by_inverse_layer_idx=scale_attn_by_inverse_layer_idx,
        )
        if gradient_checkpointing:
            model.gradient_checkpointing_enable()
        else:
            model.gradient_checkpointing_disable()
        model.to(torch_device)

        # The dog
        input_ids = torch.tensor([[464, 3290]], dtype=torch.long, device=torch_device)

        # The dog was found in a field near the intersection of West and West Streets.\n\nThe dog
        expected_output_ids = [464, 3290, 373, 1043, 287, 257, 2214, 1474, 262, 16246, 286, 2688, 290, 2688, 27262, 13, 198, 198, 464, 3290,]  # fmt: skip
        output_ids = model.generate(input_ids, do_sample=False)
        if verify_outputs:
            self.assertListEqual(output_ids[0].tolist(), expected_output_ids)

    @slow
    def test_lm_generate_gpt2(self):
        self._test_lm_generate_gpt2_helper()

    @slow
    def test_lm_generate_gpt2_with_gradient_checkpointing(self):
        self._test_lm_generate_gpt2_helper(gradient_checkpointing=True)

    @slow
    def test_lm_generate_gpt2_with_reorder_and_upcast_attn(self):
        self._test_lm_generate_gpt2_helper(reorder_and_upcast_attn=True)

    @slow
    def test_lm_generate_gpt2_with_scale_attn_by_inverse_layer_idx(self):
        # outputs differ slightly under this scaling, so only check it runs
        self._test_lm_generate_gpt2_helper(scale_attn_by_inverse_layer_idx=True, verify_outputs=False)

    @slow
    def test_gpt2_sample(self):
        """Seeded sampling must reproduce the recorded string, and token_type_ids
        must change sampled outputs."""
        tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
        model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
        model.to(torch_device)

        # fixed seed makes do_sample=True deterministic
        torch.manual_seed(0)
        tokenized = tokenizer("Today is a nice day and", return_tensors="pt", return_token_type_ids=True)
        input_ids = tokenized.input_ids.to(torch_device)
        output_ids = model.generate(input_ids, do_sample=True)
        output_str = tokenizer.decode(output_ids[0], skip_special_tokens=True)

        token_type_ids = tokenized.token_type_ids.to(torch_device)
        output_seq = model.generate(input_ids=input_ids, do_sample=True, num_return_sequences=5)
        output_seq_tt = model.generate(
            input_ids=input_ids, token_type_ids=token_type_ids, do_sample=True, num_return_sequences=5
        )
        output_seq_strs = tokenizer.batch_decode(output_seq, skip_special_tokens=True)
        output_seq_tt_strs = tokenizer.batch_decode(output_seq_tt, skip_special_tokens=True)

        EXPECTED_OUTPUT_STR = (
            "Today is a nice day and if you don't know anything about the state of play during your holiday"
        )
        self.assertEqual(output_str, EXPECTED_OUTPUT_STR)
        self.assertTrue(
            all(output_seq_strs[idx] != output_seq_tt_strs[idx] for idx in range(len(output_seq_tt_strs)))
        )  # token_type_ids should change output

    @slow
    def test_gpt2_sample_max_time(self):
        """max_time must bound generation wall-clock duration (within 1.5x slack)
        for sampling, greedy, and beam search; no limit means it runs longer."""
        tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2")
        model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2")
        model.to(torch_device)

        torch.manual_seed(0)
        tokenized = tokenizer("Today is a nice day and", return_tensors="pt", return_token_type_ids=True)
        input_ids = tokenized.input_ids.to(torch_device)

        MAX_TIME = 0.5

        start = datetime.datetime.now()
        model.generate(input_ids, do_sample=True, max_time=MAX_TIME, max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

        start = datetime.datetime.now()
        model.generate(input_ids, do_sample=False, max_time=MAX_TIME, max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

        start = datetime.datetime.now()
        model.generate(input_ids, do_sample=False, num_beams=2, max_time=MAX_TIME, max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

        start = datetime.datetime.now()
        model.generate(input_ids, do_sample=True, num_beams=2, max_time=MAX_TIME, max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=MAX_TIME))
        self.assertLess(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

        # no max_time: generation should run well past the previous bound
        start = datetime.datetime.now()
        model.generate(input_ids, do_sample=False, max_time=None, max_length=256)
        duration = datetime.datetime.now() - start
        self.assertGreater(duration, datetime.timedelta(seconds=1.5 * MAX_TIME))

    @slow
    def test_contrastive_search_gpt2(self):
        """Contrastive search (penalty_alpha + top_k) on gpt2-large must reproduce
        the recorded continuation exactly."""
        article = (
            "DeepMind Technologies is a British artificial intelligence subsidiary of Alphabet Inc. and research "
            "laboratory founded in 2010. DeepMind was acquired by Google in 2014. The company is based"
        )

        gpt2_tokenizer = GPT2Tokenizer.from_pretrained("openai-community/gpt2-large")
        gpt2_model = GPT2LMHeadModel.from_pretrained("openai-community/gpt2-large").to(torch_device)
        input_ids = gpt2_tokenizer(article, return_tensors="pt").input_ids.to(torch_device)

        outputs = gpt2_model.generate(input_ids, penalty_alpha=0.6, top_k=4, max_length=256)

        generated_text = gpt2_tokenizer.batch_decode(outputs, skip_special_tokens=True)

        self.assertListEqual(
            generated_text,
            [
                "DeepMind Technologies is a British artificial intelligence subsidiary of Alphabet Inc. and research "
                "laboratory founded in 2010. DeepMind was acquired by Google in 2014. The company is based in London, "
                "United Kingdom\n\nGoogle has a lot of data on its users and uses it to improve its products, such as "
                "Google Now, which helps users find the information they're looking for on the web. But the company "
                "is not the only one to collect data on its users. Facebook, for example, has its own facial "
                "recognition technology, as well as a database of millions of photos that it uses to personalize its "
                "News Feed.\n\nFacebook's use of data is a hot topic in the tech industry, with privacy advocates "
                "concerned about the company's ability to keep users' information private. In a blog post last "
                'year, Facebook CEO Mark Zuckerberg said his company would "do our best to be transparent about our '
                'data use and how we use it."\n\n"We have made it clear that we do not sell or share your data with '
                'third parties," Zuckerberg wrote. "If you have questions or concerns, please reach out to us at '
                'privacy@facebook.com."\n\nGoogle declined to comment on the privacy implications of its use of data, '
                "but said in a statement to The Associated Press that"
            ],
        )