# transformers — TF common model test suite (original file: 1874 lines, 89.2 KB)
1# coding=utf-8
2# Copyright 2019 HuggingFace Inc.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16
17from __future__ import annotations18
19import copy20import inspect21import json22import os23import random24import tempfile25import unittest26from importlib import import_module27from math import isnan28from typing import List, Tuple29
30from datasets import Dataset31
32from transformers import is_tf_available, is_torch_available33from transformers.models.auto import get_values34from transformers.testing_utils import ( # noqa: F40135CaptureLogger,36_tf_gpu_memory_limit,37is_pt_tf_cross_test,38require_tf,39require_tf2onnx,40slow,41torch_device,42)
43from transformers.utils import CONFIG_NAME, GENERATION_CONFIG_NAME, logging44from transformers.utils.generic import ModelOutput45
46
# Module-level logger, following the transformers convention of one logger per module.
logger = logging.get_logger(__name__)
49
if is_tf_available():
    import numpy as np
    import tensorflow as tf

    from transformers import (
        TF_MODEL_FOR_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING,
        TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
        TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING,
        TF_MODEL_FOR_MASKED_LM_MAPPING,
        TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING,
        TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING,
        TF_MODEL_FOR_PRETRAINING_MAPPING,
        TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING,
        TF_MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING,
        TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
        TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
        TF_MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
        TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
        TFAutoModel,
        TFAutoModelForSequenceClassification,
        TFSharedEmbeddings,
    )
    from transformers.generation import (
        TFBeamSampleDecoderOnlyOutput,
        TFBeamSampleEncoderDecoderOutput,
        TFBeamSearchDecoderOnlyOutput,
        TFBeamSearchEncoderDecoderOutput,
        TFGreedySearchDecoderOnlyOutput,
        TFGreedySearchEncoderDecoderOutput,
        TFSampleDecoderOnlyOutput,
        TFSampleEncoderDecoderOutput,
    )
    from transformers.modeling_tf_utils import keras

    # Disable TF32 (reduced-precision matmuls) — presumably to keep the numeric
    # comparisons in these tests tight; TODO confirm against CI expectations.
    tf.config.experimental.enable_tensor_float_32_execution(False)

    if _tf_gpu_memory_limit is not None:
        gpus = tf.config.list_physical_devices("GPU")
        for gpu in gpus:
            # Restrict TensorFlow to only allocate x GB of memory on the GPUs
            try:
                tf.config.set_logical_device_configuration(
                    gpu, [tf.config.LogicalDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)]
                )
                logical_gpus = tf.config.list_logical_devices("GPU")
                print("Logical GPUs", logical_gpus)
            except RuntimeError as e:
                # Virtual devices must be set before GPUs have been initialized
                print(e)
if is_torch_available():
    # Torch is only imported when present; it is used by the PT/TF cross-equivalence tests below.
    import torch
104
105def _config_zero_init(config):106configs_no_init = copy.deepcopy(config)107for key in configs_no_init.__dict__.keys():108if "_range" in key or "_std" in key:109setattr(configs_no_init, key, 0.0)110return configs_no_init111
112
@require_tf
class TFModelTesterMixin:
    """Common test suite mixed into each TF model's test case.

    Concrete test cases set the class attributes below; the `test_*` methods here
    then exercise save/load, signatures, attentions, head masking, PT/TF parity, etc.
    """

    # Provides configs/inputs via `prepare_config_and_inputs_for_common()` (set by subclasses).
    model_tester = None
    # Every TF model class under test; iterated by most tests below.
    all_model_classes = ()
    # Subset of classes that support generation.
    all_generative_model_classes = ()
    # Flags consumed by tests outside this chunk — TODO confirm exact consumers.
    test_mismatched_shapes = True
    test_resize_embeddings = True
    # Gates `test_headmasking` below.
    test_head_masking = True
    # Used by `test_attention_outputs` to pick encoder/decoder expectations.
    is_encoder_decoder = False
    # Gates attention-related tests; also forwarded to `config.output_attentions`.
    has_attentions = True
    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict:
        """Adapt the common `inputs_dict` to what `model_class` expects.

        Multiple-choice models get their inputs tiled along a new `num_choices` axis;
        when `return_labels` is True, the appropriate dummy label tensors are added
        based on which auto-mapping the class belongs to. The input dict is deep-copied,
        never mutated.
        """
        inputs_dict = copy.deepcopy(inputs_dict)

        if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
            # Tile every non-scalar tensor to (batch, num_choices, ...) for multiple-choice heads.
            inputs_dict = {
                k: tf.tile(tf.expand_dims(v, 1), (1, self.model_tester.num_choices) + (1,) * (v.ndim - 1))
                if isinstance(v, tf.Tensor) and v.ndim > 0
                else v
                for k, v in inputs_dict.items()
            }

        if return_labels:
            if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
                inputs_dict["labels"] = tf.ones(self.model_tester.batch_size, dtype=tf.int32)
            elif model_class in [
                *get_values(TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING),
                *get_values(TF_MODEL_FOR_DOCUMENT_QUESTION_ANSWERING_MAPPING),
            ]:
                inputs_dict["start_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
                inputs_dict["end_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
            elif model_class in [
                *get_values(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
                *get_values(TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING),
            ]:
                inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
            elif model_class in get_values(TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING):
                inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32)
            elif model_class in [
                *get_values(TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
                *get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING),
                *get_values(TF_MODEL_FOR_MASKED_LM_MAPPING),
                *get_values(TF_MODEL_FOR_PRETRAINING_MAPPING),
                *get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
                *get_values(TF_MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING),
                # Not every model in these mappings accepts `labels` in `call` yet,
                # hence the extra signature check.
            ] and "labels" in dict(inspect.signature(model_class.call).parameters):
                inputs_dict["labels"] = tf.zeros(
                    (self.model_tester.batch_size, self.model_tester.seq_length), dtype=tf.int32
                )
            elif model_class in get_values(TF_MODEL_FOR_MASKED_IMAGE_MODELING_MAPPING):
                num_patches = self.model_tester.image_size // self.model_tester.patch_size
                inputs_dict["bool_masked_pos"] = tf.zeros(
                    (self.model_tester.batch_size, num_patches**2), dtype=tf.int32
                )
            elif model_class in get_values(TF_MODEL_FOR_SEMANTIC_SEGMENTATION_MAPPING):
                batch_size, num_channels, height, width = inputs_dict["pixel_values"].shape
                inputs_dict["labels"] = tf.zeros((self.model_tester.batch_size, height, width), dtype=tf.int32)
            elif model_class.__name__.endswith("ForCTC"):
                # When we have enough CTC models for an AutoClass, we should use their mapping instead of name checks
                inputs_dict["labels"] = tf.zeros(
                    (self.model_tester.batch_size, self.model_tester.seq_length), dtype=tf.int32
                )

        return inputs_dict
    def test_initialization(self):
        # Intentionally empty: no weight-initialization checks are performed here.
        pass
181def test_save_load(self):182config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()183
184for model_class in self.all_model_classes:185model = model_class(config)186outputs = model(self._prepare_for_class(inputs_dict, model_class))187
188with tempfile.TemporaryDirectory() as tmpdirname:189model.save_pretrained(tmpdirname, saved_model=False)190
191# the config file (and the generation config file, if it can generate) should be saved192self.assertTrue(os.path.exists(os.path.join(tmpdirname, CONFIG_NAME)))193self.assertEqual(194model.can_generate(), os.path.exists(os.path.join(tmpdirname, GENERATION_CONFIG_NAME))195)196
197model = model_class.from_pretrained(tmpdirname)198after_outputs = model(self._prepare_for_class(inputs_dict, model_class))199
200self.assert_outputs_same(after_outputs, outputs)201
202def test_save_load_config(self):203config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()204
205for model_class in self.all_model_classes:206model = model_class(config)207outputs = model(self._prepare_for_class(inputs_dict, model_class))208model_config = model.get_config()209# make sure that returned config is jsonifiable, which is required by keras210json.dumps(model_config)211new_model = model_class.from_config(model.get_config())212# make sure it also accepts a normal config213_ = model_class.from_config(model.config)214_ = new_model(self._prepare_for_class(inputs_dict, model_class)) # Build model215new_model.set_weights(model.get_weights())216after_outputs = new_model(self._prepare_for_class(inputs_dict, model_class))217
218self.assert_outputs_same(after_outputs, outputs)219
220@slow221def test_saved_model_creation(self):222config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()223config.output_hidden_states = False224config.output_attentions = False225
226if hasattr(config, "use_cache"):227config.use_cache = False228
229model_class = self.all_model_classes[0]230
231class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)232model = model_class(config)233
234model(class_inputs_dict)235
236with tempfile.TemporaryDirectory() as tmpdirname:237model.save_pretrained(tmpdirname, saved_model=True)238saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")239self.assertTrue(os.path.exists(saved_model_dir))240
241def test_prepare_serving_output(self):242config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()243config.output_hidden_states = True244config.output_attentions = self.has_attentions245
246for model_class in self.all_model_classes:247model = model_class(config)248inputs = self._prepare_for_class(inputs_dict, model_class)249outputs = model(inputs)250serving_outputs = model.serving_output(outputs)251
252for k, v in serving_outputs.items():253# Check that we have one of three possible outputs: None, tuple of tensors or a tensor254if isinstance(v, tuple):255self.assertTrue(all(isinstance(elem, tf.Tensor) for elem in v))256elif v is not None:257self.assertIsInstance(v, tf.Tensor)258else:259self.assertIsNone(v)260
261def test_forward_signature(self):262config, _ = self.model_tester.prepare_config_and_inputs_for_common()263
264for model_class in self.all_model_classes:265model = model_class(config)266signature = inspect.signature(model.call)267# signature.parameters is an OrderedDict => so arg_names order is deterministic268arg_names = [*signature.parameters.keys()]269
270if model.config.is_encoder_decoder:271expected_arg_names = [272"input_ids",273"attention_mask",274"decoder_input_ids",275"decoder_attention_mask",276]277expected_arg_names.extend(["decoder_position_ids"] if "decoder_position_ids" in arg_names else [])278expected_arg_names.extend(279["head_mask", "decoder_head_mask"] if "head_mask" and "decoder_head_mask" in arg_names else []280)281expected_arg_names.extend(282["cross_attn_head_mask", "encoder_outputs"]283if "cross_attn_head_mask" in arg_names284else ["encoder_outputs"]285)286self.assertListEqual(arg_names[: len(expected_arg_names)], expected_arg_names)287
288else:289expected_arg_names = ["input_ids"]290self.assertListEqual(arg_names[:1], expected_arg_names)291
    def test_onnx_compliancy(self):
        """Verify every op each model's graph emits is exportable at `self.onnx_min_opset`."""
        if not self.test_onnx:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        # Ops that appear in TF graphs but are bookkeeping-only and never need ONNX equivalents.
        INTERNAL_OPS = [
            "Assert",
            "AssignVariableOp",
            "EmptyTensorList",
            "ReadVariableOp",
            "ResourceGather",
            "TruncatedNormal",
            "VarHandleOp",
            "VarIsInitializedOp",
        ]
        onnx_ops = []

        # NOTE(review): path is relative to the repo root — assumes tests run from there.
        with open(os.path.join(".", "utils", "tf_ops", "onnx.json")) as f:
            onnx_opsets = json.load(f)["opsets"]

        # Collect every op supported by opsets 1..onnx_min_opset (opsets are cumulative).
        for i in range(1, self.onnx_min_opset + 1):
            onnx_ops.extend(onnx_opsets[str(i)])

        for model_class in self.all_model_classes:
            model_op_names = set()

            # Build the model inside a fresh graph so its ops can be enumerated.
            with tf.Graph().as_default() as g:
                model = model_class(config)
                model.build_in_name_scope()

                for op in g.get_operations():
                    model_op_names.add(op.node_def.op)

            model_op_names = sorted(model_op_names)
            incompatible_ops = []

            for op in model_op_names:
                if op not in onnx_ops and op not in INTERNAL_OPS:
                    incompatible_ops.append(op)

            # The offending op names are passed as the failure message.
            self.assertEqual(len(incompatible_ops), 0, incompatible_ops)
334# `tf2onnx` issue page: https://github.com/onnx/tensorflow-onnx/issues/2172335# TODO: undo skip once a fix is done in `tf2onnx`336@unittest.skip("`tf2onnx` broke with TF 2.13")337@require_tf2onnx338@slow339def test_onnx_runtime_optimize(self):340if not self.test_onnx:341return342
343import onnxruntime344import tf2onnx345
346config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()347
348for model_class in self.all_model_classes[:2]:349model = model_class(config)350model.build_in_name_scope()351
352onnx_model_proto, _ = tf2onnx.convert.from_keras(model, opset=self.onnx_min_opset)353
354onnxruntime.InferenceSession(onnx_model_proto.SerializeToString())355
    def test_keras_save_load(self):
        """Round-trip each model's serializable `*MainLayer` through Keras HDF5 save/load."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        # Find, for each model class, the matching `<Name>MainLayer` class in its module,
        # keeping only real keras layers marked `_keras_serializable`.
        tf_main_layer_classes = {
            module_member
            for model_class in self.all_model_classes
            for module in (import_module(model_class.__module__),)
            for module_member_name in dir(module)
            if module_member_name.endswith("MainLayer")
            # This condition is required, since `modeling_tf_clip.py` has 3 classes whose names end with `MainLayer`.
            and module_member_name[: -len("MainLayer")] == model_class.__name__[: -len("Model")]
            for module_member in (getattr(module, module_member_name),)
            if isinstance(module_member, type)
            and keras.layers.Layer in module_member.__bases__
            and getattr(module_member, "_keras_serializable", False)
        }
        for main_layer_class in tf_main_layer_classes:
            # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
            if "T5" in main_layer_class.__name__:
                # Use the same values as in TFT5ModelTester for this shared layer.
                shared = TFSharedEmbeddings(99, 32, name="shared")
                config.use_cache = inputs_dict.pop("use_cache", None)
                main_layer = main_layer_class(config, embed_tokens=shared)
            else:
                main_layer = main_layer_class(config)

            # Symbolic (batch-agnostic) inputs mirroring the concrete input tensors.
            symbolic_inputs = {
                name: keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
            }

            model = keras.Model(symbolic_inputs, outputs=main_layer(symbolic_inputs))
            outputs = model(inputs_dict)

            with tempfile.TemporaryDirectory() as tmpdirname:
                filepath = os.path.join(tmpdirname, "keras_model.h5")
                model.save(filepath)
                if "T5" in main_layer_class.__name__:
                    model = keras.models.load_model(
                        filepath,
                        custom_objects={
                            main_layer_class.__name__: main_layer_class,
                            "TFSharedEmbeddings": TFSharedEmbeddings,
                        },
                    )
                else:
                    model = keras.models.load_model(
                        filepath, custom_objects={main_layer_class.__name__: main_layer_class}
                    )
                assert isinstance(model, keras.Model)
                after_outputs = model(inputs_dict)
                self.assert_outputs_same(after_outputs, outputs)
408def assert_outputs_same(self, after_outputs, outputs):409# Make sure we don't have nans410if isinstance(after_outputs, tf.Tensor):411out_1 = after_outputs.numpy()412elif isinstance(after_outputs, dict):413out_1 = after_outputs[list(after_outputs.keys())[0]].numpy()414else:415out_1 = after_outputs[0].numpy()416out_2 = outputs[0].numpy()417self.assertEqual(out_1.shape, out_2.shape)418out_1 = out_1[~np.isnan(out_1)]419out_2 = out_2[~np.isnan(out_2)]420max_diff = np.amax(np.abs(out_1 - out_2))421self.assertLessEqual(max_diff, 1e-5)422
    # Don't copy this method to model specific test file!
    # TODO: remove this method once the issues are all fixed!
    def _make_attention_mask_non_null(self, inputs_dict):
        """Make sure no sequence has all zeros as attention mask.

        Mutates `inputs_dict` in place: forces a `1` at position 0 of every
        attention-mask tensor present.
        """

        for k in ["attention_mask", "encoder_attention_mask", "decoder_attention_mask"]:
            if k in inputs_dict:
                attention_mask = inputs_dict[k]

                # Make sure no all 0s attention masks - to avoid failure at this moment.
                # Put `1` at the beginning of sequences to make it still work when combining causal attention masks.
                # TODO: remove this line once a fix regarding large negative values for attention mask is done.
                attention_mask = tf.concat(
                    [tf.ones_like(attention_mask[:, :1], dtype=attention_mask.dtype), attention_mask[:, 1:]], axis=-1
                )

                # Here we make the first sequence with all 0s as attention mask.
                # Currently, this will fail for `TFWav2Vec2Model`. This is caused by the different large negative
                # values, like `1e-4`, `1e-9`, `1e-30` and `-inf` for attention mask across models/frameworks.
                # TODO: enable this block once the large negative values thing is cleaned up.
                # (see https://github.com/huggingface/transformers/issues/14859)
                # attention_mask = tf.concat(
                #     [
                #         tf.zeros_like(attention_mask[:1], dtype=tf.int32),
                #         tf.cast(attention_mask[1:], dtype=tf.int32)
                #     ],
                #     axis=0
                # )

                inputs_dict[k] = attention_mask
454# Don't copy this method to model specific test file!455# TODO: remove this method once the issues are all fixed!456def _postprocessing_to_ignore_test_cases(self, tf_outputs, pt_outputs, model_class):457"""For temporarily ignoring some failed test cases (issues to be fixed)"""458
459tf_keys = {k for k, v in tf_outputs.items() if v is not None}460pt_keys = {k for k, v in pt_outputs.items() if v is not None}461
462key_differences = tf_keys.symmetric_difference(pt_keys)463
464if model_class.__name__ in [465"TFFlaubertWithLMHeadModel",466"TFFunnelForPreTraining",467"TFElectraForPreTraining",468"TFXLMWithLMHeadModel",469]:470for k in key_differences:471if k in ["loss", "losses"]:472tf_keys.discard(k)473pt_keys.discard(k)474elif model_class.__name__.startswith("TFGPT2"):475# `TFGPT2` has `past_key_values` as a tensor while `GPT2` has it as a tuple.476tf_keys.discard("past_key_values")477pt_keys.discard("past_key_values")478
479# create new outputs from the remaining fields480new_tf_outputs = type(tf_outputs)(**{k: tf_outputs[k] for k in tf_keys})481new_pt_outputs = type(pt_outputs)(**{k: pt_outputs[k] for k in pt_keys})482
483return new_tf_outputs, new_pt_outputs484
    def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=1e-5, name="outputs", attributes=None):
        """Check the outputs from PyTorch and TensorFlow models are close enough. Checks are done in a recursive way.

        Args:
            model_class: The class of the model that is currently testing. For example, `TFBertModel`,
                TFBertForMaskedLM`, `TFBertForSequenceClassification`, etc. Mainly used for providing more informative
                error messages.
            name (`str`): The name of the output. For example, `output.hidden_states`, `output.attentions`, etc.
            attributes (`Tuple[str]`): The names of the output's element if the output is a tuple/list with each element
                being a named field in the output.
        """

        self.assertEqual(type(name), str)
        if attributes is not None:
            self.assertEqual(type(attributes), tuple, f"{name}: The argument `attributes` should be a `tuple`")

        # Allow `ModelOutput` (e.g. `CLIPOutput` has `text_model_output` and `vision_model_output`).
        if isinstance(tf_outputs, ModelOutput):
            self.assertTrue(
                isinstance(pt_outputs, ModelOutput),
                f"{name}: `pt_outputs` should an instance of `ModelOutput` when `tf_outputs` is",
            )

            # Don't copy this block to model specific test file!
            # TODO: remove this method and this line after issues are fixed
            tf_outputs, pt_outputs = self._postprocessing_to_ignore_test_cases(tf_outputs, pt_outputs, model_class)

            # Only compare fields that are actually populated on both sides.
            tf_keys = [k for k, v in tf_outputs.items() if v is not None]
            pt_keys = [k for k, v in pt_outputs.items() if v is not None]

            self.assertEqual(tf_keys, pt_keys, f"{name}: Output keys differ between TF and PyTorch")

            # convert to the case of `tuple`
            # appending each key to the current (string) `names`
            attributes = tuple([f"{name}.{k}" for k in tf_keys])
            self.check_pt_tf_outputs(
                tf_outputs.to_tuple(), pt_outputs.to_tuple(), model_class, tol=tol, name=name, attributes=attributes
            )

        # Allow `list` (e.g. `TransfoXLModelOutput.mems` is a list of tensors.)
        elif type(tf_outputs) in [tuple, list]:
            self.assertEqual(type(tf_outputs), type(pt_outputs), f"{name}: Output types differ between TF and PyTorch")
            self.assertEqual(len(tf_outputs), len(pt_outputs), f"{name}: Output lengths differ between TF and PyTorch")

            if attributes is not None:
                # case 1: each output has assigned name (e.g. a tuple form of a `ModelOutput`)
                self.assertEqual(
                    len(attributes),
                    len(tf_outputs),
                    f"{name}: The tuple `names` should have the same length as `tf_outputs`",
                )
            else:
                # case 2: each output has no assigned name (e.g. hidden states of each layer) -> add an index to `names`
                attributes = tuple([f"{name}_{idx}" for idx in range(len(tf_outputs))])

            # Recurse element-wise, carrying the per-element name for error messages.
            for tf_output, pt_output, attr in zip(tf_outputs, pt_outputs, attributes):
                self.check_pt_tf_outputs(tf_output, pt_output, model_class, tol=tol, name=attr)

        elif isinstance(tf_outputs, tf.Tensor):
            self.assertTrue(
                isinstance(pt_outputs, torch.Tensor), f"{name}: `pt_outputs` should a tensor when `tf_outputs` is"
            )

            tf_outputs = tf_outputs.numpy()
            pt_outputs = pt_outputs.detach().to("cpu").numpy()

            self.assertEqual(
                tf_outputs.shape, pt_outputs.shape, f"{name}: Output shapes differ between TF and PyTorch"
            )

            # deal with NumPy's scalars to make replacing nan values by 0 work.
            if np.isscalar(tf_outputs):
                tf_outputs = np.array([tf_outputs])
                pt_outputs = np.array([pt_outputs])

            tf_nans = np.isnan(tf_outputs)
            pt_nans = np.isnan(pt_outputs)

            # Zero out NaN positions on BOTH arrays so a NaN on either side never
            # contributes to the max-diff below.
            pt_outputs[tf_nans] = 0
            tf_outputs[tf_nans] = 0
            pt_outputs[pt_nans] = 0
            tf_outputs[pt_nans] = 0

            max_diff = np.amax(np.abs(tf_outputs - pt_outputs))
            self.assertLessEqual(max_diff, tol, f"{name}: Difference between torch and tf is {max_diff} (>= {tol}).")
        else:
            # NOTE(review): this message repeats `tf.Tensor`; it likely meant to mention
            # `ModelOutput` / `list` as the other accepted kinds — confirm before rewording.
            raise ValueError(
                "`tf_outputs` should be an instance of `tf.Tensor`, a `tuple`, or an instance of `tf.Tensor`. Got"
                f" {type(tf_outputs)} instead."
            )
576def prepare_pt_inputs_from_tf_inputs(self, tf_inputs_dict):577pt_inputs_dict = {}578for name, key in tf_inputs_dict.items():579if isinstance(key, bool):580pt_inputs_dict[name] = key581elif name == "input_values":582pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)583elif name == "pixel_values":584pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)585elif name == "input_features":586pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)587# other general float inputs588elif tf_inputs_dict[name].dtype.is_floating:589pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.float32)590else:591pt_inputs_dict[name] = torch.from_numpy(key.numpy()).to(torch.long)592
593return pt_inputs_dict594
595def check_pt_tf_models(self, tf_model, pt_model, tf_inputs_dict):596pt_inputs_dict = self.prepare_pt_inputs_from_tf_inputs(tf_inputs_dict)597
598# send pytorch inputs to the correct device599pt_inputs_dict = {600k: v.to(device=torch_device) if isinstance(v, torch.Tensor) else v for k, v in pt_inputs_dict.items()601}602
603# send pytorch model to the correct device604pt_model.to(torch_device)605
606# Check predictions on first output (logits/hidden-states) are close enough given low-level computational differences607pt_model.eval()608
609with torch.no_grad():610pt_outputs = pt_model(**pt_inputs_dict)611tf_outputs = tf_model(tf_inputs_dict)612
613# tf models returned loss is usually a tensor rather than a scalar.614# (see `hf_compute_loss`: it uses `keras.losses.Reduction.NONE`)615# Change it here to a scalar to match PyTorch models' loss616tf_loss = getattr(tf_outputs, "loss", None)617if tf_loss is not None:618tf_outputs.loss = tf.math.reduce_mean(tf_loss)619
620self.check_pt_tf_outputs(tf_outputs, pt_outputs, type(tf_model))621
    @is_pt_tf_cross_test
    def test_pt_tf_model_equivalence(self, allow_missing_keys=False):
        """Check each TF model and its PyTorch twin produce matching outputs, both when
        weights are transferred model-to-model and via saved checkpoints."""
        import transformers

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

            # Output all for aggressive testing
            config.output_hidden_states = True
            config.output_attentions = self.has_attentions

            # Make sure no sequence has all zeros as attention mask, otherwise some tests fail due to the inconsistency
            # of the usage `1e-4`, `1e-9`, `1e-30`, `-inf`.
            # TODO: Use a uniform value for all models, make sure all tests pass without this processing, and remove it.
            self._make_attention_mask_non_null(inputs_dict)

            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beginning
            pt_model_class = getattr(transformers, pt_model_class_name)

            tf_model = model_class(config)
            pt_model = pt_model_class(config)

            tf_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
            tf_inputs_dict_with_labels = self._prepare_for_class(
                inputs_dict,
                model_class,
                # Not all models accept "labels" in the forward pass (yet :) )
                return_labels=True if "labels" in inspect.signature(model_class.call).parameters.keys() else False,
            )

            # For some models (e.g. base models), there is no label returned.
            # Set the input dict to `None` to avoid check outputs twice for the same input dicts.
            if not set(tf_inputs_dict_with_labels.keys()).symmetric_difference(tf_inputs_dict.keys()):
                tf_inputs_dict_with_labels = None

            # Check we can load pt model in tf and vice-versa with model => model functions
            tf_model = transformers.load_pytorch_model_in_tf2_model(
                tf_model, pt_model, tf_inputs=tf_inputs_dict, allow_missing_keys=allow_missing_keys
            )
            pt_model = transformers.load_tf2_model_in_pytorch_model(
                pt_model, tf_model, allow_missing_keys=allow_missing_keys
            )

            # Original test: check without `labels`
            self.check_pt_tf_models(tf_model, pt_model, tf_inputs_dict)
            # check with `labels`
            if tf_inputs_dict_with_labels:
                self.check_pt_tf_models(tf_model, pt_model, tf_inputs_dict_with_labels)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with tempfile.TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(
                    tf_model, pt_checkpoint_path, allow_missing_keys=allow_missing_keys
                )

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(
                    pt_model, tf_checkpoint_path, allow_missing_keys=allow_missing_keys
                )

            # Original test: check without `labels`
            self.check_pt_tf_models(tf_model, pt_model, tf_inputs_dict)
            # check with `labels`
            if tf_inputs_dict_with_labels:
                self.check_pt_tf_models(tf_model, pt_model, tf_inputs_dict_with_labels)
691@slow692def test_compile_tf_model(self):693config, _ = self.model_tester.prepare_config_and_inputs_for_common()694
695for model_class in self.all_model_classes[:2]:696# Prepare our model697model = model_class(config)698# These are maximally general inputs for the model, with multiple None dimensions699# Hopefully this will catch any conditionals that fail for flexible shapes700functional_inputs = {701key: keras.Input(shape=val.shape[1:], dtype=val.dtype, name=key)702for key, val in model.input_signature.items()703if key in model.dummy_inputs704}705outputs_dict = model(functional_inputs)706
707hidden_states = outputs_dict[0]708
709# Compile extended model710functional_model = keras.Model(inputs=functional_inputs, outputs=hidden_states)711model_out = functional_model.predict(model.dummy_inputs) # Check we can pass inputs with the Keras API712self.assertTrue(model_out is not None)713with tempfile.TemporaryDirectory() as tmpdirname:714functional_model.save(tmpdirname) # Ensure we can save/export the whole functional model715
716def test_keyword_and_dict_args(self):717config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()718
719for model_class in self.all_model_classes:720model = model_class(config)721inputs = self._prepare_for_class(inputs_dict, model_class)722
723outputs_dict = model(inputs)724
725inputs_keywords = copy.deepcopy(self._prepare_for_class(inputs_dict, model_class))726outputs_keywords = model(**inputs_keywords)727output_dict = outputs_dict[0].numpy()728output_keywords = outputs_keywords[0].numpy()729
730self.assertLess(np.sum(np.abs(output_dict - output_keywords)), 1e-6)731
    def test_attention_outputs(self):
        """Check attention tensors are returned (via kwarg and via config) with the right shapes,
        and that enabling hidden states appends outputs without reordering."""
        if not self.has_attentions:
            self.skipTest(reason="Model does not output attentions")

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True
        decoder_seq_length = getattr(self.model_tester, "decoder_seq_length", self.model_tester.seq_length)
        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length)
        decoder_key_length = getattr(self.model_tester, "key_length", decoder_seq_length)
        encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)

        def check_decoder_attentions_output(outputs):
            out_len = len(outputs)
            self.assertEqual(min(out_len % 2, out_len % 5), 0)  # differentiation due to newly added cross_attentions
            decoder_attentions = outputs.decoder_attentions
            self.assertEqual(len(decoder_attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(decoder_attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, decoder_seq_length, decoder_key_length],
            )

        def check_encoder_attentions_output(outputs):
            attentions = [
                t.numpy() for t in (outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions)
            ]
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers)
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
            )

        for model_class in self.all_model_classes:
            # First: request attentions through the input kwarg.
            inputs_dict["output_attentions"] = True
            config.output_hidden_states = False
            model = model_class(config)
            outputs = model(self._prepare_for_class(inputs_dict, model_class))
            out_len = len(outputs)  # reused below to check that hidden states only append outputs
            self.assertEqual(config.output_hidden_states, False)
            check_encoder_attentions_output(outputs)

            if self.is_encoder_decoder:
                model = model_class(config)
                outputs = model(self._prepare_for_class(inputs_dict, model_class))
                self.assertEqual(config.output_hidden_states, False)
                check_decoder_attentions_output(outputs)

            # Check that output attentions can also be changed via the config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            outputs = model(self._prepare_for_class(inputs_dict, model_class))
            self.assertEqual(config.output_hidden_states, False)
            check_encoder_attentions_output(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            config.output_hidden_states = True
            model = model_class(config)
            outputs = model(self._prepare_for_class(inputs_dict, model_class))

            # Hidden states add one extra output (two for encoder-decoder models).
            self.assertEqual(out_len + (2 if self.is_encoder_decoder else 1), len(outputs))
            self.assertEqual(model.config.output_hidden_states, True)
            check_encoder_attentions_output(outputs)
    def test_headmasking(self):
        """Zero out the first head of the first layer and the last head of the last layer,
        then check the corresponding attention rows are (almost) zero while others are not."""
        if not self.test_head_masking:
            return

        random.Random().seed(42)
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        random.Random().seed()

        inputs_dict["output_attentions"] = True
        config.output_hidden_states = True
        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)

            # Prepare head_mask
            def prepare_layer_head_mask(i, attention_heads, num_hidden_layers):
                # First layer: mask head 0; last layer: mask the last head; other layers: no masking.
                if i == 0:
                    return tf.concat(
                        (tf.zeros(1, dtype=tf.float32), tf.ones(attention_heads - 1, dtype=tf.float32)), 0
                    )
                elif i == num_hidden_layers - 1:
                    return tf.concat(
                        (tf.zeros(attention_heads - 1, dtype=tf.float32), tf.ones(1, dtype=tf.float32)), 0
                    )
                else:
                    return tf.ones(attention_heads, dtype=tf.float32)

            head_mask = tf.stack(
                [
                    prepare_layer_head_mask(i, config.num_attention_heads, config.num_hidden_layers)
                    for i in range(config.num_hidden_layers)
                ],
                0,
            )

            inputs = self._prepare_for_class(inputs_dict, model_class).copy()
            inputs["head_mask"] = head_mask
            if model.config.is_encoder_decoder:
                signature = inspect.signature(model.call)
                arg_names = [*signature.parameters.keys()]
                if "decoder_head_mask" in arg_names:  # necessary differentiation because of T5 model
                    inputs["decoder_head_mask"] = head_mask
                if "cross_attn_head_mask" in arg_names:
                    inputs["cross_attn_head_mask"] = head_mask

            outputs = model(**inputs, return_dict=True)

            def check_attentions_validity(attentions):
                # Remove Nan
                for t in attentions:
                    self.assertLess(
                        (tf.math.reduce_sum(tf.cast(tf.math.is_nan(t), tf.float32))).numpy(), (tf.size(t) / 4).numpy()
                    )  # Check we don't have more than 25% nans (arbitrary)

                attentions = [
                    tf.where(tf.math.is_nan(t), 0.0, t) for t in attentions
                ]  # remove them (the test is less complete)

                # Masked heads must sum to ~0; unmasked heads must not.
                self.assertAlmostEqual(tf.math.reduce_sum(attentions[0][..., 0, :, :]).numpy(), 0.0)
                self.assertNotEqual(tf.math.reduce_sum(attentions[0][..., -1, :, :]).numpy(), 0.0)
                if len(attentions) > 2:  # encoder-decoder models have only 2 layers in each module
                    self.assertNotEqual(tf.math.reduce_sum(attentions[1][..., 0, :, :]).numpy(), 0.0)
                self.assertAlmostEqual(tf.math.reduce_sum(attentions[-1][..., -2, :, :]).numpy(), 0.0)
                self.assertNotEqual(tf.math.reduce_sum(attentions[-1][..., -1, :, :]).numpy(), 0.0)

            if model.config.is_encoder_decoder:
                check_attentions_validity(outputs.encoder_attentions)
                check_attentions_validity(outputs.decoder_attentions)
                # NOTE(review): `arg_names` is only bound when `is_encoder_decoder` was true
                # above — fine today since both checks use the same condition, but fragile.
                if "cross_attn_head_mask" in arg_names:
                    check_attentions_validity(outputs.cross_attentions)
            else:
                check_attentions_validity(outputs.attentions)
869def test_hidden_states_output(self):870config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()871
872def check_hidden_states_output(config, inputs_dict, model_class):873model = model_class(config)874outputs = model(self._prepare_for_class(inputs_dict, model_class))875expected_num_layers = getattr(876self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1877)878
879if model.config.is_encoder_decoder:880encoder_hidden_states = outputs.encoder_hidden_states881decoder_hidden_states = outputs.decoder_hidden_states882
883self.assertEqual(config.output_attentions, False)884self.assertEqual(len(encoder_hidden_states), expected_num_layers)885self.assertListEqual(886list(encoder_hidden_states[0].shape[-2:]),887[self.model_tester.seq_length, self.model_tester.hidden_size],888)889self.assertEqual(len(decoder_hidden_states), expected_num_layers)890self.assertListEqual(891list(decoder_hidden_states[0].shape[-2:]),892[self.model_tester.seq_length, self.model_tester.hidden_size],893)894else:895hidden_states = outputs.hidden_states896self.assertEqual(config.output_attentions, False)897self.assertEqual(len(hidden_states), expected_num_layers)898self.assertListEqual(899list(hidden_states[0].shape[-2:]),900[self.model_tester.seq_length, self.model_tester.hidden_size],901)902
903for model_class in self.all_model_classes:904inputs_dict["output_hidden_states"] = True905check_hidden_states_output(config, inputs_dict, model_class)906
907del inputs_dict["output_hidden_states"]908config.output_hidden_states = True909check_hidden_states_output(config, inputs_dict, model_class)910
911def test_model_common_attributes(self):912config, _ = self.model_tester.prepare_config_and_inputs_for_common()913text_in_text_out_models = (914get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING)915+ get_values(TF_MODEL_FOR_MASKED_LM_MAPPING)916+ get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING)917)918speech_in_text_out_models = get_values(TF_MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING)919
920for model_class in self.all_model_classes:921model = model_class(config)922self.assertIsInstance(model.get_input_embeddings(), keras.layers.Layer)923
924legacy_text_in_text_out = model.get_lm_head() is not None925if model_class in text_in_text_out_models or legacy_text_in_text_out:926out_embeddings = model.get_output_embeddings()927self.assertIsInstance(out_embeddings, keras.layers.Layer)928bias = model.get_bias()929if bias is not None:930self.assertIsInstance(bias, dict)931for _, v in bias.items():932self.assertIsInstance(v, tf.Variable)933elif model_class in speech_in_text_out_models:934out_embeddings = model.get_output_embeddings()935self.assertIsInstance(out_embeddings, keras.layers.Layer)936bias = model.get_bias()937self.assertIsNone(bias)938else:939out_embeddings = model.get_output_embeddings()940assert out_embeddings is None941bias = model.get_bias()942self.assertIsNone(bias)943
944def test_determinism(self):945config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()946
947for model_class in self.all_model_classes:948model = model_class(config)949first, second = (950model(self._prepare_for_class(inputs_dict, model_class), training=False)[0],951model(self._prepare_for_class(inputs_dict, model_class), training=False)[0],952)953out_1 = first.numpy()954out_2 = second.numpy()955out_1 = out_1[~np.isnan(out_1)]956out_2 = out_2[~np.isnan(out_2)]957max_diff = np.amax(np.abs(out_1 - out_2))958self.assertLessEqual(max_diff, 1e-5)959
960def test_model_outputs_equivalence(self):961config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()962
963def check_equivalence(model, tuple_inputs, dict_inputs, additional_kwargs={}):964tuple_output = model(tuple_inputs, return_dict=False, **additional_kwargs)965dict_output = model(dict_inputs, return_dict=True, **additional_kwargs).to_tuple()966
967def recursive_check(tuple_object, dict_object):968if isinstance(tuple_object, (List, Tuple)):969for tuple_iterable_value, dict_iterable_value in zip(tuple_object, dict_object):970recursive_check(tuple_iterable_value, dict_iterable_value)971elif tuple_object is None:972return973else:974self.assertTrue(975all(tf.equal(tuple_object, dict_object)),976msg=(977"Tuple and dict output are not equal. Difference:"978f" {tf.math.reduce_max(tf.abs(tuple_object - dict_object))}"979),980)981
982recursive_check(tuple_output, dict_output)983
984for model_class in self.all_model_classes:985model = model_class(config)986
987tuple_inputs = self._prepare_for_class(inputs_dict, model_class)988dict_inputs = self._prepare_for_class(inputs_dict, model_class)989check_equivalence(model, tuple_inputs, dict_inputs)990
991tuple_inputs = self._prepare_for_class(inputs_dict, model_class)992dict_inputs = self._prepare_for_class(inputs_dict, model_class)993check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})994
995if self.has_attentions:996tuple_inputs = self._prepare_for_class(inputs_dict, model_class)997dict_inputs = self._prepare_for_class(inputs_dict, model_class)998check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})999
1000# Not all models accept "labels" in the forward pass (yet :) )1001if "labels" in inspect.signature(model.call).parameters.keys():1002tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)1003dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)1004check_equivalence(model, tuple_inputs, dict_inputs)1005
1006tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)1007dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)1008check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})1009
1010if self.has_attentions:1011tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)1012dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)1013check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})1014
1015tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)1016dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)1017check_equivalence(1018model, tuple_inputs, dict_inputs, {"output_hidden_states": True, "output_attentions": True}1019)1020
1021def test_inputs_embeds(self):1022config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()1023
1024for model_class in self.all_model_classes:1025model = model_class(config)1026
1027inputs = copy.deepcopy(inputs_dict)1028
1029if not self.is_encoder_decoder:1030input_ids = inputs["input_ids"]1031del inputs["input_ids"]1032else:1033encoder_input_ids = inputs["input_ids"]1034decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids)1035del inputs["input_ids"]1036inputs.pop("decoder_input_ids", None)1037
1038if not self.is_encoder_decoder:1039inputs["inputs_embeds"] = model.get_input_embeddings()(input_ids)1040else:1041inputs["inputs_embeds"] = model.get_input_embeddings()(encoder_input_ids)1042inputs["decoder_inputs_embeds"] = model.get_input_embeddings()(decoder_input_ids)1043
1044inputs = self._prepare_for_class(inputs, model_class)1045
1046model(inputs)1047
1048def test_numpy_arrays_inputs(self):1049config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()1050
1051def prepare_numpy_arrays(inputs_dict):1052inputs_np_dict = {}1053for k, v in inputs_dict.items():1054if tf.is_tensor(v):1055inputs_np_dict[k] = v.numpy()1056else:1057inputs_np_dict[k] = np.array(k)1058
1059return inputs_np_dict1060
1061for model_class in self.all_model_classes:1062model = model_class(config)1063
1064inputs = self._prepare_for_class(inputs_dict, model_class)1065inputs_np = prepare_numpy_arrays(inputs)1066
1067output_for_dict_input = model(inputs_np)1068output_for_kw_input = model(**inputs_np)1069self.assert_outputs_same(output_for_dict_input, output_for_kw_input)1070
1071def test_valid_input_signature_and_dummies(self):1072config, _ = self.model_tester.prepare_config_and_inputs_for_common()1073for model_class in self.all_model_classes:1074model = model_class(config)1075call_args = inspect.signature(model.call).parameters1076for key in model.input_signature:1077self.assertIn(key, call_args)1078for key in model.dummy_inputs:1079self.assertIn(key, call_args)1080
    def test_resize_token_embeddings(self):
        """`resize_token_embeddings` must resize input embeddings, bias and output
        embeddings to the requested size while leaving the overlapping weights intact."""
        # TODO (joao): after the embeddings refactor is complete, rework this test so as to rely exclusively on
        # keras.layers.Embedding

        if not self.test_resize_embeddings:
            return
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        def _get_word_embedding_weight(model, embedding_layer):
            # Plain keras Embedding layers only expose `.embeddings` after being built.
            if isinstance(embedding_layer, keras.layers.Embedding):
                # builds the embeddings layer
                model.build_in_name_scope()
                return embedding_layer.embeddings
            else:
                return model._get_word_embedding_weight(embedding_layer)

        for model_class in self.all_model_classes:
            # `size=None` requests a resize to the current vocab size (no-op resize).
            for size in [config.vocab_size - 10, config.vocab_size + 10, None]:
                # build the embeddings
                model = model_class(config=copy.deepcopy(config))  # `resize_token_embeddings` mutates `config`
                old_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
                old_bias = model.get_bias()
                old_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())
                # reshape the embeddings
                model.resize_token_embeddings(size)
                new_input_embeddings = _get_word_embedding_weight(model, model.get_input_embeddings())
                new_bias = model.get_bias()
                new_output_embeddings = _get_word_embedding_weight(model, model.get_output_embeddings())

                # check that the resized embeddings size matches the desired size.
                assert_size = size if size is not None else config.vocab_size
                self.assertEqual(new_input_embeddings.shape[0], assert_size)

                # check that weights remain the same after resizing
                # (zip truncates to the overlapping rows, so only shared rows are compared)
                models_equal = True
                for p1, p2 in zip(old_input_embeddings.value(), new_input_embeddings.value()):
                    if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
                        models_equal = False
                self.assertTrue(models_equal)

                if old_bias is not None and new_bias is not None:
                    for old_weight, new_weight in zip(old_bias.values(), new_bias.values()):
                        self.assertEqual(new_weight.shape[-1], assert_size)

                        models_equal = True
                        for p1, p2 in zip(tf.squeeze(old_weight), tf.squeeze(new_weight)):
                            if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
                                models_equal = False
                        self.assertTrue(models_equal)

                if old_output_embeddings is not None and new_output_embeddings is not None:
                    self.assertEqual(new_output_embeddings.shape[0], assert_size)
                    # hidden dimension must be untouched by the vocab resize
                    self.assertEqual(new_output_embeddings.shape[1], old_output_embeddings.shape[1])

                    models_equal = True
                    for p1, p2 in zip(old_output_embeddings.value(), new_output_embeddings.value()):
                        if tf.math.reduce_sum(tf.math.abs(p1 - p2)) > 0:
                            models_equal = False
                    self.assertTrue(models_equal)
    # TODO (Joao): this test is not slow, but it's tagged as such to keep track of failures on the scheduled CI runs,
    # while passing push CI. Fix the underlying issues and remove the tag.
    @slow
    def test_save_load_after_resize_token_embeddings(self):
        """After enlarging the vocabulary, a save/load round-trip must reproduce the
        outputs computed for inputs drawn exclusively from the newly-added tokens."""
        if not self.test_resize_embeddings:
            return
        config, original_inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            # create a model with resized (expanded) embeddings
            new_tokens_size = 10
            old_total_size = config.vocab_size
            new_total_size = old_total_size + new_tokens_size
            model = model_class(config=copy.deepcopy(config))  # `resize_token_embeddings` mutates `config`
            model.build_in_name_scope()
            model.resize_token_embeddings(new_total_size)

            # fetch the output for an input exclusively made of new members of the vocabulary
            inputs_dict = copy.deepcopy(original_inputs_dict)
            ids_feat_name = None
            if "input_ids" in inputs_dict:
                ids_feat_name = "input_ids"
            elif "decoder_input_ids" in inputs_dict:
                ids_feat_name = "decoder_input_ids"
            else:
                assert False, "No input ids feature found in the inputs dict"

            # ids in [old_total_size, new_total_size): only the freshly-added tokens
            new_vocab_input_ids = ids_tensor(inputs_dict[ids_feat_name].shape, new_tokens_size)
            new_vocab_input_ids += old_total_size
            # NOTE(review): this assignment is redundant — the two `if` blocks below
            # already cover both possible values of `ids_feat_name`.
            inputs_dict[ids_feat_name] = new_vocab_input_ids
            if "input_ids" in inputs_dict:
                inputs_dict["input_ids"] = new_vocab_input_ids
            if "decoder_input_ids" in inputs_dict:
                inputs_dict["decoder_input_ids"] = new_vocab_input_ids
            prepared_inputs = self._prepare_for_class(inputs_dict, model_class)
            outputs = model(**prepared_inputs)

            # save and load the model
            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname, saved_model=False)
                model = model_class.from_pretrained(tmpdirname)
                restored_model_outputs = model(**prepared_inputs)

                # check that the output for the restored model is the same
                self.assert_outputs_same(restored_model_outputs, outputs)
1187@unittest.skipIf(1188not is_tf_available() or len(tf.config.list_physical_devices("GPU")) == 0,1189reason="This test always passes on CPU.",1190)1191def test_embeddings_out_of_bounds_raise_exception(self):1192# TF embeddings layers don't raise an exception when an index is out of bounds on GPU, so we manually raise it.1193# This test should only fail on GPU for models where we haven't added the safety check.1194if not self.test_resize_embeddings:1195return1196config, original_inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()1197
1198for model_class in self.all_model_classes:1199model = model_class(config=config)1200inputs_dict = copy.deepcopy(original_inputs_dict)1201if "input_ids" in inputs_dict:1202inputs_dict["input_ids"] = inputs_dict["input_ids"] * int(1e9)1203if "decoder_input_ids" in inputs_dict:1204inputs_dict["decoder_input_ids"] = inputs_dict["decoder_input_ids"] * int(1e9)1205prepared_inputs = self._prepare_for_class(inputs_dict, model_class)1206with self.assertRaises(tf.errors.InvalidArgumentError):1207model(**prepared_inputs)1208
    def test_lm_head_model_random_no_beam_search_generate(self):
        """Smoke-test greedy/sampling generation (no beam search): input requirements,
        multiple return sequences, and `bad_words_ids` filtering."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        input_ids = inputs_dict.get("input_ids", None)

        # iterate over all generative models
        for model_class in self.all_generative_model_classes:
            model = model_class(config)

            if config.bos_token_id is None:
                # if bos token id is not defined model needs input_ids
                with self.assertRaises(ValueError):
                    model.generate(do_sample=True, max_length=5)
                # num_return_sequences = 1
                self._check_generated_ids(model.generate(input_ids, do_sample=True))
            elif model_class.__name__ not in ["TFSpeech2TextForConditionalGeneration"]:
                # Models with non-text inputs won't work here; num_return_sequences = 1
                self._check_generated_ids(model.generate(do_sample=True, max_length=5))

            with self.assertRaises(ValueError):
                # generating multiple sequences when no beam search generation
                # is not allowed as it would always generate the same sequences
                model.generate(input_ids, do_sample=False, num_return_sequences=2)

            # num_return_sequences > 1, sample
            self._check_generated_ids(model.generate(input_ids, do_sample=True, num_return_sequences=2))

            # check bad words tokens language generation
            # create list of 1-seq bad token and list of 2-seq of bad tokens
            bad_words_ids = [self._generate_random_bad_tokens(1, model), self._generate_random_bad_tokens(2, model)]
            output_tokens = model.generate(
                input_ids, do_sample=True, bad_words_ids=bad_words_ids, num_return_sequences=2
            )
            # only count generated tokens
            generated_ids = output_tokens[:, input_ids.shape[-1] :]
            self.assertFalse(self._check_match_tokens(generated_ids.numpy().tolist(), bad_words_ids))
1245def test_lm_head_model_no_beam_search_generate_dict_outputs(self):1246config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()1247input_ids = inputs_dict.get("input_ids", None)1248if input_ids is None:1249input_ids = inputs_dict.get("input_features", None)1250
1251# iterate over all generative models1252for model_class in self.all_generative_model_classes:1253model = model_class(config)1254output_greedy = model.generate(1255input_ids,1256do_sample=False,1257output_scores=True,1258output_hidden_states=True,1259output_attentions=True,1260return_dict_in_generate=True,1261)1262output_sample = model.generate(1263input_ids,1264do_sample=True,1265output_scores=True,1266output_hidden_states=True,1267output_attentions=True,1268return_dict_in_generate=True,1269)1270
1271if model.config.is_encoder_decoder:1272self.assertIsInstance(output_greedy, TFGreedySearchEncoderDecoderOutput)1273self.assertIsInstance(output_sample, TFSampleEncoderDecoderOutput)1274else:1275self.assertIsInstance(output_greedy, TFGreedySearchDecoderOnlyOutput)1276self.assertIsInstance(output_sample, TFSampleDecoderOnlyOutput)1277
    def test_lm_head_model_random_beam_search_generate(self):
        """Smoke-test beam-search generation: input requirements, beam/return-sequence
        constraints, and `bad_words_ids` filtering."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        input_ids = inputs_dict.get("input_ids", None)

        for model_class in self.all_generative_model_classes:
            model = model_class(config)

            if config.bos_token_id is None:
                # if bos token id is not defined model needs input_ids, num_return_sequences = 1
                self._check_generated_ids(model.generate(input_ids, do_sample=True, num_beams=2))
            else:
                # num_return_sequences = 1
                self._check_generated_ids(model.generate(do_sample=True, max_length=5, num_beams=2))

            with self.assertRaises(ValueError):
                # generating more sequences than having beams leads is not possible
                model.generate(input_ids, do_sample=False, num_return_sequences=3, num_beams=2)

            # num_return_sequences > 1, sample
            self._check_generated_ids(
                model.generate(
                    input_ids,
                    do_sample=True,
                    num_beams=2,
                    num_return_sequences=2,
                )
            )
            # num_return_sequences > 1, greedy
            self._check_generated_ids(model.generate(input_ids, do_sample=False, num_beams=2, num_return_sequences=2))

            # check bad words tokens language generation
            # create list of 1-seq bad token and list of 2-seq of bad tokens
            bad_words_ids = [self._generate_random_bad_tokens(1, model), self._generate_random_bad_tokens(2, model)]
            output_tokens = model.generate(
                input_ids, do_sample=False, bad_words_ids=bad_words_ids, num_beams=2, num_return_sequences=2
            )
            # only count generated tokens
            generated_ids = output_tokens[:, input_ids.shape[-1] :]
            self.assertFalse(self._check_match_tokens(generated_ids.numpy().tolist(), bad_words_ids))
1318def test_lm_head_model_beam_search_generate_dict_outputs(self):1319config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()1320input_ids = inputs_dict.get("input_ids", None)1321if input_ids is None:1322input_ids = inputs_dict.get("input_features", None)1323
1324# iterate over all generative models1325for model_class in self.all_generative_model_classes:1326model = model_class(config)1327output_beam_search = model.generate(1328input_ids,1329num_beams=2,1330do_sample=False,1331output_scores=True,1332output_hidden_states=True,1333output_attentions=True,1334return_dict_in_generate=True,1335)1336output_beam_sample = model.generate(1337input_ids,1338num_beams=2,1339do_sample=True,1340output_scores=True,1341output_hidden_states=True,1342output_attentions=True,1343return_dict_in_generate=True,1344)1345
1346if model.config.is_encoder_decoder:1347self.assertIsInstance(output_beam_search, TFBeamSearchEncoderDecoderOutput)1348self.assertIsInstance(output_beam_sample, TFBeamSampleEncoderDecoderOutput)1349else:1350self.assertIsInstance(output_beam_search, TFBeamSearchDecoderOnlyOutput)1351self.assertIsInstance(output_beam_sample, TFBeamSampleDecoderOnlyOutput)1352
    def test_loss_computation(self):
        """Check that models with separable labels compute a loss of the expected shape
        when the labels are passed as kwargs, masked, as a dict, and as a positional tuple."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        for model_class in self.all_model_classes:
            model = model_class(config)
            # The number of elements in the loss should be the same as the number of elements in the label
            prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
            added_label_names = sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)
            if not added_label_names:
                continue  # This test is only for models with easily-separable labels
            added_label = prepared_for_class[added_label_names[0]]
            # loss is expected to have either the batch dimension of the label, or to be scalar-shaped ([1])
            expected_loss_size = added_label.shape.as_list()[:1]

            # Test that model correctly compute the loss with kwargs
            prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
            possible_input_names = {"input_ids", "pixel_values", "input_features", "input_values"}
            input_name = possible_input_names.intersection(set(prepared_for_class)).pop()
            model_input = prepared_for_class.pop(input_name)

            outputs = model(model_input, **prepared_for_class)
            if not isinstance(outputs, ModelOutput) or not hasattr(outputs, "loss"):
                continue

            loss = outputs.loss
            self.assertTrue(loss.shape.as_list() == expected_loss_size or loss.shape.as_list() == [1])

            # Test that model correctly compute the loss when we mask some positions
            prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
            possible_input_names = {"input_ids", "pixel_values", "input_features", "input_values"}
            input_name = possible_input_names.intersection(set(prepared_for_class)).pop()
            model_input = prepared_for_class.pop(input_name)
            if "labels" in prepared_for_class:
                labels = prepared_for_class["labels"].numpy()
                if len(labels.shape) > 1 and labels.shape[1] != 1:
                    # -100 is the conventional ignore index for masked label positions
                    labels[0] = -100
                    prepared_for_class["labels"] = tf.convert_to_tensor(labels)
                    loss = model(model_input, **prepared_for_class)[0]
                    self.assertTrue(loss.shape.as_list() == expected_loss_size or loss.shape.as_list() == [1])
                    self.assertTrue(not np.any(np.isnan(loss.numpy())))

            # Test that model correctly compute the loss with a dict
            prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
            loss = model(prepared_for_class)[0]
            self.assertTrue(loss.shape.as_list() == expected_loss_size or loss.shape.as_list() == [1])

            # Test that model correctly compute the loss with a tuple
            prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)

            # Get keys that were added with the _prepare_for_class function
            label_keys = prepared_for_class.keys() - inputs_dict.keys()
            signature = inspect.signature(model.call).parameters
            signature_names = list(signature.keys())

            # Create a dictionary holding the location of the tensors in the tuple
            tuple_index_mapping = {0: input_name}
            for label_key in label_keys:
                label_key_index = signature_names.index(label_key)
                tuple_index_mapping[label_key_index] = label_key
            sorted_tuple_index_mapping = sorted(tuple_index_mapping.items())
            # Initialize a list with their default values, update the values and convert to a tuple
            list_input = []

            for name in signature_names:
                if name != "kwargs":
                    list_input.append(signature[name].default)

            for index, value in sorted_tuple_index_mapping:
                list_input[index] = prepared_for_class[value]

            tuple_input = tuple(list_input)

            # Send to model (dropping the trailing signature entry, which is not a real input)
            loss = model(tuple_input[:-1])[0]

            self.assertTrue(loss.shape.as_list() == expected_loss_size or loss.shape.as_list() == [1])
1428def check_keras_fit_results(self, val_loss1, val_loss2, atol=1e-2, rtol=1e-3):1429self.assertTrue(np.allclose(val_loss1, val_loss2, atol=atol, rtol=rtol))1430
    @slow
    def test_keras_fit(self):
        """Check that models train with `keras.Model.fit`, both with labels inside the
        input dict and with labels passed separately, yielding matching val losses."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        for model_class in self.all_model_classes:
            model = model_class(config)
            # Test that model correctly compute the loss with kwargs
            prepared_for_class = self._prepare_for_class(inputs_dict.copy(), model_class, return_labels=True)
            # We also remove "return_loss" as this is covered by the train_step when using fit()
            prepared_for_class = {
                key: val
                for key, val in prepared_for_class.items()
                if key not in ("head_mask", "decoder_head_mask", "cross_attn_head_mask", "return_loss")
            }
            if "labels" in prepared_for_class and "decoder_input_ids" in prepared_for_class:
                del prepared_for_class["decoder_input_ids"]

            # Track sparse categorical accuracy for the model families where it is meaningful.
            accuracy_classes = [
                "ForPreTraining",
                "ForCausalLM",
                "ForMaskedLM",
                "ForQuestionAnswering",
                "ForMultipleChoice",
                "ForSequenceClassification",
                "ForTokenClassification",
                "ForNextSentencePrediction",
                "LMHeadModel",
            ]
            for accuracy_class in accuracy_classes:
                if model.__class__.__name__.endswith(accuracy_class):
                    metrics = [keras.metrics.SparseCategoricalAccuracy()]
                    break
            else:
                metrics = []

            if hasattr(self.model_tester, "batch_size"):
                sample_weight = tf.convert_to_tensor([0.5] * self.model_tester.batch_size, dtype=tf.float32)
            else:
                sample_weight = None
            # Build the model so we can get some constant weights and check outputs
            outputs = model(prepared_for_class)
            if getattr(outputs, "loss", None) is None:
                continue
            model_weights = model.get_weights()

            # Run eagerly to save some expensive compilation times
            model.compile(optimizer=keras.optimizers.SGD(0.0), run_eagerly=True, metrics=metrics)
            # Make sure the model fits without crashing regardless of where we pass the labels
            history1 = model.fit(
                prepared_for_class,
                validation_data=prepared_for_class,
                sample_weight=sample_weight,
                steps_per_epoch=1,
                validation_steps=1,
                shuffle=False,
            )
            val_loss1 = history1.history["val_loss"][0]
            self.assertTrue(not isnan(val_loss1))
            accuracy1 = {key: val[0] for key, val in history1.history.items() if key.endswith("accuracy")}

            possible_label_cols = {
                "labels",
                "label",
                "label_ids",
                "start_positions",
                "start_position",
                "end_positions",
                "end_position",
                "next_sentence_label",
            }
            label_names = possible_label_cols.intersection(set(prepared_for_class))
            if len(label_names) == 0:
                # The next tests only make sense for models with separate inputs and labels, and do not make
                # sense for models that don't clearly distinguish between the two (e.g. CLIP)
                # NOTE(review): `return` (not `continue`) also aborts the remaining model
                # classes — presumably intentional since they share the same tester; confirm.
                return
            labels = {key: val for key, val in prepared_for_class.items() if key in label_names}
            inputs_minus_labels = {key: val for key, val in prepared_for_class.items() if key not in label_names}
            self.assertGreater(len(inputs_minus_labels), 0)

            # We reinitialize the model here even though our learning rate was zero
            # because BatchNorm updates weights by means other than gradient descent.
            model.set_weights(model_weights)

            history2 = model.fit(
                inputs_minus_labels,
                labels,
                validation_data=(inputs_minus_labels, labels),
                sample_weight=sample_weight,
                steps_per_epoch=1,
                validation_steps=1,
                shuffle=False,
            )
            val_loss2 = history2.history["val_loss"][0]
            self.assertTrue(not isnan(val_loss2))
            accuracy2 = {key: val[0] for key, val in history2.history.items() if key.endswith("accuracy")}
            self.check_keras_fit_results(val_loss1, val_loss2)
            self.assertEqual(history1.history.keys(), history2.history.keys())
            for key in history1.history.keys():
                if not key.startswith("val_"):
                    self.assertTrue("val_" + key in history1.history.keys(), "Outputs differ in train/test step!")
            if metrics:
                self.assertTrue(len(accuracy1) == len(accuracy2) > 0, "Missing metrics!")
    def test_int_support(self):
        """Check that models accept both int64 and int32 inputs, and that their dummy
        inputs and input signatures consistently use int32."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        for model_class in self.all_model_classes:
            prepared_for_class = self._prepare_for_class(
                inputs_dict.copy(),
                model_class,
                return_labels=True if "labels" in inspect.signature(model_class.call).parameters.keys() else False,
            )
            if not any(
                tensor.dtype.is_integer for tensor in prepared_for_class.values() if isinstance(tensor, tf.Tensor)
            ):
                # NOTE(review): `return` (not `continue`) also skips the remaining model
                # classes — presumably fine since they share the same prepared inputs; confirm.
                return  # No integer inputs means no need for this test

            # Cast every integer tensor to int64 and make sure the model still runs...
            prepared_for_class = {
                key: tf.cast(tensor, tf.int64) if isinstance(tensor, tf.Tensor) and tensor.dtype.is_integer else tensor
                for key, tensor in prepared_for_class.items()
            }
            model = model_class(config)
            model(**prepared_for_class)  # No assertion, we're just checking this doesn't throw an error
            # ...then the same with int32.
            int32_prepared_for_class = {
                key: tf.cast(tensor, tf.int32) if isinstance(tensor, tf.Tensor) and tensor.dtype.is_integer else tensor
                for key, tensor in prepared_for_class.items()
            }
            model(**int32_prepared_for_class)  # No assertion, we're just checking this doesn't throw an error

            # After testing that the model accepts all int inputs, confirm that its dummies are int32
            for key, tensor in model.dummy_inputs.items():
                self.assertTrue(
                    isinstance(tensor, tf.Tensor) or keras.backend.is_keras_tensor(tensor),
                    "Dummy inputs should be tf.Tensor!",
                )
                if tensor.dtype.is_integer:
                    self.assertTrue(tensor.dtype == tf.int32, "Integer dummy inputs should be tf.int32!")

            # Also confirm that the input_signature uses int32
            for key, tensor_spec in model.input_signature.items():
                if tensor_spec.dtype.is_integer:
                    self.assertTrue(tensor_spec.dtype == tf.int32, "Input signatures should use tf.int32 for ints!")
1572def test_generate_with_headmasking(self):1573attention_names = ["encoder_attentions", "decoder_attentions", "cross_attentions"]1574config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()1575
1576for model_class in self.all_generative_model_classes:1577model = model_class(config)1578
1579# We want to test only encoder-decoder models1580if not config.is_encoder_decoder:1581continue1582
1583head_masking = {1584"head_mask": tf.zeros((config.encoder_layers, config.encoder_attention_heads)),1585"decoder_head_mask": tf.zeros((config.decoder_layers, config.decoder_attention_heads)),1586"cross_attn_head_mask": tf.zeros((config.decoder_layers, config.decoder_attention_heads)),1587}1588
1589signature = inspect.signature(model.call)1590if set(head_masking.keys()) < {*signature.parameters.keys()}:1591continue1592
1593for attn_name, (name, mask) in zip(attention_names, head_masking.items()):1594out = model.generate(1595inputs_dict["input_ids"],1596num_beams=1,1597max_length=inputs_dict["input_ids"] + 5,1598output_attentions=True,1599return_dict_in_generate=True,1600**{name: mask},1601)1602# We check the state of decoder_attentions and cross_attentions just from the last step1603attn_weights = out[attn_name] if attn_name == attention_names[0] else out[attn_name][-1]1604self.assertEqual(sum([tf.reduce_sum(w).numpy() for w in attn_weights]), 0.0)1605
1606def test_load_with_mismatched_shapes(self):1607if not self.test_mismatched_shapes:1608return1609config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()1610
1611for model_class in self.all_model_classes:1612if model_class not in get_values(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING):1613continue1614
1615with self.subTest(msg=f"Testing {model_class}"):1616with tempfile.TemporaryDirectory() as tmp_dir:1617model = model_class(config)1618inputs = self._prepare_for_class(inputs_dict, model_class)1619_ = model(**inputs)1620model.save_pretrained(tmp_dir)1621
1622# Fails when we don't set ignore_mismatched_sizes=True1623with self.assertRaises(ValueError):1624new_model = TFAutoModelForSequenceClassification.from_pretrained(tmp_dir, num_labels=42)1625with self.assertRaises(ValueError):1626new_model_without_prefix = TFAutoModel.from_pretrained(tmp_dir, vocab_size=10)1627
1628logger = logging.get_logger("transformers.modeling_tf_utils")1629with CaptureLogger(logger) as cl:1630new_model = TFAutoModelForSequenceClassification.from_pretrained(1631tmp_dir, num_labels=42, ignore_mismatched_sizes=True1632)1633self.assertIn("the shapes did not match", cl.out)1634
1635logits = new_model(**inputs).logits1636self.assertEqual(logits.shape[1], 42)1637
1638with CaptureLogger(logger) as cl:1639new_model_without_prefix = TFAutoModel.from_pretrained(1640tmp_dir, vocab_size=10, ignore_mismatched_sizes=True1641)1642self.assertIn("the shapes did not match", cl.out)1643
1644# Although Tf models always have a prefix pointing to `MainLayer`,1645# we still add this "without prefix" test to keep a consistency between tf and pt tests.1646input_ids = ids_tensor((2, 8), 10)1647if self.is_encoder_decoder:1648new_model_without_prefix(input_ids, decoder_input_ids=input_ids)1649else:1650new_model_without_prefix(input_ids)1651
1652def test_model_main_input_name(self):1653for model_class in self.all_model_classes:1654model_signature = inspect.signature(getattr(model_class, "call"))1655# The main input is the name of the argument after `self`1656observed_main_input_name = list(model_signature.parameters.keys())[1]1657self.assertEqual(model_class.main_input_name, observed_main_input_name)1658
    def test_dataset_conversion(self):
        """Checks `model.prepare_tf_dataset`: a `datasets.Dataset` built from model inputs must
        round-trip into a tf.data pipeline with unknown columns dropped, no rows lost, and the
        resulting batch consumable by the model (and trainable when labels are available).
        """
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        for model_class in self.all_model_classes:
            model = model_class(config)
            # First pass: inference-only inputs (no labels requested).
            tf_inputs_dict = self._prepare_for_class(inputs_dict, model_class, return_labels=False)
            if "labels" in tf_inputs_dict:
                return  # This is some kinda funky decoder model that needs labels in its forward pass
            # Keep only plain tensors; head-mask entries are not dataset-friendly columns.
            tf_inputs_dict = {
                key: val
                for key, val in tf_inputs_dict.items()
                if "head_mask" not in key and isinstance(val, tf.Tensor)
            }
            tf_inputs_dict["extra_unwanted_column"] = list(tf_inputs_dict.values())[0]  # Use a random other tensor
            input_dataset = Dataset.from_dict(tf_inputs_dict)
            # Single full-size batch so row counts can be compared directly below.
            tf_dataset = model.prepare_tf_dataset(
                input_dataset, batch_size=len(input_dataset), drop_remainder=False, shuffle=False
            )
            test_batch = next(iter(tf_dataset))
            if isinstance(test_batch, tf.Tensor):
                self.assertEqual(len(test_batch), len(input_dataset))  # Assert we didn't lose any data
            elif isinstance(test_batch, dict):
                # Assert we discarded the unwanted extra column but kept everything else
                self.assertEqual(len(test_batch), len(input_dataset.features) - 1)
                self.assertNotIn("extra_unwanted_column", test_batch)
                for tensor in test_batch.values():
                    self.assertTrue(isinstance(tensor, tf.Tensor))
                    self.assertEqual(len(tensor), len(input_dataset))  # Assert we didn't lose any data
            model(test_batch, training=False)

            # Second pass: if the model's `call` accepts labels, check the (features, labels)
            # split produced for training and run a single training step on it.
            if "labels" in inspect.signature(model_class.call).parameters.keys():
                tf_inputs_dict = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
                if "labels" not in tf_inputs_dict:
                    return  # This model isn't giving us labels after all, don't try training with it
                tf_inputs_dict = {key: val for key, val in tf_inputs_dict.items() if "head_mask" not in key}
                tf_inputs_dict["extra_unwanted_column"] = list(tf_inputs_dict.values())[0]  # Use a random other tensor
                input_dataset = Dataset.from_dict(tf_inputs_dict)
                tf_dataset = model.prepare_tf_dataset(
                    input_dataset, batch_size=len(input_dataset), drop_remainder=False, shuffle=False
                )
                test_batch, test_batch_labels = next(iter(tf_dataset))
                self.assertGreater(len(test_batch_labels), 0)  # Assert the labels are present
                feature_columns = 1 if isinstance(test_batch, tf.Tensor) else len(test_batch)
                label_columns = 1 if isinstance(test_batch_labels, tf.Tensor) else len(test_batch_labels)
                # Assert we discarded the unwanted extra column but kept everything else
                self.assertEqual(feature_columns + label_columns, len(input_dataset.features) - 1)
                if isinstance(test_batch, dict):
                    self.assertNotIn("extra_unwanted_column", test_batch)
                if isinstance(test_batch_labels, dict):
                    self.assertNotIn("extra_unwanted_column", test_batch_labels)
                # run_eagerly avoids compiling a train step just for this smoke test
                model.compile(optimizer="sgd", run_eagerly=True)
                model.train_on_batch(test_batch, test_batch_labels)
    def _test_xla_generate(self, **generate_kwargs):
        """Shared driver for the XLA generation tests.

        For every generative model class, runs `generate` both eagerly and under
        `tf.function(jit_compile=True)` and requires the outputs to (mostly) match.
        `generate_kwargs` must contain `max_new_tokens` and is forwarded to `generate`.
        """

        def _generate_and_check_results(model, inputs_dict):
            # NOTE: `config` is not a parameter here — it is closed over from the loop
            # below, so this helper must only be called after `config` is assigned.
            if "input_ids" in inputs_dict:
                inputs = inputs_dict["input_ids"]
                # make sure there are no pad tokens in prompt, which may trigger unwanted behavior
                if model.generation_config.pad_token_id is not None:
                    if config.pad_token_id == 0:
                        new_pad_token = model.generation_config.pad_token_id + 1
                    else:
                        new_pad_token = model.generation_config.pad_token_id - 1
                else:
                    new_pad_token = None
                # NOTE(review): when pad_token_id is None, `new_pad_token` is None and this
                # tf.where call looks like it would fail — presumably every tested model
                # defines a pad token; confirm before relying on that branch.
                inputs = tf.where(inputs != model.generation_config.pad_token_id, inputs, new_pad_token)
            elif "input_features" in inputs_dict:
                inputs = inputs_dict["input_features"]
            else:
                raise ValueError("No valid generate input found in inputs_dict")

            generated = model.generate(inputs, **generate_kwargs).numpy()
            generate_xla = tf.function(model.generate, jit_compile=True)
            generated_xla = generate_xla(inputs, **generate_kwargs).numpy()

            # Due to numerical instability, let's fail the test only if there are more than 10% of input sequences give
            # different outputs between XLA and non-XLA versions. If there are less than 10 examples, let's be strict
            # and not allow any difference.
            diff = [[], []]
            for _generated, _generated_xla in zip(generated.tolist(), generated_xla.tolist()):
                if _generated != _generated_xla:
                    diff[0].append(_generated)
                    diff[1].append(_generated_xla)
            ratio = len(diff[0]) / len(generated)
            if ratio > 0.1 or (len(diff[0]) > 0 and len(generated) < 10):
                self.assertListEqual(diff[0], diff[1])

        for model_class in self.all_generative_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
            config.eos_token_id = None  # Generate until max length
            config.do_sample = False

            # fix config for models with additional sequence-length limiting settings
            for var_name in ["max_position_embeddings", "max_target_positions"]:
                attr = getattr(config, var_name, None)
                if attr is not None and attr < generate_kwargs["max_new_tokens"]:
                    try:
                        setattr(config, var_name, generate_kwargs["max_new_tokens"])
                    except NotImplementedError:
                        # xlnet will raise an exception when trying to set
                        # max_position_embeddings.
                        pass

            model = model_class(config)

            if model.supports_xla_generation:
                _generate_and_check_results(model, inputs_dict)
            else:
                # Models that do not support XLA generation must fail loudly, not silently.
                with self.assertRaises(ValueError):
                    _generate_and_check_results(model, inputs_dict)
1769def test_xla_generate_fast(self):1770"""1771Basic quick test for generate-compatible classes that confirms that XLA-generated tokens are the same as their
1772non XLA counterparts.
1773
1774Either the model supports XLA generation and passes the inner test, or it raises an appropriate exception
1775"""
1776self._test_xla_generate(num_beams=1, num_return_sequences=1, max_new_tokens=3)1777
1778@slow1779def test_xla_generate_contrastive(self):1780"""1781Slow and challenging version of `test_xla_generate_fast` for contrastive search -- contrastive search directly
1782manipulates the model cache and other outputs, and this test ensures that they are in a valid format that is
1783also supported by XLA.
1784
1785Either the model supports XLA generation and passes the inner test, or it raises an appropriate exception
1786"""
1787self._test_xla_generate(num_beams=1, num_return_sequences=1, max_new_tokens=16, penalty_alpha=0.5, top_k=4)1788
1789@slow1790def test_xla_generate_slow(self):1791"""1792Slow and challenging version of `test_xla_generate_fast` -- this test asks for several long sequences using
1793beam search, with and without XLA. The two outputs should match, and a failure in this test indicates that the
1794model may need further analysis if it is to be used for XLA generation.
1795
1796Either the model supports XLA generation and passes the inner test, or it raises an appropriate exception
1797"""
1798self._test_xla_generate(num_beams=8, num_return_sequences=2, max_new_tokens=128)1799
1800def _generate_random_bad_tokens(self, num_bad_tokens, model):1801# special tokens cannot be bad tokens1802special_tokens = []1803if model.config.bos_token_id is not None:1804special_tokens.append(model.config.bos_token_id)1805if model.config.pad_token_id is not None:1806special_tokens.append(model.config.pad_token_id)1807if model.config.eos_token_id is not None:1808special_tokens.append(model.config.eos_token_id)1809
1810# create random bad tokens that are not special tokens1811bad_tokens = []1812while len(bad_tokens) < num_bad_tokens:1813token = tf.squeeze(ids_tensor((1, 1), self.model_tester.vocab_size), 0).numpy()[0]1814if token not in special_tokens:1815bad_tokens.append(token)1816return bad_tokens1817
1818def _check_generated_ids(self, output_ids):1819for token_id in output_ids[0].numpy().tolist():1820self.assertGreaterEqual(token_id, 0)1821self.assertLess(token_id, self.model_tester.vocab_size)1822
1823def _check_match_tokens(self, generated_ids, bad_words_ids):1824# for all bad word tokens1825for bad_word_ids in bad_words_ids:1826# for all slices in batch1827for generated_ids_slice in generated_ids:1828# for all word idx1829for i in range(len(bad_word_ids), len(generated_ids_slice)):1830# if tokens match1831if generated_ids_slice[i - len(bad_word_ids) : i] == bad_word_ids:1832return True1833return False1834
1835
def ids_tensor(shape, vocab_size, rng=None, name=None, dtype=None):
    """Creates a random int32 tensor of the shape within the vocab size."""
    if rng is None:
        rng = random.Random()

    # Total number of scalar elements across all dimensions.
    num_elements = 1
    for dim in shape:
        num_elements *= dim

    # Each element is drawn independently from [0, vocab_size - 1].
    values = [rng.randint(0, vocab_size - 1) for _ in range(num_elements)]

    return tf.constant(values, shape=shape, dtype=dtype if dtype is not None else tf.int32)
1853
1854def random_attention_mask(shape, rng=None, name=None, dtype=None):1855attn_mask = ids_tensor(shape, vocab_size=2, rng=None, name=None, dtype=dtype)1856# make sure that at least one token is attended to for each batch1857attn_mask = tf.concat([attn_mask[:, :-1], tf.ones_like(attn_mask[:, -1:], dtype=dtype)], axis=-1)1858return attn_mask1859
1860
def floats_tensor(shape, scale=1.0, rng=None, name=None, dtype=None):
    """Creates a random float32 tensor"""
    if rng is None:
        rng = random.Random()

    # Number of scalar entries the tensor must contain.
    num_elements = 1
    for dim in shape:
        num_elements *= dim

    # Uniform draws in [0, scale).
    values = [rng.random() * scale for _ in range(num_elements)]

    flat = tf.constant(values, dtype=dtype if dtype is not None else tf.float32)
    return tf.reshape(flat, shape=shape)