transformers — 506 lines · 21.6 KB
1# coding=utf-8
2# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15""" Testing suite for the TensorFlow SegFormer model. """
16
17from __future__ import annotations18
19import inspect20import unittest21from typing import List, Tuple22
23from transformers import SegformerConfig24from transformers.file_utils import is_tf_available, is_vision_available25from transformers.testing_utils import require_tf, slow26
27from ...test_configuration_common import ConfigTester28from ...test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor29from ...test_pipeline_mixin import PipelineTesterMixin30
31
32if is_tf_available():33import numpy as np34import tensorflow as tf35
36from transformers import TFSegformerForImageClassification, TFSegformerForSemanticSegmentation, TFSegformerModel37from transformers.models.segformer.modeling_tf_segformer import TF_SEGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST38
39if is_vision_available():40from PIL import Image41
42from transformers import SegformerImageProcessor43
44
class TFSegformerConfigTester(ConfigTester):
    """Config tester that additionally checks SegFormer-specific attributes."""

    def create_and_test_config_common_properties(self):
        # Build a config from the stored kwargs and verify the SegFormer
        # encoder attributes are all present.
        cfg = self.config_class(**self.inputs_dict)
        for attr_name in ("hidden_sizes", "num_attention_heads", "num_encoder_blocks"):
            self.parent.assertTrue(hasattr(cfg, attr_name))
52
class TFSegformerModelTester:
    """Builds small SegFormer configurations and dummy inputs for the TF model tests."""

    def __init__(
        self,
        parent,
        batch_size=13,
        image_size=64,
        num_channels=3,
        num_encoder_blocks=4,
        depths=None,
        sr_ratios=None,
        hidden_sizes=None,
        downsampling_rates=None,
        num_attention_heads=None,
        is_training=True,
        use_labels=True,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        initializer_range=0.02,
        num_labels=3,
        scope=None,
    ):
        # The per-stage settings use None sentinels instead of mutable list
        # defaults so every tester instance gets its own fresh list (shared
        # mutable defaults are a classic Python pitfall).  Passing explicit
        # lists keeps working exactly as before.
        self.parent = parent
        self.batch_size = batch_size
        self.image_size = image_size
        self.num_channels = num_channels
        self.num_encoder_blocks = num_encoder_blocks
        self.sr_ratios = [8, 4, 2, 1] if sr_ratios is None else sr_ratios
        self.depths = [1, 1, 1, 1] if depths is None else depths
        self.hidden_sizes = [8, 8, 16, 16] if hidden_sizes is None else hidden_sizes
        self.downsampling_rates = [1, 4, 8, 16] if downsampling_rates is None else downsampling_rates
        self.num_attention_heads = [1, 1, 2, 2] if num_attention_heads is None else num_attention_heads
        self.is_training = is_training
        self.use_labels = use_labels
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.initializer_range = initializer_range
        self.num_labels = num_labels
        self.scope = scope

    def prepare_config_and_inputs(self):
        """Return ``(config, pixel_values, labels)``; ``labels`` is None when ``use_labels`` is False."""
        pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])

        labels = None
        if self.use_labels:
            # Per-pixel class-index labels for semantic segmentation.
            labels = ids_tensor([self.batch_size, self.image_size, self.image_size], self.num_labels)

        config = self.get_config()
        return config, pixel_values, labels

    def get_config(self):
        """Build a small SegformerConfig from the tester's hyper-parameters."""
        return SegformerConfig(
            image_size=self.image_size,
            num_channels=self.num_channels,
            num_encoder_blocks=self.num_encoder_blocks,
            depths=self.depths,
            hidden_sizes=self.hidden_sizes,
            num_attention_heads=self.num_attention_heads,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            initializer_range=self.initializer_range,
            num_labels=self.num_labels,
        )

    def create_and_check_model(self, config, pixel_values, labels):
        """Check the base model's last_hidden_state shape against the final downsampling rate."""
        model = TFSegformerModel(config=config)
        result = model(pixel_values, training=False)
        # The encoder halves the resolution once more after the last stage's
        # downsampling rate, hence the extra factor of 2.
        expected_height = expected_width = self.image_size // (self.downsampling_rates[-1] * 2)
        self.parent.assertEqual(
            result.last_hidden_state.shape, (self.batch_size, self.hidden_sizes[-1], expected_height, expected_width)
        )

    def create_and_check_for_image_segmentation(self, config, pixel_values, labels):
        """Check the segmentation head's logits shape, both with and without labels."""
        config.num_labels = self.num_labels
        model = TFSegformerForSemanticSegmentation(config)
        result = model(pixel_values, training=False)
        # Logits are produced at 1/4 of the input resolution.
        self.parent.assertEqual(
            result.logits.shape, (self.batch_size, self.num_labels, self.image_size // 4, self.image_size // 4)
        )
        result = model(pixel_values, labels=labels, training=False)
        self.parent.assertEqual(
            result.logits.shape, (self.batch_size, self.num_labels, self.image_size // 4, self.image_size // 4)
        )

    def prepare_config_and_inputs_for_common(self):
        """Return ``(config, inputs_dict)`` in the shape the common test mixin expects."""
        config_and_inputs = self.prepare_config_and_inputs()
        config, pixel_values, labels = config_and_inputs
        inputs_dict = {"pixel_values": pixel_values}
        return config, inputs_dict

    def prepare_config_and_inputs_for_keras_fit(self, for_segmentation: bool = False):
        """Inputs for ``fit()``: per-pixel labels for segmentation, zero class labels otherwise."""
        config_and_inputs = self.prepare_config_and_inputs()
        config, pixel_values, seg_labels = config_and_inputs
        if for_segmentation:
            inputs_dict = {"pixel_values": pixel_values, "labels": seg_labels}
        else:
            inputs_dict = {"pixel_values": pixel_values, "labels": tf.zeros((self.batch_size))}
        return config, inputs_dict
154
@require_tf
class TFSegformerModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    """Common-suite tests for the TF SegFormer models.

    Overrides several mixin tests because SegFormer outputs are spatial
    (block-wise hidden states / attentions) rather than sequence-shaped.
    """

    all_model_classes = (
        (TFSegformerModel, TFSegformerForImageClassification, TFSegformerForSemanticSegmentation)
        if is_tf_available()
        else ()
    )
    pipeline_model_mapping = (
        {"feature-extraction": TFSegformerModel, "image-classification": TFSegformerForImageClassification}
        if is_tf_available()
        else {}
    )

    # Feature switches consumed by TFModelTesterMixin.
    test_head_masking = False
    test_onnx = False
    test_pruning = False
    test_resize_embeddings = False

    def setUp(self):
        self.model_tester = TFSegformerModelTester(self)
        self.config_tester = TFSegformerConfigTester(self, config_class=SegformerConfig, has_text_modality=False)

    def test_model(self):
        """Smoke-test the base model's output shape via the model tester."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @unittest.skip("SegFormer does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    @unittest.skip("SegFormer does not have get_input_embeddings method and get_output_embeddings methods")
    def test_model_common_attributes(self):
        pass

    def test_forward_signature(self):
        """Every model's `call` must take `pixel_values` as its first argument."""
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.call)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_attention_outputs(self):
        """Check attention count and shapes; one attention map per transformer layer (sum of depths)."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions

            expected_num_attentions = sum(self.model_tester.depths)
            self.assertEqual(len(attentions), expected_num_attentions)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions

            self.assertEqual(len(attentions), expected_num_attentions)

            # verify the first attentions (first block, first layer)
            # Keys/queries live at 1/4 resolution; keys are further reduced by sr_ratios[0].
            expected_seq_len = (self.model_tester.image_size // 4) ** 2
            expected_reduced_seq_len = (self.model_tester.image_size // (4 * self.model_tester.sr_ratios[0])) ** 2
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads[0], expected_seq_len, expected_reduced_seq_len],
            )

            # verify the last attentions (last block, last layer)
            expected_seq_len = (self.model_tester.image_size // 32) ** 2
            expected_reduced_seq_len = (self.model_tester.image_size // (32 * self.model_tester.sr_ratios[-1])) ** 2
            self.assertListEqual(
                list(attentions[-1].shape[-3:]),
                [self.model_tester.num_attention_heads[-1], expected_seq_len, expected_reduced_seq_len],
            )
            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            # Turning on hidden states adds exactly one entry to the output tuple.
            self.assertEqual(out_len + 1, len(outputs))

            self_attentions = outputs.attentions

            self.assertEqual(len(self_attentions), expected_num_attentions)
            # verify the first attentions (first block, first layer)
            expected_seq_len = (self.model_tester.image_size // 4) ** 2
            expected_reduced_seq_len = (self.model_tester.image_size // (4 * self.model_tester.sr_ratios[0])) ** 2
            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads[0], expected_seq_len, expected_reduced_seq_len],
            )

    def test_hidden_states_output(self):
        """SegFormer emits one hidden state per encoder block, not per layer."""

        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)

            outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.hidden_states

            expected_num_layers = self.model_tester.num_encoder_blocks
            self.assertEqual(len(hidden_states), expected_num_layers)

            # verify the first hidden states (first block)
            self.assertListEqual(
                list(hidden_states[0].shape[-3:]),
                [
                    self.model_tester.hidden_sizes[0],
                    self.model_tester.image_size // 4,
                    self.model_tester.image_size // 4,
                ],
            )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_model_outputs_equivalence(self):
        """Tuple (return_dict=False) and dict (return_dict=True) outputs must match element-wise."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        # NOTE(review): mutable default `additional_kwargs={}` — harmless here
        # because it is never mutated, but a None sentinel would be safer.
        def check_equivalence(model, tuple_inputs, dict_inputs, additional_kwargs={}):
            tuple_output = model(tuple_inputs, return_dict=False, **additional_kwargs)
            dict_output = model(dict_inputs, return_dict=True, **additional_kwargs).to_tuple()

            def recursive_check(tuple_object, dict_object):
                if isinstance(tuple_object, (List, Tuple)):
                    for tuple_iterable_value, dict_iterable_value in zip(tuple_object, dict_object):
                        recursive_check(tuple_iterable_value, dict_iterable_value)
                elif tuple_object is None:
                    return
                else:
                    self.assertTrue(
                        all(tf.equal(tuple_object, dict_object)),
                        msg=(
                            "Tuple and dict output are not equal. Difference:"
                            f" {tf.math.reduce_max(tf.abs(tuple_object - dict_object))}"
                        ),
                    )

            recursive_check(tuple_output, dict_output)

        for model_class in self.all_model_classes:
            model = model_class(config)

            # Fresh inputs for every comparison so one check cannot affect the next.
            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs)

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})

            if self.has_attentions:
                tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
                dict_inputs = self._prepare_for_class(inputs_dict, model_class)
                check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})

    # todo: incorporate label support for semantic segmentation in `test_modeling_tf_common.py`.

    @unittest.skipIf(
        not is_tf_available() or len(tf.config.list_physical_devices("GPU")) == 0,
        reason="TF does not support backprop for grouped convolutions on CPU.",
    )
    def test_dataset_conversion(self):
        super().test_dataset_conversion()

    def check_keras_fit_results(self, val_loss1, val_loss2, atol=2e-1, rtol=2e-1):
        # Looser tolerances than the mixin default: fit() losses on these
        # models vary more run-to-run.
        self.assertTrue(np.allclose(val_loss1, val_loss2, atol=atol, rtol=rtol))

    @unittest.skipIf(
        not is_tf_available() or len(tf.config.list_physical_devices("GPU")) == 0,
        reason="TF does not support backprop for grouped convolutions on CPU.",
    )
    @slow
    def test_keras_fit(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            # Since `TFSegformerModel` cannot operate with the default `fit()` method.
            if model_class.__name__ != "TFSegformerModel":
                model = model_class(config)
                if getattr(model, "hf_compute_loss", None):
                    super().test_keras_fit()

    def test_loss_computation(self):
        """Loss must be computable from kwargs, a dict, and a positional tuple alike."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        # `apply` closes over `model_class` from the loop at the bottom of this test.
        def apply(model):
            for_segmentation = True if model_class.__name__ == "TFSegformerForSemanticSegmentation" else False
            # The number of elements in the loss should be the same as the number of elements in the label
            _, prepared_for_class = self.model_tester.prepare_config_and_inputs_for_keras_fit(
                for_segmentation=for_segmentation
            )
            added_label = prepared_for_class[sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]]
            loss_size = tf.size(added_label)

            # Test that model correctly compute the loss with kwargs
            possible_input_names = {"input_ids", "pixel_values", "input_features"}
            input_name = possible_input_names.intersection(set(prepared_for_class)).pop()
            model_input = prepared_for_class.pop(input_name)

            loss = model(model_input, **prepared_for_class)[0]

            if model_class.__name__ == "TFSegformerForSemanticSegmentation":
                # Semantic segmentation loss is computed similarly as
                # https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_tf_utils.py#L210.
                self.assertEqual(loss.shape, (1,))
            else:
                self.assertEqual(loss.shape, [loss_size])

            # Test that model correctly compute the loss with a dict
            _, prepared_for_class = self.model_tester.prepare_config_and_inputs_for_keras_fit(
                for_segmentation=for_segmentation
            )
            loss = model(**prepared_for_class)[0]

            if model_class.__name__ == "TFSegformerForSemanticSegmentation":
                self.assertEqual(loss.shape, (1,))
            else:
                self.assertEqual(loss.shape, [loss_size])

            # Test that model correctly compute the loss with a tuple
            label_keys = prepared_for_class.keys() - inputs_dict.keys()
            signature = inspect.signature(model.call).parameters
            signature_names = list(signature.keys())

            # Create a dictionary holding the location of the tensors in the tuple
            tuple_index_mapping = {0: input_name}
            for label_key in label_keys:
                label_key_index = signature_names.index(label_key)
                tuple_index_mapping[label_key_index] = label_key
            sorted_tuple_index_mapping = sorted(tuple_index_mapping.items())
            # Initialize a list with their default values, update the values and convert to a tuple
            list_input = []

            for name in signature_names:
                if name != "kwargs":
                    list_input.append(signature[name].default)

            for index, value in sorted_tuple_index_mapping:
                list_input[index] = prepared_for_class[value]

            tuple_input = tuple(list_input)

            # Send to model
            loss = model(tuple_input[:-1])[0]
            if model_class.__name__ == "TFSegformerForSemanticSegmentation":
                self.assertEqual(loss.shape, (1,))
            else:
                self.assertEqual(loss.shape, [loss_size])

        for model_class in self.all_model_classes:
            # Since `TFSegformerModel` won't have labels against which we
            # could compute loss.
            if model_class.__name__ != "TFSegformerModel":
                model = model_class(config)
                apply(model)

    def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=2e-4, name="outputs", attributes=None):
        # We override with a slightly higher tol value, as semseg models tend to diverge a bit more
        super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol, name, attributes)

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_SEGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFSegformerModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
445
def prepare_img():
    """Load the fixture image used by the integration tests."""
    # We will verify our results on an image of cute cats
    return Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
451
@require_tf
class TFSegformerModelIntegrationTest(unittest.TestCase):
    """Slow integration tests comparing pretrained-checkpoint logits to stored golden values."""

    @slow
    def test_inference_image_segmentation_ade(self):
        # only resize + normalize
        image_processor = SegformerImageProcessor(
            image_scale=(512, 512), keep_ratio=False, align=False, do_random_crop=False
        )
        model = TFSegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

        image = prepare_img()
        encoded_inputs = image_processor(images=image, return_tensors="tf")
        pixel_values = encoded_inputs.pixel_values

        outputs = model(pixel_values, training=False)

        # Logits come out at 1/4 of the 512x512 input resolution.
        expected_shape = tf.TensorShape((1, model.config.num_labels, 128, 128))
        self.assertEqual(outputs.logits.shape, expected_shape)

        # Golden values recorded from the reference checkpoint.
        expected_slice = tf.constant(
            [
                [[-4.6310, -5.5232, -6.2356], [-5.1921, -6.1444, -6.5996], [-5.4424, -6.2790, -6.7574]],
                [[-12.1391, -13.3122, -13.9554], [-12.8732, -13.9352, -14.3563], [-12.9438, -13.8226, -14.2513]],
                [[-12.5134, -13.4686, -14.4915], [-12.8669, -14.4343, -14.7758], [-13.2523, -14.5819, -15.0694]],
            ]
        )
        tf.debugging.assert_near(outputs.logits[0, :3, :3, :3], expected_slice, atol=1e-4)

    @slow
    def test_inference_image_segmentation_city(self):
        # only resize + normalize
        image_processor = SegformerImageProcessor(
            image_scale=(512, 512), keep_ratio=False, align=False, do_random_crop=False
        )
        model = TFSegformerForSemanticSegmentation.from_pretrained(
            "nvidia/segformer-b1-finetuned-cityscapes-1024-1024"
        )

        image = prepare_img()
        encoded_inputs = image_processor(images=image, return_tensors="tf")
        pixel_values = encoded_inputs.pixel_values

        outputs = model(pixel_values, training=False)

        expected_shape = tf.TensorShape((1, model.config.num_labels, 128, 128))
        self.assertEqual(outputs.logits.shape, expected_shape)

        # Golden values recorded from the reference checkpoint.  NOTE(review):
        # tolerance is much looser (1e-1) than the ADE test — presumably this
        # checkpoint diverges more across platforms; confirm before tightening.
        expected_slice = tf.constant(
            [
                [[-13.5748, -13.9111, -12.6500], [-14.3500, -15.3683, -14.2328], [-14.7532, -16.0424, -15.6087]],
                [[-17.1651, -15.8725, -12.9653], [-17.2580, -17.3718, -14.8223], [-16.6058, -16.8783, -16.7452]],
                [[-3.6456, -3.0209, -1.4203], [-3.0797, -3.1959, -2.0000], [-1.8757, -1.9217, -1.6997]],
            ]
        )
        tf.debugging.assert_near(outputs.logits[0, :3, :3, :3], expected_slice, atol=1e-1)