transformers — 506 lines · 21.6 KB
1# coding=utf-8
2# Copyright 2022 The HuggingFace Inc. team. All rights reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15""" Testing suite for the TensorFlow SegFormer model. """
16
17from __future__ import annotations18
19import inspect20import unittest21from typing import List, Tuple22
23from transformers import SegformerConfig24from transformers.file_utils import is_tf_available, is_vision_available25from transformers.testing_utils import require_tf, slow26
27from ...test_configuration_common import ConfigTester28from ...test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor29from ...test_pipeline_mixin import PipelineTesterMixin30
31
32if is_tf_available():33import numpy as np34import tensorflow as tf35
36from transformers import TFSegformerForImageClassification, TFSegformerForSemanticSegmentation, TFSegformerModel37from transformers.models.segformer.modeling_tf_segformer import TF_SEGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST38
39if is_vision_available():40from PIL import Image41
42from transformers import SegformerImageProcessor43
44
class TFSegformerConfigTester(ConfigTester):
    """Config tester that additionally checks SegFormer-specific attributes."""

    def create_and_test_config_common_properties(self):
        # Build a config from the stored kwargs and verify the SegFormer
        # encoder attributes are all present.
        cfg = self.config_class(**self.inputs_dict)
        for attr_name in ("hidden_sizes", "num_attention_heads", "num_encoder_blocks"):
            self.parent.assertTrue(hasattr(cfg, attr_name))
52
class TFSegformerModelTester:
    """Builds small SegFormer configurations and dummy inputs for the TF model tests."""

    def __init__(
        self,
        parent,
        batch_size=13,
        image_size=64,
        num_channels=3,
        num_encoder_blocks=4,
        depths=None,
        sr_ratios=None,
        hidden_sizes=None,
        downsampling_rates=None,
        num_attention_heads=None,
        is_training=True,
        use_labels=True,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        initializer_range=0.02,
        num_labels=3,
        scope=None,
    ):
        # The per-stage settings use None sentinels instead of mutable list
        # defaults so every tester instance gets its own fresh list (shared
        # mutable defaults are a classic Python pitfall).  Passing explicit
        # lists keeps working exactly as before.
        self.parent = parent
        self.batch_size = batch_size
        self.image_size = image_size
        self.num_channels = num_channels
        self.num_encoder_blocks = num_encoder_blocks
        self.sr_ratios = [8, 4, 2, 1] if sr_ratios is None else sr_ratios
        self.depths = [1, 1, 1, 1] if depths is None else depths
        self.hidden_sizes = [8, 8, 16, 16] if hidden_sizes is None else hidden_sizes
        self.downsampling_rates = [1, 4, 8, 16] if downsampling_rates is None else downsampling_rates
        self.num_attention_heads = [1, 1, 2, 2] if num_attention_heads is None else num_attention_heads
        self.is_training = is_training
        self.use_labels = use_labels
        self.hidden_act = hidden_act
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.initializer_range = initializer_range
        self.num_labels = num_labels
        self.scope = scope

    def prepare_config_and_inputs(self):
        """Return ``(config, pixel_values, labels)``; ``labels`` is None when ``use_labels`` is False."""
        pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])

        labels = None
        if self.use_labels:
            # Per-pixel class-index labels for semantic segmentation.
            labels = ids_tensor([self.batch_size, self.image_size, self.image_size], self.num_labels)

        config = self.get_config()
        return config, pixel_values, labels

    def get_config(self):
        """Build a small SegformerConfig from the tester's hyper-parameters."""
        return SegformerConfig(
            image_size=self.image_size,
            num_channels=self.num_channels,
            num_encoder_blocks=self.num_encoder_blocks,
            depths=self.depths,
            hidden_sizes=self.hidden_sizes,
            num_attention_heads=self.num_attention_heads,
            hidden_act=self.hidden_act,
            hidden_dropout_prob=self.hidden_dropout_prob,
            attention_probs_dropout_prob=self.attention_probs_dropout_prob,
            initializer_range=self.initializer_range,
            num_labels=self.num_labels,
        )

    def create_and_check_model(self, config, pixel_values, labels):
        """Check the base model's last_hidden_state shape against the final downsampling rate."""
        model = TFSegformerModel(config=config)
        result = model(pixel_values, training=False)
        # The encoder halves the resolution once more after the last stage's
        # downsampling rate, hence the extra factor of 2.
        expected_height = expected_width = self.image_size // (self.downsampling_rates[-1] * 2)
        self.parent.assertEqual(
            result.last_hidden_state.shape, (self.batch_size, self.hidden_sizes[-1], expected_height, expected_width)
        )

    def create_and_check_for_image_segmentation(self, config, pixel_values, labels):
        """Check the segmentation head's logits shape, both with and without labels."""
        config.num_labels = self.num_labels
        model = TFSegformerForSemanticSegmentation(config)
        result = model(pixel_values, training=False)
        # Logits are produced at 1/4 of the input resolution.
        self.parent.assertEqual(
            result.logits.shape, (self.batch_size, self.num_labels, self.image_size // 4, self.image_size // 4)
        )
        result = model(pixel_values, labels=labels, training=False)
        self.parent.assertEqual(
            result.logits.shape, (self.batch_size, self.num_labels, self.image_size // 4, self.image_size // 4)
        )

    def prepare_config_and_inputs_for_common(self):
        """Return ``(config, inputs_dict)`` in the shape the common test mixin expects."""
        config_and_inputs = self.prepare_config_and_inputs()
        config, pixel_values, labels = config_and_inputs
        inputs_dict = {"pixel_values": pixel_values}
        return config, inputs_dict

    def prepare_config_and_inputs_for_keras_fit(self, for_segmentation: bool = False):
        """Inputs for ``fit()``: per-pixel labels for segmentation, zero class labels otherwise."""
        config_and_inputs = self.prepare_config_and_inputs()
        config, pixel_values, seg_labels = config_and_inputs
        if for_segmentation:
            inputs_dict = {"pixel_values": pixel_values, "labels": seg_labels}
        else:
            inputs_dict = {"pixel_values": pixel_values, "labels": tf.zeros((self.batch_size))}
        return config, inputs_dict
154
@require_tf
class TFSegformerModelTest(TFModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    """Common-suite tests for the TF SegFormer models.

    Overrides several mixin tests because SegFormer outputs are spatial
    (block-wise hidden states / attentions) rather than sequence-shaped.
    """

    all_model_classes = (
        (TFSegformerModel, TFSegformerForImageClassification, TFSegformerForSemanticSegmentation)
        if is_tf_available()
        else ()
    )
    pipeline_model_mapping = (
        {"feature-extraction": TFSegformerModel, "image-classification": TFSegformerForImageClassification}
        if is_tf_available()
        else {}
    )

    # Feature switches consumed by TFModelTesterMixin.
    test_head_masking = False
    test_onnx = False
    test_pruning = False
    test_resize_embeddings = False

    def setUp(self):
        self.model_tester = TFSegformerModelTester(self)
        self.config_tester = TFSegformerConfigTester(self, config_class=SegformerConfig, has_text_modality=False)

    def test_model(self):
        """Smoke-test the base model's output shape via the model tester."""
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @unittest.skip("SegFormer does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    @unittest.skip("SegFormer does not have get_input_embeddings method and get_output_embeddings methods")
    def test_model_common_attributes(self):
        pass

    def test_forward_signature(self):
        """Every model's `call` must take `pixel_values` as its first argument."""
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.call)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    def test_attention_outputs(self):
        """Check attention count and shapes; one attention map per transformer layer (sum of depths)."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions

            expected_num_attentions = sum(self.model_tester.depths)
            self.assertEqual(len(attentions), expected_num_attentions)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions

            self.assertEqual(len(attentions), expected_num_attentions)

            # verify the first attentions (first block, first layer)
            # Keys/queries live at 1/4 resolution; keys are further reduced by sr_ratios[0].
            expected_seq_len = (self.model_tester.image_size // 4) ** 2
            expected_reduced_seq_len = (self.model_tester.image_size // (4 * self.model_tester.sr_ratios[0])) ** 2
            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads[0], expected_seq_len, expected_reduced_seq_len],
            )

            # verify the last attentions (last block, last layer)
            expected_seq_len = (self.model_tester.image_size // 32) ** 2
            expected_reduced_seq_len = (self.model_tester.image_size // (32 * self.model_tester.sr_ratios[-1])) ** 2
            self.assertListEqual(
                list(attentions[-1].shape[-3:]),
                [self.model_tester.num_attention_heads[-1], expected_seq_len, expected_reduced_seq_len],
            )
            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            # Turning on hidden states adds exactly one entry to the output tuple.
            self.assertEqual(out_len + 1, len(outputs))

            self_attentions = outputs.attentions

            self.assertEqual(len(self_attentions), expected_num_attentions)
            # verify the first attentions (first block, first layer)
            expected_seq_len = (self.model_tester.image_size // 4) ** 2
            expected_reduced_seq_len = (self.model_tester.image_size // (4 * self.model_tester.sr_ratios[0])) ** 2
            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads[0], expected_seq_len, expected_reduced_seq_len],
            )

    def test_hidden_states_output(self):
        """SegFormer emits one hidden state per encoder block, not per layer."""

        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)

            outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.hidden_states

            expected_num_layers = self.model_tester.num_encoder_blocks
            self.assertEqual(len(hidden_states), expected_num_layers)

            # verify the first hidden states (first block)
            self.assertListEqual(
                list(hidden_states[0].shape[-3:]),
                [
                    self.model_tester.hidden_sizes[0],
                    self.model_tester.image_size // 4,
                    self.model_tester.image_size // 4,
                ],
            )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_model_outputs_equivalence(self):
        """Tuple (return_dict=False) and dict (return_dict=True) outputs must match element-wise."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        # NOTE(review): mutable default `additional_kwargs={}` — harmless here
        # because it is never mutated, but a None sentinel would be safer.
        def check_equivalence(model, tuple_inputs, dict_inputs, additional_kwargs={}):
            tuple_output = model(tuple_inputs, return_dict=False, **additional_kwargs)
            dict_output = model(dict_inputs, return_dict=True, **additional_kwargs).to_tuple()

            def recursive_check(tuple_object, dict_object):
                if isinstance(tuple_object, (List, Tuple)):
                    for tuple_iterable_value, dict_iterable_value in zip(tuple_object, dict_object):
                        recursive_check(tuple_iterable_value, dict_iterable_value)
                elif tuple_object is None:
                    return
                else:
                    self.assertTrue(
                        all(tf.equal(tuple_object, dict_object)),
                        msg=(
                            "Tuple and dict output are not equal. Difference:"
                            f" {tf.math.reduce_max(tf.abs(tuple_object - dict_object))}"
                        ),
                    )

            recursive_check(tuple_output, dict_output)

        for model_class in self.all_model_classes:
            model = model_class(config)

            # Fresh inputs for every comparison so one check cannot affect the next.
            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs)

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})

            if self.has_attentions:
                tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
                dict_inputs = self._prepare_for_class(inputs_dict, model_class)
                check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})

    # todo: incorporate label support for semantic segmentation in `test_modeling_tf_common.py`.

    @unittest.skipIf(
        not is_tf_available() or len(tf.config.list_physical_devices("GPU")) == 0,
        reason="TF does not support backprop for grouped convolutions on CPU.",
    )
    def test_dataset_conversion(self):
        super().test_dataset_conversion()

    def check_keras_fit_results(self, val_loss1, val_loss2, atol=2e-1, rtol=2e-1):
        # Looser tolerances than the mixin default: fit() losses on these
        # models vary more run-to-run.
        self.assertTrue(np.allclose(val_loss1, val_loss2, atol=atol, rtol=rtol))

    @unittest.skipIf(
        not is_tf_available() or len(tf.config.list_physical_devices("GPU")) == 0,
        reason="TF does not support backprop for grouped convolutions on CPU.",
    )
    @slow
    def test_keras_fit(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            # Since `TFSegformerModel` cannot operate with the default `fit()` method.
            if model_class.__name__ != "TFSegformerModel":
                model = model_class(config)
                if getattr(model, "hf_compute_loss", None):
                    super().test_keras_fit()

    def test_loss_computation(self):
        """Loss must be computable from kwargs, a dict, and a positional tuple alike."""
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        # `apply` closes over `model_class` from the loop at the bottom of this test.
        def apply(model):
            for_segmentation = True if model_class.__name__ == "TFSegformerForSemanticSegmentation" else False
            # The number of elements in the loss should be the same as the number of elements in the label
            _, prepared_for_class = self.model_tester.prepare_config_and_inputs_for_keras_fit(
                for_segmentation=for_segmentation
            )
            added_label = prepared_for_class[sorted(prepared_for_class.keys() - inputs_dict.keys(), reverse=True)[0]]
            loss_size = tf.size(added_label)

            # Test that model correctly compute the loss with kwargs
            possible_input_names = {"input_ids", "pixel_values", "input_features"}
            input_name = possible_input_names.intersection(set(prepared_for_class)).pop()
            model_input = prepared_for_class.pop(input_name)

            loss = model(model_input, **prepared_for_class)[0]

            if model_class.__name__ == "TFSegformerForSemanticSegmentation":
                # Semantic segmentation loss is computed similarly as
                # https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_tf_utils.py#L210.
                self.assertEqual(loss.shape, (1,))
            else:
                self.assertEqual(loss.shape, [loss_size])

            # Test that model correctly compute the loss with a dict
            _, prepared_for_class = self.model_tester.prepare_config_and_inputs_for_keras_fit(
                for_segmentation=for_segmentation
            )
            loss = model(**prepared_for_class)[0]

            if model_class.__name__ == "TFSegformerForSemanticSegmentation":
                self.assertEqual(loss.shape, (1,))
            else:
                self.assertEqual(loss.shape, [loss_size])

            # Test that model correctly compute the loss with a tuple
            label_keys = prepared_for_class.keys() - inputs_dict.keys()
            signature = inspect.signature(model.call).parameters
            signature_names = list(signature.keys())

            # Create a dictionary holding the location of the tensors in the tuple
            tuple_index_mapping = {0: input_name}
            for label_key in label_keys:
                label_key_index = signature_names.index(label_key)
                tuple_index_mapping[label_key_index] = label_key
            sorted_tuple_index_mapping = sorted(tuple_index_mapping.items())
            # Initialize a list with their default values, update the values and convert to a tuple
            list_input = []

            for name in signature_names:
                if name != "kwargs":
                    list_input.append(signature[name].default)

            for index, value in sorted_tuple_index_mapping:
                list_input[index] = prepared_for_class[value]

            tuple_input = tuple(list_input)

            # Send to model
            loss = model(tuple_input[:-1])[0]
            if model_class.__name__ == "TFSegformerForSemanticSegmentation":
                self.assertEqual(loss.shape, (1,))
            else:
                self.assertEqual(loss.shape, [loss_size])

        for model_class in self.all_model_classes:
            # Since `TFSegformerModel` won't have labels against which we
            # could compute loss.
            if model_class.__name__ != "TFSegformerModel":
                model = model_class(config)
                apply(model)

    def check_pt_tf_outputs(self, tf_outputs, pt_outputs, model_class, tol=2e-4, name="outputs", attributes=None):
        # We override with a slightly higher tol value, as semseg models tend to diverge a bit more
        super().check_pt_tf_outputs(tf_outputs, pt_outputs, model_class, tol, name, attributes)

    @slow
    def test_model_from_pretrained(self):
        for model_name in TF_SEGFORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = TFSegformerModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
445
def prepare_img():
    """Load the fixture image used by the integration tests."""
    # We will verify our results on an image of cute cats
    return Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
451
@require_tf
class TFSegformerModelIntegrationTest(unittest.TestCase):
    """Slow integration tests comparing pretrained-checkpoint logits to stored golden values."""

    @slow
    def test_inference_image_segmentation_ade(self):
        # only resize + normalize
        image_processor = SegformerImageProcessor(
            image_scale=(512, 512), keep_ratio=False, align=False, do_random_crop=False
        )
        model = TFSegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")

        image = prepare_img()
        encoded_inputs = image_processor(images=image, return_tensors="tf")
        pixel_values = encoded_inputs.pixel_values

        outputs = model(pixel_values, training=False)

        # Logits come out at 1/4 of the 512x512 input resolution.
        expected_shape = tf.TensorShape((1, model.config.num_labels, 128, 128))
        self.assertEqual(outputs.logits.shape, expected_shape)

        # Golden values recorded from the reference checkpoint.
        expected_slice = tf.constant(
            [
                [[-4.6310, -5.5232, -6.2356], [-5.1921, -6.1444, -6.5996], [-5.4424, -6.2790, -6.7574]],
                [[-12.1391, -13.3122, -13.9554], [-12.8732, -13.9352, -14.3563], [-12.9438, -13.8226, -14.2513]],
                [[-12.5134, -13.4686, -14.4915], [-12.8669, -14.4343, -14.7758], [-13.2523, -14.5819, -15.0694]],
            ]
        )
        tf.debugging.assert_near(outputs.logits[0, :3, :3, :3], expected_slice, atol=1e-4)

    @slow
    def test_inference_image_segmentation_city(self):
        # only resize + normalize
        image_processor = SegformerImageProcessor(
            image_scale=(512, 512), keep_ratio=False, align=False, do_random_crop=False
        )
        model = TFSegformerForSemanticSegmentation.from_pretrained(
            "nvidia/segformer-b1-finetuned-cityscapes-1024-1024"
        )

        image = prepare_img()
        encoded_inputs = image_processor(images=image, return_tensors="tf")
        pixel_values = encoded_inputs.pixel_values

        outputs = model(pixel_values, training=False)

        expected_shape = tf.TensorShape((1, model.config.num_labels, 128, 128))
        self.assertEqual(outputs.logits.shape, expected_shape)

        # Golden values recorded from the reference checkpoint.  NOTE(review):
        # tolerance is much looser (1e-1) than the ADE test — presumably this
        # checkpoint diverges more across platforms; confirm before tightening.
        expected_slice = tf.constant(
            [
                [[-13.5748, -13.9111, -12.6500], [-14.3500, -15.3683, -14.2328], [-14.7532, -16.0424, -15.6087]],
                [[-17.1651, -15.8725, -12.9653], [-17.2580, -17.3718, -14.8223], [-16.6058, -16.8783, -16.7452]],
                [[-3.6456, -3.0209, -1.4203], [-3.0797, -3.1959, -2.0000], [-1.8757, -1.9217, -1.6997]],
            ]
        )
        tf.debugging.assert_near(outputs.logits[0, :3, :3, :3], expected_slice, atol=1e-1)