# Code-viewer page header: transformers / test_modeling_swiftformer.py
# 293 lines · 10.7 KB · Word wrap
# coding=utf-8
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Testing suite for the PyTorch SwiftFormer model. """
16

17

18
import copy
import unittest

from transformers import PretrainedConfig, SwiftFormerConfig
from transformers.testing_utils import (
    require_torch,
    require_vision,
    slow,
    torch_device,
)
from transformers.utils import cached_property, is_torch_available, is_vision_available

from ...test_configuration_common import ConfigTester
from ...test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
from ...test_pipeline_mixin import PipelineTesterMixin


# Torch-dependent names are only imported when torch is installed, so this module can still be
# imported without it (the test classes below are guarded by `require_torch`).
if is_torch_available():
    import torch
    from torch import nn

    from transformers import SwiftFormerForImageClassification, SwiftFormerModel
    from transformers.models.swiftformer.modeling_swiftformer import SWIFTFORMER_PRETRAINED_MODEL_ARCHIVE_LIST


# Same guard for the vision (PIL) dependencies used by the integration test.
if is_vision_available():
    from PIL import Image

    from transformers import ViTImageProcessor
47

48

49
class SwiftFormerModelTester:
    """
    Helper that builds a small `SwiftFormerConfig` plus random inputs and runs output-shape checks.

    Args:
        parent: Object whose `assertEqual` is used to report results (the owning test case).
        batch_size: Number of images in the generated `pixel_values` batch.
        num_channels: Channel dimension of the generated images.
        is_training: Stored on the tester; not read by any method of this class.
        use_labels: Whether `prepare_config_and_inputs` also generates classification labels.
        hidden_dropout_prob: Stored on the tester; not forwarded to the config by `get_config`.
        attention_probs_dropout_prob: Stored on the tester; not forwarded to the config by `get_config`.
        image_size: Height and width of the generated (square) images.
        num_labels: Number of classes used for the labels and passed to the config.
        layer_depths: Forwarded to the config as `depths`. Defaults to `[1, 1, 1, 1]`.
        embed_dims: Forwarded to the config as `embed_dims`. Defaults to `[16, 16, 32, 32]`.
    """

    def __init__(
        self,
        parent,
        batch_size=13,
        num_channels=3,
        is_training=True,
        use_labels=True,
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        image_size=224,
        num_labels=3,
        layer_depths=None,
        embed_dims=None,
    ):
        self.parent = parent
        self.batch_size = batch_size
        self.num_channels = num_channels
        self.is_training = is_training
        self.use_labels = use_labels
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.num_labels = num_labels
        self.image_size = image_size
        # Build the default lists per instance: a list literal in the signature would be a single
        # object shared by every tester (and by every config created from it).
        self.layer_depths = [1, 1, 1, 1] if layer_depths is None else layer_depths
        self.embed_dims = [16, 16, 32, 32] if embed_dims is None else embed_dims

    def prepare_config_and_inputs(self):
        """Return `(config, pixel_values, labels)`; `labels` is `None` when `use_labels` is false."""
        pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])

        labels = None
        if self.use_labels:
            labels = ids_tensor([self.batch_size], self.num_labels)

        config = self.get_config()

        return config, pixel_values, labels

    def get_config(self):
        """Build the `SwiftFormerConfig` used by the tests from this tester's settings."""
        return SwiftFormerConfig(
            depths=self.layer_depths,
            embed_dims=self.embed_dims,
            mlp_ratio=4,
            downsamples=[True, True, True, True],
            hidden_act="gelu",
            num_labels=self.num_labels,
            down_patch_size=3,
            down_stride=2,
            down_pad=1,
            drop_rate=0.0,
            drop_path_rate=0.0,
            use_layer_scale=True,
            layer_scale_init_value=1e-5,
        )

    def create_and_check_model(self, config, pixel_values, labels):
        """Run `SwiftFormerModel` in eval mode and check the shape of `last_hidden_state`.

        `labels` is accepted so the tuple from `prepare_config_and_inputs` can be splatted in; it is unused.
        """
        model = SwiftFormerModel(config=config)
        model.to(torch_device)
        model.eval()
        result = model(pixel_values)
        # The 7x7 spatial size is hard-coded. It equals (224 // 4) // 2**3, i.e. what
        # `test_hidden_states_output` expects for the last hidden state with the default image_size.
        # NOTE(review): this check will not hold for a non-default image_size.
        self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.embed_dims[-1], 7, 7))

    def create_and_check_for_image_classification(self, config, pixel_values, labels):
        """Check the logits shape of `SwiftFormerForImageClassification`, with and without labels."""
        config.num_labels = self.num_labels
        expected_logits_shape = (self.batch_size, self.num_labels)

        # First pass: forward with labels supplied.
        model = SwiftFormerForImageClassification(config)
        model.to(torch_device)
        model.eval()
        result = model(pixel_values, labels=labels)
        self.parent.assertEqual(result.logits.shape, expected_logits_shape)

        # Second pass: a freshly built model and freshly sampled inputs, forward without labels.
        model = SwiftFormerForImageClassification(config)
        model.to(torch_device)
        model.eval()

        pixel_values = floats_tensor([self.batch_size, self.num_channels, self.image_size, self.image_size])
        result = model(pixel_values)
        self.parent.assertEqual(result.logits.shape, expected_logits_shape)

    def prepare_config_and_inputs_for_common(self):
        """Return `(config, inputs_dict)` where `inputs_dict` only holds `pixel_values` (labels are dropped)."""
        config, pixel_values, _ = self.prepare_config_and_inputs()
        inputs_dict = {"pixel_values": pixel_values}
        return config, inputs_dict
131

132

133
@require_torch
class SwiftFormerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest.TestCase):
    """
    Here we also overwrite some of the tests of test_modeling_common.py, as SwiftFormer does not use input_ids, inputs_embeds,
    attention_mask and seq_length.
    """

    all_model_classes = (SwiftFormerModel, SwiftFormerForImageClassification) if is_torch_available() else ()
    pipeline_model_mapping = (
        {"image-feature-extraction": SwiftFormerModel, "image-classification": SwiftFormerForImageClassification}
        if is_torch_available()
        else {}
    )

    fx_compatible = False
    test_pruning = False
    test_resize_embeddings = False
    test_head_masking = False
    has_attentions = False

    def setUp(self):
        """Create the model tester and the config tester used by the tests below."""
        self.model_tester = SwiftFormerModelTester(self)
        self.config_tester = ConfigTester(
            self,
            config_class=SwiftFormerConfig,
            has_text_modality=False,
            hidden_size=37,
            num_attention_heads=12,
            num_hidden_layers=12,
        )

    def test_config(self):
        self.config_tester.run_common_tests()

    @unittest.skip(reason="SwiftFormer does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    def test_model_common_attributes(self):
        """Output embeddings must be either absent or an `nn.Linear` for every model class."""
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            x = model.get_output_embeddings()
            self.assertTrue(x is None or isinstance(x, nn.Linear))

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_image_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_image_classification(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        """Load the first checkpoint of the archive list and check that a model is returned."""
        for model_name in SWIFTFORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = SwiftFormerModel.from_pretrained(model_name)
            self.assertIsNotNone(model)

    @unittest.skip(reason="SwiftFormer does not output attentions")
    def test_attention_outputs(self):
        pass

    def test_hidden_states_output(self):
        """Check the number and shapes of hidden states, requested via kwarg and then via config."""

        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.hidden_states

            # TODO: this count is hard-coded; presumably it should be derived from the config -- confirm.
            expected_num_stages = 8
            self.assertEqual(len(hidden_states), expected_num_stages)

            # SwiftFormer's feature maps are of shape (batch_size, embed_dims, height, width)
            # with the width and height being successively divided by 2, after every 2 blocks
            for i, hidden_state in enumerate(hidden_states):
                stage = i // 2
                side = (self.model_tester.image_size // 4) // 2**stage
                self.assertEqual(
                    hidden_state.shape,
                    torch.Size(
                        [
                            self.model_tester.batch_size,
                            self.model_tester.embed_dims[stage],
                            side,
                            side,
                        ]
                    ),
                )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_initialization(self):
        """With init-scale config values set to ~0, every trainable parameter's mean must round to 0 or 1."""

        def _config_zero_init(config):
            # Work on a deep copy so the caller's config is left untouched.
            configs_no_init = copy.deepcopy(config)
            # Only existing attributes are overwritten, so the dict's key set does not change
            # while it is being iterated.
            for key in configs_no_init.__dict__:
                if "_range" in key or "_std" in key or "initializer_factor" in key or "layer_scale" in key:
                    setattr(configs_no_init, key, 1e-10)
                # Recurse into nested sub-configs.
                if isinstance(getattr(configs_no_init, key, None), PretrainedConfig):
                    no_init_subconfig = _config_zero_init(getattr(configs_no_init, key))
                    setattr(configs_no_init, key, no_init_subconfig)
            return configs_no_init

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                if param.requires_grad:
                    self.assertIn(
                        ((param.data.mean() * 1e9) / 1e9).round().item(),
                        [0.0, 1.0],
                        msg=f"Parameter {name} of model {model_class} seems not properly initialized",
                    )
261

262

263
# We will verify our results on an image of cute cats
def prepare_img():
    """Open and return the COCO sample image used by the integration test.

    The path is relative, so the fixture is resolved against the current working directory.
    """
    image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png")
    return image
267

268

269
@require_torch
@require_vision
class SwiftFormerModelIntegrationTest(unittest.TestCase):
    """Slow integration test running the `MBZUAI/swiftformer-xs` checkpoint on a sample image."""

    @cached_property
    def default_image_processor(self):
        """Image processor loaded from the checkpoint, or `None` when vision dependencies are missing."""
        return ViTImageProcessor.from_pretrained("MBZUAI/swiftformer-xs") if is_vision_available() else None

    @slow
    def test_inference_image_classification_head(self):
        """Check the logits shape and the first three logit values against recorded references."""
        model = SwiftFormerForImageClassification.from_pretrained("MBZUAI/swiftformer-xs").to(torch_device)

        image_processor = self.default_image_processor
        image = prepare_img()
        inputs = image_processor(images=image, return_tensors="pt").to(torch_device)

        # forward pass
        with torch.no_grad():
            outputs = model(**inputs)

        # verify the logits
        expected_shape = torch.Size((1, 1000))
        self.assertEqual(outputs.logits.shape, expected_shape)

        # 1-D reference so its shape matches `logits[0, :3]` exactly instead of relying on broadcasting.
        expected_slice = torch.tensor([-2.1703e00, 2.1107e00, -2.0811e00]).to(torch_device)
        self.assertTrue(torch.allclose(outputs.logits[0, :3], expected_slice, atol=1e-4))
# Code-viewer page footer: transformers
# Cookie usage