transformers

Форк
0
/
test_pipeline_mixin.py 
527 строк · 23.0 Кб
1
# coding=utf-8
2
# Copyright 2023 The HuggingFace Inc. team. All rights reserved.
3
#
4
# Licensed under the Apache License, Version 2.0 (the "License");
5
# you may not use this file except in compliance with the License.
6
# You may obtain a copy of the License at
7
#
8
#     http://www.apache.org/licenses/LICENSE-2.0
9
#
10
# Unless required by applicable law or agreed to in writing, software
11
# distributed under the License is distributed on an "AS IS" BASIS,
12
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
# See the License for the specific language governing permissions and
14
# limitations under the License.
15

16
import copy
17
import json
18
import os
19
import random
20
import unittest
21
from pathlib import Path
22

23
from transformers.testing_utils import (
24
    is_pipeline_test,
25
    require_decord,
26
    require_pytesseract,
27
    require_timm,
28
    require_torch,
29
    require_torch_or_tf,
30
    require_vision,
31
)
32
from transformers.utils import direct_transformers_import, logging
33

34
from .pipelines.test_pipelines_audio_classification import AudioClassificationPipelineTests
35
from .pipelines.test_pipelines_automatic_speech_recognition import AutomaticSpeechRecognitionPipelineTests
36
from .pipelines.test_pipelines_conversational import ConversationalPipelineTests
37
from .pipelines.test_pipelines_depth_estimation import DepthEstimationPipelineTests
38
from .pipelines.test_pipelines_document_question_answering import DocumentQuestionAnsweringPipelineTests
39
from .pipelines.test_pipelines_feature_extraction import FeatureExtractionPipelineTests
40
from .pipelines.test_pipelines_fill_mask import FillMaskPipelineTests
41
from .pipelines.test_pipelines_image_classification import ImageClassificationPipelineTests
42
from .pipelines.test_pipelines_image_feature_extraction import ImageFeatureExtractionPipelineTests
43
from .pipelines.test_pipelines_image_segmentation import ImageSegmentationPipelineTests
44
from .pipelines.test_pipelines_image_to_image import ImageToImagePipelineTests
45
from .pipelines.test_pipelines_image_to_text import ImageToTextPipelineTests
46
from .pipelines.test_pipelines_mask_generation import MaskGenerationPipelineTests
47
from .pipelines.test_pipelines_object_detection import ObjectDetectionPipelineTests
48
from .pipelines.test_pipelines_question_answering import QAPipelineTests
49
from .pipelines.test_pipelines_summarization import SummarizationPipelineTests
50
from .pipelines.test_pipelines_table_question_answering import TQAPipelineTests
51
from .pipelines.test_pipelines_text2text_generation import Text2TextGenerationPipelineTests
52
from .pipelines.test_pipelines_text_classification import TextClassificationPipelineTests
53
from .pipelines.test_pipelines_text_generation import TextGenerationPipelineTests
54
from .pipelines.test_pipelines_text_to_audio import TextToAudioPipelineTests
55
from .pipelines.test_pipelines_token_classification import TokenClassificationPipelineTests
56
from .pipelines.test_pipelines_translation import TranslationPipelineTests
57
from .pipelines.test_pipelines_video_classification import VideoClassificationPipelineTests
58
from .pipelines.test_pipelines_visual_question_answering import VisualQuestionAnsweringPipelineTests
59
from .pipelines.test_pipelines_zero_shot import ZeroShotClassificationPipelineTests
60
from .pipelines.test_pipelines_zero_shot_audio_classification import ZeroShotAudioClassificationPipelineTests
61
from .pipelines.test_pipelines_zero_shot_image_classification import ZeroShotImageClassificationPipelineTests
62
from .pipelines.test_pipelines_zero_shot_object_detection import ZeroShotObjectDetectionPipelineTests
63

64

65
# Maps each pipeline task name to a dict holding:
#   - "test": the `XXXPipelineTests` class implementing the checks for that task;
#   - "mapping": the model mappings declared on that test class (filled in by the loop below).
pipeline_test_mapping = {
    task_name: {"test": test_class}
    for task_name, test_class in (
        ("audio-classification", AudioClassificationPipelineTests),
        ("automatic-speech-recognition", AutomaticSpeechRecognitionPipelineTests),
        ("conversational", ConversationalPipelineTests),
        ("depth-estimation", DepthEstimationPipelineTests),
        ("document-question-answering", DocumentQuestionAnsweringPipelineTests),
        ("feature-extraction", FeatureExtractionPipelineTests),
        ("fill-mask", FillMaskPipelineTests),
        ("image-classification", ImageClassificationPipelineTests),
        ("image-feature-extraction", ImageFeatureExtractionPipelineTests),
        ("image-segmentation", ImageSegmentationPipelineTests),
        ("image-to-image", ImageToImagePipelineTests),
        ("image-to-text", ImageToTextPipelineTests),
        ("mask-generation", MaskGenerationPipelineTests),
        ("object-detection", ObjectDetectionPipelineTests),
        ("question-answering", QAPipelineTests),
        ("summarization", SummarizationPipelineTests),
        ("table-question-answering", TQAPipelineTests),
        ("text2text-generation", Text2TextGenerationPipelineTests),
        ("text-classification", TextClassificationPipelineTests),
        ("text-generation", TextGenerationPipelineTests),
        ("text-to-audio", TextToAudioPipelineTests),
        ("token-classification", TokenClassificationPipelineTests),
        ("translation", TranslationPipelineTests),
        ("video-classification", VideoClassificationPipelineTests),
        ("visual-question-answering", VisualQuestionAnsweringPipelineTests),
        ("zero-shot", ZeroShotClassificationPipelineTests),
        ("zero-shot-audio-classification", ZeroShotAudioClassificationPipelineTests),
        ("zero-shot-image-classification", ZeroShotImageClassificationPipelineTests),
        ("zero-shot-object-detection", ZeroShotObjectDetectionPipelineTests),
    )
}

# Record, per framework key, the model mapping declared on each test class (`None` when the class has none).
for task, task_info in pipeline_test_mapping.items():
    test = task_info["test"]
    task_info["mapping"] = {
        framework: getattr(test, attribute_name, None)
        for framework, attribute_name in (("pt", "model_mapping"), ("tf", "tf_model_mapping"))
    }
103

104

105
# The default value `hf-internal-testing` is for running the pipeline testing against the tiny models on the Hub.
# For debugging purposes, we can specify a local path which is the `output_path` argument of a previous run of
# `utils/create_dummy_models.py`.
TRANSFORMERS_TINY_MODEL_PATH = os.environ.get("TRANSFORMERS_TINY_MODEL_PATH", "hf-internal-testing")
if TRANSFORMERS_TINY_MODEL_PATH == "hf-internal-testing":
    # Use the summary file located relative to this file (two directories up, then `tests/utils/`).
    TINY_MODEL_SUMMARY_FILE_PATH = os.path.join(Path(__file__).parent.parent, "tests/utils/tiny_model_summary.json")
else:
    # Use the summary file found under `reports/` in the user-provided local directory.
    TINY_MODEL_SUMMARY_FILE_PATH = os.path.join(TRANSFORMERS_TINY_MODEL_PATH, "reports", "tiny_model_summary.json")
# Read the JSON summary with an explicit encoding so the result does not depend on the platform's locale default.
with open(TINY_MODEL_SUMMARY_FILE_PATH, encoding="utf-8") as fp:
    tiny_model_summary = json.load(fp)


PATH_TO_TRANSFORMERS = os.path.join(Path(__file__).parent.parent, "src/transformers")


# Dynamically import the Transformers module so that tokenizer and processor classes can be looked up from their
# names (with `getattr`).
transformers_module = direct_transformers_import(PATH_TO_TRANSFORMERS)

logger = logging.get_logger(__name__)
124

125

126
class PipelineTesterMixin:
    """Mixin providing one `test_pipeline_<task>` method per entry of `pipeline_test_mapping`.

    Each test method delegates to `run_task_tests`, which looks up the model classes registered for the task in
    `self.pipeline_model_mapping` and runs the task's `XXXPipelineTests` checks against them.

    The mixin relies on `self.skipTest` and `self.assertEqual`, so it has to be combined with a
    `unittest.TestCase`-like class.
    """

    # Not read by the code in this class.
    model_tester = None
    # Maps a task name to a model class or a tuple of model classes. `None` (the default) means no task is declared.
    pipeline_model_mapping = None
    # Not read by the code in this class.
    supported_frameworks = ["pt", "tf"]

    def run_task_tests(self, task):
        """Run pipeline tests for a specific `task`

        The test is skipped (through `self.skipTest`) when `task` is not declared in `self.pipeline_model_mapping`,
        including when that mapping is left to its default value `None`.

        Args:
            task (`str`):
                A task name. This should be a key in the mapping `pipeline_test_mapping`.
        """
        # `pipeline_model_mapping` is `None` by default: handle it like a mapping without `task` rather than
        # failing with a `TypeError` on the membership test.
        if self.pipeline_model_mapping is None or task not in self.pipeline_model_mapping:
            self.skipTest(
                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: `{task}` is not in "
                f"`self.pipeline_model_mapping` for `{self.__class__.__name__}`."
            )

        model_architectures = self.pipeline_model_mapping[task]
        # A single model class is accepted as a shorthand for a one-element tuple.
        if not isinstance(model_architectures, tuple):
            model_architectures = (model_architectures,)

        for model_architecture in model_architectures:
            model_arch_name = model_architecture.__name__

            # Get the canonical name (the class name without the framework prefix)
            for _prefix in ["Flax", "TF"]:
                if model_arch_name.startswith(_prefix):
                    model_arch_name = model_arch_name[len(_prefix) :]
                    break

            tokenizer_names = []
            processor_names = []
            commit = None
            if model_arch_name in tiny_model_summary:
                summary = tiny_model_summary[model_arch_name]
                tokenizer_names = summary["tokenizer_classes"]
                processor_names = summary["processor_classes"]
                # The `sha` entry is optional.
                commit = summary.get("sha")
            # Adding `None` (if empty) so we can generate tests
            tokenizer_names = [None] if len(tokenizer_names) == 0 else tokenizer_names
            processor_names = [None] if len(processor_names) == 0 else processor_names

            # With a local tiny model directory, the directory name is the canonical class name itself.
            repo_name = f"tiny-random-{model_arch_name}"
            if TRANSFORMERS_TINY_MODEL_PATH != "hf-internal-testing":
                repo_name = model_arch_name

            self.run_model_pipeline_tests(
                task, repo_name, model_architecture, tokenizer_names, processor_names, commit
            )

    def run_model_pipeline_tests(self, task, repo_name, model_architecture, tokenizer_names, processor_names, commit):
        """Run pipeline tests for a specific `task` with the given model class and tokenizer/processor class names

        Every (tokenizer name, processor name) combination is tested, except the ones for which
        `is_pipeline_test_to_skip` returns `True` (a warning is logged for those).

        Args:
            task (`str`):
                A task name. This should be a key in the mapping `pipeline_test_mapping`.
            repo_name (`str`):
                The name of the tiny model repository (or local directory), without the leading
                `TRANSFORMERS_TINY_MODEL_PATH` part.
            model_architecture (`type`):
                The model class to test. Its `config_class` attribute and `__name__` are used here.
            tokenizer_names (`List[str]`):
                A list of names of a subclasses of `PreTrainedTokenizerFast` or `PreTrainedTokenizer`. It may contain
                `None`.
            processor_names (`List[str]`):
                A list of names of subclasses of `BaseImageProcessor` or `FeatureExtractionMixin`. It may contain
                `None`.
            commit (`str` or `None`):
                The value of the `sha` entry found in the tiny model summary for this model (`None` if there is
                none). It is forwarded to `run_pipeline_test`.
        """
        # Only the name of the corresponding class `XXXPipelineTests` is needed to decide if a test is skipped.
        pipeline_test_class_name = pipeline_test_mapping[task]["test"].__name__

        for tokenizer_name in tokenizer_names:
            for processor_name in processor_names:
                if self.is_pipeline_test_to_skip(
                    pipeline_test_class_name,
                    model_architecture.config_class,
                    model_architecture,
                    tokenizer_name,
                    processor_name,
                ):
                    logger.warning(
                        f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: test is "
                        f"currently known to fail for: model `{model_architecture.__name__}` | tokenizer "
                        f"`{tokenizer_name}` | processor `{processor_name}`."
                    )
                    continue
                self.run_pipeline_test(task, repo_name, model_architecture, tokenizer_name, processor_name, commit)

    def run_pipeline_test(self, task, repo_name, model_architecture, tokenizer_name, processor_name, commit):
        """Run pipeline tests for a specific `task` with the given model class and tokenizer/processor class name

        The model will be loaded from a model repository on the Hub, or from a local directory if
        `TRANSFORMERS_TINY_MODEL_PATH` is not `hf-internal-testing`.

        The method logs a warning and returns early (without failing) when the processor or the model cannot be
        loaded, when neither a tokenizer nor a processor is available, when `is_pipeline_test_to_skip_more` returns
        `True`, or when the task test class provides no pipeline.

        Args:
            task (`str`):
                A task name. This should be a key in the mapping `pipeline_test_mapping`.
            repo_name (`str`):
                The name of the tiny model repository (or local directory), without the leading
                `TRANSFORMERS_TINY_MODEL_PATH` part.
            model_architecture (`type`):
                The model class to test. It is loaded with `model_architecture.from_pretrained`.
            tokenizer_name (`str`):
                The name of a subclass of `PreTrainedTokenizerFast` or `PreTrainedTokenizer`. Can be `None`.
            processor_name (`str`):
                The name of a subclass of `BaseImageProcessor` or `FeatureExtractionMixin`. Can be `None`.
            commit (`str` or `None`):
                Passed as the `revision` argument of every `from_pretrained` call.
        """
        repo_id = f"{TRANSFORMERS_TINY_MODEL_PATH}/{repo_name}"
        if TRANSFORMERS_TINY_MODEL_PATH != "hf-internal-testing":
            # Local layout: `<TRANSFORMERS_TINY_MODEL_PATH>/<model_type>/<repo_name>`
            model_type = model_architecture.config_class.model_type
            repo_id = os.path.join(TRANSFORMERS_TINY_MODEL_PATH, model_type, repo_name)

        tokenizer = None
        if tokenizer_name is not None:
            tokenizer_class = getattr(transformers_module, tokenizer_name)
            tokenizer = tokenizer_class.from_pretrained(repo_id, revision=commit)

        processor = None
        if processor_name is not None:
            processor_class = getattr(transformers_module, processor_name)
            # If the required packages (like `Pillow` or `torchaudio`) are not installed, this will fail.
            try:
                processor = processor_class.from_pretrained(repo_id, revision=commit)
            except Exception:
                logger.warning(
                    f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not load the "
                    f"processor from `{repo_id}` with `{processor_name}`."
                )
                return

        # TODO: Maybe not upload such problematic tiny models to Hub.
        if tokenizer is None and processor is None:
            logger.warning(
                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not find or load "
                f"any tokenizer / processor from `{repo_id}`."
            )
            return

        # TODO: We should check if a model file is on the Hub repo. instead.
        try:
            model = model_architecture.from_pretrained(repo_id, revision=commit)
        except Exception:
            logger.warning(
                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not find or load "
                f"the model from `{repo_id}` with `{model_architecture}`."
            )
            return

        pipeline_test_class_name = pipeline_test_mapping[task]["test"].__name__
        if self.is_pipeline_test_to_skip_more(pipeline_test_class_name, model.config, model, tokenizer, processor):
            logger.warning(
                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: test is "
                f"currently known to fail for: model `{model_architecture.__name__}` | tokenizer "
                f"`{tokenizer_name}` | processor `{processor_name}`."
            )
            return

        # validate
        validate_test_components(self, task, model, tokenizer, processor)

        if hasattr(model, "eval"):
            model = model.eval()

        # Get an instance of the corresponding class `XXXPipelineTests` in order to use `get_test_pipeline` and
        # `run_pipeline_test`.
        task_test = pipeline_test_mapping[task]["test"]()

        pipeline, examples = task_test.get_test_pipeline(model, tokenizer, processor)
        if pipeline is None:
            # The test can disable itself, but it should be very marginal
            # Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer don't exist)
            logger.warning(
                f"{self.__class__.__name__}::test_pipeline_{task.replace('-', '_')} is skipped: Could not get the "
                "pipeline for testing."
            )
            return

        task_test.run_pipeline_test(pipeline, examples)

        def run_batch_test(pipeline, examples):
            # A tokenizer without a pad token id: the batched run is not attempted.
            if pipeline.tokenizer is not None and pipeline.tokenizer.pad_token_id is None:
                return  # No batching for this and it's OK

            # 10 examples with batch size 4 means there needs to be a unfinished batch
            # which is important for the unbatcher
            def data(n):
                for _ in range(n):
                    # Need to copy because Conversation object is mutated
                    yield copy.deepcopy(random.choice(examples))

            # Only the `conversational` task gets an extra `max_new_tokens` argument.
            extra_kwargs = {"max_new_tokens": 5} if task == "conversational" else {}

            out = []
            for item in pipeline(data(10), batch_size=4, **extra_kwargs):
                out.append(item)
            # Each of the 10 inputs must produce exactly one output.
            self.assertEqual(len(out), 10)

        run_batch_test(pipeline, examples)

    # One test method per task. Each one only forwards the task name to `run_task_tests`; the decorators declare
    # the requirements of the task.

    @is_pipeline_test
    def test_pipeline_audio_classification(self):
        self.run_task_tests(task="audio-classification")

    @is_pipeline_test
    def test_pipeline_automatic_speech_recognition(self):
        self.run_task_tests(task="automatic-speech-recognition")

    @is_pipeline_test
    def test_pipeline_conversational(self):
        self.run_task_tests(task="conversational")

    @is_pipeline_test
    @require_vision
    @require_timm
    @require_torch
    def test_pipeline_depth_estimation(self):
        self.run_task_tests(task="depth-estimation")

    @is_pipeline_test
    @require_pytesseract
    @require_torch
    @require_vision
    def test_pipeline_document_question_answering(self):
        self.run_task_tests(task="document-question-answering")

    @is_pipeline_test
    def test_pipeline_feature_extraction(self):
        self.run_task_tests(task="feature-extraction")

    @is_pipeline_test
    def test_pipeline_fill_mask(self):
        self.run_task_tests(task="fill-mask")

    @is_pipeline_test
    @require_torch_or_tf
    @require_vision
    def test_pipeline_image_classification(self):
        self.run_task_tests(task="image-classification")

    @is_pipeline_test
    @require_vision
    @require_timm
    @require_torch
    def test_pipeline_image_segmentation(self):
        self.run_task_tests(task="image-segmentation")

    @is_pipeline_test
    @require_vision
    def test_pipeline_image_to_text(self):
        self.run_task_tests(task="image-to-text")

    @is_pipeline_test
    @require_timm
    @require_vision
    @require_torch
    def test_pipeline_image_feature_extraction(self):
        self.run_task_tests(task="image-feature-extraction")

    @unittest.skip(reason="`run_pipeline_test` is currently not implemented.")
    @is_pipeline_test
    @require_vision
    @require_torch
    def test_pipeline_mask_generation(self):
        self.run_task_tests(task="mask-generation")

    @is_pipeline_test
    @require_vision
    @require_timm
    @require_torch
    def test_pipeline_object_detection(self):
        self.run_task_tests(task="object-detection")

    @is_pipeline_test
    def test_pipeline_question_answering(self):
        self.run_task_tests(task="question-answering")

    @is_pipeline_test
    def test_pipeline_summarization(self):
        self.run_task_tests(task="summarization")

    @is_pipeline_test
    def test_pipeline_table_question_answering(self):
        self.run_task_tests(task="table-question-answering")

    @is_pipeline_test
    def test_pipeline_text2text_generation(self):
        self.run_task_tests(task="text2text-generation")

    @is_pipeline_test
    def test_pipeline_text_classification(self):
        self.run_task_tests(task="text-classification")

    @is_pipeline_test
    @require_torch_or_tf
    def test_pipeline_text_generation(self):
        self.run_task_tests(task="text-generation")

    @is_pipeline_test
    @require_torch
    def test_pipeline_text_to_audio(self):
        self.run_task_tests(task="text-to-audio")

    @is_pipeline_test
    def test_pipeline_token_classification(self):
        self.run_task_tests(task="token-classification")

    @is_pipeline_test
    def test_pipeline_translation(self):
        self.run_task_tests(task="translation")

    @is_pipeline_test
    @require_torch_or_tf
    @require_vision
    @require_decord
    def test_pipeline_video_classification(self):
        self.run_task_tests(task="video-classification")

    @is_pipeline_test
    @require_torch
    @require_vision
    def test_pipeline_visual_question_answering(self):
        self.run_task_tests(task="visual-question-answering")

    @is_pipeline_test
    def test_pipeline_zero_shot(self):
        self.run_task_tests(task="zero-shot")

    @is_pipeline_test
    @require_torch
    def test_pipeline_zero_shot_audio_classification(self):
        self.run_task_tests(task="zero-shot-audio-classification")

    @is_pipeline_test
    @require_vision
    def test_pipeline_zero_shot_image_classification(self):
        self.run_task_tests(task="zero-shot-image-classification")

    @is_pipeline_test
    @require_vision
    @require_torch
    def test_pipeline_zero_shot_object_detection(self):
        self.run_task_tests(task="zero-shot-object-detection")

    # This contains the test cases to be skipped without model architecture being involved.
    def is_pipeline_test_to_skip(
        self, pipeline_test_casse_name, config_class, model_architecture, tokenizer_name, processor_name
    ):
        """Skip some tests based on the classes or their names without the instantiated objects.

        This is to avoid calling `from_pretrained` (so reducing the runtime) if we already know the tests will fail.

        Args:
            pipeline_test_casse_name (`str`):
                The name of the `XXXPipelineTests` class. The misspelled parameter name is kept so the signature
                stays unchanged.
            config_class (`type`):
                The configuration class of the model class (not used by this implementation).
            model_architecture (`type`):
                The model class (not used by this implementation).
            tokenizer_name (`str`):
                The name of the tokenizer class. Can be `None`.
            processor_name (`str`):
                The name of the processor class (not used by this implementation).

        Returns:
            `bool`: `True` if the test should be skipped, `False` otherwise.
        """
        # No fix is required for this case.
        if (
            pipeline_test_casse_name == "DocumentQuestionAnsweringPipelineTests"
            and tokenizer_name is not None
            and not tokenizer_name.endswith("Fast")
        ):
            # `DocumentQuestionAnsweringPipelineTests` requires a fast tokenizer.
            return True

        return False

    def is_pipeline_test_to_skip_more(self, pipeline_test_casse_name, config, model, tokenizer, processor):  # noqa
        """Skip some more tests based on the information from the instantiated objects.

        Args:
            pipeline_test_casse_name (`str`):
                The name of the `XXXPipelineTests` class. The misspelled parameter name is kept so the signature
                stays unchanged.
            config:
                The configuration of the loaded model (not used by this implementation).
            model:
                The loaded model (not used by this implementation).
            tokenizer:
                The loaded tokenizer. Can be `None`.
            processor:
                The loaded processor (not used by this implementation).

        Returns:
            `bool`: `True` if the test should be skipped, `False` otherwise.
        """
        # No fix is required for this case.
        if (
            pipeline_test_casse_name == "QAPipelineTests"
            and tokenizer is not None
            and getattr(tokenizer, "pad_token", None) is None
            and not tokenizer.__class__.__name__.endswith("Fast")
        ):
            # `QAPipelineTests` doesn't work with a slow tokenizer that has no pad token.
            return True

        return False
503

504

505
def validate_test_components(test_case, task, model, tokenizer, processor):
    """Adjust and sanity-check the loaded components before a pipeline test runs.

    Two things happen here:

    - the config of a `BlenderbotForCausalLM` model gets `encoder_no_repeat_ngram_size` set to `0`;
    - when a tokenizer is given, a `vocab_size` must be found on the model config (or on its `text_config` /
      `text_encoder` sub-config), unless the config class is known to have none.

    `test_case`, `task` and `processor` are accepted but not used.

    Raises:
        ValueError: if a tokenizer is given and no `vocab_size` can be determined for a config class that is
            expected to have one.
    """
    # TODO: Move this to tiny model creation script
    # head-specific (within a model type) necessary changes to the config
    # 1. for `BlenderbotForCausalLM`
    if model.__class__.__name__ == "BlenderbotForCausalLM":
        model.config.encoder_no_repeat_ngram_size = 0

    # Nothing else to check without a tokenizer.
    if tokenizer is None:
        return

    # TODO: Change the tiny model creation script: don't create models with problematic tokenizers
    # Avoid `IndexError` in embedding layers
    configs_without_vocab_size = ["CanineConfig"]

    vocab_size = getattr(model.config, "vocab_size", None)
    if vocab_size is None:
        # For CLIP-like models: only the first sub-config attribute that exists is inspected.
        for sub_config_name in ("text_config", "text_encoder"):
            if hasattr(model.config, sub_config_name):
                sub_config = getattr(model.config, sub_config_name)
                vocab_size = getattr(sub_config, "vocab_size", None)
                break

    if vocab_size is not None:
        return

    if model.config.__class__.__name__ in configs_without_vocab_size:
        return

    raise ValueError("Could not determine `vocab_size` from model configuration while `tokenizer` is not `None`.")
528

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.