# Copyright 2021 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import os
import tempfile
import time
import unittest
from typing import Dict

import numpy as np
import requests
import timm
import torch
from datasets import load_dataset
from evaluate import evaluator
from parameterized import parameterized
from PIL import Image
from transformers import (
    AutoFeatureExtractor,
    AutoModel,
    AutoModelForAudioClassification,
    AutoModelForAudioFrameClassification,
    AutoModelForAudioXVector,
    AutoModelForCausalLM,
    AutoModelForCTC,
    AutoModelForImageClassification,
    AutoModelForMaskedLM,
    AutoModelForQuestionAnswering,
    AutoModelForSeq2SeqLM,
    AutoModelForSequenceClassification,
    AutoModelForSpeechSeq2Seq,
    AutoModelForTokenClassification,
    AutoTokenizer,
    GenerationConfig,
    Pix2StructForConditionalGeneration,
    PretrainedConfig,
    pipeline,
    set_seed,
)
from transformers.onnx.utils import get_preprocessor
from utils_tests import MODEL_NAMES

from optimum.exporters.onnx import MODEL_TYPES_REQUIRING_POSITION_IDS
from optimum.intel import (
    OVModelForAudioClassification,
    OVModelForAudioFrameClassification,
    OVModelForAudioXVector,
    OVModelForCausalLM,
    OVModelForCTC,
    OVModelForFeatureExtraction,
    OVModelForImageClassification,
    OVModelForMaskedLM,
    OVModelForPix2Struct,
    OVModelForQuestionAnswering,
    OVModelForSeq2SeqLM,
    OVModelForSequenceClassification,
    OVModelForSpeechSeq2Seq,
    OVModelForTokenClassification,
    OVStableDiffusionPipeline,
)
from optimum.intel.openvino import OV_DECODER_NAME, OV_DECODER_WITH_PAST_NAME, OV_ENCODER_NAME, OV_XML_FILE_NAME
from optimum.intel.openvino.modeling_seq2seq import OVDecoder, OVEncoder
from optimum.intel.openvino.modeling_timm import TimmImageProcessor
from optimum.intel.openvino.utils import _print_compiled_model_properties
from optimum.intel.utils.import_utils import is_openvino_version
from optimum.utils import (
    DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER,
    DIFFUSION_MODEL_UNET_SUBFOLDER,
    DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER,
    DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER,
)
from optimum.utils.testing_utils import require_diffusers


TENSOR_ALIAS_TO_TYPE = {
    "pt": torch.Tensor,
    "np": np.ndarray,
}

SEED = 42

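# Forcing f32 inference precision keeps OpenVINO outputs numerically close to the PyTorch
# references compared below (some devices otherwise default to reduced-precision execution).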
F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"}


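# Small helper used by the latency comparisons below: `elapsed` holds the wall-clock
# duration of the `with` block in milliseconds.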
class Timer(object):
    def __enter__(self):
        self.elapsed = time.perf_counter()
        return self

    def __exit__(self, type, value, traceback):
        self.elapsed = (time.perf_counter() - self.elapsed) * 1e3


class OVModelIntegrationTest(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.OV_MODEL_ID = "echarlaix/distilbert-base-uncased-finetuned-sst-2-english-openvino"
        self.OV_DECODER_MODEL_ID = "helenai/gpt2-ov"
        self.OV_SEQ2SEQ_MODEL_ID = "echarlaix/t5-small-openvino"
        self.OV_DIFFUSION_MODEL_ID = "hf-internal-testing/tiny-stable-diffusion-openvino"

    def test_load_from_hub_and_save_model(self):
        tokenizer = AutoTokenizer.from_pretrained(self.OV_MODEL_ID)
        tokens = tokenizer("This is a sample input", return_tensors="pt")
        loaded_model = OVModelForSequenceClassification.from_pretrained(self.OV_MODEL_ID)
        self.assertIsInstance(loaded_model.config, PretrainedConfig)
        loaded_model_outputs = loaded_model(**tokens)

        # Test specifying ov_config with throughput hint and manual cache dir
        manual_openvino_cache_dir = loaded_model.model_save_dir / "manual_model_cache"
        ov_config = {"CACHE_DIR": str(manual_openvino_cache_dir), "PERFORMANCE_HINT": "THROUGHPUT"}
        loaded_model = OVModelForSequenceClassification.from_pretrained(self.OV_MODEL_ID, ov_config=ov_config)
        self.assertTrue(manual_openvino_cache_dir.is_dir())
        self.assertGreaterEqual(len(list(manual_openvino_cache_dir.glob("*.blob"))), 1)
        if is_openvino_version("<", "2023.3"):
            self.assertEqual(loaded_model.request.get_property("PERFORMANCE_HINT").name, "THROUGHPUT")
        else:
            self.assertEqual(loaded_model.request.get_property("PERFORMANCE_HINT"), "THROUGHPUT")

        with tempfile.TemporaryDirectory() as tmpdirname:
            loaded_model.save_pretrained(tmpdirname)
            folder_contents = os.listdir(tmpdirname)
            self.assertTrue(OV_XML_FILE_NAME in folder_contents)
            self.assertTrue(OV_XML_FILE_NAME.replace(".xml", ".bin") in folder_contents)
            model = OVModelForSequenceClassification.from_pretrained(tmpdirname)

        outputs = model(**tokens)
        self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits))

        del loaded_model
        del model
        gc.collect()

    @parameterized.expand((True, False))
    def test_load_from_hub_and_save_decoder_model(self, use_cache):
        model_id = "vuiseng9/ov-gpt2-fp32-kv-cache" if use_cache else "vuiseng9/ov-gpt2-fp32-no-cache"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokens = tokenizer("This is a sample input", return_tensors="pt")
        loaded_model = OVModelForCausalLM.from_pretrained(model_id, use_cache=use_cache)
        self.assertIsInstance(loaded_model.config, PretrainedConfig)
        loaded_model_outputs = loaded_model(**tokens)

        with tempfile.TemporaryDirectory() as tmpdirname:
            loaded_model.save_pretrained(tmpdirname)
            folder_contents = os.listdir(tmpdirname)
            self.assertTrue(OV_XML_FILE_NAME in folder_contents)
            self.assertTrue(OV_XML_FILE_NAME.replace(".xml", ".bin") in folder_contents)
            model = OVModelForCausalLM.from_pretrained(tmpdirname, use_cache=use_cache)
            self.assertEqual(model.use_cache, use_cache)

        outputs = model(**tokens)
        self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits))
        del loaded_model
        del model
        gc.collect()

    def test_load_from_hub_and_save_seq2seq_model(self):
        tokenizer = AutoTokenizer.from_pretrained(self.OV_SEQ2SEQ_MODEL_ID)
        tokens = tokenizer("This is a sample input", return_tensors="pt")
        loaded_model = OVModelForSeq2SeqLM.from_pretrained(self.OV_SEQ2SEQ_MODEL_ID, compile=False)
        self.assertIsInstance(loaded_model.config, PretrainedConfig)
        loaded_model.to("cpu")
        loaded_model_outputs = loaded_model.generate(**tokens)

        with tempfile.TemporaryDirectory() as tmpdirname:
            loaded_model.save_pretrained(tmpdirname)
            folder_contents = os.listdir(tmpdirname)
            self.assertTrue(OV_ENCODER_NAME in folder_contents)
            self.assertTrue(OV_DECODER_NAME in folder_contents)
            self.assertTrue(OV_DECODER_WITH_PAST_NAME in folder_contents)
            model = OVModelForSeq2SeqLM.from_pretrained(tmpdirname, device="cpu")

        outputs = model.generate(**tokens)
        self.assertTrue(torch.equal(loaded_model_outputs, outputs))
        del loaded_model
        del model
        gc.collect()

    @require_diffusers
    def test_load_from_hub_and_save_stable_diffusion_model(self):
        loaded_pipeline = OVStableDiffusionPipeline.from_pretrained(self.OV_DIFFUSION_MODEL_ID, compile=False)
        self.assertIsInstance(loaded_pipeline.config, Dict)
        batch_size, height, width = 2, 16, 16
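        # Seed NumPy before each pipeline call so that both runs sample identical latents
        # and the generated images can be compared element-wise.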
        np.random.seed(0)
        inputs = {
            "prompt": ["sailing ship in storm by Leonardo da Vinci"] * batch_size,
            "height": height,
            "width": width,
            "num_inference_steps": 2,
            "output_type": "np",
        }
        pipeline_outputs = loaded_pipeline(**inputs).images
        self.assertEqual(pipeline_outputs.shape, (batch_size, height, width, 3))
        with tempfile.TemporaryDirectory() as tmpdirname:
            loaded_pipeline.save_pretrained(tmpdirname)
            pipeline = OVStableDiffusionPipeline.from_pretrained(tmpdirname)
            folder_contents = os.listdir(tmpdirname)
            self.assertIn(loaded_pipeline.config_name, folder_contents)
            for subfolder in {
                DIFFUSION_MODEL_UNET_SUBFOLDER,
                DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER,
                DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER,
                DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER,
            }:
                folder_contents = os.listdir(os.path.join(tmpdirname, subfolder))
                self.assertIn(OV_XML_FILE_NAME, folder_contents)
                self.assertIn(OV_XML_FILE_NAME.replace(".xml", ".bin"), folder_contents)
            np.random.seed(0)
            outputs = pipeline(**inputs).images
            self.assertTrue(np.array_equal(pipeline_outputs, outputs))
        del pipeline
        gc.collect()


class OVModelForSequenceClassificationIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = (
        "albert",
        "bert",
        # "camembert",
        "convbert",
        # "data2vec_text",
        # "deberta_v2",
        "distilbert",
        "electra",
        "flaubert",
        "ibert",
        # "mobilebert",
        # "nystromformer",
        "roberta",
        "roformer",
        "squeezebert",
        "xlm",
        # "xlm_roberta",
    )

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForSequenceClassification.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)
        transformers_model = AutoModelForSequenceClassification.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        inputs = "This is a sample input"
        tokens = tokenizer(inputs, return_tensors="pt")
        with torch.no_grad():
            transformers_outputs = transformers_model(**tokens)
        for input_type in ["pt", "np"]:
            tokens = tokenizer(inputs, return_tensors=input_type)
            ov_outputs = ov_model(**tokens)
            self.assertIn("logits", ov_outputs)
            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])
            # Compare tensor outputs
            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4))
        del transformers_model
        del ov_model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_pipeline(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        model = OVModelForSequenceClassification.from_pretrained(model_id, export=True, compile=False)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
        text = "This restaurant is awesome"
        outputs = pipe(text)
        self.assertTrue(model.is_dynamic)
        self.assertEqual(pipe.device, model.device)
        self.assertGreaterEqual(outputs[0]["score"], 0.0)
        self.assertIsInstance(outputs[0]["label"], str)
        if model_arch == "bert":
            # Test FP16 conversion
            model.half()
            model.to("cpu")
            model.compile()
            outputs = pipe(text)
            self.assertGreaterEqual(outputs[0]["score"], 0.0)
            self.assertIsInstance(outputs[0]["label"], str)
            # Test static shapes
            model.reshape(1, 25)
            model.compile()
            outputs = pipe(text)
            self.assertTrue(not model.is_dynamic)
            self.assertGreaterEqual(outputs[0]["score"], 0.0)
            self.assertIsInstance(outputs[0]["label"], str)
            # Test that model caching was not automatically enabled for exported model
            openvino_cache_dir = model.model_save_dir / "model_cache"
            self.assertFalse(openvino_cache_dir.is_dir())

        del model
        del pipe
        gc.collect()


class OVModelForQuestionAnsweringIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = (
        "bert",
        "distilbert",
        "roberta",
    )

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForQuestionAnswering.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)
        transformers_model = AutoModelForQuestionAnswering.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        inputs = "This is a sample input"
        tokens = tokenizer(inputs, return_tensors="pt")
        with torch.no_grad():
            transformers_outputs = transformers_model(**tokens)
        for input_type in ["pt", "np"]:
            tokens = tokenizer(inputs, return_tensors=input_type)
            ov_outputs = ov_model(**tokens)
            self.assertIn("start_logits", ov_outputs)
            self.assertIn("end_logits", ov_outputs)
            self.assertIsInstance(ov_outputs.start_logits, TENSOR_ALIAS_TO_TYPE[input_type])
            self.assertIsInstance(ov_outputs.end_logits, TENSOR_ALIAS_TO_TYPE[input_type])
            # Compare tensor outputs
            self.assertTrue(
                torch.allclose(torch.Tensor(ov_outputs.start_logits), transformers_outputs.start_logits, atol=1e-4)
            )
            self.assertTrue(
                torch.allclose(torch.Tensor(ov_outputs.end_logits), transformers_outputs.end_logits, atol=1e-4)
            )
        del ov_model
        del transformers_model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_pipeline(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        model = OVModelForQuestionAnswering.from_pretrained(model_id, export=True)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)
        question = "What's my name?"
        context = "My Name is Arthur and I live in Lyon."
        outputs = pipe(question, context)
        self.assertEqual(pipe.device, model.device)
        self.assertGreaterEqual(outputs["score"], 0.0)
        self.assertIsInstance(outputs["answer"], str)
        del model
        gc.collect()

    def test_metric(self):
        model_id = "distilbert-base-cased-distilled-squad"
        set_seed(SEED)
        ov_model = OVModelForQuestionAnswering.from_pretrained(model_id, export=True)
        transformers_model = AutoModelForQuestionAnswering.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        data = load_dataset("squad", split="validation").select(range(50))
        task_evaluator = evaluator("question-answering")
        transformers_pipe = pipeline("question-answering", model=transformers_model, tokenizer=tokenizer)
        ov_pipe = pipeline("question-answering", model=ov_model, tokenizer=tokenizer)
        transformers_metric = task_evaluator.compute(model_or_pipeline=transformers_pipe, data=data, metric="squad")
        ov_metric = task_evaluator.compute(model_or_pipeline=ov_pipe, data=data, metric="squad")
        self.assertEqual(ov_metric["exact_match"], transformers_metric["exact_match"])
        self.assertEqual(ov_metric["f1"], transformers_metric["f1"])
        del transformers_pipe
        del transformers_model
        del ov_pipe
        del ov_model
        gc.collect()


class OVModelForTokenClassificationIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = (
        "bert",
        "distilbert",
        "roberta",
    )

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForTokenClassification.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)
        transformers_model = AutoModelForTokenClassification.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        inputs = "This is a sample input"
        tokens = tokenizer(inputs, return_tensors="pt")
        with torch.no_grad():
            transformers_outputs = transformers_model(**tokens)
        for input_type in ["pt", "np"]:
            tokens = tokenizer(inputs, return_tensors=input_type)
            ov_outputs = ov_model(**tokens)
            self.assertIn("logits", ov_outputs)
            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])
            # Compare tensor outputs
            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4))
        del transformers_model
        del ov_model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_pipeline(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        model = OVModelForTokenClassification.from_pretrained(model_id, export=True)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        pipe = pipeline("token-classification", model=model, tokenizer=tokenizer)
        outputs = pipe("My Name is Arthur and I live in Lyon.")
        self.assertEqual(pipe.device, model.device)
        self.assertTrue(all(item["score"] > 0.0 for item in outputs))
        del model
        del pipe
        gc.collect()


class OVModelForFeatureExtractionIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = (
        "bert",
        "distilbert",
        "roberta",
        "sentence-transformers-bert",
    )

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForFeatureExtraction.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)
        transformers_model = AutoModel.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        inputs = "This is a sample input"
        tokens = tokenizer(inputs, return_tensors="pt")
        with torch.no_grad():
            transformers_outputs = transformers_model(**tokens)
        for input_type in ["pt", "np"]:
            tokens = tokenizer(inputs, return_tensors=input_type)
            ov_outputs = ov_model(**tokens)
            self.assertIn("last_hidden_state", ov_outputs)
            self.assertIsInstance(ov_outputs.last_hidden_state, TENSOR_ALIAS_TO_TYPE[input_type])
            # Compare tensor outputs
            self.assertTrue(
                torch.allclose(
                    torch.Tensor(ov_outputs.last_hidden_state), transformers_outputs.last_hidden_state, atol=1e-4
                )
            )
        del transformers_model
        del ov_model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_pipeline(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        model = OVModelForFeatureExtraction.from_pretrained(model_id, export=True)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        pipe = pipeline("feature-extraction", model=model, tokenizer=tokenizer)
        outputs = pipe("My Name is Arthur and I live in Lyon.")
        self.assertEqual(pipe.device, model.device)
        self.assertTrue(all(all(isinstance(item, float) for item in row) for row in outputs[0]))
        del pipe
        del model
        gc.collect()


class OVModelForCausalLMIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = (
        "bart",
        "gpt_bigcode",
        "blenderbot",
        "blenderbot-small",
        "bloom",
        "codegen",
        # "data2vec-text",  # TODO : enable when enabled in exporters
        "gpt2",
        "gpt_neo",
        "gpt_neox",
        "llama",
        # "llama_gptq",
        "marian",
        "mistral",
        "mpt",
        "opt",
        "pegasus",
    )
    GENERATION_LENGTH = 100
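    # Stateful export (KV cache held inside the OpenVINO inference request instead of being
    # passed around as explicit inputs/outputs) requires OpenVINO 2023.3 or later.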
    IS_SUPPORT_STATEFUL = is_openvino_version(">=", "2023.3")

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]

        if "gptq" in model_arch:
            self.skipTest("GPTQ model loading unsupported with AutoModelForCausalLM")

        set_seed(SEED)
        ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)
        self.assertTrue(ov_model.use_cache)

        transformers_model = AutoModelForCausalLM.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokens = tokenizer(
            "This is a sample", return_tensors="pt", return_token_type_ids=False if model_arch == "llama" else None
        )
        position_ids = None
        if model_arch.replace("_", "-") in MODEL_TYPES_REQUIRING_POSITION_IDS:
            input_shape = tokens["input_ids"].shape
            position_ids = torch.arange(0, input_shape[-1], dtype=torch.long).unsqueeze(0).view(-1, input_shape[-1])
        ov_outputs = ov_model(**tokens, position_ids=position_ids)

        self.assertTrue("logits" in ov_outputs)
        self.assertIsInstance(ov_outputs.logits, torch.Tensor)
        self.assertTrue("past_key_values" in ov_outputs)
        self.assertIsInstance(ov_outputs.past_key_values, tuple)

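        # For stateful models the KV cache lives inside the inference request, so the
        # returned past_key_values is only an empty placeholder.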
        is_stateful = ov_model.config.model_type not in {"gpt_bigcode", "llama"} and self.IS_SUPPORT_STATEFUL
        self.assertEqual(ov_model.stateful, is_stateful)
        if is_stateful:
            self.assertTrue(len(ov_outputs.past_key_values) == 1 and len(ov_outputs.past_key_values[0]) == 0)

        with torch.no_grad():
            transformers_outputs = transformers_model(**tokens)

        # Compare tensor outputs
        self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, atol=1e-4))
        del transformers_model
        del ov_model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_pipeline(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=False, compile=False)
        model.config.encoder_no_repeat_ngram_size = 0
        model.to("cpu")
        model.half()
        model.compile()
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
        outputs = pipe("This is a sample", max_length=20)
        self.assertEqual(pipe.device, model.device)
        self.assertTrue(all("This is a sample" in item["generated_text"] for item in outputs))
        del pipe
        del model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_multiple_inputs(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        model = OVModelForCausalLM.from_pretrained(model_id, export=True, compile=False)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.pad_token = tokenizer.eos_token
        texts = ["this is a simple input", "this is a second simple input", "this is a third simple input"]
        tokens = tokenizer(texts, padding=True, return_tensors="pt")
        generation_config = GenerationConfig(encoder_no_repeat_ngram_size=0, max_new_tokens=20, num_beams=2)
        outputs = model.generate(**tokens, generation_config=generation_config)
        self.assertIsInstance(outputs, torch.Tensor)
        self.assertEqual(outputs.shape[0], 3)
        del model
        gc.collect()

    def test_model_and_decoder_same_device(self):
        model_id = MODEL_NAMES["gpt2"]
        model = OVModelForCausalLM.from_pretrained(model_id, export=True)
        model.to("TEST")
        self.assertEqual(model._device, "TEST")
        # Verify that request is being reset
        self.assertEqual(model.request, None)
        del model
        gc.collect()

    def test_compare_with_and_without_past_key_values(self):
        model_id = MODEL_NAMES["gpt2"]
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokens = tokenizer("This is a sample input", return_tensors="pt")
        model_with_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=False)
        outputs_model_with_pkv = model_with_pkv.generate(
            **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
        )
        model_without_pkv = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=False)
        outputs_model_without_pkv = model_without_pkv.generate(
            **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
        )
        self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv))
        self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH)
        self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH)
        if self.IS_SUPPORT_STATEFUL:
            model_stateful = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True, stateful=True)
            outputs_model_stateful = model_stateful.generate(
                **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
            )
            self.assertTrue(torch.equal(outputs_model_without_pkv, outputs_model_stateful))

        del model_with_pkv
        del model_without_pkv
        gc.collect()

    def test_print_model_properties(self):
        # test setting OPENVINO_LOG_LEVEL to 3, which calls _print_compiled_model_properties
        openvino_log_level = os.environ.get("OPENVINO_LOG_LEVEL", None)
        os.environ["OPENVINO_LOG_LEVEL"] = "3"
        model = OVModelForSequenceClassification.from_pretrained(MODEL_NAMES["bert"], export=True)
        if openvino_log_level is not None:
            os.environ["OPENVINO_LOG_LEVEL"] = openvino_log_level
        # test calling function directly
        _print_compiled_model_properties(model.request)

    def test_auto_device_loading(self):
        OV_MODEL_ID = "echarlaix/distilbert-base-uncased-finetuned-sst-2-english-openvino"
        for device in ("AUTO", "AUTO:CPU"):
            model = OVModelForSequenceClassification.from_pretrained(OV_MODEL_ID, device=device)
            model.half()
            self.assertEqual(model._device, device)
            if device == "AUTO:CPU":
                model = OVModelForSequenceClassification.from_pretrained(OV_MODEL_ID, device=device)
                message = "Model should not be loaded from cache without explicitly setting CACHE_DIR"
                self.assertFalse(model.request.get_property("LOADED_FROM_CACHE"), message)
            del model
            gc.collect()

    def test_default_filling_attention_mask(self):
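        # When the attention mask is omitted, the modeling code is expected to fill in a
        # default all-ones mask, so outputs with and without the mask should match.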
        model_id = MODEL_NAMES["gpt2"]
        model_with_cache = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.pad_token = tokenizer.eos_token
        texts = ["this is a simple input"]
        tokens = tokenizer(texts, return_tensors="pt")
        self.assertTrue("attention_mask" in model_with_cache.input_names)
        outs = model_with_cache(**tokens)
        attention_mask = tokens.pop("attention_mask")
        outs_without_attn_mask = model_with_cache(**tokens)
        self.assertTrue(torch.allclose(outs.logits, outs_without_attn_mask.logits))
        input_ids = torch.argmax(outs.logits[:, -1:, :], dim=2)
        past_key_values = outs.past_key_values
        attention_mask = torch.ones((input_ids.shape[0], tokens.input_ids.shape[1] + 1), dtype=torch.long)
        outs_step2 = model_with_cache(
            input_ids=input_ids, attention_mask=attention_mask, past_key_values=past_key_values
        )
        outs_without_attn_mask_step2 = model_with_cache(input_ids=input_ids, past_key_values=past_key_values)
        self.assertTrue(torch.allclose(outs_step2.logits, outs_without_attn_mask_step2.logits))
        del model_with_cache
        gc.collect()

    def test_default_filling_attention_mask_and_position_ids(self):
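        # Same check for models that also take position_ids: both the attention mask and
        # the position ids should be derived with sensible defaults when omitted.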
        model_id = MODEL_NAMES["llama"]
        model_with_cache = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.pad_token = tokenizer.eos_token
        texts = ["this is a simple input"]
        tokens = tokenizer(texts, return_tensors="pt")
        self.assertTrue("position_ids" in model_with_cache.input_names)
        outs = model_with_cache(**tokens)
        attention_mask = tokens.pop("attention_mask")
        outs_without_attn_mask = model_with_cache(**tokens)
        self.assertTrue(torch.allclose(outs.logits, outs_without_attn_mask.logits))
        input_ids = torch.argmax(outs.logits[:, -1:, :], dim=2)
        past_key_values = outs.past_key_values
        attention_mask = torch.ones((input_ids.shape[0], tokens.input_ids.shape[1] + 1), dtype=torch.long)
        outs_step2 = model_with_cache(
            input_ids=input_ids, attention_mask=attention_mask, past_key_values=past_key_values
        )
        outs_without_attn_mask_step2 = model_with_cache(input_ids=input_ids, past_key_values=past_key_values)
        self.assertTrue(torch.allclose(outs_step2.logits, outs_without_attn_mask_step2.logits))
        del model_with_cache
        gc.collect()


class OVModelForMaskedLMIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = (
        "albert",
        "bert",
        # "camembert",
        # "convbert",
        # "data2vec_text",
        "deberta",
        # "deberta_v2",
        "distilbert",
        "electra",
        "flaubert",
        "ibert",
        # "mobilebert",
        "roberta",
        "roformer",
        "squeezebert",
        "xlm",
        "xlm_roberta",
    )

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForMaskedLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)
        transformers_model = AutoModelForMaskedLM.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        inputs = f"This is a sample {tokenizer.mask_token}"
        tokens = tokenizer(inputs, return_tensors="pt")
        with torch.no_grad():
            transformers_outputs = transformers_model(**tokens)
        for input_type in ["pt", "np"]:
            tokens = tokenizer(inputs, return_tensors=input_type)
            ov_outputs = ov_model(**tokens)
            self.assertIn("logits", ov_outputs)
            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])
            # Compare tensor outputs
            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4))
        del transformers_model
        del ov_model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_pipeline(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        model = OVModelForMaskedLM.from_pretrained(model_id, export=True)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        pipe = pipeline("fill-mask", model=model, tokenizer=tokenizer)
        outputs = pipe(f"This is a {tokenizer.mask_token}.")
        self.assertEqual(pipe.device, model.device)
        self.assertTrue(all(item["score"] > 0.0 for item in outputs))
        del pipe
        del model
        gc.collect()


class OVModelForImageClassificationIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = (
        "beit",
        "convnext",
        # "data2vec_vision",
        # "deit",
        "levit",
        "mobilenet_v1",
        "mobilenet_v2",
        "mobilevit",
        # "poolformer",
        "resnet",
        # "segformer",
        # "swin",
        "vit",
    )

    TIMM_MODELS = ("timm/pit_s_distilled_224.in1k", "timm/vit_tiny_patch16_224.augreg_in21k")

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForImageClassification.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)
        transformers_model = AutoModelForImageClassification.from_pretrained(model_id)
        preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        image = Image.open(requests.get(url, stream=True).raw)
        inputs = preprocessor(images=image, return_tensors="pt")
        with torch.no_grad():
            transformers_outputs = transformers_model(**inputs)
        for input_type in ["pt", "np"]:
            inputs = preprocessor(images=image, return_tensors=input_type)
            ov_outputs = ov_model(**inputs)
            self.assertIn("logits", ov_outputs)
            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])
            # Compare tensor outputs
            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4))
        del transformers_model
        del ov_model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_pipeline(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        model = OVModelForImageClassification.from_pretrained(model_id, export=True)
        preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
        pipe = pipeline("image-classification", model=model, feature_extractor=preprocessor)
        outputs = pipe("http://images.cocodataset.org/val2017/000000039769.jpg")
        self.assertEqual(pipe.device, model.device)
        self.assertGreaterEqual(outputs[0]["score"], 0.0)
        self.assertTrue(isinstance(outputs[0]["label"], str))
        del model
        del pipe
        gc.collect()

    @parameterized.expand(TIMM_MODELS)
    def test_compare_to_timm(self, model_id):
        ov_model = OVModelForImageClassification.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertEqual(ov_model.request.get_property("INFERENCE_PRECISION_HINT").to_string(), "f32")
        self.assertIsInstance(ov_model.config, PretrainedConfig)
        timm_model = timm.create_model(model_id, pretrained=True)
        preprocessor = TimmImageProcessor.from_pretrained(model_id)
        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        image = Image.open(requests.get(url, stream=True).raw)
        inputs = preprocessor(images=image, return_tensors="pt")
        with torch.no_grad():
            timm_model.eval()
            timm_outputs = timm_model(inputs["pixel_values"].float())
        for input_type in ["pt", "np"]:
            inputs = preprocessor(images=image, return_tensors=input_type)
            ov_outputs = ov_model(**inputs)
            self.assertIn("logits", ov_outputs)
            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])
            # Compare tensor outputs
            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), timm_outputs, atol=1e-3))
        gc.collect()

    @parameterized.expand(TIMM_MODELS)
    def test_timm_save_and_infer(self, model_id):
        ov_model = OVModelForImageClassification.from_pretrained(model_id, export=True)
        with tempfile.TemporaryDirectory() as tmpdirname:
            model_save_path = os.path.join(tmpdirname, "timm_ov_model")
            ov_model.save_pretrained(model_save_path)
            model = OVModelForImageClassification.from_pretrained(model_save_path)
            model(pixel_values=torch.zeros((5, 3, model.config.image_size, model.config.image_size)))
        gc.collect()


class OVModelForSeq2SeqLMIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = (
        "bart",
        # "bigbird_pegasus",
        "blenderbot",
        "blenderbot-small",
        # "longt5",
        "m2m_100",
        "marian",
        "mbart",
        "mt5",
        "pegasus",
        "t5",
    )

    GENERATION_LENGTH = 100
    SPEEDUP_CACHE = 1.1
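    # Generation with the KV cache is expected to be at least SPEEDUP_CACHE times faster
    # than recomputing past keys/values at every step.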

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)

        self.assertIsInstance(ov_model.encoder, OVEncoder)
        self.assertIsInstance(ov_model.decoder, OVDecoder)
        self.assertIsInstance(ov_model.decoder_with_past, OVDecoder)
        self.assertIsInstance(ov_model.config, PretrainedConfig)

        transformers_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokens = tokenizer("This is a sample input", return_tensors="pt")
        decoder_start_token_id = transformers_model.config.decoder_start_token_id if model_arch != "mbart" else 2
        decoder_inputs = {"decoder_input_ids": torch.ones((1, 1), dtype=torch.long) * decoder_start_token_id}
        ov_outputs = ov_model(**tokens, **decoder_inputs)

        self.assertTrue("logits" in ov_outputs)
        self.assertIsInstance(ov_outputs.logits, torch.Tensor)

        with torch.no_grad():
            transformers_outputs = transformers_model(**tokens, **decoder_inputs)
        # Compare tensor outputs
        self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, atol=1e-4))
        del transformers_model
        del ov_model

        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_pipeline(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True, compile=False)
        model.half()
        model.to("cpu")
        model.compile()

        # Text2Text generation
        pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
        text = "This is a test"
        outputs = pipe(text)
        self.assertEqual(pipe.device, model.device)
        self.assertIsInstance(outputs[0]["generated_text"], str)

        # Summarization
        pipe = pipeline("summarization", model=model, tokenizer=tokenizer)
        text = "This is a test"
        outputs = pipe(text)
        self.assertEqual(pipe.device, model.device)
        self.assertIsInstance(outputs[0]["summary_text"], str)

        # Translation
        pipe = pipeline("translation_en_to_fr", model=model, tokenizer=tokenizer)
        text = "This is a test"
        outputs = pipe(text)
        self.assertEqual(pipe.device, model.device)
        self.assertIsInstance(outputs[0]["translation_text"], str)
        del pipe
        del model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_generate_utils(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        model = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        text = "This is a sample input"
        tokens = tokenizer(text, return_tensors="pt")

        # General case
        outputs = model.generate(**tokens)
        outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        self.assertIsInstance(outputs[0], str)

        # With input ids
        outputs = model.generate(input_ids=tokens["input_ids"])
        outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        self.assertIsInstance(outputs[0], str)
        del model

        gc.collect()

    def test_compare_with_and_without_past_key_values(self):
        model_id = MODEL_NAMES["t5"]
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        text = "This is a sample input"
        tokens = tokenizer(text, return_tensors="pt")

        model_with_pkv = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True, use_cache=True)
        _ = model_with_pkv.generate(**tokens)  # warmup
        with Timer() as with_pkv_timer:
            outputs_model_with_pkv = model_with_pkv.generate(
                **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
            )

        model_without_pkv = OVModelForSeq2SeqLM.from_pretrained(model_id, export=True, use_cache=False)
        _ = model_without_pkv.generate(**tokens)  # warmup
        with Timer() as without_pkv_timer:
            outputs_model_without_pkv = model_without_pkv.generate(
                **tokens, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
            )

        self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv))
        self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH)
        self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH)
        self.assertTrue(
            without_pkv_timer.elapsed / with_pkv_timer.elapsed > self.SPEEDUP_CACHE,
            f"With pkv latency: {with_pkv_timer.elapsed:.3f} ms, without pkv latency: {without_pkv_timer.elapsed:.3f} ms,"
            f" speedup: {without_pkv_timer.elapsed / with_pkv_timer.elapsed:.3f}",
        )
        del model_with_pkv
        del model_without_pkv
        gc.collect()


class OVModelForAudioClassificationIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = (
        # "audio_spectrogram_transformer",
        # "data2vec_audio",
        # "hubert",
        # "sew",
        # "sew_d",
        # "wav2vec2-conformer",
        "unispeech",
        # "unispeech_sat",
        # "wavlm",
        "wav2vec2",
    )

    def _generate_random_audio_data(self):
        np.random.seed(10)
        t = np.linspace(0, 5.0, int(5.0 * 22050), endpoint=False)
        # generate pure sine wave at 220 Hz
        audio_data = 0.5 * np.sin(2 * np.pi * 220 * t)
        return audio_data

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForAudioClassification.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)
        transformers_model = AutoModelForAudioClassification.from_pretrained(model_id)
        preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
        inputs = preprocessor(self._generate_random_audio_data(), return_tensors="pt")

        with torch.no_grad():
            transformers_outputs = transformers_model(**inputs)

        for input_type in ["pt", "np"]:
            inputs = preprocessor(self._generate_random_audio_data(), return_tensors=input_type)
            ov_outputs = ov_model(**inputs)
            self.assertIn("logits", ov_outputs)
            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])
            # Compare tensor outputs
            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-3))

        del transformers_model
        del ov_model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_pipeline(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        model = OVModelForAudioClassification.from_pretrained(model_id, export=True)
        preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
        pipe = pipeline("audio-classification", model=model, feature_extractor=preprocessor)
        outputs = pipe([np.random.random(16000)])
        self.assertEqual(pipe.device, model.device)
        self.assertTrue(all(item["score"] > 0.0 for item in outputs[0]))
        del pipe
        del model
        gc.collect()


class OVModelForCTCIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = [
        "data2vec_audio",
        "hubert",
        "sew",
        "sew_d",
        "unispeech",
        "unispeech_sat",
        "wavlm",
        "wav2vec2-hf",
        "wav2vec2-conformer",
    ]

    def _generate_random_audio_data(self):
        np.random.seed(10)
        t = np.linspace(0, 5.0, int(5.0 * 22050), endpoint=False)
        # generate pure sine wave at 220 Hz
        audio_data = 0.5 * np.sin(2 * np.pi * 220 * t)
        return audio_data

    def test_load_vanilla_transformers_which_is_not_supported(self):
        with self.assertRaises(Exception) as context:
            _ = OVModelForCTC.from_pretrained(MODEL_NAMES["t5"], export=True)

        self.assertIn("only supports the tasks", str(context.exception))

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForCTC.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)

        set_seed(SEED)
        transformers_model = AutoModelForCTC.from_pretrained(model_id)
        processor = AutoFeatureExtractor.from_pretrained(model_id)
        input_values = processor(self._generate_random_audio_data(), return_tensors="pt")

        with torch.no_grad():
            transformers_outputs = transformers_model(**input_values)

        for input_type in ["pt", "np"]:
            input_values = processor(self._generate_random_audio_data(), return_tensors=input_type)
            ov_outputs = ov_model(**input_values)

            self.assertTrue("logits" in ov_outputs)
            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])

            # compare tensor outputs
            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4))

        del transformers_model
        del ov_model
        gc.collect()


class OVModelForAudioXVectorIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = [
        "data2vec_audio",
        "unispeech_sat",
        "wavlm",
        "wav2vec2-hf",
        "wav2vec2-conformer",
    ]

    def _generate_random_audio_data(self):
        np.random.seed(10)
        t = np.linspace(0, 5.0, int(5.0 * 22050), endpoint=False)
        # generate pure sine wave at 220 Hz
        audio_data = 0.5 * np.sin(2 * np.pi * 220 * t)
        return audio_data

    def test_load_vanilla_transformers_which_is_not_supported(self):
        with self.assertRaises(Exception) as context:
            _ = OVModelForAudioXVector.from_pretrained(MODEL_NAMES["t5"], export=True)

        self.assertIn("only supports the tasks", str(context.exception))

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForAudioXVector.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)

        set_seed(SEED)
        transformers_model = AutoModelForAudioXVector.from_pretrained(model_id)
        processor = AutoFeatureExtractor.from_pretrained(model_id)
        input_values = processor(self._generate_random_audio_data(), return_tensors="pt")

        with torch.no_grad():
            transformers_outputs = transformers_model(**input_values)
        for input_type in ["pt", "np"]:
            input_values = processor(self._generate_random_audio_data(), return_tensors=input_type)
            ov_outputs = ov_model(**input_values)

            self.assertTrue("logits" in ov_outputs)
            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])

            # compare tensor outputs
            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4))
            self.assertTrue(
                torch.allclose(torch.Tensor(ov_outputs.embeddings), transformers_outputs.embeddings, atol=1e-4)
            )

        del transformers_model
        del ov_model
        gc.collect()


class OVModelForAudioFrameClassificationIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = [
        "data2vec_audio",
        "unispeech_sat",
        "wavlm",
        "wav2vec2-hf",
        "wav2vec2-conformer",
    ]

    def _generate_random_audio_data(self):
        np.random.seed(10)
        t = np.linspace(0, 5.0, int(5.0 * 22050), endpoint=False)
        # generate pure sine wave at 220 Hz
        audio_data = 0.5 * np.sin(2 * np.pi * 220 * t)
        return audio_data

    def test_load_vanilla_transformers_which_is_not_supported(self):
        with self.assertRaises(Exception) as context:
            _ = OVModelForAudioFrameClassification.from_pretrained(MODEL_NAMES["t5"], export=True)

        self.assertIn("only supports the tasks", str(context.exception))

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForAudioFrameClassification.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)

        set_seed(SEED)
        transformers_model = AutoModelForAudioFrameClassification.from_pretrained(model_id)
        processor = AutoFeatureExtractor.from_pretrained(model_id)
        input_values = processor(self._generate_random_audio_data(), return_tensors="pt")

        with torch.no_grad():
            transformers_outputs = transformers_model(**input_values)
        for input_type in ["pt", "np"]:
            input_values = processor(self._generate_random_audio_data(), return_tensors=input_type)
            ov_outputs = ov_model(**input_values)

            self.assertTrue("logits" in ov_outputs)
            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])

            # compare tensor outputs
            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4))

        del transformers_model
        del ov_model
        gc.collect()


class OVModelForPix2StructIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = ["pix2struct"]
    TASK = "image-to-text"  # is it fine as well with visual-question-answering?

    GENERATION_LENGTH = 100
    SPEEDUP_CACHE = 1.1

    IMAGE = Image.open(
        requests.get(
            "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg",
            stream=True,
        ).raw
    )

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForPix2Struct.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)

        self.assertIsInstance(ov_model.encoder, OVEncoder)
        self.assertIsInstance(ov_model.decoder, OVDecoder)
        self.assertIsInstance(ov_model.decoder_with_past, OVDecoder)
        self.assertIsInstance(ov_model.config, PretrainedConfig)

        question = "Who am I?"
        transformers_model = Pix2StructForConditionalGeneration.from_pretrained(model_id)
        preprocessor = get_preprocessor(model_id)

        inputs = preprocessor(images=self.IMAGE, text=question, padding=True, return_tensors="pt")
        ov_outputs = ov_model(**inputs)

        self.assertTrue("logits" in ov_outputs)
        self.assertIsInstance(ov_outputs.logits, torch.Tensor)

        with torch.no_grad():
            transformers_outputs = transformers_model(**inputs)
        # Compare tensor outputs
        self.assertTrue(torch.allclose(ov_outputs.logits, transformers_outputs.logits, atol=1e-4))
        del transformers_model
        del ov_model

        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_generate_utils(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        model = OVModelForPix2Struct.from_pretrained(model_id, export=True)
        preprocessor = get_preprocessor(model_id)
        question = "Who am I?"
        inputs = preprocessor(images=self.IMAGE, text=question, return_tensors="pt")

        # General case
        outputs = model.generate(**inputs)
        outputs = preprocessor.batch_decode(outputs, skip_special_tokens=True)
        self.assertIsInstance(outputs[0], str)
        del model

        gc.collect()

    def test_compare_with_and_without_past_key_values(self):
        model_id = MODEL_NAMES["pix2struct"]
        preprocessor = get_preprocessor(model_id)
        question = "Who am I?"
        inputs = preprocessor(images=self.IMAGE, text=question, return_tensors="pt")

        model_with_pkv = OVModelForPix2Struct.from_pretrained(model_id, export=True, use_cache=True)
        _ = model_with_pkv.generate(**inputs)  # warmup
        with Timer() as with_pkv_timer:
            outputs_model_with_pkv = model_with_pkv.generate(
                **inputs, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
            )

        model_without_pkv = OVModelForPix2Struct.from_pretrained(model_id, export=True, use_cache=False)
        _ = model_without_pkv.generate(**inputs)  # warmup
        with Timer() as without_pkv_timer:
            outputs_model_without_pkv = model_without_pkv.generate(
                **inputs, min_length=self.GENERATION_LENGTH, max_length=self.GENERATION_LENGTH, num_beams=1
            )

        self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv))
        self.assertEqual(outputs_model_with_pkv.shape[1], self.GENERATION_LENGTH)
        self.assertEqual(outputs_model_without_pkv.shape[1], self.GENERATION_LENGTH)
        self.assertTrue(
            without_pkv_timer.elapsed / with_pkv_timer.elapsed > self.SPEEDUP_CACHE,
            f"With pkv latency: {with_pkv_timer.elapsed:.3f} ms, without pkv latency: {without_pkv_timer.elapsed:.3f} ms,"
            f" speedup: {without_pkv_timer.elapsed / with_pkv_timer.elapsed:.3f}",
        )
        del model_with_pkv
        del model_without_pkv
        gc.collect()


class OVModelForSpeechSeq2SeqIntegrationTest(unittest.TestCase):
    SUPPORTED_ARCHITECTURES = ("whisper",)

    def _generate_random_audio_data(self):
        np.random.seed(10)
        t = np.linspace(0, 5.0, int(5.0 * 22050), endpoint=False)
        # generate pure sine wave at 220 Hz
        audio_data = 0.5 * np.sin(2 * np.pi * 220 * t)
        return audio_data

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_compare_to_transformers(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        set_seed(SEED)
        ov_model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
        self.assertIsInstance(ov_model.config, PretrainedConfig)
        transformers_model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
        processor = get_preprocessor(model_id)
        data = self._generate_random_audio_data()
        features = processor.feature_extractor(data, return_tensors="pt")

        decoder_start_token_id = transformers_model.config.decoder_start_token_id
        decoder_inputs = {"decoder_input_ids": torch.ones((1, 1), dtype=torch.long) * decoder_start_token_id}

        with torch.no_grad():
            transformers_outputs = transformers_model(**features, **decoder_inputs)

        for input_type in ["pt", "np"]:
            features = processor.feature_extractor(data, return_tensors=input_type)

            if input_type == "np":
                decoder_inputs = {"decoder_input_ids": np.ones((1, 1), dtype=np.int64) * decoder_start_token_id}

            ov_outputs = ov_model(**features, **decoder_inputs)
            self.assertIn("logits", ov_outputs)
            # Compare tensor outputs
            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-3))

        del transformers_model
        del ov_model
        gc.collect()

    @parameterized.expand(SUPPORTED_ARCHITECTURES)
    def test_pipeline(self, model_arch):
        model_id = MODEL_NAMES[model_arch]
        model = OVModelForSpeechSeq2Seq.from_pretrained(model_id, export=True)
        processor = get_preprocessor(model_id)
        GenerationConfig.from_pretrained(model_id)
        pipe = pipeline(
            "automatic-speech-recognition",
            model=model,
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
        )
        data = self._generate_random_audio_data()
        outputs = pipe(data)
        self.assertIsInstance(outputs["text"], str)

        del pipe
        del model
        gc.collect()