# optimum-intel — quantization test module (80 lines, 3.1 KB)
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa

import os
import tempfile

from neural_compressor.config import PostTrainingQuantConfig
from parameterized import parameterized
from transformers import AutoTokenizer, set_seed
from utils_tests import SEED, INCTestMixin, _generate_dataset

from optimum.intel import (
    INCConfig,
    INCModelForCausalLM,
    INCModelForSeq2SeqLM,
    INCModelForQuestionAnswering,
    INCModelForSequenceClassification,
    INCModelForMaskedLM,
    INCModelForTokenClassification,
    INCQuantizer,
)
from optimum.onnxruntime import ORTModelForCausalLM, ORTModelForSequenceClassification
from optimum.pipelines import ORT_SUPPORTED_TASKS

# Hide all CUDA devices so the tests run on CPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
# Seed the RNGs with the shared test seed for reproducible results.
set_seed(SEED)
42
class OptimizationTest(INCTestMixin):
    """Post-training static quantization tests for Intel Neural Compressor."""

    # Each entry: (task name, Hub model id, expected number of quantized MatMul nodes).
    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
        ("text-classification", "hf-internal-testing/tiny-random-bert", 64),
    )

    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
    def test_static_quantization(self, task, model_name, expected_quantized_matmuls):
        """Statically quantize a tiny model and check the quantized outputs."""
        sample_count = 10
        auto_model_class = ORT_SUPPORTED_TASKS[task]["class"][0].auto_model_class
        model = auto_model_class.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tokenizer.pad_token is None:
            # Some tokenizers ship without a pad token; reuse EOS for padding.
            tokenizer.pad_token = tokenizer.eos_token

        quantizer = INCQuantizer.from_pretrained(model, task=task)
        calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=sample_count)

        export_onnx = False
        if export_onnx:
            # When exporting to ONNX, keep Embedding ops in fp32.
            op_type_dict = {"Embedding": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}}
        else:
            op_type_dict = None
        quantization_config = PostTrainingQuantConfig(approach="static", op_type_dict=op_type_dict)

        with tempfile.TemporaryDirectory() as output_dir:
            quantizer.quantize(
                quantization_config=quantization_config,
                calibration_dataset=calibration_dataset,
                save_directory=output_dir,
                save_onnx_model=export_onnx,
            )
            self.check_model_outputs(
                q_model=quantizer._quantized_model,
                task=task,
                tokenizer=tokenizer,
                save_directory=output_dir,
                expected_quantized_matmuls=expected_quantized_matmuls,
                is_static=True,
                num_samples=sample_count,
                load_onnx_model=export_onnx,
            )