# optimum-intel — quantization test module (80 lines, 3.1 KB)
# Copyright 2023 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# ruff: noqa

import os
import tempfile

from neural_compressor.config import PostTrainingQuantConfig
from parameterized import parameterized
from transformers import AutoTokenizer, set_seed
from utils_tests import SEED, INCTestMixin, _generate_dataset

from optimum.intel import (
    INCConfig,
    INCModelForCausalLM,
    INCModelForSeq2SeqLM,
    INCModelForQuestionAnswering,
    INCModelForSequenceClassification,
    INCModelForMaskedLM,
    INCModelForTokenClassification,
    INCQuantizer,
)
from optimum.onnxruntime import ORTModelForCausalLM, ORTModelForSequenceClassification
from optimum.pipelines import ORT_SUPPORTED_TASKS

# Hide all CUDA devices so the tests run on CPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
# Seed the RNGs with the shared test seed for reproducible results.
set_seed(SEED)
42
class OptimizationTest(INCTestMixin):
    """Post-training static quantization tests for Intel Neural Compressor."""

    # Each entry: (task name, Hub model id, expected number of quantized MatMul nodes).
    SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (
        ("text-classification", "hf-internal-testing/tiny-random-bert", 64),
    )

    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
    def test_static_quantization(self, task, model_name, expected_quantized_matmuls):
        """Statically quantize a tiny model and check the quantized outputs."""
        sample_count = 10
        auto_model_class = ORT_SUPPORTED_TASKS[task]["class"][0].auto_model_class
        model = auto_model_class.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tokenizer.pad_token is None:
            # Some tokenizers ship without a pad token; reuse EOS for padding.
            tokenizer.pad_token = tokenizer.eos_token

        quantizer = INCQuantizer.from_pretrained(model, task=task)
        calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=sample_count)

        export_onnx = False
        if export_onnx:
            # When exporting to ONNX, keep Embedding ops in fp32.
            op_type_dict = {"Embedding": {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}}}
        else:
            op_type_dict = None
        quantization_config = PostTrainingQuantConfig(approach="static", op_type_dict=op_type_dict)

        with tempfile.TemporaryDirectory() as output_dir:
            quantizer.quantize(
                quantization_config=quantization_config,
                calibration_dataset=calibration_dataset,
                save_directory=output_dir,
                save_onnx_model=export_onnx,
            )
            self.check_model_outputs(
                q_model=quantizer._quantized_model,
                task=task,
                tokenizer=tokenizer,
                save_directory=output_dir,
                expected_quantized_matmuls=expected_quantized_matmuls,
                is_static=True,
                num_samples=sample_count,
                load_onnx_model=export_onnx,
            )