# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

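"""Inference script for ERNIE text matching with the Paddle Inference API.

Loads a static-graph model exported to ``inference.pdmodel`` /
``inference.pdiparams``, runs batched prediction over query-title pairs from
the LCQMC test set, and prints a "similar" / "dissimilar" label for each pair.
"""
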
import argparse
import os

import numpy as np
import paddle
from paddle import inference

from paddlenlp.data import Pad, Tuple
from paddlenlp.datasets import load_dataset
from paddlenlp.transformers import AutoTokenizer
from paddlenlp.utils.log import logger


def convert_example(example, tokenizer, max_seq_length=512, is_test=False):
    """Convert a raw example (a dict with "query"/"title" keys and, for
    training data, a "label" key) into model input features."""

    query, title = example["query"], example["title"]

    encoded_inputs = tokenizer(text=query, text_pair=title, max_seq_len=max_seq_length)

    input_ids = encoded_inputs["input_ids"]
    token_type_ids = encoded_inputs["token_type_ids"]

    if not is_test:
        label = np.array([example["label"]], dtype="int64")
        return input_ids, token_type_ids, label
    else:
        return input_ids, token_type_ids


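# A minimal sketch of how convert_example is used (the sample sentences are
# illustrative; real token ids depend on the tokenizer's vocabulary):
#
#   tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh")
#   example = {"query": "世界上什么东西最小", "title": "世界上最小的东西是什么"}
#   input_ids, token_type_ids = convert_example(example, tokenizer, is_test=True)
#   # input_ids:      [CLS] query tokens [SEP] title tokens [SEP]
#   # token_type_ids: 0 for the query segment, 1 for the title segment

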
class Predictor(object):
    def __init__(
        self,
        model_dir,
        device="gpu",
        max_seq_length=128,
        batch_size=32,
        use_tensorrt=False,
        precision="fp32",
        cpu_threads=10,
        enable_mkldnn=False,
        benchmark=False,
        save_log_path="./log_output",
    ):
        self.max_seq_length = max_seq_length
        self.batch_size = batch_size
        self.benchmark = benchmark

        model_file = os.path.join(model_dir, "inference.pdmodel")
        params_file = os.path.join(model_dir, "inference.pdiparams")
        if not os.path.exists(model_file):
            raise ValueError("Model file not found at {}".format(model_file))
        if not os.path.exists(params_file):
            raise ValueError("Params file not found at {}".format(params_file))
        config = paddle.inference.Config(model_file, params_file)

        if device == "gpu":
            # Set GPU configs accordingly,
            # such as initializing the GPU memory and enabling TensorRT.
            config.enable_use_gpu(100, 0)
            precision_map = {
                "fp16": inference.PrecisionType.Half,
                "fp32": inference.PrecisionType.Float32,
                "int8": inference.PrecisionType.Int8,
            }
            precision_mode = precision_map[precision]

            if use_tensorrt:
                config.enable_tensorrt_engine(
                    max_batch_size=batch_size, min_subgraph_size=30, precision_mode=precision_mode
                )
        elif device == "cpu":
            # Set CPU configs accordingly,
            # such as enable_mkldnn and set_cpu_math_library_num_threads.
            config.disable_gpu()
            if enable_mkldnn:
                # Cache 10 different input shapes for MKL-DNN to avoid memory leaks.
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            config.set_cpu_math_library_num_threads(cpu_threads)
        elif device == "xpu":
            # Set XPU configs accordingly.
            config.enable_xpu()
        elif device == "npu":
            # Set NPU configs accordingly.
            config.enable_custom_device("npu")

        config.switch_use_feed_fetch_ops(False)
        self.predictor = paddle.inference.create_predictor(config)
        self.input_handles = [self.predictor.get_input_handle(name) for name in self.predictor.get_input_names()]
        self.output_handle = self.predictor.get_output_handle(self.predictor.get_output_names()[0])

        if benchmark:
            # auto_log is an optional dependency, imported only when benchmarking.
            import auto_log

            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(
                model_name="ernie-tiny",
                model_precision=precision,
                batch_size=self.batch_size,
                data_shape="dynamic",
                save_path=save_log_path,
                inference_config=config,
                pids=pid,
                process_name=None,
                gpu_ids=0,
                time_keys=["preprocess_time", "inference_time", "postprocess_time"],
                warmup=0,
                logger=logger,
            )

    def predict(self, data, tokenizer, label_map):
        """
        Predicts the data labels.

        Args:
            data (obj:`List[dict]`): A batch of examples, each a dict with "query" and "title" keys.
            tokenizer (obj:`PretrainedTokenizer`): This tokenizer inherits from :class:`~paddlenlp.transformers.PretrainedTokenizer`
                which contains most of the methods. Users should refer to the superclass for more information regarding methods.
            label_map (obj:`dict`): The label id (key) to label str (value) map.

        Returns:
            labels (obj:`list`): The predicted label for each example.
        """
        if self.benchmark:
            self.autolog.times.start()

        examples = []
        for text in data:
            input_ids, segment_ids = convert_example(text, tokenizer, max_seq_length=self.max_seq_length, is_test=True)
            examples.append((input_ids, segment_ids))

        # Pad each field to the longest sequence in the batch.
        batchify_fn = lambda samples, fn=Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input_ids
            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # token_type_ids
        ): fn(samples)

        if self.benchmark:
            self.autolog.times.stamp()

        input_ids, segment_ids = batchify_fn(examples)
        self.input_handles[0].copy_from_cpu(input_ids)
        self.input_handles[1].copy_from_cpu(segment_ids)
        self.predictor.run()
        probs = self.output_handle.copy_to_cpu()
        if self.benchmark:
            self.autolog.times.stamp()

        # Take the class with the highest score for each example.
        # probs = softmax(logits, axis=1)
        idx = np.argmax(probs, axis=1)
        idx = idx.tolist()
        labels = [label_map[i] for i in idx]

        if self.benchmark:
            self.autolog.times.end(stamp=True)

        return labels


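# A minimal sketch of standalone use of Predictor.predict (the model_dir path
# is hypothetical; any directory holding the exported inference.pdmodel /
# inference.pdiparams files works):
#
#   predictor = Predictor("./output", device="cpu")
#   tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh")
#   label_map = {0: "dissimilar", 1: "similar"}
#   data = [{"query": "...", "title": "..."}]
#   print(predictor.predict(data, tokenizer, label_map))  # e.g. ["similar"]

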
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", type=str, required=True, help="The directory of the exported static model.")
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument("--batch_size", default=32, type=int, help="Batch size per GPU/CPU for prediction.")
    parser.add_argument(
        "--device",
        choices=["cpu", "gpu", "xpu", "npu"],
        default="gpu",
        help="Select which device to run the model, defaults to gpu.",
    )
    parser.add_argument(
        "--use_tensorrt", default=False, type=eval, choices=[True, False], help="Enable to use tensorrt to speed up."
    )
    parser.add_argument(
        "--precision", default="fp32", type=str, choices=["fp32", "fp16", "int8"], help="The tensorrt precision."
    )
    parser.add_argument("--cpu_threads", default=10, type=int, help="Number of threads to predict when using cpu.")
    parser.add_argument(
        "--enable_mkldnn",
        default=False,
        type=eval,
        choices=[True, False],
        help="Enable to use mkldnn to speed up when using cpu.",
    )
    parser.add_argument(
        "--benchmark", type=eval, default=False, help="Whether to log information about the environment and the run."
    )
    parser.add_argument("--save_log_path", type=str, default="./log_output/", help="The file path to save log.")
    parser.add_argument(
        "--max_steps", default=-1, type=int, help="If > 0: set total number of predict steps to perform."
    )

    args = parser.parse_args()

    # Create the predictor from the exported static model.
    predictor = Predictor(
        args.model_dir,
        args.device,
        args.max_seq_length,
        args.batch_size,
        args.use_tensorrt,
        args.precision,
        args.cpu_threads,
        args.enable_mkldnn,
        args.benchmark,
        args.save_log_path,
    )

    tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh")

    test_ds = load_dataset("lcqmc", splits=["test"])

    data = [{"query": d["query"], "title": d["title"]} for d in test_ds]
    if args.max_steps > 0:
        data = data[: args.max_steps]

    batches = [data[idx : idx + args.batch_size] for idx in range(0, len(data), args.batch_size)]
    label_map = {0: "dissimilar", 1: "similar"}

    results = []
    for batch_data in batches:
        results.extend(predictor.predict(batch_data, tokenizer, label_map))
    for idx, text in enumerate(data):
        print("Data: {} \t Label: {}".format(text, results[idx]))
    if args.benchmark:
        predictor.autolog.report()
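
# Example invocation (assuming this script is saved as predict.py; the
# model_dir path is illustrative):
#
#   python predict.py \
#       --model_dir ./output \
#       --device gpu \
#       --batch_size 32 \
#       --max_seq_length 128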