# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

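"""Inference script for ERNIE text matching with the Paddle Inference API.

Loads a static-graph model exported to ``inference.pdmodel`` /
``inference.pdiparams``, runs batched prediction over query-title pairs from
the LCQMC test set, and prints a "similar" / "dissimilar" label for each pair.
"""
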
import argparse
import os

import numpy as np
import paddle
from paddle import inference

from paddlenlp.data import Pad, Tuple
from paddlenlp.datasets import load_dataset
from paddlenlp.transformers import AutoTokenizer
from paddlenlp.utils.log import logger


def convert_example(example, tokenizer, max_seq_length=512, is_test=False):
    """Convert a raw example (a dict with "query"/"title" keys and, for
    training data, a "label" key) into model input features."""

    query, title = example["query"], example["title"]

    encoded_inputs = tokenizer(text=query, text_pair=title, max_seq_len=max_seq_length)

    input_ids = encoded_inputs["input_ids"]
    token_type_ids = encoded_inputs["token_type_ids"]

    if not is_test:
        label = np.array([example["label"]], dtype="int64")
        return input_ids, token_type_ids, label
    else:
        return input_ids, token_type_ids


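# A minimal sketch of how convert_example is used (the sample sentences are
# illustrative; real token ids depend on the tokenizer's vocabulary):
#
#   tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh")
#   example = {"query": "世界上什么东西最小", "title": "世界上最小的东西是什么"}
#   input_ids, token_type_ids = convert_example(example, tokenizer, is_test=True)
#   # input_ids:      [CLS] query tokens [SEP] title tokens [SEP]
#   # token_type_ids: 0 for the query segment, 1 for the title segment

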
class Predictor(object):
    def __init__(
        self,
        model_dir,
        device="gpu",
        max_seq_length=128,
        batch_size=32,
        use_tensorrt=False,
        precision="fp32",
        cpu_threads=10,
        enable_mkldnn=False,
        benchmark=False,
        save_log_path="./log_output",
    ):
        self.max_seq_length = max_seq_length
        self.batch_size = batch_size
        self.benchmark = benchmark

        model_file = os.path.join(model_dir, "inference.pdmodel")
        params_file = os.path.join(model_dir, "inference.pdiparams")
        if not os.path.exists(model_file):
            raise ValueError("Model file not found at {}".format(model_file))
        if not os.path.exists(params_file):
            raise ValueError("Params file not found at {}".format(params_file))
        config = paddle.inference.Config(model_file, params_file)

        if device == "gpu":
            # Set GPU configs accordingly,
            # such as initializing the GPU memory and enabling TensorRT.
            config.enable_use_gpu(100, 0)
            precision_map = {
                "fp16": inference.PrecisionType.Half,
                "fp32": inference.PrecisionType.Float32,
                "int8": inference.PrecisionType.Int8,
            }
            precision_mode = precision_map[precision]

            if use_tensorrt:
                config.enable_tensorrt_engine(
                    max_batch_size=batch_size, min_subgraph_size=30, precision_mode=precision_mode
                )
        elif device == "cpu":
            # Set CPU configs accordingly,
            # such as enable_mkldnn and set_cpu_math_library_num_threads.
            config.disable_gpu()
            if enable_mkldnn:
                # Cache 10 different input shapes for MKL-DNN to avoid memory leaks.
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            config.set_cpu_math_library_num_threads(cpu_threads)
        elif device == "xpu":
            # Set XPU configs accordingly.
            config.enable_xpu()
        elif device == "npu":
            # Set NPU configs accordingly.
            config.enable_custom_device("npu")

        config.switch_use_feed_fetch_ops(False)
        self.predictor = paddle.inference.create_predictor(config)
        self.input_handles = [self.predictor.get_input_handle(name) for name in self.predictor.get_input_names()]
        self.output_handle = self.predictor.get_output_handle(self.predictor.get_output_names()[0])

        if benchmark:
            # auto_log is an optional dependency, imported only when benchmarking.
            import auto_log

            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(
                model_name="ernie-tiny",
                model_precision=precision,
                batch_size=self.batch_size,
                data_shape="dynamic",
                save_path=save_log_path,
                inference_config=config,
                pids=pid,
                process_name=None,
                gpu_ids=0,
                time_keys=["preprocess_time", "inference_time", "postprocess_time"],
                warmup=0,
                logger=logger,
            )

    def predict(self, data, tokenizer, label_map):
        """
        Predicts the data labels.

        Args:
            data (obj:`List[dict]`): A batch of examples, each a dict with "query" and "title" keys.
            tokenizer (obj:`PretrainedTokenizer`): This tokenizer inherits from :class:`~paddlenlp.transformers.PretrainedTokenizer`
                which contains most of the methods. Users should refer to the superclass for more information regarding methods.
            label_map (obj:`dict`): The label id (key) to label str (value) map.

        Returns:
            labels (obj:`list`): The predicted label for each example.
        """
        if self.benchmark:
            self.autolog.times.start()

        examples = []
        for text in data:
            input_ids, segment_ids = convert_example(text, tokenizer, max_seq_length=self.max_seq_length, is_test=True)
            examples.append((input_ids, segment_ids))

        # Pad each field to the longest sequence in the batch.
        batchify_fn = lambda samples, fn=Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input_ids
            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # token_type_ids
        ): fn(samples)

        if self.benchmark:
            self.autolog.times.stamp()

        input_ids, segment_ids = batchify_fn(examples)
        self.input_handles[0].copy_from_cpu(input_ids)
        self.input_handles[1].copy_from_cpu(segment_ids)
        self.predictor.run()
        probs = self.output_handle.copy_to_cpu()
        if self.benchmark:
            self.autolog.times.stamp()

        # Take the class with the highest score for each example.
        # probs = softmax(logits, axis=1)
        idx = np.argmax(probs, axis=1)
        idx = idx.tolist()
        labels = [label_map[i] for i in idx]

        if self.benchmark:
            self.autolog.times.end(stamp=True)

        return labels


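# A minimal sketch of standalone use of Predictor.predict (the model_dir path
# is hypothetical; any directory holding the exported inference.pdmodel /
# inference.pdiparams files works):
#
#   predictor = Predictor("./output", device="cpu")
#   tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh")
#   label_map = {0: "dissimilar", 1: "similar"}
#   data = [{"query": "...", "title": "..."}]
#   print(predictor.predict(data, tokenizer, label_map))  # e.g. ["similar"]

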
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", type=str, required=True, help="The directory of the exported static model.")
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument("--batch_size", default=32, type=int, help="Batch size per GPU/CPU for prediction.")
    parser.add_argument(
        "--device",
        choices=["cpu", "gpu", "xpu", "npu"],
        default="gpu",
        help="Select which device to run the model, defaults to gpu.",
    )
    parser.add_argument(
        "--use_tensorrt", default=False, type=eval, choices=[True, False], help="Enable to use tensorrt to speed up."
    )
    parser.add_argument(
        "--precision", default="fp32", type=str, choices=["fp32", "fp16", "int8"], help="The tensorrt precision."
    )
    parser.add_argument("--cpu_threads", default=10, type=int, help="Number of threads to predict when using cpu.")
    parser.add_argument(
        "--enable_mkldnn",
        default=False,
        type=eval,
        choices=[True, False],
        help="Enable to use mkldnn to speed up when using cpu.",
    )
    parser.add_argument(
        "--benchmark", type=eval, default=False, help="Whether to log information about the environment and the run."
    )
    parser.add_argument("--save_log_path", type=str, default="./log_output/", help="The file path to save log.")
    parser.add_argument(
        "--max_steps", default=-1, type=int, help="If > 0: set total number of predict steps to perform."
    )

    args = parser.parse_args()

    # Create the predictor from the exported static model.
    predictor = Predictor(
        args.model_dir,
        args.device,
        args.max_seq_length,
        args.batch_size,
        args.use_tensorrt,
        args.precision,
        args.cpu_threads,
        args.enable_mkldnn,
        args.benchmark,
        args.save_log_path,
    )

    tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-medium-zh")

    test_ds = load_dataset("lcqmc", splits=["test"])

    data = [{"query": d["query"], "title": d["title"]} for d in test_ds]
    if args.max_steps > 0:
        data = data[: args.max_steps]

    batches = [data[idx : idx + args.batch_size] for idx in range(0, len(data), args.batch_size)]
    label_map = {0: "dissimilar", 1: "similar"}

    results = []
    for batch_data in batches:
        results.extend(predictor.predict(batch_data, tokenizer, label_map))
    for idx, text in enumerate(data):
        print("Data: {} \t Label: {}".format(text, results[idx]))
    if args.benchmark:
        predictor.autolog.report()
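
# Example invocation (assuming this script is saved as predict.py; the
# model_dir path is illustrative):
#
#   python predict.py \
#       --model_dir ./output \
#       --device gpu \
#       --batch_size 32 \
#       --max_seq_length 128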