paddlenlp
277 lines · 10.4 KB
1# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import argparse16import os17
18import numpy as np19import paddle20from paddle import inference21from scipy.special import softmax22
23from paddlenlp.data import Pad, Tuple24from paddlenlp.datasets import load_dataset25from paddlenlp.transformers import AutoTokenizer26from paddlenlp.utils.log import logger27
28
def convert_example(example, tokenizer, label_list, max_seq_length=512, is_test=False):
    """Tokenize one raw example into model-ready features.

    The tokenizer inserts the BERT special tokens (``[CLS]``/``[SEP]``) and
    produces the sequence-pair mask, so this helper only unpacks the encoder
    output and, for labeled data, converts the label string into an int64 id.

    Args:
        example: The raw text to encode. When ``is_test`` is False it must
            also support ``example["labels"]`` holding the gold label string.
        tokenizer: Callable accepting ``text`` and ``max_seq_len`` keyword
            arguments and returning a dict with ``input_ids`` and
            ``token_type_ids`` entries.
        label_list: Iterable of all label names; each name's position in the
            iteration order defines its integer id.
        max_seq_length: Maximum total sequence length after tokenization;
            longer inputs are truncated by the tokenizer.
        is_test: When True, no label is looked up or returned.

    Returns:
        ``(input_ids, token_type_ids)`` when ``is_test`` is True, otherwise
        ``(input_ids, token_type_ids, label)`` where ``label`` is a
        one-element int64 numpy array.
    """
    encoded = tokenizer(text=example, max_seq_len=max_seq_length)
    input_ids = encoded["input_ids"]
    token_type_ids = encoded["token_type_ids"]

    if is_test:
        return input_ids, token_type_ids

    # A label's position in `label_list` is its integer id.
    label_to_id = {name: idx for idx, name in enumerate(label_list)}
    label_id = np.array([label_to_id[example["labels"]]], dtype="int64")
    return input_ids, token_type_ids, label_id
79
class Predictor(object):
    """Paddle Inference wrapper around an exported static classification model.

    Loads the ``inference.pdmodel`` / ``inference.pdiparams`` pair from
    ``model_dir``, configures the inference engine for the requested device
    (gpu / cpu / xpu) and exposes :meth:`predict` for batched prediction.
    When ``benchmark`` is True, timing is collected via the optional
    ``auto_log`` package.
    """

    def __init__(
        self,
        model_dir,
        device="gpu",
        max_seq_length=128,
        batch_size=32,
        use_tensorrt=False,
        precision="fp32",
        cpu_threads=10,
        enable_mkldnn=False,
        benchmark=False,
        save_log_path="./log_output/",
    ):
        """Build the inference predictor.

        Args:
            model_dir(obj:`str`): Directory containing ``inference.pdmodel``
                and ``inference.pdiparams``.
            device(obj:`str`, defaults to `gpu`): One of ``gpu``/``cpu``/``xpu``.
            max_seq_length(obj:`int`): Maximum tokenized sequence length.
            batch_size(obj:`int`): Batch size; also the TensorRT max batch size.
            use_tensorrt(obj:`bool`): Enable the TensorRT subgraph engine (gpu only).
            precision(obj:`str`): Inference precision, ``fp32``/``fp16``/``int8``.
            cpu_threads(obj:`int`): Math-library thread count when on cpu.
            enable_mkldnn(obj:`bool`): Enable MKL-DNN acceleration when on cpu.
            benchmark(obj:`bool`): Collect timing statistics via ``auto_log``.
            save_log_path(obj:`str`): Directory where ``auto_log`` saves output.

        Raises:
            ValueError: If the model or params file is missing from ``model_dir``.
        """
        self.max_seq_length = max_seq_length
        self.batch_size = batch_size
        self.benchmark = benchmark

        model_file = os.path.join(model_dir, "inference.pdmodel")
        params_file = os.path.join(model_dir, "inference.pdiparams")
        if not os.path.exists(model_file):
            raise ValueError("not find model file path {}".format(model_file))
        if not os.path.exists(params_file):
            raise ValueError("not find params file path {}".format(params_file))
        config = paddle.inference.Config(model_file, params_file)

        if device == "gpu":
            # set GPU configs accordingly
            # such as initialize the gpu memory, enable tensorrt
            config.enable_use_gpu(100, 0)  # 100 MB initial memory pool on GPU card 0
            precision_map = {
                "fp16": inference.PrecisionType.Half,
                "fp32": inference.PrecisionType.Float32,
                "int8": inference.PrecisionType.Int8,
            }
            precision_mode = precision_map[precision]

            if use_tensorrt:
                config.enable_tensorrt_engine(
                    max_batch_size=batch_size, min_subgraph_size=30, precision_mode=precision_mode
                )
        elif device == "cpu":
            # set CPU configs accordingly,
            # such as enable_mkldnn, set_cpu_math_library_num_threads
            config.disable_gpu()
            if enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            config.set_cpu_math_library_num_threads(cpu_threads)
        elif device == "xpu":
            # set XPU configs accordingly
            config.enable_xpu(100)  # NOTE(review): presumably an L3 cache size hint — confirm against Paddle XPU docs

        # Feed/fetch ops must be off to use the zero-copy
        # copy_from_cpu / copy_to_cpu handle API in predict().
        config.switch_use_feed_fetch_ops(False)
        self.predictor = paddle.inference.create_predictor(config)
        self.input_handles = [self.predictor.get_input_handle(name) for name in self.predictor.get_input_names()]
        self.output_handle = self.predictor.get_output_handle(self.predictor.get_output_names()[0])

        if benchmark:
            # auto_log is an optional dependency, imported only when benchmarking.
            import auto_log

            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(
                model_name="bert-base",
                model_precision=precision,
                batch_size=self.batch_size,
                data_shape="dynamic",
                save_path=save_log_path,
                inference_config=config,
                pids=pid,
                process_name=None,
                gpu_ids=0,
                time_keys=["preprocess_time", "inference_time", "postprocess_time"],
                warmup=0,
                logger=logger,
            )

    def predict(self, data, tokenizer, label_map):
        """
        Predicts the data labels.

        Args:
            data (obj:`List(str)`): The batch data whose each element is a raw text.
            tokenizer(obj:`PretrainedTokenizer`): This tokenizer inherits from :class:`~paddlenlp.transformers.PretrainedTokenizer`
                which contains most of the methods. Users should refer to the superclass for more information regarding methods.
            label_map(obj:`dict`): The label id (key) to label str (value) map.

        Returns:
            labels(obj:`list[str]`): The predicted label string for each input text.
        """
        if self.benchmark:
            self.autolog.times.start()

        # Tokenize every text; is_test=True so no gold labels are expected.
        examples = []
        for text in data:
            input_ids, segment_ids = convert_example(
                text, tokenizer, label_list=label_map.values(), max_seq_length=self.max_seq_length, is_test=True
            )
            examples.append((input_ids, segment_ids))

        # Pad both input_ids and token_type_ids to the longest sequence in the batch.
        batchify_fn = lambda samples, fn=Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # segment
        ): fn(samples)

        if self.benchmark:
            self.autolog.times.stamp()

        input_ids, segment_ids = batchify_fn(examples)
        self.input_handles[0].copy_from_cpu(input_ids)
        self.input_handles[1].copy_from_cpu(segment_ids)
        self.predictor.run()
        logits = self.output_handle.copy_to_cpu()
        if self.benchmark:
            self.autolog.times.stamp()

        # argmax over class probabilities -> label ids -> label strings.
        probs = softmax(logits, axis=1)
        idx = np.argmax(probs, axis=1)
        idx = idx.tolist()
        labels = [label_map[i] for i in idx]

        if self.benchmark:
            self.autolog.times.end(stamp=True)

        return labels
207
if __name__ == "__main__":
    # Command-line entry point: classify the GLUE SST-2 test split with an
    # exported static model and print one prediction per input sentence.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", type=str, required=True, help="The directory to static model.")
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument("--batch_size", default=2, type=int, help="Batch size per GPU/CPU for training.")
    parser.add_argument(
        "--device",
        choices=["cpu", "gpu", "xpu", "npu"],
        default="gpu",
        help="Select which device to train model, defaults to gpu.",
    )
    parser.add_argument(
        "--use_tensorrt", default=False, type=eval, choices=[True, False], help="Enable to use tensorrt to speed up."
    )
    parser.add_argument(
        "--precision", default="fp32", type=str, choices=["fp32", "fp16", "int8"], help="The tensorrt precision."
    )
    parser.add_argument("--cpu_threads", default=10, type=int, help="Number of threads to predict when using cpu.")
    parser.add_argument(
        "--enable_mkldnn",
        default=False,
        type=eval,
        choices=[True, False],
        help="Enable to use mkldnn to speed up when using cpu.",
    )
    parser.add_argument(
        "--benchmark", type=eval, default=False, help="To log some information about environment and running."
    )
    parser.add_argument("--save_log_path", type=str, default="./log_output/", help="The file path to save log.")
    parser.add_argument(
        "--max_steps", default=-1, type=int, help="If > 0: set total number of predict steps to perform."
    )
    args = parser.parse_args()

    # Build the inference predictor from the exported static model.
    predictor = Predictor(
        model_dir=args.model_dir,
        device=args.device,
        max_seq_length=args.max_seq_length,
        batch_size=args.batch_size,
        use_tensorrt=args.use_tensorrt,
        precision=args.precision,
        cpu_threads=args.cpu_threads,
        enable_mkldnn=args.enable_mkldnn,
        benchmark=args.benchmark,
        save_log_path=args.save_log_path,
    )

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    test_ds = load_dataset("glue", "sst-2", splits=["test"])

    # Collect raw sentences, optionally capped at --max_steps examples.
    texts = [example["sentence"] for example in test_ds]
    if args.max_steps > 0:
        texts = texts[: args.max_steps]

    id_to_label = {0: "negative", 1: "positive"}

    # Predict batch by batch and flatten the per-batch results.
    predictions = []
    for start in range(0, len(texts), args.batch_size):
        chunk = texts[start : start + args.batch_size]
        predictions.extend(predictor.predict(chunk, tokenizer, id_to_label))

    for text, label in zip(texts, predictions):
        print("Data: {} \t Label: {}".format(text, label))
    if args.benchmark:
        predictor.autolog.report()