paddlenlp
277 lines · 10.4 KB
1# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import argparse16import os17
18import numpy as np19import paddle20from paddle import inference21from scipy.special import softmax22
23from paddlenlp.data import Pad, Tuple24from paddlenlp.datasets import load_dataset25from paddlenlp.transformers import AutoTokenizer26from paddlenlp.utils.log import logger27
28
def convert_example(example, tokenizer, label_list, max_seq_length=512, is_test=False):
    """Tokenize one raw example into model-ready features.

    The tokenizer inserts the BERT special tokens (``[CLS]``/``[SEP]``) and
    produces the sequence-pair mask, so this helper only unpacks the encoder
    output and, for labeled data, converts the label string into an int64 id.

    Args:
        example: The raw text to encode. When ``is_test`` is False it must
            also support ``example["labels"]`` holding the gold label string.
        tokenizer: Callable accepting ``text`` and ``max_seq_len`` keyword
            arguments and returning a dict with ``input_ids`` and
            ``token_type_ids`` entries.
        label_list: Iterable of all label names; each name's position in the
            iteration order defines its integer id.
        max_seq_length: Maximum total sequence length after tokenization;
            longer inputs are truncated by the tokenizer.
        is_test: When True, no label is looked up or returned.

    Returns:
        ``(input_ids, token_type_ids)`` when ``is_test`` is True, otherwise
        ``(input_ids, token_type_ids, label)`` where ``label`` is a
        one-element int64 numpy array.
    """
    encoded = tokenizer(text=example, max_seq_len=max_seq_length)
    input_ids = encoded["input_ids"]
    token_type_ids = encoded["token_type_ids"]

    if is_test:
        return input_ids, token_type_ids

    # A label's position in `label_list` is its integer id.
    label_to_id = {name: idx for idx, name in enumerate(label_list)}
    label_id = np.array([label_to_id[example["labels"]]], dtype="int64")
    return input_ids, token_type_ids, label_id
79
class Predictor(object):
    """Paddle Inference wrapper around an exported static classification model.

    Loads the ``inference.pdmodel`` / ``inference.pdiparams`` pair from
    ``model_dir``, configures the inference engine for the requested device
    (gpu / cpu / xpu) and exposes :meth:`predict` for batched prediction.
    When ``benchmark`` is True, timing is collected via the optional
    ``auto_log`` package.
    """

    def __init__(
        self,
        model_dir,
        device="gpu",
        max_seq_length=128,
        batch_size=32,
        use_tensorrt=False,
        precision="fp32",
        cpu_threads=10,
        enable_mkldnn=False,
        benchmark=False,
        save_log_path="./log_output/",
    ):
        """Build the inference predictor.

        Args:
            model_dir(obj:`str`): Directory containing ``inference.pdmodel``
                and ``inference.pdiparams``.
            device(obj:`str`, defaults to `gpu`): One of ``gpu``/``cpu``/``xpu``.
            max_seq_length(obj:`int`): Maximum tokenized sequence length.
            batch_size(obj:`int`): Batch size; also the TensorRT max batch size.
            use_tensorrt(obj:`bool`): Enable the TensorRT subgraph engine (gpu only).
            precision(obj:`str`): Inference precision, ``fp32``/``fp16``/``int8``.
            cpu_threads(obj:`int`): Math-library thread count when on cpu.
            enable_mkldnn(obj:`bool`): Enable MKL-DNN acceleration when on cpu.
            benchmark(obj:`bool`): Collect timing statistics via ``auto_log``.
            save_log_path(obj:`str`): Directory where ``auto_log`` saves output.

        Raises:
            ValueError: If the model or params file is missing from ``model_dir``.
        """
        self.max_seq_length = max_seq_length
        self.batch_size = batch_size
        self.benchmark = benchmark

        model_file = os.path.join(model_dir, "inference.pdmodel")
        params_file = os.path.join(model_dir, "inference.pdiparams")
        if not os.path.exists(model_file):
            raise ValueError("not find model file path {}".format(model_file))
        if not os.path.exists(params_file):
            raise ValueError("not find params file path {}".format(params_file))
        config = paddle.inference.Config(model_file, params_file)

        if device == "gpu":
            # set GPU configs accordingly
            # such as initialize the gpu memory, enable tensorrt
            config.enable_use_gpu(100, 0)  # 100 MB initial memory pool on GPU card 0
            precision_map = {
                "fp16": inference.PrecisionType.Half,
                "fp32": inference.PrecisionType.Float32,
                "int8": inference.PrecisionType.Int8,
            }
            precision_mode = precision_map[precision]

            if use_tensorrt:
                config.enable_tensorrt_engine(
                    max_batch_size=batch_size, min_subgraph_size=30, precision_mode=precision_mode
                )
        elif device == "cpu":
            # set CPU configs accordingly,
            # such as enable_mkldnn, set_cpu_math_library_num_threads
            config.disable_gpu()
            if enable_mkldnn:
                # cache 10 different shapes for mkldnn to avoid memory leak
                config.set_mkldnn_cache_capacity(10)
                config.enable_mkldnn()
            config.set_cpu_math_library_num_threads(cpu_threads)
        elif device == "xpu":
            # set XPU configs accordingly
            config.enable_xpu(100)  # NOTE(review): presumably an L3 cache size hint — confirm against Paddle XPU docs

        # Feed/fetch ops must be off to use the zero-copy
        # copy_from_cpu / copy_to_cpu handle API in predict().
        config.switch_use_feed_fetch_ops(False)
        self.predictor = paddle.inference.create_predictor(config)
        self.input_handles = [self.predictor.get_input_handle(name) for name in self.predictor.get_input_names()]
        self.output_handle = self.predictor.get_output_handle(self.predictor.get_output_names()[0])

        if benchmark:
            # auto_log is an optional dependency, imported only when benchmarking.
            import auto_log

            pid = os.getpid()
            self.autolog = auto_log.AutoLogger(
                model_name="bert-base",
                model_precision=precision,
                batch_size=self.batch_size,
                data_shape="dynamic",
                save_path=save_log_path,
                inference_config=config,
                pids=pid,
                process_name=None,
                gpu_ids=0,
                time_keys=["preprocess_time", "inference_time", "postprocess_time"],
                warmup=0,
                logger=logger,
            )

    def predict(self, data, tokenizer, label_map):
        """
        Predicts the data labels.

        Args:
            data (obj:`List(str)`): The batch data whose each element is a raw text.
            tokenizer(obj:`PretrainedTokenizer`): This tokenizer inherits from :class:`~paddlenlp.transformers.PretrainedTokenizer`
                which contains most of the methods. Users should refer to the superclass for more information regarding methods.
            label_map(obj:`dict`): The label id (key) to label str (value) map.

        Returns:
            labels(obj:`list[str]`): The predicted label string for each input text.
        """
        if self.benchmark:
            self.autolog.times.start()

        # Tokenize every text; is_test=True so no gold labels are expected.
        examples = []
        for text in data:
            input_ids, segment_ids = convert_example(
                text, tokenizer, label_list=label_map.values(), max_seq_length=self.max_seq_length, is_test=True
            )
            examples.append((input_ids, segment_ids))

        # Pad both input_ids and token_type_ids to the longest sequence in the batch.
        batchify_fn = lambda samples, fn=Tuple(
            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
            Pad(axis=0, pad_val=tokenizer.pad_token_id),  # segment
        ): fn(samples)

        if self.benchmark:
            self.autolog.times.stamp()

        input_ids, segment_ids = batchify_fn(examples)
        self.input_handles[0].copy_from_cpu(input_ids)
        self.input_handles[1].copy_from_cpu(segment_ids)
        self.predictor.run()
        logits = self.output_handle.copy_to_cpu()
        if self.benchmark:
            self.autolog.times.stamp()

        # argmax over class probabilities -> label ids -> label strings.
        probs = softmax(logits, axis=1)
        idx = np.argmax(probs, axis=1)
        idx = idx.tolist()
        labels = [label_map[i] for i in idx]

        if self.benchmark:
            self.autolog.times.end(stamp=True)

        return labels
207
if __name__ == "__main__":
    # Command-line entry point: classify the GLUE SST-2 test split with an
    # exported static model and print one prediction per input sentence.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_dir", type=str, required=True, help="The directory to static model.")
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="The maximum total input sequence length after tokenization. Sequences longer than this will be truncated, sequences shorter will be padded.",
    )
    parser.add_argument("--batch_size", default=2, type=int, help="Batch size per GPU/CPU for training.")
    parser.add_argument(
        "--device",
        choices=["cpu", "gpu", "xpu", "npu"],
        default="gpu",
        help="Select which device to train model, defaults to gpu.",
    )
    parser.add_argument(
        "--use_tensorrt", default=False, type=eval, choices=[True, False], help="Enable to use tensorrt to speed up."
    )
    parser.add_argument(
        "--precision", default="fp32", type=str, choices=["fp32", "fp16", "int8"], help="The tensorrt precision."
    )
    parser.add_argument("--cpu_threads", default=10, type=int, help="Number of threads to predict when using cpu.")
    parser.add_argument(
        "--enable_mkldnn",
        default=False,
        type=eval,
        choices=[True, False],
        help="Enable to use mkldnn to speed up when using cpu.",
    )
    parser.add_argument(
        "--benchmark", type=eval, default=False, help="To log some information about environment and running."
    )
    parser.add_argument("--save_log_path", type=str, default="./log_output/", help="The file path to save log.")
    parser.add_argument(
        "--max_steps", default=-1, type=int, help="If > 0: set total number of predict steps to perform."
    )
    args = parser.parse_args()

    # Build the inference predictor from the exported static model.
    predictor = Predictor(
        model_dir=args.model_dir,
        device=args.device,
        max_seq_length=args.max_seq_length,
        batch_size=args.batch_size,
        use_tensorrt=args.use_tensorrt,
        precision=args.precision,
        cpu_threads=args.cpu_threads,
        enable_mkldnn=args.enable_mkldnn,
        benchmark=args.benchmark,
        save_log_path=args.save_log_path,
    )

    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    test_ds = load_dataset("glue", "sst-2", splits=["test"])

    # Collect raw sentences, optionally capped at --max_steps examples.
    texts = [example["sentence"] for example in test_ds]
    if args.max_steps > 0:
        texts = texts[: args.max_steps]

    id_to_label = {0: "negative", 1: "positive"}

    # Predict batch by batch and flatten the per-batch results.
    predictions = []
    for start in range(0, len(texts), args.batch_size):
        chunk = texts[start : start + args.batch_size]
        predictions.extend(predictor.predict(chunk, tokenizer, id_to_label))

    for text, label in zip(texts, predictions):
        print("Data: {} \t Label: {}".format(text, label))
    if args.benchmark:
        predictor.autolog.report()