19
import onnxruntime as ort
23
from paddlenlp.transformers import (
26
tokenize_special_chars,
28
from paddlenlp.utils.log import logger
31
class InferBackend(object):
32
def __init__(self, model_path_prefix, device="cpu", device_id=0, use_fp16=False, num_threads=10):
34
if not isinstance(device, six.string_types):
36
">>> [InferBackend] The type of device must be string, but the type you set is: ", type(device)
39
if device not in ["cpu", "gpu"]:
40
logger.error(">>> [InferBackend] The device must be cpu or gpu, but your device is set to:", type(device))
43
logger.info(">>> [InferBackend] Creating Engine ...")
45
onnx_model = paddle2onnx.command.c_paddle_to_onnx(
46
model_file=model_path_prefix + ".pdmodel",
47
params_file=model_path_prefix + ".pdiparams",
49
enable_onnx_checker=True,
51
infer_model_dir = model_path_prefix.rsplit("/", 1)[0]
52
float_onnx_file = os.path.join(infer_model_dir, "model.onnx")
53
with open(float_onnx_file, "wb") as f:
57
logger.info(">>> [InferBackend] Use GPU to inference ...")
58
providers = ["CUDAExecutionProvider"]
60
logger.info(">>> [InferBackend] Use FP16 to inference ...")
62
from onnxconverter_common import float16
64
fp16_model_file = os.path.join(infer_model_dir, "fp16_model.onnx")
65
onnx_model = onnx.load_model(float_onnx_file)
66
trans_model = float16.convert_float_to_float16(onnx_model, keep_io_types=True)
67
onnx.save_model(trans_model, fp16_model_file)
68
onnx_model = fp16_model_file
70
logger.info(">>> [InferBackend] Use CPU to inference ...")
71
providers = ["CPUExecutionProvider"]
74
">>> [InferBackend] Ignore use_fp16 as it only " + "takes effect when deploying on gpu..."
77
sess_options = ort.SessionOptions()
78
sess_options.intra_op_num_threads = num_threads
79
self.predictor = ort.InferenceSession(
80
onnx_model, sess_options=sess_options, providers=providers, provider_options=[{"device_id": device_id}]
83
self.input_handles = [
84
self.predictor.get_inputs()[0].name,
85
self.predictor.get_inputs()[1].name,
90
assert "CUDAExecutionProvider" in self.predictor.get_providers()
91
except AssertionError:
93
"""The environment for GPU inference is not set properly. \nA possible cause is that you had installed both onnxruntime and onnxruntime-gpu. \nPlease run the following commands to reinstall: \n1) pip uninstall -y onnxruntime onnxruntime-gpu \n2) pip install onnxruntime-gpu"""
95
logger.info(">>> [InferBackend] Engine Created ...")
97
def infer(self, input_dict: dict):
    """Run the ONNX Runtime session on the inputs the model declares.

    Args:
        input_dict: Mapping from input name to the value fed to the session.
            Entries whose name is not in ``self.input_handles`` are dropped
            before the call.

    Returns:
        Whatever ``self.predictor.run`` returns for the filtered feed.
    """
    accepted_names = self.input_handles
    feed = {}
    for name, value in input_dict.items():
        if name in accepted_names:
            feed[name] = value
    # ``None`` as the first argument requests every output of the session.
    return self.predictor.run(None, feed)
103
class EHealthPredictor(object):
    """Base predictor: tokenizer/backend setup plus the shared predict pipeline.

    ``predict`` chains ``preprocess`` -> ``infer_batch`` -> ``postprocess`` ->
    ``printer``. The three task hooks (and ``get_text_and_label``) raise
    ``NotImplementedError`` here and are meant to be provided by subclasses.
    """

    def __init__(self, args, label_list):
        """Load the tokenizer and build the inference backend.

        Args:
            args: Object exposing ``model_name_or_path``, ``max_seq_length``,
                ``batch_size``, ``model_path_prefix``, ``device``,
                ``device_id``, ``use_fp16`` and ``num_threads``.
            label_list: Label vocabulary stored as-is for the subclasses.
        """
        self.label_list = label_list
        self._tokenizer = AutoTokenizer.from_pretrained(args.model_name_or_path, use_fast=True)
        self._max_seq_length = args.max_seq_length
        self._batch_size = args.batch_size
        self.inference_backend = InferBackend(
            args.model_path_prefix, args.device, args.device_id, args.use_fp16, args.num_threads
        )

    def predict(self, input_data: list):
        """Run the full pipeline on ``input_data``, log it and return the result."""
        encoded_inputs = self.preprocess(input_data)
        infer_result = self.infer_batch(encoded_inputs)
        result = self.postprocess(infer_result)
        self.printer(result, input_data)
        return result

    def _infer(self, input_dict):
        """Forward one batch to the inference backend and return its outputs."""
        return self.inference_backend.infer(input_dict)

    def infer_batch(self, encoded_inputs):
        """Run inference in slices of ``self._batch_size`` and stitch the outputs.

        Args:
            encoded_inputs: Dict of sliceable inputs; the number of samples is
                taken from ``encoded_inputs["input_ids"]``.

        Returns:
            A list with one array per backend output, each concatenated along
            axis 0 over all batches. An empty list when there are no samples.
        """
        num_sample = len(encoded_inputs["input_ids"])
        per_output = None
        for start in range(0, num_sample, self._batch_size):
            stop = start + self._batch_size
            batch = {name: values[start:stop] for name, values in encoded_inputs.items()}
            outputs = self._infer(batch)
            if per_output is None:
                # The first batch fixes how many outputs the model produces.
                per_output = [[out] for out in outputs]
            else:
                for pieces, out in zip(per_output, outputs):
                    pieces.append(out)
        if per_output is None:
            # No samples: nothing was inferred, so there is nothing to concatenate.
            return []
        return [np.concatenate(pieces, axis=0) for pieces in per_output]

    def performance(self, encoded_inputs):
        """Time one batched inference pass over ``encoded_inputs`` and log it.

        Logs the sample count, the elapsed seconds and the per-sample latency
        in milliseconds (reported as 0 when there are no samples).
        """
        nums = len(encoded_inputs["input_ids"])
        # perf_counter is monotonic, so the measurement is immune to clock changes.
        start_time = time.perf_counter()
        self.infer_batch(encoded_inputs)
        total_time = time.perf_counter() - start_time
        latency_ms = 1000 * total_time / nums if nums else 0.0
        logger.info("sample nums: %d, time: %.2f, latency: %.2f ms" % (nums, total_time, latency_ms))

    def get_text_and_label(self, dataset):
        """Subclass hook; not implemented in the base class."""
        raise NotImplementedError

    def preprocess(self, input_data: list):
        """Subclass hook: turn raw inputs into the dict fed to ``infer_batch``."""
        raise NotImplementedError

    def postprocess(self, infer_data):
        """Subclass hook: turn the backend outputs into a result object."""
        raise NotImplementedError

    def printer(self, result, input_data):
        """Subclass hook: report ``result`` for ``input_data``."""
        raise NotImplementedError
163
class CLSPredictor(EHealthPredictor):
164
def preprocess(self, input_data: list):
    """Normalize and tokenize single texts or text pairs.

    Args:
        input_data: Non-empty list. When its first element is a list, every
            sample is read as ``[text, text_pair]``; otherwise every sample is
            a single text.

    Returns:
        Dict with int64 arrays under ``"input_ids"`` and ``"token_type_ids"``.
    """

    def _normalize(raw):
        return tokenize_special_chars(normalize_chars(raw))

    if not isinstance(input_data[0], list):
        first_texts = [_normalize(sample) for sample in input_data]
        second_texts = None
    else:
        first_texts = [_normalize(pair[0]) for pair in input_data]
        second_texts = [_normalize(pair[1]) for pair in input_data]

    tokenized = self._tokenizer(
        text=first_texts,
        text_pair=second_texts,
        max_length=self._max_seq_length,
        padding=True,
        truncation=True,
    )

    return {
        "input_ids": np.array(tokenized["input_ids"], dtype="int64"),
        "token_type_ids": np.array(tokenized["token_type_ids"], dtype="int64"),
    }
184
def postprocess(self, infer_data):
    """Turn classification logits into predicted labels and confidences.

    Args:
        infer_data: Sequence whose first item holds the logits, indexed as
            (sample, class).

    Returns:
        Dict with ``"label"`` (argmax class index per sample) and
        ``"confidence"`` (softmax probability of that class).
    """
    logits = infer_data[0]
    # Subtract the row maximum before exponentiating to keep exp() from overflowing.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    weights = np.exp(shifted)
    probs = weights / np.sum(weights, axis=1, keepdims=True)
    return {"label": probs.argmax(axis=-1), "confidence": probs.max(axis=-1)}
193
def printer(self, result, input_data):
    """Log each input with its predicted label name and confidence.

    Args:
        result: Dict with ``"label"`` and ``"confidence"`` sequences, as
            returned by ``postprocess``.
        input_data: The raw inputs, aligned by position with ``result``.
    """
    label_ids = result["label"]
    confidences = result["confidence"]
    for idx, label_id in enumerate(label_ids):
        logger.info("input data: {}".format(input_data[idx]))
        logger.info("labels: {}, confidence: {}".format(self.label_list[label_id], confidences[idx]))
        logger.info("-----------------------------")
201
class NERPredictor(EHealthPredictor):
202
"""The predictor for CMeEE dataset."""
216
def _extract_chunk(self, tokens):
218
start_idx, cur_idx = 0, 0
219
while cur_idx < len(tokens):
220
if tokens[cur_idx][0] == "B":
223
while cur_idx < len(tokens) and tokens[cur_idx][0] == "I":
224
if tokens[cur_idx][2:] == tokens[start_idx][2:]:
228
if cur_idx < len(tokens) and tokens[cur_idx][0] == "E":
229
if tokens[cur_idx][2:] == tokens[start_idx][2:]:
230
chunks.add((tokens[cur_idx][2:], start_idx - 1, cur_idx))
232
elif tokens[cur_idx][0] == "S":
233
chunks.add((tokens[cur_idx][2:], cur_idx - 1, cur_idx))
239
def preprocess(self, infer_data):
    """Lower-case every element of every sample and tokenize the batch.

    Args:
        infer_data: Iterable of samples; each sample is iterated element by
            element (character by character for a string) and every element
            is lower-cased before tokenization.

    Returns:
        Dict with int64 arrays under ``"input_ids"`` and ``"token_type_ids"``.
    """
    lowered = [[piece.lower() for piece in sample] for sample in infer_data]
    tokenized = self._tokenizer(
        lowered,
        max_length=self._max_seq_length,
        padding=True,
        is_split_into_words=True,
        truncation=True,
    )
    return {
        "input_ids": np.array(tokenized["input_ids"], dtype="int64"),
        "token_type_ids": np.array(tokenized["token_type_ids"], dtype="int64"),
    }
251
def postprocess(self, infer_data):
    """Decode the two tag sequences of every sample into entity records.

    Args:
        infer_data: Sequence whose first two items are the per-token scores of
            the two tagging heads; the argmax over the last axis is the tag id.

    Returns:
        ``{"entity": [...]}`` with one list per sample. Each entry is a dict
        with ``"type"`` (looked up in ``self.en_to_cn``), ``"start_id"`` and
        ``"end_id"``; chunks from the first head precede those of the second.
    """
    oth_tag_ids = np.argmax(infer_data[0], axis=-1)
    sym_tag_ids = np.argmax(infer_data[1], axis=-1)

    entity = []
    for oth_row, sym_row in zip(oth_tag_ids, sym_tag_ids):
        oth_tags = [self.label_list[0][tag_id] for tag_id in oth_row]
        sym_tags = [self.label_list[1][tag_id] for tag_id in sym_row]
        found = self._extract_chunk(oth_tags) + self._extract_chunk(sym_tags)
        entity.append(
            [{"type": self.en_to_cn[etype], "start_id": sid, "end_id": eid} for etype, sid, eid in found]
        )
    return {"entity": entity}
265
def printer(self, result, input_data):
266
result = result["entity"]
267
for i, preds in enumerate(result):
268
logger.info("input data: {}".format(input_data[i]))
269
logger.info("detected entities:")
272
"* entity: {}, type: {}, position: ({}, {})".format(
273
input_data[i][item["start_id"] : item["end_id"]],
279
logger.info("-----------------------------")
282
class SPOPredictor(EHealthPredictor):
283
"""The predictor for the CMeIE dataset."""
285
def predict(self, input_data: list):
    """Run the pipeline, handing per-sample lengths to ``postprocess``.

    Unlike a plain preprocess/infer/postprocess chain, the row sums of the
    encoded ``"attention_mask"`` are computed before inference and passed to
    ``postprocess`` as the second argument.

    Args:
        input_data: Raw inputs accepted by ``preprocess``.

    Returns:
        The value produced by ``postprocess`` (also passed to ``printer``).
    """
    features = self.preprocess(input_data)
    # Sum of the mask over the last axis, one value per sample.
    seq_lengths = features["attention_mask"].sum(axis=-1)
    raw_outputs = self.infer_batch(features)
    result = self.postprocess(raw_outputs, seq_lengths)
    self.printer(result, input_data)
    return result
293
def preprocess(self, infer_data):
294
infer_data = [[x.lower() for x in text] for text in infer_data]
295
data = self._tokenizer(
297
max_length=self._max_seq_length,
299
is_split_into_words=True,
301
return_attention_mask=True,
304
"input_ids": np.array(data["input_ids"], dtype="int64"),
305
"token_type_ids": np.array(data["token_type_ids"], dtype="int64"),
306
"attention_mask": np.array(data["attention_mask"], dtype="float32"),
308
return encoded_inputs
310
def postprocess(self, infer_data, lengths):
311
ent_logits = np.array(infer_data[0])
312
spo_logits = np.array(infer_data[1])
315
for idx, ent_pred in enumerate(ent_logits):
316
seq_len = lengths[idx] - 2
317
start = np.where(ent_pred[:, 0] > 0.5)[0]
318
end = np.where(ent_pred[:, 1] > 0.5)[0]
323
if (x == 0) or (x > seq_len):
329
ent_idxs[x] = (x - 1, y - 1)
330
ent_pred.append((x - 1, y - 1))
331
ent_pred_list.append(ent_pred)
332
ent_idxs_list.append(ent_idxs)
334
spo_preds = spo_logits > 0
335
spo_pred_list = [[] for _ in range(len(spo_preds))]
336
idxs, preds, subs, objs = np.nonzero(spo_preds)
337
for idx, p_id, s_id, o_id in zip(idxs, preds, subs, objs):
338
obj = ent_idxs_list[idx].get(o_id, None)
341
sub = ent_idxs_list[idx].get(s_id, None)
344
spo_pred_list[idx].append((tuple(sub), p_id, tuple(obj)))
346
return {"entity": ent_pred_list, "spo": spo_pred_list}
348
def printer(self, result, input_data):
349
ent_pred_list, spo_pred_list = result["entity"], result["spo"]
350
for i, (ent, rel) in enumerate(zip(ent_pred_list, spo_pred_list)):
351
logger.info("input data: {}".format(input_data[i]))
352
logger.info("detected entities and relations:")
354
logger.info("* entity: {}, position: ({}, {})".format(input_data[i][sid : eid + 1], sid, eid))
357
"+ spo: ({}, {}, {})".format(
358
input_data[i][s[0] : s[1] + 1], self.label_list[p], input_data[i][o[0] : o[1] + 1]
361
logger.info("-----------------------------")