fastembed
29 lines · 1.0 KB
"""Load an ONNX export of a MiniLM sentence-transformers model, run it on a
sample sentence, and print the attention weights of the first layer and head.

Expects ``models/<model_id>/model.onnx`` to be an export made with
``output_attentions=True`` so the attention tensors appear among the
session outputs. (The original script carried an unused
``model_kwargs = {"output_attentions": True, "return_dict": True}`` dict
hinting at this; it was dead code and has been removed.)
"""

import numpy as np
import onnx
import onnxruntime
from transformers import AutoTokenizer


def main() -> None:
    """Run the exported model once and print its attention weights."""
    model_id = "sentence-transformers/paraphrase-MiniLM-L6-v2"
    output_dir = f"models/{model_id.replace('/', '_')}"
    model_path = f"{output_dir}/model.onnx"

    # The original loaded the ONNX graph but never used it; validating it
    # gives the load a purpose and fails fast on a corrupt export.
    onnx.checker.check_model(onnx.load(model_path))

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    ort_session = onnxruntime.InferenceSession(model_path)

    text = "This is a test sentence"
    encoding = tokenizer(text, return_tensors="np")
    # ONNX Runtime expects int64 token inputs; cast BOTH tensors
    # consistently (the original cast only input_ids).
    input_ids = encoding["input_ids"].astype(np.int64)
    attention_mask = encoding["attention_mask"].astype(np.int64)
    print(attention_mask)

    # Run the ONNX model (None -> return all declared outputs).
    outputs = ort_session.run(
        None, {"input_ids": input_ids, "attention_mask": attention_mask}
    )

    # NOTE(review): assumes the LAST session output holds the per-layer
    # attention tensors (true only if the model was exported with
    # output_attentions=True) — confirm against the export script.
    attentions = outputs[-1]

    # Attention weights for the first layer, first head.
    print(attentions[0][0])


if __name__ == "__main__":
    main()