financial-assistant
33 строки · 1.0 Кб
1import numpy as np, csv2from sentence_transformers import SentenceTransformer, util3from rank_bm25 import BM25Okapi4
# Name (or local path) of the SentenceTransformer checkpoint used to embed queries.
model_id = 'intfloat/multilingual-e5-base'
# The encoder is instantiated once, at import time, and reused by DenseRetriever.
model = SentenceTransformer(model_id)

def DenseRetriever(query, vectors, top_k=20):
    """Return the indices of the corpus vectors most similar to ``query``.

    The query is embedded with the module-level ``model`` (with normalised
    embeddings) and compared against ``vectors`` by cosine similarity.

    Args:
        query: Query text to embed.
        vectors: Pre-computed corpus embeddings in any form accepted by
            ``util.pytorch_cos_sim``; returned indices refer to its rows.
            Presumably row ``i`` corresponds to row ``i`` of the text
            database -- confirm against the caller.
        top_k: Maximum number of indices to return. Defaults to 20, the
            value that was previously hard-coded.

    Returns:
        A list of at most ``top_k`` integer indices into ``vectors``, ordered
        from most to least similar. Empty when ``top_k`` is not positive.
    """
    # Nothing to return: skip the (comparatively expensive) encoding step.
    if top_k <= 0:
        return []

    # NOTE(review): E5 checkpoints are documented to expect "query: " /
    # "passage: " prefixes on their inputs; no prefix is added here. Confirm
    # how ``vectors`` were produced before changing this.
    vec_query = model.encode(query, normalize_embeddings=True)
    sim = util.pytorch_cos_sim(vec_query, vectors)[0]

    # The similarity row is a torch tensor. Move it to host memory explicitly
    # so that sorting with NumPy also works when the tensor lives on a GPU.
    scores = sim.detach().cpu().numpy()

    # Negate so that argsort's ascending order yields the best match first.
    return np.argsort(-scores)[:top_k].tolist()

def SparseRetriever(query, filtered_indices, top_n=20, texts_path='db_texts.csv'):
    """Rank a set of candidate rows with BM25 and return the best row indices.

    The ``preprocessed`` column of the CSV at ``texts_path`` is read, the rows
    named by ``filtered_indices`` are tokenised on single spaces, and a BM25
    index built over just those rows is queried with ``query``.

    Args:
        query: Query text; tokenised by splitting on single spaces.
        filtered_indices: Iterable of zero-based row indices (into the CSV
            data rows) that form the candidate set.
        top_n: Maximum number of indices to return. Defaults to 20, the
            value that was previously hard-coded.
        texts_path: Path of the CSV file holding a ``preprocessed`` column.
            Defaults to the previously hard-coded ``'db_texts.csv'``.

    Returns:
        A list of at most ``top_n`` row indices taken from
        ``filtered_indices``, ordered from highest to lowest BM25 score.
        Empty when there are no candidates or ``top_n`` is not positive.

    Raises:
        OSError: If ``texts_path`` cannot be opened.
        KeyError: If the CSV has no ``preprocessed`` column.
        IndexError: If a candidate index is outside the CSV's row range.
    """
    candidates = [int(index) for index in filtered_indices]

    # BM25Okapi divides by the corpus size, so an empty candidate set must be
    # handled before the index is built.
    if not candidates or top_n <= 0:
        return []

    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(texts_path, 'r', encoding='utf-8', newline='') as csvfile:
        texts = [row['preprocessed'] or '' for row in csv.DictReader(csvfile)]

    # Only the text itself is tokenised. The row number is tracked separately
    # in ``candidates`` so that it can never match a numeric query term.
    tokenized_corpus = [texts[index].split(' ') for index in candidates]
    bm25 = BM25Okapi(tokenized_corpus)
    tokenized_query = query.split(" ")

    # get_top_n returns entries of its ``documents`` argument in score order;
    # passing the candidate indices yields the original row numbers directly.
    return bm25.get_top_n(tokenized_query, candidates, n=top_n)