financial-assistant
92 строки · 3.9 Кб
1import csv, numpy as np2from sentence_transformers import SentenceTransformer3from ..models import Bank, LoanDetailedDescription, ProductCategories4from .preprocessing import preprocessing5
# Name of the embedding checkpoint handed to SentenceTransformer below.
model_id = 'intfloat/multilingual-e5-base'
# Built once at import time; load_dense_vectors() uses it to encode texts.
model = SentenceTransformer(model_id)
# Module-level state rebound by get_db_texts(): the descriptions returned by
# the most recent CSV query and the CSV row positions they came from.
# NOTE: "prepocessed" (sic) -- the spelling is kept because other code in this
# module refers to these names.
prepocessed_db_texts = []
prepocessed_db_texts_idx = []
def load_db_texts_from_csv(bank_id, product_id, filename='db_texts.csv'):
    """Read descriptions from the CSV cache, optionally filtered.

    Args:
        bank_id: Keep only rows whose ``bank_id`` column, converted with
            ``int()``, equals this value. ``None`` disables the filter.
        product_id: Same as ``bank_id`` but for the ``product_id`` column.
        filename: Path of the CSV file to read.

    Returns:
        A ``(db_texts, db_texts_idx)`` tuple. ``db_texts`` holds the
        ``description`` column of every matching row; ``db_texts_idx`` holds
        the 0-based position of each of those rows among the file's data
        rows (the header is not counted). Both lists are empty when the file
        does not exist.

    Raises:
        ValueError: If a filtered column holds a value ``int()`` rejects.
        KeyError: If a required column is missing from the file.
    """
    db_texts = []
    db_texts_idx = []
    try:
        # newline='' lets the csv module handle line endings itself, so
        # newlines embedded in quoted fields are read back unchanged.
        with open(filename, 'r', encoding='utf-8', newline='') as csvfile:
            # Row positions are always taken from enumerate() so that the
            # unfiltered case yields indices too.
            for idx, row in enumerate(csv.DictReader(csvfile)):
                if bank_id is not None and int(row['bank_id']) != bank_id:
                    continue
                if product_id is not None and int(row['product_id']) != product_id:
                    continue
                db_texts.append(row['description'])
                db_texts_idx.append(idx)
    except FileNotFoundError:
        # Deliberate best effort: a missing cache file is reported as an
        # empty result rather than an error.
        pass
    return db_texts, db_texts_idx
def save_db_texts_to_csv(db_texts , filename='db_texts.csv'):
    """Write ``'||'``-delimited records to a CSV file, replacing its contents.

    Args:
        db_texts: Iterable of strings shaped
            ``bank_id||product_id||category_id||preprocessed||description``.
        filename: Path of the CSV file to (over)write.

    Raises:
        ValueError: If a record has fewer than five ``'||'``-separated parts.
    """
    fieldnames = ['bank_id', 'product_id', 'category_id', 'preprocessed', 'description']
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for text in db_texts:
            # Only the first four '||' are field separators. The description
            # is free text and may itself contain '||', so it takes the rest.
            bank_id, product_id, category_id, preprocessed, description = text.split('||', 4)
            writer.writerow({
                'bank_id': bank_id,
                'product_id': product_id,
                'category_id': category_id,
                'preprocessed': preprocessed,
                'description': description,
            })
def get_db_texts(bank_id, product_id):
    """Return the descriptions, and their CSV row positions, for the filters.

    The CSV cache is queried first. If that yields nothing, the cache is
    rebuilt from every ``LoanDetailedDescription`` row (ordered by ``id``),
    written with ``save_db_texts_to_csv`` and queried again.

    Args:
        bank_id: Bank filter passed to ``load_db_texts_from_csv``.
        product_id: Product filter passed to ``load_db_texts_from_csv``.

    Returns:
        The ``(texts, indices)`` tuple produced by
        ``load_db_texts_from_csv`` for these filters. The same two lists are
        also stored in the module globals ``prepocessed_db_texts`` and
        ``prepocessed_db_texts_idx``.
    """
    global prepocessed_db_texts, prepocessed_db_texts_idx

    # Always query with the *current* filters. The module globals hold the
    # result of whichever call ran last, so they must not short-circuit a
    # call made with different arguments.
    texts, indices = load_db_texts_from_csv(bank_id, product_id)

    if not texts:
        # NOTE: an empty result is treated as "cache missing", so a filter
        # that matches nothing also triggers a rebuild (as it did before).
        records = []
        for desc in LoanDetailedDescription.objects.order_by('id').all():
            bank = Bank.objects.get(id=desc.bank_id_id).nameRus.lower()
            category = ProductCategories.objects.get(id=desc.category_id_id).categoryNameRus.lower()
            title = desc.title.lower()
            description = desc.description
            link = desc.link
            preprocessed = preprocessing(f"{description}")

            # '||' separates the CSV columns; the single '|' joins the parts
            # that together form the description column.
            records.append(f"{desc.bank_id_id}||{desc.product_id_id}||{desc.category_id_id}||{preprocessed}||{bank}|{category}|{title}|{description}|{link}")

        save_db_texts_to_csv(records)
        texts, indices = load_db_texts_from_csv(bank_id, product_id)

    prepocessed_db_texts, prepocessed_db_texts_idx = texts, indices
    return texts, indices
def load_dense_vectors(indexies):
    """Return the rows of the embedding matrix selected by ``indexies``.

    The matrix is read from ``dense_vectors.npy``. If that file is missing,
    the ``preprocessed`` column of ``db_texts.csv`` is encoded with the
    module-level ``model`` (normalized embeddings) and the result is saved
    to ``dense_vectors.npy`` before indexing.

    NOTE: the ``.npy`` file is only generated when absent; nothing here
    refreshes it if ``db_texts.csv`` changes afterwards.

    Args:
        indexies: Index expression applied to the loaded array.

    Returns:
        ``embeddings[indexies]``.
    """
    vectors_path = 'dense_vectors.npy'
    try:
        embeddings = np.load(vectors_path)
    except FileNotFoundError:
        with open('db_texts.csv', 'r', encoding='utf-8') as source:
            corpus = [record['preprocessed'] for record in csv.DictReader(source)]

        embeddings = model.encode(corpus, normalize_embeddings=True)
        np.save(vectors_path, embeddings)

    return embeddings[indexies]
def load_texts_by_indices(db_texts, indices):
    """Pick the entries of ``db_texts`` at the given positions.

    Args:
        db_texts: Sequence of texts to select from.
        indices: Iterable of positions; each is converted with ``int()``.

    Returns:
        A list of the selected texts in the order of ``indices``. Positions
        outside ``0 <= position < len(db_texts)`` are skipped; negative
        values are treated as out of range rather than counted from the end.
    """
    size = len(db_texts)
    positions = (int(idx) for idx in indices)
    return [db_texts[pos] for pos in positions if 0 <= pos < size]