financial-assistant
63 строки · 2.3 Кб
import hashlib
import os

from django.http import JsonResponse
from langchain.retrievers import EnsembleRetriever
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.retrievers import BM25Retriever
from langchain_community.vectorstores import FAISS
from sentence_transformers import SentenceTransformer

from ..assistant import preprocessing, namedEntities, dataLoader

# Hugging Face identifier of the embedding model; get_faiss_vectorstore
# passes the same id to HuggingFaceEmbeddings.
model_id = 'intfloat/multilingual-e5-base'

# SentenceTransformer instance created once, at import time.
# NOTE(review): no function in this module reads `model`; presumably it is
# imported from elsewhere — confirm before removing.
model = SentenceTransformer(model_id)

def get_preprocessed_texts(query):
    """Return the preprocessed database texts selected for *query*.

    The query is run through ``preprocessing.preprocessing`` and split on
    single spaces into lemmas. The lemmas are passed to
    ``namedEntities.determine_bank_product`` to obtain a bank id and a
    product id, which are then used to fetch texts with
    ``dataLoader.get_db_texts``.

    Args:
        query: Query text to preprocess.

    Returns:
        The first element of the pair returned by
        ``dataLoader.get_db_texts``; the second element is discarded.
    """
    lemmas = preprocessing.preprocessing(query).split(' ')
    bank_id, product_id = namedEntities.determine_bank_product(lemmas)
    # get_db_texts returns a pair; only the texts are needed here.
    db_texts, _ = dataLoader.get_db_texts(bank_id, product_id)
    return db_texts

22
def get_faiss_vectorstore(prepocessed_db_texts):
    """Return a FAISS vector store built over *prepocessed_db_texts*.

    The store is cached on disk under ``faiss_index/<digest>``, where the
    digest is derived from the embedding model id and the texts themselves.
    Keying the cache by content matters because callers pass a corpus that
    is selected per query: a single shared index directory would keep
    serving whichever corpus happened to be indexed first.

    A legacy index saved directly in ``faiss_index`` is left untouched and
    is no longer read.

    Args:
        prepocessed_db_texts: Texts to index. Assumed to be a list of
            ``str`` (the texts are sorted and UTF-8 encoded to build the
            cache key) — confirm against ``dataLoader.get_db_texts``.

    Returns:
        A ``FAISS`` vector store for the given texts, either loaded from
        the on-disk cache or freshly built and saved.
    """
    embedding = HuggingFaceEmbeddings(model_name=model_id)

    # Cache key: model id + corpus. Sorting makes the key independent of
    # the order in which the texts were fetched; the NUL separator keeps
    # adjacent texts from running together.
    digest = hashlib.sha256(model_id.encode('utf-8'))
    for text in sorted(prepocessed_db_texts):
        digest.update(b'\x00')
        digest.update(text.encode('utf-8'))
    index_dir = os.path.join('faiss_index', digest.hexdigest()[:16])

    if os.path.exists(index_dir):
        # SECURITY: allow_dangerous_deserialization=True lets load_local
        # unpickle data from disk. This is only safe while the index
        # directory is writable by trusted code alone.
        return FAISS.load_local(
            index_dir,
            embeddings=embedding,
            allow_dangerous_deserialization=True,
        )

    faiss_vectorstore = FAISS.from_texts(prepocessed_db_texts, embedding)
    faiss_vectorstore.save_local(index_dir)
    return faiss_vectorstore

32
def init_retrievers(prepocessed_db_texts, faiss_vectorstore):
    """Create the BM25 retriever and the FAISS-backed retriever.

    Args:
        prepocessed_db_texts: Texts from which the BM25 retriever is built.
        faiss_vectorstore: Vector store whose ``as_retriever`` method
            supplies the second retriever.

    Returns:
        A ``(bm25_retriever, faiss_retriever)`` tuple; both are configured
        with ``k = 5``.
    """
    top_k = 5

    lexical = BM25Retriever.from_texts(prepocessed_db_texts)
    lexical.k = top_k

    dense = faiss_vectorstore.as_retriever(search_kwargs={"k": top_k})
    return lexical, dense

40
def _parse_page_content(page_content):
    """Turn a ``bank|category|title|description|link`` record into a dict."""
    # Only the first three separators and the last one are structural, so a
    # '|' inside the description does not break the unpacking.
    bank, category, title, remainder = page_content.split('|', 3)
    description, link = remainder.rsplit('|', 1)
    return {
        "bank": bank,
        "category": category,
        "title": title,
        "answer": description,
        "link": link,
    }


def main(request):
    """Answer the ``q`` query parameter with the top-ranked document.

    The texts selected for the query are searched with an ensemble of a
    BM25 retriever (weight 0.4) and a FAISS retriever (weight 0.6). The
    first document returned by the ensemble is parsed into the response.

    Args:
        request: Django request; the query is read from ``request.GET['q']``.

    Returns:
        A ``JsonResponse`` with the keys ``bank``, ``category``, ``title``,
        ``answer`` and ``link``. A 400 response with an ``error`` key is
        returned when ``q`` is missing or empty, and a 404 response with an
        ``error`` key when there is nothing to search or nothing is found.

    Raises:
        ValueError: If the top document's ``page_content`` has fewer than
            five ``|``-separated fields.
    """
    query = request.GET.get('q')
    if not query:
        return JsonResponse(
            {"error": "Missing required query parameter 'q'."}, status=400
        )

    prepocessed_db_texts = get_preprocessed_texts(query)
    # len() rather than truthiness: the container type comes from
    # dataLoader and may not define an unambiguous bool().
    if len(prepocessed_db_texts) == 0:
        return JsonResponse({"error": "No matching documents found."}, status=404)

    faiss_vectorstore = get_faiss_vectorstore(prepocessed_db_texts)
    bm25_retriever, faiss_retriever = init_retrievers(
        prepocessed_db_texts, faiss_vectorstore
    )
    ensemble_retriever = EnsembleRetriever(
        retrievers=[bm25_retriever, faiss_retriever], weights=[0.4, 0.6]
    )

    # NOTE(review): return_source_documents looks like a QA-chain option;
    # the retriever presumably ignores it — confirm and drop.
    results = ensemble_retriever.get_relevant_documents(
        query, return_source_documents=True
    )
    if not results:
        # A Django view must always return a response object.
        return JsonResponse({"error": "No matching documents found."}, status=404)

    return JsonResponse(_parse_page_content(results[0].page_content))