# rag-chatbot-2
# 47 lines · 2.0 KB
1from pathlib import Path
2
3import chromadb
4from bot.memory.embedder import EmbedderHuggingFace
5from bot.memory.vector_memory import VectorMemory
6from helpers.prettier import prettify_source
7from langchain_community.vectorstores.chroma import Chroma
8
9if __name__ == "__main__":
10root_folder = Path(__file__).resolve().parent.parent
11# Contains an extract of documents uploaded to the RAG bot;
12declarative_vector_store_path = root_folder / "vector_store" / "docs_index"
13# Contains an extract of things the user said in the past;
14episodic_vector_store_path = root_folder / "vector_store" / "episodic_index"
15
16embedding = EmbedderHuggingFace().get_embedding()
17index = VectorMemory(vector_store_path=str(declarative_vector_store_path), embedding=embedding)
18
19# query = "<write_your_query_here>"
20query = "tell me a joke about ClearML"
21
22matched_docs, sources = index.similarity_search(query)
23
24for source in sources:
25print(prettify_source(source))
26
27persistent_client = chromadb.PersistentClient(path=str(episodic_vector_store_path))
28collection = persistent_client.get_or_create_collection("episodic_memory")
29collection.add(ids=["1", "2", "3"], documents=["a", "b", "c"])
30langchain_chroma = Chroma(
31client=persistent_client,
32collection_name="episodic_memory",
33embedding_function=embedding,
34)
35docs = langchain_chroma.similarity_search("a")
36docs_with_score = langchain_chroma.similarity_search_with_score("a")
37docs_with_relevance_score = langchain_chroma.similarity_search_with_relevance_scores("a")
38matched_doc = max(docs_with_relevance_score, key=lambda x: x[1])
39
40# The returned distance score is cosine distance. Therefore, a lower score is better.
41results = collection.query(
42query_texts=["a"],
43n_results=2,
44# where={"metadata_field": "is_equal_to_this"}, # optional filter
45# where_document={"$contains":"search_string"} # optional filter
46)
47print(results)
48