2
"""This example shows how to use ChromaDB as a vector embedding database with llmware"""
4
""" (A) Python Dependencies -
6
As a first step, you should pip install ChromaDB, which is not included in the llmware package:
7
1. pip3 install chromadb
11
Installing ChromaDB via pip installs everything you need.
12
However, if you need help, there are many great online sources and communities, e.g.,:
13
-- ChromaDB documentation - https://docs.trychroma.com/
14
-- Docker - https://hub.docker.com/u/chromadb
15
-- please also see the docker-compose-chromadb.yaml script provided in the llmware script repository
19
You can configure ChromaDB with environment variables. Here is the list of variable names we currently
20
support - for more information see ChromaDBConfig.
21
-- CHROMADB_COLLECTION
22
-- CHROMADB_PERSISTENT_PATH
27
-- CHROMADB_SERVER_AUTH_PROVIDER
28
-- CHROMADB_SERVER_AUTH_CREDENTIALS_PROVIDER
31
-- CHROMADB_SERVER_AUTH_CREDENTIALS_FILE
32
-- CHROMADB_SERVER_AUTH_CREDENTIALS
33
-- CHROMADB_SERVER_AUTH_TOKEN_TRANSPORT_HEADER
"""
39
import os

from llmware.setup import Setup
from llmware.library import Library
from llmware.retrieval import Query
44
# Select the ChromaDB collection through its environment variable
# (CHROMADB_COLLECTION is one of the variables listed in the header notes).
os.environ.update(CHROMADB_COLLECTION="llmware")
50
def build_lib(library_name, folder="Agreements"):
    """Create a library and add the documents from one sample folder to it.

    Args:
        library_name: Name passed to ``Library().create_new_library``.
        folder: Sub-folder of the sample-files path whose contents are added
            to the library. Defaults to ``"Agreements"``.

    Returns:
        The library object produced by ``create_new_library``, after
        ``add_files`` has been called on it.
    """

    # Step 1 - create the library that will receive the documents.
    print("\nupdate: Step 1 - Creating library: {}".format(library_name))
    library = Library().create_new_library(library_name)

    # Step 2 - obtain the path of the sample files.
    # NOTE(review): over_write=False presumably keeps an existing local copy
    # instead of fetching again - confirm against Setup.load_sample_files.
    print("update: Step 2 - Downloading Sample Files")
    sample_files_path = Setup().load_sample_files(over_write=False)

    # Step 3 - add the files found in the selected sample folder.
    print("update: Step 3 - Parsing and Text Indexing Files")
    library.add_files(input_folder_path=os.path.join(sample_files_path, folder))

    # The calling script invokes methods on the result, so the library must
    # be handed back rather than implicitly returning None.
    return library
75
# Step 1 - build the library from the sample documents.
print("update: Step 1- starting here- building library- parsing PDFs into text chunks")
lib = build_lib("chromadb_lib_0")

# Show the library card as it looks before any embedding is installed.
lib_card = lib.get_library_card()
print("update: -- before embedding process - check library card - ", lib_card)

# Step 2 - install embeddings for the library, using chromadb as vector db.
print("update: Step 2 - starting to install embeddings")
lib.install_new_embedding(
    embedding_model_name="industry-bert-contracts",
    vector_db="chromadb",
    batch_size=300,
)

# Fetch the library card again so the post-embedding state can be inspected.
lib_card = lib.get_library_card()
print("update: -- after embedding process - check updated library card - ", lib_card)

# Run a semantic query with the same embedding model name used above.
query_chromadb = Query(lib, embedding_model_name="industry-bert-contracts")
my_search_results = query_chromadb.semantic_query(
    "What is the sale bonus?",
    result_count=24,
)

for position, result in enumerate(my_search_results):
    print("update: semantic query results: ", position, result)

# List the embedding status entries reported by the library.
emb_record = lib.get_embedding_status()

for position, record in enumerate(emb_record):
    print("update: embeddings on library: ", position, record)