2
"""This example shows how to use Neo4j as a vector embedding database with llmware"""
4
""" (A) Python Dependencies -

As a first step, you should pip install the Neo4j driver, which is not included in the llmware package:

    pip3 install neo4j

(B) Installing Neo4j -

If you need help installing Neo4j, there are many great online sources and communities, e.g.,:

    -- Neo4j Installation - https://neo4j.com/docs/operations-manual/current/installation/ (All OS)
    -- Mac OS - https://neo4j.com/docs/operations-manual/current/installation/osx/
    -- Linux - https://neo4j.com/docs/operations-manual/current/installation/linux/
    -- Debian repository - https://debian.neo4j.com/
    -- Windows - https://neo4j.com/docs/operations-manual/current/installation/windows/
    -- Docker - https://hub.docker.com/_/neo4j
    -- please also see the docker-compose-neo4j.yaml script provided in the llmware script repository

(C) Configurations -

    -- set os.environ variables so that they are passed in 'automatically' when installing an embedding
    -- os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
    -- os.environ["NEO4J_USERNAME"] = "neo4j" # by default
    -- os.environ["NEO4J_PASSWORD"] = "neo4j" # by default
    -- os.environ["NEO4J_DATABASE"] = "llmware"

"""
33
import os

from llmware.library import Library
from llmware.retrieval import Query
from llmware.setup import Setup
37
# Connection settings for an example default Neo4j install
# (database = llmware, user = neo4j), applied to the process environment
# in a single update.
os.environ.update(
    {
        "NEO4J_URI": "neo4j://localhost:7687",
        "NEO4J_USERNAME": "neo4j",
        "NEO4J_PASSWORD": "neo4j",
        "NEO4J_DATABASE": "llmware",
    }
)
44
def build_lib(library_name, folder="Agreements"):
    """Create a new library and populate it from a folder of sample files.

    Args:
        library_name: name handed to ``Library().create_new_library``.
        folder: sub-folder of the downloaded sample files to parse
            (options noted by the author: Agreements | UN-Resolutions-500).

    Returns:
        The library object produced by ``create_new_library``, after
        ``add_files`` has been run on it.
    """

    # Step 1 - Create library which is the main 'organizing construct' in llmware
    print("\nupdate: Step 1 - Creating library: {}".format(library_name))
    library = Library().create_new_library(library_name)

    # Step 2 - Pull down the sample files from S3 through the .load_sample_files() command
    #   --note: if you need to refresh the sample files, set 'over_write=True'
    print("update: Step 2 - Downloading Sample Files")
    sample_files_path = Setup().load_sample_files(over_write=False)

    # Step 3 - point ".add_files" method to the folder of documents that was just created
    #   this method parses the documents, text chunks, and captures in MongoDB
    #   (storage backend as stated by the original author's note)
    print("update: Step 3 - Parsing and Text Indexing Files")
    library.add_files(input_folder_path=os.path.join(sample_files_path, folder))

    # The calling script binds the result (`lib = build_lib(...)`) and then
    # calls methods on it, so the populated library has to be handed back.
    return library
69
# Script flow, part 1: build the library, then install the Neo4j embedding.
print("update: Step 1- starting here- building library- parsing PDFs into text chunks")
lib = build_lib("neo4j_lib_0")

# optional - show the library card as it looks before any embedding is installed
print("update: -- before embedding process - check library card - ", lib.get_library_card())

print("update: Step 2 - starting to install embeddings")

# alt embedding models - "mini-lm-sbert" | industry-bert-contracts | text-embedding-ada-002
# note: text-embedding-ada-002 needs an OpenAI key entered into an os.environ variable,
#   e.g., os.environ["USER_MANAGED_OPENAI_API_KEY"] = "<insert your key>"
# batch sizes from 100-500 usually give good performance and work on most environments
lib.install_new_embedding(
    embedding_model_name="industry-bert-contracts",
    vector_db="neo4j",
    batch_size=300,
)

# optional - fetch the library card again to see the embedding recorded on it
lib_card = lib.get_library_card()
print("update: -- after embedding process - check updated library card - ", lib_card)
91
# Script flow, part 2: run a semantic query against the embedded library.
# note: embedding_model_name is optional, but useful when several embeddings
#   exist on the same library -- see other example scripts for multiple embeddings
query_neo4j = Query(lib, embedding_model_name="industry-bert-contracts")

# query_neo4j can be reused to run multiple queries
my_search_results = query_neo4j.semantic_query(
    "What is the sale bonus?",
    result_count=24,
)

# print each result together with its position in the result list
for rank, hit in enumerate(my_search_results):
    print("update: semantic query results: ", rank, hit)
103
# Script flow, part 3: inspect (and optionally remove) the embedding.
# if you want to delete the embedding - uncomment the line below
# lib.delete_installed_embedding("industry-bert-contracts", "neo4j")

# optional - list the embedding records currently held on the library
emb_record = lib.get_embedding_status()
for position, record in enumerate(emb_record):
    print("update: embeddings on library: ", position, record)