3
# This example shows the general recipe for creating an embedding. This scenario uses FAISS for local
9
from llmware.library import Library
10
from llmware.retrieval import Query
11
from llmware.setup import Setup
14
def embeddings_fast_start(library_name, vector_db="faiss"):
    """Build a sample library, embed it, and run a semantic query on it.

    Steps performed:
      1. Create a new library called ``library_name`` and add the files found
         in the "AgreementsLarge" folder of the llmware sample files.
      2. Install a new embedding on the library using the "mini-lm-sbert"
         model, stored in ``vector_db``.
      3. Run a semantic query for "Salary" (up to 10 results) and print the
         source document, page number, vector distance and a short text
         sample for every hit.

    Args:
        library_name: Name given to the newly created library.
        vector_db: Name of the vector database handed to
            ``install_new_embedding`` (defaults to "faiss").
    """
    # Function-scope import: `os.path.join` is needed below and no
    # module-level `import os` is visible in this file.
    import os

    # Create and populate a library
    print(f"\nstep 1 - creating and populating library: {library_name}...")
    library = Library().create_new_library(library_name)
    sample_files_path = Setup().load_sample_files()
    library.add_files(
        input_folder_path=os.path.join(sample_files_path, "AgreementsLarge")
    )

    # To create vector embeddings you just need to specify the embedding
    # model and the vector embedding DB.
    # For examples of using HuggingFace and SentenceTransformer models, see
    # those examples in this same folder.
    embedding_model = "mini-lm-sbert"

    print(f"\n > Generating embedding vectors and storing in '{vector_db}'...")
    library.install_new_embedding(
        embedding_model_name=embedding_model, vector_db=vector_db
    )

    # Then when doing semantic queries, the most recent vector DB used for
    # embeddings will be used.
    # The query object was previously never created, so `q` was an undefined
    # name; build it from the library that was just embedded.
    q = Query(library)

    # Ask for the 10 best hits for "Salary"
    print("\n > Running a query for 'Salary'...")
    query_results = q.semantic_query(
        query="Salary", result_count=10, results_only=True
    )

    for i, entries in enumerate(query_results):

        # each query result is a dictionary with many useful keys
        text = entries["text"]
        document_source = entries["file_source"]
        page_num = entries["page_num"]
        vector_distance = entries["distance"]

        # for display purposes only, show at most the first 125 characters
        # of the text
        if len(text) > 125:
            text = text[0:125] + " ... "

        print(
            f"\nupdate: query results - {i} - document - {document_source}"
            f" - page num - {page_num} - distance - {vector_distance} "
        )
        print("update: text sample - ", text)
57
if __name__ == "__main__":

    # Vector database used to store the embeddings.
    # Set to 'faiss' by default -> switch to 'milvus' once installed and
    # running. (`db` was previously referenced without being assigned.)
    db = "faiss"

    embeddings_fast_start("embedding_test_1", vector_db=db)