2
"""This example shows how to use sentence transformers as a vector embedding model with llmware"""
4
"""Note: this example illustrates capability from llmware==0.1.13 - please update pip install, or pull from repo"""
9
import os

from llmware.library import Library
from llmware.models import ModelCatalog
from llmware.retrieval import Query
from llmware.setup import Setup
15
def build_lib(library_name, folder="Agreements"):
    """Create a new library and add the sample documents from *folder* to it.

    Args:
        library_name: Name handed to ``Library().create_new_library``.
        folder: Sub-folder of the sample-files directory whose contents are
            passed to ``add_files``. Defaults to ``"Agreements"``.

    Returns:
        The library object produced by ``create_new_library``, after
        ``add_files`` has been called on it.
    """
    print(f"\nupdate: Step 1 - Creating library: {library_name}")
    library = Library().create_new_library(library_name)

    print("update: Step 2 - Downloading Sample Files")
    # NOTE(review): over_write=False presumably reuses an existing local copy
    # of the samples - confirm against Setup.load_sample_files.
    sample_files_path = Setup().load_sample_files(over_write=False)

    print("update: Step 3 - Parsing and Text Indexing Files")
    library.add_files(input_folder_path=os.path.join(sample_files_path, folder))

    # The script binds this function's result to `lib` and calls methods on
    # it, so the library must be returned rather than dropped.
    return library
40
# Script entry: build the library that the embedding steps below operate on.
print("update: Step 1- starting here- building library- parsing PDFs into text chunks")

lib = build_lib("st_embedding_0_454")
57
# Name of the sentence-transformers model used for embeddings.
sentence_transformer_pretrained_model_name = "all-MiniLM-L6-v2"

# Both catalog calls below reference these two names, which were never
# defined. The values follow the published all-MiniLM-L6-v2 model card
# (384-dimensional output vectors, 256-token maximum sequence length);
# change them together with the model name if another model is used.
embedding_dims = 384
context_window = 256

# Register the model with the catalog by name, dimensions and context window.
ModelCatalog().register_sentence_transformer_model(
    model_name=sentence_transformer_pretrained_model_name,
    embedding_dims=embedding_dims,
    context_window=context_window,
)

# Add an explicit model-card entry for the same model to the catalog.
ModelCatalog().add_model_list(
    {
        "model_name": sentence_transformer_pretrained_model_name,
        "embedding_dims": embedding_dims,
        "context_window": context_window,
        "model_category": "embedding",
        "model_family": "LLMWareSemanticModel",
        "display_name": "MySentenceTransformer",
        "model_location": "st_repo",
    }
)
74
# Fetch the full model list, then look up and print the card stored under
# the sentence-transformer model name.
mc = ModelCatalog().list_all_models()
model_card = ModelCatalog().lookup_model_card(
    sentence_transformer_pretrained_model_name
)
print("update: model card - ", model_card)
79
# Run the embedding step on the library with the chosen model, selecting
# the "milvus" vector_db option and a batch size of 300.
lib.install_new_embedding(
    embedding_model_name=sentence_transformer_pretrained_model_name,
    vector_db="milvus",
    batch_size=300,
)

# Print the library card as it stands after the embedding call.
lib_card = lib.get_library_card()
print("update: -- after embedding process - check updated library card - ", lib_card)
87
# Build a Query on the library with the same embedding model name, then run
# one semantic query requesting 24 results.
query_st = Query(
    lib,
    embedding_model_name=sentence_transformer_pretrained_model_name,
)
my_search_results = query_st.semantic_query(
    "What is the sale bonus?",
    result_count=24,
)

# Print each result together with its position in the result list.
for i, qr in enumerate(my_search_results):
    print("update: semantic query results: ", i, qr)
99
# Print every entry of the library's embedding status, one per line.
emb_record = lib.get_embedding_status()

for j, entries in enumerate(emb_record):
    print("update: embeddings on library: ", j, entries)