llmware

using_redis.py
95 строк · 3.7 Кб
Перенос по словам
1

2
"""This example shows how to use Redis as a vector embedding database with llmware"""
3

4
""" (A) Python Dependencies - 

    As a first step, you should pip install dependencies not included in the llmware package:
        -- pip3 install redis

    (B) Installing Redis -

    If you need help installing Redis, please see the official Redis installation docs (or one of the many widely available tutorials), e.g.:
        -- https://redis.io/docs/install/install-redis/
        -- for a fast development install with docker-compose:
             -- please see docker-compose-redis-stack.yaml in the llmware repository

    (C) Configurations -
        -- set os.environ variables so that they are picked up 'automatically' when installing the embedding
        -- os.environ["USER_MANAGED_REDIS_HOST"] = "localhost"
        -- os.environ["USER_MANAGED_REDIS_PORT"] = "6379"    (os.environ values must be strings, not ints)

"""
22

23

24
import os
25

26
from llmware.setup import Setup
27
from llmware.library import Library
28
from llmware.retrieval import Query
29

30

31
def build_lib(library_name, folder="Agreements"):
    """Create a new library and load one folder of the sample files into it.

    Args:
        library_name: Name handed to ``Library().create_new_library``.
        folder: Sub-folder of the downloaded sample files to parse
            (options noted by the original author: Agreements | UN-Resolutions-500).

    Returns:
        The object returned by ``create_new_library``, after ``add_files``
        has been called on it.
    """

    # Step 1 - the library is the main 'organizing construct' in llmware
    print(f"\nupdate: Step 1 - Creating library: {library_name}")
    new_library = Library().create_new_library(library_name)

    # Step 2 - fetch the sample files via .load_sample_files()
    #   --note: pass 'over_write=True' instead if the sample files need a refresh
    print("update: Step 2 - Downloading Sample Files")
    samples_root = Setup().load_sample_files(over_write=False)

    # Step 3 - hand the chosen sample folder to .add_files, which (per the
    #   original author's note) parses the documents into text chunks and
    #   captures them in MongoDB
    print("update: Step 3 - Parsing and Text Indexing Files")
    documents_dir = os.path.join(samples_root, folder)
    new_library.add_files(input_folder_path=documents_dir)

    return new_library
52

53

54
# start script
#
# Walk-through: build a library from the sample documents, install an
# embedding on it with Redis as the vector db, run one semantic query, and
# finally list the embedding records held on the library.

print("update: Step 1- starting here- building library- parsing PDFs into text chunks")

lib = build_lib("redis_lib_1114_0")

# optional - check the status of the library card and embedding
lib_card = lib.get_library_card()
print("update: -- before embedding process - check library card - ", lib_card)

print("update: Step 2 - starting to install embeddings")

#   alt embedding models - "mini-lm-sbert" | industry-bert-contracts | text-embedding-ada-002
#   note: if you want to use text-embedding-ada-002, you will need an OpenAI key and enter into os.environ variable
#   e.g., os.environ["USER_MANAGED_OPENAI_API_KEY"] = "<insert your key>"

#   batch sizes from 100-500 usually give good performance and work on most environments
lib.install_new_embedding(
    embedding_model_name="industry-bert-contracts",
    vector_db="redis",
    batch_size=300,
)

#   optional - check the status of the library card and embedding
lib_card = lib.get_library_card()
print("update: -- after embedding process - check updated library card - ", lib_card)

#   run a query
#   note: embedding_model_name is optional, but useful if you create multiple embeddings on the same library
#   --see other example scripts for multiple embeddings

#   create query object
#   NOTE(review): the '_pgv' suffix is kept so the module-level name does not
#   change; the query is built on the library that just received the
#   embedding installed with vector_db="redis" above.
query_pgv = Query(lib, embedding_model_name="industry-bert-contracts")

#   run a semantic query through the query object (more queries can reuse it)
my_search_results = query_pgv.semantic_query("What is the sale bonus?", result_count=24)

for i, qr in enumerate(my_search_results):
    print("update: semantic query results: ", i, qr)

# if you want to delete the embedding  - uncomment the line below
# lib.delete_installed_embedding("industry-bert-contracts", "redis")

#   optional - check the embeddings on the library
emb_record = lib.get_embedding_status()
for j, entries in enumerate(emb_record):
    print("update: embeddings on library: ", j, entries)
97

98
llmware

Использование cookies