llmware

Форк
0
/
using_neo4j.py 
108 строк · 4.5 Кб
1

2
"""This example shows how to use Neo4j as a vector embedding database with llmware"""
3

4
""" (A) Python Dependencies - 
5

6
    As a first step, you should pip install the Neo4j driver, which is not included in the llmware package:
7
    1.  pip3 install neo4j 
8
    
9
    (B) Installing Neo4j - 
10
    
11
    If you need help installing Neo4j, there are many great online sources and communities, e.g.,:
12
        -- Neo4j Installation -  https://neo4j.com/docs/operations-manual/current/installation/  (All OS)
13
        -- Mac OS - https://neo4j.com/docs/operations-manual/current/installation/osx/
14
        -- Linux - https://neo4j.com/docs/operations-manual/current/installation/linux/
15
            -- Debian repository - https://debian.neo4j.com/
16
        -- Windows - https://neo4j.com/docs/operations-manual/current/installation/windows/
17
        -- Docker - https://hub.docker.com/_/neo4j
18
        -- please also see the docker-compose-neo4j.yaml script provided in the llmware script repository
19
        
20
    (C) Configurations - 
21
    
22
        -- set os.environ variables so the Neo4j connection settings are passed in 'automatically' when installing the embedding
23
        -- os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
24
        -- os.environ["NEO4J_USERNAME"] = "neo4j" # by default
25
        -- os.environ["NEO4J_PASSWORD"] = "neo4j" # by default
26
        -- os.environ["NEO4J_DATABASE"] = "llmware" 
27
        
28
"""
29

30

31
import os
32

33
from llmware.setup import Setup
34
from llmware.library import Library
35
from llmware.retrieval import Query
36

37
#  Connection settings for an example default Neo4j install
#  (database = llmware, user = neo4j), published through os.environ.
os.environ.update(
    {
        "NEO4J_URI": "neo4j://localhost:7687",
        "NEO4J_USERNAME": "neo4j",
        "NEO4J_PASSWORD": "neo4j",
        "NEO4J_DATABASE": "llmware",
    }
)
42

43

44
def build_lib(library_name, folder="Agreements"):
    """Create a library, download the sample files, and parse one sample folder into it.

    Args:
        library_name: name handed to Library().create_new_library.
        folder: sub-folder of the downloaded sample files to add to the library
            (options listed by the example: Agreements | UN-Resolutions-500).

    Returns:
        The library object produced by create_new_library, after add_files has been
        called on it.
    """

    # Step 1 - the library is the main 'organizing construct' in llmware
    step_one_msg = "\nupdate: Step 1 - Creating library: {}".format(library_name)
    print(step_one_msg)
    new_library = Library().create_new_library(library_name)

    # Step 2 - fetch the sample files through .load_sample_files()
    #   --note: pass 'over_write=True' instead if the sample files need refreshing
    print("update: Step 2 - Downloading Sample Files")
    samples_root = Setup().load_sample_files(over_write=False)

    # Step 3 - hand the chosen sample folder to .add_files for parsing and text chunking
    #   (the original example notes that the results are captured in MongoDB)
    print("update: Step 3 - Parsing and Text Indexing Files")
    documents_dir = os.path.join(samples_root, folder)
    new_library.add_files(input_folder_path=documents_dir)

    return new_library
65

66

67
# script entry - everything below runs top to bottom

print("update: Step 1- starting here- building library- parsing PDFs into text chunks")

# build the example library from the default sample folder
lib = build_lib("neo4j_lib_0")

# optional - inspect the library card before any embedding has been installed
lib_card = lib.get_library_card()
print(
    "update: -- before embedding process - check library card - ",
    lib_card,
)
76

77
print("update: Step 2 - starting to install embeddings")

#   other embedding models to try - "mini-lm-sbert" | industry-bert-contracts | text-embedding-ada-002
#   note: text-embedding-ada-002 requires an OpenAI key placed in an os.environ variable,
#   e.g., os.environ["USER_MANAGED_OPENAI_API_KEY"] = "<insert your key>"

#   batch sizes from 100-500 usually give good performance and work on most environments
lib.install_new_embedding(
    embedding_model_name="industry-bert-contracts",
    vector_db="neo4j",
    batch_size=300,
)

#   optional - inspect the library card again now that the embedding step has run
lib_card = lib.get_library_card()
print(
    "update: -- after embedding process - check updated library card - ",
    lib_card,
)
89

90
#   query the library
#   note: embedding_model_name is optional, but helps when several embeddings
#   exist on the same library
#   --other example scripts cover multiple embeddings

#   build the query object bound to the library and embedding model
query_neo4j = Query(lib, embedding_model_name="industry-bert-contracts")

#   query_neo4j can be reused for multiple queries
my_search_results = query_neo4j.semantic_query(
    "What is the sale bonus?",
    result_count=24,
)

#   print each result together with its position in the result list
for position, result in enumerate(my_search_results):
    print("update: semantic query results: ", position, result)
102

103
# to remove the embedding again, uncomment the following line
# lib.delete_installed_embedding("industry-bert-contracts", "neo4j")

#   optional - list the embedding records currently on the library
emb_record = lib.get_embedding_status()
for position, record in enumerate(emb_record):
    print("update: embeddings on library: ", position, record)
110

111

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.