llama-index

Форк
0
204 строки · 5.9 Кб
1
"""AwaDB vector store index.

An index that is built on top of an existing vector store.

"""
6

7
import logging
8
import uuid
9
from typing import Any, List, Optional, Set
10

11
from llama_index.legacy.schema import BaseNode, MetadataMode, TextNode
12
from llama_index.legacy.vector_stores.types import (
13
    VectorStore,
14
    VectorStoreQuery,
15
    VectorStoreQueryResult,
16
)
17
from llama_index.legacy.vector_stores.utils import (
18
    legacy_metadata_dict_to_node,
19
    metadata_dict_to_node,
20
    node_to_metadata_dict,
21
)
22

23
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
24

25

26
class AwaDBVectorStore(VectorStore):
    """AwaDB vector store.

    In this vector store, embeddings are stored within an AwaDB table.

    During query time, the index uses AwaDB to query for the top
    k most similar nodes.

    Args:
        table_name (str): Name of the AwaDB table to create. When left at
            ``DEFAULT_TABLE_NAME`` a random suffix is appended to it.
        log_and_data_dir (Optional[str]): Root directory handed to
            ``awadb.Client``; the client is built without arguments when
            this is ``None``.

    """

    flat_metadata: bool = True
    stores_text: bool = True
    DEFAULT_TABLE_NAME = "llamaindex_awadb"

    @property
    def client(self) -> Any:
        """Get AwaDB client."""
        return self.awadb_client

    def __init__(
        self,
        table_name: str = DEFAULT_TABLE_NAME,
        log_and_data_dir: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize with AwaDB client.

        If table_name is not specified, a random table name of
        `DEFAULT_TABLE_NAME + "_" + last segment of a uuid4` is
        created automatically.

        Args:
            table_name: Name of the table created, default DEFAULT_TABLE_NAME.
            log_and_data_dir: Optional root directory of log and data.
            kwargs: Any possible extend parameters in the future.

        Raises:
            ImportError: If the `awadb` package is not installed.
        """
        import_err_msg = "`awadb` package not found, please run `pip install awadb`"
        try:
            import awadb
        except ImportError as err:
            # Keep the original failure attached as the cause for debugging.
            raise ImportError(import_err_msg) from err

        if log_and_data_dir is not None:
            self.awadb_client = awadb.Client(log_and_data_dir)
        else:
            self.awadb_client = awadb.Client()

        if table_name == self.DEFAULT_TABLE_NAME:
            # Make the default name unique by appending the last uuid4 segment.
            table_name += "_"
            table_name += str(uuid.uuid4()).split("-")[-1]

        self.awadb_client.Create(table_name)

    def add(
        self,
        nodes: List[BaseNode],
        **add_kwargs: Any,
    ) -> List[str]:
        """Add nodes to AwaDB.

        Args:
            nodes: List[BaseNode]: list of nodes with embeddings

        Returns:
            Added node ids

        Raises:
            ValueError: If the AwaDB client is not initialized.
        """
        if not self.awadb_client:
            raise ValueError("AwaDB client not initialized")

        if not nodes:
            # Nothing to insert; avoid sending an empty batch to the backend.
            return []

        embeddings = []
        metadatas = []
        ids = []
        texts = []
        for node in nodes:
            embeddings.append(node.get_embedding())
            metadatas.append(
                node_to_metadata_dict(
                    node, remove_text=True, flat_metadata=self.flat_metadata
                )
            )
            ids.append(node.node_id)
            # The text is stored in its own field, without metadata mixed in.
            texts.append(node.get_content(metadata_mode=MetadataMode.NONE) or "")

        # "embedding_text" / "text_embedding" are the field names that
        # query() later reads back and excludes, respectively.
        self.awadb_client.AddTexts(
            "embedding_text",
            "text_embedding",
            texts,
            embeddings,
            metadatas,
            is_duplicate_texts=False,
            ids=ids,
        )

        return ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete nodes using with ref_doc_id.

        Args:
            ref_doc_id (str): The doc_id of the document to delete.

        Returns:
            None
        """
        # Truthiness check also tolerates None instead of raising on len().
        if not ref_doc_id:
            return
        # NOTE(review): add() stores rows under node ids, while this passes
        # ref_doc_id to Delete() as a row id -- confirm this removes the
        # intended rows.
        self.awadb_client.Delete([ref_doc_id])

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """Query index for top k most similar nodes.

        Args:
            query : vector store query

        Returns:
            VectorStoreQueryResult: Query results. Empty when the search
                yields no result set.
        """
        meta_filters = {}
        if query.filters is not None:
            meta_filters = {
                metadata_filter.key: metadata_filter.value
                for metadata_filter in query.filters.legacy_filters()
            }

        # The stored embedding is not needed to rebuild the nodes.
        not_include_fields: Set[str] = {"text_embedding"}
        results = self.awadb_client.Search(
            query=query.query_embedding,
            topn=query.similarity_top_k,
            meta_filter=meta_filters,
            not_include_fields=not_include_fields,
        )

        nodes = []
        similarities = []
        ids = []

        if not results:
            # No result set came back; indexing results[0] would fail.
            return VectorStoreQueryResult(
                nodes=nodes, similarities=similarities, ids=ids
            )

        for item_detail in results[0]["ResultItems"]:
            content = ""
            meta_data = {}
            node_id = ""
            # Split each hit into text, id, score and remaining metadata.
            for item_key in item_detail:
                item_value = item_detail[item_key]
                if item_key == "embedding_text":
                    content = item_value
                elif item_key == "_id":
                    node_id = item_value
                    ids.append(node_id)
                elif item_key == "score":
                    similarities.append(item_value)
                else:
                    meta_data[item_key] = item_value

            try:
                node = metadata_dict_to_node(meta_data)
                node.set_content(content)
            except Exception:
                # NOTE: deprecated legacy logic for backward compatibility
                metadata, node_info, relationships = legacy_metadata_dict_to_node(
                    meta_data
                )

                node = TextNode(
                    text=content,
                    id_=node_id,
                    metadata=metadata,
                    start_char_idx=node_info.get("start", None),
                    end_char_idx=node_info.get("end", None),
                    relationships=relationships,
                )

            nodes.append(node)

        return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids)
205

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.