llama-index

Форк
0
231 строка · 8.7 Кб
1
import os
2
from dataclasses import dataclass
3
from pathlib import Path
4
from typing import Dict, Optional, Union
5

6
import fsspec
7

8
from llama_index.legacy.constants import (
9
    DOC_STORE_KEY,
10
    GRAPH_STORE_KEY,
11
    INDEX_STORE_KEY,
12
    VECTOR_STORE_KEY,
13
)
14
from llama_index.legacy.graph_stores.simple import (
15
    DEFAULT_PERSIST_FNAME as GRAPH_STORE_FNAME,
16
)
17
from llama_index.legacy.graph_stores.simple import SimpleGraphStore
18
from llama_index.legacy.graph_stores.types import GraphStore
19
from llama_index.legacy.storage.docstore.simple_docstore import SimpleDocumentStore
20
from llama_index.legacy.storage.docstore.types import (
21
    DEFAULT_PERSIST_FNAME as DOCSTORE_FNAME,
22
)
23
from llama_index.legacy.storage.docstore.types import BaseDocumentStore
24
from llama_index.legacy.storage.index_store.simple_index_store import SimpleIndexStore
25
from llama_index.legacy.storage.index_store.types import (
26
    DEFAULT_PERSIST_FNAME as INDEX_STORE_FNAME,
27
)
28
from llama_index.legacy.storage.index_store.types import BaseIndexStore
29
from llama_index.legacy.utils import concat_dirs
30
from llama_index.legacy.vector_stores.simple import (
31
    DEFAULT_PERSIST_FNAME as VECTOR_STORE_FNAME,
32
)
33
from llama_index.legacy.vector_stores.simple import (
34
    DEFAULT_VECTOR_STORE,
35
    NAMESPACE_SEP,
36
    SimpleVectorStore,
37
)
38
from llama_index.legacy.vector_stores.types import BasePydanticVectorStore, VectorStore
39

40
# Default directory used by ``StorageContext.persist`` when no ``persist_dir``
# is given.
DEFAULT_PERSIST_DIR = "./storage"
# Default value of the ``image_store_fname`` parameter of
# ``StorageContext.persist``.
IMAGE_STORE_FNAME = "image_store.json"
# Namespace key under which the image store is registered in
# ``StorageContext.vector_stores``.
IMAGE_VECTOR_STORE_NAMESPACE = "image"
43

44

45
@dataclass
class StorageContext:
    """Storage context.

    The storage context container is a utility container for storing nodes,
    indices, and vectors. It contains the following:
    - docstore: BaseDocumentStore
    - index_store: BaseIndexStore
    - vector_stores: Dict[str, VectorStore], keyed by namespace
    - graph_store: GraphStore

    The vector store registered under ``DEFAULT_VECTOR_STORE`` is also
    exposed through the ``vector_store`` property.

    """

    docstore: BaseDocumentStore
    index_store: BaseIndexStore
    # Vector stores keyed by namespace (e.g. DEFAULT_VECTOR_STORE,
    # IMAGE_VECTOR_STORE_NAMESPACE).
    vector_stores: Dict[str, VectorStore]
    graph_store: GraphStore

    @classmethod
    def from_defaults(
        cls,
        docstore: Optional[BaseDocumentStore] = None,
        index_store: Optional[BaseIndexStore] = None,
        vector_store: Optional[Union[VectorStore, BasePydanticVectorStore]] = None,
        image_store: Optional[VectorStore] = None,
        vector_stores: Optional[
            Dict[str, Union[VectorStore, BasePydanticVectorStore]]
        ] = None,
        graph_store: Optional[GraphStore] = None,
        persist_dir: Optional[str] = None,
        fs: Optional[fsspec.AbstractFileSystem] = None,
    ) -> "StorageContext":
        """Create a StorageContext from defaults.

        Any store that is not supplied is created empty (when ``persist_dir``
        is None) or loaded from ``persist_dir`` (otherwise).

        Args:
            docstore (Optional[BaseDocumentStore]): document store
            index_store (Optional[BaseIndexStore]): index store
            vector_store (Optional[VectorStore]): vector store; registered
                under ``DEFAULT_VECTOR_STORE`` and, when given, takes
                precedence over ``vector_stores``
            image_store (Optional[VectorStore]): image store; registered
                under ``IMAGE_VECTOR_STORE_NAMESPACE``. When ``persist_dir``
                is None and no image store is given, a new
                ``SimpleVectorStore`` is used.
            vector_stores (Optional[Dict[str, VectorStore]]): vector stores
                keyed by namespace. The mapping is copied, so the caller's
                dict is never modified.
            graph_store (Optional[GraphStore]): graph store
            persist_dir (Optional[str]): directory to load missing stores from
            fs (Optional[fsspec.AbstractFileSystem]): filesystem forwarded to
                the ``from_persist_dir`` loaders

        Returns:
            StorageContext: the assembled storage context.

        """
        resolved_vector_stores: Dict[str, Union[VectorStore, BasePydanticVectorStore]]

        if persist_dir is None:
            docstore = docstore or SimpleDocumentStore()
            index_store = index_store or SimpleIndexStore()
            graph_store = graph_store or SimpleGraphStore()
            image_store = image_store or SimpleVectorStore()

            if vector_store:
                resolved_vector_stores = {DEFAULT_VECTOR_STORE: vector_store}
            elif vector_stores:
                # Shallow copy: the image store is added below and must not
                # leak into the dict owned by the caller.
                resolved_vector_stores = dict(vector_stores)
            else:
                resolved_vector_stores = {DEFAULT_VECTOR_STORE: SimpleVectorStore()}
        else:
            docstore = docstore or SimpleDocumentStore.from_persist_dir(
                persist_dir, fs=fs
            )
            index_store = index_store or SimpleIndexStore.from_persist_dir(
                persist_dir, fs=fs
            )
            graph_store = graph_store or SimpleGraphStore.from_persist_dir(
                persist_dir, fs=fs
            )

            if vector_store:
                resolved_vector_stores = {DEFAULT_VECTOR_STORE: vector_store}
            elif vector_stores:
                # Shallow copy for the same reason as above.
                resolved_vector_stores = dict(vector_stores)
            else:
                resolved_vector_stores = SimpleVectorStore.from_namespaced_persist_dir(
                    persist_dir, fs=fs
                )

        if image_store:
            # append image store to vector stores
            resolved_vector_stores[IMAGE_VECTOR_STORE_NAMESPACE] = image_store

        return cls(
            docstore=docstore,
            index_store=index_store,
            vector_stores=resolved_vector_stores,
            graph_store=graph_store,
        )

    @staticmethod
    def _persist_path(
        persist_dir: Union[str, os.PathLike],
        fname: str,
        fs: Optional[fsspec.AbstractFileSystem],
    ) -> str:
        """Join ``fname`` onto ``persist_dir`` for the target filesystem."""
        if fs is not None:
            # NOTE: doesn't support Windows here
            return concat_dirs(str(persist_dir), fname)
        return str(Path(persist_dir) / fname)

    def persist(
        self,
        persist_dir: Union[str, os.PathLike] = DEFAULT_PERSIST_DIR,
        docstore_fname: str = DOCSTORE_FNAME,
        index_store_fname: str = INDEX_STORE_FNAME,
        vector_store_fname: str = VECTOR_STORE_FNAME,
        image_store_fname: str = IMAGE_STORE_FNAME,
        graph_store_fname: str = GRAPH_STORE_FNAME,
        fs: Optional[fsspec.AbstractFileSystem] = None,
    ) -> None:
        """Persist the storage context.

        Args:
            persist_dir (str): directory to persist the storage context
            docstore_fname (str): file name for the document store
            index_store_fname (str): file name for the index store
            vector_store_fname (str): base file name for every vector store;
                each store is written as
                ``{namespace}{NAMESPACE_SEP}{vector_store_fname}``
            image_store_fname (str): not read by this method; the image store
                is persisted like any other entry of ``vector_stores``
            graph_store_fname (str): file name for the graph store
            fs (Optional[fsspec.AbstractFileSystem]): filesystem forwarded to
                every store's ``persist``; when None, paths are built with
                ``pathlib.Path``
        """
        # Resolve all target paths up front, before anything is written.
        docstore_path = self._persist_path(persist_dir, docstore_fname, fs)
        index_store_path = self._persist_path(persist_dir, index_store_fname, fs)
        graph_store_path = self._persist_path(persist_dir, graph_store_fname, fs)

        self.docstore.persist(persist_path=docstore_path, fs=fs)
        self.index_store.persist(persist_path=index_store_path, fs=fs)
        self.graph_store.persist(persist_path=graph_store_path, fs=fs)

        # save each vector store under its namespace
        for vector_store_name, vector_store in self.vector_stores.items():
            vector_store_path = self._persist_path(
                persist_dir,
                f"{vector_store_name}{NAMESPACE_SEP}{vector_store_fname}",
                fs,
            )
            vector_store.persist(persist_path=vector_store_path, fs=fs)

    def to_dict(self) -> dict:
        """Serialize the storage context to a dict.

        Only supported when the docstore, index store, graph store and every
        vector store are the ``Simple*`` implementations.

        Returns:
            dict: mapping with the keys ``VECTOR_STORE_KEY`` (namespace ->
            vector store dict), ``DOC_STORE_KEY``, ``INDEX_STORE_KEY`` and
            ``GRAPH_STORE_KEY``.

        Raises:
            ValueError: if any store is not a ``Simple*`` implementation.
        """
        all_simple = (
            isinstance(self.docstore, SimpleDocumentStore)
            and isinstance(self.index_store, SimpleIndexStore)
            and isinstance(self.graph_store, SimpleGraphStore)
            and all(
                isinstance(vs, SimpleVectorStore) for vs in self.vector_stores.values()
            )
        )
        if not all_simple:
            raise ValueError(
                "to_dict only available when using simple doc/index/vector stores"
            )

        # Already guaranteed by the check above; repeated as asserts so that
        # static type checkers narrow the attribute types.
        assert isinstance(self.docstore, SimpleDocumentStore)
        assert isinstance(self.index_store, SimpleIndexStore)
        assert isinstance(self.graph_store, SimpleGraphStore)

        return {
            VECTOR_STORE_KEY: {
                key: vector_store.to_dict()
                for key, vector_store in self.vector_stores.items()
                # Always true here; kept for static type narrowing.
                if isinstance(vector_store, SimpleVectorStore)
            },
            DOC_STORE_KEY: self.docstore.to_dict(),
            INDEX_STORE_KEY: self.index_store.to_dict(),
            GRAPH_STORE_KEY: self.graph_store.to_dict(),
        }

    @classmethod
    def from_dict(cls, save_dict: dict) -> "StorageContext":
        """Create a StorageContext from dict.

        Args:
            save_dict (dict): mapping with the keys ``DOC_STORE_KEY``,
                ``INDEX_STORE_KEY``, ``GRAPH_STORE_KEY`` and
                ``VECTOR_STORE_KEY`` (namespace -> vector store dict), as
                produced by ``to_dict``.

        Returns:
            StorageContext: a context backed by ``Simple*`` stores.

        Raises:
            KeyError: if one of the expected keys is missing.
        """
        docstore = SimpleDocumentStore.from_dict(save_dict[DOC_STORE_KEY])
        index_store = SimpleIndexStore.from_dict(save_dict[INDEX_STORE_KEY])
        graph_store = SimpleGraphStore.from_dict(save_dict[GRAPH_STORE_KEY])

        vector_stores: Dict[str, VectorStore] = {
            key: SimpleVectorStore.from_dict(vector_store_dict)
            for key, vector_store_dict in save_dict[VECTOR_STORE_KEY].items()
        }

        return cls(
            docstore=docstore,
            index_store=index_store,
            vector_stores=vector_stores,
            graph_store=graph_store,
        )

    @property
    def vector_store(self) -> VectorStore:
        """Backwards compatibility for vector_store property.

        Returns the store registered under ``DEFAULT_VECTOR_STORE``; raises
        ``KeyError`` if no store is registered under that namespace.
        """
        return self.vector_stores[DEFAULT_VECTOR_STORE]

    def add_vector_store(self, vector_store: VectorStore, namespace: str) -> None:
        """Add a vector store to the storage context.

        An existing store under the same ``namespace`` is replaced.
        """
        self.vector_stores[namespace] = vector_store
232

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.