llama-index
204 строки · 6.2 Кб
1"""Faiss Vector store index.
2
3An index that is built on top of an existing vector store.
4
5"""
6
7import logging8import os9from typing import Any, List, Optional, cast10
11import fsspec12import numpy as np13from fsspec.implementations.local import LocalFileSystem14
15from llama_index.legacy.bridge.pydantic import PrivateAttr16from llama_index.legacy.schema import BaseNode17from llama_index.legacy.vector_stores.simple import DEFAULT_VECTOR_STORE, NAMESPACE_SEP18from llama_index.legacy.vector_stores.types import (19DEFAULT_PERSIST_DIR,20DEFAULT_PERSIST_FNAME,21BasePydanticVectorStore,22VectorStoreQuery,23VectorStoreQueryResult,24)
25
# Module-scoped logger: lets applications configure or filter this module's
# output by name instead of having it emitted on the root logger.
logger = logging.getLogger(__name__)
# Default on-disk location of the persisted index: the default persist
# directory joined with "<vector store name><namespace sep><persist file name>".
DEFAULT_PERSIST_PATH = os.path.join(
    DEFAULT_PERSIST_DIR,
    "{}{}{}".format(DEFAULT_VECTOR_STORE, NAMESPACE_SEP, DEFAULT_PERSIST_FNAME),
)
31
32
class FaissVectorStore(BasePydanticVectorStore):
    """Faiss Vector Store.

    Embeddings are stored within a Faiss index.

    During query time, the index uses Faiss to query for the top
    k embeddings, and returns the corresponding indices.

    Node text is not stored (``stores_text`` is False); the ids handed back
    by ``add`` and ``query`` are the stringified positions of the vectors
    inside the Faiss index.

    Args:
        faiss_index (faiss.Index): Faiss index instance

    """

    stores_text: bool = False

    _faiss_index = PrivateAttr()

    def __init__(
        self,
        faiss_index: Any,
    ) -> None:
        """Initialize params.

        Args:
            faiss_index (faiss.Index): Faiss index instance to wrap.

        Raises:
            ImportError: if the `faiss` package is not installed.

        """
        import_err_msg = """
            `faiss` package not found. For instructions on
            how to install `faiss` please visit
            https://github.com/facebookresearch/faiss/wiki/Installing-Faiss
        """
        try:
            import faiss
        except ImportError as err:
            # Keep the original failure attached as the cause for debugging.
            raise ImportError(import_err_msg) from err

        self._faiss_index = cast(faiss.Index, faiss_index)

        super().__init__()

    @classmethod
    def from_persist_dir(
        cls,
        persist_dir: str = DEFAULT_PERSIST_DIR,
        fs: Optional[fsspec.AbstractFileSystem] = None,
    ) -> "FaissVectorStore":
        """Load a store from the default file name inside ``persist_dir``.

        Args:
            persist_dir (str): directory that contains the persisted index.
            fs (Optional[fsspec.AbstractFileSystem]): must be None or a
                LocalFileSystem.

        Raises:
            NotImplementedError: if ``fs`` is a non-local filesystem.
            ValueError: if no persisted index exists at the resolved path.

        """
        persist_path = os.path.join(
            persist_dir,
            f"{DEFAULT_VECTOR_STORE}{NAMESPACE_SEP}{DEFAULT_PERSIST_FNAME}",
        )
        # only support local storage for now
        if fs is not None and not isinstance(fs, LocalFileSystem):
            raise NotImplementedError("FAISS only supports local storage for now.")
        return cls.from_persist_path(persist_path=persist_path, fs=None)

    @classmethod
    def from_persist_path(
        cls,
        persist_path: str,
        fs: Optional[fsspec.AbstractFileSystem] = None,
    ) -> "FaissVectorStore":
        """Load a store from an index file previously written by ``persist``.

        Args:
            persist_path (str): path of the persisted Faiss index file.
            fs (Optional[fsspec.AbstractFileSystem]): must be None or a
                LocalFileSystem.

        Raises:
            NotImplementedError: if ``fs`` is a non-local filesystem.
            ValueError: if ``persist_path`` does not exist.

        """
        import faiss

        # I don't think FAISS supports fsspec, it requires a path in the SWIG interface
        # TODO: copy to a temp file and load into memory from there
        if fs is not None and not isinstance(fs, LocalFileSystem):
            raise NotImplementedError("FAISS only supports local storage for now.")

        if not os.path.exists(persist_path):
            raise ValueError(f"No existing {__name__} found at {persist_path}.")

        logger.info("Loading %s from %s.", __name__, persist_path)
        faiss_index = faiss.read_index(persist_path)
        return cls(faiss_index=faiss_index)

    def add(
        self,
        nodes: List[BaseNode],
        **add_kwargs: Any,
    ) -> List[str]:
        """Add nodes to index.

        NOTE: in the Faiss vector store, we do not store text in Faiss.

        Args:
            nodes: List[BaseNode]: list of nodes with embeddings

        Returns:
            List[str]: one id per node, in input order. Each id is the index's
            ``ntotal`` read immediately before that node's vector was added.

        """
        new_ids = []
        for node in nodes:
            text_embedding = node.get_embedding()
            # Faiss expects a 2-D float32 batch, so wrap the vector as (1, dim).
            text_embedding_np = np.array(text_embedding, dtype="float32")[np.newaxis, :]
            new_id = str(self._faiss_index.ntotal)
            self._faiss_index.add(text_embedding_np)
            new_ids.append(new_id)
        return new_ids

    @property
    def client(self) -> Any:
        """Return the faiss index."""
        return self._faiss_index

    def persist(
        self,
        persist_path: str = DEFAULT_PERSIST_PATH,
        fs: Optional[fsspec.AbstractFileSystem] = None,
    ) -> None:
        """Save to file.

        This method saves the vector store to disk.

        Args:
            persist_path (str): The save_path of the file.
            fs (Optional[fsspec.AbstractFileSystem]): must be None or a
                LocalFileSystem.

        Raises:
            NotImplementedError: if ``fs`` is a non-local filesystem.

        """
        # I don't think FAISS supports fsspec, it requires a path in the SWIG interface
        # TODO: write to a temporary file and then copy to the final destination
        if fs is not None and not isinstance(fs, LocalFileSystem):
            raise NotImplementedError("FAISS only supports local storage for now.")
        import faiss

        dirpath = os.path.dirname(persist_path)
        # A bare file name has no directory part; os.makedirs("") would raise,
        # so only create a directory when there is one to create.
        if dirpath:
            os.makedirs(dirpath, exist_ok=True)

        faiss.write_index(self._faiss_index, persist_path)

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete nodes using ref_doc_id.

        Args:
            ref_doc_id (str): The doc_id of the document to delete.

        Raises:
            NotImplementedError: always; deletion is not supported yet.

        """
        raise NotImplementedError("Delete not yet implemented for Faiss index.")

    def query(
        self,
        query: VectorStoreQuery,
        **kwargs: Any,
    ) -> VectorStoreQueryResult:
        """Query index for top k most similar nodes.

        Args:
            query (VectorStoreQuery): the fields read are ``query_embedding``
                (List[float]), ``similarity_top_k`` (int) and ``filters``
                (must be None).

        Returns:
            VectorStoreQueryResult: ``similarities`` holds the values returned
            by the Faiss search as plain floats, and ``ids`` the matching
            index positions as strings. Entries with a negative index are
            dropped.

        Raises:
            ValueError: if metadata filters are supplied or no query
                embedding is provided.

        """
        if query.filters is not None:
            raise ValueError("Metadata filters not implemented for Faiss yet.")
        if query.query_embedding is None:
            raise ValueError("query_embedding must be provided to query Faiss.")

        query_embedding = cast(List[float], query.query_embedding)
        # Faiss searches a 2-D float32 batch; wrap the single query as (1, dim).
        query_embedding_np = np.array(query_embedding, dtype="float32")[np.newaxis, :]
        dists, indices = self._faiss_index.search(
            query_embedding_np, query.similarity_top_k
        )
        # if empty, then return an empty response (checked before indexing
        # into the first row so an empty result can never raise)
        if np.size(indices) == 0:
            return VectorStoreQueryResult(similarities=[], ids=[])

        # returned dimension is 1 x k
        filtered_dists = []
        filtered_node_idxs = []
        for dist, idx in zip(dists[0], indices[0]):
            # negative indices do not refer to a stored vector; skip them
            if idx < 0:
                continue
            filtered_dists.append(float(dist))
            filtered_node_idxs.append(str(idx))

        return VectorStoreQueryResult(
            similarities=filtered_dists, ids=filtered_node_idxs
        )