llama-index

Форк
0
77 строк · 2.4 Кб
1
"""Faiss reader."""
2

3
from typing import Any, Dict, List
4

5
import numpy as np
6

7
from llama_index.legacy.readers.base import BaseReader
8
from llama_index.legacy.schema import Document
9

10

11
class FaissReader(BaseReader):
    """Faiss reader.

    Retrieves documents through an existing in-memory Faiss index.
    These documents can then be used in a downstream LlamaIndex data structure.
    If you wish to use Faiss itself as an index to organize documents,
    insert documents, and perform queries on them, please use VectorStoreIndex
    with FaissVectorStore.

    Args:
        index (faiss.Index): A Faiss Index object (required). Only its
            ``search(query, k)`` method is used by this reader.

    """

    def __init__(self, index: Any):
        """Initialize with parameters.

        Raises:
            ImportError: If the `faiss` package is not installed.

        """
        import_err_msg = """
            `faiss` package not found. For instructions on
            how to install `faiss` please visit
            https://github.com/facebookresearch/faiss/wiki/Installing-Faiss
        """
        try:
            import faiss  # noqa
        except ImportError as err:
            # Chain explicitly so the underlying import failure stays visible.
            raise ImportError(import_err_msg) from err

        self._index = index

    def load_data(
        self,
        query: np.ndarray,
        id_to_text_map: Dict[int, str],
        k: int = 4,
        separate_documents: bool = True,
    ) -> List[Document]:
        """Load data from Faiss.

        Args:
            query (np.ndarray): A 2D numpy array of query vectors.
            id_to_text_map (Dict[int, str]): A map from the integer IDs
                returned by the index search to the corresponding text.
            k (int): Number of nearest neighbors to retrieve. Defaults to 4.
            separate_documents (bool): Whether to return separate
                documents. Defaults to True.

        Returns:
            List[Document]: One document per retrieved neighbor, in search
                order (query by query), or a single document containing all
                texts joined by blank lines when ``separate_documents`` is
                False.

        Raises:
            ValueError: If the search returns an ID (other than the ``-1``
                "no result" placeholder) that is missing from
                ``id_to_text_map``.

        """
        # Distances are not needed; only the neighbor labels are used.
        _, indices = self._index.search(query, k)

        documents = []
        for neighbor_ids in indices:
            for raw_id in neighbor_ids:
                # Normalize numpy integer scalars to plain ints for lookup.
                doc_id = int(raw_id)
                if doc_id not in id_to_text_map:
                    if doc_id == -1:
                        # Faiss pads the result with -1 when fewer than k
                        # neighbors exist; that is not a missing mapping.
                        continue
                    raise ValueError(
                        f"Document ID {doc_id} not found in id_to_text_map."
                    )
                documents.append(Document(text=id_to_text_map[doc_id]))

        if not separate_documents:
            # join all documents into one
            text = "\n\n".join(doc.get_content() for doc in documents)
            documents = [Document(text=text)]

        return documents
78

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.