llama-index

awadb.py
71 строка · 2.0 Кб
Перенос по словам
1
"""Awadb reader."""
2

3
from typing import Any, List
4

5
import numpy as np
6

7
from llama_index.legacy.readers.base import BaseReader
8
from llama_index.legacy.schema import Document
9

10

11
class AwadbReader(BaseReader):
    """Awadb reader.

    Retrieves documents through an existing awadb client.
    These documents can then be used in a downstream LlamaIndex data structure.

    Args:
        client (awadb.client): An awadb client.

    """

    def __init__(self, client: Any):
        """Store the caller-supplied awadb client.

        Args:
            client (Any): An already-constructed awadb client. It is kept
                as ``self.awadb_client`` and used by ``load_data``.

        """
        # No import guard here: the previous one wrapped a bare ``pass`` and
        # therefore could never raise, and its message referred to ``faiss``
        # rather than awadb. The client is built by the caller.
        self.awadb_client = client

    def load_data(
        self,
        query: np.ndarray,
        k: int = 4,
        separate_documents: bool = True,
    ) -> List[Document]:
        """Load data from awadb.

        Args:
            query (np.ndarray): Query forwarded unchanged to the client's
                ``Search`` method.
            k (int): Number of nearest neighbors to retrieve. Defaults to 4.
            separate_documents (bool): Whether to return separate
                documents. Defaults to True. When False, the texts of all
                hits are joined with blank lines into a single document.

        Returns:
            List[Document]: A list of documents, one per result item (or a
                single merged document when ``separate_documents`` is False).

        """
        results = self.awadb_client.Search(
            query,
            k,
            text_in_page_content=None,
            meta_filter=None,
            not_include_fields=None,
        )

        # Only the first entry of the search response is read. Guard against
        # an empty/None response so "no hits" does not raise on ``results[0]``.
        result_items = results[0]["ResultItems"] if results else []
        documents = [
            Document(text=item_detail["embedding_text"])
            for item_detail in result_items
        ]

        if not separate_documents:
            # join all documents into one
            text = "\n\n".join(doc.get_content() for doc in documents)
            documents = [Document(text=text)]

        return documents
72
llama-index

Использование cookies