# llama-index
# 77 lines · 2.4 KB
"""txtai reader."""

from typing import Any, Dict, List

import numpy as np

from llama_index.legacy.readers.base import BaseReader
from llama_index.legacy.schema import Document

class TxtaiReader(BaseReader):
    """txtai reader.

    Retrieves documents through an existing in-memory txtai index.
    These documents can then be used in a downstream LlamaIndex data structure.
    If you wish to use txtai itself as an index to organize documents,
    insert documents, and perform queries on them, please use VectorStoreIndex
    with TxtaiVectorStore.

    Args:
        index (txtai.ann.ANN): A txtai Index object (required)

    """

    def __init__(self, index: Any):
        """Initialize with parameters.

        Args:
            index (Any): The txtai index to search; stored as-is on the reader.

        Raises:
            ImportError: If the `txtai` package cannot be imported.

        """
        import_err_msg = (
            "\n`txtai` package not found. For instructions on\n"
            "how to install `txtai` please visit\n"
            "https://neuml.github.io/txtai/install/\n"
        )
        try:
            # Imported only to verify that the optional dependency is installed.
            import txtai  # noqa
        except ImportError as err:
            # Chain the original failure so the root cause stays visible.
            raise ImportError(import_err_msg) from err

        self._index = index

    def load_data(
        self,
        query: np.ndarray,
        id_to_text_map: Dict[str, str],
        k: int = 4,
        separate_documents: bool = True,
    ) -> List[Document]:
        """Load data from txtai index.

        Args:
            query (np.ndarray): A 2D numpy array of query vectors.
            id_to_text_map (Dict[str, str]): A map from ID's to text.
            k (int): Number of nearest neighbors to retrieve. Defaults to 4.
            separate_documents (bool): Whether to return separate
                documents. Defaults to True.

        Returns:
            List[Document]: A list of documents. When ``separate_documents``
                is False, the list holds a single document whose text is all
                retrieved texts joined by blank lines.

        Raises:
            ValueError: If a retrieved ID (converted to ``str``) is not a key
                of ``id_to_text_map``.

        """
        search_result = self._index.search(query, k)

        documents = []
        for query_result in search_result:
            # Each hit is unpacked as a pair; only the ID (first item) is used.
            for doc_id, _ in query_result:
                # IDs are normalised to str because the map is keyed by str.
                doc_key = str(doc_id)
                if doc_key not in id_to_text_map:
                    raise ValueError(
                        f"Document ID {doc_key} not found in id_to_text_map."
                    )
                documents.append(Document(text=id_to_text_map[doc_key]))

        if not separate_documents:
            # join all documents into one
            combined_text = "\n\n".join(doc.get_content() for doc in documents)
            documents = [Document(text=combined_text)]

        return documents