llama-index
85 строк · 2.8 Кб
1"""DashVector reader."""
2
3from typing import Dict, List, Optional4
5from llama_index.legacy.readers.base import BaseReader6from llama_index.legacy.schema import Document7
8
class DashVectorReader(BaseReader):
    """DashVector reader.

    Wraps a ``dashvector.Client`` and turns the hits of a vector query into
    ``Document`` objects, using a caller-supplied mapping from document ID
    to text.

    Args:
        api_key (str): DashVector API key.
        endpoint (str): DashVector cluster endpoint.

    Raises:
        ImportError: If the ``dashvector`` package is not installed.
    """

    def __init__(self, api_key: str, endpoint: str):
        """Initialize with parameters."""
        # Imported lazily so the dependency is only required when this
        # reader is actually instantiated.
        try:
            import dashvector
        except ImportError as err:
            # Chain the original error so the underlying cause stays visible.
            raise ImportError(
                "`dashvector` package not found, please run `pip install dashvector`"
            ) from err

        self._client = dashvector.Client(api_key=api_key, endpoint=endpoint)

    def load_data(
        self,
        collection_name: str,
        id_to_text_map: Dict[str, str],
        vector: Optional[List[float]],
        top_k: int,
        separate_documents: bool = True,
        filter: Optional[str] = None,
        include_vector: bool = True,
    ) -> List[Document]:
        """Load data from DashVector.

        Args:
            collection_name (str): Name of the collection.
            id_to_text_map (Dict[str, str]): A map from ID's to text. Every
                ID returned by the query must be present in this map.
            vector (Optional[List[float]]): Query vector.
            top_k (int): Number of results to return.
            separate_documents (bool): Whether to return separate
                documents per retrieved entry. When False, the texts of all
                hits are joined with a blank line into a single document
                that carries no embedding. Defaults to True.
            filter (Optional[str]): doc fields filter conditions that meet
                the SQL where clause specification.
            include_vector (bool): Whether to include the embedding in the
                response. Defaults to True.

        Returns:
            List[Document]: A list of documents.

        Raises:
            ValueError: If the collection cannot be retrieved, or if a
                returned ID is missing from ``id_to_text_map``.
            Exception: If the query response is falsy.
        """
        collection = self._client.get(collection_name)
        if not collection:
            # NOTE(review): relies on the SDK object being falsy on failure
            # -- confirm against the dashvector response type.
            raise ValueError(
                f"Failed to get collection: {collection_name},Error: {collection}"
            )

        resp = collection.query(
            vector=vector,
            topk=top_k,
            filter=filter,
            include_vector=include_vector,
        )
        if not resp:
            raise Exception(f"Failed to query document,Error: {resp}")

        documents = []
        for doc in resp:
            if doc.id not in id_to_text_map:
                raise ValueError("ID not found in id_to_text_map.")
            text = id_to_text_map[doc.id]

            # Normalise a missing embedding to None. The explicit None check
            # avoids a TypeError from len(None) when a hit carries no vector
            # (e.g. when include_vector is False); len() rather than
            # truthiness is used so array-like vectors are handled too.
            embedding = doc.vector
            if embedding is None or len(embedding) == 0:
                embedding = None
            documents.append(Document(text=text, embedding=embedding))

        if not separate_documents:
            # Collapse all hits into one document; embeddings are dropped.
            text_list = [doc.get_content() for doc in documents]
            text = "\n\n".join(text_list)
            documents = [Document(text=text)]

        return documents