llama-index
120 строк · 3.8 Кб
1"""Chroma Reader."""
2
3from typing import Any, List, Optional, Union4
5from llama_index.legacy.readers.base import BaseReader6from llama_index.legacy.schema import Document7
8
class ChromaReader(BaseReader):
    """Chroma reader.

    Retrieve documents from existing persisted Chroma collections.

    Args:
        collection_name: Name of the persisted collection.
        persist_directory: Directory where the collection is persisted. When
            given, a local ``chromadb.PersistentClient`` is created and
            ``host``/``port`` are ignored.
        chroma_api_impl: Not used by this reader; accepted so existing
            callers that pass it keep working.
        chroma_db_impl: Not used by this reader; accepted so existing
            callers that pass it keep working.
        host: Host of the Chroma server, used when ``persist_directory`` is
            ``None``.
        port: Port of the Chroma server, used when ``persist_directory`` is
            ``None``.

    Raises:
        ImportError: If the ``chromadb`` package is not installed.
        ValueError: If ``collection_name`` is ``None``, or if neither a
            persist directory nor a host/port is available to build a client.

    """

    def __init__(
        self,
        collection_name: str,
        persist_directory: Optional[str] = None,
        chroma_api_impl: str = "rest",
        chroma_db_impl: Optional[str] = None,
        host: str = "localhost",
        port: int = 8000,
    ) -> None:
        """Initialize with parameters."""
        import_err_msg = (
            "`chromadb` package not found, please run `pip install chromadb`"
        )
        try:
            import chromadb
        except ImportError as err:
            # Keep the original failure attached for easier debugging.
            raise ImportError(import_err_msg) from err

        if collection_name is None:
            raise ValueError("Please provide a collection name.")

        if persist_directory is not None:
            # An empty string falls back to the default local directory.
            self._client = chromadb.PersistentClient(
                path=persist_directory or "./chroma",
            )
        elif host is not None or port is not None:
            self._client = chromadb.HttpClient(
                host=host,
                port=port,
            )
        else:
            # Without this branch ``self._client`` would be left unset and the
            # lookup below would fail with an unhelpful AttributeError.
            raise ValueError(
                "Please provide either a persist directory or a host and port."
            )

        self._collection = self._client.get_collection(collection_name)

    def create_documents(self, results: Any) -> List[Document]:
        """Create documents from the results.

        Only the result set of the first query (index ``0`` of each field)
        is converted.

        Args:
            results: Results from the query. Must contain the keys ``"ids"``,
                ``"documents"``, ``"embeddings"`` and ``"metadatas"``, each
                indexable by query position.

        Returns:
            List of documents.
        """
        rows = zip(
            results["ids"][0],
            results["documents"][0],
            results["embeddings"][0],
            results["metadatas"][0],
        )
        return [
            Document(
                id_=doc_id,
                text=text,
                embedding=embedding,
                # A record may come back with no metadata (None); use an
                # empty dict so Document construction does not reject it.
                metadata=metadata or {},
            )
            for doc_id, text, embedding, metadata in rows
        ]

    def load_data(
        self,
        query_embedding: Optional[List[float]] = None,
        limit: int = 10,
        where: Optional[dict] = None,
        where_document: Optional[dict] = None,
        query: Optional[Union[str, List[str]]] = None,
    ) -> List[Document]:
        """Load data from the collection.

        Args:
            query_embedding: Embedding vector to search with. Takes
                precedence over ``query`` when both are given.
            limit: Number of results to return.
            where: Filter results by metadata. {"metadata_field": "is_equal_to_this"}
            where_document: Filter results by document. {"$contains":"search_string"}
            query: Query text, or a list of query texts. Only the results of
                the first text are turned into documents.

        Returns:
            List of documents.

        Raises:
            ValueError: If neither ``query_embedding`` nor ``query`` is given.
        """
        if query_embedding is None and query is None:
            raise ValueError("Please provide either query embedding or query.")

        shared_kwargs: dict = {
            "n_results": limit,
            # Pass None rather than an empty dict: an empty filter is not a
            # valid Chroma filter expression and may be rejected.
            "where": where or None,
            "where_document": where_document or None,
            "include": ["metadatas", "documents", "distances", "embeddings"],
        }

        if query_embedding is not None:
            # Embedding search goes through ``Collection.query`` using the
            # plural ``query_embeddings`` argument (one embedding per query).
            results = self._collection.query(
                query_embeddings=[query_embedding],
                **shared_kwargs,
            )
        else:
            query_texts = query if isinstance(query, list) else [query]
            results = self._collection.query(
                query_texts=query_texts,
                **shared_kwargs,
            )
        return self.create_documents(results)