llama-index

Форк
0
120 строк · 3.8 Кб
1
"""Chroma Reader."""
2

3
from typing import Any, List, Optional, Union
4

5
from llama_index.legacy.readers.base import BaseReader
6
from llama_index.legacy.schema import Document
7

8

9
class ChromaReader(BaseReader):
    """Chroma reader.

    Retrieve documents from existing persisted Chroma collections.

    Args:
        collection_name: Name of the persisted collection.
        persist_directory: Directory where the collection is persisted. When
            given, a local ``chromadb.PersistentClient`` is created and
            ``host``/``port`` are not used.
        chroma_api_impl: Not used by this reader; kept so existing callers
            that pass it keep working.
        chroma_db_impl: Not used by this reader; kept so existing callers
            that pass it keep working.
        host: Host passed to ``chromadb.HttpClient`` when no
            ``persist_directory`` is given.
        port: Port passed to ``chromadb.HttpClient`` when no
            ``persist_directory`` is given.

    """

    def __init__(
        self,
        collection_name: str,
        persist_directory: Optional[str] = None,
        chroma_api_impl: str = "rest",
        chroma_db_impl: Optional[str] = None,
        host: str = "localhost",
        port: int = 8000,
    ) -> None:
        """Initialize with parameters.

        Raises:
            ImportError: If the ``chromadb`` package is not installed.
            ValueError: If ``collection_name`` is None, or if no persist
                directory, host, or port is available to build a client.
        """
        import_err_msg = (
            "`chromadb` package not found, please run `pip install chromadb`"
        )
        try:
            import chromadb
        except ImportError as err:
            raise ImportError(import_err_msg) from err

        if collection_name is None:
            raise ValueError("Please provide a collection name.")

        if persist_directory is not None:
            # An empty string falls back to the default local directory.
            self._client = chromadb.PersistentClient(
                path=persist_directory or "./chroma",
            )
        elif (host is not None) or (port is not None):
            self._client = chromadb.HttpClient(
                host=host,
                port=port,
            )
        else:
            # Without this branch ``self._client`` would be unset and the
            # lookup below would fail with an opaque AttributeError.
            raise ValueError(
                "Please provide either a persist directory or a host and port."
            )

        self._collection = self._client.get_collection(collection_name)

    def create_documents(self, results: Any) -> List[Document]:
        """Create documents from the results.

        Only the first result group (index ``0`` of each field) is read, so
        results for additional queries in a batched query are not included.

        Args:
            results: Results from the query. Must provide ``"ids"``,
                ``"documents"``, ``"embeddings"`` and ``"metadatas"``, each
                indexable by ``[0]``.

        Returns:
            List of documents.
        """
        documents = []
        for doc_id, text, embedding, metadata in zip(
            results["ids"][0],
            results["documents"][0],
            results["embeddings"][0],
            results["metadatas"][0],
        ):
            # Embeddings may come back as array-like objects rather than
            # plain lists; normalize them when a ``tolist`` method exists.
            if hasattr(embedding, "tolist"):
                embedding = embedding.tolist()
            documents.append(
                Document(
                    id_=doc_id,
                    text=text,
                    embedding=embedding,
                    # Records stored without metadata yield None here.
                    metadata=metadata or {},
                )
            )

        return documents

    def load_data(
        self,
        query_embedding: Optional[List[float]] = None,
        limit: int = 10,
        where: Optional[dict] = None,
        where_document: Optional[dict] = None,
        query: Optional[Union[str, List[str]]] = None,
    ) -> List[Document]:
        """Load data from the collection.

        Args:
            query_embedding: Embedding vector to search with. Takes
                precedence over ``query`` when both are given.
            limit: Number of results to return.
            where: Filter results by metadata. {"metadata_field": "is_equal_to_this"}
            where_document: Filter results by document. {"$contains":"search_string"}
            query: Query text, or list of query texts, used when no
                ``query_embedding`` is given.

        Returns:
            List of documents.

        Raises:
            ValueError: If neither ``query_embedding`` nor ``query`` is given.
        """
        if query_embedding is None and query is None:
            raise ValueError("Please provide either query embedding or query.")

        query_kwargs: dict = {
            "n_results": limit,
            # Pass "no filter" as None (Chroma's own default) instead of an
            # empty dict, which some chromadb versions reject as invalid.
            "where": where or None,
            "where_document": where_document or None,
            "include": ["metadatas", "documents", "distances", "embeddings"],
        }
        if query_embedding is not None:
            # Use the collection's ``query`` API with a one-element batch of
            # embeddings, mirroring the text branch.
            query_kwargs["query_embeddings"] = [query_embedding]
        else:
            query_kwargs["query_texts"] = (
                query if isinstance(query, list) else [query]
            )

        results = self._collection.query(**query_kwargs)
        return self.create_documents(results)
121

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.