# llama-index
# 171 lines · 5.4 KB
from typing import Any, Dict, List, Literal, Mapping, Optional, Sequence, TypeVar, Union

from llama_index.legacy.readers.base import BaseReader
from llama_index.legacy.readers.schema.base import Document

# define types

# Identifiers of stored items.
ID = str
IDs = List[ID]

# An embedding is a sequence of numbers (floats or ints).
Vector = Union[Sequence[float], Sequence[int]]
Embedding = Vector
Embeddings = List[Embedding]

# Per-item metadata: string keys mapped to scalar values.
Metadata = Mapping[str, Union[str, int, float]]
Metadatas = List[Metadata]

# Metadata Query Grammar
LiteralValue = Union[str, int, float]
LogicalOperator = Literal["$and", "$or"]
WhereOperator = Literal["$gt", "$gte", "$lt", "$lte", "$ne", "$eq"]
OperatorExpression = Dict[Union[WhereOperator, LogicalOperator], LiteralValue]

# A metadata filter; the string forward reference makes the alias recursive.
Where = Dict[
    Union[str, LogicalOperator],
    Union[LiteralValue, OperatorExpression, List["Where"]],
]

# A filter on document text; recursive in the same way as ``Where``.
WhereDocumentOperator = Union[Literal["$contains"], LogicalOperator]
WhereDocument = Dict[WhereDocumentOperator, Union[str, List["WhereDocument"]]]

ClusterMetadata = Dict[Any, Any]

# Raw document text.
Doc = str
Documents = List[Doc]

Parameter = TypeVar("Parameter", Embedding, Doc, Metadata, ID)
T = TypeVar("T")
# Either a single value or a list of values of the same type.
OneOrMany = Union[T, List[T]]

# Names of the result fields a query may be asked to return.
# NOTE(review): an earlier comment here claimed that this plain
# List[Literal[...]] spelling was incompatible with the Overrides library on
# Python 3.7 -- confirm that constraint no longer applies.
Include = List[Literal["documents", "embeddings", "metadatas", "distances"]]


class BagelReader(BaseReader):
    """Reader that loads documents from a Bagel cluster.

    The reader builds a Bagel REST client pointed at ``api.bageldb.ai`` and
    looks up the named cluster once, when the reader is constructed.
    """

    def __init__(self, collection_name: str) -> None:
        """Initialize BagelReader.

        Args:
            collection_name: Name of the collection (Bagel cluster) to load
                from.

        Raises:
            ImportError: If the ``bagel`` package is not installed.
            ValueError: If ``collection_name`` is empty.
        """
        try:
            import bagel
        except ImportError as err:
            # Chain the original error so the underlying cause stays visible.
            raise ImportError(
                "`bagel` package not found, please run `pip install bagel`"
            ) from err
        from bagel.config import Settings

        if not collection_name:
            raise ValueError("collection_name cannot be empty")

        self.collection_name = collection_name

        server_settings = Settings(
            bagel_api_impl="rest", bagel_server_host="api.bageldb.ai"
        )

        self.client = bagel.Client(server_settings)

        self._collection = self.client.get_cluster(collection_name)

    @staticmethod
    def _first_query_column(results: Any, key: str, size: int) -> Any:
        """Return the first query's values for ``key``, padded if absent.

        ``load_data`` accepts results in which ``embeddings`` or ``documents``
        is empty/None, so a missing column is replaced by ``size`` ``None``
        placeholders instead of being indexed blindly.
        """
        try:
            column = results[key]
        except KeyError:
            column = None
        return column[0] if column else [None] * size

    def create_documents(self, results: Any) -> Any:
        """Create documents from the results.

        Only the first query's matches (index ``0`` of each result column)
        are converted.

        Args:
            results: Results from the query; a mapping with an ``ids`` column
                and, optionally, ``documents``, ``embeddings`` and
                ``metadatas`` columns.

        Returns:
            List of documents.
        """
        ids = results["ids"][0]
        texts = self._first_query_column(results, "documents", len(ids))
        embeddings = self._first_query_column(results, "embeddings", len(ids))
        metadatas = self._first_query_column(results, "metadatas", len(ids))

        documents = []
        for doc_id, text, embedding, metadata in zip(
            ids, texts, embeddings, metadatas
        ):
            optional_fields = {
                "text": text,
                "embedding": embedding,
                "metadata": metadata,
            }
            # Leave out absent fields rather than passing None.
            # NOTE(review): assumes Document supplies its own defaults for
            # omitted text/embedding/metadata -- confirm against the schema.
            present = {
                name: value
                for name, value in optional_fields.items()
                if value is not None
            }
            documents.append(Document(doc_id=doc_id, **present))

        return documents

    def load_data(
        self,
        query_vector: Optional[OneOrMany[Embedding]] = None,
        query_texts: Optional[OneOrMany[Doc]] = None,
        limit: int = 10,
        where: Optional[Where] = None,
        where_document: Optional[WhereDocument] = None,
        include: Optional[Include] = None,
    ) -> Any:
        """Get the top ``limit`` documents for the given query vector or texts.

        Args:
            query_vector: The embeddings to get the closest neighbors of.
                Optional.
            query_texts: The document texts to get the closest neighbors of.
                Optional.
            limit: The number of neighbors to return for each query. Optional.
            where: A Where type dict used to filter results by. Optional.
            where_document: A WhereDocument type dict used to filter. Optional.
            include: A list of what to include in the results. Defaults to
                metadatas, documents, embeddings and distances.

        Returns:
            Llama Index Document(s) with the closest embeddings to the
            query vector or query texts.

        Raises:
            ValueError: If neither or both of ``query_vector`` and
                ``query_texts`` are given, if the query returns nothing, or if
                the results contain neither embeddings nor documents.
        """
        # Exactly one of the two query inputs must be supplied.
        if (query_vector is None) == (query_texts is None):
            raise ValueError(
                "You must provide either embeddings or texts to find, but not both"
            )

        # Build the default per call so no list is shared between calls.
        if include is None:
            include = ["metadatas", "documents", "embeddings", "distances"]

        results = self._collection.find(
            query_embeddings=query_vector,
            query_texts=query_texts,
            n_results=limit,
            where={} if where is None else where,
            where_document={} if where_document is None else where_document,
            include=include,
        )

        # check if there are results
        if not results:
            raise ValueError("No results found")

        # check if there are embeddings or documents
        if not results["embeddings"] and not results["documents"]:
            raise ValueError("No embeddings or documents found")

        # create documents from the results
        return self.create_documents(results)