llama-index
66 строк · 2.1 Кб
1"""ChatGPT Plugin."""
2
3import os4from typing import Any, List, Optional5
6import requests7from requests.adapters import HTTPAdapter, Retry8
9from llama_index.legacy.readers.base import BaseReader10from llama_index.legacy.schema import Document11
12
class ChatGPTRetrievalPluginReader(BaseReader):
    """ChatGPT Retrieval Plugin reader.

    Posts a single query to the plugin's ``/query`` route and turns every
    returned chunk into a ``Document``.
    """

    def __init__(
        self,
        endpoint_url: str,
        bearer_token: Optional[str] = None,
        retries: Optional[Retry] = None,
        batch_size: int = 100,
    ) -> None:
        """Set up the reader and its HTTP session.

        Args:
            endpoint_url: Base URL of the plugin; ``/query`` is appended to it.
            bearer_token: Token for the ``Authorization`` header. When falsy,
                the ``BEARER_TOKEN`` environment variable is used instead.
            retries: Retry policy handed to the session's ``HTTPAdapter``.
            batch_size: Stored on the instance; ``load_data`` does not use it.
        """
        self._endpoint_url = endpoint_url
        self._bearer_token = bearer_token or os.getenv("BEARER_TOKEN")
        self._retries = retries
        self._batch_size = batch_size

        self._s = requests.Session()
        # Only override the session's stock adapters when the caller supplied
        # a policy, so the default (no ``retries``) keeps behaving like a
        # plain ``requests.post`` call. Cover both schemes so HTTPS endpoints
        # get the same retry policy as HTTP ones.
        if self._retries is not None:
            for scheme in ("http://", "https://"):
                self._s.mount(scheme, HTTPAdapter(max_retries=self._retries))

    def load_data(
        self,
        query: str,
        top_k: int = 10,
        separate_documents: bool = True,
        **kwargs: Any,
    ) -> List[Document]:
        """Load data from ChatGPT Retrieval Plugin.

        Args:
            query: Query text sent to the plugin.
            top_k: Number of results requested for the query.
            separate_documents: When true, return one ``Document`` per
                result; otherwise join all result texts with blank lines
                into a single ``Document``.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            The documents built from the first (and only) query's results.

        Raises:
            requests.HTTPError: If the endpoint answers with an error status.
        """
        # Do not send a literal "Bearer None" when no token is configured.
        headers = {}
        if self._bearer_token:
            headers["Authorization"] = f"Bearer {self._bearer_token}"

        # Tolerate a trailing slash on the configured base URL.
        url = self._endpoint_url.rstrip("/") + "/query"
        queries = [{"query": query, "top_k": top_k}]

        # Go through the session so the configured retry policy is applied.
        res = self._s.post(url, headers=headers, json={"queries": queries})
        # Surface HTTP failures directly instead of as a KeyError below.
        res.raise_for_status()

        # NOTE: there should only be one query, so only the first result set
        # is read.
        query_results = res.json()["results"]
        chunks = query_results[0]["results"] if query_results else []

        documents: List[Document] = [
            Document(
                text=chunk["text"],
                id_=chunk["id"],
                # Treat the embedding as optional in the response payload.
                embedding=chunk.get("embedding"),
            )
            for chunk in chunks
        ]

        if not separate_documents:
            text = "\n\n".join(doc.get_content() for doc in documents)
            documents = [Document(text=text)]

        return documents