llama-index
52 строки · 2.2 Кб
1from typing import Any, Dict, Type2
3from llama_index.legacy.readers.base import BasePydanticReader4from llama_index.legacy.readers.discord_reader import DiscordReader5from llama_index.legacy.readers.elasticsearch import ElasticsearchReader6from llama_index.legacy.readers.google_readers.gdocs import GoogleDocsReader7from llama_index.legacy.readers.google_readers.gsheets import GoogleSheetsReader8from llama_index.legacy.readers.notion import NotionPageReader9from llama_index.legacy.readers.slack import SlackReader10from llama_index.legacy.readers.string_iterable import StringIterableReader11from llama_index.legacy.readers.twitter import TwitterTweetReader12from llama_index.legacy.readers.web import (13BeautifulSoupWebReader,14RssReader,15SimpleWebPageReader,16TrafilaturaWebReader,17)
18from llama_index.legacy.readers.wikipedia import WikipediaReader19from llama_index.legacy.readers.youtube_transcript import YoutubeTranscriptReader20
# Registry of reader classes that can be looked up by name. Each class is
# keyed by the string it reports through its own ``class_name()``.
ALL_READERS: Dict[str, Type[BasePydanticReader]] = {
    reader_cls.class_name(): reader_cls
    for reader_cls in (
        DiscordReader,
        ElasticsearchReader,
        GoogleDocsReader,
        GoogleSheetsReader,
        NotionPageReader,
        SlackReader,
        StringIterableReader,
        TwitterTweetReader,
        SimpleWebPageReader,
        TrafilaturaWebReader,
        RssReader,
        BeautifulSoupWebReader,
        WikipediaReader,
        YoutubeTranscriptReader,
    )
}
37
38
def load_reader(data: Dict[str, Any]) -> BasePydanticReader:
    """Build a reader instance from its serialized dictionary form.

    Args:
        data: Serialized reader configuration. It must contain a
            ``class_name`` entry naming one of the readers registered in
            ``ALL_READERS``. An object that already is a
            ``BasePydanticReader`` is also accepted and returned as-is.

    Returns:
        ``data`` itself when it is already a reader; otherwise whatever the
        registered class's ``from_dict`` returns for the configuration.

    Raises:
        ValueError: If ``class_name`` is absent (or ``None``), or if it does
            not match any key of ``ALL_READERS``.
    """
    # Already-constructed readers pass straight through.
    if isinstance(data, BasePydanticReader):
        return data

    class_name = data.get("class_name")
    if class_name is None:
        raise ValueError("Must specify `class_name` in reader data.")

    if class_name not in ALL_READERS:
        raise ValueError(f"Reader class name {class_name} not found.")

    # Work on a shallow copy: popping from ``data`` directly would alter the
    # caller's dictionary, making the same config unusable for a second load
    # or for later inspection.
    payload = dict(data)

    # remove static attribute
    payload.pop("is_remote", None)

    return ALL_READERS[class_name].from_dict(payload)