llama-index
34 строки · 910.0 Байт
1"""Flat reader."""
2
3from pathlib import Path
4from typing import Any, Dict, List, Optional
5
6from llama_index.legacy.readers.base import BaseReader
7from llama_index.legacy.schema import Document
8
9
class FlatReader(BaseReader):
    """Flat reader.

    Extract raw text from a file and save the file name and file type
    (suffix) in the metadata.
    """

    def __init__(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Init params.

        All positional and keyword arguments are forwarded unchanged to the
        base reader's constructor.
        """
        super().__init__(*args, **kwargs)

    def load_data(
        self, file: Path, extra_info: Optional[Dict] = None
    ) -> List[Document]:
        """Parse file into string.

        Args:
            file: Location of the file to read. A ``Path`` is expected; a
                plain string (or other path-like) is also accepted and is
                converted to ``Path`` so the name and suffix can be derived.
            extra_info: Optional additional metadata. Its entries are merged
                on top of the generated ones, so a caller-supplied
                ``"filename"`` or ``"extension"`` key overrides the default.

        Returns:
            A single-element list holding one ``Document`` whose text is the
            whole file content decoded as UTF-8.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the file content is not valid UTF-8.
        """
        # open() accepts str paths, but .name/.suffix below need a Path;
        # normalise first so string input does not fail after the read.
        if not isinstance(file, Path):
            file = Path(file)

        with open(file, encoding="utf-8") as f:
            content = f.read()

        metadata = {"filename": file.name, "extension": file.suffix}
        if extra_info:
            # Caller-provided values win on key collisions.
            metadata = {**metadata, **extra_info}

        return [Document(text=content, metadata=metadata)]
35