llama-index

Форк
0
91 строка · 3.4 Кб
1
"""Load Documents from a set of persistent Steamship Files."""
2

3
from typing import List, Optional
4

5
from llama_index.legacy.readers.base import BaseReader
6
from llama_index.legacy.schema import Document
7

8

9
class SteamshipFileReader(BaseReader):
    """Read persistent Steamship Files and convert them to Documents.

    Args:
        api_key: Steamship API key. Stored as given and forwarded to the
            Steamship client on every `load_data` call. When it is None the
            client is expected to fall back to the `STEAMSHIP_API_KEY`
            environment variable (that fallback is not implemented here).

    Note:
        Requires install of `steamship` package and an active Steamship API Key.
        To get a Steamship API Key, visit: https://steamship.com/account/api.
        Once you have an API Key, expose it via an environment variable named
        `STEAMSHIP_API_KEY` or pass it as an init argument (`api_key`).
    """

    def __init__(self, api_key: Optional[str] = None) -> None:
        """Initialize the Reader.

        Args:
            api_key: optional Steamship API key (see the class docstring).

        Raises:
            ImportError: if the `steamship` package cannot be imported.
        """
        # Guard only the optional dependency; the import is a pure
        # availability check, the module itself is used in `load_data`.
        try:
            import steamship  # noqa
        except ImportError as err:
            raise ImportError(
                "`steamship` must be installed to use the SteamshipFileReader.\n"
                "Please run `pip install --upgrade steamship`."
            ) from err

        self.api_key = api_key

    def load_data(
        self,
        workspace: str,
        query: Optional[str] = None,
        file_handles: Optional[List[str]] = None,
        collapse_blocks: bool = True,
        join_str: str = "\n\n",
    ) -> List[Document]:
        """Load data from persistent Steamship Files into Documents.

        Args:
            workspace: the handle for a Steamship workspace
                (see: https://docs.steamship.com/workspaces/index.html)
            query: a Steamship tag query for retrieving files
                (ex: 'filetag and value("import-id")="import-001"')
            file_handles: a list of Steamship File handles
                (ex: `smooth-valley-9kbdr`)
            collapse_blocks: whether to merge individual File Blocks into a
                single Document, or separate them.
            join_str: when collapse_blocks is True, this is how the block texts
                will be concatenated.

        Returns:
            One Document per File when `collapse_blocks` is True, otherwise one
            Document per Block. The list is empty when neither `query` nor
            `file_handles` is given (or both are empty).

        Note:
            The collection of Files from both `query` and `file_handles` will be
            combined. There is no (current) support for deconflicting the collections
            (meaning that if a file appears both in the result set of the query and
            as a handle in file_handles, it will be loaded twice).

            Every Document carries the file handle under the `source` metadata
            key plus one entry per file tag (`tag.kind` -> `tag.value`). Tags are
            applied after `source`, so a tag whose kind is `source`, or several
            tags sharing a kind, overwrite earlier entries.
        """
        # Imported lazily so this module can be imported without `steamship`.
        from steamship import File, Steamship

        client = Steamship(workspace=workspace, api_key=self.api_key)

        files = []
        if query:
            files.extend(File.query(client=client, tag_filter_query=query).files)
        if file_handles:
            files.extend(File.get(client=client, handle=h) for h in file_handles)

        docs: List[Document] = []
        for file in files:
            metadata = {"source": file.handle}
            for tag in file.tags:
                metadata[tag.kind] = tag.value

            if collapse_blocks:
                text = join_str.join([b.text for b in file.blocks])
                docs.append(Document(text=text, id_=file.handle, metadata=metadata))
            else:
                # NOTE(review): all per-block Documents of a file share the same
                # id_ (the file handle); kept as-is because callers may rely on
                # it -- confirm whether unique per-block ids are wanted.
                # Each Document gets its own metadata dict so that editing one
                # document's metadata cannot leak into its siblings.
                docs.extend(
                    Document(text=b.text, id_=file.handle, metadata=dict(metadata))
                    for b in file.blocks
                )

        return docs
92

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.