llama-index

Форк
0
161 строка · 5.8 Кб
1
"""Ad-hoc data loader tool.
2

3
Tool that wraps any data loader, and is able to load data on-demand.
4

5
"""
6

7
from typing import Any, Callable, Dict, List, Optional, Tuple, Type
8

9
from llama_index.legacy.bridge.pydantic import BaseModel
10
from llama_index.legacy.indices.base import BaseIndex
11
from llama_index.legacy.indices.vector_store import VectorStoreIndex
12
from llama_index.legacy.readers.base import BaseReader
13
from llama_index.legacy.readers.schema.base import Document
14
from llama_index.legacy.tools.function_tool import FunctionTool
15
from llama_index.legacy.tools.types import AsyncBaseTool, ToolMetadata, ToolOutput
16
from llama_index.legacy.tools.utils import create_schema_from_function
17

18

19
class OnDemandLoaderTool(AsyncBaseTool):
    """On-demand data loader tool.

    Loads data by calling the provided loader function, stores it in a
    newly built index, and queries that index for relevant data with a
    natural language query string.

    Args:
        loader: Callable returning the documents to index. It receives the
            positional and keyword arguments given to ``call`` / ``acall``.
        index_cls: Index class; ``index_cls.from_documents`` builds the index
            on every call.
        index_kwargs: Keyword arguments forwarded to
            ``index_cls.from_documents``.
        metadata: Tool metadata returned by the ``metadata`` property.
        use_query_str_in_loader: If True, the query string is also left in
            the keyword arguments passed to the loader; otherwise it is
            removed before the loader is called.
        query_str_kwargs_key: Name of the keyword argument that carries the
            query string.

    """

    def __init__(
        self,
        loader: Callable[..., List[Document]],
        index_cls: Type[BaseIndex],
        index_kwargs: Dict,
        metadata: ToolMetadata,
        use_query_str_in_loader: bool = False,
        query_str_kwargs_key: str = "query_str",
    ) -> None:
        """Init params."""
        self._loader = loader
        self._index_cls = index_cls
        self._index_kwargs = index_kwargs
        self._use_query_str_in_loader = use_query_str_in_loader
        self._metadata = metadata
        self._query_str_kwargs_key = query_str_kwargs_key

    @property
    def metadata(self) -> ToolMetadata:
        """Return the tool metadata supplied at construction time."""
        return self._metadata

    @classmethod
    def _from_loader(
        cls,
        loader: Callable[..., List[Document]],
        source: Any,
        index_cls: Optional[Type[BaseIndex]],
        index_kwargs: Optional[Dict],
        use_query_str_in_loader: bool,
        query_str_kwargs_key: str,
        name: Optional[str],
        description: Optional[str],
        fn_schema: Optional[Type[BaseModel]],
    ) -> "OnDemandLoaderTool":
        """Fill in defaults and build the tool; shared by the public constructors.

        ``source`` is the object the loader came from; only its class name is
        used, for the default description.
        """
        index_cls = index_cls or VectorStoreIndex
        index_kwargs = index_kwargs or {}
        if description is None:
            description = f"Tool to load data from {source.__class__.__name__}"
        if fn_schema is None:
            # Derive the schema from the loader's signature, passing the
            # query-string field as an additional entry.
            fn_schema = create_schema_from_function(
                name or "LoadData",
                loader,
                [(query_str_kwargs_key, str, None)],
            )

        # NOTE(review): ``name`` is passed through unchanged, so it may be None
        # here even though the schema falls back to "LoadData" -- confirm that
        # ToolMetadata / ToolOutput accept a None name.
        metadata = ToolMetadata(name=name, description=description, fn_schema=fn_schema)
        return cls(
            loader=loader,
            index_cls=index_cls,
            index_kwargs=index_kwargs,
            use_query_str_in_loader=use_query_str_in_loader,
            query_str_kwargs_key=query_str_kwargs_key,
            metadata=metadata,
        )

    @classmethod
    def from_defaults(
        cls,
        reader: BaseReader,
        index_cls: Optional[Type[BaseIndex]] = None,
        index_kwargs: Optional[Dict] = None,
        use_query_str_in_loader: bool = False,
        query_str_kwargs_key: str = "query_str",
        name: Optional[str] = None,
        description: Optional[str] = None,
        fn_schema: Optional[Type[BaseModel]] = None,
    ) -> "OnDemandLoaderTool":
        """Create the tool from a reader, using ``reader.load_data`` as loader.

        Args:
            reader: Reader whose ``load_data`` method loads the documents.
            index_cls: Index class; defaults to ``VectorStoreIndex``.
            index_kwargs: Keyword arguments for ``index_cls.from_documents``;
                defaults to an empty dict.
            use_query_str_in_loader: Whether the query string is also passed
                to the loader.
            query_str_kwargs_key: Keyword argument name of the query string.
            name: Tool name stored in the metadata.
            description: Tool description; defaults to a sentence naming the
                reader's class.
            fn_schema: Argument schema; when omitted it is generated from
                ``reader.load_data`` via ``create_schema_from_function``.

        Returns:
            The configured ``OnDemandLoaderTool``.

        """
        # NOTE: fn_schema should be specified if you want to use as langchain Tool
        return cls._from_loader(
            reader.load_data,
            reader,
            index_cls,
            index_kwargs,
            use_query_str_in_loader,
            query_str_kwargs_key,
            name,
            description,
            fn_schema,
        )

    @classmethod
    def from_tool(
        cls,
        tool: FunctionTool,
        index_cls: Optional[Type[BaseIndex]] = None,
        index_kwargs: Optional[Dict] = None,
        use_query_str_in_loader: bool = False,
        query_str_kwargs_key: str = "query_str",
        name: Optional[str] = None,
        description: Optional[str] = None,
        fn_schema: Optional[Type[BaseModel]] = None,
    ) -> "OnDemandLoaderTool":
        """Create the tool from a ``FunctionTool``, using its function as loader.

        Args:
            tool: Function tool whose wrapped function (``tool._fn``) loads
                the documents.
            index_cls: Index class; defaults to ``VectorStoreIndex``.
            index_kwargs: Keyword arguments for ``index_cls.from_documents``;
                defaults to an empty dict.
            use_query_str_in_loader: Whether the query string is also passed
                to the loader.
            query_str_kwargs_key: Keyword argument name of the query string.
            name: Tool name stored in the metadata.
            description: Tool description; defaults to a sentence naming the
                tool's class.
            fn_schema: Argument schema; when omitted it is generated from the
                wrapped function via ``create_schema_from_function``.

        Returns:
            The configured ``OnDemandLoaderTool``.

        """
        # NOTE: fn_schema should be specified if you want to use as langchain Tool
        return cls._from_loader(
            tool._fn,
            tool,
            index_cls,
            index_kwargs,
            use_query_str_in_loader,
            query_str_kwargs_key,
            name,
            description,
            fn_schema,
        )

    def _parse_args(self, *args: Any, **kwargs: Any) -> Tuple[str, List[Document]]:
        """Extract the query string and load the documents.

        Returns:
            A ``(query_str, docs)`` tuple, where ``docs`` is whatever the
            loader returned for the given arguments.

        Raises:
            ValueError: If the query-string keyword argument is missing.

        """
        if self._query_str_kwargs_key not in kwargs:
            raise ValueError(
                "Missing query_str in kwargs with parameter name: "
                f"{self._query_str_kwargs_key}"
            )
        if self._use_query_str_in_loader:
            # The loader wants the query as well, so leave it in kwargs.
            query_str = kwargs[self._query_str_kwargs_key]
        else:
            # The query is only for the index; strip it before loading.
            query_str = kwargs.pop(self._query_str_kwargs_key)

        docs = self._loader(*args, **kwargs)

        return query_str, docs

    def _build_query_engine(self, docs: List[Document]) -> Any:
        """Index the loaded documents and return a query engine over them."""
        index = self._index_cls.from_documents(docs, **self._index_kwargs)
        # TODO: add query kwargs
        return index.as_query_engine()

    def _make_output(self, query_str: str, response: Any) -> ToolOutput:
        """Wrap a query response in a ``ToolOutput``."""
        return ToolOutput(
            content=str(response),
            tool_name=self.metadata.name,
            raw_input={"query": query_str},
            raw_output=response,
        )

    def call(self, *args: Any, **kwargs: Any) -> ToolOutput:
        """Load data, index it, and answer the query synchronously.

        Raises:
            ValueError: If the query-string keyword argument is missing.

        """
        query_str, docs = self._parse_args(*args, **kwargs)
        query_engine = self._build_query_engine(docs)
        response = query_engine.query(query_str)
        return self._make_output(query_str, response)

    async def acall(self, *args: Any, **kwargs: Any) -> ToolOutput:
        """Load data, index it, and answer the query via ``aquery``.

        Only the query itself is awaited; the loader call and the index
        construction are plain synchronous calls.

        Raises:
            ValueError: If the query-string keyword argument is missing.

        """
        query_str, docs = self._parse_args(*args, **kwargs)
        query_engine = self._build_query_engine(docs)
        response = await query_engine.aquery(query_str)
        return self._make_output(query_str, response)
162

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.