llama-index

pandas_query_engine.py
183 строки · 6.9 Кб
Перенос по словам
1
"""Default query for PandasIndex.

WARNING: This tool provides the Agent access to the `eval` function.
Arbitrary code execution is possible on the machine running this tool.
This tool is not recommended to be used in a production setting, and would
require heavy sandboxing or virtual machines.

"""
9

10
import logging
11
from typing import Any, Dict, Optional
12

13
import pandas as pd
14

15
from llama_index.legacy.core.base_query_engine import BaseQueryEngine
16
from llama_index.legacy.core.response.schema import Response
17
from llama_index.legacy.indices.struct_store.pandas import PandasIndex
18
from llama_index.legacy.llms.utils import LLMType
19
from llama_index.legacy.prompts import BasePromptTemplate, PromptTemplate
20
from llama_index.legacy.prompts.default_prompts import DEFAULT_PANDAS_PROMPT
21
from llama_index.legacy.prompts.mixin import PromptDictType, PromptMixinType
22
from llama_index.legacy.query_engine.pandas.output_parser import PandasInstructionParser
23
from llama_index.legacy.schema import QueryBundle
24
from llama_index.legacy.service_context import ServiceContext
25
from llama_index.legacy.utils import print_text
26

27
# Module-level logger; `PandasQueryEngine.from_index` uses it to emit its
# PandasIndex deprecation warning.
logger = logging.getLogger(__name__)
28

29

30
# Default instructions handed to the LLM (as `instruction_str`) together with
# the table preview and the user query. They ask for a single bare expression
# suitable for `eval()`, with no quoting around it.
DEFAULT_INSTRUCTION_STR = (
    "1. Convert the query to executable Python code using Pandas.\n"
    "2. The final line of code should be a Python expression that can be called with the `eval()` function.\n"
    "3. The code should represent a solution to the query.\n"
    "4. PRINT ONLY THE EXPRESSION.\n"
    "5. Do not quote the expression.\n"
)
37

38

39
# Template for the optional response-synthesis step. It exposes three
# placeholders: `query_str`, `pandas_instructions` and `pandas_output`.
# NOTE(review): the previous comment here read "newer version of sql query
# engine"; presumably this template was adapted from the SQL query engine's
# response-synthesis prompt -- confirm.
DEFAULT_RESPONSE_SYNTHESIS_PROMPT_TMPL = (
    "Given an input question, synthesize a response from the query results.\n"
    "Query: {query_str}\n\n"
    "Pandas Instructions (optional):\n{pandas_instructions}\n\n"
    "Pandas Output: {pandas_output}\n\n"
    "Response: "
)
47
# Prompt object wrapping the template above; PandasQueryEngine falls back to it
# when no `response_synthesis_prompt` is supplied.
DEFAULT_RESPONSE_SYNTHESIS_PROMPT = PromptTemplate(
    DEFAULT_RESPONSE_SYNTHESIS_PROMPT_TMPL,
)
50

51

52
class PandasQueryEngine(BaseQueryEngine):
    """Pandas query engine.

    Convert natural language to Pandas python code.

    WARNING: This tool provides the Agent access to the `eval` function.
    Arbitrary code execution is possible on the machine running this tool.
    This tool is not recommended to be used in a production setting, and would
    require heavy sandboxing or virtual machines.

    Args:
        df (pd.DataFrame): Pandas dataframe to use.
        instruction_str (Optional[str]): Instruction string to use. Falls back
            to ``DEFAULT_INSTRUCTION_STR`` when omitted or empty.
        instruction_parser (Optional[PandasInstructionParser]): Parser applied
            to the LLM's Pandas instructions. When omitted, a
            ``PandasInstructionParser`` is built from ``df`` and
            ``output_kwargs``.
        pandas_prompt (Optional[BasePromptTemplate]): Pandas prompt to use.
            Falls back to ``DEFAULT_PANDAS_PROMPT``.
        output_kwargs (Optional[dict]): Output kwargs forwarded to the default
            instruction parser (ignored when ``instruction_parser`` is given).
            E.g. ``output_kwargs["max_colwidth"] = [int]`` is used to set the
            length of text that each column can display during ``str(df)``.
            Set it to a higher number if there is possibly long text in the
            dataframe.
        head (int): Number of rows to show in the table context.
        verbose (bool): Print the generated Pandas instructions and their
            output while answering a query.
        service_context (Optional[ServiceContext]): Service context supplying
            the LLM and callback manager. When omitted, one is created with
            ``ServiceContext.from_defaults(llm=llm)``.
        llm (Optional[LLM]): Language model to use. Only consulted when
            ``service_context`` is not provided.
        synthesize_response (bool): If True, make a second LLM call that turns
            the raw Pandas output into a natural-language response; otherwise
            the response is ``str()`` of the Pandas output.
        response_synthesis_prompt (Optional[BasePromptTemplate]): Prompt for
            the synthesis step. Falls back to
            ``DEFAULT_RESPONSE_SYNTHESIS_PROMPT``.
        **kwargs (Any): Accepted for call-site compatibility; not used by this
            constructor.

    """

    def __init__(
        self,
        df: pd.DataFrame,
        instruction_str: Optional[str] = None,
        instruction_parser: Optional[PandasInstructionParser] = None,
        pandas_prompt: Optional[BasePromptTemplate] = None,
        output_kwargs: Optional[dict] = None,
        head: int = 5,
        verbose: bool = False,
        service_context: Optional[ServiceContext] = None,
        llm: Optional[LLMType] = "default",
        synthesize_response: bool = False,
        response_synthesis_prompt: Optional[BasePromptTemplate] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        self._df = df

        self._head = head
        self._pandas_prompt = pandas_prompt or DEFAULT_PANDAS_PROMPT
        self._instruction_str = instruction_str or DEFAULT_INSTRUCTION_STR
        # The default parser is bound to the same dataframe the engine queries.
        self._instruction_parser = instruction_parser or PandasInstructionParser(
            df, output_kwargs or {}
        )
        self._verbose = verbose

        # `llm` only matters when the caller did not hand in a service context.
        self._service_context = service_context or ServiceContext.from_defaults(llm=llm)
        self._synthesize_response = synthesize_response
        self._response_synthesis_prompt = (
            response_synthesis_prompt or DEFAULT_RESPONSE_SYNTHESIS_PROMPT
        )

        super().__init__(self._service_context.callback_manager)

    def _get_prompt_modules(self) -> PromptMixinType:
        """Get prompt sub-modules (this engine has none)."""
        return {}

    def _get_prompts(self) -> Dict[str, Any]:
        """Get prompts."""
        return {
            "pandas_prompt": self._pandas_prompt,
            "response_synthesis_prompt": self._response_synthesis_prompt,
        }

    def _update_prompts(self, prompts: PromptDictType) -> None:
        """Update prompts; keys other than the two known ones are ignored."""
        if "pandas_prompt" in prompts:
            self._pandas_prompt = prompts["pandas_prompt"]
        if "response_synthesis_prompt" in prompts:
            self._response_synthesis_prompt = prompts["response_synthesis_prompt"]

    @classmethod
    def from_index(cls, index: PandasIndex, **kwargs: Any) -> "PandasQueryEngine":
        """Build an engine from a (deprecated) PandasIndex.

        Logs a deprecation warning, then constructs the engine from the
        index's ``df`` and ``service_context``; remaining ``kwargs`` are
        forwarded to the constructor.
        """
        logger.warning(
            "PandasIndex is deprecated. "
            "Directly construct PandasQueryEngine with df instead."
        )
        return cls(df=index.df, service_context=index.service_context, **kwargs)

    def _get_table_context(self) -> str:
        """Get table context: the string form of the first ``head`` rows."""
        return str(self._df.head(self._head))

    def _run_instructions(self, pandas_response_str: str) -> Any:
        """Parse the LLM's Pandas instructions, echoing both sides if verbose."""
        if self._verbose:
            print_text(f"> Pandas Instructions:\n```\n{pandas_response_str}\n```\n")
        # NOTE(security): per the warning in the class docstring, this step is
        # presumably where LLM-generated code reaches `eval` -- only run it on
        # trusted input or inside a sandbox.
        pandas_output = self._instruction_parser.parse(pandas_response_str)
        if self._verbose:
            print_text(f"> Pandas Output: {pandas_output}\n")
        return pandas_output

    @staticmethod
    def _build_response(
        response_str: str, pandas_response_str: str, pandas_output: Any
    ) -> Response:
        """Wrap the final text plus the intermediate artefacts in a Response."""
        response_metadata = {
            "pandas_instruction_str": pandas_response_str,
            "raw_pandas_output": pandas_output,
        }
        return Response(response=response_str, metadata=response_metadata)

    def _query(self, query_bundle: QueryBundle) -> Response:
        """Answer a query.

        Asks the LLM for Pandas instructions given the table preview, parses
        them, and returns either the raw output as a string or, when
        ``synthesize_response`` is set, an LLM-synthesized answer. The
        instructions and raw output are attached as response metadata.
        """
        context = self._get_table_context()
        llm = self._service_context.llm

        pandas_response_str = llm.predict(
            self._pandas_prompt,
            df_str=context,
            query_str=query_bundle.query_str,
            instruction_str=self._instruction_str,
        )
        pandas_output = self._run_instructions(pandas_response_str)

        if self._synthesize_response:
            response_str = str(
                llm.predict(
                    self._response_synthesis_prompt,
                    query_str=query_bundle.query_str,
                    pandas_instructions=pandas_response_str,
                    pandas_output=pandas_output,
                )
            )
        else:
            response_str = str(pandas_output)

        return self._build_response(response_str, pandas_response_str, pandas_output)

    async def _aquery(self, query_bundle: QueryBundle) -> Response:
        """Answer a query asynchronously.

        Same steps as ``_query``, but the LLM calls are awaited via
        ``apredict`` so the event loop is not blocked while waiting on the
        model. Parsing the instructions is still synchronous.
        """
        context = self._get_table_context()
        llm = self._service_context.llm

        pandas_response_str = await llm.apredict(
            self._pandas_prompt,
            df_str=context,
            query_str=query_bundle.query_str,
            instruction_str=self._instruction_str,
        )
        pandas_output = self._run_instructions(pandas_response_str)

        if self._synthesize_response:
            response_str = str(
                await llm.apredict(
                    self._response_synthesis_prompt,
                    query_str=query_bundle.query_str,
                    pandas_instructions=pandas_response_str,
                    pandas_output=pandas_output,
                )
            )
        else:
            response_str = str(pandas_output)

        return self._build_response(response_str, pandas_response_str, pandas_output)
179

180

181
# Legacy names kept as aliases; both resolve to the very same class object
# as PandasQueryEngine.
GPTNLPandasQueryEngine = PandasQueryEngine
NLPandasQueryEngine = PandasQueryEngine
184
llama-index

Использование cookies