llama-index

Форк
0
86 строк · 2.9 Кб
1
"""Pandas output parser."""
2

3
import logging
4
from typing import Any, Dict, Optional
5

6
import numpy as np
7
import pandas as pd
8

9
from llama_index.legacy.exec_utils import safe_eval, safe_exec
10
from llama_index.legacy.output_parsers.base import ChainableOutputParser
11
from llama_index.legacy.output_parsers.utils import parse_code_markdown
12

13
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
14

15

16
def default_output_processor(
    output: str, df: pd.DataFrame, **output_kwargs: Any
) -> str:
    """Run the Python code contained in ``output`` against ``df``.

    The code is extracted with ``parse_code_markdown(output, only_last=True)``
    (first element of its result). Every statement except the last is executed
    with ``safe_exec``; the last statement is evaluated with ``safe_eval`` and
    its value is converted with ``str``.

    NOTE(review): this executes code that typically comes from a model
    response. How restrictive the sandbox is depends entirely on
    ``safe_exec`` / ``safe_eval``, which are defined outside this function.

    Args:
        output: Text containing the Python instructions to run.
        df: DataFrame made available to the executed code under the local
            name ``df``.
        **output_kwargs: Optional settings. Only ``max_colwidth`` is read:
            when present, pandas' ``display.max_colwidth`` option is set to
            that value while the final expression is evaluated and rendered,
            and the previous value is restored afterwards.

    Returns:
        The string form of the final expression's value. If anything fails
        while parsing or running the code, an error message string is
        returned instead (the exception is not propagated). On Python < 3.9
        the code is not executed and ``output`` is returned unchanged.
    """
    import ast
    import sys
    import traceback

    # ast.unparse only exists on Python >= 3.9, so older interpreters cannot
    # run the code at all.
    if sys.version_info < (3, 9):
        logger.warning(
            "Python version must be >= 3.9 in order to use "
            "the default output processor, which executes "
            "the Python query. Instead, we will return the "
            "raw Python instructions as a string."
        )
        return output

    local_vars = {"df": df}

    output = parse_code_markdown(output, only_last=True)[0]

    # NOTE: inspired from langchain's tool
    # see langchain.tools.python.tool (PythonAstREPLTool)
    try:
        tree = ast.parse(output)

        # Execute everything but the last statement for its side effects on
        # ``local_vars`` (assignments, imports, ...).
        module = ast.Module(tree.body[:-1], type_ignores=[])
        safe_exec(ast.unparse(module), {}, local_vars)  # type: ignore

        # The last statement is the one whose value is returned.
        module_end = ast.Module(tree.body[-1:], type_ignores=[])
        module_end_str = ast.unparse(module_end)  # type: ignore
        if module_end_str.strip("'\"") != module_end_str:
            # if there's leading/trailing quotes, then we need to eval
            # string to get the actual expression
            module_end_str = safe_eval(module_end_str, {"np": np}, local_vars)

        # str(pd.dataframe) will truncate output by display.max_colwidth,
        # so the width is widened temporarily to extract more text.
        override_colwidth = "max_colwidth" in output_kwargs
        if override_colwidth:
            previous_colwidth = pd.get_option("display.max_colwidth")
            pd.set_option("display.max_colwidth", output_kwargs["max_colwidth"])
        try:
            return str(safe_eval(module_end_str, {"np": np}, local_vars))
        finally:
            # Restore the caller's setting even when evaluation fails, and
            # leave the option untouched when it was never overridden.
            if override_colwidth:
                pd.set_option("display.max_colwidth", previous_colwidth)
    except Exception as e:
        err_string = (
            "There was an error running the output as Python code. "
            f"Error message: {e}"
        )
        traceback.print_exc()
        return err_string
67

68

69
class PandasInstructionParser(ChainableOutputParser):
    """Output parser that runs pandas instructions.

    The text handed to :meth:`parse` is expected to hold pandas instructions
    written as Python code; they are executed against the stored dataframe
    and the outcome is returned.

    """

    def __init__(
        self, df: pd.DataFrame, output_kwargs: Optional[Dict[str, Any]] = None
    ) -> None:
        """Keep the dataframe and the keyword options used when parsing.

        Args:
            df: DataFrame the instructions will be run against.
            output_kwargs: Extra keyword arguments forwarded to
                ``default_output_processor``; a falsy value is replaced by an
                empty dict.
        """
        self.output_kwargs = output_kwargs if output_kwargs else {}
        self.df = df

    def parse(self, output: str) -> Any:
        """Delegate ``output`` to ``default_output_processor``.

        The stored dataframe and keyword options are passed along, and the
        processor's return value is handed back unchanged.
        """
        processor_options = self.output_kwargs
        return default_output_processor(output, self.df, **processor_options)
87

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.