llama-index

output_parser.py
112 строк · 3.7 Кб
Перенос по словам
1
"""ReAct output parser."""
2

3
import re
4
from typing import Tuple
5

6
from llama_index.legacy.agent.react.types import (
7
    ActionReasoningStep,
8
    BaseReasoningStep,
9
    ResponseReasoningStep,
10
)
11
from llama_index.legacy.output_parsers.utils import extract_json_str
12
from llama_index.legacy.types import BaseOutputParser
13

14

15
def extract_tool_use(input_text: str) -> Tuple[str, str, str]:
    """Extract the thought, tool name and raw tool input from a ReAct step.

    The text must contain, in order, a ``Thought:`` section, an ``Action:``
    line naming the tool, and an ``Action Input:`` section holding a
    brace-delimited payload.

    Args:
        input_text: Raw LLM output for a single tool-use step.

    Returns:
        A ``(thought, action, action_input)`` tuple of stripped strings.
        ``action_input`` spans from the first ``{`` after ``Action Input:``
        to the last ``}`` in the text.

    Raises:
        ValueError: If the text does not follow the expected layout.
    """
    # Whitespace after each label is optional so that compact output such as
    # "Action:tool" is accepted, and tool names may contain hyphens so that a
    # name like "web-search" is not silently truncated to "web".
    pattern = (
        r"\s*Thought:(.*?)\nAction:[ \t]*([a-zA-Z0-9_-]+).*?\nAction Input:"
        r".*?(\{.*\})"
    )

    # DOTALL lets the thought and the payload span multiple lines.
    match = re.search(pattern, input_text, re.DOTALL)
    if match is None:
        raise ValueError(f"Could not extract tool use from input text: {input_text}")

    thought, action, action_input = (group.strip() for group in match.groups())
    return thought, action, action_input
28

29

30
def action_input_parser(json_str: str) -> dict:
    """Best-effort extraction of string key/value pairs from JSON-like text.

    Apostrophes that are not surrounded by word characters on both sides are
    treated as quotes and converted to double quotes; every
    ``"key": "value"`` pair found afterwards is collected.

    Args:
        json_str: Possibly malformed JSON-like text.

    Returns:
        A dict mapping each matched key to its string value. Pairs whose value
        is not a double-quoted string are not matched, so the result may be
        empty.
    """
    # An apostrophe with a word character on each side (as in "it's") is kept.
    normalized = re.sub(r"(?<!\w)\'|\'(?!\w)", '"', json_str)
    pair_finder = re.compile(r'"(\w+)":\s*"([^"]*)"')
    return {key: value for key, value in pair_finder.findall(normalized)}
35

36

37
def extract_final_response(input_text: str) -> Tuple[str, str]:
    """Split a final ReAct step into its thought and its answer.

    Args:
        input_text: Raw LLM output containing ``Thought:`` followed later by
            ``Answer:``.

    Returns:
        A ``(thought, answer)`` tuple of stripped strings. The thought is the
        text between ``Thought:`` and the first following ``Answer:``; the
        answer is everything after that marker.

    Raises:
        ValueError: If the text does not contain the two markers in order.
    """
    # DOTALL allows both the thought and the answer to span several lines.
    final_step = re.compile(r"\s*Thought:(.*?)Answer:(.*?)(?:$)", re.DOTALL)
    found = final_step.search(input_text)
    if found is None:
        raise ValueError(
            f"Could not extract final answer from input text: {input_text}"
        )

    thought, answer = (part.strip() for part in found.groups())
    return thought, answer
49

50

51
def parse_action_reasoning_step(output: str) -> ActionReasoningStep:
    """Build an ``ActionReasoningStep`` from the LLM output of a tool-use step.

    Args:
        output: Raw LLM output containing thought, action and action input.

    Returns:
        An ``ActionReasoningStep`` carrying the thought, the tool name and the
        parsed action input.

    Raises:
        ValueError: Propagated from ``extract_tool_use`` when the output does
            not follow the tool-use layout.
    """
    # Weaker LLMs may emit badly formed JSON as the Action Input, so the
    # lenient `dirtyjson` parser is used instead of the standard `json` module.
    import dirtyjson

    thought, action, raw_input = extract_tool_use(output)
    payload = extract_json_str(raw_input)

    # Try the lenient JSON parser first; if it fails for any reason, fall back
    # to the regex-based key/value extraction.
    try:
        parsed_input = dirtyjson.loads(payload)
    except Exception:
        parsed_input = action_input_parser(payload)

    return ActionReasoningStep(
        thought=thought,
        action=action,
        action_input=parsed_input,
    )
69

70

71
class ReActOutputParser(BaseOutputParser):
    """ReAct Output parser."""

    def parse(self, output: str, is_streaming: bool = False) -> BaseReasoningStep:
        """Parse output from ReAct agent.

        We expect the output to be in one of the following formats:
        1. If the agent needs to use a tool to answer the question:
            ```
            Thought: <thought>
            Action: <action>
            Action Input: <action_input>
            ```
        2. If the agent can answer the question without any tools:
            ```
            Thought: <thought>
            Answer: <answer>
            ```

        When both ``Action:`` and ``Answer:`` occur in the output, the marker
        that appears first decides how the output is interpreted, so a tool
        payload (or trailing text) that happens to contain ``Answer:`` is not
        mistaken for a final response.

        Args:
            output: Raw text produced by the LLM.
            is_streaming: Forwarded unchanged to any ``ResponseReasoningStep``
                that is returned.

        Returns:
            A ``ResponseReasoningStep`` for a final answer, or the step
            returned by ``parse_action_reasoning_step`` for a tool call.

        Raises:
            ValueError: If the output contains ``Thought:`` but cannot be
                parsed as either a tool call or a final answer.
        """
        if "Thought:" not in output:
            # NOTE: handle the case where the agent directly outputs the answer
            # instead of following the thought-answer format
            return ResponseReasoningStep(
                thought="(Implicit) I can answer without any more tools!",
                response=output,
                is_streaming=is_streaming,
            )

        action_idx = output.find("Action:")
        answer_idx = output.find("Answer:")
        has_action = action_idx != -1
        has_answer = answer_idx != -1

        if has_action and (not has_answer or action_idx < answer_idx):
            try:
                return parse_action_reasoning_step(output)
            except ValueError:
                # "Action:" may merely be mentioned in free text; when an
                # answer is also present, fall back to it instead of failing.
                if not has_answer:
                    raise

        if has_answer:
            thought, answer = extract_final_response(output)
            return ResponseReasoningStep(
                thought=thought, response=answer, is_streaming=is_streaming
            )

        raise ValueError(f"Could not parse output: {output}")

    def format(self, output: str) -> str:
        """Format a query with structured output formatting instructions."""
        raise NotImplementedError
113
llama-index

Использование cookies