llama-index

guidance_utils.py
152 строки · 5.5 Кб
Перенос по словам
1
from typing import Optional, Type, TypeVar
2

3
from llama_index.legacy.bridge.pydantic import BaseModel
4
from llama_index.legacy.output_parsers.base import OutputParserException
5
from llama_index.legacy.output_parsers.utils import parse_json_markdown
6

7

8
def convert_to_handlebars(text: str) -> str:
    """Convert a python format string to a handlebars-style template.

    In a python format string, single braces {} mark variable substitution and
    double braces {{}} escape literal braces (e.g. for a JSON dict).
    In a handlebars template it is the other way round: double braces {{}}
    mark variable substitution and single braces are literal braces.

    This is currently only used to convert a python format string based prompt
    template to a guidance program template.

    Args:
        text: The python format string to convert.

    Returns:
        The text with single and double braces swapped.
    """
    import re  # local import so the block does not depend on file-level imports

    # The swap is done in a single pass so that no temporary placeholder text
    # is needed; a placeholder could collide with text already in the input.
    swapped = {"{{": "{", "}}": "}", "{": "{{", "}": "}}"}

    # Doubled braces come first in the alternation so an escaped brace is
    # consumed as a unit before a single brace gets a chance to match.
    return re.sub(r"\{\{|\}\}|\{|\}", lambda match: swapped[match.group(0)], text)
32

33

34
def wrap_json_markdown(text: str) -> str:
    """Return ``text`` enclosed in a fenced markdown block tagged as json.

    The opening fence, the text and the closing fence each sit on their own
    line.
    """
    fence = "```"
    return "\n".join((fence + "json", text, fence))
37

38

39
def pydantic_to_guidance_output_template(cls: Type[BaseModel]) -> str:
    """Convert a pydantic model to a guidance output template.

    Args:
        cls: The pydantic model class whose schema is converted.

    Returns:
        The guidance template text produced from the model's schema.
    """
    # Build the schema once: the same dict is both the schema to convert and
    # the root that nested "$ref" entries are resolved against.
    schema = cls.schema()
    return json_schema_to_guidance_output_template(schema, root=schema)
42

43

44
def pydantic_to_guidance_output_template_markdown(cls: Type[BaseModel]) -> str:
    """Convert a pydantic model to a guidance output template in json markdown.

    The template is the one produced by ``pydantic_to_guidance_output_template``,
    wrapped in a json markdown block by ``wrap_json_markdown``.
    """
    template = pydantic_to_guidance_output_template(cls)
    return wrap_json_markdown(template)
48

49

50
def json_schema_to_guidance_output_template(
    schema: dict,
    key: Optional[str] = None,
    indent: int = 0,
    root: Optional[dict] = None,
    use_pattern_control: bool = False,
) -> str:
    """Convert a json schema to a guidance output template.

    Implementation based on
    https://github.com/microsoft/guidance/blob/main/notebooks/applications/jsonformer.ipynb
    Modified to support nested pydantic models.

    Args:
        schema: The (sub-)schema to convert.
        key: Name of the guidance variable for this value. Required for every
            schema type except "object".
        indent: Nesting depth used to indent "object" output, two spaces per
            level.
        root: The top-level schema; its "definitions" entry is used to resolve
            "$ref" entries. Required when a "$ref" is encountered.
        use_pattern_control: When True, "integer"/"number" values are emitted
            as an unquoted ``gen`` with a ``pattern`` argument instead of a
            quoted ``gen``. The flag applies to the whole schema tree.

    Returns:
        The guidance template text for ``schema``.

    Raises:
        ValueError: If a "$ref" is found but ``root`` is None, if ``key`` is
            None for a type that needs it, or if the schema type is unknown.
    """
    if "type" not in schema and "$ref" in schema:
        if root is None:
            raise ValueError("Must specify root schema for nested object")

        # Only the last path component of the reference names the model.
        model = schema["$ref"].split("/")[-1]
        return json_schema_to_guidance_output_template(
            root["definitions"][model], key, indent, root, use_pattern_control
        )

    schema_type = schema["type"]

    if schema_type == "object":
        pad = "  " * indent
        inner_pad = "  " * (indent + 1)
        parts = [pad + "{\n"]
        for name, sub_schema in schema["properties"].items():
            rendered = json_schema_to_guidance_output_template(
                sub_schema, name, indent + 1, root, use_pattern_control
            )
            # Every property line ends with a comma, including the last one.
            parts.append(f'{inner_pad}"{name}": {rendered},\n')
        parts.append(pad + "}")
        return "".join(parts)

    if schema_type == "array":
        if key is None:
            raise ValueError("Key should not be None")

        # Accept the JSON Schema spelling "maxItems" as well as "max_items";
        # "max_items" wins when both are present.
        extra_args = ""
        for limit_key in ("max_items", "maxItems"):
            if limit_key in schema:
                extra_args = f" max_iterations={schema[limit_key]}"
                break

        # Items are rendered under the key "this" with no indentation.
        item_template = json_schema_to_guidance_output_template(
            schema["items"], "this", 0, root, use_pattern_control
        )
        return (
            "[{{#geneach '"
            + key
            + "' stop=']'"
            + extra_args
            + "}}{{#unless @first}}, {{/unless}}"
            + item_template
            + "{{/geneach}}]"
        )

    if schema_type == "string":
        if key is None:
            raise ValueError("key should not be None")
        return "\"{{gen '" + key + "' stop='\"'}}\""

    if schema_type in ("integer", "number"):
        if key is None:
            raise ValueError("key should not be None")
        if use_pattern_control:
            return "{{gen '" + key + "' pattern='[0-9\\.]' stop=','}}"
        return "\"{{gen '" + key + "' stop='\"'}}\""

    if schema_type == "boolean":
        if key is None:
            raise ValueError("key should not be None")
        return "{{#select '" + key + "'}}True{{or}}False{{/select}}"

    raise ValueError(f"Unknown schema type {schema_type}")
120

121

122
# Type variable bound to BaseModel; it ties the return type of
# parse_pydantic_from_guidance_program to the model class passed in.
Model = TypeVar("Model", bound=BaseModel)
123

124

125
def parse_pydantic_from_guidance_program(
    response: str, cls: Type[Model], verbose: bool = False
) -> Model:
    """Parse a pydantic object out of the text of an executed guidance program.

    This is a temporary solution.

    NOTE: the output is assumed to be the last markdown formatted json block
          in the response.

    NOTE: a better way is to extract via Program.variables, but guidance does
          not support extracting nested objects right now, so we fall back to
          manually parsing the final text after program execution.

    Args:
        response: Text of the executed guidance program.
        cls: The pydantic model class to build from the parsed json.
        verbose: When True, print the extracted block before parsing it.

    Returns:
        An instance of ``cls`` built from the parsed block.

    Raises:
        OutputParserException: If extracting, parsing or validating fails.
    """
    fence = "```json"
    try:
        # Keep what follows the last fence marker (the whole response when
        # there is none), then put the marker back in front for the parser.
        _, _, tail = response.rpartition(fence)
        fenced = fence + tail
        if verbose:
            print("Raw output:")
            print(fenced)
        parsed = cls.parse_obj(parse_json_markdown(fenced))
    except Exception as err:
        raise OutputParserException(
            "Failed to parse pydantic object from guidance program"
            ". Probably the LLM failed to produce data with right json schema"
        ) from err
    return parsed
153
llama-index

Использование cookies