llama-index
152 строки · 5.5 Кб
1from typing import Optional, Type, TypeVar
2
3from llama_index.legacy.bridge.pydantic import BaseModel
4from llama_index.legacy.output_parsers.base import OutputParserException
5from llama_index.legacy.output_parsers.utils import parse_json_markdown
6
7
def convert_to_handlebars(text: str) -> str:
    """Convert a python format string to a handlebars-style template.

    In a python format string, single braces mark variable substitution and
    doubled braces escape literal braces (e.g. for a JSON dict). In a
    handlebars template it is the other way around: doubled braces mark
    variable substitution and single braces are literal braces.

    The swap is performed in one left-to-right pass, so no temporary
    placeholder text is ever inserted into the string. Input that happens to
    contain words such as ``TEMP_BRACE_LEFT`` therefore passes through
    untouched.

    This is currently only used to convert a python format string based
    prompt template to a guidance program template.

    Args:
        text: Template written with python format-string brace conventions.

    Returns:
        The template with the brace conventions swapped.
    """
    import re  # local import keeps this helper self-contained

    # Doubled braces come first in the pattern so they win over a single
    # brace starting at the same position.
    swapped = {"{{": "{", "}}": "}", "{": "{{", "}": "}}"}
    return re.sub(r"\{\{|\}\}|\{|\}", lambda found: swapped[found.group(0)], text)
32
33
def wrap_json_markdown(text: str) -> str:
    """Enclose ``text`` in a fenced markdown code block tagged as json."""
    fence_open, fence_close = "```json", "```"
    return "\n".join((fence_open, text, fence_close))
37
38
def pydantic_to_guidance_output_template(cls: Type[BaseModel]) -> str:
    """Convert a pydantic model to guidance output template.

    The model's JSON schema is generated once and used both as the schema to
    render and as the root against which nested ``$ref`` entries are resolved.

    Args:
        cls: Pydantic model class whose schema should be rendered.

    Returns:
        The guidance output template for the model.
    """
    model_schema = cls.schema()
    return json_schema_to_guidance_output_template(model_schema, root=model_schema)
42
43
def pydantic_to_guidance_output_template_markdown(cls: Type[BaseModel]) -> str:
    """Render a pydantic model as a guidance output template inside a json markdown block."""
    return wrap_json_markdown(pydantic_to_guidance_output_template(cls))
48
49
def json_schema_to_guidance_output_template(
    schema: dict,
    key: Optional[str] = None,
    indent: int = 0,
    root: Optional[dict] = None,
    use_pattern_control: bool = False,
) -> str:
    """Convert a json schema to guidance output template.

    Implementation based on
    https://github.com/microsoft/guidance/blob/main/notebooks/applications/jsonformer.ipynb
    Modified to support nested pydantic models.

    Args:
        schema: The (sub-)schema to render.
        key: Name of the guidance variable that receives the generated value.
            Required for every schema type except ``object``.
        indent: Nesting depth, used to indent object braces and properties.
        root: Top-level schema holding the ``definitions`` (or ``$defs``)
            table; required when ``schema`` is a ``$ref``.
        use_pattern_control: When true, integers and numbers are generated
            with a digit pattern instead of as a quoted value. The flag is
            forwarded to every nested schema.

    Returns:
        The guidance template text for ``schema``.

    Raises:
        ValueError: If a ``$ref`` is met without ``root``, if ``key`` is
            missing where it is required, or if the schema type is unknown.
    """
    if "type" not in schema and "$ref" in schema:
        if root is None:
            raise ValueError("Must specify root schema for nested object")

        # The last path segment of the reference names the referenced model.
        model = schema["$ref"].split("/")[-1]
        # Prefer the "definitions" table; fall back to "$defs" when absent.
        definitions = root["definitions"] if "definitions" in root else root["$defs"]
        return json_schema_to_guidance_output_template(
            definitions[model], key, indent, root, use_pattern_control
        )

    schema_type = schema["type"]

    if schema_type == "object":
        pieces = [" " * indent + "{\n"]
        for name, sub_schema in schema["properties"].items():
            rendered = json_schema_to_guidance_output_template(
                sub_schema, name, indent + 1, root, use_pattern_control
            )
            # Every property, including the last, ends with a comma.
            pieces.append(" " * (indent + 1) + f'"{name}"' + ": " + rendered + ",\n")
        pieces.append(" " * indent + "}")
        return "".join(pieces)

    if schema_type == "array":
        if key is None:
            raise ValueError("Key should not be None")
        # Accept the legacy snake_case key first, then the JSON Schema spelling.
        extra_args = ""
        for limit_key in ("max_items", "maxItems"):
            if limit_key in schema:
                extra_args = f" max_iterations={schema[limit_key]}"
                break
        item_template = json_schema_to_guidance_output_template(
            schema["items"], "this", 0, root, use_pattern_control
        )
        return (
            "[{{#geneach '"
            + key
            + "' stop=']'"
            + extra_args
            + "}}{{#unless @first}}, {{/unless}}"
            + item_template
            + "{{/geneach}}]"
        )

    if schema_type == "string":
        if key is None:
            raise ValueError("key should not be None")
        return "\"{{gen '" + key + "' stop='\"'}}\""

    if schema_type in ("integer", "number"):
        if key is None:
            raise ValueError("key should not be None")
        if use_pattern_control:
            return "{{gen '" + key + "' pattern='[0-9\\.]' stop=','}}"
        return "\"{{gen '" + key + "' stop='\"'}}\""

    if schema_type == "boolean":
        if key is None:
            raise ValueError("key should not be None")
        return "{{#select '" + key + "'}}True{{or}}False{{/select}}"

    raise ValueError(f"Unknown schema type {schema_type}")
120
121
# Type variable tying the returned instance to the model class passed in.
Model = TypeVar("Model", bound=BaseModel)


def parse_pydantic_from_guidance_program(
    response: str, cls: Type[Model], verbose: bool = False
) -> Model:
    """Parse a pydantic object out of the text of an executed guidance program.

    This is a temporary solution.

    NOTE: right now we assume the output is the last markdown formatted json
    block in ``response``.

    NOTE: a better way is to extract via Program.variables, but guidance does
    not support extracting nested objects right now, so we fall back to
    manually parsing the final text after program execution.

    Args:
        response: Final text of the executed program.
        cls: Pydantic model class to build from the parsed json.
        verbose: When true, print the raw json block before parsing it.

    Returns:
        An instance of ``cls`` built from the parsed json.

    Raises:
        OutputParserException: If any step of extraction or parsing fails.
    """
    fence = "```json"
    try:
        # Keep only the text after the final fence opener, then put the
        # opener back so the markdown parser sees a complete block start.
        _, _, tail = response.rpartition(fence)
        json_block = fence + tail
        if verbose:
            print("Raw output:")
            print(json_block)
        parsed_model = cls.parse_obj(parse_json_markdown(json_block))
    except Exception as e:
        raise OutputParserException(
            "Failed to parse pydantic object from guidance program"
            ". Probably the LLM failed to produce data with right json schema"
        ) from e
    return parsed_model
153