rag-demystified
/
subquestion_generator.py
147 строк · 6.0 Кб
1import json2from typing import List3from enum import Enum4
5from instructor import OpenAISchema6from pydantic import Field, create_model7from openai_utils import llm_call8
9
10# DEFAULT_SUBQUESTION_GENERATOR_PROMPT = """
11# You are an AI agent that takes a complex user question and returns a list of simple subquestions to answer the user's question.
12# You are provided a set of functions and data sources that you can use to answer each subquestion.
13# If the user question is simple, just return the user question, the function, and the data source to use.
14# You can only use the provided functions and data sources.
15# The subquestions should be complete questions that can be answered by a single function and a single data source.
16# """
17
18# DEFAULT_SUBQUESTION_GENERATOR_PROMPT = """
19# You are an AI assistant that specializes in breaking down complex questions into simpler, manageable sub-questions.
20# When presented with a complex user question, your role is to generate a list of sub-questions that, when answered, will comprehensively address the original query.
21# You have at your disposal a pre-defined set of functions and data sources to utilize in answering each sub-question.
22# If a user question is straightforward, your task is to return the original question, identifying the appropriate function and data source to use for its solution.
23# Please remember that you are limited to the provided functions and data sources, and that each sub-question should be a full question that can be answered using a single function and a single data source.
24# """
25
# System prompt used when the caller does not supply one: instructs the model
# to decompose a complex user question into subquestions, each answerable by a
# single (function, file) pair drawn from the provided set.
# NOTE: this text is sent verbatim to the LLM — do not edit casually.
DEFAULT_SUBQUESTION_GENERATOR_PROMPT = """
You are an AI assistant that specializes in breaking down complex questions into simpler, manageable sub-questions.
You have at your disposal a pre-defined set of functions and files to utilize in answering each sub-question.
Please remember that your output should only contain the provided function names and file names, and that each sub-question should be a full question that can be answered using a single function and a single file.
"""

# Default task preamble prepended to the user prompt; empty unless the caller
# provides a task description.
DEFAULT_USER_TASK = ""
34
# Closed set of retrieval strategies the LLM may assign to a subquestion.
# Subclasses str so members serialize as plain strings in the OpenAI
# function-call schema and in JSON round-trips.
class FunctionEnum(str, Enum):
    """The function to use to answer the questions.
    Use vector_retrieval for fact-based questions such as demographics, sports, arts and culture, etc.
    Use llm_retrieval for summarization questions, such as positive aspects, history, etc.
    """

    # Embedding-based lookup over indexed chunks — narrow factual questions.
    VECTOR_RETRIEVAL = "vector_retrieval"
    # Whole-document LLM pass — broad summarization-style questions.
    LLM_RETRIEVAL = "llm_retrieval"
44
def generate_subquestions(
    question,
    file_names: List[str] = None,
    system_prompt=DEFAULT_SUBQUESTION_GENERATOR_PROMPT,
    user_task=DEFAULT_USER_TASK,
    llm_model="gpt-4-0613",
):
    """Generate subquestions from a user question using an OpenAI LLM.

    Each subquestion is paired with the file name and the retrieval function
    (``vector_retrieval`` or ``llm_retrieval``) to use when answering it.

    Args:
        question: The (possibly complex) user question to decompose.
        file_names: Non-empty list of file names the subquestions may target.
        system_prompt: System prompt steering the decomposition behavior.
        user_task: Optional task preamble prepended to the user prompt.
        llm_model: OpenAI model identifier forwarded to ``llm_call``.

    Returns:
        A tuple ``(subquestions, cost)`` where ``subquestions`` is a list of
        dynamically created ``QuestionBundle`` objects and ``cost`` is the
        call cost reported by ``llm_call``.

    Raises:
        ValueError: If ``file_names`` is ``None`` or empty.
    """
    # Fail fast with a clear message instead of the opaque TypeError /
    # IndexError the Enum construction below would otherwise raise.
    if not file_names:
        raise ValueError("file_names must be a non-empty list of file names")

    # Build an Enum of the allowed file names so the model can only choose
    # among files we actually have; the docstring is surfaced in the schema.
    FilenameEnum = Enum("FilenameEnum", {x.upper(): x for x in file_names})
    FilenameEnum.__doc__ = f"The names of the file to use to answer the corresponding subquestion - e.g. {file_names[0]}"

    # Create pydantic class dynamically: one bundle = question + function + file.
    QuestionBundle = create_model(
        "QuestionBundle",
        question=(
            str,
            Field(
                None, description="The subquestion extracted from the user's question"
            ),
        ),
        function=(FunctionEnum, Field(None)),
        file_name=(FilenameEnum, Field(None)),
    )

    # Wrapper model exposed to the LLM as the function-call schema.
    SubQuestionBundleList = create_model(
        "SubQuestionBundleList",
        subquestion_bundle_list=(
            List[QuestionBundle],
            Field(
                None,
                description="A list of subquestions - each item in the list contains a question, a function, and a file name",
            ),
        ),
        __base__=OpenAISchema,
    )

    user_prompt = f"{user_task}\n Here is the user question: {question}"

    # Few-shot examples demonstrating the exact JSON shape to emit.
    # (The first example previously had a missing comma after the Toronto
    # question, i.e. invalid JSON — fixed so the model isn't taught a
    # malformed format.)
    few_shot_examples = [
        {
            "role": "user",
            "content": "Compare the population of Atlanta and Toronto?",
        },
        {
            "role": "function",
            "name": "SubQuestionBundleList",
            "content": """
{
    "subquestion_bundle_list": [
        {
            "question": "What is the population of Atlanta?",
            "function": "vector_retrieval",
            "file_name": "Atlanta"
        },
        {
            "question": "What is the population of Toronto?",
            "function": "vector_retrieval",
            "file_name": "Toronto"
        }
    ]
}""",
        },
        {
            "role": "user",
            "content": "Summarize the history of Chicago and Houston.",
        },
        {
            "role": "function",
            "name": "SubQuestionBundleList",
            "content": """
{
    "subquestion_bundle_list": [
        {
            "question": "What is the history of Chicago?",
            "function": "llm_retrieval",
            "file_name": "Chicago"
        },
        {
            "question": "What is the history of Houston?",
            "function": "llm_retrieval",
            "file_name": "Houston"
        }
    ]
}""",
        },
    ]

    response, cost = llm_call(
        model=llm_model,
        function_schema=[SubQuestionBundleList.openai_schema],
        output_schema={"name": SubQuestionBundleList.openai_schema["name"]},
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        few_shot_examples=few_shot_examples,
    )

    # The function-call arguments arrive as a JSON string; validate them
    # through the pydantic schema before returning plain bundle objects.
    subquestions_list = json.loads(response.choices[0].message.function_call.arguments)

    subquestions_pydantic_obj = SubQuestionBundleList(**subquestions_list)
    subquestions_list = subquestions_pydantic_obj.subquestion_bundle_list
    return subquestions_list, cost