rag-demystified
/
subquestion_generator.py
147 строк · 6.0 Кб
1import json2from typing import List3from enum import Enum4
5from instructor import OpenAISchema6from pydantic import Field, create_model7from openai_utils import llm_call8
9
10# DEFAULT_SUBQUESTION_GENERATOR_PROMPT = """
11# You are an AI agent that takes a complex user question and returns a list of simple subquestions to answer the user's question.
12# You are provided a set of functions and data sources that you can use to answer each subquestion.
13# If the user question is simple, just return the user question, the function, and the data source to use.
14# You can only use the provided functions and data sources.
15# The subquestions should be complete questions that can be answered by a single function and a single data source.
16# """
17
18# DEFAULT_SUBQUESTION_GENERATOR_PROMPT = """
19# You are an AI assistant that specializes in breaking down complex questions into simpler, manageable sub-questions.
20# When presented with a complex user question, your role is to generate a list of sub-questions that, when answered, will comprehensively address the original query.
21# You have at your disposal a pre-defined set of functions and data sources to utilize in answering each sub-question.
22# If a user question is straightforward, your task is to return the original question, identifying the appropriate function and data source to use for its solution.
23# Please remember that you are limited to the provided functions and data sources, and that each sub-question should be a full question that can be answered using a single function and a single data source.
24# """
25
# System prompt used when the caller does not supply one: instructs the model
# to decompose a complex user question into subquestions, each answerable by a
# single (function, file) pair drawn from the provided set.
# NOTE: this text is sent verbatim to the LLM — do not edit casually.
DEFAULT_SUBQUESTION_GENERATOR_PROMPT = """
You are an AI assistant that specializes in breaking down complex questions into simpler, manageable sub-questions.
You have at your disposal a pre-defined set of functions and files to utilize in answering each sub-question.
Please remember that your output should only contain the provided function names and file names, and that each sub-question should be a full question that can be answered using a single function and a single file.
"""

# Default task preamble prepended to the user prompt; empty unless the caller
# provides a task description.
DEFAULT_USER_TASK = ""
34
# Closed set of retrieval strategies the LLM may assign to a subquestion.
# Subclasses str so members serialize as plain strings in the OpenAI
# function-call schema and in JSON round-trips.
class FunctionEnum(str, Enum):
    """The function to use to answer the questions.
    Use vector_retrieval for fact-based questions such as demographics, sports, arts and culture, etc.
    Use llm_retrieval for summarization questions, such as positive aspects, history, etc.
    """

    # Embedding-based lookup over indexed chunks — narrow factual questions.
    VECTOR_RETRIEVAL = "vector_retrieval"
    # Whole-document LLM pass — broad summarization-style questions.
    LLM_RETRIEVAL = "llm_retrieval"
44
def generate_subquestions(
    question,
    file_names: List[str] = None,
    system_prompt=DEFAULT_SUBQUESTION_GENERATOR_PROMPT,
    user_task=DEFAULT_USER_TASK,
    llm_model="gpt-4-0613",
):
    """Generate subquestions from a user question using an OpenAI LLM.

    Each subquestion is paired with the file name and the retrieval function
    (``vector_retrieval`` or ``llm_retrieval``) to use when answering it.

    Args:
        question: The (possibly complex) user question to decompose.
        file_names: Non-empty list of file names the subquestions may target.
        system_prompt: System prompt steering the decomposition behavior.
        user_task: Optional task preamble prepended to the user prompt.
        llm_model: OpenAI model identifier forwarded to ``llm_call``.

    Returns:
        A tuple ``(subquestions, cost)`` where ``subquestions`` is a list of
        dynamically created ``QuestionBundle`` objects and ``cost`` is the
        call cost reported by ``llm_call``.

    Raises:
        ValueError: If ``file_names`` is ``None`` or empty.
    """
    # Fail fast with a clear message instead of the opaque TypeError /
    # IndexError the Enum construction below would otherwise raise.
    if not file_names:
        raise ValueError("file_names must be a non-empty list of file names")

    # Build an Enum of the allowed file names so the model can only choose
    # among files we actually have; the docstring is surfaced in the schema.
    FilenameEnum = Enum("FilenameEnum", {x.upper(): x for x in file_names})
    FilenameEnum.__doc__ = f"The names of the file to use to answer the corresponding subquestion - e.g. {file_names[0]}"

    # Create pydantic class dynamically: one bundle = question + function + file.
    QuestionBundle = create_model(
        "QuestionBundle",
        question=(
            str,
            Field(
                None, description="The subquestion extracted from the user's question"
            ),
        ),
        function=(FunctionEnum, Field(None)),
        file_name=(FilenameEnum, Field(None)),
    )

    # Wrapper model exposed to the LLM as the function-call schema.
    SubQuestionBundleList = create_model(
        "SubQuestionBundleList",
        subquestion_bundle_list=(
            List[QuestionBundle],
            Field(
                None,
                description="A list of subquestions - each item in the list contains a question, a function, and a file name",
            ),
        ),
        __base__=OpenAISchema,
    )

    user_prompt = f"{user_task}\n Here is the user question: {question}"

    # Few-shot examples demonstrating the exact JSON shape to emit.
    # (The first example previously had a missing comma after the Toronto
    # question, i.e. invalid JSON — fixed so the model isn't taught a
    # malformed format.)
    few_shot_examples = [
        {
            "role": "user",
            "content": "Compare the population of Atlanta and Toronto?",
        },
        {
            "role": "function",
            "name": "SubQuestionBundleList",
            "content": """
{
    "subquestion_bundle_list": [
        {
            "question": "What is the population of Atlanta?",
            "function": "vector_retrieval",
            "file_name": "Atlanta"
        },
        {
            "question": "What is the population of Toronto?",
            "function": "vector_retrieval",
            "file_name": "Toronto"
        }
    ]
}""",
        },
        {
            "role": "user",
            "content": "Summarize the history of Chicago and Houston.",
        },
        {
            "role": "function",
            "name": "SubQuestionBundleList",
            "content": """
{
    "subquestion_bundle_list": [
        {
            "question": "What is the history of Chicago?",
            "function": "llm_retrieval",
            "file_name": "Chicago"
        },
        {
            "question": "What is the history of Houston?",
            "function": "llm_retrieval",
            "file_name": "Houston"
        }
    ]
}""",
        },
    ]

    response, cost = llm_call(
        model=llm_model,
        function_schema=[SubQuestionBundleList.openai_schema],
        output_schema={"name": SubQuestionBundleList.openai_schema["name"]},
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        few_shot_examples=few_shot_examples,
    )

    # The function-call arguments arrive as a JSON string; validate them
    # through the pydantic schema before returning plain bundle objects.
    subquestions_list = json.loads(response.choices[0].message.function_call.arguments)

    subquestions_pydantic_obj = SubQuestionBundleList(**subquestions_list)
    subquestions_list = subquestions_pydantic_obj.subquestion_bundle_list
    return subquestions_list, cost