# openai-cookbook — deprecated fine-tuned Q&A answers example
1"""
2TODO: This example is deprecated.
3Note: To answer questions based on text documents, we recommend the procedure in
4[Question Answering using Embeddings](https://github.com/openai/openai-cookbook/blob/main/examples/Question_answering_using_embeddings.ipynb).
5Some of the code below may rely on [deprecated API endpoints](https://github.com/openai/openai-cookbook/tree/main/transition_guides_for_deprecated_API_endpoints).
6"""
7
8import argparse9
10from openai import OpenAI11import os12
13client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "<your OpenAI API key if not set as env var>"))14
15
def create_context(
    question, search_file_id, max_len=1800, search_model="ada", max_rerank=10
):
    """
    Create a context for a question by finding the most similar context from the
    search file.

    :param question: The question
    :param search_file_id: The file id of the search file
    :param max_len: The maximum length of the returned context (in tokens)
    :param search_model: The search model to use
    :param max_rerank: The maximum number of reranking
    :return: The context
    """
    # TODO: openai.Engine(search_model) is deprecated
    results = client.Engine(search_model).search(
        search_model=search_model,
        query=question,
        max_rerank=max_rerank,
        file=search_file_id,
        return_metadata=True,
    )
    # Accumulate result texts until the token budget (per the search metadata,
    # plus 4 tokens for the separator) would be exceeded.
    chunks = []
    used_tokens = 0
    for hit in results["data"]:
        used_tokens += int(hit["metadata"]) + 4
        if used_tokens > max_len:
            break
        chunks.append(hit["text"])
    return "\n\n###\n\n".join(chunks)
def answer_question(
    search_file_id="<SEARCH_FILE_ID>",
    fine_tuned_qa_model="<FT_QA_MODEL_ID>",
    question="Which country won the European Football championship in 2021?",
    max_len=1800,
    search_model="ada",
    max_rerank=10,
    debug=False,
    stop_sequence=None,
    max_tokens=100,
):
    """
    Answer a question based on the most similar context from the search file,
    using your fine-tuned model.

    :param question: The question
    :param fine_tuned_qa_model: The fine tuned QA model
    :param search_file_id: The file id of the search file
    :param max_len: The maximum length of the returned context (in tokens)
    :param search_model: The search model to use
    :param max_rerank: The maximum number of reranking
    :param debug: Whether to output debug information
    :param stop_sequence: The stop sequence for Q&A model (defaults to ["\\n", "."])
    :param max_tokens: The maximum number of tokens to return
    :return: The answer, or "" if the API call fails
    """
    # Avoid a shared mutable default argument; the effective default is unchanged.
    if stop_sequence is None:
        stop_sequence = ["\n", "."]
    context = create_context(
        question,
        search_file_id,
        max_len=max_len,
        search_model=search_model,
        max_rerank=max_rerank,
    )
    if debug:
        print("Context:\n" + context)
        print("\n\n")
    try:
        # The legacy Completions endpoint takes a plain-text `prompt` (the Chat
        # Completions endpoint used previously takes `messages` instead, so the
        # old call could never succeed).  The current SDK also accepts only
        # `model` — the deprecated `engine` parameter (and the ":ft" model-id
        # sniffing that selected between them) is gone.
        response = client.completions.create(
            model=fine_tuned_qa_model,
            prompt=f"Answer the question based on the context below\n\nText: {context}\n\n---\n\nQuestion: {question}\nAnswer:",
            temperature=0,
            max_tokens=max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            stop=stop_sequence,
        )
        # New-style SDK responses are pydantic objects, not dicts.
        return response.choices[0].text
    except Exception as e:
        # Best-effort behavior preserved from the original example: report the
        # error and return an empty answer rather than propagating.
        print(e)
        return ""
if __name__ == "__main__":
    # CLI driver: collect the question/search parameters, run the Q&A pipeline,
    # and print the answer.
    arg_parser = argparse.ArgumentParser(
        description="Rudimentary functionality of the answers endpoint with a fine-tuned Q&A model.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Required identifiers and the question itself.
    arg_parser.add_argument("--search_file_id", help="Search file id", required=True, type=str)
    arg_parser.add_argument("--fine_tuned_qa_model", help="Fine-tuned QA model id", required=True, type=str)
    arg_parser.add_argument("--question", help="Question to answer", required=True, type=str)
    # Tunable search/generation knobs with sensible defaults.
    arg_parser.add_argument(
        "--max_len",
        help="Maximum length of the returned context (in tokens)",
        default=1800,
        type=int,
    )
    arg_parser.add_argument("--search_model", help="Search model to use", default="ada", type=str)
    arg_parser.add_argument(
        "--max_rerank",
        help="Maximum number of reranking for the search",
        default=10,
        type=int,
    )
    arg_parser.add_argument("--debug", help="Print debug information (context used)", action="store_true")
    arg_parser.add_argument(
        "--stop_sequence",
        help="Stop sequences for the Q&A model",
        default=["\n", "."],
        nargs="+",
        type=str,
    )
    arg_parser.add_argument(
        "--max_tokens",
        help="Maximum number of tokens to return",
        default=100,
        type=int,
    )
    args = arg_parser.parse_args()

    answer = answer_question(
        search_file_id=args.search_file_id,
        fine_tuned_qa_model=args.fine_tuned_qa_model,
        question=args.question,
        max_len=args.max_len,
        search_model=args.search_model,
        max_rerank=args.max_rerank,
        debug=args.debug,
        stop_sequence=args.stop_sequence,
        max_tokens=args.max_tokens,
    )
    print(f"Answer:{answer}")