# GPTCache — request pre-processing functions (gptcache/processor/pre.py)
import re
import string
from typing import Dict, Any


def last_content(data: Dict[str, Any], **_: Dict[str, Any]) -> Any:
    """Return the content of the final message in the request's message list.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import last_content

            content = last_content({"messages": [{"content": "foo1"}, {"content": "foo2"}]})
            # content = "foo2"
    """
    messages = data.get("messages")
    return messages[-1]["content"]


def last_content_without_prompt(data: Dict[str, Any], **params: Dict[str, Any]) -> Any:
    """Return the last message content with all prompt strings stripped out.

    :param data: the user llm request data
    :type data: Dict[str, Any]
    :param params: the special gptcache params, like prompts param in the cache object
    :type params: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import last_content_without_prompt

            content = last_content_without_prompt(
                    {"messages": [{"content": "foo1"}, {"content": "foo2"}]}, prompts=["foo"]
                )
            # content = "2"
    """
    last_content_str = data.get("messages")[-1]["content"]
    prompts = params.get("prompts", [])
    # No prompts (missing, None, or empty): nothing to strip.
    if not prompts:
        return last_content_str
    # Escape each prompt so regex metacharacters ("?", "(", "+", ...) are
    # matched literally instead of being interpreted as a pattern — the old
    # code joined raw strings and could mis-match or raise re.error.
    pattern = "|".join(re.escape(prompt) for prompt in prompts)
    return re.sub(pattern, "", last_content_str)


def _get_pattern_value(pattern_str: str, value_str: str):
52
    literal_text_arr = []
53
    field_name_arr = []
54
    for literal_text, field_name, _, _ in string.Formatter().parse(pattern_str):
55
        literal_text_arr.append(literal_text)
56
        if field_name is not None:
57
            field_name_arr.append(
58
                field_name if field_name else str(len(field_name_arr))
59
            )
60

61
    pattern_values = {}
62
    last_end = 0
63
    for i, literal_text in enumerate(literal_text_arr):
64
        start = value_str.find(literal_text, last_end)
65
        if i == len(literal_text_arr) - 1:
66
            end = len(value_str)
67
        else:
68
            end = value_str.find(literal_text_arr[i + 1], start + 1)
69
        if start == -1 or end == -1:
70
            break
71
        start += len(literal_text)
72
        pattern_values[field_name_arr[i]] = value_str[start:end]
73
        last_end = end
74
    return pattern_values
75

76

77
def last_content_without_template(data: Dict[str, Any], **params: Dict[str, Any]) -> Any:
    """Reduce the last message content to the values of its template fields.

    For a cache agent or chain, most of the content is fixed template text;
    only the template parameters carry information. The cache key therefore
    becomes the string form of the list of parameter values. If no template is
    configured, the raw last content is returned unchanged.

    WARNING: two adjacent parameters without a separator are not supported —
    ``"{foo}{hoo}"`` will not work, while ``"{foo}:{hoo}"`` will.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    :Example with str template:
        .. code-block:: python

            from gptcache import Config
            from gptcache.processor.pre import last_content_without_template

            template_obj = "tell me a joke about {subject}"
            prompt = template_obj.format(subject="animal")
            value = last_content_without_template(
                data={"messages": [{"content": prompt}]}, cache_config=Config(template=template_obj)
            )
            print(value)
            # ['animal']

    :Example with langchain template:
        .. code-block:: python

            from langchain import PromptTemplate

            from gptcache import Config
            from gptcache.processor.pre import last_content_without_template

            template_obj = PromptTemplate.from_template("tell me a joke about {subject}")
            prompt = template_obj.format(subject="animal")

            value = last_content_without_template(
                data={"messages": [{"content": prompt}]},
                cache_config=Config(template=template_obj.template),
            )
            print(value)
            # ['animal']

    NOTE: only the simple langchain ``PromptTemplate`` is supported. For
    ``ChatPromptTemplate`` you must build and pass the final dialog template
    yourself, because ``ChatPromptTemplate`` does not expose a method that
    returns the template string directly.
    """
    content = data.get("messages")[-1]["content"]
    cache_config = params.get("cache_config", None)
    # Without a configured template there is nothing to strip.
    if not cache_config or not cache_config.template:
        return content
    field_values = _get_pattern_value(cache_config.template, content)
    return str(list(field_values.values()))


def all_content(data: Dict[str, Any], **_: Dict[str, Any]) -> Any:
    """Join the content of every message in the request with newlines.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    :Example:
        .. code-block:: python

            from gptcache.processor.pre import all_content

            content = all_content(
                {"messages": [{"content": "foo1"}, {"content": "foo2"}]}
            )
            # content = "foo1\\nfoo2"
    """
    messages = data.get("messages")
    # join() puts "\n" between items only — same result as the manual loop
    return "\n".join(message["content"] for message in messages)


def nop(data: Dict[str, Any], **_: Dict[str, Any]) -> Any:
    """Identity pre-processor: return the llm request data unchanged.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import nop

            content = nop({"str": "hello"})
            # {"str": "hello"}
    """
    return data


def get_prompt(data: Dict[str, Any], **_: Dict[str, Any]) -> Any:
    """Return the ``prompt`` field of the llm request params.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_prompt

            content = get_prompt({"prompt": "foo"})
            # "foo"
    """
    prompt = data.get("prompt")
    return prompt


def get_file_name(data: Dict[str, Any], **_: Dict[str, Any]) -> str:
    """Return the name of the file object in the llm request params.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_file_name

            file = open("test.txt", "a")
            content = get_file_name({"file": file})
            # "test.txt"
    """
    file_obj = data.get("file")
    return file_obj.name
def get_file_bytes(data: Dict[str, Any], **_: Dict[str, Any]) -> bytes:
    """Return the buffered bytes of the file object in the llm request params.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_file_bytes

            content = get_file_bytes({"file": open("test.txt", "rb")})
    """
    file_obj = data.get("file")
    # peek() reads from the buffer without advancing the file position
    return file_obj.peek()
def get_input_str(data: Dict[str, Any], **_: Dict[str, Any]) -> str:
    """Return the image bytes (stringified) concatenated with the question.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_input_str

            content = get_input_str({"input": {"image": open("test.png", "rb"), "question": "foo"}})
    """
    input_data = data.get("input")
    image_bytes = input_data["image"].peek()
    return str(image_bytes) + input_data["question"]
def get_input_image_file_name(data: Dict[str, Any], **_: Dict[str, Any]) -> str:
    """Return the file name of the image inside the request's ``input`` dict.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_input_image_file_name

            content = get_input_image_file_name({"input": {"image": open("test.png", "rb")}})
            # "test.png"
    """
    image = data.get("input")["image"]
    return image.name
def get_image_question(data: Dict[str, Any], **_: Dict[str, Any]) -> str:  # pragma: no cover
    """Return the image data (stringified) concatenated with the question.

    ``image`` may be either a file path (``str``) — in which case the file is
    opened and its buffered bytes are read — or an already-loaded object,
    which is stringified directly.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_image_question

            content = get_image_question({"image": open("test.png", "rb"), "question": "foo"})
    """
    img = data.get("image")
    if isinstance(img, str):
        # The old code opened the file without ever closing it (file handle
        # leak); the context manager guarantees the handle is released.
        with open(img, "rb") as image_file:
            data_img = str(image_file.peek())
    else:
        data_img = str(img)
    return data_img + data.get("question")
def get_image(data: Dict[str, Any], **_: Dict[str, Any]) -> str:  # pragma: no cover
    """Return the ``image`` field of the llm request params.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_image

            content = get_image({"image": open("test.png", "rb")})
            # "test.png"
    """
    image = data.get("image")
    return image
def get_inputs(data: Dict[str, Any], **_: Dict[str, Any]):
    """Return the ``inputs`` field of the llm request params.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_inputs

            content = get_inputs({"inputs": "hello"})
            # "hello"
    """
    inputs = data.get("inputs")
    return inputs
def get_messages_last_content(data: Dict[str, Any], **_: Any) -> str:
    """Return the content of the last entry in the request's messages array.

    Accepts both plain dict messages (as in the example below) and message
    objects exposing a ``.content`` attribute — the old code only handled
    the attribute form, so its own docstring example raised AttributeError.

    :param data: the user llm request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_messages_last_content

            content = get_messages_last_content({"messages": [{"content": "hello"}, {"content": "world"}]})
            # "world"
    """
    last_message = data.get("messages")[-1]
    if isinstance(last_message, dict):
        return last_message["content"]
    return last_message.content
def get_openai_moderation_input(data: Dict[str, Any], **_: Dict[str, Any]) -> str:
    """Return the ``input`` of an openai moderation request as a string.

    :param data: the user openai moderation request data
    :type data: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import get_openai_moderation_input

            content = get_openai_moderation_input({"input": ["hello", "world"]})
            # "['hello', 'world']"
    """
    moderation_input = data.get("input")
    return str(moderation_input)
def concat_all_queries(data: Dict[str, Any], **params: Dict[str, Any]) -> Any:
    """Concatenate recent messages into one ``"ROLE: content"`` line per message.

    Keeps at most ``2 * cache_config.context_len`` trailing messages (each
    dialogue turn is a user/assistant pair, hence the factor of two) and drops
    any message whose role appears in ``cache_config.skip_list``.

    :param data: the user llm request data
    :type data: Dict[str, Any]
    :param params: must contain ``cache_config`` — an object with
        ``skip_list`` and ``context_len`` attributes
    :type params: Dict[str, Any]

    Example:
        .. code-block:: python

            from gptcache.processor.pre import concat_all_queries

            content = concat_all_queries({"messages": [{"role": "system", "content": "hello"},
                {"role": "user", "content": "world"},
                {"role": "assistant", "content": "alice"}]})

    """
    cache_config = params.get("cache_config", None)
    skip_list = cache_config.skip_list
    context_len = cache_config.context_len * 2
    messages = data.get("messages")
    length = min(context_len, len(messages))
    messages = messages[len(messages) - length:]
    # join() over the filtered messages fixes the old loop's stray trailing
    # "\n" that appeared whenever the final message's role was in skip_list.
    return "\n".join(
        f'{message["role"].upper()}: {message["content"]}'
        for message in messages
        if message["role"] not in skip_list
    )


Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.