MetaGPT

token_counter.py
312 строк · 14.4 Кб
Перенос по словам
1
#!/usr/bin/env python
2
# -*- coding: utf-8 -*-
3
"""
4
@Time    : 2023/5/18 00:40
5
@Author  : alexanderwu
6
@File    : token_counter.py
7
ref1: https://openai.com/pricing
8
ref2: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
9
ref3: https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/llm/token_counter.py
10
ref4: https://github.com/hwchase17/langchain/blob/master/langchain/chat_models/openai.py
11
ref5: https://ai.google.dev/models/gemini
12
"""
13
import tiktoken
14
from openai.types import CompletionUsage
15
from openai.types.chat import ChatCompletionChunk
16

17
from metagpt.utils.ahttp_client import apost
18

19
TOKEN_COSTS = {
20
    "gpt-3.5-turbo": {"prompt": 0.0015, "completion": 0.002},
21
    "gpt-3.5-turbo-0301": {"prompt": 0.0015, "completion": 0.002},
22
    "gpt-3.5-turbo-0613": {"prompt": 0.0015, "completion": 0.002},
23
    "gpt-3.5-turbo-16k": {"prompt": 0.003, "completion": 0.004},
24
    "gpt-3.5-turbo-16k-0613": {"prompt": 0.003, "completion": 0.004},
25
    "gpt-35-turbo": {"prompt": 0.0015, "completion": 0.002},
26
    "gpt-35-turbo-16k": {"prompt": 0.003, "completion": 0.004},
27
    "gpt-3.5-turbo-1106": {"prompt": 0.001, "completion": 0.002},
28
    "gpt-3.5-turbo-0125": {"prompt": 0.001, "completion": 0.002},
29
    "gpt-4-0314": {"prompt": 0.03, "completion": 0.06},
30
    "gpt-4": {"prompt": 0.03, "completion": 0.06},
31
    "gpt-4-32k": {"prompt": 0.06, "completion": 0.12},
32
    "gpt-4-32k-0314": {"prompt": 0.06, "completion": 0.12},
33
    "gpt-4-0613": {"prompt": 0.06, "completion": 0.12},
34
    "gpt-4-turbo-preview": {"prompt": 0.01, "completion": 0.03},
35
    "gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03},
36
    "gpt-4-0125-preview": {"prompt": 0.01, "completion": 0.03},
37
    "gpt-4-turbo": {"prompt": 0.01, "completion": 0.03},
38
    "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03},  # TODO add extra image price calculator
39
    "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
40
    "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
41
    "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007},  # 128k version, prompt + completion tokens=0.005￥/k-tokens
42
    "glm-4": {"prompt": 0.014, "completion": 0.014},  # 128k version, prompt + completion tokens=0.1￥/k-tokens
43
    "gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
44
    "moonshot-v1-8k": {"prompt": 0.012, "completion": 0.012},  # prompt + completion tokens=0.012￥/k-tokens
45
    "moonshot-v1-32k": {"prompt": 0.024, "completion": 0.024},
46
    "moonshot-v1-128k": {"prompt": 0.06, "completion": 0.06},
47
    "open-mistral-7b": {"prompt": 0.00025, "completion": 0.00025},
48
    "open-mixtral-8x7b": {"prompt": 0.0007, "completion": 0.0007},
49
    "mistral-small-latest": {"prompt": 0.002, "completion": 0.006},
50
    "mistral-medium-latest": {"prompt": 0.0027, "completion": 0.0081},
51
    "mistral-large-latest": {"prompt": 0.008, "completion": 0.024},
52
    "claude-instant-1.2": {"prompt": 0.0008, "completion": 0.0024},
53
    "claude-2.0": {"prompt": 0.008, "completion": 0.024},
54
    "claude-2.1": {"prompt": 0.008, "completion": 0.024},
55
    "claude-3-sonnet-20240229": {"prompt": 0.003, "completion": 0.015},
56
    "claude-3-opus-20240229": {"prompt": 0.015, "completion": 0.075},
57
    "yi-34b-chat-0205": {"prompt": 0.0003, "completion": 0.0003},
58
    "yi-34b-chat-200k": {"prompt": 0.0017, "completion": 0.0017},
59
    "microsoft/wizardlm-2-8x22b": {"prompt": 0.00108, "completion": 0.00108},  # for openrouter, start
60
    "meta-llama/llama-3-70b-instruct": {"prompt": 0.008, "completion": 0.008},
61
    "llama3-70b-8192": {"prompt": 0.0059, "completion": 0.0079},
62
    "openai/gpt-3.5-turbo-0125": {"prompt": 0.0005, "completion": 0.0015},
63
    "openai/gpt-4-turbo-preview": {"prompt": 0.01, "completion": 0.03},
64
}
65

66

67
"""
68
QianFan Token Price https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7#tokens%E5%90%8E%E4%BB%98%E8%B4%B9
69
Due to QianFan has multi price strategies, we unify `Tokens post-payment` as a statistical method.
70
"""
71
QIANFAN_MODEL_TOKEN_COSTS = {
72
    "ERNIE-Bot-4": {"prompt": 0.017, "completion": 0.017},
73
    "ERNIE-Bot-8k": {"prompt": 0.0034, "completion": 0.0067},
74
    "ERNIE-Bot": {"prompt": 0.0017, "completion": 0.0017},
75
    "ERNIE-Bot-turbo": {"prompt": 0.0011, "completion": 0.0011},
76
    "EB-turbo-AppBuilder": {"prompt": 0.0011, "completion": 0.0011},
77
    "ERNIE-Speed": {"prompt": 0.00056, "completion": 0.0011},
78
    "BLOOMZ-7B": {"prompt": 0.00056, "completion": 0.00056},
79
    "Llama-2-7B-Chat": {"prompt": 0.00056, "completion": 0.00056},
80
    "Llama-2-13B-Chat": {"prompt": 0.00084, "completion": 0.00084},
81
    "Llama-2-70B-Chat": {"prompt": 0.0049, "completion": 0.0049},
82
    "ChatGLM2-6B-32K": {"prompt": 0.00056, "completion": 0.00056},
83
    "AquilaChat-7B": {"prompt": 0.00056, "completion": 0.00056},
84
    "Mixtral-8x7B-Instruct": {"prompt": 0.0049, "completion": 0.0049},
85
    "SQLCoder-7B": {"prompt": 0.00056, "completion": 0.00056},
86
    "CodeLlama-7B-Instruct": {"prompt": 0.00056, "completion": 0.00056},
87
    "XuanYuan-70B-Chat-4bit": {"prompt": 0.0049, "completion": 0.0049},
88
    "Qianfan-BLOOMZ-7B-compressed": {"prompt": 0.00056, "completion": 0.00056},
89
    "Qianfan-Chinese-Llama-2-7B": {"prompt": 0.00056, "completion": 0.00056},
90
    "Qianfan-Chinese-Llama-2-13B": {"prompt": 0.00084, "completion": 0.00084},
91
    "ChatLaw": {"prompt": 0.0011, "completion": 0.0011},
92
    "Yi-34B-Chat": {"prompt": 0.0, "completion": 0.0},
93
}
94

95
QIANFAN_ENDPOINT_TOKEN_COSTS = {
96
    "completions_pro": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-4"],
97
    "ernie_bot_8k": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-8k"],
98
    "completions": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot"],
99
    "eb-instant": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-turbo"],
100
    "ai_apaas": QIANFAN_MODEL_TOKEN_COSTS["EB-turbo-AppBuilder"],
101
    "ernie_speed": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Speed"],
102
    "bloomz_7b1": QIANFAN_MODEL_TOKEN_COSTS["BLOOMZ-7B"],
103
    "llama_2_7b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-7B-Chat"],
104
    "llama_2_13b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-13B-Chat"],
105
    "llama_2_70b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-70B-Chat"],
106
    "chatglm2_6b_32k": QIANFAN_MODEL_TOKEN_COSTS["ChatGLM2-6B-32K"],
107
    "aquilachat_7b": QIANFAN_MODEL_TOKEN_COSTS["AquilaChat-7B"],
108
    "mixtral_8x7b_instruct": QIANFAN_MODEL_TOKEN_COSTS["Mixtral-8x7B-Instruct"],
109
    "sqlcoder_7b": QIANFAN_MODEL_TOKEN_COSTS["SQLCoder-7B"],
110
    "codellama_7b_instruct": QIANFAN_MODEL_TOKEN_COSTS["CodeLlama-7B-Instruct"],
111
    "xuanyuan_70b_chat": QIANFAN_MODEL_TOKEN_COSTS["XuanYuan-70B-Chat-4bit"],
112
    "qianfan_bloomz_7b_compressed": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-BLOOMZ-7B-compressed"],
113
    "qianfan_chinese_llama_2_7b": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-7B"],
114
    "qianfan_chinese_llama_2_13b": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-13B"],
115
    "chatlaw": QIANFAN_MODEL_TOKEN_COSTS["ChatLaw"],
116
    "yi_34b_chat": QIANFAN_MODEL_TOKEN_COSTS["Yi-34B-Chat"],
117
}
118

119
"""
120
DashScope Token price https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
121
Different model has different detail page. Attention, some model are free for a limited time.
122
"""
123
DASHSCOPE_TOKEN_COSTS = {
124
    "qwen-turbo": {"prompt": 0.0011, "completion": 0.0011},
125
    "qwen-plus": {"prompt": 0.0028, "completion": 0.0028},
126
    "qwen-max": {"prompt": 0.0, "completion": 0.0},
127
    "qwen-max-1201": {"prompt": 0.0, "completion": 0.0},
128
    "qwen-max-longcontext": {"prompt": 0.0, "completion": 0.0},
129
    "llama2-7b-chat-v2": {"prompt": 0.0, "completion": 0.0},
130
    "llama2-13b-chat-v2": {"prompt": 0.0, "completion": 0.0},
131
    "qwen-72b-chat": {"prompt": 0.0, "completion": 0.0},
132
    "qwen-14b-chat": {"prompt": 0.0011, "completion": 0.0011},
133
    "qwen-7b-chat": {"prompt": 0.00084, "completion": 0.00084},
134
    "qwen-1.8b-chat": {"prompt": 0.0, "completion": 0.0},
135
    "baichuan2-13b-chat-v1": {"prompt": 0.0011, "completion": 0.0011},
136
    "baichuan2-7b-chat-v1": {"prompt": 0.00084, "completion": 0.00084},
137
    "baichuan-7b-v1": {"prompt": 0.0, "completion": 0.0},
138
    "chatglm-6b-v2": {"prompt": 0.0011, "completion": 0.0011},
139
    "chatglm3-6b": {"prompt": 0.0, "completion": 0.0},
140
    "ziya-llama-13b-v1": {"prompt": 0.0, "completion": 0.0},  # no price page, judge it as free
141
    "dolly-12b-v2": {"prompt": 0.0, "completion": 0.0},
142
    "belle-llama-13b-2m-v1": {"prompt": 0.0, "completion": 0.0},
143
    "moss-moon-003-sft-v1": {"prompt": 0.0, "completion": 0.0},
144
    "chatyuan-large-v2": {"prompt": 0.0, "completion": 0.0},
145
    "billa-7b-sft-v1": {"prompt": 0.0, "completion": 0.0},
146
}
147

148

149
FIREWORKS_GRADE_TOKEN_COSTS = {
150
    "-1": {"prompt": 0.0, "completion": 0.0},  # abnormal condition
151
    "16": {"prompt": 0.2, "completion": 0.8},  # 16 means model size <= 16B; 0.2 means $0.2/1M tokens
152
    "80": {"prompt": 0.7, "completion": 2.8},  # 80 means 16B < model size <= 80B
153
    "mixtral-8x7b": {"prompt": 0.4, "completion": 1.6},
154
}
155

156
# https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo
157
TOKEN_MAX = {
158
    "gpt-4-0125-preview": 128000,
159
    "gpt-4-turbo-preview": 128000,
160
    "gpt-4-1106-preview": 128000,
161
    "gpt-4-turbo": 128000,
162
    "gpt-4-vision-preview": 128000,
163
    "gpt-4-1106-vision-preview": 128000,
164
    "gpt-4": 8192,
165
    "gpt-4-0613": 8192,
166
    "gpt-4-32k": 32768,
167
    "gpt-4-32k-0613": 32768,
168
    "gpt-3.5-turbo-0125": 16385,
169
    "gpt-3.5-turbo": 16385,
170
    "gpt-3.5-turbo-1106": 16385,
171
    "gpt-3.5-turbo-instruct": 4096,
172
    "gpt-3.5-turbo-16k": 16385,
173
    "gpt-3.5-turbo-0613": 4096,
174
    "gpt-3.5-turbo-16k-0613": 16385,
175
    "text-embedding-ada-002": 8192,
176
    "glm-3-turbo": 128000,
177
    "glm-4": 128000,
178
    "gemini-pro": 32768,
179
    "moonshot-v1-8k": 8192,
180
    "moonshot-v1-32k": 32768,
181
    "moonshot-v1-128k": 128000,
182
    "open-mistral-7b": 8192,
183
    "open-mixtral-8x7b": 32768,
184
    "mistral-small-latest": 32768,
185
    "mistral-medium-latest": 32768,
186
    "mistral-large-latest": 32768,
187
    "claude-instant-1.2": 100000,
188
    "claude-2.0": 100000,
189
    "claude-2.1": 200000,
190
    "claude-3-sonnet-20240229": 200000,
191
    "claude-3-opus-20240229": 200000,
192
    "yi-34b-chat-0205": 4000,
193
    "yi-34b-chat-200k": 200000,
194
    "microsoft/wizardlm-2-8x22b": 65536,
195
    "meta-llama/llama-3-70b-instruct": 8192,
196
    "llama3-70b-8192": 8192,
197
    "openai/gpt-3.5-turbo-0125": 16385,
198
    "openai/gpt-4-turbo-preview": 128000,
199
}
200

201

202
def count_message_tokens(messages, model="gpt-3.5-turbo-0125"):
203
    """Return the number of tokens used by a list of messages."""
204
    try:
205
        encoding = tiktoken.encoding_for_model(model)
206
    except KeyError:
207
        print("Warning: model not found. Using cl100k_base encoding.")
208
        encoding = tiktoken.get_encoding("cl100k_base")
209
    if model in {
210
        "gpt-3.5-turbo-0613",
211
        "gpt-3.5-turbo-16k-0613",
212
        "gpt-35-turbo",
213
        "gpt-35-turbo-16k",
214
        "gpt-3.5-turbo-16k",
215
        "gpt-3.5-turbo-1106",
216
        "gpt-3.5-turbo-0125",
217
        "gpt-4-0314",
218
        "gpt-4-32k-0314",
219
        "gpt-4-0613",
220
        "gpt-4-32k-0613",
221
        "gpt-4-turbo",
222
        "gpt-4-turbo-preview",
223
        "gpt-4-0125-preview",
224
        "gpt-4-turbo",
225
        "gpt-4-vision-preview",
226
        "gpt-4-1106-vision-preview",
227
    }:
228
        tokens_per_message = 3  # # every reply is primed with <|start|>assistant<|message|>
229
        tokens_per_name = 1
230
    elif model == "gpt-3.5-turbo-0301":
231
        tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
232
        tokens_per_name = -1  # if there's a name, the role is omitted
233
    elif "gpt-3.5-turbo" == model:
234
        print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.")
235
        return count_message_tokens(messages, model="gpt-3.5-turbo-0125")
236
    elif "gpt-4" == model:
237
        print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
238
        return count_message_tokens(messages, model="gpt-4-0613")
239
    elif "open-llm-model" == model:
240
        """
241
        For self-hosted open_llm api, they include lots of different models. The message tokens calculation is
242
        inaccurate. It's a reference result.
243
        """
244
        tokens_per_message = 0  # ignore conversation message template prefix
245
        tokens_per_name = 0
246
    else:
247
        raise NotImplementedError(
248
            f"num_tokens_from_messages() is not implemented for model {model}. "
249
            f"See https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken "
250
            f"for information on how messages are converted to tokens."
251
        )
252
    num_tokens = 0
253
    for message in messages:
254
        num_tokens += tokens_per_message
255
        for key, value in message.items():
256
            content = value
257
            if isinstance(value, list):
258
                # for gpt-4v
259
                for item in value:
260
                    if isinstance(item, dict) and item.get("type") in ["text"]:
261
                        content = item.get("text", "")
262
            num_tokens += len(encoding.encode(content))
263
            if key == "name":
264
                num_tokens += tokens_per_name
265
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
266
    return num_tokens
267

268

269
def count_string_tokens(string: str, model_name: str) -> int:
270
    """
271
    Returns the number of tokens in a text string.
272

273
    Args:
274
        string (str): The text string.
275
        model_name (str): The name of the encoding to use. (e.g., "gpt-3.5-turbo")
276

277
    Returns:
278
        int: The number of tokens in the text string.
279
    """
280
    try:
281
        encoding = tiktoken.encoding_for_model(model_name)
282
    except KeyError:
283
        print("Warning: model not found. Using cl100k_base encoding.")
284
        encoding = tiktoken.get_encoding("cl100k_base")
285
    return len(encoding.encode(string))
286

287

288
def get_max_completion_tokens(messages: list[dict], model: str, default: int) -> int:
289
    """Calculate the maximum number of completion tokens for a given model and list of messages.
290

291
    Args:
292
        messages: A list of messages.
293
        model: The model name.
294

295
    Returns:
296
        The maximum number of completion tokens.
297
    """
298
    if model not in TOKEN_MAX:
299
        return default
300
    return TOKEN_MAX[model] - count_message_tokens(messages) - 1
301

302

303
async def get_openrouter_tokens(chunk: ChatCompletionChunk) -> CompletionUsage:
304
    """refs to https://openrouter.ai/docs#querying-cost-and-stats"""
305
    url = f"https://openrouter.ai/api/v1/generation?id={chunk.id}"
306
    resp = await apost(url=url, as_json=True)
307
    tokens_prompt = resp.get("tokens_prompt", 0)
308
    completion_tokens = resp.get("tokens_completion", 0)
309
    usage = CompletionUsage(
310
        prompt_tokens=tokens_prompt, completion_tokens=completion_tokens, total_tokens=tokens_prompt + completion_tokens
311
    )
312
    return usage
313
MetaGPT

Использование cookies