@File : token_counter.py
ref1: https://openai.com/pricing
ref2: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
ref3: https://github.com/Significant-Gravitas/Auto-GPT/blob/master/autogpt/llm/token_counter.py
ref4: https://github.com/hwchase17/langchain/blob/master/langchain/chat_models/openai.py
ref5: https://ai.google.dev/models/gemini
"""
import tiktoken

from openai.types import CompletionUsage
from openai.types.chat import ChatCompletionChunk

from metagpt.utils.ahttp_client import apost

TOKEN_COSTS = {  # prices are quoted per 1K tokens: {"prompt": input price, "completion": output price}
    "gpt-3.5-turbo": {"prompt": 0.0015, "completion": 0.002},
    "gpt-3.5-turbo-0301": {"prompt": 0.0015, "completion": 0.002},
    "gpt-3.5-turbo-0613": {"prompt": 0.0015, "completion": 0.002},
    "gpt-3.5-turbo-16k": {"prompt": 0.003, "completion": 0.004},
    "gpt-3.5-turbo-16k-0613": {"prompt": 0.003, "completion": 0.004},
    "gpt-35-turbo": {"prompt": 0.0015, "completion": 0.002},
    "gpt-35-turbo-16k": {"prompt": 0.003, "completion": 0.004},
    "gpt-3.5-turbo-1106": {"prompt": 0.001, "completion": 0.002},
"gpt-3.5-turbo-0125": {"prompt": 0.001, "completion": 0.002},
    "gpt-4-0314": {"prompt": 0.03, "completion": 0.06},
    "gpt-4": {"prompt": 0.03, "completion": 0.06},
    "gpt-4-32k": {"prompt": 0.06, "completion": 0.12},
    "gpt-4-32k-0314": {"prompt": 0.06, "completion": 0.12},
"gpt-4-0613": {"prompt": 0.06, "completion": 0.12},
    "gpt-4-turbo-preview": {"prompt": 0.01, "completion": 0.03},
    "gpt-4-1106-preview": {"prompt": 0.01, "completion": 0.03},
    "gpt-4-0125-preview": {"prompt": 0.01, "completion": 0.03},
    "gpt-4-turbo": {"prompt": 0.01, "completion": 0.03},
    "gpt-4-vision-preview": {"prompt": 0.01, "completion": 0.03},
    "gpt-4-1106-vision-preview": {"prompt": 0.01, "completion": 0.03},
    "text-embedding-ada-002": {"prompt": 0.0004, "completion": 0.0},
    "glm-3-turbo": {"prompt": 0.0007, "completion": 0.0007},
    "glm-4": {"prompt": 0.014, "completion": 0.014},
    "gemini-pro": {"prompt": 0.00025, "completion": 0.0005},
    "moonshot-v1-8k": {"prompt": 0.012, "completion": 0.012},
    "moonshot-v1-32k": {"prompt": 0.024, "completion": 0.024},
    "moonshot-v1-128k": {"prompt": 0.06, "completion": 0.06},
    "open-mistral-7b": {"prompt": 0.00025, "completion": 0.00025},
    "open-mixtral-8x7b": {"prompt": 0.0007, "completion": 0.0007},
    "mistral-small-latest": {"prompt": 0.002, "completion": 0.006},
    "mistral-medium-latest": {"prompt": 0.0027, "completion": 0.0081},
    "mistral-large-latest": {"prompt": 0.008, "completion": 0.024},
    "claude-instant-1.2": {"prompt": 0.0008, "completion": 0.0024},
    "claude-2.0": {"prompt": 0.008, "completion": 0.024},
    "claude-2.1": {"prompt": 0.008, "completion": 0.024},
    "claude-3-sonnet-20240229": {"prompt": 0.003, "completion": 0.015},
    "claude-3-opus-20240229": {"prompt": 0.015, "completion": 0.075},
    "yi-34b-chat-0205": {"prompt": 0.0003, "completion": 0.0003},
    "yi-34b-chat-200k": {"prompt": 0.0017, "completion": 0.0017},
    "microsoft/wizardlm-2-8x22b": {"prompt": 0.00108, "completion": 0.00108},
    "meta-llama/llama-3-70b-instruct": {"prompt": 0.008, "completion": 0.008},
    "llama3-70b-8192": {"prompt": 0.0059, "completion": 0.0079},
    "openai/gpt-3.5-turbo-0125": {"prompt": 0.0005, "completion": 0.0015},
    "openai/gpt-4-turbo-preview": {"prompt": 0.01, "completion": 0.03},
}

"""
QianFan token price: https://cloud.baidu.com/doc/WENXINWORKSHOP/s/hlrk4akp7#tokens%E5%90%8E%E4%BB%98%E8%B4%B9
Because QianFan has multiple pricing strategies, we uniformly use `Tokens post-payment` as the accounting method.
"""
QIANFAN_MODEL_TOKEN_COSTS = {
    "ERNIE-Bot-4": {"prompt": 0.017, "completion": 0.017},
    "ERNIE-Bot-8k": {"prompt": 0.0034, "completion": 0.0067},
    "ERNIE-Bot": {"prompt": 0.0017, "completion": 0.0017},
    "ERNIE-Bot-turbo": {"prompt": 0.0011, "completion": 0.0011},
    "EB-turbo-AppBuilder": {"prompt": 0.0011, "completion": 0.0011},
    "ERNIE-Speed": {"prompt": 0.00056, "completion": 0.0011},
    "BLOOMZ-7B": {"prompt": 0.00056, "completion": 0.00056},
    "Llama-2-7B-Chat": {"prompt": 0.00056, "completion": 0.00056},
    "Llama-2-13B-Chat": {"prompt": 0.00084, "completion": 0.00084},
    "Llama-2-70B-Chat": {"prompt": 0.0049, "completion": 0.0049},
    "ChatGLM2-6B-32K": {"prompt": 0.00056, "completion": 0.00056},
    "AquilaChat-7B": {"prompt": 0.00056, "completion": 0.00056},
    "Mixtral-8x7B-Instruct": {"prompt": 0.0049, "completion": 0.0049},
    "SQLCoder-7B": {"prompt": 0.00056, "completion": 0.00056},
    "CodeLlama-7B-Instruct": {"prompt": 0.00056, "completion": 0.00056},
    "XuanYuan-70B-Chat-4bit": {"prompt": 0.0049, "completion": 0.0049},
    "Qianfan-BLOOMZ-7B-compressed": {"prompt": 0.00056, "completion": 0.00056},
    "Qianfan-Chinese-Llama-2-7B": {"prompt": 0.00056, "completion": 0.00056},
    "Qianfan-Chinese-Llama-2-13B": {"prompt": 0.00084, "completion": 0.00084},
    "ChatLaw": {"prompt": 0.0011, "completion": 0.0011},
    "Yi-34B-Chat": {"prompt": 0.0, "completion": 0.0},
}

QIANFAN_ENDPOINT_TOKEN_COSTS = {
    "completions_pro": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-4"],
    "ernie_bot_8k": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-8k"],
    "completions": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot"],
    "eb-instant": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Bot-turbo"],
    "ai_apaas": QIANFAN_MODEL_TOKEN_COSTS["EB-turbo-AppBuilder"],
    "ernie_speed": QIANFAN_MODEL_TOKEN_COSTS["ERNIE-Speed"],
    "bloomz_7b1": QIANFAN_MODEL_TOKEN_COSTS["BLOOMZ-7B"],
    "llama_2_7b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-7B-Chat"],
    "llama_2_13b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-13B-Chat"],
    "llama_2_70b": QIANFAN_MODEL_TOKEN_COSTS["Llama-2-70B-Chat"],
    "chatglm2_6b_32k": QIANFAN_MODEL_TOKEN_COSTS["ChatGLM2-6B-32K"],
    "aquilachat_7b": QIANFAN_MODEL_TOKEN_COSTS["AquilaChat-7B"],
    "mixtral_8x7b_instruct": QIANFAN_MODEL_TOKEN_COSTS["Mixtral-8x7B-Instruct"],
    "sqlcoder_7b": QIANFAN_MODEL_TOKEN_COSTS["SQLCoder-7B"],
    "codellama_7b_instruct": QIANFAN_MODEL_TOKEN_COSTS["CodeLlama-7B-Instruct"],
    "xuanyuan_70b_chat": QIANFAN_MODEL_TOKEN_COSTS["XuanYuan-70B-Chat-4bit"],
    "qianfan_bloomz_7b_compressed": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-BLOOMZ-7B-compressed"],
    "qianfan_chinese_llama_2_7b": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-7B"],
    "qianfan_chinese_llama_2_13b": QIANFAN_MODEL_TOKEN_COSTS["Qianfan-Chinese-Llama-2-13B"],
    "chatlaw": QIANFAN_MODEL_TOKEN_COSTS["ChatLaw"],
    "yi_34b_chat": QIANFAN_MODEL_TOKEN_COSTS["Yi-34B-Chat"],
}

"""
DashScope token price: https://help.aliyun.com/zh/dashscope/developer-reference/tongyi-thousand-questions-metering-and-billing
Each model has its own pricing detail page. Note that some models are free of charge for a limited time.
"""
DASHSCOPE_TOKEN_COSTS = {
    "qwen-turbo": {"prompt": 0.0011, "completion": 0.0011},
    "qwen-plus": {"prompt": 0.0028, "completion": 0.0028},
    "qwen-max": {"prompt": 0.0, "completion": 0.0},
    "qwen-max-1201": {"prompt": 0.0, "completion": 0.0},
    "qwen-max-longcontext": {"prompt": 0.0, "completion": 0.0},
    "llama2-7b-chat-v2": {"prompt": 0.0, "completion": 0.0},
    "llama2-13b-chat-v2": {"prompt": 0.0, "completion": 0.0},
    "qwen-72b-chat": {"prompt": 0.0, "completion": 0.0},
    "qwen-14b-chat": {"prompt": 0.0011, "completion": 0.0011},
    "qwen-7b-chat": {"prompt": 0.00084, "completion": 0.00084},
    "qwen-1.8b-chat": {"prompt": 0.0, "completion": 0.0},
    "baichuan2-13b-chat-v1": {"prompt": 0.0011, "completion": 0.0011},
    "baichuan2-7b-chat-v1": {"prompt": 0.00084, "completion": 0.00084},
    "baichuan-7b-v1": {"prompt": 0.0, "completion": 0.0},
    "chatglm-6b-v2": {"prompt": 0.0011, "completion": 0.0011},
    "chatglm3-6b": {"prompt": 0.0, "completion": 0.0},
    "ziya-llama-13b-v1": {"prompt": 0.0, "completion": 0.0},
    "dolly-12b-v2": {"prompt": 0.0, "completion": 0.0},
    "belle-llama-13b-2m-v1": {"prompt": 0.0, "completion": 0.0},
    "moss-moon-003-sft-v1": {"prompt": 0.0, "completion": 0.0},
    "chatyuan-large-v2": {"prompt": 0.0, "completion": 0.0},
    "billa-7b-sft-v1": {"prompt": 0.0, "completion": 0.0},
}

# Fireworks prices are tiered by model size (the numeric grade keys are the tier's
# maximum parameter count, in billions); values are quoted per 1M tokens, unlike the
# per-1K tables above.
FIREWORKS_GRADE_TOKEN_COSTS = {
    "-1": {"prompt": 0.0, "completion": 0.0},  # unknown grade
    "16": {"prompt": 0.2, "completion": 0.8},
    "80": {"prompt": 0.7, "completion": 2.8},
    "mixtral-8x7b": {"prompt": 0.4, "completion": 1.6},
}

TOKEN_MAX = {  # maximum context window size, in tokens, per model
    "gpt-4-0125-preview": 128000,
    "gpt-4-turbo-preview": 128000,
    "gpt-4-1106-preview": 128000,
    "gpt-4-turbo": 128000,
    "gpt-4-vision-preview": 128000,
    "gpt-4-1106-vision-preview": 128000,
    "gpt-4-32k-0613": 32768,
    "gpt-3.5-turbo-0125": 16385,
    "gpt-3.5-turbo": 16385,
    "gpt-3.5-turbo-1106": 16385,
    "gpt-3.5-turbo-instruct": 4096,
    "gpt-3.5-turbo-16k": 16385,
    "gpt-3.5-turbo-0613": 4096,
    "gpt-3.5-turbo-16k-0613": 16385,
    "text-embedding-ada-002": 8192,
    "glm-3-turbo": 128000,
    "moonshot-v1-8k": 8192,
    "moonshot-v1-32k": 32768,
    "moonshot-v1-128k": 128000,
    "open-mistral-7b": 8192,
    "open-mixtral-8x7b": 32768,
    "mistral-small-latest": 32768,
    "mistral-medium-latest": 32768,
    "mistral-large-latest": 32768,
    "claude-instant-1.2": 100000,
    "claude-2.0": 100000,
    "claude-2.1": 200000,
    "claude-3-sonnet-20240229": 200000,
    "claude-3-opus-20240229": 200000,
    "yi-34b-chat-0205": 4000,
    "yi-34b-chat-200k": 200000,
    "microsoft/wizardlm-2-8x22b": 65536,
    "meta-llama/llama-3-70b-instruct": 8192,
    "llama3-70b-8192": 8192,
    "openai/gpt-3.5-turbo-0125": 16385,
    "openai/gpt-4-turbo-preview": 128000,
}


def count_message_tokens(messages, model="gpt-3.5-turbo-0125"):
    """Return the number of tokens used by a list of messages."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-3.5-turbo-1106",
        "gpt-3.5-turbo-0125",
        "gpt-4-0613",
        "gpt-4-turbo-preview",
        "gpt-4-0125-preview",
        "gpt-4-vision-preview",
        "gpt-4-1106-vision-preview",
    }:
        tokens_per_message = 3
        tokens_per_name = 1
    elif model == "gpt-3.5-turbo-0301":
        tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif "gpt-3.5-turbo" == model:
        print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0125.")
        return count_message_tokens(messages, model="gpt-3.5-turbo-0125")
    elif "gpt-4" == model:
        print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
        return count_message_tokens(messages, model="gpt-4-0613")
    elif "open-llm-model" == model:
        # A self-hosted open_llm API can serve many different models, so the message
        # token calculation is inaccurate; treat the result as a reference value only.
        tokens_per_message = 0
        tokens_per_name = 0
    else:
        raise NotImplementedError(
            f"num_tokens_from_messages() is not implemented for model {model}. "
            f"See https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken "
            f"for information on how messages are converted to tokens."
        )
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            if isinstance(value, list):
                # Vision-style messages carry a list of content parts; only the text
                # parts can be counted with a text tokenizer.
                for item in value:
                    if isinstance(item, dict) and item.get("type") in ["text"]:
                        content = item.get("text", "")
                        num_tokens += len(encoding.encode(content))
            else:
                num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens


def count_string_tokens(string: str, model_name: str) -> int:
    """
    Returns the number of tokens in a text string.

    Args:
        string (str): The text string.
        model_name (str): The name of the encoding to use. (e.g., "gpt-3.5-turbo")

    Returns:
        int: The number of tokens in the text string.
    """
    try:
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(string))


def get_max_completion_tokens(messages: list[dict], model: str, default: int) -> int:
    """Calculate the maximum number of completion tokens for a given model and list of messages.

    Args:
        messages: A list of messages.
        model: The model name.
        default: The value returned when the model's context size is unknown.

    Returns:
        The maximum number of completion tokens.
    """
    if model not in TOKEN_MAX:
        return default
    return TOKEN_MAX[model] - count_message_tokens(messages) - 1


async def get_openrouter_tokens(chunk: ChatCompletionChunk) -> CompletionUsage:
    """Ref: https://openrouter.ai/docs#querying-cost-and-stats"""
    url = f"https://openrouter.ai/api/v1/generation?id={chunk.id}"
    resp = await apost(url=url, as_json=True)
    tokens_prompt = resp.get("tokens_prompt", 0)
    completion_tokens = resp.get("tokens_completion", 0)
    usage = CompletionUsage(
        prompt_tokens=tokens_prompt, completion_tokens=completion_tokens, total_tokens=tokens_prompt + completion_tokens
    )
    return usage
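

# A minimal usage sketch tying the pieces together. Exact counts depend on the
# installed tiktoken version, so the printed numbers are illustrative only.
if __name__ == "__main__":
    demo_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello!"},
    ]
    print("prompt tokens:", count_message_tokens(demo_messages))  # default gpt-3.5-turbo-0125 rules
    print("string tokens:", count_string_tokens("Hello world", "gpt-3.5-turbo"))
    print("max completion:", get_max_completion_tokens(demo_messages, "gpt-3.5-turbo-0125", default=1024))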