2
from collections import defaultdict, OrderedDict
3
from typing import Dict, Optional
6
# Option labels; presumably the answer letters used for multiple-choice
# evaluation -- confirm against the code that consumes them.
CHOICES = ["A", "B", "C", "D"]

# Per-prefix defaults written by register_model_group(). Both are
# defaultdict(str), so looking up an unregistered prefix yields "".
DEFAULT_MODULE: Dict[str, str] = defaultdict(str)
DEFAULT_TEMPLATE: Dict[str, str] = defaultdict(str)

# Name fragments associated with layer-norm parameters (presumably matched
# against parameter names -- verify at the call site).
LAYERNORM_NAMES = {"norm", "ln"}

# File name of the trainer log; the extension indicates JSON Lines.
LOG_FILE_NAME = "trainer_log.jsonl"

# Presumably the selectable fine-tuning methods -- confirm with the caller.
METHODS = ["full", "freeze", "lora"]

# Subject categories; "Average" comes first, followed by the individual groups.
SUBJECTS = [
    "Average",
    "STEM",
    "Social Sciences",
    "Humanities",
    "Other",
]

# Insertion-ordered registry: model name -> download paths, populated by
# register_model_group().
SUPPORTED_MODELS: Dict[str, dict] = OrderedDict()
25
"Supervised Fine-Tuning": "sft",
26
"Reward Modeling": "rm",
32
class DownloadSource(str, Enum):
37
def register_model_group(
    models: Dict[str, Dict[DownloadSource, str]],
    module: Optional[str] = None,
    template: Optional[str] = None
) -> None:
    """Register a group of models that share a single name prefix.

    The prefix of a model name is the text before its first "-". Every entry
    of ``models`` is stored in ``SUPPORTED_MODELS``; when given, ``module``
    and ``template`` are stored under the shared prefix in ``DEFAULT_MODULE``
    and ``DEFAULT_TEMPLATE`` respectively.

    Args:
        models: Mapping of model name to its per-download-source paths.
        module: Default module name for this prefix, or None to leave unset.
        template: Default template name for this prefix, or None to leave unset.

    Raises:
        AssertionError: If the model names do not all share the same prefix.
    """
    # Collect every distinct prefix first so the check really compares the
    # names against each other, and so a bad group is rejected before any
    # registry is modified.
    prefixes = {name.split("-")[0] for name in models}
    # Kept as an assert (same message) so the failure type is unchanged.
    assert len(prefixes) <= 1, "prefix should be identical."

    # Same effect as assigning the entries one by one, in iteration order.
    SUPPORTED_MODELS.update(models)

    if not prefixes:
        # Empty group: there is no prefix to attach defaults to.
        return

    (prefix,) = prefixes
    if module is not None:
        DEFAULT_MODULE[prefix] = module
    if template is not None:
        DEFAULT_TEMPLATE[prefix] = template
58
DownloadSource.DEFAULT: "baichuan-inc/Baichuan-7B",
59
DownloadSource.MODELSCOPE: "baichuan-inc/baichuan-7B"
61
"Baichuan-13B-Base": {
62
DownloadSource.DEFAULT: "baichuan-inc/Baichuan-13B-Base",
63
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Base"
65
"Baichuan-13B-Chat": {
66
DownloadSource.DEFAULT: "baichuan-inc/Baichuan-13B-Chat",
67
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Chat"
77
"Baichuan2-7B-Base": {
78
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Base",
79
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Base"
81
"Baichuan2-13B-Base": {
82
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Base",
83
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Base"
85
"Baichuan2-7B-Chat": {
86
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Chat",
87
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Chat"
89
"Baichuan2-13B-Chat": {
90
DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Chat",
91
DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat"
102
DownloadSource.DEFAULT: "bigscience/bloom-560m",
103
DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-560m"
106
DownloadSource.DEFAULT: "bigscience/bloom-3b",
107
DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-3b"
110
DownloadSource.DEFAULT: "bigscience/bloom-7b1",
111
DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-7b1"
114
module="query_key_value"
121
DownloadSource.DEFAULT: "bigscience/bloomz-560m",
122
DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-560m"
125
DownloadSource.DEFAULT: "bigscience/bloomz-3b",
126
DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-3b"
129
DownloadSource.DEFAULT: "bigscience/bloomz-7b1-mt",
130
DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-7b1-mt"
133
module="query_key_value"
140
DownloadSource.DEFAULT: "vivo-ai/BlueLM-7B-Base",
141
DownloadSource.MODELSCOPE: "vivo-ai/BlueLM-7B-Base"
144
DownloadSource.DEFAULT: "vivo-ai/BlueLM-7B-Chat",
145
DownloadSource.MODELSCOPE: "vivo-ai/BlueLM-7B-Chat"
154
"ChatGLM2-6B-Chat": {
155
DownloadSource.DEFAULT: "THUDM/chatglm2-6b",
156
DownloadSource.MODELSCOPE: "ZhipuAI/chatglm2-6b"
159
module="query_key_value",
166
"ChatGLM3-6B-Base": {
167
DownloadSource.DEFAULT: "THUDM/chatglm3-6b-base",
168
DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b-base"
170
"ChatGLM3-6B-Chat": {
171
DownloadSource.DEFAULT: "THUDM/chatglm3-6b",
172
DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b"
175
module="query_key_value",
182
"ChineseLLaMA2-1.3B": {
183
DownloadSource.DEFAULT: "hfl/chinese-llama-2-1.3b",
184
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-1.3b"
186
"ChineseLLaMA2-7B": {
187
DownloadSource.DEFAULT: "hfl/chinese-llama-2-7b",
188
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-7b"
190
"ChineseLLaMA2-13B": {
191
DownloadSource.DEFAULT: "hfl/chinese-llama-2-13b",
192
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-13b"
194
"ChineseLLaMA2-1.3B-Chat": {
195
DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-1.3b",
196
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-1.3b"
198
"ChineseLLaMA2-7B-Chat": {
199
DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-7b",
200
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-7b"
202
"ChineseLLaMA2-13B-Chat": {
203
DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-13b",
204
DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-13b"
213
"DeepseekLLM-7B-Base": {
214
DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-7b-base",
215
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-base"
217
"DeepseekLLM-67B-Base": {
218
DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-base",
219
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-base"
221
"DeepseekLLM-7B-Chat": {
222
DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-7b-chat",
223
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-chat"
225
"DeepseekLLM-67B-Chat": {
226
DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-chat",
227
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-chat"
236
"DeepseekCoder-6.7B-Base": {
237
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-base",
238
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-base"
240
"DeepseekCoder-33B-Base": {
241
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base",
242
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-base"
244
"DeepseekCoder-6.7B-Chat": {
245
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-instruct",
246
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-instruct"
248
"DeepseekCoder-33B-Chat": {
249
DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct",
250
DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-instruct"
253
template="deepseekcoder"
260
DownloadSource.DEFAULT: "tiiuae/falcon-7b",
261
DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b"
264
DownloadSource.DEFAULT: "tiiuae/falcon-40b",
265
DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b"
268
DownloadSource.DEFAULT: "tiiuae/falcon-180b",
269
DownloadSource.MODELSCOPE: "modelscope/falcon-180B"
272
DownloadSource.DEFAULT: "tiiuae/falcon-7b-instruct",
273
DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b-instruct"
276
DownloadSource.DEFAULT: "tiiuae/falcon-40b-instruct",
277
DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b-instruct"
279
"Falcon-180B-Chat": {
280
DownloadSource.DEFAULT: "tiiuae/falcon-180b-chat",
281
DownloadSource.MODELSCOPE: "modelscope/falcon-180B-chat"
284
module="query_key_value",
292
DownloadSource.DEFAULT: "internlm/internlm-7b",
293
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-7b"
296
DownloadSource.DEFAULT: "internlm/internlm-20b",
297
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-20b"
299
"InternLM-7B-Chat": {
300
DownloadSource.DEFAULT: "internlm/internlm-chat-7b",
301
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-chat-7b"
303
"InternLM-20B-Chat": {
304
DownloadSource.DEFAULT: "internlm/internlm-chat-20b",
305
DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-chat-20b"
315
DownloadSource.DEFAULT: "deeplang-ai/LingoWhale-8B",
316
DownloadSource.MODELSCOPE: "DeepLang/LingoWhale-8B"
326
DownloadSource.DEFAULT: "huggyllama/llama-7b",
327
DownloadSource.MODELSCOPE: "skyline2006/llama-7b"
330
DownloadSource.DEFAULT: "huggyllama/llama-13b",
331
DownloadSource.MODELSCOPE: "skyline2006/llama-13b"
334
DownloadSource.DEFAULT: "huggyllama/llama-30b",
335
DownloadSource.MODELSCOPE: "skyline2006/llama-30b"
338
DownloadSource.DEFAULT: "huggyllama/llama-65b",
339
DownloadSource.MODELSCOPE: "skyline2006/llama-65b"
348
DownloadSource.DEFAULT: "meta-llama/Llama-2-7b-hf",
349
DownloadSource.MODELSCOPE: "modelscope/Llama-2-7b-ms"
352
DownloadSource.DEFAULT: "meta-llama/Llama-2-13b-hf",
353
DownloadSource.MODELSCOPE: "modelscope/Llama-2-13b-ms"
356
DownloadSource.DEFAULT: "meta-llama/Llama-2-70b-hf",
357
DownloadSource.MODELSCOPE: "modelscope/Llama-2-70b-ms"
360
DownloadSource.DEFAULT: "meta-llama/Llama-2-7b-chat-hf",
361
DownloadSource.MODELSCOPE: "modelscope/Llama-2-7b-chat-ms"
364
DownloadSource.DEFAULT: "meta-llama/Llama-2-13b-chat-hf",
365
DownloadSource.MODELSCOPE: "modelscope/Llama-2-13b-chat-ms"
368
DownloadSource.DEFAULT: "meta-llama/Llama-2-70b-chat-hf",
369
DownloadSource.MODELSCOPE: "modelscope/Llama-2-70b-chat-ms"
379
DownloadSource.DEFAULT: "mistralai/Mistral-7B-v0.1",
380
DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-v0.1"
383
DownloadSource.DEFAULT: "mistralai/Mistral-7B-Instruct-v0.1",
384
DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-Instruct-v0.1"
386
"Mistral-7B-v0.2-Chat": {
387
DownloadSource.DEFAULT: "mistralai/Mistral-7B-Instruct-v0.2",
388
DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-Instruct-v0.2"
398
DownloadSource.DEFAULT: "mistralai/Mixtral-8x7B-v0.1",
399
DownloadSource.MODELSCOPE: "AI-ModelScope/Mixtral-8x7B-v0.1"
401
"Mixtral-8x7B-Chat": {
402
DownloadSource.DEFAULT: "mistralai/Mixtral-8x7B-Instruct-v0.1"
411
"OpenChat3.5-7B-Chat": {
412
DownloadSource.DEFAULT: "openchat/openchat_3.5",
413
DownloadSource.MODELSCOPE: "myxiongmodel/openchat_3.5"
423
DownloadSource.DEFAULT: "microsoft/phi-1_5",
424
DownloadSource.MODELSCOPE: "allspace/PHI_1-5"
434
DownloadSource.DEFAULT: "Qwen/Qwen-1_8B",
435
DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B"
438
DownloadSource.DEFAULT: "Qwen/Qwen-7B",
439
DownloadSource.MODELSCOPE: "qwen/Qwen-7B"
442
DownloadSource.DEFAULT: "Qwen/Qwen-14B",
443
DownloadSource.MODELSCOPE: "qwen/Qwen-14B"
446
DownloadSource.DEFAULT: "Qwen/Qwen-72B",
447
DownloadSource.MODELSCOPE: "qwen/Qwen-72B"
450
DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat",
451
DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat"
454
DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat",
455
DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat"
458
DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat",
459
DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat"
462
DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat",
463
DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat"
465
"Qwen-1.8B-int8-Chat": {
466
DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat-Int8",
467
DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat-Int8"
469
"Qwen-1.8B-int4-Chat": {
470
DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat-Int4",
471
DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat-Int4"
473
"Qwen-7B-int8-Chat": {
474
DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat-Int8",
475
DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat-Int8"
477
"Qwen-7B-int4-Chat": {
478
DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat-Int4",
479
DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat-Int4"
481
"Qwen-14B-int8-Chat": {
482
DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat-Int8",
483
DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat-Int8"
485
"Qwen-14B-int4-Chat": {
486
DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat-Int4",
487
DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat-Int4"
489
"Qwen-72B-int8-Chat": {
490
DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat-Int8",
491
DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int8"
493
"Qwen-72B-int4-Chat": {
494
DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat-Int4",
495
DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int4"
505
"Skywork-13B-Base": {
506
DownloadSource.DEFAULT: "Skywork/Skywork-13B-base",
507
DownloadSource.MODELSCOPE: "skywork/Skywork-13B-base"
515
"Vicuna1.5-7B-Chat": {
516
DownloadSource.DEFAULT: "lmsys/vicuna-7b-v1.5",
517
DownloadSource.MODELSCOPE: "Xorbits/vicuna-7b-v1.5"
519
"Vicuna1.5-13B-Chat": {
520
DownloadSource.DEFAULT: "lmsys/vicuna-13b-v1.5",
521
DownloadSource.MODELSCOPE: "Xorbits/vicuna-13b-v1.5"
531
DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B"
533
"XuanYuan-70B-Chat": {
534
DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat"
536
"XuanYuan-70B-int8-Chat": {
537
DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-8bit"
539
"XuanYuan-70B-int4-Chat": {
540
DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-4bit"
550
DownloadSource.DEFAULT: "xverse/XVERSE-7B",
551
DownloadSource.MODELSCOPE: "xverse/XVERSE-7B"
554
DownloadSource.DEFAULT: "xverse/XVERSE-13B",
555
DownloadSource.MODELSCOPE: "xverse/XVERSE-13B"
558
DownloadSource.DEFAULT: "xverse/XVERSE-65B",
559
DownloadSource.MODELSCOPE: "xverse/XVERSE-65B"
562
DownloadSource.DEFAULT: "xverse/XVERSE-7B-Chat",
563
DownloadSource.MODELSCOPE: "xverse/XVERSE-7B-Chat"
566
DownloadSource.DEFAULT: "xverse/XVERSE-13B-Chat",
567
DownloadSource.MODELSCOPE: "xverse/XVERSE-13B-Chat"
577
DownloadSource.DEFAULT: "wenge-research/yayi-7b-llama2",
578
DownloadSource.MODELSCOPE: "AI-ModelScope/yayi-7b-llama2"
581
DownloadSource.DEFAULT: "wenge-research/yayi-13b-llama2",
582
DownloadSource.MODELSCOPE: "AI-ModelScope/yayi-13b-llama2"
592
DownloadSource.DEFAULT: "01-ai/Yi-6B",
593
DownloadSource.MODELSCOPE: "01ai/Yi-6B"
596
DownloadSource.DEFAULT: "01-ai/Yi-34B",
597
DownloadSource.MODELSCOPE: "01ai/Yi-34B"
600
DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat",
601
DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat"
604
DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat",
605
DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat"
608
DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat-8bits",
609
DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat-8bits"
611
"Yi-34B-int8-Chat": {
612
DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat-8bits",
613
DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat-8bits"
622
"Zephyr-7B-Alpha-Chat": {
623
DownloadSource.DEFAULT: "HuggingFaceH4/zephyr-7b-alpha",
624
DownloadSource.MODELSCOPE: "AI-ModelScope/zephyr-7b-alpha"
626
"Zephyr-7B-Beta-Chat": {
627
DownloadSource.DEFAULT: "HuggingFaceH4/zephyr-7b-beta",
628
DownloadSource.MODELSCOPE: "modelscope/zephyr-7b-beta"