aurora

Форк
0
/
constants.py 
632 строки · 19.1 Кб
1
from enum import Enum
2
from collections import defaultdict, OrderedDict
3
from typing import Dict, Optional
4

5

6
# NOTE: the page-scrape line-number residue that was interleaved with these
# statements (and broke the TRAINING_STAGES literal) has been removed.

# Option labels (presumably for multiple-choice evaluation -- confirm against callers).
CHOICES = ["A", "B", "C", "D"]

# Model-name prefix -> default module name. Filled by register_model_group();
# looking up an unregistered prefix yields "" (defaultdict(str)).
DEFAULT_MODULE = defaultdict(str)

# Model-name prefix -> default template name. Filled by register_model_group();
# looking up an unregistered prefix yields "" (defaultdict(str)).
DEFAULT_TEMPLATE = defaultdict(str)

# Presumably the label value skipped by the loss function -- TODO confirm.
IGNORE_INDEX = -100

# Presumably name fragments used to recognise layer-norm parameters -- TODO confirm.
LAYERNORM_NAMES = {"norm", "ln"}

LOG_FILE_NAME = "trainer_log.jsonl"

METHODS = ["full", "freeze", "lora"]

SUBJECTS = ["Average", "STEM", "Social Sciences", "Humanities", "Other"]

# Display name -> {download source -> repository path}, in registration order.
# Filled by register_model_group().
SUPPORTED_MODELS = OrderedDict()

# Human-readable training stage name -> short stage identifier.
TRAINING_STAGES = {
    "Supervised Fine-Tuning": "sft",
    "Reward Modeling": "rm",
    "PPO": "ppo",
    "DPO": "dpo",
    "Pre-Training": "pt"
}
31

32
class DownloadSource(str, Enum):
    """Identifier of the hub a model repository path refers to.

    Members are also ``str`` instances, so they compare equal to their raw
    values ("hf" / "ms"). DEFAULT is presumably the Hugging Face hub -- confirm.
    """

    DEFAULT = "hf"
    MODELSCOPE = "ms"
35

36

37
def register_model_group(
    models: Dict[str, Dict[DownloadSource, str]],
    module: Optional[str] = None,
    template: Optional[str] = None
) -> None:
    """Register a family of models that share one name prefix.

    Every entry of ``models`` is added to ``SUPPORTED_MODELS``. The prefix is
    the part of the model name before the first "-"; when given, ``module``
    and ``template`` are stored as that prefix's defaults in
    ``DEFAULT_MODULE`` and ``DEFAULT_TEMPLATE``.

    Args:
        models: Display name -> {download source -> repository path}.
        module: Default module name for the group, or None to leave it unset.
        template: Default template name for the group, or None to leave it unset.

    Raises:
        AssertionError: If the model names do not all share the same prefix.
            Nothing is registered in that case.
    """
    prefix = None
    for name in models:
        current = name.split("-")[0]
        if prefix is None:
            prefix = current
        elif current != prefix:
            # Raised explicitly (rather than via ``assert``) so the check is
            # not stripped under ``python -O``; type and message are unchanged.
            raise AssertionError("prefix should be identical.")

    if prefix is None:
        # Empty group: nothing to register, and no ``None`` key must leak
        # into the default tables.
        return

    # Validation happens before any mutation, so a failed call leaves the
    # registry untouched. ``update`` keeps the insertion order of ``models``.
    SUPPORTED_MODELS.update(models)
    if module is not None:
        DEFAULT_MODULE[prefix] = module
    if template is not None:
        DEFAULT_TEMPLATE[prefix] = template
53

54

55
# Baichuan group: default module "W_pack", default template "baichuan".
register_model_group(
    models={
        "Baichuan-7B-Base": {
            DownloadSource.DEFAULT: "baichuan-inc/Baichuan-7B",
            DownloadSource.MODELSCOPE: "baichuan-inc/baichuan-7B"
        },
        "Baichuan-13B-Base": {
            DownloadSource.DEFAULT: "baichuan-inc/Baichuan-13B-Base",
            DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Base"
        },
        "Baichuan-13B-Chat": {
            DownloadSource.DEFAULT: "baichuan-inc/Baichuan-13B-Chat",
            DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan-13B-Chat"
        }
    },
    module="W_pack",
    template="baichuan"
)
73

74

75
# Baichuan2 group: default module "W_pack", default template "baichuan2".
register_model_group(
    models={
        "Baichuan2-7B-Base": {
            DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Base",
            DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Base"
        },
        "Baichuan2-13B-Base": {
            DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Base",
            DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Base"
        },
        "Baichuan2-7B-Chat": {
            DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-7B-Chat",
            DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-7B-Chat"
        },
        "Baichuan2-13B-Chat": {
            DownloadSource.DEFAULT: "baichuan-inc/Baichuan2-13B-Chat",
            DownloadSource.MODELSCOPE: "baichuan-inc/Baichuan2-13B-Chat"
        }
    },
    module="W_pack",
    template="baichuan2"
)
97

98

99
# BLOOM group: default module "query_key_value"; no default template is set.
register_model_group(
    models={
        "BLOOM-560M": {
            DownloadSource.DEFAULT: "bigscience/bloom-560m",
            DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-560m"
        },
        "BLOOM-3B": {
            DownloadSource.DEFAULT: "bigscience/bloom-3b",
            DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-3b"
        },
        "BLOOM-7B1": {
            DownloadSource.DEFAULT: "bigscience/bloom-7b1",
            DownloadSource.MODELSCOPE: "AI-ModelScope/bloom-7b1"
        }
    },
    module="query_key_value"
)
116

117

118
# BLOOMZ group: default module "query_key_value"; no default template is set.
register_model_group(
    models={
        "BLOOMZ-560M": {
            DownloadSource.DEFAULT: "bigscience/bloomz-560m",
            DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-560m"
        },
        "BLOOMZ-3B": {
            DownloadSource.DEFAULT: "bigscience/bloomz-3b",
            DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-3b"
        },
        "BLOOMZ-7B1-mt": {
            DownloadSource.DEFAULT: "bigscience/bloomz-7b1-mt",
            DownloadSource.MODELSCOPE: "AI-ModelScope/bloomz-7b1-mt"
        }
    },
    module="query_key_value"
)
135

136

137
# BlueLM group: default template "bluelm"; no default module is set.
register_model_group(
    models={
        "BlueLM-7B-Base": {
            DownloadSource.DEFAULT: "vivo-ai/BlueLM-7B-Base",
            DownloadSource.MODELSCOPE: "vivo-ai/BlueLM-7B-Base"
        },
        "BlueLM-7B-Chat": {
            DownloadSource.DEFAULT: "vivo-ai/BlueLM-7B-Chat",
            DownloadSource.MODELSCOPE: "vivo-ai/BlueLM-7B-Chat"
        }
    },
    template="bluelm"
)
150

151

152
# ChatGLM2 group: default module "query_key_value", default template "chatglm2".
register_model_group(
    models={
        "ChatGLM2-6B-Chat": {
            DownloadSource.DEFAULT: "THUDM/chatglm2-6b",
            DownloadSource.MODELSCOPE: "ZhipuAI/chatglm2-6b"
        }
    },
    module="query_key_value",
    template="chatglm2"
)
162

163

164
# ChatGLM3 group: default module "query_key_value", default template "chatglm3".
register_model_group(
    models={
        "ChatGLM3-6B-Base": {
            DownloadSource.DEFAULT: "THUDM/chatglm3-6b-base",
            DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b-base"
        },
        "ChatGLM3-6B-Chat": {
            DownloadSource.DEFAULT: "THUDM/chatglm3-6b",
            DownloadSource.MODELSCOPE: "ZhipuAI/chatglm3-6b"
        }
    },
    module="query_key_value",
    template="chatglm3"
)
178

179

180
# ChineseLLaMA2 group: default template "llama2_zh"; no default module is set.
# The "-Chat" entries point at the chinese-alpaca-2 repositories.
register_model_group(
    models={
        "ChineseLLaMA2-1.3B": {
            DownloadSource.DEFAULT: "hfl/chinese-llama-2-1.3b",
            DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-1.3b"
        },
        "ChineseLLaMA2-7B": {
            DownloadSource.DEFAULT: "hfl/chinese-llama-2-7b",
            DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-7b"
        },
        "ChineseLLaMA2-13B": {
            DownloadSource.DEFAULT: "hfl/chinese-llama-2-13b",
            DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-llama-2-13b"
        },
        "ChineseLLaMA2-1.3B-Chat": {
            DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-1.3b",
            DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-1.3b"
        },
        "ChineseLLaMA2-7B-Chat": {
            DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-7b",
            DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-7b"
        },
        "ChineseLLaMA2-13B-Chat": {
            DownloadSource.DEFAULT: "hfl/chinese-alpaca-2-13b",
            DownloadSource.MODELSCOPE: "AI-ModelScope/chinese-alpaca-2-13b"
        }
    },
    template="llama2_zh"
)
209

210

211
# DeepseekLLM group: default template "deepseek"; no default module is set.
register_model_group(
    models={
        "DeepseekLLM-7B-Base": {
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-7b-base",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-base"
        },
        "DeepseekLLM-67B-Base": {
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-base",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-base"
        },
        "DeepseekLLM-7B-Chat": {
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-7b-chat",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-7b-chat"
        },
        "DeepseekLLM-67B-Chat": {
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-llm-67b-chat",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-llm-67b-chat"
        }
    },
    template="deepseek"
)
232

233

234
# DeepseekCoder group: default template "deepseekcoder"; no default module is set.
# The "-Chat" entries point at the "-instruct" repositories.
register_model_group(
    models={
        "DeepseekCoder-6.7B-Base": {
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-base",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-base"
        },
        "DeepseekCoder-33B-Base": {
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-base",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-base"
        },
        "DeepseekCoder-6.7B-Chat": {
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-6.7b-instruct",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-6.7b-instruct"
        },
        "DeepseekCoder-33B-Chat": {
            DownloadSource.DEFAULT: "deepseek-ai/deepseek-coder-33b-instruct",
            DownloadSource.MODELSCOPE: "deepseek-ai/deepseek-coder-33b-instruct"
        }
    },
    template="deepseekcoder"
)
255

256

257
# Falcon group: default module "query_key_value", default template "falcon".
register_model_group(
    models={
        "Falcon-7B": {
            DownloadSource.DEFAULT: "tiiuae/falcon-7b",
            DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b"
        },
        "Falcon-40B": {
            DownloadSource.DEFAULT: "tiiuae/falcon-40b",
            DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b"
        },
        "Falcon-180B": {
            DownloadSource.DEFAULT: "tiiuae/falcon-180b",
            DownloadSource.MODELSCOPE: "modelscope/falcon-180B"
        },
        "Falcon-7B-Chat": {
            DownloadSource.DEFAULT: "tiiuae/falcon-7b-instruct",
            DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-7b-instruct"
        },
        "Falcon-40B-Chat": {
            DownloadSource.DEFAULT: "tiiuae/falcon-40b-instruct",
            DownloadSource.MODELSCOPE: "AI-ModelScope/falcon-40b-instruct"
        },
        "Falcon-180B-Chat": {
            DownloadSource.DEFAULT: "tiiuae/falcon-180b-chat",
            DownloadSource.MODELSCOPE: "modelscope/falcon-180B-chat"
        }
    },
    module="query_key_value",
    template="falcon"
)
287

288

289
# InternLM group: default template "intern"; no default module is set.
register_model_group(
    models={
        "InternLM-7B": {
            DownloadSource.DEFAULT: "internlm/internlm-7b",
            DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-7b"
        },
        "InternLM-20B": {
            DownloadSource.DEFAULT: "internlm/internlm-20b",
            DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-20b"
        },
        "InternLM-7B-Chat": {
            DownloadSource.DEFAULT: "internlm/internlm-chat-7b",
            DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-chat-7b"
        },
        "InternLM-20B-Chat": {
            DownloadSource.DEFAULT: "internlm/internlm-chat-20b",
            DownloadSource.MODELSCOPE: "Shanghai_AI_Laboratory/internlm-chat-20b"
        }
    },
    template="intern"
)
310

311

312
# LingoWhale group: default module "qkv_proj"; no default template is set.
register_model_group(
    models={
        "LingoWhale-8B": {
            DownloadSource.DEFAULT: "deeplang-ai/LingoWhale-8B",
            DownloadSource.MODELSCOPE: "DeepLang/LingoWhale-8B"
        }
    },
    module="qkv_proj"
)
321

322

323
# LLaMA group: neither a default module nor a default template is set.
register_model_group(
    models={
        "LLaMA-7B": {
            DownloadSource.DEFAULT: "huggyllama/llama-7b",
            DownloadSource.MODELSCOPE: "skyline2006/llama-7b"
        },
        "LLaMA-13B": {
            DownloadSource.DEFAULT: "huggyllama/llama-13b",
            DownloadSource.MODELSCOPE: "skyline2006/llama-13b"
        },
        "LLaMA-30B": {
            DownloadSource.DEFAULT: "huggyllama/llama-30b",
            DownloadSource.MODELSCOPE: "skyline2006/llama-30b"
        },
        "LLaMA-65B": {
            DownloadSource.DEFAULT: "huggyllama/llama-65b",
            DownloadSource.MODELSCOPE: "skyline2006/llama-65b"
        }
    }
)
343

344

345
# LLaMA2 group: default template "llama2"; no default module is set.
register_model_group(
    models={
        "LLaMA2-7B": {
            DownloadSource.DEFAULT: "meta-llama/Llama-2-7b-hf",
            DownloadSource.MODELSCOPE: "modelscope/Llama-2-7b-ms"
        },
        "LLaMA2-13B": {
            DownloadSource.DEFAULT: "meta-llama/Llama-2-13b-hf",
            DownloadSource.MODELSCOPE: "modelscope/Llama-2-13b-ms"
        },
        "LLaMA2-70B": {
            DownloadSource.DEFAULT: "meta-llama/Llama-2-70b-hf",
            DownloadSource.MODELSCOPE: "modelscope/Llama-2-70b-ms"
        },
        "LLaMA2-7B-Chat": {
            DownloadSource.DEFAULT: "meta-llama/Llama-2-7b-chat-hf",
            DownloadSource.MODELSCOPE: "modelscope/Llama-2-7b-chat-ms"
        },
        "LLaMA2-13B-Chat": {
            DownloadSource.DEFAULT: "meta-llama/Llama-2-13b-chat-hf",
            DownloadSource.MODELSCOPE: "modelscope/Llama-2-13b-chat-ms"
        },
        "LLaMA2-70B-Chat": {
            DownloadSource.DEFAULT: "meta-llama/Llama-2-70b-chat-hf",
            DownloadSource.MODELSCOPE: "modelscope/Llama-2-70b-chat-ms"
        }
    },
    template="llama2"
)
374

375

376
# Mistral group: default template "mistral"; no default module is set.
register_model_group(
    models={
        "Mistral-7B": {
            DownloadSource.DEFAULT: "mistralai/Mistral-7B-v0.1",
            DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-v0.1"
        },
        "Mistral-7B-Chat": {
            DownloadSource.DEFAULT: "mistralai/Mistral-7B-Instruct-v0.1",
            DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-Instruct-v0.1"
        },
        "Mistral-7B-v0.2-Chat": {
            DownloadSource.DEFAULT: "mistralai/Mistral-7B-Instruct-v0.2",
            DownloadSource.MODELSCOPE: "AI-ModelScope/Mistral-7B-Instruct-v0.2"
        }
    },
    template="mistral"
)
393

394

395
# Mixtral group: default template "mistral"; no default module is set.
# "Mixtral-8x7B-Chat" lists only a DEFAULT source (no MODELSCOPE entry).
register_model_group(
    models={
        "Mixtral-8x7B": {
            DownloadSource.DEFAULT: "mistralai/Mixtral-8x7B-v0.1",
            DownloadSource.MODELSCOPE: "AI-ModelScope/Mixtral-8x7B-v0.1"
        },
        "Mixtral-8x7B-Chat": {
            DownloadSource.DEFAULT: "mistralai/Mixtral-8x7B-Instruct-v0.1"
        }
    },
    template="mistral"
)
407

408

409
# OpenChat3.5 group: default template "openchat"; no default module is set.
register_model_group(
    models={
        "OpenChat3.5-7B-Chat": {
            DownloadSource.DEFAULT: "openchat/openchat_3.5",
            DownloadSource.MODELSCOPE: "myxiongmodel/openchat_3.5"
        }
    },
    template="openchat"
)
418

419

420
# Phi1.5 group: default module "Wqkv"; no default template is set.
register_model_group(
    models={
        "Phi1.5-1.3B": {
            DownloadSource.DEFAULT: "microsoft/phi-1_5",
            DownloadSource.MODELSCOPE: "allspace/PHI_1-5"
        }
    },
    module="Wqkv"
)
429

430

431
# Qwen group: default module "c_attn", default template "qwen".
# Covers the base, chat, and Int8/Int4 chat repositories for each size.
register_model_group(
    models={
        "Qwen-1.8B": {
            DownloadSource.DEFAULT: "Qwen/Qwen-1_8B",
            DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B"
        },
        "Qwen-7B": {
            DownloadSource.DEFAULT: "Qwen/Qwen-7B",
            DownloadSource.MODELSCOPE: "qwen/Qwen-7B"
        },
        "Qwen-14B": {
            DownloadSource.DEFAULT: "Qwen/Qwen-14B",
            DownloadSource.MODELSCOPE: "qwen/Qwen-14B"
        },
        "Qwen-72B": {
            DownloadSource.DEFAULT: "Qwen/Qwen-72B",
            DownloadSource.MODELSCOPE: "qwen/Qwen-72B"
        },
        "Qwen-1.8B-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat",
            DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat"
        },
        "Qwen-7B-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat",
            DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat"
        },
        "Qwen-14B-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat",
            DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat"
        },
        "Qwen-72B-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat",
            DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat"
        },
        "Qwen-1.8B-int8-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat-Int8",
            DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat-Int8"
        },
        "Qwen-1.8B-int4-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-1_8B-Chat-Int4",
            DownloadSource.MODELSCOPE: "qwen/Qwen-1_8B-Chat-Int4"
        },
        "Qwen-7B-int8-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat-Int8",
            DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat-Int8"
        },
        "Qwen-7B-int4-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-7B-Chat-Int4",
            DownloadSource.MODELSCOPE: "qwen/Qwen-7B-Chat-Int4"
        },
        "Qwen-14B-int8-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat-Int8",
            DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat-Int8"
        },
        "Qwen-14B-int4-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-14B-Chat-Int4",
            DownloadSource.MODELSCOPE: "qwen/Qwen-14B-Chat-Int4"
        },
        "Qwen-72B-int8-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat-Int8",
            DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int8"
        },
        "Qwen-72B-int4-Chat": {
            DownloadSource.DEFAULT: "Qwen/Qwen-72B-Chat-Int4",
            DownloadSource.MODELSCOPE: "qwen/Qwen-72B-Chat-Int4"
        }
    },
    module="c_attn",
    template="qwen"
)
501

502

503
# Skywork group: neither a default module nor a default template is set.
register_model_group(
    models={
        "Skywork-13B-Base": {
            DownloadSource.DEFAULT: "Skywork/Skywork-13B-base",
            DownloadSource.MODELSCOPE: "skywork/Skywork-13B-base"
        }
    }
)
511

512

513
# Vicuna1.5 group: default template "vicuna"; no default module is set.
register_model_group(
    models={
        "Vicuna1.5-7B-Chat": {
            DownloadSource.DEFAULT: "lmsys/vicuna-7b-v1.5",
            DownloadSource.MODELSCOPE: "Xorbits/vicuna-7b-v1.5"
        },
        "Vicuna1.5-13B-Chat": {
            DownloadSource.DEFAULT: "lmsys/vicuna-13b-v1.5",
            DownloadSource.MODELSCOPE: "Xorbits/vicuna-13b-v1.5"
        }
    },
    template="vicuna"
)
526

527

528
# XuanYuan group: default template "xuanyuan"; no default module is set.
# Every entry lists only a DEFAULT source (no MODELSCOPE entries).
register_model_group(
    models={
        "XuanYuan-70B": {
            DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B"
        },
        "XuanYuan-70B-Chat": {
            DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat"
        },
        "XuanYuan-70B-int8-Chat": {
            DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-8bit"
        },
        "XuanYuan-70B-int4-Chat": {
            DownloadSource.DEFAULT: "Duxiaoman-DI/XuanYuan-70B-Chat-4bit"
        }
    },
    template="xuanyuan"
)
545

546

547
# XVERSE group: default template "xverse"; no default module is set.
register_model_group(
    models={
        "XVERSE-7B": {
            DownloadSource.DEFAULT: "xverse/XVERSE-7B",
            DownloadSource.MODELSCOPE: "xverse/XVERSE-7B"
        },
        "XVERSE-13B": {
            DownloadSource.DEFAULT: "xverse/XVERSE-13B",
            DownloadSource.MODELSCOPE: "xverse/XVERSE-13B"
        },
        "XVERSE-65B": {
            DownloadSource.DEFAULT: "xverse/XVERSE-65B",
            DownloadSource.MODELSCOPE: "xverse/XVERSE-65B"
        },
        "XVERSE-7B-Chat": {
            DownloadSource.DEFAULT: "xverse/XVERSE-7B-Chat",
            DownloadSource.MODELSCOPE: "xverse/XVERSE-7B-Chat"
        },
        "XVERSE-13B-Chat": {
            DownloadSource.DEFAULT: "xverse/XVERSE-13B-Chat",
            DownloadSource.MODELSCOPE: "xverse/XVERSE-13B-Chat"
        }
    },
    template="xverse"
)
572

573

574
# Yayi group: default template "yayi"; no default module is set.
register_model_group(
    models={
        "Yayi-7B": {
            DownloadSource.DEFAULT: "wenge-research/yayi-7b-llama2",
            DownloadSource.MODELSCOPE: "AI-ModelScope/yayi-7b-llama2"
        },
        "Yayi-13B": {
            DownloadSource.DEFAULT: "wenge-research/yayi-13b-llama2",
            DownloadSource.MODELSCOPE: "AI-ModelScope/yayi-13b-llama2"
        }
    },
    template="yayi"
)
587

588

589
# Yi group: default template "yi"; no default module is set.
register_model_group(
    models={
        "Yi-6B": {
            DownloadSource.DEFAULT: "01-ai/Yi-6B",
            DownloadSource.MODELSCOPE: "01ai/Yi-6B"
        },
        "Yi-34B": {
            DownloadSource.DEFAULT: "01-ai/Yi-34B",
            DownloadSource.MODELSCOPE: "01ai/Yi-34B"
        },
        "Yi-6B-Chat": {
            DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat",
            DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat"
        },
        "Yi-34B-Chat": {
            DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat",
            DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat"
        },
        "Yi-6B-int8-Chat": {
            DownloadSource.DEFAULT: "01-ai/Yi-6B-Chat-8bits",
            DownloadSource.MODELSCOPE: "01ai/Yi-6B-Chat-8bits"
        },
        "Yi-34B-int8-Chat": {
            DownloadSource.DEFAULT: "01-ai/Yi-34B-Chat-8bits",
            DownloadSource.MODELSCOPE: "01ai/Yi-34B-Chat-8bits"
        }
    },
    template="yi"
)
618

619

620
# Zephyr group: default template "zephyr"; no default module is set.
register_model_group(
    models={
        "Zephyr-7B-Alpha-Chat": {
            DownloadSource.DEFAULT: "HuggingFaceH4/zephyr-7b-alpha",
            DownloadSource.MODELSCOPE: "AI-ModelScope/zephyr-7b-alpha"
        },
        "Zephyr-7B-Beta-Chat": {
            DownloadSource.DEFAULT: "HuggingFaceH4/zephyr-7b-beta",
            DownloadSource.MODELSCOPE: "modelscope/zephyr-7b-beta"
        }
    },
    template="zephyr"
)
633

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.