{
    "trainer": {
        "evaluation_strategy": "steps",
        "per_device_train_batch_size": 2,
        "per_device_eval_batch_size": 2,
        "gradient_accumulation_steps": 16,
        "eval_steps": 150,
        "save_steps": 150,
        "logging_steps": 5,
        "learning_rate": 0.003,
        "num_train_epochs": 5,
        "lr_scheduler_type": "cosine",
        "warmup_steps": 100,
        "fp16": false,
        "bf16": true,
        "gradient_checkpointing": false,
        "torch_compile": false,
        "optim": "adamw_torch",
        "half_precision_backend": "auto",
        "fp16_opt_level": "O2"
    },
    "deepspeed": {
        "bf16": {
            "enabled": true
        },
        "optimizer": {
            "type": "AdamW",
            "params": {
                "lr": "auto",
                "betas": "auto",
                "eps": "auto",
                "weight_decay": "auto"
            }
        },
        "zero_optimization": {
            "stage": 2,
            "offload_optimizer": {
                "device": "cpu",
                "pin_memory": true
            },
            "overlap_comm": true,
            "round_robin_gradients": true
        },
        "train_batch_size": "auto",
        "gradient_accumulation_steps": "auto"
    },
    "model_name": "ai-forever/FRED-T5-1.7B",
    "templates_path": "ru_alpaca_seq2seq_template.json",
    "model_type": "seq2seq",
    "max_source_tokens_count": 512,
    "max_target_tokens_count": 512
}