rulm
32 строки · 898.0 Байт
{
  "trainer": {
    "evaluation_strategy": "steps",
    "per_device_train_batch_size": 1,
    "per_device_eval_batch_size": 2,
    "gradient_accumulation_steps": 32,
    "eval_steps": 150,
    "save_steps": 150,
    "logging_steps": 10,
    "learning_rate": 0.0003,
    "num_train_epochs": 3,
    "lr_scheduler_type": "cosine",
    "warmup_steps": 100,
    "fp16": false,
    "bf16": false,
    "torch_compile": false,
    "optim": "adamw_torch"
  },
  "lora": {
    "r": 4,
    "lora_alpha": 32,
    "lora_dropout": 0.02,
    "inference_mode": false,
    "task_type": "SEQ_2_SEQ_LM"
  },
  "load_in_8bit": true,
  "model_name": "bigscience/mt0-xxl-mt",
  "model_type": "seq2seq",
  "template_category": "seq2seq_no_newlines",
  "max_source_tokens_count": 400,
  "max_target_tokens_count": 400
}