rulm
34 строки · 960.0 Байт
{
    "trainer": {
        "evaluation_strategy": "steps",
        "per_device_train_batch_size": 2,
        "per_device_eval_batch_size": 2,
        "gradient_accumulation_steps": 16,
        "eval_steps": 150,
        "save_steps": 150,
        "logging_steps": 10,
        "learning_rate": 0.00005,
        "num_train_epochs": 3,
        "lr_scheduler_type": "cosine",
        "warmup_steps": 100,
        "fp16": false,
        "bf16": true,
        "gradient_checkpointing": false,
        "torch_compile": false,
        "optim": "adamw_torch",
        "half_precision_backend": "auto",
        "fp16_opt_level": "O2"
    },
    "lora": {
        "r": 8,
        "lora_alpha": 32,
        "lora_dropout": 0.1,
        "inference_mode": false,
        "task_type": "SEQ_2_SEQ_LM"
    },
    "load_in_8bit": true,
    "model_name": "bigscience/mt0-xl",
    "model_type": "seq2seq",
    "max_source_tokens_count": 256,
    "max_target_tokens_count": 256
}