rulm
36 строк · 1.0 Кб
{
    "trainer": {
        "evaluation_strategy": "steps",
        "per_device_train_batch_size": 2,
        "per_device_eval_batch_size": 2,
        "gradient_accumulation_steps": 64,
        "eval_steps": 50,
        "save_steps": 50,
        "logging_steps": 5,
        "learning_rate": 0.00025,
        "num_train_epochs": 4,
        "lr_scheduler_type": "cosine",
        "warmup_steps": 30,
        "fp16": false,
        "bf16": true,
        "torch_compile": false,
        "optim": "adamw_torch"
    },
    "lora": {
        "r": 16,
        "lora_alpha": 16,
        "lora_dropout": 0.05,
        "bias": "none",
        "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj"],
        "task_type": "CAUSAL_LM"
    },
    "use_flash_attention_2": true,
    "load_in_8bit": false,
    "load_in_4bit": true,
    "only_target_loss": true,
    "mode": "chat",
    "templates_path": "internal_prompts/saiga_v2.json",
    "model_name": "models/Yarn-Mistral-7b-128k",
    "model_type": "causal",
    "max_tokens_count": 2000
}