{
  "trainer": {
    "evaluation_strategy": "steps",
    "per_device_train_batch_size": 4,
    "per_device_eval_batch_size": 4,
    "gradient_accumulation_steps": 8,
    "eval_steps": 3,
    "save_steps": 3,
    "logging_steps": 1,
    "learning_rate": 0.0001,
    "num_train_epochs": 2,
    "lr_scheduler_type": "cosine",
    "warmup_steps": 3,
    "fp16": false,
    "bf16": true,
    "torch_compile": false,
    "optim": "adamw_torch"
  },
  "lora": {
    "r": 8,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "bias": "none",
    "target_modules": ["up_proj", "down_proj"],
    "task_type": "CAUSAL_LM"
  },
  "load_in_8bit": true,
  "load_in_4bit": false,
  "only_target_loss": true,
  "mode": "chat",
  "templates_path": "internal_prompts/saiga_v2.json",
  "model_name": "models/mpt-7b-8k",
  "tokenizer_name": "EleutherAI/gpt-neox-20b",
  "model_type": "causal",
  "max_tokens_count": 8192
}