2
"model_name_or_path": "facebook/opt-125m",
3
"dataset_name_or_path": "./data",
4
"output_dir": "./checkpoints/opt_sft_ckpts",
5
"per_device_train_batch_size": 4,
6
"gradient_accumulation_steps": 4,
7
"per_device_eval_batch_size": 8,
8
"eval_accumulation_steps":16,
10
"learning_rate": 3e-05,
13
"evaluation_strategy": "epoch",
14
"save_strategy": "epoch",
18
"fp16_opt_level": "O2",
22
"load_best_model_at_end": true,
23
"eval_with_do_generation": false,
24
"metric_for_best_model": "accuracy",
26
"save_total_limit": 1,
27
"sharding_parallel_degree": 4,
29
"zero_padding": false,
30
"use_flash_attention": false