paddlenlp
83 строки · 1.9 Кб
{
  "dp_degree": "auto",
  "invalid_strategy": [
    "stage3_mp*"
  ],
  "max_search_time": 900,
  "max_time_per_task": 300,
  "metric_cfg": {
    "OptimizationDirection": "Maximize",
    "name": "interval_samples_per_second"
  },
  "micro_batch_size": "auto",
  "mode": "SFT",
  "model_cfg": {
    "global_batch_size": 16,
    "hidden_size": 4096,
    "num_attention_heads": 32,
    "num_layers": 28,
    "vocab_size": 65024
  },
  "mp_degree": "auto",
  "need_baseline": true,
  "pp_degree": [
    1
  ],
  "run_cmd": {
    "gradient_accumulation_steps": [
      "./autoconfig/llama7b_sft_params.json",
      "gradient_accumulation_steps"
    ],
    "micro_batch_size": [
      "./autoconfig/llama7b_sft_params.json",
      "per_device_train_batch_size"
    ],
    "mp_degree": [
      "./autoconfig/llama7b_sft_params.json",
      "tensor_parallel_degree"
    ],
    "pp_degree": [
      "./autoconfig/llama7b_sft_params.json",
      "pipeline_parallel_degree"
    ],
    "run_best_stage": {
      "autotuner_benchmark": [
        "./autoconfig/llama7b_sft_params.json",
        "autotuner_benchmark",
        0
      ]
    },
    "search_stage": {
      "autotuner_benchmark": [
        "./autoconfig/llama7b_sft_params.json",
        "autotuner_benchmark",
        1
      ]
    },
    "sharding_degree": [
      "./autoconfig/llama7b_sft_params.json",
      "sharding_parallel_degree"
    ],
    "sharding_stage": [
      "./autoconfig/llama7b_sft_params.json",
      "sharding",
      "stage"
    ],
    "use_recompute": [
      "./autoconfig/llama7b_sft_params.json",
      "recompute"
    ],
    "recompute_granularity": [
      "./autoconfig/llama7b_sft_params.json",
      "recompute_granularity"
    ]
  },
  "schedule_prior": [
    "mp4"
  ],
  "sharding_degree": "auto",
  "sharding_stage": "auto",
  "task_limit": 2000,
  "use_recompute": "auto",
  "recompute_granularity": "auto"
}