llm-adapters

math_running_commands
34 строки · 4.3 Кб
Перенос по словам
1
# LLaMA-7B-LORA
2
# Runs finetune.py on GPU 0 for 'yahma/llama-7b-hf' with the LoRA adapter
# (rank 32, alpha 64) on the listed projection modules; no validation split.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
  --base_model 'yahma/llama-7b-hf' \
  --data_path 'math_10k.json' \
  --output_dir './trained_models/llama-7b-lora-math/' \
  --batch_size 16 --micro_batch_size 4 \
  --num_epochs 3 \
  --learning_rate 3e-4 \
  --cutoff_len 256 \
  --val_set_size 0 \
  --eval_step 80 --save_step 80 \
  --adapter_name lora \
  --target_modules '["q_proj", "k_proj", "v_proj", "up_proj", "down_proj"]' \
  --lora_r 32 --lora_alpha 64
3

4
# LLaMA-7B-prefix
5
# Runs finetune.py on GPU 0 for 'yahma/llama-7b-hf' with the prefix-tuning
# adapter (10 virtual tokens). Unlike the other runs in this file it uses
# batch size 8, 5 epochs, learning rate 3e-2 and eval/save every 10 steps.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
  --base_model 'yahma/llama-7b-hf' \
  --data_path 'math_10k.json' \
  --output_dir './trained_models/llama-7b-prefix-math-vt10/' \
  --batch_size 8 --micro_batch_size 4 \
  --num_epochs 5 \
  --learning_rate 3e-2 \
  --cutoff_len 256 \
  --val_set_size 120 \
  --eval_step 10 --save_step 10 \
  --adapter_name prefix-tuning \
  --num_virtual_tokens 10
6

7
# LLaMA-7B-series
8
# Runs finetune.py on GPU 0 for 'yahma/llama-7b-hf' with the bottleneck
# adapter and --load_8bit, holding out 120 validation samples.
# NOTE(review): the output directory name ends in "attn" while the target
# modules are up_proj/gate_proj — confirm the directory name is intended.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
  --base_model 'yahma/llama-7b-hf' \
  --data_path 'math_10k.json' \
  --output_dir './trained_models/llama-7b-bottleneck-math-attn/' \
  --batch_size 16 --micro_batch_size 4 \
  --num_epochs 3 \
  --learning_rate 3e-4 \
  --cutoff_len 256 \
  --val_set_size 120 \
  --eval_step 80 --save_step 80 \
  --adapter_name bottleneck \
  --load_8bit \
  --target_modules '["up_proj", "gate_proj"]'
9

10
# LLaMA-7B-Parallel
11
# Runs finetune.py on GPU 0 for 'yahma/llama-7b-hf' with the bottleneck
# adapter in parallel mode (--use_parallel_adapter); no validation split.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
  --base_model 'yahma/llama-7b-hf' \
  --data_path 'math_10k.json' \
  --output_dir './trained_models/llama-7b-parallel-math/' \
  --batch_size 16 --micro_batch_size 4 \
  --num_epochs 3 \
  --learning_rate 3e-4 \
  --cutoff_len 256 \
  --val_set_size 0 \
  --eval_step 80 --save_step 80 \
  --adapter_name bottleneck \
  --use_parallel_adapter \
  --target_modules '["up_proj", "down_proj"]'
12

13
# For LLaMA-13B models, add `--use_gradient_checkpointing` to save memory.
14

15
# BLOOMZ-7B-LORA
16
# Runs finetune.py on GPU 0 for 'bigscience/bloomz-7b1' with the LoRA adapter
# (rank 32, alpha 64) and --load_8bit; no validation split.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
  --base_model 'bigscience/bloomz-7b1' \
  --data_path 'math_10k.json' \
  --output_dir './trained_models/bloomz-7b-lora-math-all-r32/' \
  --batch_size 16 --micro_batch_size 4 \
  --num_epochs 3 \
  --learning_rate 3e-4 \
  --cutoff_len 256 \
  --val_set_size 0 \
  --eval_step 80 --save_step 80 \
  --adapter_name lora \
  --load_8bit \
  --target_modules '["query_key_value", "dense_4h_to_h", "dense_h_to_4h"]' \
  --lora_r 32 --lora_alpha 64
17

18
# BLOOMZ-7B-series
19
# Runs finetune.py on GPU 0 for 'bigscience/bloomz-7b1' with the bottleneck
# adapter on a single target module; no validation split.
# NOTE(review): the output directory name ends in "bs256" while --batch_size
# is 16 — confirm which one is intended.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
  --base_model 'bigscience/bloomz-7b1' \
  --data_path 'math_10k.json' \
  --output_dir './trained_models/bloomz-7b-bottleneck-math-mlp-bs256/' \
  --batch_size 16 --micro_batch_size 4 \
  --num_epochs 3 \
  --learning_rate 3e-4 \
  --cutoff_len 256 \
  --val_set_size 0 \
  --eval_step 80 --save_step 80 \
  --adapter_name bottleneck \
  --target_modules '["dense_4h_to_h"]'
20

21

22
# BLOOMZ-7B-Parallel
23
# Runs finetune.py on GPU 0 for 'bigscience/bloomz-7b1' with the bottleneck
# adapter in parallel mode (--use_parallel_adapter) and --load_8bit;
# no validation split.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
  --base_model 'bigscience/bloomz-7b1' \
  --data_path 'math_10k.json' \
  --output_dir './trained_models/bloomz-7b-parallel-math-mlp-bs256/' \
  --batch_size 16 --micro_batch_size 4 \
  --num_epochs 3 \
  --learning_rate 3e-4 \
  --cutoff_len 256 \
  --val_set_size 0 \
  --eval_step 80 --save_step 80 \
  --adapter_name bottleneck \
  --use_parallel_adapter \
  --load_8bit \
  --target_modules '["dense_4h_to_h", "dense_h_to_4h"]'
24

25
# GPT-J-6B-LORA
26
# Runs finetune.py on GPU 0 for 'EleutherAI/gpt-j-6b' with the LoRA adapter
# (rank 32, alpha 64) on the listed modules; no validation split.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
  --base_model 'EleutherAI/gpt-j-6b' \
  --data_path 'math_10k.json' \
  --output_dir './trained_models/gptj-6b-lora-math-all-r32/' \
  --batch_size 16 --micro_batch_size 4 \
  --num_epochs 3 \
  --learning_rate 3e-4 \
  --cutoff_len 256 \
  --val_set_size 0 \
  --eval_step 80 --save_step 80 \
  --adapter_name lora \
  --target_modules '["q_proj", "k_proj", "v_proj", "fc_in", "fc_out"]' \
  --lora_r 32 --lora_alpha 64
27

28

29
# GPT-J-6B-Series
30
# Runs finetune.py on GPU 0 for 'EleutherAI/gpt-j-6b' with the bottleneck
# adapter on a single target module; no validation split.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
  --base_model 'EleutherAI/gpt-j-6b' \
  --data_path 'math_10k.json' \
  --output_dir './trained_models/gptj-6b-bottleneck-math-mlp-bs256/' \
  --batch_size 16 --micro_batch_size 4 \
  --num_epochs 3 \
  --learning_rate 3e-4 \
  --cutoff_len 256 \
  --val_set_size 0 \
  --eval_step 80 --save_step 80 \
  --adapter_name bottleneck \
  --target_modules '["fc_out"]'
31

32

33
# GPT-J-6B-Parallel
34
# Runs finetune.py on GPU 0 for 'EleutherAI/gpt-j-6b' with the bottleneck
# adapter in parallel mode (--use_parallel_adapter) and --load_8bit;
# no validation split.
CUDA_VISIBLE_DEVICES=0 python finetune.py \
  --base_model 'EleutherAI/gpt-j-6b' \
  --data_path 'math_10k.json' \
  --output_dir './trained_models/gptj-6b-parallel-math-mlp-bs256/' \
  --batch_size 16 --micro_batch_size 4 \
  --num_epochs 3 \
  --learning_rate 3e-4 \
  --cutoff_len 256 \
  --val_set_size 0 \
  --eval_step 80 --save_step 80 \
  --adapter_name bottleneck \
  --use_parallel_adapter \
  --load_8bit \
  --target_modules '["fc_in", "fc_out"]'
35
llm-adapters

Использование cookies