# openprompt (file-viewer header: 69 lines · 1.3 KB)
# Dataset selection.
dataset:
  name: super_glue.wic
  path: null  # dataset in huggingface doesn't need path

# Pre-trained language model and the optimizer settings attached to it.
# NOTE(review): the nesting below was rebuilt from a flattened listing in
# which line numbers were fused onto every key — confirm against the
# consumer's schema.
plm:
  model_name: t5
  model_path: t5-large
  optimize:
    freeze_para: true
    lr: 1.0e-5
    weight_decay: 0.0
    scheduler:
      type: null
      num_warmup_steps: 500

# Sequence-length and truncation settings for the data loader.
dataloader:
  max_seq_length: 256  # maximum sequence length
  # The decoder max length used to truncate the decoder input sequence if it
  # is an encoder-decoder architecture. Note that it's not equivalent to
  # generation.max_length, which is used merely in the generation phase.
  decoder_max_length: 3
  truncate_method: "head"  # choosing from balanced, head, tail
  decode_from_pad: true

# Training settings.
train:
  batch_size: 8
  gradient_accumulation_steps: 1
  max_grad_norm: 1.0
  num_epochs: null  # left unset; num_training_steps is given instead
  num_training_steps: 30000

# Test-split settings.
test:
  batch_size: 16

# Dev-split settings.
dev:
  batch_size: 16

# Selected template and verbalizer; each value matches the name of a
# configuration section in this file.
template: soft_template
verbalizer: manual_verbalizer

# Settings for the soft template, including its own optimizer block.
# NOTE(review): the nesting below was rebuilt from a flattened listing in
# which line numbers were fused onto every key — confirm against the
# consumer's schema.
soft_template:
  choice: 0
  file_path: scripts/SuperGLUE/WiC/soft_template.txt
  num_tokens: 20
  initialize_from_vocab: true
  random_range: 0.5
  optimize:
    name: AdamW
    lr: 0.3
    adam_epsilon: 1.0e-8
    scheduler:
      num_warmup_steps: 500

# Settings for the manual verbalizer.
manual_verbalizer:
  choice: 0
  file_path: scripts/SuperGLUE/WiC/manual_verbalizer.txt

# Device / parallelism settings.
environment:
  num_gpus: 3
  cuda_visible_devices: null
  local_rank: 0
  model_parallel: true
  device_map: null

# Learning setting used for this experiment.
learning_setting: full