# OpenPrompt configuration — SuperGLUE COPA with a soft (continuous) template.
dataset:
  name: super_glue.copa
  path:  # dataset in huggingface doesn't need path

# Pretrained language model and its (frozen) optimizer settings.
plm:
  model_name: bert
  model_path: bert-large-cased
  optimize:
    freeze_para: true  # PLM weights stay frozen; only the soft prompt is tuned
    lr: 1.0e-5
    weight_decay: 0.0
    scheduler:
      type:  # left empty — presumably falls back to the framework default; confirm
      num_warmup_steps: 500

dataloader:
  max_seq_length: 384
  decoder_max_length: 3  # the decoder max length to truncate decoder input sequence
  # if it is an encoder-decoder architecture. Note that it's not equivalent
  # to generation.max_length which is used merely in the generation phase.
  truncate_method: "head"  # choosing from balanced, head, tail
  decode_from_pad: false

train:
  batch_size: 4
  gradient_accumulation_steps: 1
  max_grad_norm: 1.0
  num_epochs:  # left unset — num_training_steps appears to govern run length; verify
  num_training_steps: 30000
  teacher_forcing: false


test:
  batch_size: 16

dev:
  batch_size: 16

# Which template/verbalizer stanzas below are active for this run.
template: soft_template
verbalizer: contextual_verbalizer


soft_template:
  choice: 0
  file_path: scripts/SuperGLUE/COPA/soft_template.txt
  num_tokens: 20
  initialize_from_vocab: true
  random_range: 0.5
  # Separate optimizer for the soft-prompt parameters (the PLM itself is frozen).
  optimize:
    name: AdamW
    lr: 0.03
    adam_epsilon: 1.0e-8
    scheduler:
      num_warmup_steps: 500

# No options set — the verbalizer is used with its defaults.
contextual_verbalizer:

environment:
  num_gpus: 1
  cuda_visible_devices:  # empty — presumably means "no restriction"; confirm against runner
  local_rank: 0

learning_setting: full  # few_shot

# Uncomment (and set learning_setting: few_shot) to enable few-shot sampling.
# few_shot:
#   parent_config: learning_setting
#   few_shot_sampling: sampling_from_train

# sampling_from_train:
#   parent_config: few_shot_sampling
#   num_examples_per_label: 100
#   also_sample_dev: True
#   num_examples_per_label_dev: 100
#   seed:
#     - 123