# const.py: configuration constants for the LLM-FineTuning-Large-Language-Models repo
import os
import torch

# The notebook `Pytorch-Roberta_Large.ipynb` can be run either locally or on
# Kaggle; just point ROOT_DIR at wherever the feedback-prize-2021 dataset lives.
# ROOT_DIR = '../input/feedback-prize-2021/'  # Kaggle
ROOT_DIR = '../input/'  # local
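
# A hypothetical convenience helper (not in the original file) sketching how
# ROOT_DIR is meant to be consumed downstream; e.g. dataset_path('train.csv')
# resolves to the competition's training CSV under either root above.
def dataset_path(*parts):
    return os.path.join(ROOT_DIR, *parts)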

# MODEL_NAME = 'roberta-large'
MODEL_NAME = 'roberta-base'

MODEL_PATH = 'model'

RUN_NAME = f"{MODEL_NAME}"

MAX_LEN = 512

DOC_STRIDE = 128
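
# A minimal sketch of how MAX_LEN and DOC_STRIDE interact: essays longer than
# MAX_LEN tokens are split into overlapping windows, with consecutive windows
# sharing DOC_STRIDE tokens. The helper below is hypothetical (not part of the
# original repo) and assumes a Hugging Face *fast* tokenizer from the
# `transformers` library.
def sliding_window_encode(text, tokenizer):
    return tokenizer(
        text,
        max_length=MAX_LEN,
        stride=DOC_STRIDE,
        truncation=True,
        return_overflowing_tokens=True,  # emit one encoding per window
        return_offsets_mapping=True,     # char spans, for mapping tokens back to words
    )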

config = {
    'train_batch_size': 4,
    'valid_batch_size': 1,
    'epochs': 5,
    'learning_rates': [2.5e-5, 2.5e-5, 2.5e-6, 2.5e-6, 2.5e-7],
    'max_grad_norm': 10,
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'model_name': MODEL_NAME,
    'max_length': MAX_LEN,
    'doc_stride': DOC_STRIDE,
}

# Note: `learning_rates` above holds five entries, one per epoch (the rate
# steps down from 2.5e-5 to 2.5e-7 over training).
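
# A minimal sketch (an addition, not part of the original repo) of how a
# per-epoch schedule like config['learning_rates'] is typically applied:
# before each epoch, overwrite the lr of every optimizer param group.
def set_epoch_lr(optimizer, epoch):
    lr = config['learning_rates'][epoch]
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr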

output_labels = ['O', 'B-Lead', 'I-Lead', 'B-Position', 'I-Position',
                 'B-Claim', 'I-Claim', 'B-Counterclaim', 'I-Counterclaim',
                 'B-Rebuttal', 'I-Rebuttal', 'B-Evidence', 'I-Evidence',
                 'B-Concluding Statement', 'I-Concluding Statement']

# Bidirectional lookup tables between BIO tag strings and integer class ids.
LABELS_TO_IDS = {label: idx for idx, label in enumerate(output_labels)}
IDS_TO_LABELS = dict(enumerate(output_labels))
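
# A quick sanity check of the two tables (an addition, not in the original
# file; runs only when this module is executed directly, e.g. `python const.py`).
if __name__ == '__main__':
    assert LABELS_TO_IDS['O'] == 0
    assert LABELS_TO_IDS['B-Claim'] == 5
    # The two dicts are exact inverses of each other.
    assert all(IDS_TO_LABELS[LABELS_TO_IDS[lab]] == lab for lab in output_labels)
    print(f'{len(output_labels)} labels mapped:', LABELS_TO_IDS)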