1
# Suggested data paths when using GPT-NeoX locally
# Single-dataset option; the commented-out "*-data-paths" / "*-data-weights" keys in this file are the weighted alternative.
3
"data_path": "data/enwik8/enwik8_text_document",
5
# or for weighted datasets:
6
# "train-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"],
7
# "test-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"],
8
# "valid-data-paths": ["data/enwik8/enwik8_text_document", "data/enwik8/enwik8_text_document"],
9
# "train-data-weights": [1., 2.],
10
# "test-data-weights": [2., 1.],
11
# "valid-data-weights": [0.5, 0.4],
13
# If weight_by_num_documents is True, builds dataset weights from a multinomial distribution over groups of data according to the number of documents in each group.
14
# WARNING: setting this to True will override any user-provided weights.
15
# "weight_by_num_documents": false,
16
# "weighted_sampler_alpha": 0.3,
# Vocabulary and merges files (GPT-2, judging by the file names) — presumably consumed by the tokenizer; confirm.
18
"vocab_file": "data/gpt2-vocab.json",
19
"merge_file": "data/gpt2-merges.txt",
# "save" and "load" point at the same "checkpoints" directory — presumably so a run resumes from what it last wrote; confirm.
21
"save": "checkpoints",
22
"load": "checkpoints",
# NOTE(review): presumably toggles validating checkpoints with a forward pass; disabled here — confirm against the GPT-NeoX argument docs.
23
"checkpoint_validation_with_forward_pass": false,
# Logging targets: TensorBoard output directory, plus the Weights & Biases (wandb) host URL and project name.
25
"tensorboard_dir": "tensorboard",
28
"wandb_host": "https://api.wandb.ai",
29
"wandb_project": "neox"