simpletransformers
22 строки · 602.0 Байт
# Download script: fetches two Hugging Face datasets and writes each one to a
# tab-separated file under data/msmarco/.
import os

from datasets import load_dataset

# Directory that receives both exported TSV files.
OUTPUT_DIR = "data/msmarco"

# Create the output directory up front; exist_ok=True makes re-runs harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("=== Downloading MSMARCO ===")
print("Downloading MSMARCO training triples...")
# Only the "train" split of this dataset is used.
dataset = load_dataset("thilina/negative-sampling")["train"]

# Print the first record as a quick sanity check of what was loaded.
print("Dataset loaded. Sample:")
print(dataset[0])

# Only the "validation" split of the qrels dataset is used.
qrels = load_dataset("BeIR/msmarco-qrels")["validation"]

print("Saving dataset to disk...")
# Save both datasets to disk as tab-separated files. The extra keyword
# arguments (sep, index) are passed straight through to to_csv().
dataset.to_csv(f"{OUTPUT_DIR}/msmarco-train.tsv", sep="\t", index=False)
qrels.to_csv(f"{OUTPUT_DIR}/devs.tsv", sep="\t", index=False)

print("Done.")
print("=== MSMARCO download complete ===")