# CSS-LM
# Listing header from the code viewer: 37 lines · 838.0 bytes
"""Merge two sentence files into a single-column CSV training file.

Non-blank lines are read first from the few-shot domain file and then from
the retrieved-sentences file, stripped of surrounding whitespace, and written
one per row to ``train.txt``. The total number of rows across both inputs is
capped at ``max_num_sent``.
"""
import csv

# Upper bound on the total number of rows written (both inputs combined).
# An earlier configuration used 1000000 here.
max_num_sent = 500000

DOMAIN_PATH = "domain_data/all_fewshot.txt"
# "openwebtext.txt" was used as an alternative second input at some point.
RETRIEVE_PATH = "retrieve.txt"
OUTPUT_PATH = "train.txt"


def _collect_sentences(path, rows, limit):
    """Append stripped, non-blank lines of *path* to *rows*, up to *limit* rows.

    Args:
        path: Text file to read, one sentence per line.
        rows: List of single-element rows, extended in place. Its current
            length counts toward *limit*, so the cap is shared between calls.
        limit: Maximum total length *rows* may reach.

    Returns:
        The length of *rows* after reading.
    """
    # NOTE(review): UTF-8 is assumed for the corpora; the original relied on
    # the platform default encoding. Confirm against the actual data files.
    with open(path, encoding="utf-8") as handle:
        for line in handle:
            if len(rows) >= limit:
                break
            text = line.strip()
            # Skip blank lines, including whitespace-only ones, so no empty
            # sentence is written or counted toward the cap.
            if not text:
                continue
            rows.append([text])
    return len(rows)


def main(domain_path=DOMAIN_PATH, retrieve_path=RETRIEVE_PATH,
         output_path=OUTPUT_PATH, limit=None):
    """Build the training CSV from the domain and retrieved sentence files.

    Args:
        domain_path: File read first.
        retrieve_path: File read second, filling whatever capacity remains.
        output_path: Destination CSV; overwritten if it exists.
        limit: Maximum number of rows in total. Defaults to ``max_num_sent``.

    Returns:
        The list of single-element rows that was written.

    Raises:
        OSError: If an input cannot be opened or the output cannot be written.
    """
    if limit is None:
        limit = max_num_sent

    rows = []
    print("Domain:", _collect_sentences(domain_path, rows, limit))
    print("All:", _collect_sentences(retrieve_path, rows, limit))

    # newline="" is required by the csv module; without it the writer's own
    # "\r\n" terminator is translated again on Windows, producing blank rows.
    with open(output_path, "w", newline="", encoding="utf-8") as handle:
        csv.writer(handle).writerows(rows)
    return rows


if __name__ == "__main__":
    main()