# CSS-LM
# 78 lines · 2.8 KB
import json
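# Example record (apparently left over from a different dataset: the loaders below read the "label" and "text" keys, which differ from the "tokens"/"h"/"t" layout shown here):
# {"label": "main subject", "tokens": "For the 1971 film \" A Blank on the Map \" , he joined the first Western expedition to a remote highland valley in New Guinea to seek out a lost tribe .", "h": ["A Blank on the Map", ["Q4655508", 20, 38, 0.5]], "t": ["lost tribe", ["Q672979", 138, 148, 0.5]]}
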
def _load_split(path):
    """Read one JSON-lines split file and convert it to the output schema.

    Every non-blank line of *path* is parsed as a JSON object that must
    provide the keys "label" and "text".

    Returns:
        A ``(records, labels)`` pair. ``records`` holds one dict per parsed
        line with the keys "sentiment" (the label), "sentence" (the text)
        and "aspect" (always the string "scicite"). ``labels`` holds the
        labels in file order.

    Raises:
        OSError: if *path* cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks "label" or "text".
    """
    records = []
    labels = []
    # Decode explicitly as UTF-8 instead of relying on the platform default,
    # matching the explicit UTF-8 used when the converted files are written.
    with open(path, "r", encoding="utf-8") as f:
        for raw in f:
            if not raw.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            example = json.loads(raw)
            labels.append(example["label"])
            # Key order is kept (sentiment, sentence, aspect) so the
            # serialized output stays the same.
            records.append({
                "sentiment": example["label"],
                "sentence": example["text"],
                "aspect": "scicite",
            })
    return records, labels


# Module-level results: the *_list names are dumped to JSON further down and
# the *_label names are used for the label-count report.
train_list, train_label = _load_split("org/train.txt")
dev_list, dev_label = _load_split("org/dev.txt")
test_list, test_label = _load_split("org/test.txt")

# Report the number of distinct labels in each split (train, dev, test).
for split_labels in (train_label, dev_label, test_label):
    print(len(set(split_labels)))

# Dump each converted split to its own file as a single JSON array.
for out_name, split_records in (
    ("train_all.json", train_list),
    ("dev.json", dev_list),
    ("test.json", test_list),
):
    with open(out_name, "w", encoding='utf-8') as sink:
        json.dump(split_records, sink)