# CSS-LM
# Original listing: 78 lines · 2.8 KB
import json

# Sample record found in the original script header (one JSON object per line):
# {"label": "main subject", "tokens": "For the 1971 film \" A Blank on the Map \" , he joined the first Western expedition to a remote highland valley in New Guinea to seek out a lost tribe .", "h": ["A Blank on the Map", ["Q4655508", 20, 38, 0.5]], "t": ["lost tribe", ["Q672979", 138, 148, 0.5]]}
# NOTE(review): the sample above carries a "tokens" key, while the code below
# reads "label" and "text" -- confirm the actual schema of org/*.txt.


def _load_split(path):
    """Read one JSON-lines split file and convert it to the output schema.

    Each non-blank line of *path* is parsed as a JSON object. Its "label"
    value is stored under "sentiment", its "text" value under "sentence",
    and a constant "aspect" of "scii" is attached to every record.

    Args:
        path: Path of the JSON-lines file to read.

    Returns:
        A ``(records, labels)`` tuple: the list of converted dicts and the
        list of raw "label" values, both in file order.

    Raises:
        OSError: If *path* cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks "label" or "text".
    """
    records = []
    labels = []
    # Read explicitly as UTF-8 so the result does not depend on the platform's
    # default locale encoding (the outputs below are written as UTF-8 as well).
    with open(path, "r", encoding="utf-8") as f:
        for raw in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            if not raw.strip():
                continue
            item = json.loads(raw)
            record = {
                "sentiment": item["label"],
                "sentence": item["text"],
                "aspect": "scii",
            }
            labels.append(item["label"])
            # Disabled head/tail span extraction, kept for reference:
            # h_site = item["metadata"][:2]
            # t_site = item["metadata"][2:]
            # words = item["text"].replace("[[", "").replace("]]", "").replace("<<", "").replace(">>", "")
            # words = words.strip().split()
            # record["h"] = [" ".join(words[h_site[0]:h_site[1] + 1])]
            # record["t"] = [" ".join(words[t_site[0]:t_site[1] + 1])]
            records.append(record)
    return records, labels


train_list, train_label = _load_split("org/train.txt")
dev_list, dev_label = _load_split("org/dev.txt")
test_list, test_label = _load_split("org/test.txt")

# The *_label lists are only consumed by this (disabled) sanity check:
# print(len(set(train_label)))
# print(len(set(dev_label)))
# print(len(set(test_label)))

# Dump each converted split as a single JSON array.
for out_path, split_records in (
    ("train_all.json", train_list),
    ("dev.json", dev_list),
    ("test.json", test_list),
):
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(split_records, f)
79