CSS-LM

Форк
0
/
extract_from_org.py 
78 строк · 2.8 Кб
1
import json
2

3
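# NOTE(review): the sample record below carries "tokens"/"h"/"t" keys, whereas this script reads line["label"] and line["text"] -- confirm which schema the org/*.txt files actually follow.
#{"label": "main subject", "tokens": "For the 1971 film \" A Blank on the Map \" , he joined the first Western expedition to a remote highland valley in New Guinea to seek out a lost tribe .", "h": ["A Blank on the Map", ["Q4655508", 20, 38, 0.5]], "t": ["lost tribe", ["Q672979", 138, 148, 0.5]]}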
4

5
# Constant value stored under the "aspect" key of every output record.
ASPECT = "scii"


def load_split(path, aspect=ASPECT):
    """Read a JSON-lines file and convert each record to the output schema.

    Every non-blank line of *path* is parsed with ``json.loads`` and mapped to
    a dict with three keys, in this order:

    * ``"sentiment"`` -- the input record's ``"label"`` value
    * ``"sentence"``  -- the input record's ``"text"`` value
    * ``"aspect"``    -- the constant *aspect* string

    An earlier revision also derived head/tail spans from a ``"metadata"``
    field; that logic was already commented out and is not reproduced here.

    Args:
        path: Path of the JSON-lines input file.
        aspect: Value written to the ``"aspect"`` key of every record.

    Returns:
        A list of converted records, in file order.

    Raises:
        OSError: If *path* cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``"label"`` or ``"text"`` key.
    """
    records = []
    # Read explicitly as UTF-8 so decoding does not depend on the platform's
    # locale default (the output files are written as UTF-8 as well).
    with open(path, "r", encoding="utf-8") as f:
        for raw in f:
            # A blank line (e.g. a trailing newline at end of file) is not a
            # record; json.loads would raise on it.
            if not raw.strip():
                continue
            example = json.loads(raw)
            records.append({
                "sentiment": example["label"],
                "sentence": example["text"],
                "aspect": aspect,
            })
    return records


def dump_split(path, records):
    """Write *records* to *path* as a single JSON document.

    Args:
        path: Destination file; overwritten if it already exists.
        records: JSON-serialisable object (here: the list from load_split).
    """
    with open(path, "w", encoding="utf-8") as f:
        json.dump(records, f)


if __name__ == "__main__":
    # Load every split before writing anything, so a missing or malformed
    # input aborts the run without leaving partial output behind.
    train_list = load_split("org/train.txt")
    dev_list = load_split("org/dev.txt")
    test_list = load_split("org/test.txt")

    # Per-split label lists, kept for quick sanity checks such as
    # len(set(train_label)).
    train_label = [record["sentiment"] for record in train_list]
    dev_label = [record["sentiment"] for record in dev_list]
    test_label = [record["sentiment"] for record in test_list]

    dump_split("train_all.json", train_list)
    dump_split("dev.json", dev_list)
    dump_split("test.json", test_list)
79

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.