dream

Форк
0
216 строк · 7.5 Кб
1
#!/usr/bin/env python
2

3
import re
4
import requests
5
import json
6
import datetime
7
from itertools import chain
8
from collections import Counter
9

10

11
class Client:
    """
    Thin HTTP client for the Evi KG (knowledge graph) services.

    Both public methods send a JSON-encoded POST request and return the
    decoded JSON body of the response.
    """

    def __init__(self, api_key, entity_resolution_url, knowledge_query_url):
        """
        Class for Evi KG requests

        :param api_key: value sent in the ``x-api-key`` header of every request
        :param entity_resolution_url: endpoint used by
            ``get_knowledge_entity_resolution``
        :param knowledge_query_url: endpoint used by
            ``get_knowledge_query_answer``
        """
        self.entity_resolution_url = entity_resolution_url
        self.knowledge_query_url = knowledge_query_url
        self.headers = {"Content-Type": "application/json;charset=utf-8", "x-api-key": api_key}

    def _post(self, url, payload, timeout_in_millis):
        """
        POST ``payload`` as JSON to ``url`` and return the decoded JSON reply.

        ``timeout_in_millis`` is converted to seconds, the unit ``requests``
        expects. A falsy value (``None`` or ``0``) means "no timeout", which
        is what happened unconditionally before the timeout was wired in.
        """
        timeout = timeout_in_millis / 1000.0 if timeout_in_millis else None
        response = requests.request(
            method="POST",
            url=url,
            headers=self.headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
        return response.json()

    def get_knowledge_query_answer(self, query, variableBindings, timeout_in_millis):
        """
        Run a knowledge query.

        :param query: query object, sent under the ``"query"`` key
        :param variableBindings: bindings, sent under ``"variableBindings"``
        :param timeout_in_millis: client-side request timeout in milliseconds
        :return: decoded JSON response
        """
        return self._post(
            self.knowledge_query_url,
            {"query": query, "variableBindings": variableBindings},
            timeout_in_millis,
        )

    def get_knowledge_entity_resolution(self, mention, classConstraints, timeout_in_millis):
        """
        Resolve a textual mention to knowledge-graph entities.

        :param mention: mention, sent under the ``"mention"`` key
        :param classConstraints: constraints, sent under ``"classConstraints"``
        :param timeout_in_millis: client-side request timeout in milliseconds
        :return: decoded JSON response
        """
        return self._post(
            self.entity_resolution_url,
            {"mention": mention, "classConstraints": classConstraints},
            timeout_in_millis,
        )
37

38

39
class EntityDatabase:
    """
    In-memory store of entities, their posts and their relations.

    ``self.data`` maps an entity id to a record with the keys ``"posts"``,
    ``"base_class"``, ``"prefLabel"``, ``"related"`` and ``"classes"``.
    ``self.counter`` tracks how many posts each entity has.
    """

    def __init__(self):
        """
        Database class.

        Lets you contain entities and related post, add entity properties and relations between
        entities and find related to given entities.
        """
        self.data = dict()
        self.counter = Counter()

    def __iter__(self):
        return iter(self.data)

    def __len__(self):
        return len(self.data)

    def __contains__(self, item):
        return item in self.data

    def __getitem__(self, key):
        return self.data[key]

    def __str__(self):
        return str(self.data)

    def __setitem__(self, entity, post):
        """
        Add entity and related post

        ``db[entity] = post`` appends ``post`` to the entity's posts (creating
        the entity record if needed). A post already stored for the entity is
        ignored and does not change the counter.
        """
        if entity in self:
            posts = self.data[entity]["posts"]
            if post in posts:
                return None
            posts.append(post)
        else:
            self.data[entity] = {
                "posts": [post],
                "base_class": "",
                "prefLabel": "",
                "related": [],  # relations, see _split_relation for accepted shapes
                "classes": [],
            }
        self.counter[entity] += 1

    def _recount(self):
        """Rebuild ``self.counter`` from the number of posts of each entity."""
        self.counter = Counter({entity: len(self.data[entity]["posts"]) for entity in self.data})

    @staticmethod
    def _split_relation(relation):
        """
        Return ``(entity, connection_type)`` for a stored relation.

        Accepts a ``{"entity": ..., "connection_type": ...}`` dict as well as
        the 2-item tuple written by ``add_connection`` (a 2-item list after a
        JSON save/load round trip).
        """
        if isinstance(relation, dict):
            return relation["entity"], relation["connection_type"]
        related_entity, connection_type = relation
        return related_entity, connection_type

    def save(self, filename):
        """Write ``self.data`` to ``filename`` as JSON."""
        # Context manager guarantees the file is flushed and closed.
        with open(filename, "w") as fp:
            json.dump(self.data, fp)

    def load(self, filename):
        """
        Replace the contents with the JSON object stored in ``filename``.

        :return: ``self``, so the call can be chained
        :raises TypeError: if the file does not contain a JSON object
        """
        with open(filename) as fp:
            loaded = json.load(fp)
        # Validate before assigning so a bad file cannot clobber current data;
        # an explicit raise (unlike assert) also survives ``python -O``.
        if not isinstance(loaded, dict):
            raise TypeError("entity database file must contain a JSON object")
        self.data = loaded
        self._recount()
        return self

    def most_common(self, n=None):
        """
        Return most common entities and their number
        """
        return self.counter.most_common(n)

    def get_posts(self, entity, expired_days=3, topic=None):
        """
        Get posts related to entity, optionally filtered by days and topic

        Posts whose ``"topic"`` differs from ``topic`` (when given) are
        dropped. Posts with ``"content_category" == "news"`` are dropped when
        their ``"created_utc"`` timestamp is more than ``expired_days`` days old.
        """
        # The file does ``import datetime``, so the class lives at
        # ``datetime.datetime``. Both instants are timezone-aware UTC.
        now = datetime.datetime.now(tz=datetime.timezone.utc)
        filtered = []
        for post in self.data[entity]["posts"]:
            if topic is not None and post["topic"] != topic:
                continue
            created = datetime.datetime.fromtimestamp(int(post["created_utc"]), tz=datetime.timezone.utc)
            if (now - created).days > expired_days and post["content_category"] == "news":
                continue
            filtered.append(post)
        return filtered

    def get_related_entities(self, entity):
        """
        Get the list of related entities
        """
        return self.data[entity]["related"]

    def get_related_posts(self, entity, expired_days=3, topic=None):
        """
        Get all posts from related entities

        :return: list of ``{"posts": [...], "connection_type": ...}`` dicts,
            one per relation of ``entity``
        """
        related_posts = []
        for relation in self.get_related_entities(entity):
            related_entity, connection_type = self._split_relation(relation)
            related_posts.append(
                {
                    "posts": self.get_posts(related_entity, expired_days, topic),
                    "connection_type": connection_type,
                }
            )
        return related_posts

    def add_connection(self, original_entity, related_entity, connection):
        """
        Add a connection from original_entity to related_entity

        ``connection`` must provide ``"name"`` and ``"bidirectional"``; when
        the latter is truthy the reverse connection is stored as well.
        Nothing happens unless both entities are already in the database.
        """
        if original_entity in self and related_entity in self:
            self.data[original_entity]["related"].append((related_entity, connection["name"]))
            if connection["bidirectional"]:
                self.data[related_entity]["related"].append((original_entity, connection["name"]))

    def add_classes(self, entity, entity_classes, client):
        """
        Add list of classes to entity with respect to entity_classes

        :param entity: id of an entity already stored in the database
        :param entity_classes: mapping ``base_class -> iterable of subclasses``
        :param client: object exposing ``get_knowledge_query_answer``
        """
        query = {"text": "query cls | m <aio:isAnInstanceOf> cls"}
        variableBindings = [{"variable": "m", "dataType": "aio:Entity", "value": entity}]
        try:
            answer = client.get_knowledge_query_answer(
                query=query, variableBindings=variableBindings, timeout_in_millis=10000
            )
            results = {a["bindingList"][0]["value"] for a in answer["results"]}
        except Exception as e:
            # Deliberately best-effort: a failed lookup leaves the entity unclassified.
            print(e)
            results = set()

        record = self.data[entity]
        for base_class, subclasses in entity_classes.items():
            if base_class not in results:
                continue
            candidates = set(subclasses)
            candidates.add(base_class)
            # "classes" stays a list so the record remains JSON-serializable;
            # sorted() keeps the stored order deterministic.
            already_known = set(record["classes"])
            record["classes"].extend(sorted((candidates & results) - already_known))
            if base_class != "aio:Thing":  # aio:Thing - common base class for miscellaneous types of entities
                record["base_class"] = base_class
        # Applied after the loop so it also covers an empty entity_classes.
        if record["base_class"] == "":
            record["base_class"] = "aio:Thing"

    def add_class(self, entity, cls, client):
        """
        Check whether ``entity`` is an instance of ``cls`` and record it if so.

        :return: ``True`` if ``cls`` was already recorded or the service
            answered ``"YES"``; ``False`` otherwise (including on any error)
        """
        if cls in self.data[entity]["classes"]:
            return True
        query = {"text": "query | a <aio:isAnInstanceOf> cls"}
        variableBindings = [
            {"variable": "a", "dataType": "aio:Entity", "value": entity},
            {"variable": "cls", "dataType": "aio:Entity", "value": cls},
        ]
        try:
            answer = client.get_knowledge_query_answer(
                query=query, variableBindings=variableBindings, timeout_in_millis=1000
            )
            result = answer["status"] == "YES"
        except Exception as e:
            # Best-effort: treat any failure as "not an instance".
            print(e)
            result = False
        if result:
            self.data[entity]["classes"].append(cls)
        return result

    def add_preflabel(self, entity, client):
        """
        Add preferable label (name) to entity

        If the lookup fails, the label is derived from the entity id: the
        first four characters are dropped, underscores become spaces and
        everything except ASCII letters and spaces is removed.
        """
        query = {"text": "query lab | m <aio:prefLabel> lab"}
        variableBindings = [{"variable": "m", "dataType": "aio:Entity", "value": entity}]
        try:
            answer = client.get_knowledge_query_answer(
                query=query, variableBindings=variableBindings, timeout_in_millis=10000
            )
            label = answer["results"][0]["bindingList"][0]["value"]
        except Exception as e:
            # Best-effort: fall back to a label built from the id itself.
            print(e)
            label = re.sub(r"[^A-Za-z ]+", "", entity[4:].replace("_", " "))
        self.data[entity]["prefLabel"] = label
217

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.