dream
216 строк · 7.5 Кб
1#!/usr/bin/env python
2
3import re4import requests5import json6import datetime7from itertools import chain8from collections import Counter9
10
class Client:
    """HTTP client for the Evi knowledge-graph (KG) endpoints.

    Holds the two endpoint URLs and the request headers (JSON content type
    plus the ``x-api-key`` header) and POSTs JSON payloads to them.
    """

    def __init__(self, api_key, entity_resolution_url, knowledge_query_url):
        """
        Class for Evi KG requests

        :param api_key: value sent in the ``x-api-key`` header of every request
        :param entity_resolution_url: URL used by ``get_knowledge_entity_resolution``
        :param knowledge_query_url: URL used by ``get_knowledge_query_answer``
        """
        self.entity_resolution_url = entity_resolution_url
        self.knowledge_query_url = knowledge_query_url
        self.headers = {"Content-Type": "application/json;charset=utf-8", "x-api-key": api_key}

    def _post(self, url, payload, timeout_in_millis):
        """POST ``payload`` as JSON to ``url`` and return the decoded JSON response.

        ``timeout_in_millis`` is converted to seconds for ``requests``.
        ``None`` or a non-positive value means "no timeout", because
        ``requests`` rejects a zero/negative timeout.
        """
        if timeout_in_millis is not None and timeout_in_millis > 0:
            timeout = timeout_in_millis / 1000.0
        else:
            timeout = None
        response = requests.request(
            method="POST",
            url=url,
            headers=self.headers,
            data=json.dumps(payload),
            timeout=timeout,
        )
        return response.json()

    def get_knowledge_query_answer(self, query, variableBindings, timeout_in_millis):
        """Send a knowledge query and return the decoded JSON answer.

        :param query: query object placed under the ``"query"`` key
        :param variableBindings: bindings placed under the ``"variableBindings"`` key
        :param timeout_in_millis: request timeout in milliseconds; previously
            accepted but ignored, now enforced on the HTTP call
        """
        payload = {"query": query, "variableBindings": variableBindings}
        return self._post(self.knowledge_query_url, payload, timeout_in_millis)

    def get_knowledge_entity_resolution(self, mention, classConstraints, timeout_in_millis):
        """Resolve a textual mention to entities and return the decoded JSON answer.

        :param mention: value placed under the ``"mention"`` key
        :param classConstraints: value placed under the ``"classConstraints"`` key
        :param timeout_in_millis: request timeout in milliseconds; previously
            accepted but ignored, now enforced on the HTTP call
        """
        payload = {"mention": mention, "classConstraints": classConstraints}
        return self._post(self.entity_resolution_url, payload, timeout_in_millis)
38
class EntityDatabase:
    """In-memory store of entities, their posts, classes and relations.

    Lets you contain entities and related posts, add entity properties and
    relations between entities, and find posts related to given entities.

    ``self.data`` maps an entity id to a record with the keys ``"posts"``,
    ``"base_class"``, ``"prefLabel"``, ``"related"`` and ``"classes"``.
    ``self.counter`` counts the posts stored per entity.
    """

    def __init__(self):
        """Create an empty database."""
        self.data = dict()
        self.counter = Counter()

    def __iter__(self):
        """Iterate over the stored entity ids."""
        for key in self.data:
            yield key

    def __len__(self):
        """Return the number of stored entities."""
        return len(self.data)

    def __contains__(self, item):
        """Return True if ``item`` is a stored entity id."""
        return item in self.data

    def __getitem__(self, key):
        """Return the record stored for entity ``key`` (KeyError if absent)."""
        return self.data[key]

    def __str__(self):
        """Return the string form of the underlying dict."""
        return str(self.data)

    def __setitem__(self, entity, post):
        """
        Add entity and related post

        A post already attached to the entity is ignored, so the per-entity
        counter only grows when a new post is actually stored.
        """
        if entity in self:
            posts = self.data[entity]["posts"]
            if post in posts:
                return None
            posts.append(post)
        else:
            self.data[entity] = {
                "posts": [post],
                "base_class": "",
                "prefLabel": "",
                "related": [],  # filled by add_connection
                "classes": [],
            }
        self.counter[entity] += 1

    def _recount(self):
        """Rebuild the per-entity post counter from ``self.data``."""
        self.counter = Counter()
        for entity in self:
            self.counter[entity] = len(self[entity]["posts"])

    def save(self, filename):
        """Write the database to ``filename`` as JSON."""
        # Context manager guarantees the data is flushed and the handle closed.
        with open(filename, "w") as fp:
            json.dump(self.data, fp)

    def load(self, filename):
        """Replace the database contents with the JSON stored in ``filename``.

        :return: ``self``, so the call can be chained
        :raises AssertionError: if the file does not contain a JSON object
        """
        with open(filename) as fp:
            loaded = json.load(fp)
        # Explicit raise instead of ``assert`` so the check survives ``python -O``;
        # AssertionError is kept so existing callers see the same exception type.
        # Validation happens before assignment so a bad file cannot clobber self.data.
        if not isinstance(loaded, dict):
            raise AssertionError("entity database file must contain a JSON object")
        self.data = loaded
        self._recount()
        return self

    def most_common(self, n=None):
        """
        Return most common entities and their number
        """
        return self.counter.most_common(n)

    def get_posts(self, entity, expired_days=3, topic=None):
        """
        Get posts related to entity, optionally filtered by days and topic

        :param entity: entity id (KeyError if not stored)
        :param expired_days: posts whose ``content_category`` is ``"news"`` and
            that are older than this many days are dropped
        :param topic: if not None, keep only posts whose ``"topic"`` equals it
        """
        # The file does ``import datetime`` (the module), so the class must be
        # reached as ``datetime.datetime``.
        today = datetime.datetime.today()
        filtered = []
        for post in self.data[entity]["posts"]:
            post_day = datetime.datetime.fromtimestamp(int(post["created_utc"]))
            if topic is not None and post["topic"] != topic:  # topic filter
                continue
            if (today - post_day).days > expired_days and post["content_category"] == "news":  # date filter
                continue
            filtered.append(post)
        return filtered

    def get_related_entities(self, entity):
        """
        Get the list of related entities

        Items are whatever was stored under ``"related"``; ``add_connection``
        stores ``(entity, connection_type)`` pairs.
        """
        return self.data[entity]["related"]

    @staticmethod
    def _unpack_relation(relation):
        """Return ``(entity, connection_type)`` from a stored relation.

        Accepts the pair form written by ``add_connection`` (a tuple, or a
        list after a JSON round-trip) as well as the dict form
        ``{"entity": ..., "connection_type": ...}``.
        """
        if isinstance(relation, dict):
            return relation["entity"], relation["connection_type"]
        related_entity, connection_type = relation
        return related_entity, connection_type

    def get_related_posts(self, entity, expired_days=3, topic=None):
        """
        Get all posts from related entities

        :return: one ``{"posts": [...], "connection_type": ...}`` dict per
            related entity, with posts filtered as in ``get_posts``
        """
        related_posts = []
        for relation in self.get_related_entities(entity):
            related_entity, connection_type = self._unpack_relation(relation)
            related_posts.append(
                {
                    "posts": self.get_posts(related_entity, expired_days, topic),
                    "connection_type": connection_type,
                }
            )
        return related_posts

    def add_connection(self, original_entity, related_entity, connection):
        """
        Add a connection from original_entity to related_entity

        Nothing happens unless both entities are already stored. ``connection``
        must provide ``"name"`` and ``"bidirectional"``; when bidirectional is
        truthy the reverse connection is recorded too.
        """
        if original_entity in self and related_entity in self:
            self.data[original_entity]["related"].append((related_entity, connection["name"]))
            if connection["bidirectional"]:
                self.data[related_entity]["related"].append((original_entity, connection["name"]))

    def add_classes(self, entity, entity_classes, client):
        """
        Add list of classes to entity with respect to entity_classes

        :param entity: stored entity id
        :param entity_classes: mapping ``base_class -> iterable of subclasses``
        :param client: object exposing ``get_knowledge_query_answer``
        """
        query = {"text": "query cls | m <aio:isAnInstanceOf> cls"}
        variableBindings = [{"variable": "m", "dataType": "aio:Entity", "value": entity}]
        try:
            answer = client.get_knowledge_query_answer(
                query=query, variableBindings=variableBindings, timeout_in_millis=10000
            )
            results = {a["bindingList"][0]["value"] for a in answer["results"]}
        except Exception as e:
            # Best effort: a failed lookup leaves the entity with the fallback class.
            print(e)
            results = set()
        # "classes" is kept as a list: it must stay JSON-serialisable for save()
        # and add_class() appends to it.
        known = list(self.data[entity]["classes"])
        for base_class in entity_classes:
            if base_class not in results:
                continue
            candidates = set(entity_classes[base_class])
            candidates.add(base_class)
            # sorted() keeps the stored order deterministic.
            for cls in sorted(candidates.intersection(results)):
                if cls not in known:
                    known.append(cls)
            if base_class != "aio:Thing":  # aio:Thing - common base class for miscellaneous types of entities
                self.data[entity]["base_class"] = base_class
        self.data[entity]["classes"] = known
        if self.data[entity]["base_class"] == "":
            self.data[entity]["base_class"] = "aio:Thing"

    def add_class(self, entity, cls, client):
        """Check whether ``entity`` is an instance of ``cls`` and record it if so.

        :return: True if the class was already recorded or the query status is
            ``"YES"``; False otherwise, including when the query fails
        """
        if cls in self.data[entity]["classes"]:
            return True
        query = {"text": "query | a <aio:isAnInstanceOf> cls"}
        variableBindings = [
            {"variable": "a", "dataType": "aio:Entity", "value": entity},
            {"variable": "cls", "dataType": "aio:Entity", "value": cls},
        ]
        try:
            answer = client.get_knowledge_query_answer(
                query=query, variableBindings=variableBindings, timeout_in_millis=1000
            )
            result = answer["status"] == "YES"
        except Exception as e:
            # Best effort: a failed lookup is reported as "not an instance".
            print(e)
            result = False
        if result:
            self.data[entity]["classes"].append(cls)
        return result

    def add_preflabel(self, entity, client):
        """
        Add preferable label (name) to entity

        If the query fails, the label is derived from the entity id: the first
        four characters are dropped, underscores become spaces and everything
        except ASCII letters and spaces is removed.
        """
        query = {"text": "query lab | m <aio:prefLabel> lab"}
        variableBindings = [{"variable": "m", "dataType": "aio:Entity", "value": entity}]
        try:
            answer = client.get_knowledge_query_answer(
                query=query, variableBindings=variableBindings, timeout_in_millis=10000
            )
            label = answer["results"][0]["bindingList"][0]["value"]
        except Exception as e:
            print(e)
            label = re.sub(r"[^A-Za-z ]+", "", entity[4:].replace("_", " "))
        self.data[entity]["prefLabel"] = label