3
from common.combined_classes import TOPIC_GROUPS
4
from common import utils, universal_templates
6
logger = logging.getLogger(__name__)
8
# First 5000 entries of the bundled English word list, whitespace-stripped.
# Entity names whose lowercase form appears in this set are rejected as
# celebrity candidates (see celebrity_from_uttr).
with open("common/google-10000-english-no-swears.txt", "r") as _frequent_words_file:
    # The context manager closes the file handle as soon as the set is built.
    TOP_5k_FREQUENT_WORDS = {word.strip() for word in _frequent_words_file.readlines()[:5000]}
12
# Case-insensitive matcher for gossip/celebrity-related word stems that start
# at a word boundary (e.g. "celebrity", "actors", "gossiping").
GOSSIP_COMPILED_PATTERN = re.compile(
    r"\b(celebrit|actor|actress|writer|author|entrepreneur|sportsperson|musician|gossip)", re.IGNORECASE
)
15
# Case-insensitive pattern for questions of the form
# "<would|have|did|was|had|were|are|do> you ... gossip".
HAVE_YOU_GOSSIP_TEMPLATE = re.compile(r"(would|have|did|was|had|were|are|do) you .*gossip", re.IGNORECASE)
17
# Questions offering the latest gossip; returned (together with the celebrity
# phrases) by skill_trigger_phrases(). Kept as a list so it can be
# concatenated with other phrase lists.
GOSSIP_SKILL_TRIGGER_PHRASES = [
    "Would you want to hear the latest gossip?",
    "Are you interested in the latest gossip?",
    "Would you be interested in the latest gossip?",
]
23
# Celebrity question that skill_trigger_phrases() returns after the gossip phrases.
CELEBRITY_TRIGGER_PHRASES = ["What is your favourite celebrity?"]
25
TOPICS_TO_PEOPLE_MAPPINGS = [
27
"Topic": "Entertainment_Movies",
52
"Topic": "Entertainment_Music",
77
"Topic": "Entertainment_Books",
87
"Emily St. John Mandel",
102
"Topic": "Entertainment_General",
107
"Benedict Cumberbatch",
108
"Phoebe Waller-Bridge",
122
"Millie Bobbie Brown",
162
# "Alexandria Ocasio-Cortez",
163
# "Arnold Schwarzenegger",
168
"Topic": "Science_and_Technology",
174
# "Philip Scheinfeld",
178
# "Billionaire Barbie",
189
# "Jordan Montgomery",
200
{"Topic": "Phatic", "People": []},
201
{"Topic": "Interactive", "People": []},
202
{"Topic": "Inappropriate_Content", "People": []},
203
{"Topic": "Other", "People": []},
207
COBOT_TOPICS_TO_WIKI_OCCUPATIONS = {
208
"Politics": [["Q82955", "politician"], ["Q193391", "diplomat"]],
209
"Science_and_Technology": [["Q131524", "entrepreneur"]],
210
"Entertainment_Movies": [
212
["Q10800557", "film actor"],
213
["Q2526255", "film director"],
214
["Q28389", "screenwriter"],
215
["Q10798782", "television actor"],
216
["Q3282637", "film producer"],
217
["Q2259451", "stage actor"],
218
["Q3455803", "director"],
219
["Q947873", "television presenter"],
220
["Q222344", "cinematographer"],
221
["Q2405480", "voice actor"],
223
"Entertainment_Books": [
224
["Q36180", "writer"],
226
["Q6625963", "novelist"],
227
["Q214917", "playwright"],
228
["Q1607826", "editor"],
230
"Entertainment_General": [
231
["Q1028181", "painter"],
232
["Q483501", "artist"],
233
["Q33231", "photographer"],
234
["Q1281618", "sculptor"],
235
["Q644687", "illustrator"],
236
["Q15296811", "drawer"],
237
["Q1930187", "journalist"],
240
["Q2066131", "athlete"],
241
["Q937857", "association football player"],
242
["Q3665646", "basketball player"],
243
["Q10871364", "baseball player"],
244
["Q12299841", "cricketer"],
245
["Q11513337", "athletics competitor"],
246
["Q19204627", "American football player"],
247
["Q11774891", "ice hockey player"],
248
["Q2309784", "sport cyclist"],
249
["Q628099", "association football manager"],
250
["Q13141064", "badminton player"],
251
["Q10873124", "chess player"],
252
["Q14089670", "rugby union player"],
253
["Q11338576", "boxer"],
254
["Q15117302", "volleyball player"],
255
["Q10843402", "swimmer"],
256
["Q12840545", "handball player"],
257
["Q10833314", "tennis player"],
259
"Entertainment_Music": [
260
["Q177220", "singer"],
261
["Q36834", "composer"],
262
["Q639669", "musician"],
263
["Q753110", "songwriter"],
264
["Q486748", "pianist"],
265
["Q488205", "singer-songwriter"],
266
["Q855091", "guitarist"],
267
["Q2865819", "opera singer"],
272
def skill_trigger_phrases():
    """Return all trigger phrases of the skill as one new list.

    The gossip phrases come first, followed by the celebrity phrases.
    """
    return [*GOSSIP_SKILL_TRIGGER_PHRASES, *CELEBRITY_TRIGGER_PHRASES]
276
def talk_about_gossip(human_utterance, bot_utterance):
    """Return whether the gossip topic check fires for this dialogue turn.

    Delegates to ``universal_templates.if_chat_about_particular_topic`` with
    ``GOSSIP_COMPILED_PATTERN`` as the topic pattern and coerces whatever it
    returns to a plain ``bool``.

    Args:
        human_utterance: the user's utterance, forwarded unchanged to the helper.
        bot_utterance: the bot's utterance, forwarded unchanged to the helper.

    Returns:
        bool: truthiness of the helper's result.
    """
    user_lets_chat_about = universal_templates.if_chat_about_particular_topic(
        human_utterance, bot_utterance, compiled_pattern=GOSSIP_COMPILED_PATTERN
    )
    return bool(user_lets_chat_about)
284
def get_all_supported_occupations_lists():
287
all_topics_mappings = TOPICS_TO_PEOPLE_MAPPINGS
288
for topic_mapping in all_topics_mappings:
289
topic = topic_mapping["Topic"]
291
people = topic_mapping["People"]
293
raw_occupations_list = COBOT_TOPICS_TO_WIKI_OCCUPATIONS[topic]
294
for occupation_pair in raw_occupations_list:
295
occupation_id = occupation_pair[0]
296
all_occupations.append(occupation_id)
298
return all_occupations
301
def celebrity_from_uttr(human_utterance):
302
logger.debug(f'Calling get_celebrity_from_uttr on {human_utterance["text"]}')
304
# we need to get all supported occupations
305
raw_profession_list = get_all_supported_occupations_lists()
307
celebrity_name, matching_types, mismatching_types = None, None, None
308
entity_dict = human_utterance["annotations"].get("wiki_parser", {}).get("topic_skill_entities_info", {})
309
logger.info(f"found entities: {entity_dict}")
310
for celebrity_name in entity_dict:
312
"occupation" in entity_dict[celebrity_name]
313
and entity_dict[celebrity_name]["pos"] == 0
314
and entity_dict[celebrity_name]["conf"] > 0.5
315
and celebrity_name.lower() not in TOP_5k_FREQUENT_WORDS
317
occupation_list = entity_dict[celebrity_name]["occupation"]
318
matching_types = [job[1] for job in occupation_list if job[0] in raw_profession_list]
319
mismatching_types = [job[1] for job in occupation_list if job[0] not in raw_profession_list]
322
if not matching_types:
323
return None, None, None
324
celebrity_name = celebrity_name.title()
325
logger.warning(f"Relations {celebrity_name} {matching_types} {mismatching_types}")
326
return celebrity_name, matching_types, mismatching_types
329
def check_is_celebrity_mentioned(human_utterance):
330
celebrity_name, _, _ = celebrity_from_uttr(human_utterance)
331
if celebrity_name is not None:
336
def about_celebrities(annotated_utterance):
337
found_topics = utils.get_topics(annotated_utterance, probs=False, which="all")
338
if any([topic in found_topics for topic in TOPIC_GROUPS["celebrities"]]):
340
elif re.findall(GOSSIP_COMPILED_PATTERN, annotated_utterance["text"]):