# %%
import os
import logging
import re

import requests

import sentry_sdk

import common.dialogflow_framework.utils.state as state_utils
import common.custom_requests as custom_requests

import common.utils as common_utils

import dialogflows.scenarios.gossip as this_gossip

import common.gossip as common_gossip

sentry_sdk.init(dsn=os.getenv("SENTRY_DSN"))

ENTITY_LINKING_URL = os.getenv("DP_ENTITY_LINKING_URL")
WIKIDATA_URL = os.getenv("DP_WIKIDATA_URL")
assert ENTITY_LINKING_URL, ENTITY_LINKING_URL
assert WIKIDATA_URL, WIKIDATA_URL

logger = logging.getLogger(__name__)

DIALOG_BEGINNING_START_CONFIDENCE = 0.98
DIALOG_BEGINNING_CONTINUE_CONFIDENCE = 0.9
DIALOG_BEGINNING_SHORT_ANSWER_CONFIDENCE = 0.98
MIDDLE_DIALOG_START_CONFIDENCE = 0.7
SUPER_CONFIDENCE = 1.0
HIGH_CONFIDENCE = 0.98


##################################################################################################################
# utils
##################################################################################################################

##################################################################################################################
# Entity Linking & Wiki Parser
##################################################################################################################

def request_el_wp_entities(person, utterance):
    entities_info = {}
    try:
        el_output = requests.post(
            ENTITY_LINKING_URL,
            json={"entity_substr": [[person]], "template": [""], "context": [[utterance]]},
            timeout=0.8,
        ).json()
        entity_info = el_output and el_output[0] and el_output[0][0]
        if isinstance(entity_info, list) and entity_info and entity_info[0]:
            entity_ids = entity_info[0]
            entity_id = entity_ids[0]
            wp_output = requests.post(
                WIKIDATA_URL,
                json={"parser_info": ["find_top_triplets"], "query": [[[entity_id]]]},
                timeout=0.8,
            ).json()
        elif isinstance(entity_info, dict):
            entity_ids = entity_info.get("entity_ids", [])
            entity_id = entity_ids and entity_ids[0]
            wp_output = (
                entity_id
                and requests.post(
                    WIKIDATA_URL,
                    json={
                        "parser_info": ["find_top_triplets"],
                        "query": [[{"entity_substr": person, "entity_ids": [entity_id]}]],
                    },
                    timeout=0.8,
                ).json()
            )
        else:
            # report the unexpected entity linking output, not the (still empty) result dict
            raise Exception(f"Unexpected entity linking output: {entity_info}")
        entities_info = wp_output and wp_output[0].get("entities_info", {})
    except Exception as exc:
        msg = f"request_el_wp_entities exception: {exc}"
        logger.debug(msg)
        sentry_sdk.capture_message(msg)
    return entities_info if entities_info else {}

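# Illustrative sketch (not executed), describing the shape the helpers below rely on:
# request_el_wp_entities returns a dict mapping an entity label to a dict of Wikidata triplets
# (keys such as "occupation", "gender", "age", "spouse"), where e.g. an "occupation" entry is a
# [wikidata_id, title] pair. The person name and utterance are hypothetical examples.
#
# entities_info = request_el_wp_entities("Keanu Reeves", "I like Keanu Reeves")
# for entity_label, triplets in entities_info.items():
#     occupation_pairs = triplets.get("occupation", [])
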

def get_relationship_between_two_people(person_1, person_2):
    wp_output = []
    # initialize so the Wiki parser request below never hits an undefined name
    entities1, entities2 = [], []
    try:
        persons = [person_1, person_2]
        el_output = requests.post(
            ENTITY_LINKING_URL,
            json={"entity_substr": [persons], "template": [""], "context": [[""]]},
            timeout=0.8,
        ).json()
        entity_info_list = el_output and el_output[0]
        if entity_info_list:
            if (
                isinstance(entity_info_list[0], list)
                and len(entity_info_list[0]) == 2
                and entity_info_list[0][0]
                and entity_info_list[0][1]
            ):
                entities1, entities2 = entity_info_list[0]
            if isinstance(entity_info_list[0], dict) and len(entity_info_list) == 2:
                entities1, entities2 = entity_info_list
                entities1 = entities1.get("entity_ids", [])
                entities2 = entities2.get("entity_ids", [])

        wp_output = requests.post(
            WIKIDATA_URL,
            json={"parser_info": ["find_connection"], "query": [[entities1, entities2]]},
            timeout=0.8,
        ).json()
    except Exception as exc:
        msg = f"get_relationship_between_two_people exception: {exc}"
        logger.debug(msg)
        sentry_sdk.capture_message(msg)

    relationship = wp_output and wp_output[0] and wp_output[0][0]

    return relationship


def get_occupations_for_person_from_wiki_parser(person, utterance):
    occupations = []

    entities_info = request_el_wp_entities(person, utterance)

    logger.debug(f"Get Occupations: {entities_info}")

    for entity_label in entities_info:
        triplets = entities_info[entity_label]
        logger.debug(triplets)
        if "occupation" in triplets:
            occupations += [triplets["occupation"]]

    return occupations


def get_gender_age_person(person, utterance):
    gender = "unknown"
    age = 0

    entities_info = request_el_wp_entities(person, utterance)

    for entity_label in entities_info:
        triplets = entities_info[entity_label]
        gender = triplets.get("gender", [])
        age = triplets.get("age", 0)

        gender = gender and gender[0] and gender[0][1]
        gender = gender if gender else "they"
    return gender, age


# def is_creative_person(person, utterance):
#     entities_info = request_el_wp_entities(person, utterance)

#     for entity_label in entities_info:
#         triplets = entities_info[entity_label]

#         occupations = triplets["occupation"]
#         occupation_titles = set([occ_title for occ_id, occ_title in occupations])

#     sports_occupations = this_news.COBOT_TOPICS_TO_WIKI_OCCUPATIONS["Sports"]

#     is_sports_person = False

#     for occupation_title in occupation_titles:
#         if occupation_title in sports_occupations:
#             is_sports_person = True

#     return is_sports_person


def get_teams_for_sportsperson(person, utterance):
    sport = [[]]
    teams = [[]]

    entities_info = request_el_wp_entities(person, utterance)

    for entity_label in entities_info:
        triplets = entities_info[entity_label]

        # read the triplets inside the loop so nothing is referenced when no entities were found
        sport = triplets.get("sport", [[]])
        teams = triplets.get("member of sports team", [[]])

    return sport, teams


def get_spouse_or_partner_person(person, utterance):
    spouse = ""
    partner = ""

    entities_info = request_el_wp_entities(person, utterance)

    for entity_label in entities_info:
        triplets = entities_info[entity_label]
        spouse = triplets.get("spouse", [])
        partner = triplets.get("partner", [])
        spouse = spouse[0][1] if spouse else None
        partner = partner[0][1] if partner else None

    return spouse, partner


def get_human_readable_gender_statement_current_is(gender: str):
    if "female" in gender.lower():
        return "she is"
    if "male" in gender.lower():
        return "he is"
    return "they are"


def get_human_readable_gender_statement_current_eir(gender: str):
    if "female" in gender.lower():
        return "her"
    if "male" in gender.lower():
        return "his"
    return "their"


def get_human_readable_gender_statement_current_im(gender: str):
    if "female" in gender.lower():
        return "her"
    if "male" in gender.lower():
        return "him"
    return "them"

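# Usage sketch (illustrative only): the three helpers above map a Wikidata gender label
# to a subject phrase, a possessive pronoun, and an object pronoun respectively; note that
# "female" is checked first because the substring "male" also matches it.
#
# get_human_readable_gender_statement_current_is("female")   # -> "she is"
# get_human_readable_gender_statement_current_eir("female")  # -> "her"
# get_human_readable_gender_statement_current_im("female")   # -> "her"
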

def get_notable_works_for_creative_person(person, utterance):
    films = []
    notable_works = []
    songs = []
    albums = []

    entities_info = request_el_wp_entities(person, utterance)
    for entity_label in entities_info:
        triplets = entities_info[entity_label]

        # .get() avoids a KeyError when the entity has no "occupation" triplet at all
        occupations = triplets.get("occupation", [])
        occupation_titles = set([occ_title for occ_id, occ_title in occupations])
        if {"actor", "film actor", "television actor"}.intersection(occupation_titles):
            films_obtained = triplets.get("films of actor", [])

            films.append(films_obtained)

        if {"singer", "songwriter", "composer"}.intersection(occupation_titles):
            songs_obtained = triplets.get("songs", [])
            albums_obtained = triplets.get("albums", [])

            songs.append(songs_obtained)
            albums.append(albums_obtained)

        if {"writer", "poet", "novelist", "playwright"}.intersection(occupation_titles):
            notable_works_obtained = triplets.get("notable work", [])

            notable_works.append(notable_works_obtained)

        # if {"athlete"}.intersection(occupation_titles):
        #     sport = triplets.get("sport", [])
        #     teams = triplets.get("member of sports team", [])
        #     print("sport", sport)
        #     print("teams", teams)
        # if {"entrepreneur"}.intersection(occupation_titles):
        #     companies = triplets.get("owner of", [])
        #     products = triplets.get("notable work", [])
        #     print("companies", companies)
        #     print("products", products)
        # if {"politician", "statesperson"}.intersection(occupation_titles):
        #     country = triplets.get("country", [])
        #     parties = triplets.get("member of political party", [])
        #     print("country", country)
        #     print("parties", parties)

        # print(occupations)

    # returning our treasure!
    return films, songs, albums, notable_works


def get_top_people_from_wiki_for_cobot_topic(cobot_topic, top_people):
    raw_occupations_list = common_gossip.COBOT_TOPICS_TO_WIKI_OCCUPATIONS[cobot_topic]

    processed_occupations_tuple = tuple([occupation_item[1] for occupation_item in raw_occupations_list])
    results = custom_requests.request_triples_wikidata("find_top_people", [processed_occupations_tuple])
    results = results[0] if results else results
    if results:
        # each person item is actually a ['Wikidata_ID', 'Display_Name'] pair
        return [person_item[1] for person_item in results[0][0] if person_item]
    else:
        return []


def get_cobot_topic_for_occupation(occupation):
    all_topics_mappings = common_gossip.COBOT_TOPICS_TO_WIKI_OCCUPATIONS
    for topic, occupations in all_topics_mappings.items():
        for occupation_pair in occupations:
            occupation_name = occupation_pair[1]
            # not "in" but "equals"
            if str(occupation).lower() == str(occupation_name).lower():
                return topic

    return None


###

###

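# Illustrative sketch (not executed), assuming COBOT_TOPICS_TO_WIKI_OCCUPATIONS maps a CoBot
# topic to a list of [wikidata_id, occupation_title] pairs, as the two helpers above expect;
# "Sports" and "athlete" are hypothetical example values.
#
# people = get_top_people_from_wiki_for_cobot_topic("Sports", top_people=None)
# topic = get_cobot_topic_for_occupation("athlete")  # -> "Sports" if present in the mapping
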

def get_not_used_and_save_reaction_to_new_mentioned_person(vars):
    shared_memory = state_utils.get_shared_memory(vars)
    last_reactions_to_new_person = shared_memory.get("last_reactions_to_new_person", [])

    reaction = common_utils.get_not_used_template(
        used_templates=last_reactions_to_new_person, all_templates=this_gossip.OPINION_TO_USER_MENTIONING_SOMEONE_NEW
    )

    used_reacts = last_reactions_to_new_person + [reaction]
    state_utils.save_to_shared_memory(vars, last_reactions_to_new_person=used_reacts[-2:])
    return reaction


# def get_not_used_and_save_wait_but_why_question(vars):
#     shared_memory = state_utils.get_shared_memory(vars)
#     last_wait_but_why_questions = shared_memory.get("last_wait_but_why_questions", [])
#
#     question = common_utils.get_not_used_template(
#         used_templates=last_wait_but_why_questions, all_templates=this_gossip.WAIT_BUT_WHY_QUESTIONS
#     )
#
#     used_questions = last_wait_but_why_questions + [question]
#     state_utils.save_to_shared_memory(vars, last_reactions_to_new_person=used_questions[-2:])
#     return question


##################################################################################################################
# speech functions
##################################################################################################################


def get_speech_function_for_human_utterance(human_utterance):
    sf_type = human_utterance["annotations"].get("speech_function_classifier", {}).get("type", "")
    sf_confidence = human_utterance["annotations"].get("speech_function_classifier", {}).get("confidence", 0.0)
    return sf_type, sf_confidence


def get_speech_function_predictions_for_human_utterance(human_utterance):
    predicted_sfs = human_utterance["annotations"].get("speech_function_predictor", [])

    return predicted_sfs


def filter_speech_function_predictions_for_human_utterance(predicted_sfs):
    filtered_sfs = [sf_item for sf_item in predicted_sfs if "Open" not in sf_item]
    return filtered_sfs

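# Illustrative sketch (not executed): the getters above read the "speech_function_classifier"
# and "speech_function_predictor" annotations from a human utterance dict; the structure below
# is a hypothetical, minimal example of such an utterance.
#
# human_utterance = {
#     "annotations": {"speech_function_classifier": {"type": "Support.Reply.Agree", "confidence": 0.9}}
# }
# sf_type, sf_confidence = get_speech_function_for_human_utterance(human_utterance)
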

patterns_agree = [
    "Support.Reply.Accept",
    "Support.Reply.Agree",
    "Support.Reply.Comply",
    "Support.Reply.Acknowledge",
    "Support.Reply.Affirm",
]
agree_patterns_re = re.compile("(" + "|".join(patterns_agree) + ")", re.IGNORECASE)


def is_speech_function_agree(vars):
    human_utterance = state_utils.get_last_human_utterance(vars)
    sf_type, sf_confidence = get_speech_function_for_human_utterance(human_utterance)
    flag = sf_type and bool(re.search(agree_patterns_re, sf_type))
    # fallback to MIDAS
    flag = flag or is_midas_positive_answer(vars)
    # fallback to yes/no intents
    flag = flag or common_utils.is_yes(human_utterance)

    flag = flag and not is_not_interested_speech_function(vars)
    return flag


patterns_disagree = [
    "Support.Reply.Decline",
    "Support.Reply.Disagree",
    "Support.Reply.Non-comply",
    "Support.Reply.Withold",
    "Support.Reply.Disawow",
    "Support.Reply.Conflict",
]
disagree_patterns_re = re.compile("(" + "|".join(patterns_disagree) + ")", re.IGNORECASE)


def is_speech_function_disagree(vars):
    human_utterance = state_utils.get_last_human_utterance(vars)
    sf_type, sf_confidence = get_speech_function_for_human_utterance(human_utterance)
    flag = sf_type and bool(re.search(disagree_patterns_re, sf_type))
    # fallback to MIDAS
    flag = flag or is_midas_negative_answer(vars)
    # fallback to yes/no intents
    flag = flag or common_utils.is_no(human_utterance)

    flag = flag and not is_not_interested_speech_function(vars)
    return flag


patterns_express_opinion = [
    "Initiate.Give.Opinion",
]
express_opinion_patterns_re = re.compile("(" + "|".join(patterns_express_opinion) + ")", re.IGNORECASE)


def is_cobot_opinion_expressed(vars):
    intents = common_utils.get_intents(state_utils.get_last_human_utterance(vars), which="all")
    opinion_expression_detected = "Opinion_ExpressionIntent" in intents
    return bool(opinion_expression_detected)


def is_cobot_opinion_demanded(vars):
    intents = common_utils.get_intents(state_utils.get_last_human_utterance(vars), which="all")
    opinion_request_detected = "Opinion_RequestIntent" in intents
    return bool(opinion_request_detected)


def is_speech_function_express_opinion(vars):
    human_utterance = state_utils.get_last_human_utterance(vars)
    sf_type, sf_confidence = get_speech_function_for_human_utterance(human_utterance)
    flag = sf_type and bool(re.search(express_opinion_patterns_re, sf_type))
    # fallback to MIDAS
    flag = flag or is_midas_opinion_expression(vars)
    # fallback to CoBot intents
    flag = flag or is_cobot_opinion_expressed(vars)
    flag = flag or common_utils.is_no(human_utterance)
    # bug check (sometimes opinion by MIDAS can be incorrectly detected in a simple yes/no answer from user)
    flag = flag and not common_utils.is_no(human_utterance) and not common_utils.is_yes(human_utterance)
    return flag


patterns_demand_opinion = [
    "Initiate.Demand.Opinion",
]
demand_opinion_patterns_re = re.compile("(" + "|".join(patterns_demand_opinion) + ")", re.IGNORECASE)


def is_speech_function_demand_opinion(vars):
    human_utterance = state_utils.get_last_human_utterance(vars)
    sf_type, sf_confidence = get_speech_function_for_human_utterance(human_utterance)
    flag = sf_type and bool(re.search(demand_opinion_patterns_re, sf_type))
    # fallback to CoBot intents
    flag = flag or is_cobot_opinion_demanded(vars)
    flag = flag or common_utils.is_no(human_utterance)
    # bug check (sometimes opinion by MIDAS can be incorrectly detected in a simple yes/no answer from user)
    flag = flag and not common_utils.is_no(human_utterance) and not common_utils.is_yes(human_utterance)
    return flag


def get_mentioned_people(vars):
    user_mentioned_named_entities = state_utils.get_named_entities_from_human_utterance(vars)
    user_mentioned_names = []

    logger.info("user_mentioned_named_entities: " + str(user_mentioned_named_entities))

    for named_entity in user_mentioned_named_entities:
        logger.debug(f"named entity: {named_entity}")
        if named_entity["type"] == "PER":
            user_mentioned_names.append(named_entity["text"])
    return user_mentioned_names


def get_mentioned_orgs(vars):
    user_mentioned_named_entities = state_utils.get_named_entities_from_human_utterance(vars)
    user_mentioned_names = []
    for named_entity in user_mentioned_named_entities:
        if named_entity["type"] == "ORG":
            user_mentioned_names.append(named_entity["text"])
    return user_mentioned_names

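# Illustrative note: both helpers above filter NER annotation entries shaped like
# {"type": "PER", "text": "Ada Lovelace"} (a hypothetical example); "PER" selects people,
# "ORG" selects organizations.
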

# def get_mentioned_people(vars, cobot_topic):
#     # obtaining named entities
#     named_entities = state_utils.get_named_entities_from_human_utterance(vars)

#     # human_utterance = state_utils.get_last_human_utterance(vars)

#     # basic_celebrities_list = ['Q33999',  # actor
#     #                        "Q10800557",  # film actor
#     #                        "Q10798782",  # television actor
#     #                        "Q2405480",  # voice actor
#     #                        'Q17125263',  # youtuber
#     #                        'Q245068',  # comedian
#     #                        'Q2066131',  # sportsman
#     #                        'Q947873',  # television presenter
#     #                        'Q2405480',  # comedian
#     #                        'Q211236',  # celebrity
#     #                        'Q177220']  # singer

#     # professions_for_cobot_topic =

#     # celebrity_name, celebrity_type, celebrity_raw_type = state_utils.get_types_from_annotations(
#     #     human_utterance['annotations'], tocheck_relation='occupation',
#     #     types=raw_profession_list, exclude_types=[])

#     logger.debug("detected entities: " + str(named_entities))

#     return False


##################################################################################################################
# more specific intents
##################################################################################################################


patterns_not_interested = [
    "not interested",
    "don't care",
    "move on",
    "skip",
    "cancel",
    "avoid",
    "not into",
    "not really into",
    "no interest for me",
    "I don't bother",
    "don't really want to talk about this",
    "don't feel comfortable discussing this",
    "We’d better not to enter this subject",
    "I'd rather not go there right now",
    "have no interest in discussing that",
    "What an idiotic topic of conversation",
    "That subject really bothers me and I don't want to talk about it",
    "Can we talk about something else",
    "Must we discuss this",
    "Can we discuss this later",
]
patterns_not_interested_re = re.compile("(" + "|".join(patterns_not_interested) + ")", re.IGNORECASE)


def is_not_interested_speech_function(vars):
    human_text = state_utils.get_last_human_utterance(vars)["text"]

    flag = bool(re.search(patterns_not_interested_re, human_text))

    return flag


##################################################################################################################
# MIDAS
##################################################################################################################


def is_midas_positive_answer(vars):
    midas_classes = common_utils.get_intents(state_utils.get_last_human_utterance(vars), which="midas")

    intent_detected = any([intent in midas_classes for intent in ["pos_answer"]])

    return intent_detected


def is_midas_negative_answer(vars):
    midas_classes = common_utils.get_intents(state_utils.get_last_human_utterance(vars), which="midas")

    intent_detected = any([intent in midas_classes for intent in ["neg_answer"]])

    return intent_detected


def is_midas_opinion_expression(vars):
    midas_classes = common_utils.get_intents(state_utils.get_last_human_utterance(vars), which="midas")
    intent_detected = any([intent in midas_classes for intent in ["opinion"]])

    return intent_detected


##################################################################################################################
# occupation
##################################################################################################################


def get_basic_occupation_for_topic(cobot_topic):
    occupations = [x["Occupation"] for x in this_gossip.TOPICS_TO_OCCUPATIONS if x["Topic"] == cobot_topic]
    if occupations:
        return occupations[0]


def get_occupation_for_person(person, topic, utterance):
    # obtaining basic occupation
    basic_occupation = get_basic_occupation_for_topic(topic)
    occupation = basic_occupation if basic_occupation else "person"
    logger.debug(f"basic occupation: {occupation}")

    # trying to get a more specific occupation from the Wiki parser
    occupations = get_occupations_for_person_from_wiki_parser(person, utterance)
    new_occupation = occupations and len(occupations[0]) > 1 and len(occupations[0][1]) > 1 and occupations[0][1][1]
    occupation = new_occupation if new_occupation else occupation

    return occupation
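# Illustrative sketch (not executed): get_occupation_for_person first falls back to the static
# topic -> occupation mapping and then tries to refine it via the Wiki parser; the person, topic,
# and utterance below are hypothetical examples.
#
# occupation = get_occupation_for_person("Serena Williams", "Sports", "I admire Serena Williams")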
