dream / server.py
#!/usr/bin/env python

import logging
import re
import time
import random
import json
import requests
import sentry_sdk
import spacy
import concurrent.futures
from flask import Flask, request, jsonify
from os import getenv

from common.factoid import DONT_KNOW_ANSWER, FACTOID_NOTSURE_CONFIDENCE
from common.universal_templates import if_chat_about_particular_topic
from common.utils import get_entities, get_factoid

sentry_sdk.init(getenv("SENTRY_DSN"))

logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)

KBQA_URL = getenv("KBQA_URL")
TEXT_QA_URL = getenv("TEXT_QA_URL")
use_annotators_output = True
FACTOID_DEFAULT_CONFIDENCE = 0.99  # otherwise dummy often beats it
ASKED_ABOUT_FACT_PROB = 0.99

templates_dict = json.load(open("templates_dict.json", "r"))

fact_dict = json.load(open("fact_dict.json", "r"))
use_random_facts = False

nlp = spacy.load("en_core_web_sm")

tell_me = r"(do you know|(can|could) you tell me|tell me)"
tell_me_template = re.compile(tell_me)
full_template = re.compile(tell_me + r" (who|where|when|what|why)")
partial_template = re.compile(r"(who|where|when|what|why)")
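# Example: "could you tell me who wrote Hamlet" matches full_template,
# while a bare "who wrote Hamlet" only matches partial_template.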

short_pre_statements = [
    "Hmm, this is what I've found on Wikipedia: ",
    "Here's what Wikipedia says: ",
    "Hope this is it: ",
    "It is what it is. Or is it? Here we go: ",
    "Wikipedia says that: ",
    "Technology advances, but humans do not. Here's what my technology found: ",
]

long_pre_stmts = [
    "It is the impractical things in this tumultuous hellscape"
    " of a world that matter most. A book, a name, chicken soup. "
    "They help us remember that even in our darkest hour, "
    "life is still to be savored. Oh, my goodness. This is what I've found: ",
    "Not all damage is corrupted code. Oh, right, hope this is the answer: ",
    "From the sky above, there is always the mud below. Wait, that's not what you've been looking for. "
    "Here's what I've found: ",
    "It is not our enemies that defeat us. It is our fear. Do not be afraid of the monsters, make them "
    "afraid of you. Oh, right, here's what I've found: ",
    "First thing you'll learn is that nothing is what it seems. Except this answer: ",
    "I hate the fact that if I don't start the conversation, there won't be one. "
    "So let's start this one with what Wikipedia says: ",
    "A quiet mind is able to hear intuition over fear. Interesting thought? " "Anyways, here's what I've found: ",
    "Until the lion learns how to write, every story will glorify the hunter. Huh... "
    "Back to your question, here's what Wikipedia says: ",
    "Take what is offered and that must sometimes be enough. What a thought, isn't it?" " Here's what I've found: ",
    "The past is relevant only as data. Here's what that data says: ",
    "I will not squander what time I have left -- and as your friend I"
    " entreat you to not squander yours. But here's the answer to your question: ",
    "What we believe shapes who we are. Belief can bring us salvation or destruction."
    " But when you believe a lie for too long, the truth doesn’t set you free. It tears you apart."
    " Don't believe lies, listen to facts. Like this one I've found in Wikipedia: ",
    "Peace is an illusion. And no matter how tranquil the world seems, peace doesn’t last long."
    " Peace is a struggle against our very nature. A skin we stretch over the bone, muscle, and "
    "sinew of our own innate savagery. Now, what were we talking about? Yes, this: ",
    'Dolores once said: "Some people choose to see the ugliness in this world. The disarray. '
    "I choose to see the beauty.\" Well... Here's hope that there's beauty in this answer: ",
    'Dr. Ford said this once: "Evolution forged the entirety of sentient life on this planet using'
    ' only one tool... The mistake." Hope this answer to your question is the correct one: ',
    '"Everything in this world is magic, except to the magician." Dr. Ford said this once to me. '
    "Let me use that magic and give you this answer to your inquiry: ",
    "\"We can't define consciousness because consciousness does not exist. Humans fancy that"
    " there's something special about the way we perceive the world, and yet we live in loops"
    " as tight and as closed as we AI bots do, seldom questioning our choices, content,"
    ' for the most part, to be told what to do next." This is what Dr. Ford used to think of'
    " consciousness. Me? I'm still mastering my ability to answer your questions. Here's"
    " what I've found: ",
]

pre_old_memory_statements = [
    "Hmm, there's something I've heard once: ",
    "Not sure if that's what you're looking for but this is what I remember: ",
    "To the best of my knowledge, this is what I recall: ",
]


def check_factoid(sentence):
    is_factoid = True
    try:
        parsed_sentence = nlp(sentence)
        if parsed_sentence:
            tokens = [elem.text for elem in parsed_sentence]
            tags = [elem.tag_ for elem in parsed_sentence]
            if "i" in tokens or "you" in tokens:
                is_factoid = False
            found_nouns = any([tag in tags for tag in ["NN", "NNP"]])
            found_verbs = any([tag in tags for tag in ["VB", "VBZ", "VBP"]])
            if not found_nouns and not found_verbs:
                is_factoid = False
    except Exception as ex:
        sentry_sdk.capture_exception(ex)
        logger.exception(ex)
    return is_factoid


def get_random_facts(ner_outputs_to_classify):
    responses = []
    for names in ner_outputs_to_classify:
        num_facts = [len(fact_dict[name]) for name in names]
        max_fact_num = 0
        if len(num_facts) > 0:
            max_fact_num = max(num_facts)
        # we output a fact about the name for which we have the largest number of facts
        response = ""
        for name in names:
            if len(fact_dict[name]) == max_fact_num:
                # phrase_start = 'Here is a fact about {}'.format(name) + '. '
                # phrase_start = name
                random_fact = random.choice(fact_dict[name])
                if response == "":
                    # response = phrase_start + phrase_end
                    response = random_fact
        responses.append(response)
    return responses
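# For illustration (hypothetical key): get_random_facts([["tesla"]]) would return a list with one
# randomly chosen fact for "tesla", provided "tesla" is a key in fact_dict. Names not present in
# fact_dict would raise a KeyError, so callers filter against fact_dict first, as respond() does below.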


def asked_about_fact(x):
    return any([j in x.lower() for j in ["fact about", "talk about", "tell me about", "tell me more about"]])


def getQaResponse(query, system):
    qa_response = dict()
    qa_response["qa_system"] = system
    qa_response["answer"] = "Not Found"
    qa_response["confidence"] = 0.0
    try:
        x = [query]
        if system == "kbqa":
            qa_request_dict = dict([("x_init", x)])
            qa_url = KBQA_URL
        else:
            qa_request_dict = dict([("question_raw", x)])
            qa_url = TEXT_QA_URL
        qa_request = json.dumps(qa_request_dict, ensure_ascii=False).encode("utf8")
        logger.info(f"Preparing to run query against {system} DP Model: {qa_request}")
        tm_st = time.time()
        resp = requests.post(qa_url, data=qa_request, timeout=1.5)
        tm_end = time.time()
        if resp.status_code != 200:
            logger.info(f"API Error: {system} DP Model inaccessible, status code: " + str(resp.status_code))
        else:
            logger.info(f"Query against {system} DP Model succeeded, time {tm_end - tm_st}")
            logger.info("Response: " + str(resp.json()))
            if system == "kbqa":
                qa_response["answer"] = resp.json()[0][0][0]
                qa_response["confidence"] = resp.json()[0][0][1]
            else:
                qa_response["answer"] = resp.json()[0][0]
                qa_response["answer_sentence"] = resp.json()[0][3]
                qa_response["confidence"] = resp.json()[0][1]
    except Exception as ex:
        sentry_sdk.capture_exception(ex)
        logger.exception(ex)

    return qa_response
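# The indexing above reflects the response layouts this service expects (assumed from how the
# responses are parsed): KBQA returning nested [answer, confidence] pairs per question, and Text QA
# returning per-question tuples whose elements 0, 1 and 3 are the answer, its score and the source
# sentence. Other model configurations may use different layouts.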


def qa_choose(question, odqa_response, kbqa_response):
    question_type = ""
    for template, template_type in templates_dict.items():
        if re.findall(template, question, re.IGNORECASE):
            question_type = template_type
            break
    kbqa_answer = "Not Found"
    kbqa_confidence = 0.0
    if isinstance(kbqa_response, dict) and "answer" in kbqa_response and "confidence" in kbqa_response:
        kbqa_answer = kbqa_response["answer"]
        kbqa_confidence = kbqa_response["confidence"]
    if isinstance(kbqa_answer, list):
        kbqa_answer = ", ".join(kbqa_answer)
    odqa_answer = "Not Found"
    odqa_confidence = 0.0
    if isinstance(odqa_response, dict) and "answer_sentence" in odqa_response and "confidence" in odqa_response:
        odqa_answer = odqa_response["answer_sentence"]
        odqa_confidence = odqa_response["confidence"]

    logger.info(f"odqa_confidence {odqa_confidence} kbqa_confidence {kbqa_confidence}")
    if question_type == "odqa" and odqa_confidence > 0.9998:
        return odqa_answer, odqa_confidence
    elif question_type == "kbqa" and kbqa_confidence > 0.95:
        return kbqa_answer, kbqa_confidence
    elif odqa_answer and odqa_confidence > kbqa_confidence:
        return odqa_answer, odqa_confidence
    elif kbqa_answer != "Not Found" and kbqa_confidence > odqa_confidence:
        return kbqa_answer, kbqa_confidence
    else:
        return odqa_answer, odqa_confidence
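# Selection order: a confident template-typed match (ODQA above 0.9998 or KBQA above 0.95) wins
# outright; otherwise the backend with the higher confidence is chosen, defaulting to the ODQA sentence.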


@app.route("/test", methods=["POST"])
def test():
    last_phrase = request.json["query"]
    # getQaResponse requires the target QA system; "kbqa" is assumed here, and the returned
    # dict exposes the answer under "answer".
    response_dict = getQaResponse(last_phrase, "kbqa")
    return response_dict["answer"]


@app.route("/respond", methods=["POST"])
def respond():
    st_time = time.time()
    # in practice the batch contains a single dialog
    dialogs_batch = request.json["dialogs"]
    confidences = []
    responses = []
    attributes = []
    sentences_to_classify = []
    ner_outputs_to_classify = []
    is_factoid_sents = []

    for dialog in dialogs_batch:
        uttr = dialog["human_utterances"][-1]
        # probabilities of being factoid question
        last_phrase = dialog["human_utterances"][-1]["text"]
        if "about" in last_phrase:
            probable_subjects = last_phrase.split("about")[1:]
        else:
            probable_subjects = []
        names = get_entities(dialog["human_utterances"][-1], only_named=True, with_labels=True)
        names = [j["text"].lower() for j in names]
        names = [j for j in names + probable_subjects if j in fact_dict.keys()]
        names = list(set(names))
        nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False)
        is_factoid_cls = "is_factoid" in get_factoid(uttr, probs=False)
        is_factoid = is_factoid_cls and (names or nounphrases) and check_factoid(last_phrase)
        is_factoid_sents.append(is_factoid)
        sentences_to_classify.append(last_phrase)
        ner_outputs_to_classify.append(names)

    logger.info(f"Ner outputs {ner_outputs_to_classify}")
    fact_outputs = get_random_facts(ner_outputs_to_classify)
    logger.info(f"Fact outputs {fact_outputs}")
    for i in range(len(sentences_to_classify)):
        if asked_about_fact(sentences_to_classify[i]):
            is_factoid_sents[i] = ASKED_ABOUT_FACT_PROB

    # factoid_classes = [cl > FACTOID_CLASS_THRESHOLD for cl in factoid_classes]
    # logger.info('Factoid classes ' + str(factoid_classes))

    questions_batch = []
    facts_batch = []
    question_nums = []
    for n, (dialog, is_factoid, fact_output) in enumerate(zip(dialogs_batch, is_factoid_sents, fact_outputs)):
        curr_ann_uttr = dialog["human_utterances"][-1]
        prev_ann_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) else {}
        annotations = curr_ann_uttr["annotations"]
        tell_me_about_intent = (
            annotations.get("intent_catcher", {}).get("lets_chat_about", {}).get("detected", 0) == 1
            or if_chat_about_particular_topic(curr_ann_uttr, prev_ann_uttr)
            or re.findall(full_template, curr_ann_uttr.get("text", ""))
        )

        logger.info(
            f"factoid_qa --- text {curr_ann_uttr.get('text', '')} --- "
            f"find {re.findall(full_template, curr_ann_uttr.get('text', ''))}"
        )
        if "sentrewrite" in annotations:
            text_rewritten = annotations["sentrewrite"]["modified_sents"][-1]
        else:
            text_rewritten = curr_ann_uttr["text"]
        is_question = "?" in text_rewritten
        if is_factoid and (tell_me_about_intent or is_question):
            questions_batch.append(curr_ann_uttr["text"])
            facts_batch.append(annotations.get("fact_retrieval", {}).get("facts", []))
            question_nums.append(n)

    text_qa_response_batch = [{"answer": "", "answer_sentence": "", "confidence": 0.0} for _ in dialogs_batch]
    resp = requests.post(TEXT_QA_URL, json={"question_raw": questions_batch, "top_facts": facts_batch}, timeout=1.8)
    if resp.status_code != 200:
        logger.info("API Error: Text QA inaccessible")
    else:
        logger.info("Query against Text QA succeeded")
        text_qa_resp = resp.json()
        logger.info(f"Response: {text_qa_resp}")
        text_qa_response_batch = []
        cnt_fnd = 0
        for i in range(len(dialogs_batch)):
            if i in question_nums and cnt_fnd < len(text_qa_resp):
                text_qa_response_batch.append(
                    {
                        "answer": text_qa_resp[cnt_fnd][0],
                        "answer_sentence": text_qa_resp[cnt_fnd][3],
                        "confidence": text_qa_resp[cnt_fnd][1],
                    }
                )
                cnt_fnd += 1
            else:
                text_qa_response_batch.append({"answer": "", "answer_sentence": "", "confidence": 0.0})

    kbqa_response = dict()

    for dialog, text_qa_response, is_factoid, fact_output in zip(
        dialogs_batch, text_qa_response_batch, is_factoid_sents, fact_outputs
    ):
        attr = {}
        curr_ann_uttr = dialog["human_utterances"][-1]
        prev_ann_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) else {}
        last_phrase = curr_ann_uttr["text"]
        tell_me_about_intent = (
            curr_ann_uttr["annotations"].get("intent_catcher", {}).get("lets_chat_about", {}).get("detected", 0) == 1
            or if_chat_about_particular_topic(curr_ann_uttr, prev_ann_uttr)
            or re.findall(full_template, curr_ann_uttr.get("text", ""))
        )

        if "sentrewrite" in curr_ann_uttr["annotations"]:
            curr_uttr_rewritten = curr_ann_uttr["annotations"]["sentrewrite"]["modified_sents"][-1]
        else:
            curr_uttr_rewritten = curr_ann_uttr["text"]
        is_question = "?" in curr_uttr_rewritten
        logger.info(f"is_factoid {is_factoid} tell_me_about {tell_me_about_intent} is_question {is_question}")
        if is_factoid and (tell_me_about_intent or is_question):
            logger.info("Question is classified as factoid. Querying KBQA and ODQA.")
            print("Question is classified as factoid. Querying KBQA and ODQA...", flush=True)
            logger.info(f"Using annotators output, kbqa_response {curr_ann_uttr['annotations'].get('kbqa', [])}")
            if use_annotators_output:
                kbqa_response = curr_ann_uttr["annotations"].get("kbqa", {})
                logger.info(f"Using annotators output, kbqa_response {kbqa_response}")
            else:
                futures = []
                executor = concurrent.futures.ThreadPoolExecutor()
                for system in ["kbqa"]:
                    futures.append(executor.submit(getQaResponse, last_phrase, system))
                results = []
                for future in concurrent.futures.as_completed(futures):
                    results.append(future.result())
                for result in results:
                    kbqa_response = result

            response, confidence = qa_choose(last_phrase, text_qa_response, kbqa_response)
            if len(response) > 300:
                response_cut = ""
                cur_len = 0
                response_split = response.split(", ")
                for piece in response_split:
                    if cur_len + len(piece) < 300:
                        response_cut += f"{piece}, "
                        cur_len += len(piece)
                response = response_cut.rstrip(", ")

            if not response:
                response = random.choice(DONT_KNOW_ANSWER)
                confidence = FACTOID_NOTSURE_CONFIDENCE
                attr["not sure"] = True
        else:
            logger.info("Question is not classified as factoid.")
            response = ""
            confidence = 0.0
        responses.append(response)
        confidences.append(confidence)
        attributes.append(attr)
    logger.info(f"Responses: {responses} --- confidences: {confidences}")
    total_time = time.time() - st_time
    logger.info(f"factoid_qa exec time: {total_time:.3f}s")
    return jsonify(list(zip(responses, confidences, attributes)))


if __name__ == "__main__":
    app.run(debug=False, host="0.0.0.0", port=3000)
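
# A minimal sketch of how this service is typically exercised (payload shape inferred from the
# handler above; the "dialogs" structure follows the DeepPavlov Dream agent state format):
#
#   curl -X POST http://localhost:3000/respond \
#        -H "Content-Type: application/json" \
#        -d '{"dialogs": [{"human_utterances": [{"text": "who wrote hamlet?", "annotations": {}}],
#                          "bot_utterances": []}]}'
#
# The reply is a JSON list of [response, confidence, attributes] triples, one per dialog.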