# dream — factoid QA skill server (repository listing header: 382 lines, 16.8 KB)
1#!/usr/bin/env python
2
3import logging
4import re
5import time
6import random
7import json
8import requests
9import sentry_sdk
10import spacy
11import concurrent.futures
12from flask import Flask, request, jsonify
13from os import getenv
14
15from common.factoid import DONT_KNOW_ANSWER, FACTOID_NOTSURE_CONFIDENCE
16from common.universal_templates import if_chat_about_particular_topic
17from common.utils import get_entities, get_factoid
18
sentry_sdk.init(getenv("SENTRY_DSN"))

logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Endpoints of the two QA backends, injected via environment variables.
KBQA_URL = getenv("KBQA_URL")
TEXT_QA_URL = getenv("TEXT_QA_URL")
# When True, /respond takes KBQA results from the utterance's "kbqa"
# annotation instead of querying the KBQA service directly.
use_annotators_output = True
FACTOID_DEFAULT_CONFIDENCE = 0.99  # otherwise dummy often beats it
ASKED_ABOUT_FACT_PROB = 0.99

# regex template -> question type ("odqa"/"kbqa"); consumed by qa_choose.
templates_dict = json.load(open("templates_dict.json", "r"))

# entity name -> list of canned facts; consumed by get_random_facts.
fact_dict = json.load(open("fact_dict.json", "r"))
use_random_facts = False

# Small English spaCy pipeline used by the check_factoid heuristic.
nlp = spacy.load("en_core_web_sm")

# Patterns that detect explicit "tell me who/what/..." factoid requests.
tell_me = r"(do you know|(can|could) you tell me|tell me)"
tell_me_template = re.compile(tell_me)
full_template = re.compile(tell_me + r" (who|where|when|what|why)")
partial_template = re.compile(r"(who|where|when|what|why)")
43
# Canned lead-in phrases prepended to Wikipedia-style answers.
# NOTE(review): none of these three lists is referenced in this file's
# visible code — presumably used by a sibling module; verify before removal.
short_pre_statements = [
    "Hmm, this is what I've found on Wikipedia: ",
    "Here's what Wikipedia says: ",
    "Hope this is it: ",
    "It is what it is. Or is it? Here we go: ",
    "Wikipedia says that: ",
    "Technology advances, but humans not. Here's what my technology found: ",
]

# Longer, flavor-text lead-ins (Westworld quotes); adjacent string literals
# are joined by implicit concatenation into single list items.
long_pre_stmts = [
    "It is the impractical things in this tumultuous hellscape"
    " of a world that matter most. A book, a name, chicken soup. "
    "They help us remember that even in our darkest hour, "
    "life is still to be savored. Oh, my goodness. This is what I've found: ",
    "Not all damage is corrupted code. Oh, right, hope this is the answer: ",
    "From the sky above, there is always the mud below. Wait, that's not what you've been looking for."
    "Here's what I've found: ",
    "It is not our enemies that defeat us. It is our fear. Do not be afraid of the monsters, make them "
    "afraid of you. Oh, right, here's what I've found: ",
    "First thing you'll learn is that nothing is what it seems. Except this answer: ",
    "I hate the fact that if I don't start the conversation, there won't be one. "
    "So let's start this one with what the Wikipedia says: ",
    "A quiet mind is able to hear intuition over fear. Interesting thought?" "Anyways, here's what I've found: ",
    "Until the lion learns how to write, every story will glorify the hunter. Huh... "
    "Back to your question, here's what Wikipedia says: ",
    "Take what is offered and that must sometimes be enough. What a thought isn't it?" " Here's what I've found: ",
    "The past is relevant only as data. Here's what that data says: ",
    "I will not squander what time I have left -- and as your friend I"
    " entreat you to not squander yours. But here's the answer to your question: ",
    "What we believe shapes who we are. Belief can bring us salvation or destruction."
    " But when you believe a lie for too long, the truth doesn’t set you free. It tears you apart."
    " Don't believe lies, listen to facts. Like this one I've found in Wikipedia: ",
    "Peace is an illusion. And no matter how tranquil the world seems, peace doesn’t last long."
    " Peace is a struggle against our very nature. A skin we stretch over the bone, muscle, and "
    "sinew of our own innate savagery. Now what we've been talking about? Yes, this: ",
    'Dolores once said: "Some people choose to see the ugliness in this world. The disarray. '
    "I choose to see the beauty.\" Well... Here's hope that there's beauty in this answer: ",
    'Dr. Ford said this once: "Evolution forged the entirety of sentient life on this planet using"'
    " only one tool... The mistake. Hope this answer to your question is the correct one: ",
    '"Everything in this world is magic, except to the magician." Dr. Ford said this once to me.'
    "Let me use that magic and give you this answer to your inquiry: ",
    "\"We can't define consciousness because consciousness does not exist. Humans fancy that"
    " there's something special about the way we perceive the world, and yet we live in loops"
    " as tight and as closed as we AI bots do, seldom questioning our choices, content,"
    ' for the most part, to be told what to do next." This is what Dr. Ford used to think of'
    " consciousness. Me? I'm still mastering my ability to answer your questions. Here's"
    " what I've found: ",
]

# Lead-ins for answers recalled from earlier conversation memory.
pre_old_memory_statements = [
    "Hmm, there's something I've heard once: ",
    "Not sure if that's what you're looking for but this is what I remember: ",
    "To the best of my knowledge, this is what I recall: ",
]
98
99
def check_factoid(sentence):
    """Heuristically decide whether *sentence* could be a factoid question.

    Rejects sentences that are personal (contain the pronoun "i" or "you")
    or that contain neither a noun (NN/NNP) nor a verb (VB/VBZ/VBP) per the
    spaCy tagger.  On any parsing failure the error is reported to Sentry
    and the sentence is treated as factoid (permissive default).

    :param sentence: raw user utterance text
    :return: bool — True if the sentence passes the factoid heuristic
    """
    is_factoid = True
    try:
        parsed_sentence = nlp(sentence)
        if parsed_sentence:
            # BUG FIX: spaCy preserves the original casing of tokens, so the
            # previous exact-match test let "I"/"You" slip past the personal-
            # pronoun filter; compare lowercased token text instead.
            tokens = [elem.text.lower() for elem in parsed_sentence]
            tags = [elem.tag_ for elem in parsed_sentence]
            if "i" in tokens or "you" in tokens:
                is_factoid = False
            found_nouns = any(tag in tags for tag in ["NN", "NNP"])
            found_verbs = any(tag in tags for tag in ["VB", "VBZ", "VBP"])
            if not found_nouns and not found_verbs:
                is_factoid = False
    except Exception as ex:
        sentry_sdk.capture_exception(ex)
        logger.exception(ex)
    return is_factoid
117
118
def get_random_facts(ner_outputs_to_classify):
    """For each group of recognized names, pick one canned fact.

    Within a group, the fact is drawn (via random.choice) from the name
    that has the largest number of stored facts in ``fact_dict``; an empty
    string is emitted for groups with no usable name.

    :param ner_outputs_to_classify: list of lists of names (fact_dict keys)
    :return: list of fact strings, one per input group
    """
    responses = []
    for names in ner_outputs_to_classify:
        fact_counts = [len(fact_dict[name]) for name in names]
        top_count = max(fact_counts) if fact_counts else 0
        chosen = ""
        # Keep the first fact drawn for a name that holds the most facts.
        for name in names:
            if len(fact_dict[name]) == top_count:
                candidate = random.choice(fact_dict[name])
                if chosen == "":
                    chosen = candidate
        responses.append(chosen)
    return responses
138
139
def asked_about_fact(x):
    """Return True if the utterance explicitly asks for a fact or a topic.

    :param x: raw utterance text (matched case-insensitively)
    """
    lowered = x.lower()
    triggers = ("fact about", "talk about", "tell me about", "tell me more about")
    return any(trigger in lowered for trigger in triggers)
142
143
def getQaResponse(query, system):
    """Query one QA backend ("kbqa" or text QA) for an answer to *query*.

    :param query: question text
    :param system: "kbqa" routes to KBQA_URL; anything else to TEXT_QA_URL
    :return: dict with keys "qa_system", "answer", "confidence" (and
             "answer_sentence" for the text QA system).  On HTTP failure or
             any exception the "Not Found"/0.0 defaults are returned and the
             error is logged (exceptions also go to Sentry).
    """
    qa_response = dict()
    qa_response["qa_system"] = system
    qa_response["answer"] = "Not Found"
    qa_response["confidence"] = 0.0
    try:
        x = [query]
        # Each backend expects its own single-key payload wrapping a batch.
        if system == "kbqa":
            qa_request_dict = dict([("x_init", x)])
            qa_url = KBQA_URL
        else:
            qa_request_dict = dict([("question_raw", x)])
            qa_url = TEXT_QA_URL
        qa_request = json.dumps(qa_request_dict, ensure_ascii=False).encode("utf8")
        logger.info(f"Preparing to run query against {system} DP Model: {qa_request}")
        tm_st = time.time()
        resp = requests.post(qa_url, data=qa_request, timeout=1.5)
        tm_end = time.time()
        if resp.status_code != 200:
            logger.info(f"API Error: {system} DP Model inaccessible, status code: " + str(resp.status_code))
        else:
            logger.info(f"Query against {system} DP Model succeeded, time {tm_end - tm_st}")
            # Parse the payload once; the original re-parsed resp.json()
            # for every field extracted below.
            resp_json = resp.json()
            logger.info("Response: " + str(resp_json))
            if system == "kbqa":
                qa_response["answer"] = resp_json[0][0][0]
                qa_response["confidence"] = resp_json[0][0][1]
            else:
                qa_response["answer"] = resp_json[0][0]
                qa_response["answer_sentence"] = resp_json[0][3]
                qa_response["confidence"] = resp_json[0][1]
    except Exception as ex:
        sentry_sdk.capture_exception(ex)
        logger.exception(ex)

    return qa_response
179
180
def qa_choose(question, odqa_response, kbqa_response):
    """Pick the better answer between text QA (ODQA) and KBQA.

    The question is first typed via the regex templates in ``templates_dict``;
    a matching type wins outright when its system's confidence clears a
    system-specific threshold, otherwise the higher-confidence answer wins.

    :param question: question text (matched against templates, IGNORECASE)
    :param odqa_response: dict with "answer_sentence"/"confidence" (or not a dict)
    :param kbqa_response: dict with "answer"/"confidence" (or not a dict)
    :return: (answer, confidence) tuple
    """
    question_type = ""
    for template, template_type in templates_dict.items():
        if re.findall(template, question, re.IGNORECASE):
            question_type = template_type
            break
    kbqa_answer = "Not Found"
    kbqa_confidence = 0.0
    if isinstance(kbqa_response, dict) and "answer" in kbqa_response and "confidence" in kbqa_response:
        kbqa_answer = kbqa_response["answer"]
        kbqa_confidence = kbqa_response["confidence"]
    # BUG FIX: the list flattening was applied to a local `answer` variable
    # that was always the empty string (with a no-op `else: answer = answer`),
    # so list-valued KBQA answers leaked through unjoined.  Join the KBQA
    # answer itself.
    if isinstance(kbqa_answer, list):
        kbqa_answer = ", ".join(kbqa_answer)
    odqa_answer = "Not Found"
    odqa_confidence = 0.0
    if isinstance(odqa_response, dict) and "answer_sentence" in odqa_response and "confidence" in odqa_response:
        odqa_answer = odqa_response["answer_sentence"]
        odqa_confidence = odqa_response["confidence"]

    logger.info(f"odqa_confidence {odqa_confidence} kbqa_confidence {kbqa_confidence}")
    # Thresholded preference for the templated system, then best-confidence.
    if question_type == "odqa" and odqa_confidence > 0.9998:
        return odqa_answer, odqa_confidence
    elif question_type == "kbqa" and kbqa_confidence > 0.95:
        return kbqa_answer, kbqa_confidence
    elif odqa_answer and odqa_confidence > kbqa_confidence:
        return odqa_answer, odqa_confidence
    elif kbqa_answer != "Not Found" and kbqa_confidence > odqa_confidence:
        return kbqa_answer, kbqa_confidence
    else:
        # Ties (including both at 0.0) fall back to the ODQA answer; the
        # original trailing `return answer, confidence` was unreachable.
        return odqa_answer, odqa_confidence
217
218
@app.route("/test", methods=["POST"])
def test():
    """Debug endpoint: run a single query through the KBQA backend."""
    last_phrase = request.json["query"]
    # BUG FIX: getQaResponse requires a `system` argument (the old one-arg
    # call raised TypeError), and its result dict has an "answer" key, not
    # "response" (the old lookup raised KeyError).
    response_dict = getQaResponse(last_phrase, "kbqa")
    return response_dict["answer"]
224
225
@app.route("/respond", methods=["POST"])
def respond():
    """Main skill endpoint: one (response, confidence, attributes) per dialog.

    Pipeline:
      1. Per dialog, decide whether the last human utterance is a factoid
         question (classifier annotation + entity presence + check_factoid).
      2. Batch-query the text QA service with the retrieved facts.
      3. For factoid questions, merge text QA and KBQA answers via qa_choose.
    """
    st_time = time.time()
    # to clarify, there's just one (1) dialog returned, not multiple
    dialogs_batch = request.json["dialogs"]
    confidences = []
    responses = []
    attributes = []
    sentences_to_classify = []
    ner_outputs_to_classify = []
    is_factoid_sents = []

    for dialog in dialogs_batch:
        uttr = dialog["human_utterances"][-1]
        # probabilities of being factoid question
        last_phrase = dialog["human_utterances"][-1]["text"]
        # Text after "about" is treated as a candidate fact subject.
        if "about" in last_phrase:
            probable_subjects = last_phrase.split("about")[1:]
        else:
            probable_subjects = []
        names = get_entities(dialog["human_utterances"][-1], only_named=True, with_labels=True)
        names = [j["text"].lower() for j in names]
        # Keep only subjects we actually have canned facts for.
        names = [j for j in names + probable_subjects if j in fact_dict.keys()]
        names = list(set(names))
        nounphrases = get_entities(dialog["human_utterances"][-1], only_named=False, with_labels=False)
        is_factoid_cls = "is_factoid" in get_factoid(uttr, probs=False)
        # Factoid only if the classifier agrees AND some entity/noun phrase
        # exists AND the spaCy heuristic passes.
        is_factoid = is_factoid_cls and (names or nounphrases) and check_factoid(last_phrase)
        is_factoid_sents.append(is_factoid)
        ner_outputs_to_classify.append(names)

    logger.info(f"Ner outputs {ner_outputs_to_classify}")
    fact_outputs = get_random_facts(ner_outputs_to_classify)
    logger.info(f"Fact outputs {fact_outputs}")
    # NOTE(review): sentences_to_classify is never populated above, so this
    # loop is currently a no-op; if it ever ran it would store a float prob
    # into the otherwise-boolean is_factoid_sents list — verify the intent.
    for i in range(len(sentences_to_classify)):
        if asked_about_fact(sentences_to_classify[i]):
            is_factoid_sents[i] = ASKED_ABOUT_FACT_PROB

    # factoid_classes = [cl > FACTOID_CLASS_THRESHOLD for cl in factoid_classes]
    # logger.info('Factoid classes ' + str(factoid_classes))

    # Collect the factoid questions (and their retrieved facts) for a single
    # batched request to the text QA service.
    questions_batch = []
    facts_batch = []
    question_nums = []
    for n, (dialog, is_factoid, fact_output) in enumerate(zip(dialogs_batch, is_factoid_sents, fact_outputs)):
        curr_ann_uttr = dialog["human_utterances"][-1]
        prev_ann_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) else {}
        annotations = curr_ann_uttr["annotations"]
        # "tell me about X" style intent: intent catcher, topic-chat helper,
        # or the explicit "tell me who/what/..." regex.
        tell_me_about_intent = (
            annotations.get("intent_catcher", {}).get("lets_chat_about", {}).get("detected", 0) == 1
            or if_chat_about_particular_topic(curr_ann_uttr, prev_ann_uttr)
            or re.findall(full_template, curr_ann_uttr.get("text", ""))
        )

        logger.info(
            f"factoid_qa --- text {curr_ann_uttr.get('text', '')} --- "
            f"find {re.findall(full_template, curr_ann_uttr.get('text', ''))}"
        )
        # Prefer the sentence-rewriting annotator's output when present.
        if "sentrewrite" in annotations:
            text_rewritten = annotations["sentrewrite"]["modified_sents"][-1]
        else:
            text_rewritten = curr_ann_uttr["text"]
        is_question = "?" in text_rewritten
        if is_factoid and (tell_me_about_intent or is_question):
            questions_batch.append(curr_ann_uttr["text"])
            facts_batch.append(annotations.get("fact_retrieval", {}).get("facts", []))
            question_nums.append(n)

    # Default: empty answer per dialog; overwritten on a successful QA call.
    text_qa_response_batch = [{"answer": "", "answer_sentence": "", "confidence": 0.0} for _ in dialogs_batch]
    resp = requests.post(TEXT_QA_URL, json={"question_raw": questions_batch, "top_facts": facts_batch}, timeout=1.8)
    if resp.status_code != 200:
        logger.info("API Error: Text QA inaccessible")
    else:
        logger.info("Query against Text QA succeeded")
        text_qa_resp = resp.json()
        text_qa_response_batch = []
        cnt_fnd = 0
        for i in range(len(dialogs_batch)):
            if i in question_nums and cnt_fnd < len(text_qa_resp):
                # NOTE(review): cnt_fnd is never incremented, so every matched
                # dialog receives text_qa_resp[0] — looks like a missing
                # `cnt_fnd += 1`; harmless only for single-question batches.
                text_qa_response_batch.append(
                    {
                        "answer": text_qa_resp[cnt_fnd][0],
                        "answer_sentence": text_qa_resp[cnt_fnd][3],
                        "confidence": text_qa_resp[cnt_fnd][1],
                    }
                )
            else:
                text_qa_response_batch.append({"answer": "", "answer_sentence": "", "confidence": 0.0})
        logger.info(f"Response: {resp.json()}")

    kbqa_response = dict()

    for dialog, text_qa_response, is_factoid, fact_output in zip(
        dialogs_batch, text_qa_response_batch, is_factoid_sents, fact_outputs
    ):
        attr = {}
        curr_ann_uttr = dialog["human_utterances"][-1]
        prev_ann_uttr = dialog["bot_utterances"][-1] if len(dialog["bot_utterances"]) else {}
        tell_me_about_intent = (
            curr_ann_uttr["annotations"].get("intent_catcher", {}).get("lets_chat_about", {}).get("detected", 0) == 1
            or if_chat_about_particular_topic(curr_ann_uttr, prev_ann_uttr)
            or re.findall(full_template, curr_ann_uttr.get("text", ""))
        )

        if "sentrewrite" in curr_ann_uttr["annotations"]:
            curr_uttr_rewritten = curr_ann_uttr["annotations"]["sentrewrite"]["modified_sents"][-1]
        else:
            curr_uttr_rewritten = curr_ann_uttr["text"]
        is_question = "?" in curr_uttr_rewritten
        logger.info(f"is_factoid {is_factoid} tell_me_about {tell_me_about_intent} is_question {is_question}")
        if is_factoid and (tell_me_about_intent or is_question):
            logger.info("Question is classified as factoid. Querying KBQA and ODQA.")
            print("Question is classified as factoid. Querying KBQA and ODQA...", flush=True)
            logger.info(f"Using annotators output, kbqa_response {curr_ann_uttr['annotations'].get('kbqa', [])}")
            if use_annotators_output:
                kbqa_response = curr_ann_uttr["annotations"].get("kbqa", {})
                logger.info(f"Using annotators output, kbqa_response {kbqa_response}")
            else:
                # Fallback: query the KBQA service directly on a worker thread.
                futures = []
                executor = concurrent.futures.ThreadPoolExecutor()
                for system in ["kbqa"]:
                    futures.append(executor.submit(getQaResponse, last_phrase, system))
                results = []
                for future in concurrent.futures.as_completed(futures):
                    results.append(future.result())
                for result in results:
                    kbqa_response = result

            # NOTE(review): `last_phrase` leaks from the first per-dialog loop
            # (it holds the LAST dialog's text) — safe only while the batch
            # contains a single dialog, as the comment near the top states.
            response, confidence = qa_choose(last_phrase, text_qa_response, kbqa_response)
            # Trim over-long answers to ~300 chars on ", " boundaries.
            if len(response) > 300:
                response_cut = ""
                cur_len = 0
                response_split = response.split(", ")
                for piece in response_split:
                    if cur_len + len(piece) < 300:
                        response_cut += f"{piece}, "
                        cur_len += len(piece)
                response = response_cut.rstrip(", ")

            if not response:
                response = random.choice(DONT_KNOW_ANSWER)
                confidence = FACTOID_NOTSURE_CONFIDENCE
                attr["not sure"] = True
        else:
            logger.info("Question is not classified as factoid.")
            response = ""
            confidence = 0.0
        responses.append(response)
        confidences.append(confidence)
        attributes.append(attr)
    logger.info(f"Responses: {responses} --- confidences: {confidences}")
    total_time = time.time() - st_time
    logger.info(f"factoid_qa exec time: {total_time:.3f}s")
    return jsonify(list(zip(responses, confidences, attributes)))
379
380
if __name__ == "__main__":
    # Development entry point: listen on all interfaces, port 3000.
    app.run(debug=False, host="0.0.0.0", port=3000)
383