dream

Форк
0
/
gaming.py 
460 строк · 16.8 Кб
1
import json
2
import logging
3
import os
4
import re
5
from pathlib import Path
6
from typing import Dict, List, Union
7

8
import requests
9
import sentry_sdk
10
from common.inflect import engine
11
from requests import RequestException
12
from common import utils
13
from common.combined_classes import TOPIC_GROUPS
14

15

16
# Phrases that signal talk about video games: explicit mentions of video games, gaming or consoles,
# "play(ed|ing|s) ... tablet/pc/computer", "game(s) for <platform>" and "what/which ... game ... play ...?".
# NOTE(review): the same name is assigned again further down in this module, so once the module is imported
# this first pattern only lives on as the prefix of CHECK_DEFINITELY_GAME_COMPILED_PATTERN.
VIDEO_GAME_WORDS_COMPILED_PATTERN = re.compile(
    r"\bvideo ?game|\bgaming\b|\bplay ?station|\bx ?box\b|\bplay(?:ed|ing|s).*\b(?:tablet|pc|computer)\b|"
    r"\bgames? for (?:android|pc|computer|play ?station|x ?box|tablet|ipad)\b|\b(what|which)\b.+\bgame.+\bplay.+\?",
    flags=re.I,
)

# Looser variant: everything above plus the bare words "game(s)" and "played" / "playing" / "plays".
CHECK_DEFINITELY_GAME_COMPILED_PATTERN = re.compile(
    "|".join((VIDEO_GAME_WORDS_COMPILED_PATTERN.pattern, r"\bgames?\b", r"\bplay(?:ed|ing|s)\b")),
    flags=re.I,
)
25

26

27
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Initialise Sentry with the DSN taken from the environment (None when SENTRY_DSN is not set).
sentry_sdk.init(os.environ.get("SENTRY_DSN"))

# Shared inflect engine; the number helpers below use it to spell numbers out in words.
inflect_engine = engine()
33

34

35
def about_games(annotated_utterance):
    """Tell whether an annotated utterance is about games.

    The utterance counts as game-related when any topic from ``TOPIC_GROUPS["games"]`` is among the topics
    returned by ``utils.get_topics`` for it, or, failing that, when ``VIDEO_GAME_WORDS_COMPILED_PATTERN``
    matches somewhere in ``annotated_utterance["text"]``.

    Returns:
        ``True`` or ``False``.
    """
    detected_topics = utils.get_topics(annotated_utterance, probs=False, which="all")
    if any(topic in detected_topics for topic in TOPIC_GROUPS["games"]):
        return True
    # The text is only looked at when the topic annotations did not already decide the question.
    return VIDEO_GAME_WORDS_COMPILED_PATTERN.search(annotated_utterance["text"]) is not None
43

44

45
# Roman symbols keyed by their value, largest first. The subtractive pairs (CM, CD, XC, XL, IX, IV) are part
# of the table so that a greedy walk over the keys yields the usual spelling.
INT_TO_ROMAN = dict(
    [
        (1000, "M"),
        (900, "CM"),
        (500, "D"),
        (400, "CD"),
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    ]
)
# Reverse lookup derived from the table above. Sorting by (symbol length, value) lists the single letters
# first and the two-letter pairs second, each group in ascending value.
ROMAN_TO_INT = {
    symbol: value for value, symbol in sorted(INT_TO_ROMAN.items(), key=lambda item: (len(item[1]), item[0]))
}
75
# Building blocks of a Roman numeral: one group of alternatives per decimal position.
_ROMAN_HUNDREDS = r"(?:CM|CD|DC{0,3}|C{1,3})"
_ROMAN_TENS = r"(?:XC|XL|LX{0,3}|X{1,3})"
_ROMAN_ONES = r"(?:IX|IV|VI{0,3}|I{1,3})"

# A whole-word Roman numeral, case-insensitive. The four alternatives differ in the highest position that is
# present (thousands, hundreds, tens, ones); the lower positions are optional, so an empty match is impossible.
ROMAN_NUMBER_COMPILED_PATTERN = re.compile(
    r"\b(?:"
    + "|".join(
        (
            r"M{1,4}" + _ROMAN_HUNDREDS + "?" + _ROMAN_TENS + "?" + _ROMAN_ONES + "?",
            _ROMAN_HUNDREDS + _ROMAN_TENS + "?" + _ROMAN_ONES + "?",
            _ROMAN_TENS + _ROMAN_ONES + "?",
            _ROMAN_ONES,
        )
    )
    + r")\b",
    re.I,
)
# A decimal integer without leading zeros.
INTEGER_PATTERN = re.compile(r"[1-9][0-9]*", re.I)
# Either kind of number; the Roman alternative comes first.
NUMBER_COMPILED_PATTERN = re.compile("|".join((ROMAN_NUMBER_COMPILED_PATTERN.pattern, INTEGER_PATTERN.pattern)), re.I)
84

85

86
def write_roman(num):
    """Spell an integer as a Roman numeral.

    Walks ``INT_TO_ROMAN`` in its stored order (largest value first), emitting each symbol as many times as
    its value fits into what is left, and stops once nothing is left. Intended for positive integers; ``0``
    gives an empty string.

    Args:
        num: the integer to convert.

    Returns:
        The numeral as an upper-case string.
    """
    pieces = []
    remaining = num
    for value, symbol in INT_TO_ROMAN.items():
        count = remaining // value
        pieces.append(symbol * count)
        remaining -= value * count
        if remaining <= 0:
            break
    return "".join(pieces)
96

97

98
def roman_to_int(s):
    """Convert an upper-case Roman numeral to an integer.

    The string is read left to right. At each position a two-letter subtractive pair from ``ROMAN_TO_INT``
    (IV, IX, XL, XC, CD, CM) is preferred; otherwise the single letter is used. The index advances by exactly
    the number of letters consumed, so a symbol that follows a pair is never skipped ("XCIX" is 99).

    Args:
        s: the numeral in upper case (``ROMAN_TO_INT`` only has upper-case keys).

    Returns:
        The sum of the values of the symbols read; ``0`` for an empty string.

    Raises:
        KeyError: if a letter is not a Roman symbol.
    """
    i = 0
    num = 0
    while i < len(s):
        pair = s[i : i + 2]
        if len(pair) == 2 and pair in ROMAN_TO_INT:
            num += ROMAN_TO_INT[pair]
            i += 2
        else:
            num += ROMAN_TO_INT[s[i]]
            i += 1
    return num
109

110

111
def roman_number_replace(match_obj):
    """Turn a matched Roman numeral into a regex fragment accepting any spelling of that number.

    Args:
        match_obj: a regex match whose whole text is a Roman numeral (any letter case).

    Returns:
        A fragment of the form ``(?:part )?(?:<digits>|<words>|<numeral as written>)``, where ``<words>`` comes
        from ``inflect_engine.number_to_words``.
    """
    numeral = match_obj.group(0)
    # roman_to_int works on upper-case symbols; the numeral itself is kept as written for the last alternative.
    value = roman_to_int(numeral.upper())
    spelled_out = inflect_engine.number_to_words(value)
    return f"(?:part )?(?:{value}|{spelled_out}|{numeral})"
115

116

117
def integer_replace(match_obj):
    """Turn a matched decimal integer into a regex fragment accepting any spelling of that number.

    Args:
        match_obj: a regex match whose whole text is a decimal integer.

    Returns:
        A fragment of the form ``(?:part )?(?:<digits>|<words>|<Roman numeral>)``, where ``<words>`` comes from
        ``inflect_engine.number_to_words`` and the numeral from ``write_roman``.
    """
    value = int(match_obj.group(0))
    spelled_out = inflect_engine.number_to_words(value)
    numeral = write_roman(value)
    return f"(?:part )?(?:{value}|{spelled_out}|{numeral})"
122

123

124
def number_replace(match_obj):
    """Replacement callback for ``NUMBER_COMPILED_PATTERN``: dispatch on the kind of number matched.

    Text that starts with a Roman numeral goes to ``roman_number_replace``; everything else is treated as a
    decimal integer and goes to ``integer_replace``.
    """
    looks_roman = ROMAN_NUMBER_COMPILED_PATTERN.match(match_obj.group(0)) is not None
    handler = roman_number_replace if looks_roman else integer_replace
    return handler(match_obj)
129

130

131
# An English article at a word start together with the space after it (captured as group 1).
ARTICLE_PATTERN = re.compile(r"(\ba |\ban |\bthe )", flags=re.I)
# A literal colon.
COLON_PATTERN = re.compile(r":")
# Either an article or a colon: the two parts of a game name that are made optional in its regex.
ARTICLE_COLON_PATTERN = re.compile("|".join((ARTICLE_PATTERN.pattern, COLON_PATTERN.pattern)), flags=re.I)
134

135

136
def article_colon_replacement(match_obj):
    """Replacement callback for ``ARTICLE_COLON_PATTERN``: make the matched article or colon optional.

    An article such as ``"The "`` becomes the non-capturing optional group ``"(?:The )?"``; a colon becomes
    ``":?"``.
    """
    matched = match_obj.group(0)
    if ARTICLE_PATTERN.match(matched) is None:
        return COLON_PATTERN.sub(r":?", matched)
    return ARTICLE_PATTERN.sub(r"(?:\1)?", matched)
142

143

144
def compose_game_name_re(name):
    """Derive regex source strings from one game name.

    Articles and colons are made optional and every number (Roman or decimal) is replaced by a group that
    accepts its other spellings. For "The Witcher 3: Wild Hunt" the three results are built from the whole
    name, from "The Witcher 3" and from "The Witcher".

    NOTE(review): ``name`` goes into the pattern without ``re.escape``, so regex metacharacters in a game name
    are read as regex syntax - confirm the names are free of them.

    Args:
        name: the game name as written.

    Returns:
        A tuple ``(pattern, before_colon_name, no_numbers_name)``:
        ``pattern`` for the whole name; ``before_colon_name`` for the text before the first colon, or ``None``
        when there is no colon or nothing but whitespace before it; ``no_numbers_name`` for the text before the
        first number (numbers are not expanded in it), or ``None`` when the name has no number or nothing
        precedes it.
    """

    def relax_articles_and_colons(text):
        return ARTICLE_COLON_PATTERN.sub(article_colon_replacement, text)

    def expand_numbers(text):
        return NUMBER_COMPILED_PATTERN.sub(number_replace, text)

    pattern = expand_numbers(relax_articles_and_colons(name))

    before_colon_name = None
    if ":" in name:
        head = name.split(":")[0].strip()
        if head:
            before_colon_name = expand_numbers(relax_articles_and_colons(head))

    no_numbers_name = None
    number_match = NUMBER_COMPILED_PATTERN.search(name)
    if number_match is not None:
        prefix = relax_articles_and_colons(name[: number_match.start()]).strip()
        if prefix:
            no_numbers_name = prefix

    return pattern, before_colon_name, no_numbers_name
164

165

166
def compile_re_pattern_for_list_of_strings(list_of_game_names: List[Union[str, List[str]]]):
    """Compile one case-insensitive regex that wraps the alternatives of every entry in one capturing group.

    An entry of ``list_of_game_names`` is either a game name or a list whose first item is the main name and
    whose remaining items are alternative names. The group of an entry lists its alternatives in this order:

    1. the full main name;
    2. the part of the main name before its first number, unless that part equals (ignoring case) the full
       pattern of an entry without a number, so "The Witcher 3: Wild Hunt" does not claim "The Witcher" when
       "the Witcher" is a game of its own;
    3. the alternative names of the entry;
    4. the part of the main name before its first colon.

    All patterns come from ``compose_game_name_re``. Groups appear in the order of ``list_of_game_names``.

    Returns:
        The compiled pattern.
    """
    separator = r"\b|\b"  # ends one alternative on a word boundary and starts the next on one

    full_name_patterns = []  # per entry: regex source, starting as the main-name pattern and extended below
    before_colon_names = []  # per entry: the before-colon pattern or None
    # Full patterns of the main names that contain no number.
    full_names_without_numbers = []
    # Lower-cased before-number pattern -> {"not_lowered": the pattern as first seen,
    # "full_indices": indices in `full_name_patterns` of the entries sharing that prefix}.
    before_number_name_to_full_names: Dict[str, Dict[str, Union[str, List[int]]]] = {}
    # Alternative name -> indices of the entries that list it; one alternative name may belong to several
    # main names.
    alternative_names: Dict[str, List[int]] = {}

    for index, entry in enumerate(list_of_game_names):
        if isinstance(entry, list):
            main_name = entry[0]
            for alternative in entry[1:]:
                alternative_names.setdefault(alternative, []).append(index)
        else:
            main_name = entry
        full, before_colon, before_number = compose_game_name_re(main_name)
        full_name_patterns.append(full)
        before_colon_names.append(before_colon)
        if before_number is None:
            full_names_without_numbers.append(full)
            continue
        prefix_info = before_number_name_to_full_names.setdefault(
            before_number.lower(), {"not_lowered": before_number, "full_indices": []}
        )
        prefix_info["full_indices"].append(index)

    # A prefix that is itself the full pattern of a number-free game must not be added to other games.
    for full in full_names_without_numbers:
        before_number_name_to_full_names.pop(full.lower(), None)

    for prefix_info in before_number_name_to_full_names.values():
        for index in prefix_info["full_indices"]:
            full_name_patterns[index] += separator + prefix_info["not_lowered"]

    for alternative, indices in alternative_names.items():
        alternative_pattern = compose_game_name_re(alternative)[0]
        for index in indices:
            full_name_patterns[index] += separator + alternative_pattern

    for index, before_colon in enumerate(before_colon_names):
        if before_colon is not None:
            full_name_patterns[index] += separator + before_colon

    groups = [r"(\b" + source + r"\b)" for source in full_name_patterns]
    return re.compile("|".join(groups), flags=re.I)
225

226

227
def load_json(file_path):
    """Read a JSON file and return the decoded data.

    The file is opened as UTF-8 explicitly, so the result does not depend on the platform's default
    encoding.

    Args:
        file_path: path of the JSON file.

    Returns:
        Whatever ``json.load`` produces for the file content.
    """
    with open(file_path, encoding="utf-8") as f:
        return json.load(f)
231

232

233
# The list of known games is read from a JSON file that sits next to this module.
path = Path(__file__).parent / "games_with_at_least_1M_copies_sold.json"
GAMES_WITH_AT_LEAST_1M_COPIES_SOLD = load_json(path)
# Built once at import time from the list above.
GAMES_WITH_AT_LEAST_1M_COPIES_SOLD_COMPILED_PATTERN = compile_re_pattern_for_list_of_strings(
    GAMES_WITH_AT_LEAST_1M_COPIES_SOLD
)
238

239

240
def find_games_in_text(text):
    """Find mentions of known games in ``text``.

    Args:
        text: the text to scan with ``GAMES_WITH_AT_LEAST_1M_COPIES_SOLD_COMPILED_PATTERN``.

    Returns:
        A list with one item per regex match, in order of appearance. Each item is the list of main game
        names (the entry itself, or its first element when the entry is a list) whose capturing group took
        part in that match.
    """
    found_names = []
    # finditer + groups() gives one slot per capturing group for every match. findall would hand back plain
    # strings instead of tuples if the pattern had exactly one group (a one-game list), and the loop below
    # would then walk over characters.
    for match in GAMES_WITH_AT_LEAST_1M_COPIES_SOLD_COMPILED_PATTERN.finditer(text):
        match_names = []
        for i, name in enumerate(match.groups()):
            if not name:  # None: the group did not take part in this match
                continue
            orig = GAMES_WITH_AT_LEAST_1M_COPIES_SOLD[i]
            if isinstance(orig, list):
                orig = orig[0]
            logger.info("orig: %s", orig)
            match_names.append(orig)
        # Every match comes from some game's group, so at least one name is expected here.
        assert match_names
        found_names.append(match_names)
    return found_names
254

255

256
# Words that mark talk about video games. This assignment replaces the pattern bound to the same name near the
# top of the module, so it is the one seen by code that looks the name up after import.
VIDEO_GAME_WORDS_COMPILED_PATTERN = re.compile(
    r"(?:\bvideo ?game|\bgam(?:e|es|ing)\b|\bplay ?station|\bplaying\b|\bx ?box\b|"
    r"\bplay(ed|ing|s).*\b(tablet|pc|computer)\b)",
    flags=re.I,
)

# The same words followed, after at least one letter, space or hyphen, by a question mark.
VIDEO_GAME_QUESTION_COMPILED_PATTERN = re.compile(
    VIDEO_GAME_WORDS_COMPILED_PATTERN.pattern + r"[a-zA-Z \-]+\?",
    flags=re.I,
)
267

268

269
# Named groups of genre ids and theme ids.
# NOTE(review): the integers look like ids from the game database this module talks to (igdb.com) - confirm.
genre_and_theme_groups = dict(
    action=dict(genres=[2, 4, 5, 10, 11, 12, 24, 25, 31, 36], themes=[1, 23, 39]),
    history=dict(genres=[11, 15, 16], themes=[22, 39]),
)
273

274

275
# Phrases that steer the talk from a game to movies. Under "theme" they are keyed by a theme name, under
# "theme_genre_group" by a group name (the same names as the keys of `genre_and_theme_groups`).
links_to_movies = dict(
    theme={
        "Fantasy": [
            "You know, many fantasy video games are based on movies or books. "
            "My favorite fantasy movie is the Lord of the Rings. What is your favorite fantasy movie?"
        ],
        "Science fiction": [
            "I also like science fiction movies. My favorite is Ex Machina. What is your favorite sci-fi movie?"
        ],
        "Horror": [
            "To be honest, horror video games are not my favorite. "
            "How about movies? What is your favorite horror movie?"
        ],
        "Thriller": ["I think this game is too scary for me. What cool thriller movie do you remember?"],
    },
    theme_genre_group=dict(
        action=["Action games are cool but how about movies? What is your favorite action movie?"],
        history=["Changing topic slightly, what is your favorite historical movie?"],
    ),
)
295

296

297
# Phrases that steer the talk from a game to books. Under "theme" they are keyed by a theme name, under
# "theme_genre_group" by a group name.
links_to_books = dict(
    theme={
        "Fantasy": [
            "Sometimes I like to imagine fantasy worlds myself and just look at something drawn by video game artist. "
            "Could you tell me what is your favorite fantasy book?"
        ],
        "Science fiction": [
            "Video games are not the only way to touch fantastic worlds. What is your favorite sci-fi book?"
        ],
        "Horror": ["I never really liked horror video games. Books are less scary. What is your favorite horror book?"],
        "Thriller": ["I think this game is too scary for me. Do you read thriller books?"],
    },
    theme_genre_group=dict(
        history=["History games are cool! But what about books that describe times long gone? Do you like such books?"],
    ),
)
315

316

317
# Follow-up questions tied to one particular game series, keyed by the series name.
special_links_to_movies = {"Harry Potter": ["By the way, what is your favorite Harry Potter movie?"]}


special_links_to_books = {"Harry Potter": ["By the way, what Harry Potter book did impress you most?"]}


# Titles of the seven parts, in order.
harry_potter_part_names = [
    "Harry Potter and the Sorcerer's Stone",
    "Harry Potter and the Chamber of Secrets",
    "Harry Potter and the Prisoner of Azkaban",
    "Harry Potter and the Goblet of Fire",
    "Harry Potter and the Order of the Phoenix",
    "Harry Potter and the Half-Blood Prince",
    "Harry Potter and the Deathly Hallows",
]


# Cue words for each part, parallel to `harry_potter_part_names`. The last list also holds "harry" and
# "potter", so any answer that names the series falls through to the last part.
harry_potter_part_number_words = [
    ["first", "one", "all", "every", "philosopher", "sorcerer", "stone"],
    ["second", "two", "chamber", "secret"],
    ["third", "three", "prisoner", "azkaban"],
    ["four", "fourth", "goblet", "fire"],
    ["fifth", "five", "order", "phoenix"],
    ["sixth", "six", "half", "blood", "prince"],
    ["last", "seventh", "eighth", "deathly", "hallows", "harry", "potter"],
]


def get_harry_potter_part_name_if_special_link_was_used(human_utterance, prev_bot_utterance):
    """Work out which Harry Potter part the user named in reply to a Harry Potter follow-up question.

    Args:
        human_utterance: mapping with the user's reply under ``"text"`` (a missing key counts as empty text).
        prev_bot_utterance: mapping with the previous bot reply under ``"text"``.

    Returns:
        The title from ``harry_potter_part_names`` for the first part, in part order, one of whose cue words
        starts a word of the user's text; ``None`` if the previous bot reply contains none of the Harry
        Potter special links or no cue word is found. The comparison ignores letter case.
    """
    prev_bot_utterance_text = prev_bot_utterance.get("text", "").lower()
    human_utterance_text = human_utterance.get("text", "").lower()
    special_link_tos = special_links_to_movies["Harry Potter"] + special_links_to_books["Harry Potter"]
    if not any(link.lower() in prev_bot_utterance_text for link in special_link_tos):
        return None
    for part_name, cue_words in zip(harry_potter_part_names, harry_potter_part_number_words):
        # A cue word has to start a word. A plain substring test finds "one" inside "prisoner" and "all"
        # inside "hallows" and would send both titles to the first part. Endings stay free, so "secret"
        # still covers "secrets".
        if any(re.search(r"\b" + re.escape(word), human_utterance_text) for word in cue_words):
            return part_name
    return None
356

357

358
def compose_list_of_links(link_dict):
    """Flatten a two-level mapping of phrase lists into a single list.

    Args:
        link_dict: mapping whose values are mappings whose values are lists of phrases
            (the shape of ``links_to_movies`` and ``links_to_books``).

    Returns:
        A new list with every phrase, in iteration order of the outer and then the inner mapping.
    """
    return [phrase for group in link_dict.values() for phrases in group.values() for phrase in phrases]
364

365

366
def compose_list_of_special_links(link_dict):
    """Flatten a mapping of phrase lists into a single list.

    Args:
        link_dict: mapping whose values are lists of phrases (the shape of ``special_links_to_movies`` and
            ``special_links_to_books``).

    Returns:
        A new list with every phrase, in iteration order of the mapping.
    """
    return [phrase for phrases in link_dict.values() for phrase in phrases]
371

372

373
# Every phrase that leads to books: the general ones first, then the series-specific ones.
ALL_LINKS_TO_BOOKS = [
    *compose_list_of_links(links_to_books),
    *compose_list_of_special_links(special_links_to_books),
]
374

375

376
def skill_trigger_phrases():
    """Return two questions about the user's video games, as a new list on every call."""
    phrases = [
        "What video game are you playing in recent days?",
        "What is your favorite video game?",
    ]
    return phrases
378

379

380
# Reply to a general wish to talk about video games, ending with a question about the user's current game.
# NOTE(review): "would definitely played" is ungrammatical; the text is left as is because other code or
# tests may compare against it.
ANSWER_TO_GENERAL_WISH_TO_DISCUSS_VIDEO_GAMES_AND_QUESTION_WHAT_GAME_YOU_PLAY = " ".join(
    (
        "Wow, video games are cool.",
        "If I didn't love to chat so much, I would definitely played video games at least half a day.",
        "What game are you playing now?",
    )
)
385

386

387
# NOTE(review): presumably fragments of bot replies after which the games talk can go on - confirm against
# the callers. "Got a list of the top released games, wanna discuss it?" is listed twice.
CAN_CONTINUE_PHRASES = [
    # Offers to discuss lists of top games
    "I love games, especially stats like top of the games released.",
    "Got a list of the top released games, wanna discuss it?",
    "Which of these time periods is of interest for you?",
    "Got a list of the top released games, wanna discuss it?",
    "I can talk about the most popular games for this or last year, last month, or even the last week",
    "released games highly rated in this year. Do you want to learn more?",
    "If you want to discuss it in details say I want to talk about it.",
    "Do you want to learn more about it, or shall we move on?",
    "Have you played it before?",
    "You can always talk to me about other popular games.",
    "Do you want to chat about the best games of the past year, this year, last month or week?",
    # Discuss this game or go to the next one
    "Let me know if we should talk about the next one or discuss this one",
    "Talking about it or going on?",
    "Discussing it or moving on?",
    "Chatting about it or the next one?",
    # Ratings and everything else
    "How would you rate the desire to play it again",
    "Your rating is way lower than one given by the rest of the players.",
    "My memory failed me and I can't recall anything else about the games.",
    "one of my hobbies is keeping fresh stats about the top video games.",
    "How would you rate the desire to play",
    "I'd love to talk about other things but my developer forgot to add them to my memory banks.",
    "I was talking about games, do you want to continue?",
    "I can tell you about some music for gaming, should I continue?",
]
412

413

414
# NOTE(review): presumably fragments of bot replies that mention games or playing without being about video
# games (they are all about pets) - confirm against the callers.
CAN_NOT_CONTINUE_PHRASES = [
    "Do you think that pets can use gadgets the same way as humans?",
    "play with my cat different games, such as run and fetch",
    "I played with my cat a game",
    "play with my dog different game",
    "game that I like to play with my cat",
    "playing with a pet makes a lot of fun",
]
422

423

424
def get_igdb_client_token(client_id, client_secret):
    """Ask Twitch for an access token using the client-credentials grant.

    Args:
        client_id: client id of the application.
        client_secret: client secret of the application. It is sent with the request but never logged.

    Returns:
        The value of ``"access_token"`` in the JSON response, or ``None`` when the request fails, the body is
        not a JSON object or it has no ``"access_token"``. Every failure is logged as a warning.
    """

    def redact(text):
        # The credentials travel in the query string, so an exception text can quote the URL with the secret.
        secret = str(client_secret) if client_secret else ""
        return text.replace(secret, "<redacted>") if secret else text

    payload = {"client_id": client_id, "client_secret": client_secret, "grant_type": "client_credentials"}
    url = "https://id.twitch.tv/oauth2/token?"
    timeout = 20.0
    try:
        token_data = requests.post(url, params=payload, timeout=timeout)
    except RequestException as e:
        logger.warning(
            f"Request to {url} failed. `dff_gaming_skill` failed to get access to igdb.com. {redact(str(e))}"
        )
        return None

    try:
        token_data_json = token_data.json()
    except ValueError as e:  # covers json.JSONDecodeError and the JSON errors raised by requests
        logger.warning(
            f"Response from {url} is not valid JSON. `dff_gaming_skill` failed to get access to igdb.com\n"
            f"response status code: {token_data.status_code}\n{redact(str(e))}"
        )
        return None

    access_token = token_data_json.get("access_token") if isinstance(token_data_json, dict) else None
    if access_token is None:
        logger.warning(
            f"Could not get access token for CLIENT_ID={client_id}. "
            f"`dff_gaming_skill` failed to get access to igdb.com\n"
            f"url={url}\ntimeout={timeout}\nresponse status code: {token_data.status_code}"
        )
    return access_token
443

444

445
class BearerAuth(requests.auth.AuthBase):
    """Auth callable for ``requests`` that puts a bearer token into the ``Authorization`` header."""

    def __init__(self, token):
        # Stored as given; it is concatenated to "Bearer " when the object is called.
        self.token = token

    def __call__(self, r):
        """Set ``Authorization: Bearer <token>`` on the request ``r`` and return ``r``."""
        header_value = "Bearer " + self.token
        r.headers["Authorization"] = header_value
        return r
452

453

454
def get_igdb_post_kwargs(client_token, client_id):
    """Build keyword arguments (``auth``, ``headers``, ``timeout``) for a ``requests`` POST call.

    Args:
        client_token: access token, wrapped in ``BearerAuth``.
        client_id: value of the ``Client-ID`` header.

    Returns:
        A dict with bearer auth, headers for a plain-text body and a JSON answer, and a one-second timeout.
    """
    headers = {"Client-ID": client_id, "Accept": "application/json", "Content-Type": "text/plain"}
    return {"auth": BearerAuth(client_token), "headers": headers, "timeout": 1.0}
461

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.