dream

Форк
0
/
news.py 
162 строки · 5.9 Кб
1
import logging
2
import random
3
import re
4
import requests
5
from os import getenv
6

7
import sentry_sdk
8
from common import utils
9

10
# Initialise Sentry error reporting; the DSN is read from the SENTRY_DSN
# environment variable.
sentry_sdk.init(getenv("SENTRY_DSN"))

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
13

14
# Phrases of the news skill. OFFER_BREAKING_NEWS is how the skill offers the
# latest news when nothing more specific was found.
OFFER_BREAKING_NEWS = "Would you like to hear the latest news?"
OFFER_TOPIC_SPECIFIC_NEWS = "Would you like to hear news about TOPIC?"
SAY_TOPIC_SPECIFIC_NEWS = "Talking about TOPIC. I've recently heard that"

# Status values stored in the attributes of the news skill.
OFFER_TOPIC_SPECIFIC_NEWS_STATUS = "offered_specific_news"
OFFERED_BREAKING_NEWS_STATUS = "offered_breaking_news"
OFFERED_NEWS_DETAILS_STATUS = "offered_news_details"
OPINION_REQUEST_STATUS = "opinion_request"
OFFERED_NEWS_TOPIC_CATEGORIES_STATUS = "offered_news_topic_categories"

# Carries the same value as OFFERED_NEWS_DETAILS_STATUS.
NEWS_GIVEN = OFFERED_NEWS_DETAILS_STATUS

# Questions asking the user which kind of news to talk about.
WHAT_TYPE_OF_NEWS = [
    "What other kinds of news would you want to discuss?",
    "What are the other kinds of news would you like to hear about?",
    "What else would you want to hear news about?",
    "What type of news do you prefer?",
]
# Alias of the very same list object (not a copy).
NEWS_DUPLICATES = WHAT_TYPE_OF_NEWS

# Case-insensitive patterns: the first detects a mention of news / "what's new",
# the second captures the text following "news about" (letters, hyphens, spaces).
NEWS_COMPILED_PATTERN = re.compile(
    r"(news|(what is|what ?'s)( the)? new|something new)",
    flags=re.I,
)
EXTRACT_OFFERED_NEWS_TOPIC_TEMPLATE = re.compile(
    r"news? about ([a-z\- ]+)",
    flags=re.I,
)
36

37

38
def skill_trigger_phrases():
    """Return the list of trigger phrases: just the breaking-news offer."""
    phrases = [OFFER_BREAKING_NEWS]
    return phrases
40

41

42
def is_breaking_news_requested(prev_bot_utt, user_utt):
    """Tell whether the user accepted the bot's offer of breaking news.

    Args:
        prev_bot_utt: previous bot utterance dictionary; its "text" entry
            (empty string when absent) is searched case-insensitively for
            OFFER_BREAKING_NEWS
        user_utt: user utterance passed to `utils.is_yes`

    Returns:
        True when the offer is present in the bot text and
        `utils.is_yes(user_utt)` is truthy, False otherwise
    """
    bot_text = prev_bot_utt.get("text", "").lower()
    # Guard clause: without the offer the user's answer is not even inspected.
    if OFFER_BREAKING_NEWS.lower() not in bot_text:
        return False
    return True if utils.is_yes(user_utt) else False
47

48

49
# Opening parts of the question offering news about a specific topic; the
# topic itself and a question mark are appended by the caller.
TOPIC_NEWS_OFFER = [
    "Would you like to hear something new about",
    "Would you like to hear news about",
]
50

51

52
def get_offer_news_about_topic(topic):
    """Build a question offering news about `topic`.

    A randomly chosen entry of TOPIC_NEWS_OFFER is followed by a space, the
    topic and a question mark.
    """
    offer_prefix = random.choice(TOPIC_NEWS_OFFER)
    return "{} {}?".format(offer_prefix, topic)
54

55

56
def was_offer_news_about_topic(uttr: str) -> bool:
    """Check whether `uttr` contains an offer of news about some topic.

    Args:
        uttr: utterance text to inspect

    Returns:
        True when any entry of TOPIC_NEWS_OFFER occurs in `uttr`
        (case-insensitive substring match), False otherwise
    """
    uttr_lower = uttr.lower()
    # A generator lets any() stop at the first matching offer instead of
    # building a full list first.
    return any(offer.lower() in uttr_lower for offer in TOPIC_NEWS_OFFER)
61

62

63
def get_news_about_topic(
    topic: str, NEWS_API_ANNOTATOR_URL: str, discussed_news=None, return_list_of_news=False, timeout_value=1.0
):
    """
    Function to get news output from news-api-skill.

    A minimal single-dialog payload is built whose only human utterance is
    "news about <topic>", and it is POSTed as JSON to the service.
    ```
    import os

    NEWS_API_ANNOTATOR_URL = os.environ.get('NEWS_API_ANNOTATOR_URL')
    curr_news = get_news_about_topic("politics", NEWS_API_ANNOTATOR_URL)
    ```

    Attributes:
        - topic: string topic/entity about which one wants to get news
        - NEWS_API_ANNOTATOR_URL: news api skill url
        - discussed_news: list of string news urls which were given to user (not to repeat);
            None is treated as an empty list
        - return_list_of_news: whether to return the "list_of_news" entry of the matching
            response item instead of its "news" entry
        - timeout_value: timeout passed to `requests.post` (seconds)

    Returns:
        - the "news" (or "list_of_news") value of the response item whose "entity" equals
          `topic` case-insensitively; if several items match, the last one wins.
          An empty dictionary is returned when nothing matches or when the request or the
          parsing of its response fails (the error is sent to Sentry and logged, not raised).
    """
    # Example of a single news dictionary, as recorded by the original authors:
    # 'curr_news': {
    # 'content': "MORLEY -- Braelyn Berry will be doing her third sport with the track team in her junior season.\n
    #     But she's had some outstanding efforts in her other two sports.\nBerry was an All-Stater with the volleyball
    #     team in the fall and a standout with her baske... [1299 chars]",
    # 'description': 'MORLEY -- Braelyn Berry will be doing her third sport with the track team in her junior...',
    # 'image': 'https://s.hdnux.com/photos/01/17/44/57/20859886/3/rawImage.jpg',
    # 'publishedAt': '2021-04-13T02:43:00Z',
    # 'source': {'name': 'The Pioneer', 'url': 'https://www.bigrapidsnews.com'},
    # 'title': 'Morley Stanwood multi-sport athlete anxious for spring season',
    # 'url': 'https://www.bigrapidsnews.com/sports/article/Morley-Stanwood-multi-sport-athlete-anxious-for-16096053.php'
    # },
    if discussed_news is None:
        discussed_news = []

    human_attr = {"news_api_skill": {"discussed_news": discussed_news}}
    payload = {
        "dialogs": [
            {
                "utterances": [],
                "bot_utterances": [],
                "human": {"attributes": human_attr},
                "human_utterances": [
                    {
                        "text": f"news about {topic}",
                        "annotations": {"ner": [[{"text": topic}]], "cobot_topics": {"text": ["News"]}},
                    }
                ],
            }
        ],
        "return_list_of_news": return_list_of_news,
    }
    # The two modes differ only in which key of the matching item is returned.
    news_key = "list_of_news" if return_list_of_news else "news"
    result_news = {}
    try:
        response = requests.post(NEWS_API_ANNOTATOR_URL, json=payload, timeout=timeout_value)
        # The service answers with one list of per-entity items per dialog;
        # a single dialog was sent, so only the first list is relevant.
        entity_items = response.json()[0]
        topic_lower = topic.lower()
        for entity_news_dict in entity_items:
            if entity_news_dict and str(entity_news_dict["entity"]).lower() == topic_lower:
                result_news = entity_news_dict[news_key]
    except Exception as e:
        # Best effort by design: any failure is reported and the caller gets
        # whatever was collected so far (normally the empty dictionary).
        sentry_sdk.capture_exception(e)
        logger.exception(e)

    return result_news
133

134

135
# Words that are not accepted as news topics.
BANNED_UNIGRAMS = [
    "I",
    "i",
    "news",
    "something",
    "anything",
    "me",
]
136

137

138
def extract_topics(curr_uttr):
    """Extract entities as topics for news request. If no entities found, extract nounphrases.

    Named entities (`only_named=True`) are lowercased and filtered first. Only
    when none of them survive are all entities (`only_named=False`) considered;
    those are kept in their original case and de-duplicated.

    Args:
        curr_uttr: current human utterance dictionary

    Returns:
        list of mentioned entities/nounphrases
    """
    # Computed once: an "alexa" entity is dropped only when the utterance
    # itself starts with "alexa". A missing or None text counts as empty.
    starts_with_alexa = (curr_uttr.get("text") or "").lower().startswith("alexa")

    named_entities = (ent.lower() for ent in utils.get_entities(curr_uttr, only_named=True, with_labels=False))
    topics = [
        ent for ent in named_entities if "news" not in ent and not (ent == "alexa" and starts_with_alexa)
    ]

    if not topics:
        for ent in utils.get_entities(curr_uttr, only_named=False, with_labels=False):
            ent_lower = ent.lower()
            if ent_lower in BANNED_UNIGRAMS or "news" in ent_lower:
                continue
            if ent not in topics:
                topics.append(ent)

    # Drop empty strings.
    return [ent for ent in topics if ent]
163

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.