dream

Форк
0
204 строки · 7.0 Кб
1
import logging
2
import os
3
import time
4
from flask import Flask, request, jsonify
5
import sentry_sdk
6
from deeppavlov import build_model
7

8
logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)
# The Sentry DSN is read from the SENTRY_DSN environment variable.
sentry_sdk.init(os.getenv("SENTRY_DSN"))

app = Flask(__name__)

# The model config passed to build_model is read from the CONFIG environment variable.
config_name = os.getenv("CONFIG")

# One relation name per line. Blank lines are dropped: an empty string in this list
# would make every entity without a relation ("" lookup) count as abstract.
with open("abstract_rels.txt", "r", encoding="utf-8") as inp:
    abstract_rels = [rel for rel in (line.strip() for line in inp) if rel]

try:
    el = build_model(config_name, download=True)
    logger.info("model loaded")
except Exception as e:
    # Report the failure, then let it propagate: the service cannot work without the model.
    sentry_sdk.capture_exception(e)
    logger.exception(e)
    raise
26

27

28
def preprocess_context(context_batch):
    """Reduces each dialogue history to the single text used for entity matching.

    For every history the last utterance is used. When the previous utterance looks
    like a question (starts with "what ", "who ", "when " or "where ", or contains
    "?") and the last utterance is shorter than three words, the two are joined with
    a space so that a short answer keeps the question as its context.

    Args:
      context_batch (list): List of conversation contexts, each a list of utterances.

    Returns:
      list: One string per input context. An empty history yields an empty string so
        that the output stays aligned with the input batch.
    """
    question_prefixes = ("what ", "who ", "when ", "where ")
    optimized_context_batch = []
    for hist_uttr in context_batch:
        if not hist_uttr:
            # No utterances at all: emit a placeholder instead of failing on hist_uttr[-2].
            optimized_context_batch.append("")
            continue

        cur_uttr = hist_uttr[-1]
        if len(hist_uttr) == 1:
            optimized_context_batch.append(cur_uttr)
            continue

        prev_uttr = hist_uttr[-2]
        is_q = prev_uttr.startswith(question_prefixes) or "?" in prev_uttr
        if is_q and len(cur_uttr.split()) < 3:
            optimized_context_batch.append(f"{prev_uttr} {cur_uttr}")
        else:
            optimized_context_batch.append(cur_uttr)

    return optimized_context_batch
54

55

56
def process_entity_info(
    entity_substr_batch, entity_ids_batch, conf_batch, entity_id_tags_batch, prex_info_batch, optimized_context_batch
):
    """Builds per-sample entity records, dropping inconsistent "Abstract" candidates.

    For every sample, the property-extraction triplets are turned into a mapping from
    the lower-cased triplet object to its relation (or property). For every entity
    substring, candidates tagged "Abstract" are kept only when the relation found for
    that substring is listed in ``abstract_rels`` and the substring is not preceded in
    the context by one of "the", "my", "his", "her". An entity is reported only if at
    least one candidate remains and the substring occurs in the lower-cased context.

    Args:
      entity_substr_batch (list): List of entity substrings (entity names).
      entity_ids_batch (list): List of entity IDs.
      conf_batch (list): List of confidences. Each candidate confidence is indexed:
        item 0 is reported as "tokens_match_conf" and item 2 as "confidences".
      entity_id_tags_batch (list): List of entity ID tags (entity kinds).
      prex_info_batch (list): List of property extraction information. Each item is a
        dict with an optional "triplets" list, or a list whose first element is one.
      optimized_context_batch (list): List of preprocessed conversation contexts.

    Returns:
      list: Processed entity information batch, one list of dicts per sample.
    """
    determiners = ("the", "my", "his", "her")
    entity_info_batch = []
    for (
        entity_substr_list,
        entity_ids_list,
        conf_list,
        entity_id_tags_list,
        prex_info,
        context,
    ) in zip(
        entity_substr_batch,
        entity_ids_batch,
        conf_batch,
        entity_id_tags_batch,
        prex_info_batch,
        optimized_context_batch,
    ):
        entity_info_list = []

        # Extract triplets from property extraction information
        if isinstance(prex_info, list) and prex_info:
            prex_info = prex_info[0]
        triplets = prex_info.get("triplets", []) if prex_info else []

        # Map each lower-cased triplet object to its relation (or property) name
        obj2rel_dict = {}
        for triplet in triplets:
            rel = triplet.get("relation", triplet.get("property", ""))
            obj2rel_dict[triplet["object"].lower()] = rel

        # Lower-case the context once per sample; a missing context is treated as empty
        context = (context or "").lower()

        # Process entity information for each entity substring
        for entity_substr, entity_ids, confs, entity_id_tags in zip(
            entity_substr_list,
            entity_ids_list,
            conf_list,
            entity_id_tags_list,
        ):
            entity_substr = entity_substr.lower()
            curr_rel = obj2rel_dict.get(entity_substr, "")
            is_abstract = curr_rel.lower().replace("_", " ") in abstract_rels and not any(
                f" {word} {entity_substr}" in context for word in determiners
            )

            filtered_entity_ids, filtered_confs, filtered_entity_id_tags = [], [], []

            # Filter entity information based on condition:
            # - Exclude entities marked as "Abstract" in db if they are not considered
            # abstract according to is_abstract.
            for entity_id, conf, entity_id_tag in zip(entity_ids, confs, entity_id_tags):
                if entity_id_tag == "Abstract" and not is_abstract:
                    logger.info(
                        "Contradiction between the entity_kind 'Abstract' and relationship '%s'", curr_rel
                    )
                else:
                    filtered_entity_ids.append(entity_id)
                    filtered_confs.append(conf)
                    filtered_entity_id_tags.append(entity_id_tag)

            if filtered_entity_ids and entity_substr in context:
                # Construct the entity information dictionary
                entity_info_list.append(
                    {
                        "entity_substr": entity_substr,
                        "entity_ids": filtered_entity_ids,
                        "confidences": [float(elem[2]) for elem in filtered_confs],
                        "tokens_match_conf": [float(elem[0]) for elem in filtered_confs],
                        "entity_id_tags": filtered_entity_id_tags,
                    }
                )
        # Add the processed entity information to the batch
        entity_info_batch.append(entity_info_list)
    return entity_info_batch
149

150

151
@app.route("/model", methods=["POST"])
def respond():
    """Main function for responding to a request.

    Reads the JSON fields "user_id", "entity_substr", "entity_tags", "contexts" and
    "property_extraction" from the request, runs the entity linking model on the
    entity substrings and post-processes its output against the dialogue context.
    Any exception raised while preprocessing, linking or post-processing is reported
    to Sentry and logged, and an empty result is returned for every requested sample.

    Returns:
      flask.Response: Response containing the processed entity information.
    """
    st_time = time.time()
    payload = request.json
    user_ids = payload.get("user_id", [""])
    entity_substr_batch = payload.get("entity_substr", [[""]])
    entity_tags_batch = payload.get(
        "entity_tags",
        [["" for _ in entity_substr_list] for entity_substr_list in entity_substr_batch],
    )
    context_batch = payload.get("contexts", [[""]])
    prex_info_batch = payload.get("property_extraction", [{} for _ in entity_substr_batch])

    try:
        # Preprocess the conversation context (inside the try so that a malformed
        # context falls back to empty results like any other failure)
        optimized_context_batch = preprocess_context(context_batch)

        # The model output is bound to separate names so that the fallback below is
        # always sized by the batch that was actually requested.
        (
            linked_substr_batch,
            entity_ids_batch,
            conf_batch,
            entity_id_tags_batch,
        ) = el(user_ids, entity_substr_batch, entity_tags_batch)

        # Process entity information
        entity_info_batch = process_entity_info(
            linked_substr_batch,
            entity_ids_batch,
            conf_batch,
            entity_id_tags_batch,
            prex_info_batch,
            optimized_context_batch,
        )

    except Exception as e:
        sentry_sdk.capture_exception(e)
        logger.exception(e)
        # One independent empty list per sample (no shared-list aliasing)
        entity_info_batch = [[] for _ in entity_substr_batch]

    total_time = time.time() - st_time
    logger.info("entity_info_batch: %s", entity_info_batch)
    logger.info("custom entity linking exec time = %.3fs", total_time)

    # Return the processed entity information
    return jsonify(entity_info_batch)
201

202

203
if __name__ == "__main__":
    # When run directly, serve the app on all interfaces, port 3000, with debug off.
    app.run(host="0.0.0.0", port=3000, debug=False)
205

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.