# dream
# 60 lines · 1.6 KB
1import logging
2import re
3import time
4from os import getenv
5
6import sentry_sdk
7import spacy
8from flask import Flask, request, jsonify
9
10
# Initialise the Sentry SDK with the DSN read from SENTRY_DSN. getenv returns
# None when the variable is unset, and that value is passed through unchanged.
sentry_sdk.init(dsn=getenv("SENTRY_DSN"))

# Load the spaCy pipeline named by SPACY_MODEL once, at import time, so that
# every request reuses the same pipeline object.
spacy_nlp = spacy.load(getenv("SPACY_MODEL"))
# Names of the spaCy token attributes copied into every annotated token, given
# as a "|"-separated list (for example "pos_|dep_"). An unset variable or empty
# entries (such as a trailing "|") contribute no attribute instead of raising
# AttributeError at import time or when getattr() is called with "".
TOKEN_ATTRIBUTES = [attr for attr in getenv("TOKEN_ATTRIBUTES", "").split("|") if attr]

# getenv returns strings, so a plain truthiness check would treat "0" or
# "false" as enabled. Only the spellings below (and an unset variable) switch
# the flag off; every other value keeps it on, exactly as before.
_FALSY_ENV_VALUES = frozenset({"", "0", "false", "no", "off"})
ANNOTATE_BATCH_WITH_TOKENS_ONLY = (
    getenv("ANNOTATE_BATCH_WITH_TOKENS_ONLY", "").strip().lower() not in _FALSY_ENV_VALUES
)
16
# Configure the root logger: DEBUG level, with timestamp, logger name and
# level in front of every message.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Flask application object that the route handlers in this file register on.
app = Flask(__name__)
21
22
def remove_quotes(text):
    """Replace quotation marks with spaces and normalise whitespace.

    Double quotes are always replaced. A single quote is replaced only when it
    is not surrounded by word characters on both sides, so quoting marks
    ('hello') are removed while in-word apostrophes (don't, it's) are kept.

    Args:
        text: Raw utterance string.

    Returns:
        The cleaned string, with runs of whitespace collapsed to one space and
        leading/trailing whitespace stripped.
    """
    # The previous pattern r"\'\"" matched only the two-character sequence '"
    # and therefore left ordinary quotes untouched. In-word apostrophes are
    # deliberately preserved so contractions reach the tokenizer unchanged.
    without_quotes = re.sub(r"\"|(?<!\w)'|'(?!\w)", " ", text)
    return re.sub(r"\s+", " ", without_quotes).strip()
25
26
def get_result(request, only_tokens=False):
    """Annotate every utterance of a request with spaCy tokens.

    Args:
        request: Object whose ``json`` attribute is a mapping with a
            ``"sentences"`` entry holding an iterable of utterance strings.
        only_tokens: When truthy, emit only each token's text and skip the
            attributes listed in ``TOKEN_ATTRIBUTES``.

    Returns:
        A list with one entry per utterance, each a list of token dicts. Every
        dict has a ``"text"`` key and, unless ``only_tokens`` is set, one
        stringified value per name in ``TOKEN_ATTRIBUTES``.

    Raises:
        KeyError: If the JSON mapping has no ``"sentences"`` key.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way a difference of time.time() values can.
    started = time.perf_counter()
    # With only_tokens set, no extra attribute is copied from the tokens.
    attributes = () if only_tokens else TOKEN_ATTRIBUTES

    result = []
    for uttr in request.json["sentences"]:
        doc = spacy_nlp(remove_quotes(uttr))
        result.append(
            [
                # "text" comes first; attribute values are stringified so the
                # whole structure is JSON-serialisable.
                {"text": token.text, **{attr: str(getattr(token, attr)) for attr in attributes}}
                for token in doc
            ]
        )

    total_time = time.perf_counter() - started
    logger.info(f"spacy_annotator exec time: {total_time:.3f}s")
    return result
45
46
@app.route("/respond", methods=["POST"])
def respond():
    """Handle POST /respond: annotate the request and return the result as JSON.

    Returns:
        The response built by ``jsonify`` from ``get_result(request)``.
    """
    return jsonify(get_result(request))
51
52
@app.route("/respond_batch", methods=["POST"])
def respond_batch():
    """Handle POST /respond_batch: annotate the request as a single batch.

    ``ANNOTATE_BATCH_WITH_TOKENS_ONLY`` is forwarded to ``get_result`` as its
    ``only_tokens`` argument.

    Returns:
        The ``jsonify`` response for a one-element list whose only item maps
        ``"batch"`` to the annotations of all sentences in the request.
    """
    annotations = get_result(request, only_tokens=ANNOTATE_BATCH_WITH_TOKENS_ONLY)
    payload = [{"batch": annotations}]
    return jsonify(payload)
57
58
if __name__ == "__main__":
    # Direct execution only: start Flask's built-in server bound to 0.0.0.0 on
    # port 3000 with debug mode off.
    app.run(host="0.0.0.0", port=3000, debug=False)
61