# File from the "dream" repository (listing header: 81 lines, 2.9 KB).
# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from os import getenv
from typing import Dict, List, Union

import sentry_sdk
from bert_dp.preprocessing import InputFeatures
from overrides import overrides

from deeppavlov.core.common.registry import register
from deeppavlov.models.bert.bert_classifier import BertClassifierModel

# Report uncaught exceptions to Sentry; a missing SENTRY_DSN makes this a no-op.
sentry_sdk.init(getenv("SENTRY_DSN"))

# Module-wide logging configuration and logger.
logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

31
@register("toxic_classification")
class BertFloatClassifierModel(BertClassifierModel):
    """Bert-based multi-label text classifier with floating-point outputs.

    Uses the output of the [CLS] token and predicts per-category labels via a
    linear transformation on top of BERT.
    """

    # Toxicity categories; prediction columns are aligned with this order.
    columns = ["identity_hate", "insult", "obscene", "severe_toxic", "sexual_explicit", "threat", "toxic"]

    def __init__(self, **kwargs) -> None:
        """Initialize the underlying BertClassifierModel with the same kwargs."""
        super().__init__(**kwargs)
        # NOTE: during training the graph used a masked sigmoid cross-entropy
        # loss — this is a multi-label task and some samples carry `-1`
        # ("unknown") labels that must be excluded from the loss:
        #   self.y_probas = tf.nn.sigmoid(logits)
        #   chosen_inds = tf.not_equal(one_hot_labels, -1)
        #   self.loss = tf.reduce_mean(
        #       tf.nn.sigmoid_cross_entropy_with_logits(
        #           labels=one_hot_labels, logits=logits)[chosen_inds])

    @overrides
    def __call__(self, features: List[InputFeatures]) -> List[Dict[str, float]]:
        """Make predictions for the given batch of featurized texts.

        Args:
            features: batch of InputFeatures produced by the BERT preprocessor.

        Returns:
            One dict per input sample mapping each column name to its
            predicted class value (when ``self.return_probas`` is False)
            or to its probability (when ``self.return_probas`` is True).
        """
        input_ids = [f.input_ids for f in features]
        input_masks = [f.input_mask for f in features]
        input_type_ids = [f.input_type_ids for f in features]

        feed_dict = self._build_feed_dict(input_ids, input_masks, input_type_ids)
        # y_predictions holds hard class decisions; y_probas sigmoid scores.
        if not self.return_probas:
            pred = self.sess.run(self.y_predictions, feed_dict=feed_dict)
        else:
            pred = self.sess.run(self.y_probas, feed_dict=feed_dict)

        # Each row of `pred` is aligned with `columns`; zip them into dicts.
        # (Replaces the index-based double loop — same pairs, same order.)
        return [dict(zip(self.columns, row)) for row in pred]
