dream (fork) — 81 lines · 2.9 KB
# Copyright 2017 Neural Networks and Deep Learning lab, MIPT
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
16
from os import getenv
17
from typing import List, Union
18

19
import sentry_sdk
20
from bert_dp.preprocessing import InputFeatures
21
from overrides import overrides
22

23
from deeppavlov.core.common.registry import register
24
from deeppavlov.models.bert.bert_classifier import BertClassifierModel
25

26
sentry_sdk.init(getenv("SENTRY_DSN"))
27

28
logging.basicConfig(format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", level=logging.INFO)
29
logger = logging.getLogger(__name__)
30

31

32
@register("toxic_classification")
33
class BertFloatClassifierModel(BertClassifierModel):
34
    """
35
    Bert-based model for text classification with floating point values
36

37
    It uses output from [CLS] token and predicts labels using linear transformation.
38

39
    """
40

41
    columns = ["identity_hate", "insult", "obscene", "severe_toxic", "sexual_explicit", "threat", "toxic"]
42

43
    def __init__(self, **kwargs) -> None:
44
        super().__init__(**kwargs)
45
        # FOR INIT GRAPH when training was used the following loss function
46
        # we have multi-label case
47
        # some classes for some samples are true-labeled as `-1`
48
        # we should not take into account (loss) this values
49
        # self.y_probas = tf.nn.sigmoid(logits)
50
        # chosen_inds = tf.not_equal(one_hot_labels, -1)
51
        #
52
        # self.loss = tf.reduce_mean(
53
        #     tf.nn.sigmoid_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)[chosen_inds])
54

55
    @overrides
56
    def __call__(self, features: List[InputFeatures]) -> Union[List[int], List[List[float]]]:
57
        """
58
        Make prediction for given features (texts).
59

60
        Args:
61
            features: batch of InputFeatures
62

63
        Returns:
64
            predicted classes or probabilities of each class
65

66
        """
67
        input_ids = [f.input_ids for f in features]
68
        input_masks = [f.input_mask for f in features]
69
        input_type_ids = [f.input_type_ids for f in features]
70

71
        feed_dict = self._build_feed_dict(input_ids, input_masks, input_type_ids)
72
        if not self.return_probas:
73
            pred = self.sess.run(self.y_predictions, feed_dict=feed_dict)
74
        else:
75
            pred = self.sess.run(self.y_probas, feed_dict=feed_dict)
76

77
        batch_predictions = []
78
        for i in range(len(pred)):
79
            batch_predictions.append({self.columns[j]: pred[i, j] for j in range(len(self.columns))})
80

81
        return batch_predictions
82

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.