# Source listing: google-research (149 lines, 5.5 KB)
1# coding=utf-8
2# Copyright 2024 The Google Research Authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""No padding Inception FCN neural network.
17
18This is a variant of Inception v3 that removes all paddings. This change
19allows the network to be trained and inference run with different patch size
20(Fully Convolutional Network, FCN mode) while having the same inference results.
21The network can be initialized for two different receptive fields: 911 and 129.
22"""
23
24import tensorflow.compat.v1 as tf25import tf_slim as slim26
27import inception_base_12928import inception_base_91129import network30import network_params31import scope_utils32
33
def get_inception_base_and_downsample_factor(receptive_field_size):
  """Returns the Inception base network fn and its downsample factor.

  Args:
    receptive_field_size: the receptive field of the desired network variant;
      only 911 and 129 are supported.

  Returns:
    A `(base_network_fn, downsample_factor)` tuple for the requested variant.

  Raises:
    ValueError: if `receptive_field_size` is neither 911 nor 129.
  """
  if receptive_field_size == 911:
    return (inception_base_911.nopad_inception_v3_base_911,
            inception_base_911.MODEL_DOWNSAMPLE_FACTOR)
  if receptive_field_size == 129:
    return (inception_base_129.nopad_inception_v3_base_129,
            inception_base_129.MODEL_DOWNSAMPLE_FACTOR)
  raise ValueError(
      f'Receptive field size should be 911 or 129. {receptive_field_size} was provided.'
  )
45
class InceptionV3FCN(network.Network):
  """A no pad, fully convolutional InceptionV3 model."""

  def __init__(
      self,
      inception_params,
      conv_scope_params,
      num_classes=2,
      is_training=True,
  ):
    """Creates a no pad, fully convolutional InceptionV3 model.

    Args:
      inception_params: parameters specific to the InceptionV3
      conv_scope_params: parameters used to configure the general convolution
        parameters used in the slim argument scope.
      num_classes: number of output classes from the model
      is_training: whether the network should be built for training or
        inference

    Raises:
      ValueError: if `inception_params.depth_multiplier` is not positive.
    """
    super().__init__()
    self._num_classes = num_classes
    self._is_training = is_training
    # Pick the base graph-builder fn and its fixed downsample factor for the
    # configured receptive field (911 or 129).
    (self._network_base,
     self._downsample_factor) = get_inception_base_and_downsample_factor(
         inception_params.receptive_field_size)
    self._prelogit_dropout_keep_prob = (
        inception_params.prelogit_dropout_keep_prob)
    self._depth_multiplier = inception_params.depth_multiplier
    self._min_depth = inception_params.min_depth
    self._inception_fcn_stride = inception_params.inception_fcn_stride
    self._conv_scope_params = conv_scope_params
    if self._depth_multiplier <= 0:
      raise ValueError('param depth_multiplier should be greater than zero.')
    # The logits conv stride is the requested FCN stride expressed in units of
    # the base network's downsample factor; an unset stride means 1.
    if self._inception_fcn_stride:
      self._logits_stride = int(
          self._inception_fcn_stride / self._downsample_factor)
    else:
      self._logits_stride = 1

  def build(self, inputs):
    """Returns an InceptionV3FCN model with configurable conv2d normalization.

    Args:
      inputs: a map from input string names to tensors. Required:
        * IMAGES: a tensor of shape [batch, height, width, channels]

    Returns:
      A dictionary from network layer names to the corresponding layer
      activation Tensors. Includes:
        * PRE_LOGITS: activation layer preceding LOGITS
        * LOGITS: the pre-softmax activations, size [batch, num_classes]
        * PROBABILITIES: softmax probs, size [batch, num_classes]
    """
    images = self._get_tensor(inputs, self.IMAGES, expected_rank=4)
    conv_scope = scope_utils.get_conv_scope(self._conv_scope_params,
                                            self._is_training)
    with slim.arg_scope(conv_scope):
      features, endpoints = self._network_base(
          images,
          min_depth=self._min_depth,
          depth_multiplier=self._depth_multiplier)
      # Final pooling and prediction head.
      with tf.variable_scope('Logits'):
        features = slim.dropout(
            features,
            keep_prob=self._prelogit_dropout_keep_prob,
            is_training=self._is_training,
            scope='Dropout_1b')
        endpoints[self.PRE_LOGITS] = features
        # 1x1 conv produces one channel per class; no activation/normalizer so
        # the raw pre-softmax logits come out.
        logits = slim.conv2d(
            features,
            self._num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            stride=self._logits_stride,
            scope='Conv2d_1c_1x1')
        probabilities = tf.nn.softmax(logits)
        endpoints[self.PROBABILITIES_TENSOR] = probabilities
        if self._logits_stride == 1:
          # Squeeze out the unit height/width dims so the outputs are shaped
          # [batch, num_classes].
          endpoints[self.LOGITS] = tf.squeeze(
              logits, [1, 2], name='SpatialSqueeze')
          endpoints[self.PROBABILITIES] = tf.squeeze(
              probabilities, [1, 2], name='SpatialSqueeze')
        else:
          # Strided FCN mode keeps the spatial map.
          endpoints[self.LOGITS] = logits
          endpoints[self.PROBABILITIES] = probabilities
    return endpoints
132
def get_inception_v3_fcn_network_fn(
    inception_params,
    conv_scope_params,
    num_classes=2,
    is_training=True,
):
  """Returns a function that return logits and endpoints for slim uptraining."""
  # Build the model wrapper once; the returned closure re-uses it per call.
  model = InceptionV3FCN(inception_params, conv_scope_params, num_classes,
                         is_training)

  def network_fn(images):
    # Adapt the slim-style (images -> (logits, endpoints)) calling convention
    # to the Network.build(inputs_dict) interface.
    endpoints = model.build({'Images': images})
    return endpoints['Logits'], endpoints

  return network_fn