# coding=utf-8
# Copyright 2024 The Google Research Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Functionality shared by train and eval."""

import tensorflow.compat.v1 as tf

# Alias for the TF1-style flags module; every definition in this file
# registers a command-line flag through it.
flags = tf.app.flags

# Selects the model output type. The help fragments are joined by implicit
# string concatenation, so each one must carry its own separator; without the
# trailing ". " the rendered help text runs the sentences together.
flags.DEFINE_enum(
    name='output_mode',
    default='combined',
    enum_values=['segment', 'regression', 'combined'],
    help=('"segment", uses a model similar to DeepLab. '
          '"regression", uses the ClickRegression model. '
          '"combined", uses the multitask learning architecture.'))

flags.DEFINE_string(
    name='master',
    default='',
    help='name of the tensorflow server')

flags.DEFINE_integer(name='image_size', default=513, help='')

flags.DEFINE_integer(
    name='logits_kernel_size',
    default=1,
    help=('The kernel size for the convolutional kernel that '
          'generates logits.'))

# Settings for model variants.

flags.DEFINE_string(
    name='model_variant',
    default='mobilenet_v2',
    help='DeepLab model variant.')

# The two flags below default to tfhub.dev module URLs.
flags.DEFINE_string(
    name='pretrained_text_enc_name',
    default='https://tfhub.dev/google/universal-sentence-encoder/2',
    help='Text embedding to use for elements.')

flags.DEFINE_string(
    name='pretrained_elements_ref_match_model',
    default='https://tfhub.dev/google/nnlm-en-dim128-with-normalization/1',
    help='')

# Multi-valued flags: each may be repeated on the command line.
flags.DEFINE_multi_float(
    name='image_pyramid',
    default=[1.0],
    help='Input scales for multi-scale feature extraction.')

flags.DEFINE_multi_integer(
    name='atrous_rates',
    default=[6, 12, 18],
    help='Atrous rates for atrous spatial pyramid pooling.')

flags.DEFINE_boolean(
    name='add_image_level_feature',
    default=True,
    help='Add image level feature.')

flags.DEFINE_boolean(
    name='aspp_with_batch_norm',
    default=True,
    help='Use batch norm parameters for ASPP or not.')

flags.DEFINE_boolean(
    name='aspp_with_separable_conv',
    default=True,
    help='Use separable convolution for ASPP or not.')

flags.DEFINE_multi_integer(
    name='multi_grid',
    default=[1, 1, 1],
    help='Employ a hierarchy of atrous rates for ResNet.')

# Dropout keep probabilities (no help text is provided for these flags).
flags.DEFINE_float(name='comb_dropout_keep_prob', default=1.0, help='')
flags.DEFINE_float(name='image_keep_prob', default=1.0, help='')
flags.DEFINE_float(name='elements_keep_prob', default=0.75, help='')

flags.DEFINE_float(
    name='depth_multiplier',
    default=1.0,
    help=('Multiplier for the depth (number of channels) for all '
          'convolution ops used in MobileNet.'))

flags.DEFINE_integer(
    name='output_stride',
    default=16,
    help='The ratio of input to output spatial resolution.')

# Defaults to None, i.e. the flag is unset unless given explicitly.
flags.DEFINE_integer(
    name='decoder_output_stride',
    default=None,
    help=('The ratio of input to output spatial resolution when '
          'employing decoder to refine segmentation results.'))

flags.DEFINE_boolean(
    name='decoder_use_separable_conv',
    default=True,
    help='Employ separable convolution for decoder or not.')

flags.DEFINE_enum(
    name='merge_method',
    default='max',
    enum_values=['max', 'avg'],
    help='Scheme to merge multi scale features.')

# Toggles for which inputs are fed to the model. Multi-part help strings are
# joined by implicit concatenation, so each leading fragment ends with a
# space to keep the rendered sentences apart.
flags.DEFINE_boolean(
    name='use_ref_exp',
    default=True,
    help='Whether or not to use the referring expression in the model.')
flags.DEFINE_boolean(
    name='use_elements_texts',
    default=True,
    help=('Whether or not to use the elements text in the model. '
          'Crash if this is true when use_elements_boxes is false.'))
flags.DEFINE_boolean(
    name='use_elements_boxes',
    default=True,
    help='Whether or not to use the elements boxes in the model.')
flags.DEFINE_boolean(
    name='use_image',
    default=True,
    help=('Whether or not to use the screenshot image in the model. '
          'Set to false for a baseline relying on elements.'))
flags.DEFINE_boolean(name='use_elements_neighbors', default=False, help='')
flags.DEFINE_boolean(name='use_elements_ref_match', default=False, help='')

# Description of the attention variants, shared verbatim by the help text of
# both flags below.
_ATTENTION_METHODS_HELP = (
    " 'singDotAtten': Use the same DNN to calculate the representations"
    " of the items and expression to multiply. 'sepDotAtten' Use"
    ' separate networks to calculate the representations.'
    " 'combAtten': Use a network to directly output multiply values.")

flags.DEFINE_enum(
    name='merge_ref_elements_method',
    default='singDotAtten',
    enum_values=['', 'combine', 'singDotAtten', 'sepDotAtten', 'combAtten'],
    help=("'': Don't merge in elements model. 'combine': Concatenate the"
          ' representations and feed through a DNN.'
          + _ATTENTION_METHODS_HELP))

flags.DEFINE_enum(
    name='select_attention_method',
    default='singDotAtten',
    enum_values=['singDotAtten', 'sepDotAtten', 'combAtten'],
    help=('The attention method used to select a given item.'
          + _ATTENTION_METHODS_HELP))

flags.DEFINE_enum(
    name='elements_proj_mode',
    default='step',
    enum_values=['tile', 'step', 'cont'],
    help=('How to project the elements information onto the image feature.'
          " 'tile': blindly tile the feature over the image."
          " 'step': Tile only in the elements bounding box locations."
          " 'cont': Tile the values in bounding box locations and"
          ' increase magnitude near the center of the box.'))
flags.DEFINE_boolean(
    name='incorrect_boxes_as_errors',
    default=True,
    help='Crash on incorrect box sizes.')
flags.DEFINE_string(
    name='add_ref_elements_layer',
    default='',
    help='The layer to add the ref and elements representations to.')
# The first help fragment ends with a space so the two sentences do not run
# together once the adjacent literals are concatenated.
flags.DEFINE_boolean(
    name='proj_elements_memop',
    default=True,
    help=('Reduces elements projection mem by using a tf while loop. '
          'May be slower.'))

# Flags for the elements branch of the model (no help text is provided).
flags.DEFINE_boolean(name='elements_3d_output', default=True, help='')

flags.DEFINE_boolean(name='elements_cnn', default=True, help='')

flags.DEFINE_boolean(name='elements_img_comb_cnn', default=True, help='')
flags.DEFINE_integer(name='elements_img_comb_cnn_layers', default=2, help='')
flags.DEFINE_integer(name='elements_enc_size', default=512, help='')

# Dataset settings.

flags.DEFINE_string(
    name='dataset',
    default='default',
    help='Name of the segmentation dataset.')

flags.DEFINE_string(
    name='dataset_dir',
    default='',
    help='Where the dataset reside.')

flags.DEFINE_integer(name='dataset_threads', default=100, help='')

flags.DEFINE_boolean(name='preprocess_divide_label', default=False, help='')
flags.DEFINE_integer(name='shuffle_buffer_size', default=10000, help='')
flags.DEFINE_integer(name='file_shuffle_buffer_size', default=100, help='')

flags.DEFINE_boolean(
    name='use_labels',
    default=True,
    help='If True, include label in input pipeline.')
# The first help fragment ends with a space; without it the concatenated
# text reads "such asdropout".
flags.DEFINE_boolean(
    name='train_mode',
    default=True,
    help=('Specify whether we are in training mode. Used for model ops '
          'such as dropout and batch norm'))
flags.DEFINE_boolean(
    name='coord_softmax',
    default=False,
    help='if True, use the coordinate softmax architecture.')

# The first help fragment ends with a space; without it the concatenated
# text reads "ClickRegression-specificconvolutions".
flags.DEFINE_boolean(
    name='regression_batch_norm',
    default=False,
    help=('if True, apply batch normalization to all '
          'ClickRegression-specific convolutions'))

# Off by default; when enabled, the ground-truth box coordinates are used as
# features (per the help text).
flags.DEFINE_boolean(
    name='use_groundtruth_box',
    default=False,
    help=('if True, use the xmin,xmax,ymin,ymax features of the ground '
          'truth box'))
