1# coding=utf-8
2# Copyright 2024 The Google Research Authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Provides a simple feed forward neural net class for regression tasks.
17
18The FeedForward class constructs a tensorflow model. The constructor takes a
19config object and expects metaparameter settings to be stored as config
20attributes. Any object that has the appropriate attributes can be passed in to
21configure the model since FeedForward does not assume the config object is
22anything more than a dumb struct and does not attempt to serialize it.
23Nonetheless, it will usually be an instance of tf.HParams. The model
24constructor sets up TF variables to hold the weights, but the fprop method
25builds the fprop graph for the model using the weights.
26
27A few notes on what metaparameters will be needed in general:
28
29For each fully connected (FC) layer, we need to select a size, an activation
function, a dropout rate, and an initialization scheme. At construction time, we
31only need the sizes and initialization.
32
33Right now there is no support for convolutional layers.
34
35Eventually, for each convolutional layer we need the activation function, a
36dropout rate, a filter size, a number of filters, an initialization scheme, and
37in principle padding and strides, but we will fix those. At construction time,
38we only need the filter size, number of filters, and initialization.
39
40Although in principle we can interleave FC and conv layers, life is complicated
41enough as it is. Let's do zero or more conv layers followed by zero or more FC
layers. During metaparameter search, based on limitations of metaparameter tuning
43policies, we will need to fix the number of layers of each type in a given
44study. We also might need to use introspection to add attributes to the hpconfig
45object the tuner gives us, since the tuner interface has limited flexibility
46for multi-dimensional metaparameters.
47
48The model class doesn't know anything about training, so training
49metaparameters in the config object will be ignored.
50"""
51
52import collections
53
54from six.moves import map
55
56import tensorflow.compat.v1 as tf
57from tensorflow.contrib import labeled_tensor as lt
58from xxx import layers as contrib_layers
59from xxx import framework as contrib_framework
60
61
# Lookup table from the config's nonlinearity name to the TF activation op.
nonlinearities = dict(
    relu=tf.nn.relu,
    elu=tf.nn.elu,
    tanh=tf.tanh,
    sigmoid=tf.sigmoid,
)
68
69
def _stack_inputs_by_rank(inputs):
  """Create 2D and 3D input tensors from a dictionary of inputs.

  3D inputs are stacked together for use in (optional) convolutional layers.
  2D inputs are only used in fully-connected layers.

  Args:
    inputs: Dict[str, lt.LabeledTensor] providing input features. All features
      must be 2D or 3D labeled tensors with a 'batch' axis as their first
      dimension. 3D tensors must have 'position' as their second axis. The
      last axis of all tensors is allowed to vary, because raw input features
      may have different names for labels that are more meaningful than
      generic "features" or "channels".

  Returns:
    Tuple[Optional[lt.LabeledTensor], Optional[lt.LabeledTensor]], where the
    first labeled tensor, if present, has axes ['batch', 'feature'] and the
    second labeled tensor, if present, has axes ['batch', 'position',
    'channel'].

  Raises:
    ValueError: if the result tensors do not have the same batch axis.
  """
  inputs_2d = []
  inputs_3d = []
  # Iterate keys in sorted order so outputs are fixed across randomized dict
  # iteration order.
  for key in sorted(inputs):
    tensor = inputs[key]
    if len(tensor.axes) == 2:
      # Normalize the trailing axis name so heterogeneous features concat.
      tensor = lt.rename_axis(tensor, list(tensor.axes.keys())[-1], 'feature')
      inputs_2d.append(tensor)
    elif len(tensor.axes) == 3:
      assert list(tensor.axes.values())[1].name == 'position'
      tensor = lt.rename_axis(tensor, list(tensor.axes.keys())[-1], 'channel')
      inputs_3d.append(tensor)
    else:
      raise AssertionError('unexpected rank')

  combined_2d = lt.concat(inputs_2d, 'feature') if inputs_2d else None
  combined_3d = lt.concat(inputs_3d, 'channel') if inputs_3d else None
  if combined_2d is not None and combined_3d is not None:
    # Bug fix: the original compared combined_2d's batch axis against itself,
    # so a batch-axis mismatch between 2D and 3D inputs was never detected.
    if list(combined_2d.axes.values())[0] != list(combined_3d.axes.values())[0]:
      raise ValueError('mismatched batch axis')
  return combined_2d, combined_3d
114
115
class FeedForward:
  """Class implementing a simple feedforward neural net in tensorflow.

  Attributes:
    batch_axis: lt.Axis for batches of examples.
    input_position_axis: lt.Axis for input positions.
    input_channel_axis: lt.Axis for input channels.
    logit_axis: lt.Axis for logit channels, output from the `fprop` method.
    config: a reference to the config object we used to specify the model. In
      general we expect it to be an instance of tf.HParams, but it could be
      anything with the right attributes.
    params: list of weights and biases
  """

  def __init__(self, dummy_inputs, logit_axis, config):
    # dummy_inputs is used only to trace the graph once so all variables get
    # created with the right shapes; its values are never trained on.
    self.logit_axis = logit_axis
    self.config = config

    # The final FC layer maps onto the logits, so its size and init factor are
    # fixed by logit_axis / output_init_factor rather than taken from the
    # per-hidden-layer config lists.
    self.fc_sizes = getattr(config, 'fc_hid_sizes', []) + [len(logit_axis)]
    self.fc_init_factors = (
        getattr(config, 'fc_init_factors', []) + [config.output_init_factor])

    if not dummy_inputs:
      raise ValueError('network has size 0 input')
    if logit_axis.size == 0:
      raise ValueError('network has size 0 output')

    # All per-layer FC metaparameter lists must have the same length.
    if len({
        len(self.fc_sizes), len(self.fc_init_factors), len(config.dropouts)
    }) != 1:
      raise ValueError('invalid hyperparameter config for fc layers')
    self.num_fc_layers = len(self.fc_sizes)

    # Gather per-layer conv settings (config.conv_depths, config.conv_widths,
    # ...) into a single namedtuple of parallel lists; missing attributes
    # default to no conv layers.
    self._conv_config = _ConvConfig(*[
        getattr(config, 'conv_' + field, []) for field in _ConvConfig._fields
    ])
    if len(set(map(len, self._conv_config))) != 1:
      raise ValueError('invalid hyperparameter config for conv layers')
    self.num_conv_layers = len(self._conv_config.depths)

    # make_template ensures the variables created on the first fprop call are
    # reused by every subsequent call.
    self.fprop = tf.make_template('feedforward', self._fprop)
    # create variables
    self.fprop(dummy_inputs, mode='test')
    self.params = contrib_framework.get_variables(
        scope=self.fprop.variable_scope.name)

  def _fprop(self, inputs, mode):
    """Builds the fprop graph from inputs up to logits.

    Args:
      inputs: input LabeledTensor with axes [batch_axis, input_position_axis,
        input_channel_axis].
      mode: either 'test' or 'train', determines whether we add dropout nodes

    Returns:
      Logits tensor with axes [batch_axis, logit_axis].

    Raises:
      ValueError: mode must be 'train' or 'test'
    """
    if mode not in ['test', 'train']:
      raise ValueError('mode must be one of "train" or "test"')
    is_training = mode == 'train'

    inputs_2d, inputs_3d = _stack_inputs_by_rank(inputs)

    if inputs_2d is None and inputs_3d is None:
      raise ValueError('feedforward model has no inputs')

    # Get the batch axis from the actual inputs, because we set up the graph
    # with unknown batch size.
    example_inputs = inputs_3d if inputs_2d is None else inputs_2d
    batch_axis = example_inputs.axes['batch']

    w_initializer = tf.uniform_unit_scaling_initializer
    nonlinearity = nonlinearities[self.config.nonlinearity]

    if inputs_3d is not None:
      # Zip the parallel per-layer lists into one
      # (depth, width, stride, rate, init_factor) tuple per conv layer.
      conv_args = list(zip(*self._conv_config))
      net = contrib_layers.stack(
          inputs_3d,
          conv1d,
          conv_args,
          scope='conv',
          padding='SAME',
          activation_fn=nonlinearity,
          w_initializer=w_initializer)
      # Flatten the conv output so it can be joined with the 2D features.
      net = contrib_layers.flatten(net)
      if inputs_2d is not None:
        net = tf.concat([net, inputs_2d], 1)
    else:
      net = inputs_2d

    if net.get_shape()[-1].value == 0:
      raise ValueError('feature dimension has size 0')

    # Config stores dropout rates; the layers API wants keep probabilities.
    keep_probs = [1 - d for d in self.config.dropouts]
    fc_args = list(zip(self.fc_sizes, keep_probs, self.fc_init_factors))

    net = contrib_layers.stack(
        net,
        dropout_and_fully_connected,
        fc_args[:-1],
        scope='fc',
        is_training=is_training,
        activation_fn=nonlinearity,
        w_initializer=w_initializer)

    # the last layer should not have a non-linearity
    net = dropout_and_fully_connected(
        net, *fc_args[-1], scope='fc_final', is_training=is_training,
        activation_fn=None, w_initializer=w_initializer)

    logits = lt.LabeledTensor(net, [batch_axis, self.logit_axis])
    return logits
232
233
234# must match the order of conv1d's arguments
235_ConvConfig = collections.namedtuple(
236'_ConvConfig', 'depths, widths, strides, rates, init_factors')
237
238
def conv1d(inputs,
           filter_depth,
           filter_width,
           stride=1,
           rate=1,
           init_factor=1.0,
           w_initializer=None,
           **kwargs):
  """Adds a convolutional 1d layer.

  If rate is 1 then a standard convolutional layer will be added,
  if rate is > 1 then an dilated (atrous) convolutional layer will
  be added.

  Args:
    inputs: a 3-D tensor `[batch_size, in_width, in_channels]`.
    filter_depth: integer, the number of output channels.
    filter_width: integer, size of the convolution kernel.
    stride: integer, size of the convolution stride.
    rate: integer, the size of the convolution dilation.
    init_factor: passed to `w_initializer`.
    w_initializer: function to call to create a weights initializer.
    **kwargs: passed on to `layers.conv2d`.

  Returns:
    A tensor variable representing the result of the series of operations.
  Raises:
    Error if rate > 1 and stride != 1. Current implementation of
    atrous_conv2d does not allow a stride other than 1.
  """
  with tf.name_scope('conv1d'):
    # conv2d over a height-1 "image" is equivalent to a 1d convolution, so
    # insert a singleton axis: ['batch', 'position', 'channel'] becomes
    # ['batch', 1, 'position', 'channel']. Convolution happens over the
    # middle two dimensions.
    expanded = tf.expand_dims(inputs, 1)
    result = contrib_layers.conv2d(
        expanded,
        filter_depth,
        [1, filter_width],
        [1, stride],
        rate=[1, rate],
        weights_initializer=w_initializer(factor=init_factor),
        **kwargs)
    # Drop the singleton height axis to return to the 1d layout.
    return tf.squeeze(result, [1])
290
291
def dropout_and_fully_connected(inputs,
                                num_outputs,
                                keep_prob=0.5,
                                init_factor=1.0,
                                is_training=True,
                                w_initializer=None,
                                **kwargs):
  """Apply dropout followed by a fully connected layer.

  Args:
    inputs: A tensor of with at least rank 2 and value for the last dimension,
      i.e. `[batch_size, depth]`, `[None, None, None, channels]`.
    num_outputs: Integer or long, the number of output units in the layer.
    keep_prob: A scalar `Tensor` with the same type as x. The probability
      that each element is kept.
    init_factor: passed to `w_initializer`.
    is_training: A bool `Tensor` indicating whether or not the model
      is in training mode. If so, dropout is applied and values scaled.
      Otherwise, dropout is skipped.
    w_initializer: Function to call to create a weights initializer.
    **kwargs: passed on to `layers.fully_connected`.

  Returns:
    A tensor variable representing the result of the series of operations.
  """
  # Dropout first (a no-op when is_training is False), then the dense layer.
  dropped = contrib_layers.dropout(
      inputs, keep_prob=keep_prob, is_training=is_training)
  init = w_initializer(factor=init_factor)
  return contrib_layers.fully_connected(
      dropped, num_outputs, weights_initializer=init, **kwargs)
323