google-research
590 lines · 19.7 KB
1# coding=utf-8
2# Copyright 2024 The Google Research Authors.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""NTS-Net adapted for perturbed top-k.
17
18Based on the original PyTorch code
19https://github.com/yangze0930/NTS-Net/blob/master/core/model.py
20"""
21
22import enum23import functools24import math25from typing import List, Tuple26
27from absl import app28from absl import flags29from absl import logging30import chex31from clu import platform32import einops33from flax.deprecated import nn34import jax35import jax.numpy as jnp36import ml_collections37import ml_collections.config_flags as config_flags38from off_the_grid.lib import data39from off_the_grid.lib import models40from off_the_grid.lib import utils41import off_the_grid.lib.classification_utils as classification_lib42from off_the_grid.lib.layers import sample_patches43from off_the_grid.lib.layers import transformer44import optax45import tensorflow as tf46
47
FLAGS = flags.FLAGS

# Command-line interface: a full training config plus a working directory.
config_flags.DEFINE_config_file(
    "config", None, "Training configuration.", lock_config=True)
flags.DEFINE_string("workdir", None, "Work unit directory.")
# NOTE(review): appears unused in this file — the class count actually comes
# from data.get_dataset below; presumably 200 refers to CUB-200 (confirm).
NUM_CLASSES = 200

# Anchor shapes for the three score levels produced by ProposalNet. Each
# entry yields len(scale) * len(aspect_ratio) anchor shapes — 6, 6 and 9
# respectively, 21 in total (see the trailing comments). `stride` and `size`
# are consumed by weighted_anchor_aggregator as the patch-extraction stride
# and the base patch side length before scale/aspect-ratio adjustment.
ANCHORS_SETTINGS = (
    dict(
        layer="p3",
        stride=32,
        size=48,
        scale=[2**(1. / 3.), 2**(2. / 3.)],
        aspect_ratio=[0.667, 1, 1.5]),  # Anchors 0-5
    dict(
        layer="p4",
        stride=64,
        size=96,
        scale=[2**(1. / 3.), 2**(2. / 3.)],
        aspect_ratio=[0.667, 1, 1.5]),  # Anchors 6-11
    dict(
        layer="p5",
        stride=128,
        size=192,
        scale=[1, 2**(1. / 3.), 2**(2. / 3.)],
        aspect_ratio=[0.667, 1, 1.5]),  # Anchors 12-20
)
75
76
class Communication(str, enum.Enum):
  """How ProposalNet levels exchange information before scoring."""
  NONE = "none"
  SQUEEZE_EXCITE_D = "squeeze_excite_d"
  SQUEEZE_EXCITE_X = "squeeze_excite_x"
  TRANSFORMER = "transformer"
83
def zeroone(scores, x_min, x_max):
  """Rescales every entry of `scores` into the [0, 1] range.

  Args:
    scores: Iterable of arrays (or scalars) to rescale.
    x_min: Lower bound used for the rescaling.
    x_max: Upper bound used for the rescaling (a small epsilon keeps the
      division safe when x_min == x_max).

  Returns:
    A list with each element mapped through (x - x_min) / (x_max - x_min).
  """
  span = x_max - x_min + 1e-5
  rescaled = []
  for score in scores:
    rescaled.append((score - x_min) / span)
  return rescaled
88
class ProposalNet(nn.Module):
  """FPN inspired scorer module.

  Scores anchor positions of a backbone feature map. Three convolutional
  "down" levels produce features at three resolutions; 1x1 "tidy"
  convolutions then emit 6, 6 and 9 anchor scores per position (21 score
  maps in total, matching ANCHORS_SETTINGS). Optionally the levels
  communicate first (squeeze-excite or transformer).
  """

  def apply(self, x,
            communication: Communication = Communication.NONE,
            train: bool = True):
    """Forward pass.

    Args:
      x: Backbone features, shape (batch, height, width, channels).
      communication: Cross-scale communication mode; see `Communication`.
      train: Training-mode flag; only used by the transformer variant.

    Returns:
      Tuple (normalized_scores, stats): `normalized_scores` is a list of 21
      score maps of shape (batch, h', w'), min/max-normalized per example;
      `stats` holds the normalized list and the raw flattened scores.
    """
    batch_size = x.shape[0]

    if communication is Communication.SQUEEZE_EXCITE_X:
      # Channel gating applied directly on the input features.
      x = sample_patches.SqueezeExciteLayer(x)
    # end if squeeze excite x

    # Three levels; down2 and down3 halve the spatial resolution each.
    d1 = nn.relu(nn.Conv(
        x, 128, kernel_size=(3, 3), strides=(1, 1), bias=True, name="down1"))
    d2 = nn.relu(nn.Conv(
        d1, 128, kernel_size=(3, 3), strides=(2, 2), bias=True, name="down2"))
    d3 = nn.relu(nn.Conv(
        d2, 128, kernel_size=(3, 3), strides=(2, 2), bias=True, name="down3"))

    if communication is Communication.SQUEEZE_EXCITE_D:
      # Squeeze-excite computed jointly over the positions of all levels.
      d1_flatten = einops.rearrange(d1, "b h w c -> b (h w) c")
      d2_flatten = einops.rearrange(d2, "b h w c -> b (h w) c")
      d3_flatten = einops.rearrange(d3, "b h w c -> b (h w) c")

      # Per-level token counts, needed to split the result back below.
      nd1 = d1_flatten.shape[1]
      nd2 = d2_flatten.shape[1]

      d_together = jnp.concatenate([d1_flatten, d2_flatten, d3_flatten],
                                   axis=1)

      # Squeeze: average over all positions; excite: bottleneck MLP
      # (reduction factor 4) followed by a sigmoid channel gate.
      num_channels = d_together.shape[-1]
      y = d_together.mean(axis=1)
      y = nn.Dense(y, features=num_channels // 4, bias=False)
      y = nn.relu(y)
      y = nn.Dense(y, features=num_channels, bias=False)
      y = nn.sigmoid(y)

      d_together = d_together * y[:, None, :]

      # split and reshape back into the three per-level feature maps
      d1 = d_together[:, :nd1].reshape(d1.shape)
      d2 = d_together[:, nd1:nd1+nd2].reshape(d2.shape)
      d3 = d_together[:, nd1+nd2:].reshape(d3.shape)

    elif communication is Communication.TRANSFORMER:
      # Let the positions of all levels attend to each other.
      d1_flatten = einops.rearrange(d1, "b h w c -> b (h w) c")
      d2_flatten = einops.rearrange(d2, "b h w c -> b (h w) c")
      d3_flatten = einops.rearrange(d3, "b h w c -> b (h w) c")

      nd1 = d1_flatten.shape[1]
      nd2 = d2_flatten.shape[1]

      d_together = jnp.concatenate([d1_flatten, d2_flatten, d3_flatten],
                                   axis=1)

      # Learned positional encodings, one per (level, position) token.
      positional_encodings = self.param(
          "scale_ratio_position_encodings",
          shape=(1,) + d_together.shape[1:],
          initializer=jax.nn.initializers.normal(1. / d_together.shape[-1]))
      d_together = transformer.Transformer(
          d_together + positional_encodings,
          num_layers=2,
          num_heads=8,
          is_training=train)

      # split and reshape back into the three per-level feature maps
      d1 = d_together[:, :nd1].reshape(d1.shape)
      d2 = d_together[:, nd1:nd1+nd2].reshape(d2.shape)
      d3 = d_together[:, nd1+nd2:].reshape(d3.shape)

    # 1x1 convolutions mapping features to per-anchor scores:
    # 6 anchors on the first two levels, 9 on the third.
    t1 = nn.Conv(
        d1, 6, kernel_size=(1, 1), strides=(1, 1), bias=True, name="tidy1")
    t2 = nn.Conv(
        d2, 6, kernel_size=(1, 1), strides=(1, 1), bias=True, name="tidy2")
    t3 = nn.Conv(
        d3, 9, kernel_size=(1, 1), strides=(1, 1), bias=True, name="tidy3")

    # One (batch, h', w', 1) score map per anchor shape (21 in total).
    raw_scores = (jnp.split(t1, 6, axis=-1) +
                  jnp.split(t2, 6, axis=-1) +
                  jnp.split(t3, 9, axis=-1))

    # The following is for normalization: per-example min and max taken
    # over every score of every level.
    t = jnp.concatenate((jnp.reshape(t1, [batch_size, -1]),
                         jnp.reshape(t2, [batch_size, -1]),
                         jnp.reshape(t3, [batch_size, -1])), axis=1)
    t_min = jnp.reshape(jnp.min(t, axis=-1), [batch_size, 1, 1, 1])
    t_max = jnp.reshape(jnp.max(t, axis=-1), [batch_size, 1, 1, 1])
    normalized_scores = zeroone(raw_scores, t_min, t_max)

    stats = {
        "scores": normalized_scores,
        "raw_scores": t,
    }
    # removes the split dimension. scores are now b x h' x w' shaped
    normalized_scores = [s.squeeze(-1) for s in normalized_scores]

    return normalized_scores, stats
186
def extract_weighted_patches(x,
                             weights,
                             kernel,
                             stride,
                             padding):
  """Accumulates weighted image patches with jax.lax.scan.

  Slides a `kernel`-sized window over the (padded) input with the given
  `stride`; every window is scaled by its per-position weight and summed
  into k accumulators.

  Args:
    x: Input of shape (batch, height, width, channels).
    weights: Per-position weights of shape (batch, k, h', w').
    kernel: (kernel_h, kernel_w) patch size.
    stride: (stride_h, stride_w) window step.
    padding: (pad_h, pad_w) leading padding; a kernel-sized amount of
      trailing padding is always added so every window stays in bounds.

  Returns:
    Tensor of shape (batch, k, kernel_h, kernel_w, channels).
  """
  logging.info("recompiling for kernel=%s and stride=%s and padding=%s", kernel,
               stride, padding)
  padded = jnp.pad(x, ((0, 0),
                       (padding[0], padding[0] + kernel[0]),
                       (padding[1], padding[1] + kernel[1]),
                       (0, 0)))
  batch, _, _, num_channels = padded.shape
  _, num_maps, rows, cols = weights.shape

  def add_window(carry, row_col):
    # One scan step: slice the window at (row, col) and add its weighted
    # contribution to every one of the k accumulators.
    row, col = row_col
    window = jax.lax.dynamic_slice(
        padded,
        (0, row * stride[0], col * stride[1], 0),
        (batch, kernel[0], kernel[1], num_channels))
    per_map_weight = weights[:, :, row, col]
    contribution = jnp.einsum("bk, bijc -> bkijc", per_map_weight, window)
    return carry + contribution, None

  # All (row, col) positions of the weight grid, in row-major order.
  grid = jnp.stack(
      jnp.meshgrid(jnp.arange(rows), jnp.arange(cols), indexing="ij"),
      axis=-1).reshape((-1, 2))

  accumulator = jnp.zeros((batch, num_maps, kernel[0], kernel[1],
                           num_channels))
  result, _ = jax.lax.scan(add_window, accumulator, grid)
  return result
224
def weighted_anchor_aggregator(x, weights):
  """Sums weighted anchor crops of `x`, resized to 224x224, per weight map.

  For each of the 21 anchor shapes defined in ANCHORS_SETTINGS, a weighted
  combination of patches is extracted from `x`, resized to 224x224 with
  bilinear interpolation, and the contributions of all anchor shapes are
  summed.

  Args:
    x: Input images, shape (batch, height, width, 3).
    weights: Sequence of 21 weight tensors, one per anchor shape, each of
      shape (batch, k, h', w') matching that anchor's score-map grid.

  Returns:
    A (batch * k, 224, 224, 3) tensor of aggregated crops.
  """
  anchor_idx = 0
  resized_crops = []

  for anchor_info in ANCHORS_SETTINGS:
    stride = anchor_info["stride"]
    base_size = anchor_info["size"]
    for scale in anchor_info["scale"]:
      for aspect_ratio in anchor_info["aspect_ratio"]:
        # Kernel keeps the area ~ (base_size * scale)^2 while skewing
        # height/width by the aspect ratio.
        sqrt_ar = float(aspect_ratio) ** 0.5
        kernel_size = (int(base_size * scale / sqrt_ar),
                       int(base_size * scale * sqrt_ar))
        # Padding centers the kernel on the anchor position.
        padding = (math.ceil((kernel_size[0] - stride) / 2.),
                   math.ceil((kernel_size[1] - stride) / 2.))
        patches = extract_weighted_patches(
            x, weights[anchor_idx], kernel_size, (stride, stride), padding)
        patches = jnp.reshape(patches,
                              [-1, kernel_size[0], kernel_size[1], 3])
        resized_crops.append(
            jax.image.resize(patches, [patches.shape[0], 224, 224, 3],
                             "bilinear"))
        anchor_idx += 1

  return jnp.sum(jnp.stack(resized_crops, axis=0), axis=0)
253
class AttentionNet(nn.Module):
  """The complete NTS-Net model using perturbed top-k.

  Pipeline: a shared ResNet50 encodes the full image, ProposalNet scores
  anchor positions, a differentiable (perturbed) top-k selects k anchors,
  the corresponding soft crops are re-encoded by the same ResNet50, and the
  part and whole-image features are combined for classification.
  """

  def apply(self,
            x,
            config,
            num_classes,
            train = True):
    """Creates a model definition.

    Args:
      x: Input images, shape (batch, height, width, channels).
      config: Model config; reads `k`, `ptopk_sigma`, `ptopk_num_samples`
        and `communication`.
      num_classes: Number of output classes.
      train: Whether dropout (and the learned modules) run in training mode.

    Returns:
      Tuple (all_logits, stats): `all_logits` has "raw_logits" (whole
      image), "concat_logits" (whole image + mean part features) and
      "part_logits" ([batch, k, num_classes]); `stats` carries intermediate
      values for logging plus the "rpn_scores_entropy" regularizer term.
    """
    b, c = x.shape[0], x.shape[3]
    k = config.k
    sigma = config.ptopk_sigma
    num_samples = config.ptopk_num_samples

    # Learnable multiplier on the perturbation noise scale. NOTE(review):
    # the state name "sigma_mutiplier" is misspelled; renaming it would
    # invalidate existing checkpoints, so it is kept as-is.
    sigma *= self.state("sigma_mutiplier", shape=(),
                        initializer=nn.initializers.ones).value

    stats = {"x": x, "sigma": sigma}

    # The same ResNet50 instance encodes the full image and the part crops.
    feature_extractor = models.ResNet50.shared(train=train, name="ResNet_0")

    rpn_feature = feature_extractor(x)
    # stop_gradient keeps the scorer from back-propagating into the
    # backbone features.
    rpn_scores, rpn_stats = ProposalNet(
        jax.lax.stop_gradient(rpn_feature),
        communication=Communication(config.communication),
        train=train)
    stats.update(rpn_stats)

    # rpn_scores are a list of score images. We keep track of the structure
    # because it is used in the aggregation step later-on.
    rpn_scores_shapes = [s.shape for s in rpn_scores]
    rpn_scores_flat = jnp.concatenate(
        [jnp.reshape(s, [b, -1]) for s in rpn_scores], axis=1)
    # Differentiable top-k over all flattened anchor scores.
    top_k_indicators = sample_patches.select_patches_perturbed_topk(
        rpn_scores_flat,
        k=k,
        sigma=sigma,
        num_samples=num_samples)
    top_k_indicators = jnp.transpose(top_k_indicators, [0, 2, 1])
    # Un-flatten the k indicator rows back into per-score-map weight images.
    offset = 0
    weights = []
    for sh in rpn_scores_shapes:
      cur = top_k_indicators[:, :, offset:offset + sh[1] * sh[2]]
      cur = jnp.reshape(cur, [b, k, sh[1], sh[2]])
      weights.append(cur)
      offset += sh[1] * sh[2]
    chex.assert_equal(offset, top_k_indicators.shape[-1])

    # Soft-extract the k selected anchors as 224x224 part images.
    part_imgs = weighted_anchor_aggregator(x, weights)
    chex.assert_shape(part_imgs, (b * k, 224, 224, c))
    stats["part_imgs"] = jnp.reshape(part_imgs, [b, k*224, 224, c])

    part_features = feature_extractor(part_imgs)
    part_features = jnp.mean(part_features, axis=[1, 2])  # GAP the spatial dims

    part_features = nn.dropout(  # features from parts
        jnp.reshape(part_features, [b * k, 2048]),
        0.5,
        deterministic=not train,
        rng=nn.make_rng())
    features = nn.dropout(  # features from whole image
        jnp.reshape(jnp.mean(rpn_feature, axis=[1, 2]), [b, -1]),
        0.5,
        deterministic=not train,
        rng=nn.make_rng())

    # Mean pool all part features, add it to features and predict logits.
    concat_out = jnp.mean(jnp.reshape(part_features, [b, k, 2048]),
                          axis=1) + features
    concat_logits = nn.Dense(concat_out, num_classes)
    raw_logits = nn.Dense(features, num_classes)
    part_logits = jnp.reshape(nn.Dense(part_features, num_classes), [b, k, -1])

    all_logits = {
        "raw_logits": raw_logits,
        "concat_logits": concat_logits,
        "part_logits": part_logits,
    }
    # add entropy into it for entropy regularization.
    stats["rpn_scores_entropy"] = jax.scipy.special.entr(
        jax.nn.softmax(stats["raw_scores"])).sum(axis=1).mean(axis=0)
    return all_logits, stats
337
def create_optimizer(config):
  """Builds the optax optimizer chain described by `config`.

  The chain is assembled in this order: gradient clipping, coupled weight
  decay, the optimizer with its learning rate schedule, decoupled weight
  decay, and parameter freezing.

  Args:
    config: ml_collections config providing at least `optimizer`,
      `learning_rate` and `num_train_steps`; see the body for the
      optional fields.

  Returns:
    An optax gradient transformation chaining all configured updates.
  """
  transforms = []

  # Gradient clipping: `gradient_norm_clip` (by global norm) and
  # `gradient_value_clip` (by absolute value) are mutually exclusive.
  if "gradient_clip" in config:
    raise ValueError("'gradient_clip' is deprecated, please use "
                     "'gradient_norm_clip'.")
  has_norm_clip = "gradient_norm_clip" in config
  has_value_clip = "gradient_value_clip" in config
  assert not (has_norm_clip and has_value_clip), (
      "Gradient clipping by norm and by value are exclusive.")
  if has_norm_clip:
    transforms.append(optax.clip_by_global_norm(config.gradient_norm_clip))
  if has_value_clip:
    transforms.append(optax.clip(config.gradient_value_clip))

  # Learning rate schedule with optional warmup / step decay / cosine decay.
  schedule_fn = utils.get_optax_schedule_fn(
      warmup_ratio=config.get("warmup_ratio", 0.),
      num_train_steps=config.num_train_steps,
      decay=config.get("learning_rate_step_decay", 1.0),
      decay_at_steps=config.get("learning_rate_decay_at_steps", []),
      cosine_decay_schedule=config.get("cosine_decay", False))
  schedule_parts = [optax.scale_by_schedule(schedule_fn)]

  # Scale some parameters matching a regex by a multiplier. Config field
  # `scaling_learning_rate_by_regex` is a list of (regex, multiplier) pairs.
  for regex, multiplier in config.get("scaling_learning_rate_by_regex", []):
    logging.info("Learning rate is scaled by %f for parameters matching '%s'",
                 multiplier, regex)
    schedule_parts.append(utils.scale_selected_parameters(regex, multiplier))
  schedule_optimizer = optax.chain(*schedule_parts)

  if "weight_decay_coupled" in config and config.weight_decay_coupled > 0.:
    # Applied before the optimizer transformation, which makes the decay
    # coupled; the step size here is a constant 1, not the schedule.
    transforms.append(utils.decoupled_weight_decay(
        decay=config.weight_decay_coupled,
        step_size_fn=lambda x: jnp.ones([], dtype=jnp.float32)))

  optimizer_name = config.optimizer.lower()
  if optimizer_name == "adam":
    # For Adam the schedule transformation runs after the optimizer update.
    transforms.append(optax.adam(config.learning_rate))
    transforms.append(schedule_optimizer)
  elif optimizer_name == "sgd":
    # For SGD the schedule is applied before the momentum update.
    transforms.append(schedule_optimizer)
    transforms.append(
        optax.sgd(config.learning_rate, momentum=config.momentum))
  else:
    raise NotImplementedError("Invalid optimizer: {}".format(
        config.optimizer))

  if "weight_decay" in config and config.weight_decay > 0.:
    # Decoupled weight decay, following the learning rate schedule.
    transforms.append(utils.decoupled_weight_decay(
        decay=config.weight_decay, step_size_fn=schedule_fn))

  # Freeze parameters that match the given regexes (if any).
  freeze_regexes = config.get("freeze_weights_regex", []) or []
  if isinstance(freeze_regexes, str):
    freeze_regexes = [freeze_regexes]
  for regex in freeze_regexes:
    transforms.append(utils.freeze(regex))

  return optax.chain(*transforms)
407
def cross_entropy(logits, labels):
  """Mean cross-entropy between integer labels and unnormalized logits.

  Args:
    logits: Unnormalized scores, shape [batch, num_classes].
    labels: Integer class labels, shape [batch].

  Returns:
    Scalar mean negative log-likelihood of the labels.
  """
  log_probs = jax.nn.log_softmax(logits)
  label_log_probs = jnp.take_along_axis(log_probs, labels[:, None], axis=1)
  return -jnp.mean(label_log_probs)
414
def ntsnet_loss(logits_dict, labels, stats, config):
  """Customized cross entropy loss for dictionary of logits.

  Combines three cross-entropy terms (whole-image logits, concatenated
  logits, and per-part logits) with an entropy regularizer on the RPN
  scores.

  Args:
    logits_dict: Dict with "raw_logits", "concat_logits" and "part_logits"
      (the latter of shape [batch, k, num_classes]).
    labels: Integer class labels of shape [batch].
    stats: Model statistics dict; must contain "rpn_scores_entropy".
    config: Config providing `entropy_regularizer`.

  Returns:
    Scalar total loss.
  """
  raw_loss = cross_entropy(logits_dict["raw_logits"], labels)
  concat_loss = cross_entropy(logits_dict["concat_logits"], labels)

  # Reuse the shared metric helper instead of duplicating the per-part loss
  # computation (it tiles the labels over the k parts and flattens before
  # the cross entropy) — keeps the train/eval metric and the loss in sync.
  part_loss = cross_entropy_part_logits(logits_dict, labels, stats)

  reg = config.entropy_regularizer * rpn_scores_entropy(
      logits_dict, labels, stats)

  return raw_loss + concat_loss + part_loss + reg
436
def accuracy(logits_dict, labels, stats):
  """Top-1 accuracy computed on the "concat_logits" predictions."""
  del stats  # Unused; kept for the shared metric-fn signature.
  predicted = jnp.argmax(logits_dict["concat_logits"], axis=-1)
  return jnp.mean(predicted == labels)
444
def cross_entropy_raw_logits(logits_dict, labels, stats):
  """Cross entropy restricted to the whole-image ("raw") logits."""
  del stats  # Unused; kept for the shared metric-fn signature.
  raw_logits = logits_dict["raw_logits"]
  return cross_entropy(raw_logits, labels)
450
def cross_entropy_concat_logits(logits_dict, labels, stats):
  """Cross entropy restricted to the combined ("concat") logits."""
  del stats  # Unused; kept for the shared metric-fn signature.
  concat_logits = logits_dict["concat_logits"]
  return cross_entropy(concat_logits, labels)
456
def cross_entropy_part_logits(logits_dict, labels, stats):
  """Cross entropy over the k per-part logits, averaged over all parts."""
  del stats  # Unused; kept for the shared metric-fn signature.
  part_logits = logits_dict["part_logits"]  # [batch, k, num_classes]
  num_parts = part_logits.shape[1]
  num_classes = part_logits.shape[2]
  # Every part of an image shares that image's label.
  flat_logits = jnp.reshape(part_logits, [-1, num_classes])
  flat_labels = jnp.reshape(
      jnp.tile(jnp.expand_dims(labels, axis=1), [1, num_parts]), [-1])
  return cross_entropy(flat_logits, flat_labels)
469
def rpn_scores_entropy(logits_dict, labels, stats):
  """Returns the RPN score entropy already computed by the model."""
  del logits_dict, labels  # Unused; kept for the shared metric-fn signature.
  return stats["rpn_scores_entropy"]
476
def train_and_evaluate(config, workdir):
  """Runs a training and evaluation loop.

  Args:
    config: Configuration to use.
    workdir: Working directory for checkpoints and TF summaries. If this
      contains checkpoint training will be resumed from the latest checkpoint.

  Returns:
    Training state.
  """
  rng = jax.random.PRNGKey(config.seed)
  rng, data_rng = jax.random.split(rng)

  # Make sure config defines num_epochs and num_train_steps appropriately.
  utils.check_epochs_and_steps(config)

  # Check that perturbed-topk is selection method.
  assert config.selection_method == "perturbed-topk", (
      "ntsnet only supports perturbed-topk as selection method. Got: {}".format(
          config.selection_method))

  train_preprocessing_fn, eval_preprocessing_fn = data.parse_preprocessing_strings(
      config.get("train_preprocess_str", ""),
      config.get("eval_preprocess_str", ""))

  assert config.batch_size % jax.local_device_count() == 0, (
      f"Batch size ({config.batch_size}) should be divisible by number of "
      f"devices ({jax.local_device_count()}).")

  per_device_batch_size = config.batch_size // jax.local_device_count()
  train_ds, eval_ds, num_classes = data.get_dataset(
      config.dataset,
      per_device_batch_size,
      data_rng,
      train_preprocessing_fn=train_preprocessing_fn,
      eval_preprocessing_fn=eval_preprocessing_fn,
      **config.get("data", {}))

  # Bind the static arguments so the training loop sees only the inputs.
  module = AttentionNet.partial(config=config, num_classes=num_classes)

  optimizer = create_optimizer(config)

  # Loss and metrics all share the (logits_dict, labels, stats) signature.
  loss_fn = functools.partial(ntsnet_loss, config=config)
  train_metrics_dict = {
      "train_loss": loss_fn,
      "train_loss_raw": cross_entropy_raw_logits,
      "train_loss_concat": cross_entropy_concat_logits,
      "train_loss_part": cross_entropy_part_logits,
      "train_accuracy": accuracy,
      "train_rpn_scores_entropy": rpn_scores_entropy,
  }
  eval_metrics_dict = {
      "eval_loss": loss_fn,
      "eval_loss_raw": cross_entropy_raw_logits,
      "eval_loss_concat": cross_entropy_concat_logits,
      "eval_loss_part": cross_entropy_part_logits,
      "eval_accuracy": accuracy,
      "eval_rpn_scores_entropy": rpn_scores_entropy,
  }

  # Enables relevant statistics aggregator.
  stats_aggregators = []

  def add_image_prefix(image_aggregator):
    # Prefixes every aggregated entry with "image_".
    def aggregator(stats):
      d = image_aggregator(stats)
      return {f"image_{k}": v for k, v in d.items()}
    return aggregator

  if config.get("log_images", True):
    @add_image_prefix
    def plot_patches(stats):
      # NOTE(review): the +1 / 2 rescaling assumes images are in [-1, 1];
      # confirm against the preprocessing pipeline.
      d = {
          "part_imgs": (stats["part_imgs"] + 1.0) / 2.0,
          "x": (stats["x"] + 1.0) / 2.0
      }
      for i, sc in enumerate(stats["scores"]):
        d[f"scores_{i}"] = sc
      return d

    stats_aggregators.append(plot_patches)

  stats_aggregators.append(lambda x: {"sigma": x["sigma"]})

  state = classification_lib.training_loop(
      module=module,
      rng=rng,
      train_ds=train_ds,
      eval_ds=eval_ds,
      loss_fn=loss_fn,
      optimizer=optimizer,
      train_metrics_dict=train_metrics_dict,
      eval_metrics_dict=eval_metrics_dict,
      stats_aggregators=stats_aggregators,
      config=config,
      workdir=workdir)
  return state
576
def main(argv):
  """Entry point: trains and evaluates with the flag-provided config."""
  del argv  # Unused.

  # Hide any GPUs from TensorFlow. Otherwise TF might reserve memory and
  # make it unavailable to JAX.
  tf.config.experimental.set_visible_devices([], "GPU")

  train_and_evaluate(FLAGS.config, FLAGS.workdir)
587
if __name__ == "__main__":
  # Both flags must be supplied on the command line before the app runs.
  flags.mark_flags_as_required(["config", "workdir"])
  app.run(main)