google-research

generator.cc
428 строк · 16.5 Кб
Перенос по словам
1
// Copyright 2024 The Google Research Authors.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14

15
#include "generator.h"
16

17
#include "definitions.h"
18
#include "instruction.pb.h"
19
#include "instruction.h"
20
#include "random_generator.h"
21
#include "absl/memory/memory.h"
22

23
namespace automl_zero {
24

25
using ::absl::make_unique;
26
using ::std::endl;
27
using ::std::make_shared;
28
using ::std::mt19937;
29
using ::std::shared_ptr;
30
using ::std::vector;
31

32
void PadComponentFunctionWithInstruction(
33
    const size_t total_instructions,
34
    const shared_ptr<const Instruction>& instruction,
35
    vector<shared_ptr<const Instruction>>* component_function) {
36
  component_function->reserve(total_instructions);
37
  while (component_function->size() < total_instructions) {
38
    component_function->emplace_back(instruction);
39
  }
40
}
41

42
// Constructs a generator from explicit settings.
//
// `init_model` selects what TheInitModel() returns. The three `*_size_init`
// values are the lengths the component functions are filled or padded to, and
// the three `allowed_*_ops` lists are copied into the generator and also
// handed to the randomizer. `bit_gen` and `rand_gen` are kept/forwarded as raw
// pointers (presumably not owned and expected to outlive the generator --
// confirm against the header).
Generator::Generator(
    const HardcodedAlgorithmID init_model,
    const IntegerT setup_size_init,
    const IntegerT predict_size_init,
    const IntegerT learn_size_init,
    const vector<Op>& allowed_setup_ops,
    const vector<Op>& allowed_predict_ops,
    const vector<Op>& allowed_learn_ops,
    mt19937* bit_gen,
    RandomGenerator* rand_gen)
    // Initial model and component function sizes.
    : init_model_(init_model),
      setup_size_init_(setup_size_init),
      predict_size_init_(predict_size_init),
      learn_size_init_(learn_size_init),
      // Ops permitted in each component function.
      allowed_setup_ops_(allowed_setup_ops),
      allowed_predict_ops_(allowed_predict_ops),
      allowed_learn_ops_(allowed_learn_ops),
      // Randomness sources.
      rand_gen_(rand_gen),
      // NOTE(review): reads the `rand_gen_` member, so this relies on
      // `rand_gen_` being declared before `randomizer_` -- confirm in the
      // header.
      randomizer_(
          allowed_setup_ops, allowed_predict_ops, allowed_learn_ops,
          bit_gen, rand_gen_),
      // Shared default-constructed instruction used by NoOp().
      no_op_instruction_(make_shared<const Instruction>()) {}
67

68
// Returns the algorithm selected by the `init_model_` ID this generator was
// constructed with.
Algorithm Generator::TheInitModel() {
  const HardcodedAlgorithmID configured_model = init_model_;
  return ModelByID(configured_model);
}
71

72
// Returns the hard-coded algorithm identified by `model`. An ID without a case
// below is reported through LOG(FATAL), which is expected to terminate the
// process (hence no return after the switch).
Algorithm Generator::ModelByID(const HardcodedAlgorithmID model) {
  switch (model) {
    case NO_OP_ALGORITHM: {
      return NoOp();
    }
    case RANDOM_ALGORITHM: {
      return Random();
    }
    case NEURAL_NET_ALGORITHM: {
      return NeuralNet(kDefaultLearningRate, 0.1, 0.1);
    }
    case INTEGRATION_TEST_DAMAGED_NEURAL_NET_ALGORITHM: {
      Algorithm damaged = NeuralNet(kDefaultLearningRate, 0.1, 0.1);
      // Remove the two leading setup instructions, i.e. the gaussian
      // initialization of the final and first layer weights.
      for (int removed = 0; removed < 2; ++removed) {
        damaged.setup_.erase(damaged.setup_.begin());
      }
      return damaged;
    }
    case LINEAR_ALGORITHM: {
      return LinearModel(kDefaultLearningRate);
    }
    default: {
      LOG(FATAL) << "Unsupported algorithm ID." << endl;
    }
  }
}
96

97
inline void FillComponentFunctionWithInstruction(
98
    const IntegerT num_instructions,
99
    const shared_ptr<const Instruction>& instruction,
100
    vector<shared_ptr<const Instruction>>* component_function) {
101
  component_function->reserve(num_instructions);
102
  component_function->clear();
103
  for (IntegerT pos = 0; pos < num_instructions; ++pos) {
104
    component_function->emplace_back(instruction);
105
  }
106
}
107

108
// Returns an algorithm whose setup, predict and learn component functions are
// filled to their configured initial sizes with the shared
// `no_op_instruction_`.
Algorithm Generator::NoOp() {
  Algorithm no_op_algorithm;
  FillComponentFunctionWithInstruction(setup_size_init_, no_op_instruction_,
                                       &no_op_algorithm.setup_);
  FillComponentFunctionWithInstruction(predict_size_init_, no_op_instruction_,
                                       &no_op_algorithm.predict_);
  FillComponentFunctionWithInstruction(learn_size_init_, no_op_instruction_,
                                       &no_op_algorithm.learn_);
  return no_op_algorithm;
}
118

119
// Returns an algorithm that starts as NoOp() and is then passed through
// `randomizer_.Randomize`. Each component function with a non-zero initial
// size must have at least one allowed op; otherwise a CHECK fails.
Algorithm Generator::Random() {
  Algorithm randomized = NoOp();

  // A non-empty component function needs a non-empty op list.
  CHECK(setup_size_init_ == 0 || !allowed_setup_ops_.empty());
  CHECK(predict_size_init_ == 0 || !allowed_predict_ops_.empty());
  CHECK(learn_size_init_ == 0 || !allowed_learn_ops_.empty());

  randomizer_.Randomize(&randomized);
  return randomized;
}
127

128
void PadComponentFunctionWithRandomInstruction(
129
    const size_t total_instructions, const Op op,
130
    RandomGenerator* rand_gen,
131
    vector<shared_ptr<const Instruction>>* component_function) {
132
  component_function->reserve(total_instructions);
133
  while (component_function->size() < total_instructions) {
134
    component_function->push_back(make_shared<const Instruction>(op, rand_gen));
135
  }
136
}
137

138
// Default constructor: a RANDOM_ALGORITHM generator with component function
// sizes 6 (setup), 3 (predict) and 9 (learn), the same four allowed ops for
// every component function, and random generators that it creates and owns
// itself, seeded through GenerateRandomSeed().
//
// NOTE(review): several initializers read members initialized earlier in this
// list (`bit_gen_owned_` -> `rand_gen_owned_` -> `rand_gen_` -> `randomizer_`),
// so the member declaration order in the header must match -- confirm.
Generator::Generator()
    : init_model_(RANDOM_ALGORITHM),
      setup_size_init_(6),
      predict_size_init_(3),
      learn_size_init_(9),
      allowed_setup_ops_(
          {NO_OP, SCALAR_SUM_OP, MATRIX_VECTOR_PRODUCT_OP, VECTOR_MEAN_OP}),
      allowed_predict_ops_(
          {NO_OP, SCALAR_SUM_OP, MATRIX_VECTOR_PRODUCT_OP, VECTOR_MEAN_OP}),
      allowed_learn_ops_(
          {NO_OP, SCALAR_SUM_OP, MATRIX_VECTOR_PRODUCT_OP, VECTOR_MEAN_OP}),
      // Owned randomness sources; `rand_gen_` is the raw view of the owned
      // RandomGenerator.
      bit_gen_owned_(make_unique<mt19937>(GenerateRandomSeed())),
      rand_gen_owned_(make_unique<RandomGenerator>(bit_gen_owned_.get())),
      rand_gen_(rand_gen_owned_.get()),
      randomizer_(
          allowed_setup_ops_, allowed_predict_ops_, allowed_learn_ops_,
          bit_gen_owned_.get(), rand_gen_),
      // Shared default-constructed instruction used by NoOp().
      no_op_instruction_(make_shared<const Instruction>()) {}
159

160
// Builds a fixed two-layer network algorithm without bias terms.
//
// Setup stores `learning_rate` in a scalar. Predict is a matrix-vector
// product, a max against the vector at kZerosAddress, and an inner product
// with the final layer weights. Learn is a nine-instruction update of the
// final and first layer weights. Each component function is then padded with
// no-op instructions up to its configured initial size.
Algorithm Generator::UnitTestNeuralNetNoBiasNoGradient(
    const double learning_rate) {
  Algorithm algorithm;
  auto& setup = algorithm.setup_;
  auto& predict = algorithm.predict_;
  auto& learn = algorithm.learn_;

  // Scalar addresses
  constexpr AddressT kLearningRateAddress = 2;
  constexpr AddressT kPredictionErrorAddress = 3;
  CHECK_GE(kMaxScalarAddresses, 4);

  // Vector addresses.
  constexpr AddressT kFinalLayerWeightsAddress = 1;
  CHECK_EQ(
      kFinalLayerWeightsAddress,
      Generator::kUnitTestNeuralNetNoBiasNoGradientFinalLayerWeightsAddress);
  constexpr AddressT kFirstLayerOutputBeforeReluAddress = 2;
  constexpr AddressT kFirstLayerOutputAfterReluAddress = 3;
  constexpr AddressT kZerosAddress = 4;
  constexpr AddressT kGradientWrtFinalLayerWeightsAddress = 5;
  constexpr AddressT kGradientWrtActivationsAddress = 6;
  constexpr AddressT kGradientOfReluAddress = 7;
  CHECK_GE(kMaxVectorAddresses, 8);

  // Matrix addresses.
  constexpr AddressT kFirstLayerWeightsAddress = 0;
  CHECK_EQ(
      kFirstLayerWeightsAddress,
      Generator::kUnitTestNeuralNetNoBiasNoGradientFirstLayerWeightsAddress);
  constexpr AddressT kGradientWrtFirstLayerWeightsAddress = 1;
  CHECK_GE(kMaxMatrixAddresses, 2);

  // Default-constructed instruction used as padding for all three component
  // functions.
  const shared_ptr<const Instruction> padding =
      make_shared<const Instruction>();

  // ---- Setup: store the learning rate. ----
  setup.push_back(make_shared<const Instruction>(
      SCALAR_CONST_SET_OP, kLearningRateAddress,
      ActivationDataSetter(learning_rate)));
  PadComponentFunctionWithInstruction(setup_size_init_, padding, &setup);

  // ---- Predict. ----
  predict.reserve(5);
  // Multiply with first layer weight matrix.
  predict.push_back(make_shared<const Instruction>(
      MATRIX_VECTOR_PRODUCT_OP, kFirstLayerWeightsAddress,
      kFeaturesVectorAddress, kFirstLayerOutputBeforeReluAddress));
  // Apply RELU.
  predict.push_back(make_shared<const Instruction>(
      VECTOR_MAX_OP, kFirstLayerOutputBeforeReluAddress, kZerosAddress,
      kFirstLayerOutputAfterReluAddress));
  // Dot product with final layer weight vector.
  predict.push_back(make_shared<const Instruction>(
      VECTOR_INNER_PRODUCT_OP, kFirstLayerOutputAfterReluAddress,
      kFinalLayerWeightsAddress, kPredictionsScalarAddress));
  PadComponentFunctionWithInstruction(predict_size_init_, padding, &predict);

  // ---- Learn. ----
  learn.reserve(11);
  // Error between label and prediction.
  learn.push_back(make_shared<const Instruction>(
      SCALAR_DIFF_OP, kLabelsScalarAddress, kPredictionsScalarAddress,
      kPredictionErrorAddress));
  // Scale the error by the learning rate, in place.
  learn.push_back(make_shared<const Instruction>(
      SCALAR_PRODUCT_OP, kLearningRateAddress, kPredictionErrorAddress,
      kPredictionErrorAddress));
  // Update final layer weights.
  learn.push_back(make_shared<const Instruction>(
      SCALAR_VECTOR_PRODUCT_OP, kPredictionErrorAddress,
      kFirstLayerOutputAfterReluAddress, kGradientWrtFinalLayerWeightsAddress));
  learn.push_back(make_shared<const Instruction>(
      VECTOR_SUM_OP, kFinalLayerWeightsAddress,
      kGradientWrtFinalLayerWeightsAddress, kFinalLayerWeightsAddress));
  // Propagate the scaled error back through the final layer and the RELU.
  learn.push_back(make_shared<const Instruction>(
      SCALAR_VECTOR_PRODUCT_OP, kPredictionErrorAddress,
      kFinalLayerWeightsAddress, kGradientWrtActivationsAddress));
  learn.push_back(make_shared<const Instruction>(
      VECTOR_HEAVYSIDE_OP, kFirstLayerOutputBeforeReluAddress, 0,
      kGradientOfReluAddress));
  learn.push_back(make_shared<const Instruction>(
      VECTOR_PRODUCT_OP, kGradientOfReluAddress,
      kGradientWrtActivationsAddress, kGradientWrtActivationsAddress));
  // Update first layer weights.
  learn.push_back(make_shared<const Instruction>(
      VECTOR_OUTER_PRODUCT_OP, kGradientWrtActivationsAddress,
      kFeaturesVectorAddress, kGradientWrtFirstLayerWeightsAddress));
  learn.push_back(make_shared<const Instruction>(
      MATRIX_SUM_OP, kFirstLayerWeightsAddress,
      kGradientWrtFirstLayerWeightsAddress, kFirstLayerWeightsAddress));
  PadComponentFunctionWithInstruction(learn_size_init_, padding, &learn);

  return algorithm;
}
256

257
// Builds a fixed two-layer neural network algorithm with bias terms.
//
// Args:
//   learning_rate: value stored in the learning rate scalar during setup.
//   first_init_scale: second parameter of the gaussian initialization of the
//     first layer weight matrix (presumably the standard deviation -- confirm
//     against MATRIX_GAUSSIAN_SET_OP).
//   final_init_scale: same, for the final layer weight vector.
//
// Setup holds the two gaussian initializations followed by the learning rate
// constant; ModelByID relies on the gaussian instructions being the first two.
// Predict and learn hold 5 and 11 instructions respectively. Every component
// function is then padded with no-op instructions up to its configured
// initial size.
Algorithm Generator::NeuralNet(
    const double learning_rate,
    const double first_init_scale,
    const double final_init_scale) {
  Algorithm algorithm;

  // Scalar addresses
  constexpr AddressT kFinalLayerBiasAddress = 2;
  constexpr AddressT kLearningRateAddress = 3;
  constexpr AddressT kPredictionErrorAddress = 4;
  CHECK_GE(kMaxScalarAddresses, 5);

  // Vector addresses.
  constexpr AddressT kFirstLayerBiasAddress = 1;
  constexpr AddressT kFinalLayerWeightsAddress = 2;
  constexpr AddressT kFirstLayerOutputBeforeReluAddress = 3;
  constexpr AddressT kFirstLayerOutputAfterReluAddress = 4;
  constexpr AddressT kZerosAddress = 5;
  constexpr AddressT kGradientWrtFinalLayerWeightsAddress = 6;
  constexpr AddressT kGradientWrtActivationsAddress = 7;
  constexpr AddressT kGradientOfReluAddress = 8;
  CHECK_GE(kMaxVectorAddresses, 9);

  // Matrix addresses.
  constexpr AddressT kFirstLayerWeightsAddress = 0;
  constexpr AddressT kGradientWrtFirstLayerWeightsAddress = 1;
  CHECK_GE(kMaxMatrixAddresses, 2);

  // Default-constructed instruction used as padding for all three component
  // functions.
  shared_ptr<const Instruction> no_op_instruction =
      make_shared<const Instruction>();

  // Gaussian initialization of the final layer weights.
  algorithm.setup_.emplace_back(make_shared<const Instruction>(
      VECTOR_GAUSSIAN_SET_OP,
      kFinalLayerWeightsAddress,
      FloatDataSetter(0.0),
      FloatDataSetter(final_init_scale)));
  // Gaussian initialization of the first layer weights.
  algorithm.setup_.emplace_back(make_shared<const Instruction>(
      MATRIX_GAUSSIAN_SET_OP,
      kFirstLayerWeightsAddress,
      FloatDataSetter(0.0),
      FloatDataSetter(first_init_scale)));
  // Store the learning rate.
  algorithm.setup_.emplace_back(make_shared<const Instruction>(
      SCALAR_CONST_SET_OP,
      kLearningRateAddress,
      ActivationDataSetter(learning_rate)));
  PadComponentFunctionWithInstruction(
      setup_size_init_, no_op_instruction, &algorithm.setup_);

  // Multiply with first layer weight matrix.
  algorithm.predict_.emplace_back(make_shared<const Instruction>(
      MATRIX_VECTOR_PRODUCT_OP,
      kFirstLayerWeightsAddress, kFeaturesVectorAddress,
      kFirstLayerOutputBeforeReluAddress));
  // Add first layer bias.
  algorithm.predict_.emplace_back(make_shared<const Instruction>(
      VECTOR_SUM_OP, kFirstLayerOutputBeforeReluAddress, kFirstLayerBiasAddress,
      kFirstLayerOutputBeforeReluAddress));
  // Apply RELU.
  algorithm.predict_.emplace_back(make_shared<const Instruction>(
      VECTOR_MAX_OP, kFirstLayerOutputBeforeReluAddress, kZerosAddress,
      kFirstLayerOutputAfterReluAddress));
  // Dot product with final layer weight vector.
  algorithm.predict_.emplace_back(make_shared<const Instruction>(
      VECTOR_INNER_PRODUCT_OP, kFirstLayerOutputAfterReluAddress,
      kFinalLayerWeightsAddress, kPredictionsScalarAddress));
  // Add final layer bias.
  // The address must be strictly below the number of scalar addresses; an
  // address equal to the count would already be out of range.
  CHECK_LT(kFinalLayerBiasAddress, kMaxScalarAddresses);
  algorithm.predict_.emplace_back(make_shared<const Instruction>(
      SCALAR_SUM_OP, kPredictionsScalarAddress, kFinalLayerBiasAddress,
      kPredictionsScalarAddress));
  PadComponentFunctionWithInstruction(
      predict_size_init_, no_op_instruction, &algorithm.predict_);

  algorithm.learn_.reserve(11);
  // Error between label and prediction.
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      SCALAR_DIFF_OP, kLabelsScalarAddress, kPredictionsScalarAddress,
      kPredictionErrorAddress));
  // Scale the error by the learning rate, in place.
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      SCALAR_PRODUCT_OP,
      kLearningRateAddress, kPredictionErrorAddress, kPredictionErrorAddress));
  CHECK_LT(kFinalLayerBiasAddress, kMaxScalarAddresses);
  // Update final layer bias.
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      SCALAR_SUM_OP, kFinalLayerBiasAddress, kPredictionErrorAddress,
      kFinalLayerBiasAddress));
  // Update final layer weights.
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      SCALAR_VECTOR_PRODUCT_OP, kPredictionErrorAddress,
      kFirstLayerOutputAfterReluAddress, kGradientWrtFinalLayerWeightsAddress));
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      VECTOR_SUM_OP,
      kFinalLayerWeightsAddress, kGradientWrtFinalLayerWeightsAddress,
      kFinalLayerWeightsAddress));
  // Propagate the scaled error back through the final layer and the RELU.
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      SCALAR_VECTOR_PRODUCT_OP,
      kPredictionErrorAddress, kFinalLayerWeightsAddress,
      kGradientWrtActivationsAddress));
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      VECTOR_HEAVYSIDE_OP,
      kFirstLayerOutputBeforeReluAddress, 0, kGradientOfReluAddress));
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      VECTOR_PRODUCT_OP,
      kGradientOfReluAddress, kGradientWrtActivationsAddress,
      kGradientWrtActivationsAddress));
  // Update first layer bias.
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      VECTOR_SUM_OP, kFirstLayerBiasAddress, kGradientWrtActivationsAddress,
      kFirstLayerBiasAddress));
  // Update first layer weights.
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      VECTOR_OUTER_PRODUCT_OP,
      kGradientWrtActivationsAddress, kFeaturesVectorAddress,
      kGradientWrtFirstLayerWeightsAddress));
  algorithm.learn_.emplace_back(make_shared<const Instruction>(
      MATRIX_SUM_OP,
      kFirstLayerWeightsAddress, kGradientWrtFirstLayerWeightsAddress,
      kFirstLayerWeightsAddress));
  PadComponentFunctionWithInstruction(
      learn_size_init_, no_op_instruction, &algorithm.learn_);

  return algorithm;
}
377

378
// Builds a fixed linear model algorithm.
//
// Setup stores `learning_rate` in a scalar. Predict is a single inner product
// of the weights vector with the features vector. Learn is a four-instruction
// update of the weights vector. Each component function is then padded with
// no-op instructions up to its configured initial size.
Algorithm Generator::LinearModel(const double learning_rate) {
  Algorithm algorithm;
  auto& setup = algorithm.setup_;
  auto& predict = algorithm.predict_;
  auto& learn = algorithm.learn_;

  // Scalar addresses
  constexpr AddressT kLearningRateAddress = 2;
  constexpr AddressT kPredictionErrorAddress = 3;
  CHECK_GE(kMaxScalarAddresses, 4);

  // Vector addresses.
  constexpr AddressT kWeightsAddress = 1;
  constexpr AddressT kCorrectionAddress = 2;
  CHECK_GE(kMaxVectorAddresses, 3);

  // No matrix addresses are used by this model.
  CHECK_GE(kMaxMatrixAddresses, 0);

  // Default-constructed instruction used as padding for all three component
  // functions.
  const shared_ptr<const Instruction> padding =
      make_shared<const Instruction>();

  // ---- Setup: store the learning rate. ----
  setup.push_back(make_shared<const Instruction>(
      SCALAR_CONST_SET_OP, kLearningRateAddress,
      ActivationDataSetter(learning_rate)));
  PadComponentFunctionWithInstruction(setup_size_init_, padding, &setup);

  // ---- Predict: inner product of weights and features. ----
  predict.push_back(make_shared<const Instruction>(
      VECTOR_INNER_PRODUCT_OP, kWeightsAddress, kFeaturesVectorAddress,
      kPredictionsScalarAddress));
  PadComponentFunctionWithInstruction(predict_size_init_, padding, &predict);

  // ---- Learn. ----
  // Error between label and prediction.
  learn.push_back(make_shared<const Instruction>(
      SCALAR_DIFF_OP, kLabelsScalarAddress, kPredictionsScalarAddress,
      kPredictionErrorAddress));
  // Scale the error by the learning rate, in place.
  learn.push_back(make_shared<const Instruction>(
      SCALAR_PRODUCT_OP, kLearningRateAddress, kPredictionErrorAddress,
      kPredictionErrorAddress));
  // Correction vector from the scaled error and the features.
  learn.push_back(make_shared<const Instruction>(
      SCALAR_VECTOR_PRODUCT_OP, kPredictionErrorAddress,
      kFeaturesVectorAddress, kCorrectionAddress));
  // Add the correction to the weights, in place.
  learn.push_back(make_shared<const Instruction>(
      VECTOR_SUM_OP, kWeightsAddress, kCorrectionAddress, kWeightsAddress));
  PadComponentFunctionWithInstruction(learn_size_init_, padding, &learn);

  return algorithm;
}
427

428
}  // namespace automl_zero
429
google-research

Использование cookies