google-research
428 строк · 16.5 Кб
1// Copyright 2024 The Google Research Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#include "generator.h"
16
17#include "definitions.h"
18#include "instruction.pb.h"
19#include "instruction.h"
20#include "random_generator.h"
21#include "absl/memory/memory.h"
22
23namespace automl_zero {
24
25using ::absl::make_unique;
26using ::std::endl;
27using ::std::make_shared;
28using ::std::mt19937;
29using ::std::shared_ptr;
30using ::std::vector;
31
32void PadComponentFunctionWithInstruction(
33const size_t total_instructions,
34const shared_ptr<const Instruction>& instruction,
35vector<shared_ptr<const Instruction>>* component_function) {
36component_function->reserve(total_instructions);
37while (component_function->size() < total_instructions) {
38component_function->emplace_back(instruction);
39}
40}
41
42Generator::Generator(
43const HardcodedAlgorithmID init_model,
44const IntegerT setup_size_init,
45const IntegerT predict_size_init,
46const IntegerT learn_size_init,
47const vector<Op>& allowed_setup_ops,
48const vector<Op>& allowed_predict_ops,
49const vector<Op>& allowed_learn_ops,
50mt19937* bit_gen,
51RandomGenerator* rand_gen)
52: init_model_(init_model),
53setup_size_init_(setup_size_init),
54predict_size_init_(predict_size_init),
55learn_size_init_(learn_size_init),
56allowed_setup_ops_(allowed_setup_ops),
57allowed_predict_ops_(allowed_predict_ops),
58allowed_learn_ops_(allowed_learn_ops),
59rand_gen_(rand_gen),
60randomizer_(
61allowed_setup_ops,
62allowed_predict_ops,
63allowed_learn_ops,
64bit_gen,
65rand_gen_),
66no_op_instruction_(make_shared<const Instruction>()) {}
67
68Algorithm Generator::TheInitModel() {
69return ModelByID(init_model_);
70}
71
72Algorithm Generator::ModelByID(const HardcodedAlgorithmID model) {
73switch (model) {
74case NO_OP_ALGORITHM:
75return NoOp();
76case RANDOM_ALGORITHM:
77return Random();
78case NEURAL_NET_ALGORITHM:
79return NeuralNet(
80kDefaultLearningRate, 0.1, 0.1);
81case INTEGRATION_TEST_DAMAGED_NEURAL_NET_ALGORITHM: {
82Algorithm algorithm = NeuralNet(
83kDefaultLearningRate, 0.1, 0.1);
84// Delete the first two instructions in setup which are the
85// gaussian initialization of the first and final layer weights.
86algorithm.setup_.erase(algorithm.setup_.begin());
87algorithm.setup_.erase(algorithm.setup_.begin());
88return algorithm;
89}
90case LINEAR_ALGORITHM:
91return LinearModel(kDefaultLearningRate);
92default:
93LOG(FATAL) << "Unsupported algorithm ID." << endl;
94}
95}
96
97inline void FillComponentFunctionWithInstruction(
98const IntegerT num_instructions,
99const shared_ptr<const Instruction>& instruction,
100vector<shared_ptr<const Instruction>>* component_function) {
101component_function->reserve(num_instructions);
102component_function->clear();
103for (IntegerT pos = 0; pos < num_instructions; ++pos) {
104component_function->emplace_back(instruction);
105}
106}
107
108Algorithm Generator::NoOp() {
109Algorithm algorithm;
110FillComponentFunctionWithInstruction(
111setup_size_init_, no_op_instruction_, &algorithm.setup_);
112FillComponentFunctionWithInstruction(
113predict_size_init_, no_op_instruction_, &algorithm.predict_);
114FillComponentFunctionWithInstruction(
115learn_size_init_, no_op_instruction_, &algorithm.learn_);
116return algorithm;
117}
118
119Algorithm Generator::Random() {
120Algorithm algorithm = NoOp();
121CHECK(setup_size_init_ == 0 || !allowed_setup_ops_.empty());
122CHECK(predict_size_init_ == 0 || !allowed_predict_ops_.empty());
123CHECK(learn_size_init_ == 0 || !allowed_learn_ops_.empty());
124randomizer_.Randomize(&algorithm);
125return algorithm;
126}
127
128void PadComponentFunctionWithRandomInstruction(
129const size_t total_instructions, const Op op,
130RandomGenerator* rand_gen,
131vector<shared_ptr<const Instruction>>* component_function) {
132component_function->reserve(total_instructions);
133while (component_function->size() < total_instructions) {
134component_function->push_back(make_shared<const Instruction>(op, rand_gen));
135}
136}
137
138Generator::Generator()
139: init_model_(RANDOM_ALGORITHM),
140setup_size_init_(6),
141predict_size_init_(3),
142learn_size_init_(9),
143allowed_setup_ops_(
144{NO_OP, SCALAR_SUM_OP, MATRIX_VECTOR_PRODUCT_OP, VECTOR_MEAN_OP}),
145allowed_predict_ops_(
146{NO_OP, SCALAR_SUM_OP, MATRIX_VECTOR_PRODUCT_OP, VECTOR_MEAN_OP}),
147allowed_learn_ops_(
148{NO_OP, SCALAR_SUM_OP, MATRIX_VECTOR_PRODUCT_OP, VECTOR_MEAN_OP}),
149bit_gen_owned_(make_unique<mt19937>(GenerateRandomSeed())),
150rand_gen_owned_(make_unique<RandomGenerator>(bit_gen_owned_.get())),
151rand_gen_(rand_gen_owned_.get()),
152randomizer_(
153allowed_setup_ops_,
154allowed_predict_ops_,
155allowed_learn_ops_,
156bit_gen_owned_.get(),
157rand_gen_),
158no_op_instruction_(make_shared<const Instruction>()) {}
159
160Algorithm Generator::UnitTestNeuralNetNoBiasNoGradient(
161const double learning_rate) {
162Algorithm algorithm;
163
164// Scalar addresses
165constexpr AddressT kLearningRateAddress = 2;
166constexpr AddressT kPredictionErrorAddress = 3;
167CHECK_GE(kMaxScalarAddresses, 4);
168
169// Vector addresses.
170constexpr AddressT kFinalLayerWeightsAddress = 1;
171CHECK_EQ(
172kFinalLayerWeightsAddress,
173Generator::kUnitTestNeuralNetNoBiasNoGradientFinalLayerWeightsAddress);
174constexpr AddressT kFirstLayerOutputBeforeReluAddress = 2;
175constexpr AddressT kFirstLayerOutputAfterReluAddress = 3;
176constexpr AddressT kZerosAddress = 4;
177constexpr AddressT kGradientWrtFinalLayerWeightsAddress = 5;
178constexpr AddressT kGradientWrtActivationsAddress = 6;
179constexpr AddressT kGradientOfReluAddress = 7;
180CHECK_GE(kMaxVectorAddresses, 8);
181
182// Matrix addresses.
183constexpr AddressT kFirstLayerWeightsAddress = 0;
184CHECK_EQ(
185kFirstLayerWeightsAddress,
186Generator::kUnitTestNeuralNetNoBiasNoGradientFirstLayerWeightsAddress);
187constexpr AddressT kGradientWrtFirstLayerWeightsAddress = 1;
188CHECK_GE(kMaxMatrixAddresses, 2);
189
190shared_ptr<const Instruction> no_op_instruction =
191make_shared<const Instruction>();
192
193algorithm.setup_.emplace_back(make_shared<const Instruction>(
194SCALAR_CONST_SET_OP,
195kLearningRateAddress,
196ActivationDataSetter(learning_rate)));
197PadComponentFunctionWithInstruction(
198setup_size_init_, no_op_instruction, &algorithm.setup_);
199
200IntegerT num_predict_instructions = 5;
201algorithm.predict_.reserve(num_predict_instructions);
202// Multiply with first layer weight matrix.
203algorithm.predict_.emplace_back(make_shared<const Instruction>(
204MATRIX_VECTOR_PRODUCT_OP,
205kFirstLayerWeightsAddress, kFeaturesVectorAddress,
206kFirstLayerOutputBeforeReluAddress));
207// Apply RELU.
208algorithm.predict_.emplace_back(make_shared<const Instruction>(
209VECTOR_MAX_OP, kFirstLayerOutputBeforeReluAddress, kZerosAddress,
210kFirstLayerOutputAfterReluAddress));
211// Dot product with final layer weight vector.
212algorithm.predict_.emplace_back(make_shared<const Instruction>(
213VECTOR_INNER_PRODUCT_OP, kFirstLayerOutputAfterReluAddress,
214kFinalLayerWeightsAddress, kPredictionsScalarAddress));
215PadComponentFunctionWithInstruction(
216predict_size_init_, no_op_instruction, &algorithm.predict_);
217
218algorithm.learn_.reserve(11);
219algorithm.learn_.emplace_back(make_shared<const Instruction>(
220SCALAR_DIFF_OP, kLabelsScalarAddress, kPredictionsScalarAddress,
221kPredictionErrorAddress));
222algorithm.learn_.emplace_back(make_shared<const Instruction>(
223SCALAR_PRODUCT_OP,
224kLearningRateAddress, kPredictionErrorAddress, kPredictionErrorAddress));
225algorithm.learn_.emplace_back(make_shared<const Instruction>(
226SCALAR_VECTOR_PRODUCT_OP, kPredictionErrorAddress,
227kFirstLayerOutputAfterReluAddress, kGradientWrtFinalLayerWeightsAddress));
228algorithm.learn_.emplace_back(make_shared<const Instruction>(
229VECTOR_SUM_OP,
230kFinalLayerWeightsAddress, kGradientWrtFinalLayerWeightsAddress,
231kFinalLayerWeightsAddress));
232algorithm.learn_.emplace_back(make_shared<const Instruction>(
233SCALAR_VECTOR_PRODUCT_OP,
234kPredictionErrorAddress, kFinalLayerWeightsAddress,
235kGradientWrtActivationsAddress));
236algorithm.learn_.emplace_back(make_shared<const Instruction>(
237VECTOR_HEAVYSIDE_OP,
238kFirstLayerOutputBeforeReluAddress, 0, kGradientOfReluAddress));
239algorithm.learn_.emplace_back(make_shared<const Instruction>(
240VECTOR_PRODUCT_OP,
241kGradientOfReluAddress, kGradientWrtActivationsAddress,
242kGradientWrtActivationsAddress));
243algorithm.learn_.emplace_back(make_shared<const Instruction>(
244VECTOR_OUTER_PRODUCT_OP,
245kGradientWrtActivationsAddress, kFeaturesVectorAddress,
246kGradientWrtFirstLayerWeightsAddress));
247algorithm.learn_.emplace_back(make_shared<const Instruction>(
248MATRIX_SUM_OP,
249kFirstLayerWeightsAddress, kGradientWrtFirstLayerWeightsAddress,
250kFirstLayerWeightsAddress));
251PadComponentFunctionWithInstruction(
252learn_size_init_, no_op_instruction, &algorithm.learn_);
253
254return algorithm;
255}
256
257Algorithm Generator::NeuralNet(
258const double learning_rate,
259const double first_init_scale,
260const double final_init_scale) {
261Algorithm algorithm;
262
263// Scalar addresses
264constexpr AddressT kFinalLayerBiasAddress = 2;
265constexpr AddressT kLearningRateAddress = 3;
266constexpr AddressT kPredictionErrorAddress = 4;
267CHECK_GE(kMaxScalarAddresses, 5);
268
269// Vector addresses.
270constexpr AddressT kFirstLayerBiasAddress = 1;
271constexpr AddressT kFinalLayerWeightsAddress = 2;
272constexpr AddressT kFirstLayerOutputBeforeReluAddress = 3;
273constexpr AddressT kFirstLayerOutputAfterReluAddress = 4;
274constexpr AddressT kZerosAddress = 5;
275constexpr AddressT kGradientWrtFinalLayerWeightsAddress = 6;
276constexpr AddressT kGradientWrtActivationsAddress = 7;
277constexpr AddressT kGradientOfReluAddress = 8;
278CHECK_GE(kMaxVectorAddresses, 9);
279
280// Matrix addresses.
281constexpr AddressT kFirstLayerWeightsAddress = 0;
282constexpr AddressT kGradientWrtFirstLayerWeightsAddress = 1;
283CHECK_GE(kMaxMatrixAddresses, 2);
284
285shared_ptr<const Instruction> no_op_instruction =
286make_shared<const Instruction>();
287
288algorithm.setup_.emplace_back(make_shared<const Instruction>(
289VECTOR_GAUSSIAN_SET_OP,
290kFinalLayerWeightsAddress,
291FloatDataSetter(0.0),
292FloatDataSetter(final_init_scale)));
293algorithm.setup_.emplace_back(make_shared<const Instruction>(
294MATRIX_GAUSSIAN_SET_OP,
295kFirstLayerWeightsAddress,
296FloatDataSetter(0.0),
297FloatDataSetter(first_init_scale)));
298algorithm.setup_.emplace_back(make_shared<const Instruction>(
299SCALAR_CONST_SET_OP,
300kLearningRateAddress,
301ActivationDataSetter(learning_rate)));
302PadComponentFunctionWithInstruction(
303setup_size_init_, no_op_instruction, &algorithm.setup_);
304
305// Multiply with first layer weight matrix.
306algorithm.predict_.emplace_back(make_shared<const Instruction>(
307MATRIX_VECTOR_PRODUCT_OP,
308kFirstLayerWeightsAddress, kFeaturesVectorAddress,
309kFirstLayerOutputBeforeReluAddress));
310// Add first layer bias.
311algorithm.predict_.emplace_back(make_shared<const Instruction>(
312VECTOR_SUM_OP, kFirstLayerOutputBeforeReluAddress, kFirstLayerBiasAddress,
313kFirstLayerOutputBeforeReluAddress));
314// Apply RELU.
315algorithm.predict_.emplace_back(make_shared<const Instruction>(
316VECTOR_MAX_OP, kFirstLayerOutputBeforeReluAddress, kZerosAddress,
317kFirstLayerOutputAfterReluAddress));
318// Dot product with final layer weight vector.
319algorithm.predict_.emplace_back(make_shared<const Instruction>(
320VECTOR_INNER_PRODUCT_OP, kFirstLayerOutputAfterReluAddress,
321kFinalLayerWeightsAddress, kPredictionsScalarAddress));
322// Add final layer bias.
323CHECK_LE(kFinalLayerBiasAddress, kMaxScalarAddresses);
324algorithm.predict_.emplace_back(make_shared<const Instruction>(
325SCALAR_SUM_OP, kPredictionsScalarAddress, kFinalLayerBiasAddress,
326kPredictionsScalarAddress));
327PadComponentFunctionWithInstruction(
328predict_size_init_, no_op_instruction, &algorithm.predict_);
329
330algorithm.learn_.reserve(11);
331algorithm.learn_.emplace_back(make_shared<const Instruction>(
332SCALAR_DIFF_OP, kLabelsScalarAddress, kPredictionsScalarAddress,
333kPredictionErrorAddress));
334algorithm.learn_.emplace_back(make_shared<const Instruction>(
335SCALAR_PRODUCT_OP,
336kLearningRateAddress, kPredictionErrorAddress, kPredictionErrorAddress));
337CHECK_LE(kFinalLayerBiasAddress, kMaxScalarAddresses);
338// Update final layer bias.
339algorithm.learn_.emplace_back(make_shared<const Instruction>(
340SCALAR_SUM_OP, kFinalLayerBiasAddress, kPredictionErrorAddress,
341kFinalLayerBiasAddress));
342algorithm.learn_.emplace_back(make_shared<const Instruction>(
343SCALAR_VECTOR_PRODUCT_OP, kPredictionErrorAddress,
344kFirstLayerOutputAfterReluAddress, kGradientWrtFinalLayerWeightsAddress));
345algorithm.learn_.emplace_back(make_shared<const Instruction>(
346VECTOR_SUM_OP,
347kFinalLayerWeightsAddress, kGradientWrtFinalLayerWeightsAddress,
348kFinalLayerWeightsAddress));
349algorithm.learn_.emplace_back(make_shared<const Instruction>(
350SCALAR_VECTOR_PRODUCT_OP,
351kPredictionErrorAddress, kFinalLayerWeightsAddress,
352kGradientWrtActivationsAddress));
353algorithm.learn_.emplace_back(make_shared<const Instruction>(
354VECTOR_HEAVYSIDE_OP,
355kFirstLayerOutputBeforeReluAddress, 0, kGradientOfReluAddress));
356algorithm.learn_.emplace_back(make_shared<const Instruction>(
357VECTOR_PRODUCT_OP,
358kGradientOfReluAddress, kGradientWrtActivationsAddress,
359kGradientWrtActivationsAddress));
360// Update first layer bias.
361algorithm.learn_.emplace_back(make_shared<const Instruction>(
362VECTOR_SUM_OP, kFirstLayerBiasAddress, kGradientWrtActivationsAddress,
363kFirstLayerBiasAddress));
364algorithm.learn_.emplace_back(make_shared<const Instruction>(
365VECTOR_OUTER_PRODUCT_OP,
366kGradientWrtActivationsAddress, kFeaturesVectorAddress,
367kGradientWrtFirstLayerWeightsAddress));
368algorithm.learn_.emplace_back(make_shared<const Instruction>(
369MATRIX_SUM_OP,
370kFirstLayerWeightsAddress, kGradientWrtFirstLayerWeightsAddress,
371kFirstLayerWeightsAddress));
372PadComponentFunctionWithInstruction(
373learn_size_init_, no_op_instruction, &algorithm.learn_);
374
375return algorithm;
376}
377
378Algorithm Generator::LinearModel(const double learning_rate) {
379Algorithm algorithm;
380
381// Scalar addresses
382constexpr AddressT kLearningRateAddress = 2;
383constexpr AddressT kPredictionErrorAddress = 3;
384CHECK_GE(kMaxScalarAddresses, 4);
385
386// Vector addresses.
387constexpr AddressT kWeightsAddress = 1;
388constexpr AddressT kCorrectionAddress = 2;
389CHECK_GE(kMaxVectorAddresses, 3);
390
391CHECK_GE(kMaxMatrixAddresses, 0);
392
393shared_ptr<const Instruction> no_op_instruction =
394make_shared<const Instruction>();
395
396algorithm.setup_.emplace_back(make_shared<const Instruction>(
397SCALAR_CONST_SET_OP,
398kLearningRateAddress,
399ActivationDataSetter(learning_rate)));
400PadComponentFunctionWithInstruction(
401setup_size_init_, no_op_instruction, &algorithm.setup_);
402
403algorithm.predict_.emplace_back(make_shared<const Instruction>(
404VECTOR_INNER_PRODUCT_OP,
405kWeightsAddress, kFeaturesVectorAddress, kPredictionsScalarAddress));
406PadComponentFunctionWithInstruction(
407predict_size_init_, no_op_instruction, &algorithm.predict_);
408
409algorithm.learn_.emplace_back(make_shared<const Instruction>(
410SCALAR_DIFF_OP,
411kLabelsScalarAddress, kPredictionsScalarAddress,
412kPredictionErrorAddress));
413algorithm.learn_.emplace_back(make_shared<const Instruction>(
414SCALAR_PRODUCT_OP,
415kLearningRateAddress, kPredictionErrorAddress,
416kPredictionErrorAddress));
417algorithm.learn_.emplace_back(make_shared<const Instruction>(
418SCALAR_VECTOR_PRODUCT_OP,
419kPredictionErrorAddress, kFeaturesVectorAddress, kCorrectionAddress));
420algorithm.learn_.emplace_back(make_shared<const Instruction>(
421VECTOR_SUM_OP,
422kWeightsAddress, kCorrectionAddress, kWeightsAddress));
423PadComponentFunctionWithInstruction(
424learn_size_init_, no_op_instruction, &algorithm.learn_);
425return algorithm;
426}
427
428} // namespace automl_zero
429