3
from caffe2.python import workspace, memonger, core, model_helper, brew
4
from caffe2.proto import caffe2_pb2
5
import caffe2.python.hypothesis_test_util as hu
6
import hypothesis.strategies as st
7
from hypothesis import given, settings
11
def has_blob(proto, needle):
16
for outp in op.output:
22
def count_blobs(proto):
25
blobs = blobs.union(set(op.input)).union(set(op.output))
29
class MemongerTest(hu.HypothesisTestCase):
30
@given(input_dim=st.integers(min_value=1, max_value=10),
31
output_dim=st.integers(min_value=1, max_value=10),
32
batch_size=st.integers(min_value=1, max_value=10),
33
do=st.sampled_from(hu.device_options),
34
algo=st.sampled_from(memonger.AssignmentAlgorithm))
35
@settings(max_examples=5, deadline=None)
36
def test_simple_memonger(self, input_dim, output_dim, batch_size, do, algo):
37
m = model_helper.ModelHelper()
38
fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
39
fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
40
fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
43
.Softmax([], "pred") \
44
.LabelCrossEntropy(["label"], ["xent"]) \
45
.AveragedLoss([], "loss")
46
input_to_grad = m.AddGradientOperators(["loss"])
47
m.net.Proto().device_option.CopyFrom(do)
48
m.param_init_net.Proto().device_option.CopyFrom(do)
50
[o for op in m.param_init_net.Proto().op for o in op.output] + \
51
["data", "label", "loss", input_to_grad["fc1_w"]]
53
optimization = memonger.optimize_interference(
54
m.Proto(), static_blobs, algo=algo)
55
data = np.random.randn(batch_size, input_dim).astype(np.float32)
56
label = np.random.randint(
57
low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
58
workspace.RunNetOnce(m.param_init_net)
59
workspace.FeedBlob("data", data, device_option=do)
60
workspace.FeedBlob("label", label, device_option=do)
61
workspace.RunNetOnce(m.net)
62
loss = workspace.FetchBlob("loss")
63
grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
64
workspace.RunNetOnce(optimization.net)
65
optimized_loss = workspace.FetchBlob("loss")
66
optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
67
np.testing.assert_almost_equal(loss, optimized_loss)
68
np.testing.assert_almost_equal(grad, optimized_grad)
69
stats = memonger.compute_statistics(optimization.assignments)
70
self.assertLess(stats.optimized_nbytes, stats.baseline_nbytes)
73
blob_sizes = memonger.collect_blob_sizes(m.Proto())
74
optimization1 = memonger.optimize_interference(
75
m.Proto(), static_blobs, blob_sizes=blob_sizes, algo=algo)
76
workspace.RunNetOnce(optimization1.net)
77
optimized_loss = workspace.FetchBlob("loss")
78
optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
79
np.testing.assert_almost_equal(loss, optimized_loss)
80
np.testing.assert_almost_equal(grad, optimized_grad)
81
stats = memonger.compute_statistics(optimization1.assignments)
82
self.assertLessEqual(stats.optimized_nbytes, stats.baseline_nbytes)
84
@given(input_dim=st.integers(min_value=1, max_value=10),
85
output_dim=st.integers(min_value=1, max_value=10),
86
batch_size=st.integers(min_value=1, max_value=10),
87
do=st.sampled_from(hu.device_options))
88
@settings(max_examples=5, deadline=None)
89
def test_fast_memonger(self, input_dim, output_dim, batch_size, do):
90
m = model_helper.ModelHelper()
91
fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
92
fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
93
fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
96
.Softmax([], "pred") \
97
.LabelCrossEntropy(["label"], ["xent"]) \
98
.AveragedLoss([], "loss")
99
input_to_grad = m.AddGradientOperators(["loss"])
100
m.net.Proto().device_option.CopyFrom(do)
101
m.param_init_net.Proto().device_option.CopyFrom(do)
103
[o for op in m.param_init_net.Proto().op for o in op.output] + \
104
["data", "label", "loss", input_to_grad["fc1_w"]]
106
optimized_net = memonger.optimize_inference_fast(
107
m.Proto(), static_blobs)
108
data = np.random.randn(batch_size, input_dim).astype(np.float32)
109
label = np.random.randint(
110
low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
111
workspace.RunNetOnce(m.param_init_net)
112
workspace.FeedBlob("data", data, device_option=do)
113
workspace.FeedBlob("label", label, device_option=do)
114
workspace.RunNetOnce(m.net)
115
loss = workspace.FetchBlob("loss")
116
grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
117
workspace.RunNetOnce(optimized_net)
118
optimized_loss = workspace.FetchBlob("loss")
119
optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
120
np.testing.assert_almost_equal(loss, optimized_loss)
121
np.testing.assert_almost_equal(grad, optimized_grad)
123
self.assertLess(count_blobs(optimized_net), count_blobs(m.Proto()))
125
def test_fast_memonger_unique_outputs(self):
126
m = model_helper.ModelHelper()
130
m, "data{}".format(i), "fc".format(i), dim_in=2, dim_out=2)
136
r.append(brew.sum(m, [x, y], 1))
137
concated = brew.concat(m, r, "concated")
138
brew.relu(m, concated, "merged")
141
[o for op in m.param_init_net.Proto().op for o in op.output] + \
142
["merged"] + ["data{}".format(i) for i in range(len(fc))]
144
optimized_net = memonger.optimize_inference_fast(
145
m.Proto(), static_blobs)
146
for op in optimized_net.op:
147
self.assertEqual(len(op.output), len(set(op.output)), str(op))
149
@given(input_dim=st.integers(min_value=1, max_value=4),
150
output_dim=st.integers(min_value=1, max_value=4),
151
batch_size=st.integers(min_value=1, max_value=4))
152
def test_gradient_optim(self, input_dim, output_dim, batch_size):
153
m = model_helper.ModelHelper()
154
with core.NameScope("name_x"):
155
fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
156
fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
157
fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
158
fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
159
fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
161
.Softmax([], "pred") \
162
.LabelCrossEntropy(["label"], ["xent"]) \
163
.AveragedLoss([], "loss")
164
input_to_grad = m.AddGradientOperators(["name_x/loss"])
166
blobs_before = count_blobs(m.net.Proto())
167
optim_proto = memonger.share_grad_blobs(
170
set(m.param_to_grad.values()),
172
share_activations=False,
174
blobs_after = count_blobs(optim_proto)
175
self.assertLess(blobs_after, blobs_before)
177
optim_proto_wacts = memonger.share_grad_blobs(
180
set(m.param_to_grad.values()),
182
share_activations=True,
183
dont_share_blobs=set([str(input_to_grad["name_x/fc1_w"])]),
185
blobs_wact_optim = count_blobs(optim_proto_wacts)
186
self.assertLessEqual(blobs_wact_optim, blobs_after)
189
self.assertTrue(has_blob(optim_proto, "name_x/fc5"))
191
has_blob(optim_proto_wacts, "name_x/fc5"),
192
"Dont remap final activation",
196
data = np.random.randn(batch_size, input_dim).astype(np.float32)
197
label = np.random.randint(
198
low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
199
workspace.RunNetOnce(m.param_init_net)
200
workspace.FeedBlob("name_x/data", data)
201
workspace.FeedBlob("name_x/label", label)
202
workspace.RunNetOnce(m.net)
203
loss = workspace.FetchBlob("name_x/loss")
204
grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
205
workspace.RunNetOnce(optim_proto)
206
optimized_loss = workspace.FetchBlob("name_x/loss")
207
optimized_grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
208
np.testing.assert_almost_equal(loss, optimized_loss)
209
np.testing.assert_almost_equal(grad, optimized_grad)
211
workspace.FeedBlob(str(input_to_grad["name_x/fc1_w"]), np.array([0.0]))
214
workspace.RunNetOnce(optim_proto_wacts)
215
optimized_loss = workspace.FetchBlob("name_x/loss")
216
optimized_grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
217
np.testing.assert_almost_equal(loss, optimized_loss)
218
np.testing.assert_almost_equal(grad, optimized_grad)
220
@unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
221
def test_memonger_mix_cpu_gpu(self):
223
Check that memonger does not make blobs cross CPU/GPU boundary
225
m = model_helper.ModelHelper()
226
with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
227
fc1 = brew.fc(m, "data", "fc1", dim_in=2, dim_out=2)
228
fc2 = brew.fc(m, fc1, "fc2", dim_in=2, dim_out=2)
229
fc3 = brew.fc(m, fc2, "fc3", dim_in=2, dim_out=2)
230
fc4 = brew.fc(m, fc3, "fc4", dim_in=2, dim_out=2)
231
fc4_cpu = m.net.CopyGPUToCPU(fc4, "fc4_cpu")
232
with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
233
fc5_cpu = brew.fc(m, fc4_cpu, "fc5_cpu", dim_in=2, dim_out=2)
234
fc6_cpu = brew.fc(m, fc5_cpu, "fc6_cpu", dim_in=2, dim_out=2)
235
fc7_cpu = brew.fc(m, fc6_cpu, "fc7_cpu", dim_in=2, dim_out=2)
236
fc7_cpu.Relu([], fc7_cpu) \
237
.Softmax([], "pred") \
238
.LabelCrossEntropy(["label"], ["xent"]) \
239
.AveragedLoss([], "loss")
240
m.AddGradientOperators(["loss"])
242
blobs_before = count_blobs(m.net.Proto())
243
optim_proto = memonger.share_grad_blobs(
246
set(m.param_to_grad.values()),
248
share_activations=True,
249
dont_share_blobs=set(),
251
blobs_after = count_blobs(optim_proto)
252
self.assertLess(blobs_after, blobs_before)
256
device_blobs = {caffe2_pb2.CPU: set(), workspace.GpuDeviceType: set()}
257
for op in optim_proto.op:
258
if op.type not in ['CopyCPUToGPU', "CopyGPUToCPU"]:
259
dev = op.device_option.device_type
260
for b in list(op.input) + list(op.output):
261
device_blobs[dev].add(b)
263
device_crossers = device_blobs[caffe2_pb2.CPU].intersection(
264
device_blobs[workspace.GpuDeviceType]
266
self.assertEqual(device_crossers, set())
268
@given(input_dim=st.integers(min_value=4, max_value=4),
269
output_dim=st.integers(min_value=4, max_value=4),
270
batch_size=st.integers(min_value=4, max_value=4))
271
@settings(deadline=1000)
272
def test_gradient_optim_tree(self, input_dim, output_dim, batch_size):
273
m = model_helper.ModelHelper()
274
with core.NameScope("name_x"):
275
fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
276
fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
277
fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
278
fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
279
fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
281
.Softmax([], "pred1") \
282
.LabelCrossEntropy(["label"], ["xent1"]) \
283
.AveragedLoss([], "loss1")
284
fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
286
.Softmax([], "pred2") \
287
.LabelCrossEntropy(["label"], ["xent2"]) \
288
.AveragedLoss([], "loss2")
289
input_to_grad = m.AddGradientOperators(["name_x/loss1", "name_x/loss2"])
291
blobs_before = count_blobs(m.net.Proto())
292
optim_proto = memonger.share_grad_blobs(
294
["name_x/loss1", "name_x/loss2"],
295
set(m.param_to_grad.values()),
297
share_activations=True,
298
dont_share_blobs=set(['name_x/fc6', 'name_x/fc5',
299
str(input_to_grad["name_x/fc1_w"])]),
301
blobs_after = count_blobs(optim_proto)
302
self.assertLess(blobs_after, blobs_before)
303
self.assertTrue(has_blob(optim_proto, "name_x/fc6"))
306
data = np.random.randn(batch_size, input_dim).astype(np.float32)
307
label = np.random.randint(
308
low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
309
workspace.RunNetOnce(m.param_init_net)
310
workspace.FeedBlob("name_x/data", data)
311
workspace.FeedBlob("name_x/label", label)
312
workspace.RunNetOnce(m.net)
313
loss1 = workspace.FetchBlob("name_x/loss1")
314
loss2 = workspace.FetchBlob("name_x/loss2")
315
grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
316
workspace.FeedBlob(str(input_to_grad["name_x/fc1_w"]), np.array([0.0]))
318
workspace.RunNetOnce(optim_proto)
319
optimized_loss1 = workspace.FetchBlob("name_x/loss1")
320
optimized_loss2 = workspace.FetchBlob("name_x/loss2")
321
optimized_grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
322
np.testing.assert_almost_equal(loss1, optimized_loss1)
323
np.testing.assert_almost_equal(loss2, optimized_loss2)
324
np.testing.assert_almost_equal(grad, optimized_grad)
326
@given(input_dim=st.integers(min_value=4, max_value=4),
327
output_dim=st.integers(min_value=4, max_value=4),
328
batch_size=st.integers(min_value=4, max_value=4))
329
@settings(deadline=1000)
330
def test_forward_optim_tree_daggy(self, input_dim, output_dim, batch_size):
331
m = model_helper.ModelHelper()
332
m.Proto().type = "dag"
333
m.Proto().num_workers = 4
335
with core.NameScope("name_x"):
336
fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
337
fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
339
fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
340
fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
341
fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
344
fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
345
fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
346
fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)
348
fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")
350
fc5.Relu([], fc5sum) \
351
.Softmax([], "pred1") \
352
.LabelCrossEntropy(["label"], ["xent1"]) \
353
.AveragedLoss([], "loss1")
354
fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
356
.Softmax([], "pred2") \
357
.LabelCrossEntropy(["label"], ["xent2"]) \
358
.AveragedLoss([], "loss2")
360
blobs_before = count_blobs(m.net.Proto())
361
optim_proto = memonger.optimize_inference_for_dag(
362
m.net, ["name_x/data"], "name_x"
364
blobs_after = count_blobs(optim_proto)
365
self.assertLess(blobs_after, blobs_before)
368
data = np.random.randn(batch_size, input_dim).astype(np.float32)
369
label = np.random.randint(
370
low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
371
workspace.RunNetOnce(m.param_init_net)
372
workspace.FeedBlob("name_x/data", data)
373
workspace.FeedBlob("name_x/label", label)
374
workspace.RunNetOnce(m.net)
375
loss1 = workspace.FetchBlob("name_x/loss1")
376
loss2 = workspace.FetchBlob("name_x/loss2")
377
workspace.RunNetOnce(optim_proto)
378
optimized_loss1 = workspace.FetchBlob("name_x/loss1")
379
optimized_loss2 = workspace.FetchBlob("name_x/loss2")
380
np.testing.assert_almost_equal(loss1, optimized_loss1)
381
np.testing.assert_almost_equal(loss2, optimized_loss2)
383
@given(input_dim=st.integers(min_value=4, max_value=4),
384
output_dim=st.integers(min_value=4, max_value=4),
385
batch_size=st.integers(min_value=4, max_value=4))
386
@settings(deadline=10000)
387
def test_forward_optim_tree_harder(self, input_dim, output_dim, batch_size):
388
m = model_helper.ModelHelper()
389
m.net.Proto().type = "dag"
390
m.net.Proto().num_workers = 4
391
m.net.AddExternalInput("label")
392
m.net.AddExternalInput("data")
394
with core.NameScope("name_x"):
395
fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
396
fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
398
fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
399
fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
400
fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
403
fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
404
fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
405
fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)
407
fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")
408
fc5sum.Relu([], "relu1") \
409
.Softmax([], "pred1") \
410
.LabelCrossEntropy(["label"], ["xent1"]) \
411
.AveragedLoss([], "loss1")
412
fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
414
.Softmax([], "pred2") \
415
.LabelCrossEntropy(["label"], ["xent2"]) \
416
.AveragedLoss([], "loss2")
418
blobs_before = count_blobs(m.net.Proto())
419
optim_proto = memonger.optimize_inference_for_dag(
420
m.net, ["name_x/data"], "name_x/"
423
blobs_after = count_blobs(optim_proto)
427
optim_proto_extra_input = memonger.optimize_inference_for_dag(
428
m.net, ["name_x/data", "name_x/fc1_w"], "name_x/"
430
blobs_after_extra_input = count_blobs(optim_proto_extra_input)
431
self.assertEqual(blobs_after, blobs_after_extra_input)
434
print(str(optim_proto))
435
self.assertLess(blobs_after, blobs_before)
438
data = np.random.randn(batch_size, input_dim).astype(np.float32)
439
label = np.random.randint(
440
low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
441
workspace.RunNetOnce(m.param_init_net)
442
workspace.FeedBlob("name_x/data", data)
443
workspace.FeedBlob("name_x/label", label)
444
workspace.RunNetOnce(m.net)
445
loss1 = workspace.FetchBlob("name_x/loss1")
446
loss2 = workspace.FetchBlob("name_x/loss2")
447
workspace.RunNetOnce(optim_proto)
448
optimized_loss1 = workspace.FetchBlob("name_x/loss1")
449
optimized_loss2 = workspace.FetchBlob("name_x/loss2")
450
np.testing.assert_almost_equal(loss1, optimized_loss1)
451
np.testing.assert_almost_equal(loss2, optimized_loss2)
455
@settings(deadline=10000)
456
def test_forward_optim_tree_dag_traversal(self):
461
m = model_helper.ModelHelper()
462
m.Proto().type = "dag"
463
m.Proto().num_workers = 4
465
with core.NameScope("name_x"):
466
fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
467
fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
469
fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
470
fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
471
fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
474
fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
475
fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
476
fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)
478
fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")
480
fc5.Relu([], fc5sum) \
481
.Softmax([], "pred1") \
482
.LabelCrossEntropy(["label"], ["xent1"]) \
483
.AveragedLoss([], "loss1")
484
fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
486
.Softmax([], "pred2") \
487
.LabelCrossEntropy(["label"], ["xent2"]) \
488
.AveragedLoss([], "loss2")
490
blobs_before = count_blobs(m.net.Proto())
493
optim_proto = memonger.optimize_inference_for_dag(
494
m.net, ["name_x/fc5_w", "name_x/data"], "name_x"
496
blobs_after = count_blobs(optim_proto)
497
self.assertLess(blobs_after, blobs_before)
500
def test_forward_optim_tree_enforce_inplace_op_invalid(self):
501
m = model_helper.ModelHelper()
502
m.Proto().type = "dag"
503
m.Proto().num_workers = 4
506
net.IndexFreeze("A", "B")
507
net.Sum(["B", "B"], "C")
509
net.Sum(["D", "D"], "E")
511
with self.assertRaises(RuntimeError):
512
memonger.optimize_inference_for_dag(net, ["A"], "")
517
def test_forward_optim_tree_enforce_inplace_op_valid_and_as_head(self):
518
m = model_helper.ModelHelper()
519
m.Proto().type = "dag"
520
m.Proto().num_workers = 4
523
net.IndexFreeze("A", "A")
524
net.Sum(["A", "A"], "B")
527
net.Sum(["D", "D"], "E")
529
blobs_before = count_blobs(m.net.Proto())
530
optim_proto = memonger.optimize_inference_for_dag(
533
blobs_after = count_blobs(optim_proto)
534
self.assertLess(blobs_after, blobs_before)
537
from caffe2.python import rnn_cell
539
model = model_helper.ModelHelper()
540
seq_lengths, labels = \
541
model.net.AddExternalInputs(
542
'seq_lengths', 'labels',
546
hidden_init, cell_init = model.net.AddExternalInputs(
547
"hidden_init_{}".format(i),
548
"cell_init_{}".format(i)
550
init_blobs.extend([hidden_init, cell_init])
551
model.param_init_net.ConstantFill([], ["input"], shape=[T, 4, 10])
552
output, last_hidden, _, last_state = rnn_cell.LSTM(
555
seq_lengths=seq_lengths,
556
initial_states=init_blobs,
562
return_last_layer_only=True,
564
softmax, loss = model.net.SoftmaxWithLoss(
565
[model.Flatten(output), "labels"],
569
model.AddGradientOperators([loss])
570
blobs_before = count_blobs(model.net.Proto())
571
optim_proto = memonger.share_grad_blobs(
574
set(model.param_to_grad.values()),
576
share_activations=True,
577
dont_share_blobs=set(),
579
blobs_after = count_blobs(optim_proto)
580
self.assertLess(blobs_after, blobs_before)
583
for init_blob in init_blobs:
584
workspace.FeedBlob(init_blob, np.zeros(
585
[1, 4, 10], dtype=np.float32
587
workspace.FeedBlob("seq_lengths", np.array([T] * 4, dtype=np.int32))
588
workspace.FeedBlob("labels", np.random.rand(T).astype(np.int32))
590
workspace.RunNetOnce(model.param_init_net)
591
workspace.RunNetOnce(model.net)
593
def test_compute_interference_graph_inplace_ops(self):
594
m = model_helper.ModelHelper()
598
g = memonger.compute_interference_graph(m.net.Proto().op)
599
self.assertEqual(list(g.edges()), [(0, 1), (0, 2), (1, 2)])
601
def test_topological_sort_longest_path(self):
602
m = model_helper.ModelHelper()
604
m.Copy("conv0_w_comp", "conv0_w")
606
conv0 = brew.conv(m, "data", "conv0", 32, 32, 4)
608
m.Copy("conv2_w", "conv2_w")
610
brew.conv(m, conv0, "conv2", 16, 32, 4)
612
g = memonger.compute_interference_graph(m.net.Proto().op)
614
orders_org = memonger.topological_sort_traversal(g)
615
orders_gt_org = [2, 0, 1, 3]
616
self.assertEqual(orders_gt_org, list(orders_org))
618
orders = memonger.topological_sort_traversal_longest_path(g)
620
orders_gt = [0, 1, 2, 3]
621
self.assertEqual(orders_gt, list(orders))
623
def test_topological_sort_longest_path_multi_target(self):
625
m = model_helper.ModelHelper()
627
m.Copy("conv0_w_comp", "conv0_w")
629
conv0 = brew.conv(m, "data", "conv0", 32, 32, 4)
631
m.Copy("conv2_w", "conv2_w")
633
brew.conv(m, conv0, "conv2", 16, 32, 4)
635
m.Copy("data1", "data2")
637
m.Copy("data2", "data3")
639
g = memonger.compute_interference_graph(m.net.Proto().op)
641
orders_org = memonger.topological_sort_traversal(g)
642
orders_gt_org = [4, 5, 2, 0, 1, 3]
643
self.assertEqual(orders_gt_org, list(orders_org))
645
orders = memonger.topological_sort_traversal_longest_path(g)
647
orders_gt = [0, 1, 2, 3, 4, 5]
648
self.assertEqual(orders_gt, list(orders))
650
def test_topological_sort_longest_path_single_node(self):
652
m = model_helper.ModelHelper()
654
m.Copy("conv0_w_comp", "conv0_w")
656
g = memonger.compute_interference_graph(m.net.Proto().op)
658
orders_org = memonger.topological_sort_traversal(g)
660
self.assertEqual(orders_gt_org, list(orders_org))
662
orders = memonger.topological_sort_traversal_longest_path(g)
665
self.assertEqual(orders_gt, list(orders))
667
def test_compute_assignments_greedy(self):
    """Check the greedy assignment on four hand-written live ranges.

    The greedy solver must produce exactly the grouping in
    ``assignment_gt`` and that grouping must report a memory usage of 11.
    """
    LiveRange = memonger.LiveRange
    # NOTE(review): the bracket/assignment lines around these literals were
    # missing from the reviewed copy and are reconstructed as lists from
    # the later uses of ``ranges_sorted`` and ``assignment_gt`` -- confirm.
    # presumably LiveRange is (defined, used, size) -- verify in memonger.
    ranges_sorted = [
        ('b1', LiveRange(1, 3, 10)),
        ('b2', LiveRange(3, 4, 1)),
        ('b3', LiveRange(5, 6, 1)),
        ('b4', LiveRange(5, 7, 10)),
    ]
    # Expected grouping: b1 with b4, and b2 with b3.
    assignment_gt = [
        [ranges_sorted[0], ranges_sorted[3]],
        [ranges_sorted[1], ranges_sorted[2]],
    ]

    best = memonger.compute_assignments_greedy(ranges_sorted, None)
    self.assertEqual(memonger.get_memory_usage(best), 11)
    self.assertEqual(best, assignment_gt)
684
def test_compute_assignments_dp(self):
    """Check that the DP assignment reaches a memory usage of 11.

    Uses the same four live ranges as the greedy test, but only the total
    memory usage is asserted, not the grouping itself.
    """
    LiveRange = memonger.LiveRange
    # NOTE(review): the ``ranges_sorted = [`` / ``]`` lines were missing
    # from the reviewed copy and are reconstructed from the call below --
    # confirm the container type.
    ranges_sorted = [
        ('b1', LiveRange(1, 3, 10)),
        ('b2', LiveRange(3, 4, 1)),
        ('b3', LiveRange(5, 6, 1)),
        ('b4', LiveRange(5, 7, 10)),
    ]

    best = memonger.compute_assignments_dp(ranges_sorted, None)
    self.assertEqual(memonger.get_memory_usage(best), 11)
696
def test_compute_assignments_dp1(self):
    """Check the DP assignment on three live ranges with an empty init list.

    Unlike ``test_compute_assignments_dp`` the second argument is ``[]``
    rather than ``None``; the expected memory usage is again 11.
    """
    LiveRange = memonger.LiveRange
    # NOTE(review): the ``ranges_sorted = [`` / ``]`` lines were missing
    # from the reviewed copy and are reconstructed from the call below --
    # confirm the container type.
    ranges_sorted = [
        ('b1', LiveRange(1, 2, 10)),
        ('b2', LiveRange(4, 6, 1)),
        ('b3', LiveRange(5, 6, 10)),
    ]

    best = memonger.compute_assignments_dp(ranges_sorted, [])
    self.assertEqual(memonger.get_memory_usage(best), 11)
707
@given(input_dim=st.integers(min_value=4, max_value=4),
       output_dim=st.integers(min_value=4, max_value=4),
       batch_size=st.integers(min_value=4, max_value=4))
def test_verify_graph_equality(self, input_dim, output_dim, batch_size):
    """Two nets with the same topology but different blob names are equal.

    Each model is one FC feeding two parallel FCs whose outputs are summed.
    The second model only renames the intermediate blobs (``other_x``,
    ``other_y``, ``other_z``), so ``memonger.verify_graph_equality`` must
    return a truthy value. ``batch_size`` is drawn by hypothesis but is not
    used by the test body.
    """
    # Reference net with intermediate blobs x, y, z.
    m = model_helper.ModelHelper()
    m.Proto().type = "dag"
    m.Proto().num_workers = 4
    with core.NameScope("name_x"):
        fc1 = brew.fc(m, "data", "x", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m, fc1, "y", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m, fc1, "z", dim_in=output_dim, dim_out=output_dim)
        # NOTE(review): indentation was lost in the reviewed copy; the sum
        # is assumed to sit inside the name scope -- confirm.
        brew.sum(m, [fc2, fc3], "out")

    # Same structure, renamed intermediate blobs.
    m2 = model_helper.ModelHelper()
    m2.Proto().type = "dag"
    m2.Proto().num_workers = 4
    with core.NameScope("name_x"):
        fc1 = brew.fc(m2, "data", "other_x", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m2, fc1, "other_y", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m2, fc1, "other_z", dim_in=output_dim, dim_out=output_dim)
        brew.sum(m2, [fc2, fc3], "out")

    self.assertTrue(memonger.verify_graph_equality(m.net.Proto(), m2.net.Proto()))
731
@given(input_dim=st.integers(min_value=4, max_value=4),
       output_dim=st.integers(min_value=4, max_value=4),
       batch_size=st.integers(min_value=4, max_value=4))
def test_verify_graph_equality_harder(self, input_dim, output_dim, batch_size):
    """Reusing blob names along each branch must still compare equal.

    The first model has two branches, x -> y -> u and x -> z -> v, whose
    ends are summed. The second model writes the last FC of each branch
    back to ``y`` and ``z`` instead of ``u`` and ``v``; the test expects
    ``memonger.verify_graph_equality`` to return a truthy value for the
    pair. ``batch_size`` is drawn by hypothesis but is not used.
    """
    # Reference net: every FC writes to its own blob.
    m = model_helper.ModelHelper()
    m.Proto().type = "dag"
    m.Proto().num_workers = 4
    with core.NameScope("name_x"):
        fc1 = brew.fc(m, "data", "x", dim_in=input_dim, dim_out=output_dim)
        fc2a = brew.fc(m, fc1, "y", dim_in=output_dim, dim_out=output_dim)
        fc2b = brew.fc(m, fc1, "z", dim_in=output_dim, dim_out=output_dim)
        fc3a = brew.fc(m, fc2a, "u", dim_in=output_dim, dim_out=output_dim)
        fc3b = brew.fc(m, fc2b, "v", dim_in=output_dim, dim_out=output_dim)
        # NOTE(review): indentation was lost in the reviewed copy; the sum
        # is assumed to sit inside the name scope -- confirm.
        brew.sum(m, [fc3a, fc3b], "out")

    # Second net: the last FC of each branch reuses the y / z blob names.
    m2 = model_helper.ModelHelper()
    m2.Proto().type = "dag"
    m2.Proto().num_workers = 4
    with core.NameScope("name_x"):
        fc1 = brew.fc(m2, "data", "x", dim_in=input_dim, dim_out=output_dim)
        fc2a = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
        fc2b = brew.fc(m2, fc1, "z", dim_in=output_dim, dim_out=output_dim)
        fc3a = brew.fc(m2, fc2a, "y", dim_in=output_dim, dim_out=output_dim)
        fc3b = brew.fc(m2, fc2b, "z", dim_in=output_dim, dim_out=output_dim)
        brew.sum(m2, [fc3a, fc3b], "out")

    self.assertTrue(memonger.verify_graph_equality(m.net.Proto(), m2.net.Proto()))
759
@given(input_dim=st.integers(min_value=4, max_value=4),
       output_dim=st.integers(min_value=4, max_value=4),
       batch_size=st.integers(min_value=4, max_value=4))
def test_verify_graph_inequality(self, input_dim, output_dim, batch_size):
    """Two parallel FCs writing to the same blob must not compare equal.

    The first model sums the outputs ``y`` and ``z`` of two parallel FCs.
    In the second model both FCs are given the output name ``y``, and the
    test expects ``memonger.verify_graph_equality`` to return a falsy
    value. ``batch_size`` is drawn by hypothesis but is not used.
    """
    # Reference net: the parallel FCs write to distinct blobs y and z.
    m = model_helper.ModelHelper()
    m.Proto().type = "dag"
    m.Proto().num_workers = 4
    with core.NameScope("name_x"):
        fc1 = brew.fc(m, "data", "x", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m, fc1, "y", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m, fc1, "z", dim_in=output_dim, dim_out=output_dim)
        # NOTE(review): indentation was lost in the reviewed copy; the sum
        # is assumed to sit inside the name scope -- confirm.
        brew.sum(m, [fc2, fc3], "out")

    # Second net: both parallel FCs are told to write to y.
    m2 = model_helper.ModelHelper()
    m2.Proto().type = "dag"
    m2.Proto().num_workers = 4
    with core.NameScope("name_x"):
        fc1 = brew.fc(m2, "data", "x", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
        brew.sum(m2, [fc2, fc3], "out")

    self.assertFalse(memonger.verify_graph_equality(m.net.Proto(), m2.net.Proto()))
783
@given(input_dim=st.integers(min_value=4, max_value=4),
       output_dim=st.integers(min_value=4, max_value=4),
       batch_size=st.integers(min_value=4, max_value=4))
def test_verify_graph_inequality_harder(self, input_dim, output_dim, batch_size):
    """A name clash in the middle layer must make the nets unequal.

    The first model has two branches, x -> y -> u and x -> z -> v, whose
    ends are summed. In the second model both middle FCs are given the
    output name ``y`` while the last layer still writes ``u`` and ``v``;
    the test expects ``memonger.verify_graph_equality`` to return a falsy
    value. ``batch_size`` is drawn by hypothesis but is not used.
    """
    # Reference net: every FC writes to its own blob.
    m = model_helper.ModelHelper()
    m.Proto().type = "dag"
    m.Proto().num_workers = 4
    with core.NameScope("name_x"):
        fc1 = brew.fc(m, "data", "x", dim_in=input_dim, dim_out=output_dim)
        fc2a = brew.fc(m, fc1, "y", dim_in=output_dim, dim_out=output_dim)
        fc2b = brew.fc(m, fc1, "z", dim_in=output_dim, dim_out=output_dim)
        fc3a = brew.fc(m, fc2a, "u", dim_in=output_dim, dim_out=output_dim)
        fc3b = brew.fc(m, fc2b, "v", dim_in=output_dim, dim_out=output_dim)
        # NOTE(review): indentation was lost in the reviewed copy; the sum
        # is assumed to sit inside the name scope -- confirm.
        brew.sum(m, [fc3a, fc3b], "out")

    # Second net: both middle FCs are told to write to y.
    m2 = model_helper.ModelHelper()
    m2.Proto().type = "dag"
    m2.Proto().num_workers = 4
    with core.NameScope("name_x"):
        fc1 = brew.fc(m2, "data", "x", dim_in=input_dim, dim_out=output_dim)
        fc2a = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
        fc2b = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
        fc3a = brew.fc(m2, fc2a, "u", dim_in=output_dim, dim_out=output_dim)
        fc3b = brew.fc(m2, fc2b, "v", dim_in=output_dim, dim_out=output_dim)
        brew.sum(m2, [fc3a, fc3b], "out")

    self.assertFalse(memonger.verify_graph_equality(m.net.Proto(), m2.net.Proto()))
811
def test_release_blobs_when_used(self):
812
m = model_helper.ModelHelper()
813
fc1 = brew.fc(m, "data", "x", dim_in=2, dim_out=2)
814
fc2 = brew.fc(m, fc1, "y", dim_in=2, dim_out=2)
815
fc3 = brew.fc(m, fc1, "z", dim_in=2, dim_out=2)
816
fc4 = brew.fc(m, fc2, "u", dim_in=2, dim_out=2)
817
m.net.Alias(["u"], ["u_alias"])
819
brew.sum(m, [fc3, fc4], "out")
821
with_frees = memonger.release_blobs_when_used(m.net.Proto(), set("data"))
823
expect_frees = {"x", "y", "z"}
826
for op in with_frees.op:
827
if op.type == "Free":
828
self.assertFalse(op.input[0] in found_frees)
829
found_frees.add(op.input[0])
833
self.assertFalse(inp in found_frees)
834
for outp in op.output:
835
self.assertFalse(outp in found_frees)
837
self.assertEqual(expect_frees, found_frees)
840
if __name__ == '__main__':