# Repository: pytorch
# File: memonger_test.py
# 841 lines · 36.0 KB
# (code-viewer option: word wrap)
1
import numpy as np
2

3
from caffe2.python import workspace, memonger, core, model_helper, brew
4
from caffe2.proto import caffe2_pb2
5
import caffe2.python.hypothesis_test_util as hu
6
import hypothesis.strategies as st
7
from hypothesis import given, settings
8
import unittest
9

10

11
def has_blob(proto, needle):
    """Return True if any op in ``proto`` reads or writes the blob ``needle``.

    Args:
        proto: object with an iterable ``op`` attribute; each op exposes
            iterable ``input`` and ``output`` collections of blob names.
        needle: blob name to search for.

    Returns:
        True when ``needle`` equals some input or output name, else False.
    """
    return any(
        name == needle
        for op in proto.op
        for names in (op.input, op.output)
        for name in names
    )
20

21

22
def count_blobs(proto):
    """Return the number of distinct blob names used by the ops of ``proto``.

    Args:
        proto: object with an iterable ``op`` attribute; each op exposes
            iterable ``input`` and ``output`` collections of blob names.

    Returns:
        Count of unique names appearing as an input or output of any op.
    """
    blobs = set()
    for op in proto.op:
        # Update in place instead of rebuilding the set for every op.
        blobs.update(op.input, op.output)
    return len(blobs)
27

28

29
class MemongerTest(hu.HypothesisTestCase):
30
    @given(input_dim=st.integers(min_value=1, max_value=10),
           output_dim=st.integers(min_value=1, max_value=10),
           batch_size=st.integers(min_value=1, max_value=10),
           do=st.sampled_from(hu.device_options),
           algo=st.sampled_from(memonger.AssignmentAlgorithm))
    @settings(max_examples=5, deadline=None)
    def test_simple_memonger(self, input_dim, output_dim, batch_size, do, algo):
        """Check optimize_interference leaves loss and fc1_w gradient unchanged.

        Builds a three-layer FC classifier with gradient operators, runs the
        original net and the optimized net on the same inputs and compares
        the fetched loss and fc1_w gradient.  The optimization is exercised
        twice: once without and once with explicitly collected blob sizes.
        """
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)

        fc3.Relu([], fc3)\
           .Softmax([], "pred") \
           .LabelCrossEntropy(["label"], ["xent"]) \
           .AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.net.Proto().device_option.CopyFrom(do)
        m.param_init_net.Proto().device_option.CopyFrom(do)
        # Blobs handed to memonger as static: everything produced by the
        # init net, the fed inputs, the loss and the gradient fetched below.
        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["data", "label", "loss", input_to_grad["fc1_w"]]
        grad_name = str(input_to_grad["fc1_w"])

        optimization = memonger.optimize_interference(
            m.Proto(), static_blobs, algo=algo)
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("data", data, device_option=do)
        workspace.FeedBlob("label", label, device_option=do)

        # Reference run on the unoptimized net.
        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("loss")
        grad = workspace.FetchBlob(grad_name)

        workspace.RunNetOnce(optimization.net)
        optimized_loss = workspace.FetchBlob("loss")
        optimized_grad = workspace.FetchBlob(grad_name)
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)
        stats = memonger.compute_statistics(optimization.assignments)
        self.assertLess(stats.optimized_nbytes, stats.baseline_nbytes)

        # run with blob sizes
        blob_sizes = memonger.collect_blob_sizes(m.Proto())
        optimization1 = memonger.optimize_interference(
            m.Proto(), static_blobs, blob_sizes=blob_sizes, algo=algo)
        workspace.RunNetOnce(optimization1.net)
        optimized_loss = workspace.FetchBlob("loss")
        optimized_grad = workspace.FetchBlob(grad_name)
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)
        stats = memonger.compute_statistics(optimization1.assignments)
        self.assertLessEqual(stats.optimized_nbytes, stats.baseline_nbytes)
83

84
    @given(input_dim=st.integers(min_value=1, max_value=10),
           output_dim=st.integers(min_value=1, max_value=10),
           batch_size=st.integers(min_value=1, max_value=10),
           do=st.sampled_from(hu.device_options))
    @settings(max_examples=5, deadline=None)
    def test_fast_memonger(self, input_dim, output_dim, batch_size, do):
        """Check optimize_inference_fast keeps results and reduces blob count.

        Builds a three-layer FC classifier with gradient operators, runs the
        original and the optimized net on the same inputs, compares the loss
        and the fc1_w gradient, and asserts the optimized net references
        fewer distinct blobs than the original.
        """
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)

        fc3.Relu([], fc3)\
           .Softmax([], "pred") \
           .LabelCrossEntropy(["label"], ["xent"]) \
           .AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.net.Proto().device_option.CopyFrom(do)
        m.param_init_net.Proto().device_option.CopyFrom(do)
        # Blobs handed to memonger as static: everything produced by the
        # init net, the fed inputs, the loss and the gradient fetched below.
        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["data", "label", "loss", input_to_grad["fc1_w"]]
        grad_name = str(input_to_grad["fc1_w"])

        optimized_net = memonger.optimize_inference_fast(
            m.Proto(), static_blobs)
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("data", data, device_option=do)
        workspace.FeedBlob("label", label, device_option=do)

        # Reference run on the unoptimized net.
        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("loss")
        grad = workspace.FetchBlob(grad_name)

        workspace.RunNetOnce(optimized_net)
        optimized_loss = workspace.FetchBlob("loss")
        optimized_grad = workspace.FetchBlob(grad_name)
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)

        self.assertLess(count_blobs(optimized_net), count_blobs(m.Proto()))
124

125
    def test_fast_memonger_unique_outputs(self):
        """Check optimize_inference_fast never duplicates an op's outputs.

        Two FC layers are combined pairwise through Sum ops (including each
        layer with itself), concatenated and passed through Relu.  After
        optimization every op must still list pairwise-distinct outputs.
        """
        m = model_helper.ModelHelper()
        fc = []
        for i in range(2):
            # The original passed "fc".format(i), which has no placeholder and
            # named both layers "fc"; give each layer its own name.
            z = brew.fc(
                m, "data{}".format(i), "fc{}".format(i), dim_in=2, dim_out=2)
            fc.append(z)
        r = []
        # Trick is here to have same input appear twice in a same Sum
        for x in fc:
            for y in fc:
                r.append(brew.sum(m, [x, y], 1))
        concated = brew.concat(m, r, "concated")
        brew.relu(m, concated, "merged")

        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["merged"] + ["data{}".format(i) for i in range(len(fc))]

        optimized_net = memonger.optimize_inference_fast(
            m.Proto(), static_blobs)
        for op in optimized_net.op:
            self.assertEqual(len(op.output), len(set(op.output)), str(op))
148

149
    @given(input_dim=st.integers(min_value=1, max_value=4),
           output_dim=st.integers(min_value=1, max_value=4),
           batch_size=st.integers(min_value=1, max_value=4))
    def test_gradient_optim(self, input_dim, output_dim, batch_size):
        """Check share_grad_blobs with and without activation sharing.

        Builds a five-layer FC classifier under the "name_x" scope, then
        verifies that gradient-blob sharing reduces the number of distinct
        blobs, that additionally sharing activations does not increase it,
        that "name_x/fc5" survives in both optimized nets, and that loss and
        fc1_w gradient match the unoptimized net.
        """
        m = model_helper.ModelHelper()
        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
            fc5.Relu([], fc5)\
               .Softmax([], "pred") \
               .LabelCrossEntropy(["label"], ["xent"]) \
               .AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["name_x/loss"])
        grad_name = str(input_to_grad["name_x/fc1_w"])

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            m.net,
            ["name_x/loss"],
            set(m.param_to_grad.values()),
            "name_x/",
            share_activations=False,
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        optim_proto_wacts = memonger.share_grad_blobs(
            m.net,
            ["name_x/loss"],
            set(m.param_to_grad.values()),
            "name_x/",
            share_activations=True,
            dont_share_blobs={grad_name},
        )
        blobs_wact_optim = count_blobs(optim_proto_wacts)
        self.assertLessEqual(blobs_wact_optim, blobs_after)

        # Check that the last activations are not shared
        self.assertTrue(has_blob(optim_proto, "name_x/fc5"))
        self.assertTrue(
            has_blob(optim_proto_wacts, "name_x/fc5"),
            "Dont remap final activation",
        )

        # Test networks produce exactly same gradients
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("name_x/loss")
        grad = workspace.FetchBlob(grad_name)
        workspace.RunNetOnce(optim_proto)
        optimized_loss = workspace.FetchBlob("name_x/loss")
        optimized_grad = workspace.FetchBlob(grad_name)
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)

        # Overwrite the gradient blob before the next run; presumably so the
        # following comparison cannot succeed on a stale value.
        workspace.FeedBlob(grad_name, np.array([0.0]))

        # Run with the forward optimization
        workspace.RunNetOnce(optim_proto_wacts)
        optimized_loss = workspace.FetchBlob("name_x/loss")
        optimized_grad = workspace.FetchBlob(grad_name)
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)
219

220
    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
    def test_memonger_mix_cpu_gpu(self):
        """Check that memonger does not make blobs cross CPU/GPU boundary.

        Builds four FC layers under a GPU device scope, copies the result to
        CPU, adds three more FC layers and a loss under a CPU device scope,
        and runs share_grad_blobs with activation sharing.  The blobs touched
        by CPU ops and by GPU ops (copy ops excluded) must be disjoint.
        """
        m = model_helper.ModelHelper()
        with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
            fc1 = brew.fc(m, "data", "fc1", dim_in=2, dim_out=2)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=2, dim_out=2)
            fc3 = brew.fc(m, fc2, "fc3", dim_in=2, dim_out=2)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=2, dim_out=2)
            fc4_cpu = m.net.CopyGPUToCPU(fc4, "fc4_cpu")
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
            fc5_cpu = brew.fc(m, fc4_cpu, "fc5_cpu", dim_in=2, dim_out=2)
            fc6_cpu = brew.fc(m, fc5_cpu, "fc6_cpu", dim_in=2, dim_out=2)
            fc7_cpu = brew.fc(m, fc6_cpu, "fc7_cpu", dim_in=2, dim_out=2)
            fc7_cpu.Relu([], fc7_cpu) \
               .Softmax([], "pred") \
               .LabelCrossEntropy(["label"], ["xent"]) \
               .AveragedLoss([], "loss")
        m.AddGradientOperators(["loss"])

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            m.net,
            ["loss"],
            set(m.param_to_grad.values()),
            "",
            share_activations=True,
            dont_share_blobs=set(),
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Create set of blobs on CPU side and GPU side and check they don't
        # overlap
        device_blobs = {caffe2_pb2.CPU: set(), workspace.GpuDeviceType: set()}
        for op in optim_proto.op:
            # Copy ops legitimately touch blobs on both devices; skip them.
            if op.type not in ['CopyCPUToGPU', "CopyGPUToCPU"]:
                dev = op.device_option.device_type
                for b in list(op.input) + list(op.output):
                    device_blobs[dev].add(b)

        device_crossers = device_blobs[caffe2_pb2.CPU].intersection(
            device_blobs[workspace.GpuDeviceType]
        )
        self.assertEqual(device_crossers, set())
267

268
    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    @settings(deadline=1000)
    def test_gradient_optim_tree(self, input_dim, output_dim, batch_size):
        """Check share_grad_blobs on a net with two loss heads.

        The first loss is computed from fc5 and the second from fc6 (which
        itself consumes fc5).  After sharing gradient and activation blobs,
        the blob count must drop, "name_x/fc6" must still be present, and
        both losses and the fc1_w gradient must match the unoptimized net.
        """
        m = model_helper.ModelHelper()
        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
            fc5.Relu([], fc5) \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")
        input_to_grad = m.AddGradientOperators(["name_x/loss1", "name_x/loss2"])
        grad_name = str(input_to_grad["name_x/fc1_w"])

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            m.net,
            ["name_x/loss1", "name_x/loss2"],
            set(m.param_to_grad.values()),
            "name_x",  # "name_x//shared_gradinp_0_shared" if using "name_x/"
            share_activations=True,
            dont_share_blobs={'name_x/fc6', 'name_x/fc5', grad_name},
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)
        self.assertTrue(has_blob(optim_proto, "name_x/fc6"))

        # Test networks produce exactly same gradients
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        grad = workspace.FetchBlob(grad_name)
        # Overwrite the gradient blob before the optimized run; presumably so
        # the comparison below cannot succeed on a stale value.
        workspace.FeedBlob(grad_name, np.array([0.0]))

        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        optimized_grad = workspace.FetchBlob(grad_name)
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)
        np.testing.assert_almost_equal(grad, optimized_grad)
325

326
    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    @settings(deadline=1000)
    def test_forward_optim_tree_daggy(self, input_dim, output_dim, batch_size):
        """Check optimize_inference_for_dag on a branching forward-only net.

        The net type is set to "dag" with four workers.  fc2 feeds two
        parallel FC branches that are summed; two loss heads follow.  The
        optimized net must use fewer distinct blobs and yield the same two
        losses as the original.
        """
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
            fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
            fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")

            # Note: the Relu reads fc5 and writes into the fc5sum blob.
            fc5.Relu([], fc5sum) \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data"], "name_x"
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Test networks produce exactly same results
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)
382

383
    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    @settings(deadline=10000)
    def test_forward_optim_tree_harder(self, input_dim, output_dim, batch_size):
        """Check optimize_inference_for_dag with explicit external inputs.

        Same branching topology as the "daggy" test, but "label" and "data"
        are registered as external inputs and the summed branch is passed
        through a Relu into a separate "relu1" blob.  Also verifies that
        listing a parameter ("name_x/fc1_w") among the input blobs does not
        change the resulting blob count.
        """
        m = model_helper.ModelHelper()
        m.net.Proto().type = "dag"
        m.net.Proto().num_workers = 4
        m.net.AddExternalInput("label")
        m.net.AddExternalInput("data")

        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
            fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
            fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")
            fc5sum.Relu([], "relu1") \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data"], "name_x/"
        )

        blobs_after = count_blobs(optim_proto)

        # Extra test with when one of the parameters is also an input.
        # This caused a bug before.
        optim_proto_extra_input = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data", "name_x/fc1_w"], "name_x/"
        )
        blobs_after_extra_input = count_blobs(optim_proto_extra_input)
        self.assertEqual(blobs_after, blobs_after_extra_input)
        ###

        print(str(optim_proto))
        self.assertLess(blobs_after, blobs_before)

        # Test networks produce exactly same results
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)
452

453
    # This test reproduces scenario where dag traversal for finding
    # shared blobs was not always starting from ops with in degree of 0
    #
    # No Hypothesis @settings decorator here: the test has no @given, and
    # Hypothesis rejects @settings applied to a non-@given test.
    def test_forward_optim_tree_dag_traversal(self):
        """Check optimize_inference_for_dag when a head belongs to a non-root op.

        Builds the branching two-loss net with fixed dimensions of 4 and
        passes "name_x/fc5_w" together with "name_x/data" as input blobs.
        The optimized net must use fewer distinct blobs than the original.
        """
        input_dim = 4
        output_dim = 4
        batch_size = 4

        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
            fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
            fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")

            fc5.Relu([], fc5sum) \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        # adding name_x/fc5_w as heads (which belongs to non-root op)
        # to make sure that dag traversal always starts from root ops
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/fc5_w", "name_x/data"], "name_x"
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)
498

499
    # This is specifically to verify the op schema check being done in memonger
    def test_forward_optim_tree_enforce_inplace_op_invalid(self):
        """Expect RuntimeError when an enforce-inplace op is not in place.

        IndexFreeze is added with distinct input ("A") and output ("B")
        blobs; optimize_inference_for_dag is expected to raise.
        """
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        net = m.net
        net.IndexFreeze("A", "B")  # enforce inplace op
        net.Sum(["B", "B"], "C")
        net.Relu("C", "D")
        net.Sum(["D", "D"], "E")

        with self.assertRaises(RuntimeError):
            memonger.optimize_inference_for_dag(net, ["A"], "")
513

514
    # Here inplace op is specifically a root op to repro the scenario where dag
    # memonger could treat all the output blobs as shareable blobs and fails
    # assertion of input blob with the same name not allowed to share
    def test_forward_optim_tree_enforce_inplace_op_valid_and_as_head(self):
        """Check dag memonger with a valid in-place op as the root op.

        IndexFreeze reads and writes the same blob "A", which is also passed
        as the input blob.  The optimized net must use fewer distinct blobs
        than the original.
        """
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        net = m.net
        net.IndexFreeze("A", "A")  # enforce inplace op
        net.Sum(["A", "A"], "B")
        net.Relu("B", "C")
        net.Relu("C", "D")
        net.Sum(["D", "D"], "E")

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            net, ["A"], ""
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)
535

536
    def test_rnn(self):
        """Check share_grad_blobs on a two-layer LSTM with a softmax loss.

        Asserts that sharing gradient and activation blobs reduces the number
        of distinct blobs, then feeds all inputs and runs the init net and
        the original training net once as a smoke test.
        """
        from caffe2.python import rnn_cell
        T = 5
        model = model_helper.ModelHelper()
        seq_lengths, labels = \
            model.net.AddExternalInputs(
                'seq_lengths', 'labels',
            )
        # One (hidden, cell) initial-state pair per LSTM layer.
        init_blobs = []
        for i in range(2):
            hidden_init, cell_init = model.net.AddExternalInputs(
                "hidden_init_{}".format(i),
                "cell_init_{}".format(i)
            )
            init_blobs.extend([hidden_init, cell_init])
        model.param_init_net.ConstantFill([], ["input"], shape=[T, 4, 10])
        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob="input",
            seq_lengths=seq_lengths,
            initial_states=init_blobs,
            dim_in=10,
            dim_out=[10, 10],
            scope="lstm1",
            forward_only=False,
            drop_states=True,
            return_last_layer_only=True,
        )
        softmax, loss = model.net.SoftmaxWithLoss(
            [model.Flatten(output), "labels"],
            ['softmax', 'loss'],
        )

        model.AddGradientOperators([loss])
        blobs_before = count_blobs(model.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            model.net,
            ["loss"],
            set(model.param_to_grad.values()),
            "",
            share_activations=True,
            dont_share_blobs=set(),
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Run once to see all blobs are set up correctly
        for init_blob in init_blobs:
            workspace.FeedBlob(init_blob, np.zeros(
                [1, 4, 10], dtype=np.float32
            ))
        workspace.FeedBlob("seq_lengths", np.array([T] * 4, dtype=np.int32))
        # NOTE: rand() samples from [0, 1), so the int32 cast makes every
        # label 0; kept as in the original since only a smoke run follows.
        workspace.FeedBlob("labels", np.random.rand(T).astype(np.int32))

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)
592

593
    def test_compute_interference_graph_inplace_ops(self):
        """Check the interference graph edges for three chained in-place Copy ops.

        Each Copy reads and writes "b1"; the expected edge list connects
        every earlier op to every later one.
        """
        m = model_helper.ModelHelper()
        m.Copy("b1", "b1")
        m.Copy("b1", "b1")
        m.Copy("b1", "b1")
        g = memonger.compute_interference_graph(m.net.Proto().op)
        self.assertEqual(list(g.edges()), [(0, 1), (0, 2), (1, 2)])
600

601
    def test_topological_sort_longest_path(self):
        """Compare the plain and longest-path topological orders on a small net.

        The net has four ops (indices noted inline).  The plain traversal is
        expected to yield [2, 0, 1, 3] and the longest-path traversal
        [0, 1, 2, 3].
        """
        m = model_helper.ModelHelper()
        # 0
        m.Copy("conv0_w_comp", "conv0_w")
        # 1
        conv0 = brew.conv(m, "data", "conv0", 32, 32, 4)
        # 2
        m.Copy("conv2_w", "conv2_w")
        # 3
        brew.conv(m, conv0, "conv2", 16, 32, 4)

        g = memonger.compute_interference_graph(m.net.Proto().op)

        orders_org = memonger.topological_sort_traversal(g)
        orders_gt_org = [2, 0, 1, 3]
        self.assertEqual(orders_gt_org, list(orders_org))

        orders = memonger.topological_sort_traversal_longest_path(g)
        # longer path is in front of the shorter one
        orders_gt = [0, 1, 2, 3]
        self.assertEqual(orders_gt, list(orders))
622

623
    def test_topological_sort_longest_path_multi_target(self):
        """Compare both topological orders on a net with two separate chains.

        Ops 0-3 form the conv chain and ops 4-5 a Copy chain.  The plain
        traversal is expected to yield [4, 5, 2, 0, 1, 3] and the
        longest-path traversal [0, 1, 2, 3, 4, 5].
        """
        # two outputs: conv2 and data4
        m = model_helper.ModelHelper()
        # 0
        m.Copy("conv0_w_comp", "conv0_w")
        # 1
        conv0 = brew.conv(m, "data", "conv0", 32, 32, 4)
        # 2
        m.Copy("conv2_w", "conv2_w")
        # 3
        brew.conv(m, conv0, "conv2", 16, 32, 4)
        # 4
        m.Copy("data1", "data2")
        # 5
        m.Copy("data2", "data3")

        g = memonger.compute_interference_graph(m.net.Proto().op)

        orders_org = memonger.topological_sort_traversal(g)
        orders_gt_org = [4, 5, 2, 0, 1, 3]
        self.assertEqual(orders_gt_org, list(orders_org))

        orders = memonger.topological_sort_traversal_longest_path(g)
        # longer path is in front of the shorter one
        orders_gt = [0, 1, 2, 3, 4, 5]
        self.assertEqual(orders_gt, list(orders))
649

650
    def test_topological_sort_longest_path_single_node(self):
        """Check both topological traversals return [0] for a one-op net."""
        # single node
        m = model_helper.ModelHelper()
        # 0
        m.Copy("conv0_w_comp", "conv0_w")

        g = memonger.compute_interference_graph(m.net.Proto().op)

        orders_org = memonger.topological_sort_traversal(g)
        orders_gt_org = [0]
        self.assertEqual(orders_gt_org, list(orders_org))

        orders = memonger.topological_sort_traversal_longest_path(g)
        # With a single op both traversals must agree.
        orders_gt = [0]
        self.assertEqual(orders_gt, list(orders))
666

667
    def test_compute_assignments_greedy(self):
        """Check the greedy assignment and its memory usage on four live ranges.

        The expected result groups b1 with b4 and b2 with b3, for a reported
        memory usage of 11.
        """
        LiveRange = memonger.LiveRange
        ranges_sorted = [
            ('b1', LiveRange(1, 3, 10)),
            ('b2', LiveRange(3, 4, 1)),
            ('b3', LiveRange(5, 6, 1)),
            ('b4', LiveRange(5, 7, 10)),
        ]
        assignment_gt = [
            [ranges_sorted[0], ranges_sorted[3]],
            [ranges_sorted[1], ranges_sorted[2]],
        ]

        best = memonger.compute_assignments_greedy(ranges_sorted, None)
        self.assertEqual(memonger.get_memory_usage(best), 11)
        self.assertEqual(best, assignment_gt)
683

684
    def test_compute_assignments_dp(self):
        """DP assignment over four live ranges must report a memory usage of 11."""
        LiveRange = memonger.LiveRange
        # (blob name, LiveRange constructor arguments), in the order handed
        # to the assignment routine.
        specs = [
            ('b1', (1, 3, 10)),
            ('b2', (3, 4, 1)),
            ('b3', (5, 6, 1)),
            ('b4', (5, 7, 10)),
        ]
        ranges_sorted = [(name, LiveRange(*args)) for name, args in specs]

        assignment = memonger.compute_assignments_dp(ranges_sorted, None)
        usage = memonger.get_memory_usage(assignment)
        self.assertEqual(usage, 11)
695

696
    def test_compute_assignments_dp1(self):
        """DP assignment over three live ranges must report a memory usage of 11.

        Unlike the sibling DP test, the second argument passed here is an
        empty list rather than ``None``.
        """
        LiveRange = memonger.LiveRange
        first = ('b1', LiveRange(1, 2, 10))
        second = ('b2', LiveRange(4, 6, 1))
        third = ('b3', LiveRange(5, 6, 10))
        ranges_sorted = [first, second, third]

        assignment = memonger.compute_assignments_dp(ranges_sorted, [])
        usage = memonger.get_memory_usage(assignment)
        self.assertEqual(usage, 11)
706

707
    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    def test_verify_graph_equality(self, input_dim, output_dim, batch_size):
        """Nets with the same structure but different blob names compare equal.

        Both nets are: one FC on "data", two FCs on its output, and a Sum of
        those two into "out". Only the intermediate blob names differ.
        """

        def build_model(stem_name, left_name, right_name):
            # Builds the three-FC-plus-sum net under the "name_x" scope.
            model = model_helper.ModelHelper()
            model.Proto().type = "dag"
            model.Proto().num_workers = 4
            with core.NameScope("name_x"):
                stem = brew.fc(model, "data", stem_name, dim_in=input_dim, dim_out=output_dim)
                left = brew.fc(model, stem, left_name, dim_in=output_dim, dim_out=output_dim)
                right = brew.fc(model, stem, right_name, dim_in=output_dim, dim_out=output_dim)
                brew.sum(model, [left, right], "out")
            return model

        reference = build_model("x", "y", "z")
        renamed = build_model("other_x", "other_y", "other_z")

        graphs_match = memonger.verify_graph_equality(
            reference.net.Proto(), renamed.net.Proto()
        )
        self.assertTrue(graphs_match)
730

731
    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    def test_verify_graph_equality_harder(self, input_dim, output_dim, batch_size):
        """Two-branch, two-level nets compare equal despite reused blob names.

        The first net writes the second-level FC outputs to "u" and "v"; the
        second net writes them back to "y" and "z" (the names of their own
        inputs). The test expects the graphs to be reported as equal.
        """

        def build_model(blob_names):
            # blob_names: outputs of (stem, branch a, branch b, top a, top b).
            stem_name, a_name, b_name, top_a_name, top_b_name = blob_names
            model = model_helper.ModelHelper()
            model.Proto().type = "dag"
            model.Proto().num_workers = 4
            with core.NameScope("name_x"):
                stem = brew.fc(model, "data", stem_name, dim_in=input_dim, dim_out=output_dim)
                branch_a = brew.fc(model, stem, a_name, dim_in=output_dim, dim_out=output_dim)
                branch_b = brew.fc(model, stem, b_name, dim_in=output_dim, dim_out=output_dim)
                top_a = brew.fc(model, branch_a, top_a_name, dim_in=output_dim, dim_out=output_dim)
                top_b = brew.fc(model, branch_b, top_b_name, dim_in=output_dim, dim_out=output_dim)
                brew.sum(model, [top_a, top_b], "out")
            return model

        reference = build_model(("x", "y", "z", "u", "v"))
        reusing = build_model(("x", "y", "z", "y", "z"))

        graphs_match = memonger.verify_graph_equality(
            reference.net.Proto(), reusing.net.Proto()
        )
        self.assertTrue(graphs_match)
758

759
    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    def test_verify_graph_inequality(self, input_dim, output_dim, batch_size):
        """Nets must compare unequal when both branches write the same blob.

        The first net sums two distinct branch outputs ("y" and "z"); in the
        second net both branch FCs output "y". The test expects the graphs
        to be reported as different.
        """

        def build_model(stem_name, left_name, right_name):
            # One FC on "data", two FCs on its output, Sum of both into "out".
            model = model_helper.ModelHelper()
            model.Proto().type = "dag"
            model.Proto().num_workers = 4
            with core.NameScope("name_x"):
                stem = brew.fc(model, "data", stem_name, dim_in=input_dim, dim_out=output_dim)
                left = brew.fc(model, stem, left_name, dim_in=output_dim, dim_out=output_dim)
                right = brew.fc(model, stem, right_name, dim_in=output_dim, dim_out=output_dim)
                brew.sum(model, [left, right], "out")
            return model

        reference = build_model("x", "y", "z")
        colliding = build_model("x", "y", "y")

        graphs_match = memonger.verify_graph_equality(
            reference.net.Proto(), colliding.net.Proto()
        )
        self.assertFalse(graphs_match)
782

783
    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    def test_verify_graph_inequality_harder(self, input_dim, output_dim, batch_size):
        """Two-level nets must compare unequal when mid-level outputs collide.

        The first net keeps the two first-level branch outputs apart ("y" and
        "z"); the second net writes both of them to "y" before the
        second-level FCs. The test expects the graphs to be reported as
        different.
        """

        def build_model(blob_names):
            # blob_names: outputs of (stem, branch a, branch b, top a, top b).
            stem_name, a_name, b_name, top_a_name, top_b_name = blob_names
            model = model_helper.ModelHelper()
            model.Proto().type = "dag"
            model.Proto().num_workers = 4
            with core.NameScope("name_x"):
                stem = brew.fc(model, "data", stem_name, dim_in=input_dim, dim_out=output_dim)
                branch_a = brew.fc(model, stem, a_name, dim_in=output_dim, dim_out=output_dim)
                branch_b = brew.fc(model, stem, b_name, dim_in=output_dim, dim_out=output_dim)
                top_a = brew.fc(model, branch_a, top_a_name, dim_in=output_dim, dim_out=output_dim)
                top_b = brew.fc(model, branch_b, top_b_name, dim_in=output_dim, dim_out=output_dim)
                brew.sum(model, [top_a, top_b], "out")
            return model

        reference = build_model(("x", "y", "z", "u", "v"))
        colliding = build_model(("x", "y", "y", "u", "v"))

        graphs_match = memonger.verify_graph_equality(
            reference.net.Proto(), colliding.net.Proto()
        )
        self.assertFalse(graphs_match)
810

811
    def test_release_blobs_when_used(self):
        """Check which blobs get a Free op from ``release_blobs_when_used``.

        The net is four FC layers (x -> y -> u, x -> z), an Alias of "u",
        and a Sum of "z" and "u" into "out". The returned proto is scanned
        in op order to verify that:
          * no blob is freed twice,
          * no op reads or writes a blob after it has been freed,
          * the set of freed blobs is exactly {"x", "y", "z"}.
        """
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "x", dim_in=2, dim_out=2)
        fc2 = brew.fc(m, fc1, "y", dim_in=2, dim_out=2)
        fc3 = brew.fc(m, fc1, "z", dim_in=2, dim_out=2)
        fc4 = brew.fc(m, fc2, "u", dim_in=2, dim_out=2)
        m.net.Alias(["u"], ["u_alias"])

        brew.sum(m, [fc3, fc4], "out")

        # The protected blobs must be passed as a set of blob names.
        # set("data") would iterate the string and yield {'d', 'a', 't'},
        # so a set literal holding the single name "data" is used instead.
        with_frees = memonger.release_blobs_when_used(m.net.Proto(), {"data"})

        expect_frees = {"x", "y", "z"}  # out is external output
                                        # and u is aliased so cannot be freed
        found_frees = set()
        for op in with_frees.op:
            if op.type == "Free":
                # no double frees
                self.assertNotIn(op.input[0], found_frees)
                found_frees.add(op.input[0])
            else:
                # Check a freed blob is not used anymore
                for inp in op.input:
                    self.assertNotIn(inp, found_frees)
                for outp in op.output:
                    self.assertNotIn(outp, found_frees)

        self.assertEqual(expect_frees, found_frees)
838

839

840
if __name__ == "__main__":
    # Run the test cases of this module when it is executed as a script.
    unittest.main()
842
pytorch

Использование cookies