# pytorch: elementwise_op_broadcast_test.py

import unittest

from hypothesis import given, assume, settings
import hypothesis.strategies as st
import numpy as np
import operator

from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.serialized_test.serialized_test_util as serial


# TODO(jiayq): make them hypothesis tests for better coverage.
class TestElementwiseBroadcast(serial.SerializedTestCase):

    def __generate_test_cases(self, allow_broadcast_fastpath: bool):
        """
        Generates a set of test cases.

        For each iteration, generates X, Y, args, X_out, Y_out
        where
          X, Y         are test input tensors
          args         is a dictionary of arguments to be passed to
                       core.CreateOperator()
          X_out, Y_out are reshaped versions of X and Y
                       which can be used to calculate the expected
                       result with the operator under test
        """
        # Set broadcast and no axis, i.e. broadcasting last dimensions.
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(4, 5).astype(np.float32)
        args = dict(broadcast=1, allow_broadcast_fastpath=allow_broadcast_fastpath)
        yield X, Y, args, X, Y

        # broadcasting intermediate dimensions
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(3, 4).astype(np.float32)
        args = dict(broadcast=1, axis=1, allow_broadcast_fastpath=allow_broadcast_fastpath)
        yield X, Y, args, X, Y[:, :, np.newaxis]

        # broadcasting the first dimension
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(2).astype(np.float32)
        args = dict(broadcast=1, axis=0, allow_broadcast_fastpath=allow_broadcast_fastpath)
        yield X, Y, args, X, Y[:, np.newaxis, np.newaxis, np.newaxis]

        # broadcasting with single elem dimensions at both ends
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(1, 4, 1).astype(np.float32)
        args = dict(broadcast=1, axis=1, allow_broadcast_fastpath=allow_broadcast_fastpath)
        yield X, Y, args, X, Y

    def __test_binary_op(
        self, gc, dc, caffe2_op, op_function, allow_broadcast_fastpath: bool = False
    ):
        """
        Args:
            gc: the device option used for the gradient checks.
            dc: the list of device options used for the device checks.
            caffe2_op: A string. Name of the Caffe2 operator to test.
            op_function: an actual python operator (e.g. operator.add)
            allow_broadcast_fastpath: whether the operator may take the
                broadcast fastpath.
        """

        for X, Y, op_args, X_out, Y_out in self.__generate_test_cases(allow_broadcast_fastpath):
            op = core.CreateOperator(caffe2_op, ["X", "Y"], "out", **op_args)
            workspace.FeedBlob("X", X)
            workspace.FeedBlob("Y", Y)
            workspace.RunOperatorOnce(op)
            out = workspace.FetchBlob("out")
            np.testing.assert_array_almost_equal(out, op_function(X_out, Y_out))
            self.assertDeviceChecks(dc, op, [X, Y], [0])
            self.assertGradientChecks(gc, op, [X, Y], 1, [0])
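            # The gradient check above targets input index 1 (Y, the
            # broadcast side), where the gradient must be reduced back
            # to Y's shape.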

    @given(allow_broadcast_fastpath=st.booleans(), **hu.gcs)
    @settings(deadline=None)
    def test_broadcast_Add(self, allow_broadcast_fastpath: bool, gc, dc):
        self.__test_binary_op(
            gc, dc, "Add", operator.add, allow_broadcast_fastpath=allow_broadcast_fastpath
        )

    @given(allow_broadcast_fastpath=st.booleans(), **hu.gcs)
    @settings(deadline=None)
    def test_broadcast_Mul(self, allow_broadcast_fastpath: bool, gc, dc):
        self.__test_binary_op(
            gc, dc, "Mul", operator.mul, allow_broadcast_fastpath=allow_broadcast_fastpath
        )

    @given(allow_broadcast_fastpath=st.booleans(), **hu.gcs)
    @settings(deadline=None)
    def test_broadcast_Sub(self, allow_broadcast_fastpath: bool, gc, dc):
        self.__test_binary_op(
            gc, dc, "Sub", operator.sub, allow_broadcast_fastpath=allow_broadcast_fastpath
        )

    @given(**hu.gcs)
    @settings(deadline=None)
    def test_broadcast_powt(self, gc, dc):
        np.random.seed(101)

        # forward operator
        def powt_op(X, Y):
            return [np.power(X, Y)]

        # two gradients: dZ/dX = Y * X^(Y-1) and dZ/dY = X^Y * ln(X)
        def powt_grad(g_out, outputs, fwd_inputs):
            [X, Y] = fwd_inputs
            Z = outputs[0]
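            # NumPy turns the two-element list into an array of shape
            # (2,) + X.shape, so multiplying by g_out scales both partial
            # derivatives elementwise in one expression.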
            return ([Y * np.power(X, Y - 1), Z * np.log(X)] * g_out)

        # 1. Set broadcast and no axis, i.e. broadcasting last dimensions.
        X = np.random.rand(2, 3, 4, 5).astype(np.float32) + 1.0
        Y = np.random.rand(4, 5).astype(np.float32) + 2.0

        # two gradients: Y * X^(Y-1) and X^Y * ln(X); the latter is summed
        # over dims 1 and 0 to account for the broadcast
        def powt_grad_broadcast(g_out, outputs, fwd_inputs):
            [GX, GY] = powt_grad(g_out, outputs, fwd_inputs)
            return ([GX, np.sum(np.sum(GY, 1), 0)])

        op = core.CreateOperator("Pow", ["X", "Y"], "Z", broadcast=1)
        self.assertReferenceChecks(device_option=gc,
                                   op=op,
                                   inputs=[X, Y],
                                   reference=powt_op,
                                   output_to_grad="Z",
                                   grad_reference=powt_grad_broadcast)
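        # assertReferenceChecks runs the op and compares the forward
        # output and, via output_to_grad/grad_reference, the gradients of
        # "Z" against the NumPy references above.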

        # 2. broadcasting intermediate dimensions
        X = np.random.rand(2, 3, 4, 5).astype(np.float32) + 1.0
        Y = np.random.rand(3, 4).astype(np.float32) + 2.0

        # pow op with the latter array increased by one dim
        def powt_op_axis1(X, Y):
            return powt_op(X, Y[:, :, np.newaxis])

        # two gradients: Y * X^(Y-1) and X^Y * ln(X); the latter is summed
        # over dims 3 and 0 to account for the broadcast
        def powt_grad_axis1(g_out, outputs, fwd_inputs):
            [X, Y] = fwd_inputs
            [GX, GY] = powt_grad(g_out, outputs, [X, Y[:, :, np.newaxis]])
            return ([GX, np.sum(np.sum(GY, 3), 0)])

        op = core.CreateOperator("Pow", ["X", "Y"], "Z", broadcast=1, axis=1)
        self.assertReferenceChecks(device_option=gc,
                                   op=op,
                                   inputs=[X, Y],
                                   reference=powt_op_axis1,
                                   output_to_grad="Z",
                                   grad_reference=powt_grad_axis1)

        # 3. broadcasting the first dimension
        X = np.random.rand(2, 3, 4, 5).astype(np.float32) + 1.0
        Y = np.random.rand(2).astype(np.float32) + 2.0

        # pow op with the latter array increased by one dim
        def powt_op_axis0(X, Y):
            return powt_op(X, Y[:, np.newaxis, np.newaxis, np.newaxis])

        # two gradients: Y * X^(Y-1) and X^Y * ln(X); the latter is summed
        # over dims 3, 2 and 1 to account for the broadcast
        def powt_grad_axis0(g_out, outputs, fwd_inputs):
            [X, Y] = fwd_inputs
            [GX, GY] = powt_grad(g_out,
                                 outputs,
                                 [X, Y[:, np.newaxis, np.newaxis, np.newaxis]])
            return ([GX, np.sum(np.sum(np.sum(GY, 3), 2), 1)])

        op = core.CreateOperator("Pow", ["X", "Y"], "Z", broadcast=1, axis=0)
        self.assertReferenceChecks(device_option=gc,
                                   op=op,
                                   inputs=[X, Y],
                                   reference=powt_op_axis0,
                                   output_to_grad="Z",
                                   grad_reference=powt_grad_axis0)

        # 4. broadcasting with single elem dimensions at both ends
        X = np.random.rand(2, 3, 4, 5).astype(np.float32) + 1.0
        Y = np.random.rand(1, 4, 1).astype(np.float32) + 2.0

        # pow op with the latter array increased by one dim
        def powt_op_mixed(X, Y):
            return powt_op(X, Y[np.newaxis, :, :, :])

        # two gradients: Y * X^(Y-1) and X^Y * ln(X); the latter is summed
        # over dims 3, 1 and 0, then reshaped back to Y's shape
        def powt_grad_mixed(g_out, outputs, fwd_inputs):
            [X, Y] = fwd_inputs
            [GX, GY] = powt_grad(g_out, outputs, [X, Y[np.newaxis, :, :, :]])
            return ([GX, np.reshape(np.sum(np.sum(np.sum(GY, 3), 1), 0),
                                    (1, 4, 1))])

        op = core.CreateOperator("Pow", ["X", "Y"], "Z", broadcast=1, axis=1)
        self.assertReferenceChecks(device_option=gc,
                                   op=op,
                                   inputs=[X, Y],
                                   reference=powt_op_mixed,
                                   output_to_grad="Z",
                                   grad_reference=powt_grad_mixed)

    @given(allow_broadcast_fastpath=st.booleans(), **hu.gcs)
    def test_broadcast_scalar(self, allow_broadcast_fastpath: bool, gc, dc):
        # broadcasting a constant (shape (1,))
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(1).astype(np.float32)
        op = core.CreateOperator(
            "Add", ["X", "Y"], "out", broadcast=1, allow_broadcast_fastpath=allow_broadcast_fastpath
        )
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        workspace.RunOperatorOnce(op)
        out = workspace.FetchBlob("out")
        np.testing.assert_array_almost_equal(
            out, X + Y)
        self.assertDeviceChecks(dc, op, [X, Y], [0])

        # broadcasting scalar
        X = np.random.rand(1).astype(np.float32)
        Y = np.random.rand(1).astype(np.float32).reshape([])
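        # reshape([]) produces a zero-dimensional array, i.e. a true
        # scalar, as opposed to the shape-(1,) vector used above.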
        op = core.CreateOperator(
            "Add", ["X", "Y"], "out", broadcast=1, allow_broadcast_fastpath=allow_broadcast_fastpath
        )
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        workspace.RunOperatorOnce(op)
        out = workspace.FetchBlob("out")
        np.testing.assert_array_almost_equal(
            out, X + Y)
        self.assertDeviceChecks(dc, op, [X, Y], [0])

    @given(allow_broadcast_fastpath=st.booleans(), **hu.gcs)
    def test_semantic_broadcast(self, allow_broadcast_fastpath: bool, gc, dc):
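        # axis_str selects the broadcast axis by layout letter: with the
        # default NCHW order, "C" is dim 1 (so the NumPy equivalent is
        # Y[:, np.newaxis, np.newaxis]); with order="NHWC", "C" is the
        # last dim and plain trailing-dim broadcasting applies.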
        # NCHW as default
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(3).astype(np.float32)
        op = core.CreateOperator(
            "Add", ["X", "Y"], "out", broadcast=1, axis_str="C",
            allow_broadcast_fastpath=allow_broadcast_fastpath)
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        workspace.RunOperatorOnce(op)
        out = workspace.FetchBlob("out")
        np.testing.assert_array_almost_equal(
            out, X + Y[:, np.newaxis, np.newaxis])
        self.assertDeviceChecks(dc, op, [X, Y], [0])

        # NHWC
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(5).astype(np.float32)
        op = core.CreateOperator(
            "Add", ["X", "Y"], "out", broadcast=1, axis_str="C", order="NHWC",
            allow_broadcast_fastpath=allow_broadcast_fastpath)
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        workspace.RunOperatorOnce(op)
        out = workspace.FetchBlob("out")
        np.testing.assert_array_almost_equal(out, X + Y)
        self.assertDeviceChecks(dc, op, [X, Y], [0])

    @given(**hu.gcs)
    def test_sum_reduce_empty_blob(self, gc, dc):
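        # Regression check: SumReduceLike should handle tensors with
        # zero-sized dimensions without crashing.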
        net = core.Net('test')

        with core.DeviceScope(gc):
            net.GivenTensorFill([], ["X"], values=[], shape=[2, 0, 5])
            net.GivenTensorFill([], ["Y"], values=[], shape=[2, 0])
            net.SumReduceLike(["X", "Y"], "out", axis=0)
            workspace.RunNetOnce(net)

    @given(**hu.gcs)
    def test_sum_reduce(self, gc, dc):
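        # SumReduceLike is the reverse of broadcast: it reduces X down to
        # Y's shape by summing X over every dimension that Y does not
        # cover (given broadcast=1 and the same axis semantics as above).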
        # Set broadcast and no axis, i.e. broadcasting last dimensions.
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(4, 5).astype(np.float32)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1)
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        workspace.RunOperatorOnce(op)
        out = workspace.FetchBlob("out")
        res = np.sum(X, axis=0)
        res = np.sum(res, axis=0)
        np.testing.assert_array_almost_equal(out, res)
        self.assertDeviceChecks(dc, op, [X, Y], [0])

        # Set broadcast and axis=0, i.e. reducing over the last dimensions.
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(2, 3).astype(np.float32)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1, axis=0)
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        workspace.RunOperatorOnce(op)
        out = workspace.FetchBlob("out")
        res = np.sum(X, axis=3)
        res = np.sum(res, axis=2)
        np.testing.assert_array_almost_equal(out, res, decimal=3)
        self.assertDeviceChecks(dc, op, [X, Y], [0])

        # broadcasting intermediate dimensions
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(3, 4).astype(np.float32)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1, axis=1)
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        workspace.RunOperatorOnce(op)
        out = workspace.FetchBlob("out")
        res = np.sum(X, axis=0)
        res = np.sum(res, axis=2)
        np.testing.assert_array_almost_equal(out, res)
        self.assertDeviceChecks(dc, op, [X, Y], [0])

        # reducing a large fp64 tensor down to a single-element Y
        X = np.random.rand(2, 3, 4, 500).astype(np.float64)
        Y = np.random.rand(1).astype(np.float64)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1)
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        workspace.RunOperatorOnce(op)
        out = workspace.FetchBlob("out")
        res = np.array(np.sum(X))
        np.testing.assert_array_almost_equal(out, res, decimal=0)

        # broadcasting with single elem dimensions at both ends
        X = np.random.rand(2, 3, 4, 5).astype(np.float32)
        Y = np.random.rand(1, 3, 4, 1).astype(np.float32)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1)
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        workspace.RunOperatorOnce(op)
        out = workspace.FetchBlob("out")
        res = np.sum(X, axis=0)
        res = np.sum(res, axis=2).reshape(Y.shape)
        np.testing.assert_array_almost_equal(out, res)
        self.assertDeviceChecks(dc, op, [X, Y], [0])

        # fp64 is not supported with the CUDA op; restrict the device
        # checks to CPU-only device options
        dc_cpu_only = [d for d in dc if d.device_type != caffe2_pb2.CUDA]
        self.assertDeviceChecks(dc_cpu_only, op, [X, Y], [0])

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    @given(**hu.gcs)
    def test_sum_reduce_fp16(self, gc, dc):
        assume(core.IsGPUDeviceType(gc.device_type))
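        # fp16 is exercised only on GPU; the reference checks below use a
        # loosened threshold (1e-3) to absorb half-precision rounding.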

        # Set broadcast and no axis, i.e. broadcasting last dimensions.
        X = np.random.rand(2, 3, 4, 5).astype(np.float16)
        Y = np.random.rand(4, 5).astype(np.float16)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1, device_option=gc)

        def ref_op(X, Y):
            res = np.sum(X, axis=0)
            res = np.sum(res, axis=0)
            return [res]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, Y],
            reference=ref_op,
            threshold=1e-3)

        # Set broadcast and axis=0, i.e. reducing over the last dimensions.
        X = np.random.rand(2, 3, 4, 5).astype(np.float16)
        Y = np.random.rand(2, 3).astype(np.float16)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1, axis=0)

        def ref_op(X, Y):
            res = np.sum(X, axis=3)
            res = np.sum(res, axis=2)
            return [res]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, Y],
            reference=ref_op,
            threshold=1e-3)

        # broadcasting intermediate dimensions
        X = np.random.rand(2, 3, 4, 5).astype(np.float16)
        Y = np.random.rand(3, 4).astype(np.float16)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1, axis=1)

        def ref_op(X, Y):
            res = np.sum(X, axis=0)
            res = np.sum(res, axis=2)
            return [res]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, Y],
            reference=ref_op,
            threshold=1e-3)

        # broadcasting with single elem dimensions at both ends
        X = np.random.rand(2, 3, 4, 5).astype(np.float16)
        Y = np.random.rand(1, 3, 4, 1).astype(np.float16)
        op = core.CreateOperator(
            "SumReduceLike", ["X", "Y"], "out", broadcast=1)

        def ref_op(X, Y):
            res = np.sum(X, axis=0)
            res = np.sum(res, axis=2)
            return [res.reshape(Y.shape)]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, Y],
            reference=ref_op,
            threshold=1e-3)


if __name__ == "__main__":
    unittest.main()