4
The Hypothesis library uses *property-based testing* to check
5
invariants about the code under test under a variety of random inputs.
7
The key idea here is to express properties of the code under test
8
(e.g. that it passes a gradient check, that it implements a reference
9
function, etc), and then generate random instances and verify they
10
satisfy these properties.
12
The main functions of interest are exposed on `HypothesisTestCase`.
13
You can usually just add a short function in this to generate an
14
arbitrary number of test cases for your operator.
18
- `assertDeviceChecks(devices, op, inputs, outputs)`. This asserts that the
19
operator computes the same outputs, regardless of which device it is executed
21
- `assertGradientChecks(device, op, inputs, output_,
22
outputs_with_grads)`. This implements a standard numerical gradient checker
23
for the operator in question.
24
- `assertReferenceChecks(device, op, inputs, reference)`. This runs the
25
reference function (effectively calling `reference(*inputs)`, and comparing
26
that to the output of the operator.
28
`hypothesis_test_util.py` exposes some useful pre-built samplers.
30
- `hu.gcs` - a gradient checker device (`gc`) and device checker devices (`dc`)
32
- `hu.gcs_cpu_only` - a CPU-only gradient checker device (`gc`) and
33
device checker devices (`dc`). Used for when your operator is only
34
implemented on the CPU.
41
from caffe2.proto import caffe2_pb2
42
from caffe2.python import (
43
workspace, device_checker, gradient_checker, test_util, core)
48
import hypothesis.extra.numpy
49
import hypothesis.strategies as st
57
return os.getenv('SANDCASTLE') == '1' or os.getenv('TW_JOB_USER') == 'sandcastle'
61
return 'TRAVIS' in os.environ
65
return struct.unpack("f", struct.pack("f", float(x)))[0]
70
def settings(*args, **kwargs):
71
if 'min_satisfying_examples' in kwargs and hypothesis.version.__version_info__ >= (3, 56, 0):
72
kwargs.pop('min_satisfying_examples')
74
if 'deadline' in kwargs and hypothesis.version.__version_info__ < (4, 44, 0):
75
kwargs.pop('deadline')
77
if 'timeout' in kwargs and hypothesis.version.__version_info__ >= (4, 44, 0):
78
if 'deadline' not in kwargs:
79
kwargs['deadline'] = kwargs['timeout'] * 1e3
82
return hypothesis.settings(*args, **kwargs)
86
def floats(*args, **kwargs):
88
width_supported = hypothesis.version.__version_info__ >= (3, 67, 0)
89
if 'width' in kwargs and not width_supported:
92
if 'width' not in kwargs and width_supported:
94
if kwargs.get('min_value', None) is not None:
95
kwargs['min_value'] = to_float32(kwargs['min_value'])
96
if kwargs.get('max_value', None) is not None:
97
kwargs['max_value'] = to_float32(kwargs['max_value'])
99
return st.floats(*args, **kwargs)
102
hypothesis.settings.register_profile(
106
suppress_health_check=[hypothesis.HealthCheck.too_slow],
109
min_satisfying_examples=1,
110
verbosity=hypothesis.Verbosity.verbose,
112
hypothesis.settings.register_profile(
115
suppress_health_check=[hypothesis.HealthCheck.too_slow],
118
min_satisfying_examples=1,
119
verbosity=hypothesis.Verbosity.verbose,
121
hypothesis.settings.register_profile(
124
suppress_health_check=[hypothesis.HealthCheck.too_slow],
127
min_satisfying_examples=1,
128
verbosity=hypothesis.Verbosity.verbose,
131
hypothesis.settings.load_profile(
132
'sandcastle' if is_sandcastle() else os.getenv('CAFFE2_HYPOTHESIS_PROFILE',
137
def dims(min_value=1, max_value=5):
    """Strategy drawing a single tensor-dimension size in [min_value, max_value]."""
    bounds = dict(min_value=min_value, max_value=max_value)
    return st.integers(**bounds)
141
def elements_of_type(dtype=np.float32, filter_=None):
143
if dtype is np.float16:
144
elems = floats(min_value=-1.0, max_value=1.0, width=16)
145
elif dtype is np.float32:
146
elems = floats(min_value=-1.0, max_value=1.0, width=32)
147
elif dtype is np.float64:
148
elems = floats(min_value=-1.0, max_value=1.0, width=64)
149
elif dtype is np.int32:
150
elems = st.integers(min_value=0, max_value=2 ** 31 - 1)
151
elif dtype is np.int64:
152
elems = st.integers(min_value=0, max_value=2 ** 63 - 1)
154
elems = st.booleans()
156
raise ValueError("Unexpected dtype without elements provided")
157
return elems if filter_ is None else elems.filter(filter_)
160
def arrays(dims, dtype=np.float32, elements=None, unique=False):
162
elements = elements_of_type(dtype)
163
return hypothesis.extra.numpy.arrays(
177
dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
178
return dims_.flatmap(
179
lambda dims: arrays(dims, dtype, elements, unique=unique))
182
def tensor1d(min_len=1, max_len=64, dtype=np.float32, elements=None):
    """Strategy for a 1-D tensor whose length is drawn from [min_len, max_len]."""
    # A 1-D tensor is just `tensor` pinned to exactly one dimension.
    return tensor(1, 1, dtype, elements, min_value=min_len, max_value=max_len)
186
def segment_ids(size, is_sorted):
188
return st.just(np.empty(shape=[0], dtype=np.int32))
193
elements=st.booleans()).map(
194
lambda x: np.cumsum(x, dtype=np.int32) - x[0])
199
elements=st.integers(min_value=0, max_value=2 * size))
202
def lengths(size, min_segments=None, max_segments=None, **kwargs):
207
if min_segments is None:
209
if max_segments is None:
211
assert min_segments >= 0
212
assert min_segments <= max_segments
213
if size == 0 and max_segments == 0:
214
return st.just(np.empty(shape=[0], dtype=np.int32))
215
assert max_segments > 0, "size is not 0, need at least one segment"
217
min_value=max(min_segments - 1, 0), max_value=max_segments - 1
220
hypothesis.extra.numpy.arrays(
221
np.int32, num_borders, elements=st.integers(
222
min_value=0, max_value=size
226
lambda x: np.append(x, np.array([0, size], dtype=np.int32))
227
).map(sorted).map(np.diff)
236
segment_generator=segment_ids,
240
gen_empty = st.booleans() if allow_empty else st.just(False)
241
data_dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
242
data_dims_ = st.tuples(
243
gen_empty, data_dims_
244
).map(lambda pair: ([0] if pair[0] else []) + pair[1])
245
return data_dims_.flatmap(lambda data_dims: st.tuples(
246
arrays(data_dims, dtype, elements),
247
segment_generator(data_dims[0], is_sorted=is_sorted),
251
def lengths_tensor(min_segments=None, max_segments=None, *args, **kwargs):
    """Like `segmented_tensor`, but segments are described by a lengths vector.

    The segment-count bounds are pre-bound into the `lengths` sampler, which is
    then handed to `segmented_tensor` as its segment generator.
    """
    length_gen = functools.partial(
        lengths,
        min_segments=min_segments,
        max_segments=max_segments,
    )
    return segmented_tensor(*args, segment_generator=length_gen, **kwargs)
257
def sparse_segmented_tensor(min_dim=1, max_dim=4, dtype=np.float32,
258
is_sorted=True, elements=None, allow_empty=False,
259
segment_generator=segment_ids, itype=np.int64,
261
gen_empty = st.booleans() if allow_empty else st.just(False)
262
data_dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
263
all_dims_ = st.tuples(gen_empty, data_dims_).flatmap(
264
lambda pair: st.tuples(
266
(st.integers(min_value=1, max_value=pair[1][0]) if not pair[0]
269
return all_dims_.flatmap(lambda dims: st.tuples(
270
arrays(dims[0], dtype, elements),
271
arrays(dims[1], dtype=itype, elements=st.integers(
272
min_value=0, max_value=dims[0][0] - 1)),
273
segment_generator(dims[1], is_sorted=is_sorted),
277
def sparse_lengths_tensor(**kwargs):
    """Strategy for a sparse segmented tensor that uses lengths-style segments."""
    # Delegate to the generic sparse sampler, swapping in the lengths generator.
    return sparse_segmented_tensor(segment_generator=lengths, **kwargs)
281
def tensors(n, min_dim=1, max_dim=4, dtype=np.float32, elements=None, **kwargs):
282
dims_ = st.lists(dims(**kwargs), min_size=min_dim, max_size=max_dim)
283
return dims_.flatmap(
284
lambda dims: st.lists(
285
arrays(dims, dtype, elements),
290
def tensors1d(n, min_len=1, max_len=64, dtype=np.float32, elements=None):
292
n, 1, 1, dtype, elements, min_value=min_len, max_value=max_len
296
# Device options used by the device/gradient-checker samplers below.
cpu_do = caffe2_pb2.DeviceOption()
cuda_do = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA)
hip_do = caffe2_pb2.DeviceOption(device_type=caffe2_pb2.HIP)
# Generic "GPU" option: workspace.GpuDeviceType resolves to CUDA or HIP
# depending on how the workspace was built.
gpu_do = caffe2_pb2.DeviceOption(device_type=workspace.GpuDeviceType)
# Each list is non-empty only when the current build actually supports
# that backend, so the candidate device lists never include unusable devices.
_cuda_do_list = ([cuda_do] if workspace.has_cuda_support else [])
_hip_do_list = ([hip_do] if workspace.has_hip_support else [])
_gpu_do_list = ([gpu_do] if workspace.has_gpu_support else [])
_device_options_no_hip = [cpu_do] + _cuda_do_list
device_options = _device_options_no_hip + _hip_do_list

# CPU plus one DeviceOption per visible GPU (by device_id).
expanded_device_options = [cpu_do] + [
    caffe2_pb2.DeviceOption(device_type=workspace.GpuDeviceType, device_id=i)
    for i in range(workspace.NumGpuDevices())]
314
def device_checker_device_options():
    """Constant strategy yielding the full list of available device options.

    The device checker wants every device at once (to compare outputs across
    them), so the whole list is wrapped in a single `st.just`.
    """
    return st.just(device_options)
318
def gradient_checker_device_option():
    """Strategy sampling one device option at a time for gradient checking."""
    return st.sampled_from(device_options)
323
gc=gradient_checker_device_option(),
324
dc=device_checker_device_options()
327
gcs_cpu_only = dict(gc=st.sampled_from([cpu_do]), dc=st.just([cpu_do]))
328
gcs_cuda_only = dict(gc=st.sampled_from(_cuda_do_list), dc=st.just(_cuda_do_list))
329
gcs_gpu_only = dict(gc=st.sampled_from(_gpu_do_list), dc=st.just(_gpu_do_list))
330
gcs_no_hip = dict(gc=st.sampled_from(_device_options_no_hip), dc=st.just(_device_options_no_hip))
333
@contextlib.contextmanager
334
def temp_workspace(name=b"temp_ws"):
335
old_ws_name = workspace.CurrentWorkspace()
336
workspace.SwitchWorkspace(name, True)
338
workspace.ResetWorkspace()
339
workspace.SwitchWorkspace(old_ws_name)
346
input_device_options=None,
349
op = copy.deepcopy(op)
350
op.device_option.CopyFrom(device_option)
351
net = caffe2_pb2.NetDef()
353
net.name = op.name if op.name else "test"
355
with temp_workspace():
356
_input_device_options = input_device_options or \
357
core.InferOpBlobDevicesAsDict(op)[0]
358
for (n, b) in zip(op.input, inputs):
362
device_option=_input_device_options.get(n, device_option)
364
workspace.CreateNet(net)
365
ret = workspace.BenchmarkNet(net.name, 1, iterations, True)
373
input_device_options=None,
375
op = copy.deepcopy(op)
376
op.device_option.CopyFrom(device_option)
378
with temp_workspace():
379
if (len(op.input) > len(inputs)):
381
'must supply an input for each input on the op: %s vs %s' %
383
_input_device_options = input_device_options or \
384
core.InferOpBlobDevicesAsDict(op)[0]
385
for (n, b) in zip(op.input, inputs):
389
device_option=_input_device_options.get(n, device_option)
391
workspace.RunOperatorOnce(op)
392
outputs_to_check = list(range(len(op.output)))
394
for output_index in outputs_to_check:
395
output_blob_name = op.output[output_index]
396
output = workspace.FetchBlob(output_blob_name)
401
class HypothesisTestCase(test_util.TestCase):
403
A unittest.TestCase subclass with some helper functions for
404
utilizing the `hypothesis` (hypothesis.readthedocs.io) library.
407
def assertDeviceChecks(
413
input_device_options=None,
417
Asserts that the operator computes the same outputs, regardless of
418
which device it is executed on.
420
Useful for checking the consistency of GPU and CPU
421
implementations of operators.
425
@given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs)
426
def test_sum(self, inputs, in_place, gc, dc):
427
op = core.CreateOperator("Sum", ["X1", "X2"],
428
["Y" if not in_place else "X1"])
430
self.assertDeviceChecks(dc, op, [X1, X2], [0])
432
dc = device_checker.DeviceChecker(
434
device_options=device_options
437
dc.CheckSimple(op, inputs, outputs_to_check, input_device_options)
440
def assertGradientChecks(
450
input_device_options=None,
451
ensure_outputs_are_inferred=False,
454
Implements a standard numerical gradient checker for the operator
457
Useful for checking the consistency of the forward and
458
backward implementations of operators.
462
@given(inputs=hu.tensors(n=2), in_place=st.booleans(), **hu.gcs)
463
def test_sum(self, inputs, in_place, gc, dc):
464
op = core.CreateOperator("Sum", ["X1", "X2"],
465
["Y" if not in_place else "X1"])
467
self.assertGradientChecks(gc, op, [X1, X2], 0, [0])
469
gc = gradient_checker.GradientChecker(
472
device_option=device_option,
473
workspace_name=str(device_option),
474
input_device_options=input_device_options,
476
res, grad, grad_estimated = gc.CheckSimple(
477
op, inputs, outputs_to_check, outputs_with_grads,
479
input_device_options=input_device_options,
480
ensure_outputs_are_inferred=ensure_outputs_are_inferred,
482
self.assertEqual(grad.shape, grad_estimated.shape)
485
"Gradient check failed for input " + str(op.input[outputs_to_check])
488
def _assertGradReferenceChecks(
497
grad_blob_name = output_to_grad + '_grad'
498
grad_ops, grad_map = core.GradientRegistry.GetBackwardPass(
499
[op], {output_to_grad: grad_blob_name})
500
output_grad = workspace.FetchBlob(output_to_grad)
501
grad_ref_outputs = grad_reference(output_grad, ref_outputs, inputs)
502
workspace.FeedBlob(grad_blob_name, workspace.FetchBlob(output_to_grad))
503
workspace.RunOperatorsOnce(grad_ops)
505
self.assertEqual(len(grad_ref_outputs), len(inputs))
506
for (n, ref) in zip(op.input, grad_ref_outputs):
507
grad_names = grad_map.get(n)
510
self.assertIsNone(ref)
512
if isinstance(grad_names, core.BlobReference):
516
val_name = grad_names
519
ref_vals, ref_indices = ref
520
val_name = grad_names.values
521
vals = workspace.FetchBlob(str(val_name))
522
np.testing.assert_allclose(
527
err_msg='Gradient {0} (x) is not matching the reference (y)'
530
if ref_indices is not None:
531
indices = workspace.FetchBlob(str(grad_names.indices))
532
np.testing.assert_allclose(indices, ref_indices,
533
atol=1e-4, rtol=1e-4)
535
def _assertInferTensorChecks(self, name, shapes, types, output,
536
ensure_output_is_inferred=False):
538
not ensure_output_is_inferred or (name in shapes),
539
'Shape for {0} was not inferred'.format(name))
541
if name not in shapes:
544
output = workspace.FetchBlob(name)
545
if type(output) is np.ndarray:
546
if output.dtype == np.dtype('float64'):
547
correct_type = caffe2_pb2.TensorProto.DOUBLE
548
elif output.dtype == np.dtype('float32'):
549
correct_type = caffe2_pb2.TensorProto.FLOAT
550
elif output.dtype == np.dtype('int32'):
551
correct_type = caffe2_pb2.TensorProto.INT32
552
elif output.dtype == np.dtype('int64'):
553
correct_type = caffe2_pb2.TensorProto.INT64
555
correct_type = "unknown {}".format(np.dtype)
557
correct_type = str(type(output))
559
np.testing.assert_array_equal(
560
np.array(shapes[name]).astype(np.int32),
561
np.array(output.shape).astype(np.int32),
562
err_msg='Shape {} mismatch: {} vs. {}'.format(
567
if correct_type != caffe2_pb2.TensorProto.INT32:
569
np.testing.assert_equal(
572
err_msg='Type {} mismatch: {} vs. {}'.format(
573
name, types[name], correct_type,
576
except AssertionError as e:
579
logging.warning(str(e))
580
if os.getenv('CAFFE2_ASSERT_SHAPEINFERENCE') == '1' or ensure_output_is_inferred:
583
def assertReferenceChecks(
589
input_device_options=None,
594
outputs_to_check=None,
595
ensure_outputs_are_inferred=False,
598
This runs the reference Python function implementation
599
(effectively calling `reference(*inputs)`, and compares that
600
to the output of output, with an absolute/relative tolerance
601
given by the `threshold` parameter.
603
Useful for checking the implementation matches the Python
604
(typically NumPy) implementation of the same functionality.
608
@given(X=hu.tensor(), inplace=st.booleans(), **hu.gcs)
609
def test_softsign(self, X, inplace, gc, dc):
610
op = core.CreateOperator(
611
"Softsign", ["X"], ["X" if inplace else "Y"])
614
return (X / (1 + np.abs(X)),)
616
self.assertReferenceChecks(gc, op, [X], softsign)
618
op = copy.deepcopy(op)
619
op.device_option.CopyFrom(device_option)
621
with temp_workspace():
622
if (len(op.input) > len(inputs)):
624
'must supply an input for each input on the op: %s vs %s' %
626
_input_device_options = input_device_options or \
627
core.InferOpBlobDevicesAsDict(op)[0]
628
for (n, b) in zip(op.input, inputs):
632
device_option=_input_device_options.get(n, device_option)
634
net = core.Net("opnet")
635
net.Proto().op.extend([op])
636
test_shape_inference = False
638
(shapes, types) = workspace.InferShapesAndTypes([net])
639
test_shape_inference = True
640
except RuntimeError as e:
643
logging.warning(str(e))
644
if os.getenv('CAFFE2_ASSERT_SHAPEINFERENCE') == '1' or ensure_outputs_are_inferred:
646
workspace.RunNetOnce(net)
647
reference_outputs = reference(*inputs)
648
if not (isinstance(reference_outputs, tuple) or
649
isinstance(reference_outputs, list)):
651
"You are providing a wrong reference implementation. A "
652
"proper one should return a tuple/list of numpy arrays.")
653
if not outputs_to_check:
654
self.assertEqual(len(reference_outputs), len(op.output))
655
outputs_to_check = list(range(len(op.output)))
657
for (output_index, ref) in zip(outputs_to_check, reference_outputs):
658
output_blob_name = op.output[output_index]
659
output = workspace.FetchBlob(output_blob_name)
660
if output.dtype.kind in ('S', 'O'):
661
np.testing.assert_array_equal(output, ref)
665
np.testing.assert_allclose(
666
output, ref, atol=atol, rtol=threshold,
668
'Output {0} is not matching the reference'.format(
672
if test_shape_inference:
673
self._assertInferTensorChecks(
674
output_blob_name, shapes, types, output,
675
ensure_output_is_inferred=ensure_outputs_are_inferred)
677
if grad_reference is not None:
678
assert output_to_grad is not None, \
679
"If grad_reference is set," \
680
"output_to_grad has to be set as well"
682
with core.DeviceScope(device_option):
683
self._assertGradReferenceChecks(
684
op, inputs, reference_outputs,
685
output_to_grad, grad_reference,
690
def assertValidationChecks(
696
input_device_options=None,
701
assert len(set(list(op.input) + list(op.output))) == \
702
len(op.input) + len(op.output), \
703
"in-place ops are not supported in as_kwargs mode"
704
op = copy.deepcopy(op)
705
op.device_option.CopyFrom(device_option)
707
with temp_workspace():
708
_input_device_options = input_device_options or \
709
core.InferOpBlobDevicesAsDict(op)[0]
710
for (n, b) in zip(op.input, inputs):
714
device_option=_input_device_options.get(n, device_option)
717
workspace.RunNetOnce(init_net)
718
workspace.RunOperatorOnce(op)
719
outputs = [workspace.FetchBlob(n) for n in op.output]
721
validator(**dict(zip(
722
list(op.input) + list(op.output), inputs + outputs)))
724
validator(inputs=inputs, outputs=outputs)
726
def assertRunOpRaises(
731
input_device_options=None,
732
exception=(Exception,),
735
op = copy.deepcopy(op)
736
op.device_option.CopyFrom(device_option)
738
with temp_workspace():
739
_input_device_options = input_device_options or \
740
core.InferOpBlobDevicesAsDict(op)[0]
741
for (n, b) in zip(op.input, inputs):
745
device_option=_input_device_options.get(n, device_option)
748
self.assertRaises(exception, workspace.RunOperatorOnce, op)
750
self.assertRaisesRegex(
751
exception, regexp, workspace.RunOperatorOnce, op)