# pytorch
# 275 lines · 8.9 KB
1
2
3import caffe2.python.hypothesis_test_util as hu
4import caffe2.python.serialized_test.serialized_test_util as serial
5import numpy as np
6from caffe2.python import core, workspace
7from hypothesis import given, settings, strategies as st
8
9
def batched_boarders_and_data(
    data_min_size=5,
    data_max_size=10,
    examples_min_number=1,
    examples_max_number=4,
    example_min_size=1,
    example_max_size=3,
    dtype=np.float32,
    elements=None,
):
    """Hypothesis strategy producing a (boarders, data) pair.

    First draws a (data_size, num_examples, example_size) triple, then
    builds an int32 boarder tensor of shape [num_examples, example_size, 2]
    whose entries lie in [0, data_size], together with a 1-D data array of
    length data_size drawn with the given dtype/elements.
    """
    dim_strategy = st.tuples(
        st.integers(min_value=data_min_size, max_value=data_max_size),
        st.integers(min_value=examples_min_number, max_value=examples_max_number),
        st.integers(min_value=example_min_size, max_value=example_max_size),
    )

    def _pair_for_dims(dims):
        data_size, num_examples, example_size = dims
        boarders = hu.arrays(
            [num_examples, example_size, 2],
            dtype=np.int32,
            elements=st.integers(min_value=0, max_value=data_size),
        )
        data = hu.arrays([data_size], dtype, elements)
        return st.tuples(boarders, data)

    return dim_strategy.flatmap(_pair_for_dims)
35
36
@st.composite
def _tensor_splits(draw):
    """Draw well-formed (data, ranges, lengths, key) inputs.

    Every (batch, range) cell except one is laid out back to back in
    ``data``; the dropped cell stays (0, 0), so exactly one range is empty.
    ``key`` is a permutation of the data indices used by the key-sorted
    variant of the operator.
    """
    lengths = draw(st.lists(st.integers(1, 5), min_size=1, max_size=10))
    batch_size = draw(st.integers(1, 5))
    num_ranges = len(lengths)

    cells = [(b, r) for b in range(batch_size) for r in range(num_ranges)]
    # Drop the last element of a random permutation so one cell stays empty.
    shuffled = draw(st.permutations(cells))[:-1]

    ranges = [[(0, 0)] * num_ranges for _ in range(batch_size)]
    offset = 0
    for b, r in shuffled:
        ranges[b][r] = (offset, lengths[r])
        offset += lengths[r]

    data = draw(
        st.lists(
            st.floats(min_value=-1.0, max_value=1.0),
            min_size=offset,
            max_size=offset,
        )
    )
    key = draw(st.permutations(range(offset)))

    return (
        np.array(data).astype(np.float32),
        np.array(ranges),
        np.array(lengths),
        np.array(key).astype(np.int64),
    )
66
67
@st.composite
def _bad_tensor_splits(draw):
    """Draw deliberately malformed (data, ranges, lengths, key) inputs.

    Batch 2 gets zero-length ranges (25% of samples empty); batches 0 and 1
    get half-length ranges (50% of samples of mismatched length); batch 3 is
    left intact. Used to exercise the operator's threshold checks.
    """
    lengths = draw(st.lists(st.integers(4, 6), min_size=4, max_size=4))
    batch_size = 4
    num_ranges = len(lengths)

    cells = [(b, r) for b in range(batch_size) for r in range(num_ranges)]
    shuffled = draw(st.permutations(cells))

    ranges = [[(0, 0)] * num_ranges for _ in range(batch_size)]
    offset = 0
    for b, r in shuffled:
        # Inject bad samples depending on the batch (see docstring).
        if b == 2:
            length = 0
        elif b <= 1:
            length = lengths[r] // 2
        else:
            length = lengths[r]
        ranges[b][r] = (offset, length)
        offset += length

    data = draw(
        st.lists(
            st.floats(min_value=-1.0, max_value=1.0),
            min_size=offset,
            max_size=offset,
        )
    )
    key = draw(st.permutations(range(offset)))

    return (
        np.array(data).astype(np.float32),
        np.array(ranges),
        np.array(lengths),
        np.array(key).astype(np.int64),
    )
107
108
def gather_ranges(data, ranges):
    """Reference implementation of the GatherRanges operator.

    For each example, concatenates the data slices selected by its
    (start, length) pairs into ``output`` and records the example's total
    gathered length in ``lengths``.

    Args:
        data: 1-D indexable sequence of values.
        ranges: iterable of examples; each example is an iterable of
            (start, length) pairs.

    Returns:
        (output, lengths): flat list of gathered values and the per-example
        total lengths.
    """
    lengths = []
    output = []
    for example_ranges in ranges:
        example_length = 0
        # Tuple-unpack instead of iterating a variable named `range`, which
        # shadowed the builtin in the original; unpacking also enforces that
        # each entry is a (start, length) pair.
        for start, length in example_ranges:
            output.extend(data[start : start + length])
            example_length += length
        lengths.append(example_length)
    return output, lengths
120
121
def gather_ranges_to_dense(data, ranges, lengths):
    """Reference implementation of GatherRangesToDense (no key input).

    Produces one dense [batch_size, lengths[i]] array per range index i.
    An empty range (length 0) contributes an all-zero row; a non-empty
    range must match lengths[i] exactly.
    """
    batch_size = len(ranges)
    assert batch_size
    num_ranges = len(ranges[0])
    assert num_ranges
    assert ranges.shape[2] == 2

    outputs = []
    for range_idx in range(num_ranges):
        rows = []
        for batch_idx in range(batch_size):
            start, length = ranges[batch_idx][range_idx]
            if length:
                assert length == lengths[range_idx]
                rows.append(data[start : start + length])
            else:
                # Empty range: pad this sample with zeros.
                rows.append([0] * lengths[range_idx])
        outputs.append(np.array(rows))
    return outputs
140
141
def gather_ranges_to_dense_with_key(data, ranges, key, lengths):
    """Reference implementation of GatherRangesToDense with a key input.

    Identical to gather_ranges_to_dense, except that within each gathered
    slice the data values are reordered by ascending key.
    """
    batch_size = len(ranges)
    assert batch_size
    num_ranges = len(ranges[0])
    assert num_ranges
    assert ranges.shape[2] == 2

    outputs = []
    for range_idx in range(num_ranges):
        rows = []
        for batch_idx in range(batch_size):
            start, length = ranges[batch_idx][range_idx]
            if length:
                assert length == lengths[range_idx]
                stop = start + length
                # Sort this slice's values by their keys.
                pairs = sorted(zip(key[start:stop], data[start:stop]), key=lambda kv: kv[0])
                rows.append([value for _, value in pairs])
            else:
                # Empty range: pad this sample with zeros.
                rows.append([0] * lengths[range_idx])
        outputs.append(np.array(rows))
    return outputs
165
166
class TestGatherRanges(serial.SerializedTestCase):
    """Tests for the Caffe2 GatherRanges / GatherRangesToDense operators."""

    @given(boarders_and_data=batched_boarders_and_data(), **hu.gcs_cpu_only)
    @settings(deadline=10000)
    def test_gather_ranges(self, boarders_and_data, gc, dc):
        """GatherRanges matches the gather_ranges reference implementation."""
        boarders, data = boarders_and_data

        def boarders_to_range(boarders):
            # Convert an unordered pair of boarders into a (start, length) pair.
            assert len(boarders) == 2
            boarders = sorted(boarders)
            return [boarders[0], boarders[1] - boarders[0]]

        ranges = np.apply_along_axis(boarders_to_range, 2, boarders)

        self.assertReferenceChecks(
            device_option=gc,
            op=core.CreateOperator(
                "GatherRanges", ["data", "ranges"], ["output", "lengths"]
            ),
            inputs=[data, ranges],
            reference=gather_ranges,
        )

    @given(tensor_splits=_tensor_splits(), **hu.gcs_cpu_only)
    @settings(deadline=10000)
    def test_gather_ranges_split(self, tensor_splits, gc, dc):
        """GatherRangesToDense (no key) matches the dense reference."""
        data, ranges, lengths, _ = tensor_splits

        self.assertReferenceChecks(
            device_option=gc,
            op=core.CreateOperator(
                "GatherRangesToDense",
                ["data", "ranges"],
                ["X_{}".format(i) for i in range(len(lengths))],
                lengths=lengths,
            ),
            inputs=[data, ranges, lengths],
            reference=gather_ranges_to_dense,
        )

    @given(tensor_splits=_tensor_splits(), **hu.gcs_cpu_only)
    def test_gather_ranges_with_key_split(self, tensor_splits, gc, dc):
        """GatherRangesToDense with a key input matches the keyed reference."""
        data, ranges, lengths, key = tensor_splits

        self.assertReferenceChecks(
            device_option=gc,
            op=core.CreateOperator(
                "GatherRangesToDense",
                ["data", "ranges", "key"],
                ["X_{}".format(i) for i in range(len(lengths))],
                lengths=lengths,
            ),
            inputs=[data, ranges, key, lengths],
            reference=gather_ranges_to_dense_with_key,
        )

    def test_shape_and_type_inference(self):
        """Shape/type inference propagates through GatherRanges."""
        with hu.temp_workspace("shape_type_inf_int32"):
            net = core.Net("test_net")
            net.ConstantFill([], "ranges", shape=[3, 5, 2], dtype=core.DataType.INT32)
            net.ConstantFill([], "values", shape=[64], dtype=core.DataType.INT64)
            net.GatherRanges(["values", "ranges"], ["values_output", "lengths_output"])
            (shapes, types) = workspace.InferShapesAndTypes([net], {})

            self.assertEqual(shapes["values_output"], [64])
            self.assertEqual(types["values_output"], core.DataType.INT64)
            self.assertEqual(shapes["lengths_output"], [3])
            self.assertEqual(types["lengths_output"], core.DataType.INT32)

    @given(tensor_splits=_bad_tensor_splits(), **hu.gcs_cpu_only)
    @settings(deadline=10000)
    def test_empty_range_check(self, tensor_splits, gc, dc):
        """Threshold attributes reject inputs with too many bad samples.

        _bad_tensor_splits yields 25% empty and 50% mismatched samples;
        the operator must run or raise depending on the configured
        min_observation / max_mismatched_ratio / max_empty_ratio.
        """
        data, ranges, lengths, key = tensor_splits

        workspace.FeedBlob("data", data)
        workspace.FeedBlob("ranges", ranges)
        workspace.FeedBlob("key", key)

        def getOpWithThreshold(
            min_observation=2, max_mismatched_ratio=0.5, max_empty_ratio=None
        ):
            return core.CreateOperator(
                "GatherRangesToDense",
                ["data", "ranges", "key"],
                ["X_{}".format(i) for i in range(len(lengths))],
                lengths=lengths,
                min_observation=min_observation,
                max_mismatched_ratio=max_mismatched_ratio,
                max_empty_ratio=max_empty_ratio,
            )

        # Default thresholds tolerate the bad samples.
        workspace.RunOperatorOnce(getOpWithThreshold())

        # Too few observations to trigger the mismatched-ratio check.
        workspace.RunOperatorOnce(
            getOpWithThreshold(max_mismatched_ratio=0.3, min_observation=50)
        )

        # 50% mismatched > 0.3 once min_observation is reached.
        with self.assertRaises(RuntimeError):
            workspace.RunOperatorOnce(
                getOpWithThreshold(max_mismatched_ratio=0.3, min_observation=5)
            )

        # 25% empty > 0.01 empty-ratio limit.
        with self.assertRaises(RuntimeError):
            workspace.RunOperatorOnce(
                getOpWithThreshold(min_observation=50, max_empty_ratio=0.01)
            )
272
273
# Allow running this test module directly.
if __name__ == "__main__":
    import unittest

    unittest.main()
278