5
import datetime

import numpy as np

# Imported only for its side effects (the name is never referenced below).
# NOTE(review): presumably this has to stay ahead of the other caffe2.python
# imports -- confirm before reordering.
import caffe2.python.fakelowp.init_shared_libs  # noqa: F401
from hypothesis import given, settings
from hypothesis import strategies as st
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace
from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net
from caffe2.python.fakelowp.test_utils import print_test_debug_info
import caffe2.python.serialized_test.serialized_test_util as serial
16
# Initialise the caffe2 workspace once at import time, passing the three
# Glow flags below on the argv-style list that GlobalInit accepts.
# NOTE(review): flag semantics are inferred from their names only (fp16
# conversion, fp16 fused scale/offset, fp16 SLS accumulation) -- confirm
# against the Glow flag definitions.
workspace.GlobalInit(
    [
        "caffe2",
        "--glow_global_fp16=1",
        "--glow_global_fused_scale_offset_fp16=1",
        "--glow_global_force_sls_fp16_accum=1",
    ]
)
21
# NOTE(review): the copy of this file that was reviewed had lost a number of
# short lines (operator output lists, closing brackets, a few assignments).
# Every statement or literal value that had to be reconstructed is marked
# with its own NOTE(review) below -- confirm each against version control.

# Blob names consumed, in this order, by both the lowered and reference nets.
_SLWS_INPUT_BLOBS = ["quantized_data", "weights", "indices", "lengths"]


def _make_single_op_net(name, op_type):
    """Return a NetDef called *name* that holds one *op_type* operator.

    The operator reads the four blobs in ``_SLWS_INPUT_BLOBS`` and the net
    declares ``"Y"`` as its only external output.
    """
    net = caffe2_pb2.NetDef()
    net.name = name
    net.external_input.extend(_SLWS_INPUT_BLOBS)
    net.external_output.append("Y")
    # NOTE(review): the operator's output list (["Y"]) was reconstructed from
    # the external_output declaration and the later FetchBlob('Y') calls.
    net.op.add().CopyFrom(
        core.CreateOperator(op_type, _SLWS_INPUT_BLOBS, ["Y"])
    )
    return net


def _quantize_data(data):
    """Feed *data* as blob ``data`` and run the 4-bit rowwise quantizer once."""
    workspace.FeedBlob("data", data)
    # NOTE(review): input/output blob names were reconstructed from the
    # FeedBlob("data", ...) call and the nets' "quantized_data" input.
    workspace.RunOperatorOnce(
        core.CreateOperator(
            "FloatToFused4BitRowwiseQuantized",
            ["data"],
            ["quantized_data"],
        )
    )


def _run_glow_and_reference(indices, lengths, weights,
                            max_batch_size, max_seq_size):
    """Run the Onnxifi-lowered net and the reference net; return both outputs.

    Expects blob ``quantized_data`` to be present in the workspace already.
    Asserts that lowering produced exactly one ``Onnxifi`` operator.

    Returns:
        ``(Y_glow, Y_c2)``: blob ``Y`` fetched after running the lowered net
        and after running the reference net, respectively.
    """
    pred_net = _make_single_op_net(
        "pred", "SparseLengthsWeightedSumFused4BitRowwise")
    # NOTE(review): the reference net's name ("ref") was reconstructed; it
    # only has to be non-empty and different from "pred" for RunNet below.
    ref_net = _make_single_op_net(
        "ref", "SparseLengthsWeightedSumFused4BitRowwiseFakeFP16NNPI")

    # NOTE(review): the positional arguments and the debug / adjust_batch /
    # use_onnx keywords were reconstructed -- only max_batch_size and
    # max_seq_size were visible.  Confirm.
    pred_net_onnxified = onnxifi_caffe2_net(
        pred_net,
        {},
        max_batch_size=max_batch_size,
        max_seq_size=max_seq_size,
        debug=True,
        adjust_batch=True,
        use_onnx=False,
    )

    # The whole net must have been lowered into a single Onnxifi op.
    num_onnxified_ops = sum(
        1 for op in pred_net_onnxified.op if op.type == "Onnxifi")
    np.testing.assert_equal(num_onnxified_ops, 1)

    workspace.FeedBlob("indices", indices)
    workspace.FeedBlob("lengths", lengths)
    workspace.FeedBlob("weights", weights)

    workspace.CreateNet(pred_net_onnxified)
    workspace.CreateNet(ref_net)

    # Both nets write blob "Y", so fetch it right after each run.
    workspace.RunNet(pred_net_onnxified.name)
    Y_glow = workspace.FetchBlob('Y')

    workspace.RunNet(ref_net.name)
    Y_c2 = workspace.FetchBlob('Y')

    return Y_glow, Y_c2


class SparseLengthsSum4BitFakeNNPIFp16Test(serial.SerializedTestCase):
    """Compare SparseLengthsWeightedSumFused4BitRowwise lowered through
    Onnxifi against the ...FakeFP16NNPI reference operator."""

    @given(seed=st.integers(0, 65535))
    @settings(deadline=datetime.timedelta(seconds=10))
    def test_slws_fused_4bit_rowwise_all_same(self, seed):
        """Constant table, all indices zero, unit weights."""
        # NOTE(review): seeding from the hypothesis-drawn seed was
        # reconstructed (seed is otherwise unused) -- confirm.
        np.random.seed(seed)
        workspace.ResetWorkspace()

        # NOTE(review): n, m and max_segments were not visible in the reviewed
        # copy; the values below are reconstructed -- confirm.
        n = 1
        m = 2
        data = np.ones((n, m)).astype(np.float32) * 0.2 - 0.1

        max_segments = 5
        max_segment_length = 100
        num_lengths = np.random.randint(1, max_segments + 1)
        lengths = np.random.randint(0, max_segment_length + 1,
                                    size=num_lengths).astype(np.int32)
        num_indices = np.sum(lengths)
        indices = np.zeros(num_indices, dtype=np.int64)
        weights = np.ones(len(indices)).astype(np.float32)

        _quantize_data(data)
        print("quantized", workspace.FetchBlob("quantized_data"))

        Y_glow, Y_c2 = _run_glow_and_reference(
            indices, lengths, weights,
            max_batch_size=max_segments,
            max_seq_size=max_segment_length,
        )

        if not np.allclose(Y_c2, Y_glow):
            # NOTE(review): dictionary entries other than "diff" and
            # "rowwise_diff" were reconstructed -- confirm.
            print_test_debug_info(
                "slws_fused_4bit_rowwise",
                {"seed": seed,
                 "indices": indices,
                 "data": data,
                 "lengths": lengths,
                 "weights": weights,
                 "Y_c2": Y_c2,
                 "Y_glow": Y_glow,
                 "diff": Y_glow - Y_c2,
                 "rowwise_diff": (Y_glow - Y_c2)[:, 0]})
            # Dumping debug info alone would let a mismatch pass silently.
            raise AssertionError(
                "Onnxifi output differs from the FakeFP16NNPI reference")

    @given(
        seed=st.integers(0, 65535),
        num_rows=st.integers(2, 20),
        embedding_dim=st.sampled_from([8, 12, 16, 24, 32, 54, 64, 72, 128]),
        batch_size=st.integers(1, 32),
        max_weight=st.integers(0, 1),
    )
    @settings(deadline=datetime.timedelta(seconds=10))
    def test_slws_fused_4bit_rowwise(self, seed, num_rows, embedding_dim, batch_size, max_weight):
        """Random table, random segment lengths/indices, centred weights."""
        workspace.ResetWorkspace()
        # NOTE(review): seeding was reconstructed (seed is otherwise unused).
        np.random.seed(seed)

        data = np.random.rand(num_rows, embedding_dim).astype(np.float32)
        # NOTE(review): this down-scaling step was reconstructed -- confirm
        # that it exists and that the factor is 1e-3.
        data = data * 1e-3

        # Segment lengths and row indices are both drawn from [1, num_rows).
        lengths = np.random.choice(np.arange(1, num_rows), batch_size).astype(np.int32)
        _indices = []
        for length in lengths:
            _indices.extend(np.random.choice(np.arange(1, num_rows), length))
        indices = np.asarray(_indices).astype(np.int64)

        # NOTE(review): the uniform() arguments were reconstructed; only the
        # trailing "- max_weight / 2.0" shift was visible.
        weights = np.random.uniform(
            low=0,
            high=max_weight,
            size=[len(indices)]
        ).astype(np.float32) - max_weight / 2.0

        _quantize_data(data)

        Y_glow, Y_c2 = _run_glow_and_reference(
            indices, lengths, weights,
            max_batch_size=batch_size,
            max_seq_size=np.max(lengths),
        )

        if not np.allclose(Y_c2, Y_glow):
            # NOTE(review): entries above "Y_glow" were reconstructed.
            print_test_debug_info(
                "slws_fused_4bit_rowwise",
                {
                    "seed": seed,
                    "indices": indices,
                    "data": data.shape,
                    "lengths": lengths,
                    "weights": weights,
                    "Y_c2": Y_c2.shape,
                    "Y_glow": Y_glow.shape,
                    "diff": Y_glow - Y_c2,
                    "rowwise_diff": (Y_glow - Y_c2)[:, 0]
                }
            )
            # Dumping debug info alone would let a mismatch pass silently.
            raise AssertionError(
                "Onnxifi output differs from the FakeFP16NNPI reference")
214
if __name__ == '__main__':