1
import caffe2.python.fakelowp.init_shared_libs
3
from caffe2.python import core, workspace
4
from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net
5
from hypothesis import given, strategies as st, settings
6
from caffe2.python.fakelowp.test_utils import print_test_debug_info
7
import caffe2.python.serialized_test.serialized_test_util as serial
10
# Process-wide Caffe2 initialisation, executed once at import time.
# The flags lower the Caffe2 log level and switch on the Glow fp16 options
# (global fp16, clipping of the quantization range to fp16, fp16 constants).
core.GlobalInit(
    [
        "caffe2",
        "--caffe2_log_level=-3",
        "--glow_global_fp16=1",
        "--glow_clip_quant_range_to_fp16=1",
        "--glow_global_fp16_constants=1",
    ]
)
18
class Int8OpsTest(serial.SerializedTestCase):
19
def _get_scale_zp(self, tensor):
20
tensor_max = np.max(tensor)
21
tensor_min = min(0, np.min(tensor))
22
scale = np.float32(np.float16((tensor_max - tensor_min) / 255.0))
24
scale = np.float32(1e-6)
25
zero_point = 0 - tensor_min / scale
26
zero_point = int(round(np.clip(zero_point, 0, 255.0)))
27
return (scale, zero_point)
30
# Hypothesis strategy arguments for test_int8_quantize: square size n in
# [2, 1024], a numpy RNG seed, and a boolean non_zero_offset.
# NOTE(review): the decorator opener and its closing parenthesis are not
# present in this copy, and several call argument lines further down are
# absent as well — confirm against the upstream file before relying on it.
n=st.integers(2, 1024),
31
rand_seed=st.integers(0, 65534),
32
non_zero_offset=st.booleans()
34
# Per-example Hypothesis deadline of 50 seconds.
@settings(deadline=datetime.timedelta(seconds=50))
35
def test_int8_quantize(self, n, rand_seed, non_zero_offset):
36
print("n={}, rand_seed={}".format(n, rand_seed))
37
# Seed numpy so the printed (n, rand_seed) pair reproduces the input data.
np.random.seed(rand_seed)
38
workspace.ResetWorkspace()
41
# Two alternative inputs, both rounded through float16 and widened back to
# float32: uniform in [-1, 1) or np.random.rand in [0, 1).
# NOTE(review): presumably selected by non_zero_offset; the branch lines are
# not visible here.
X_fp32 = np.random.uniform(-1, 1, size=(n, n)).astype(np.float16) \
44
X_fp32 = np.random.rand(n, n).astype(np.float16).astype(np.float32)
46
# Identity weight and zero bias for the FC layer.
W_fp32 = np.identity(n, dtype=np.float32)
47
b_fp32 = np.zeros((n,), dtype=np.float32)
49
# Quantization parameters are derived from the input tensor itself.
X_scale, X_zero_point = self._get_scale_zp(X_fp32)
51
workspace.FeedBlob("X", X_fp32)
52
workspace.FeedBlob("W", W_fp32)
53
workspace.FeedBlob("b", b_fp32)
55
# NOTE(review): the operator created here is not visible; only the
# save_unpacked_weights=True argument survives in this copy.
workspace.RunOperatorOnce(
61
save_unpacked_weights=True,
66
# Reference net: three ops appended in order (quantize, FC, dequantize),
# all using the *NNPI operator variants.
ref_net = core.Net("net")
67
ref_net.Int8QuantizeNNPI(
71
Y_zero_point=X_zero_point
73
ref_net.Int8FCFakeAcc32NNPI(
74
["X_int8", "W_int8", "b"],
77
# The FC output reuses the input zero point.
Y_zero_point=X_zero_point,
79
ref_net.Int8DequantizeNNPI(
83
ref_net.Proto().external_output.append("Y")
86
# Run the reference net once and keep its output "Y" for comparison.
workspace.RunNetOnce(ref_net)
87
Y_fbgemm = workspace.FetchBlob("Y")
90
# Rename the three ops in place to their non-NNPI type names.
# NOTE(review): presumably so the net handed to onnxifi_caffe2_net uses the
# standard op types — the call's positional arguments are not visible here.
ref_net.Proto().op[0].type = "Int8Quantize"
91
ref_net.Proto().op[1].type = "Int8FC"
92
ref_net.Proto().op[2].type = "Int8Dequantize"
93
net_onnxified = onnxifi_caffe2_net(
99
weight_names=["W_int8", "b"],
101
# Count the ops of type "Onnxifi" in the transformed net; exactly one is
# required by the assertion below.
num_onnxified_ops = sum(
102
1 if o.type == "Onnxifi" else 0 for o in net_onnxified.op
104
np.testing.assert_equal(num_onnxified_ops, 1)
105
# Execute the transformed net and fetch the same output blob "Y".
workspace.CreateNet(net_onnxified)
106
workspace.RunNet(net_onnxified.name)
107
Y_glow = workspace.FetchBlob("Y")
109
# On mismatch (np.allclose default tolerances) gather the absolute
# difference and dump debug information.
if not np.allclose(Y_glow, Y_fbgemm):
110
diff_Y = np.abs(Y_glow - Y_fbgemm)
111
print_test_debug_info(
119
"Y_fbgemm": Y_fbgemm,
122
# Largest absolute difference along axis 1.
"maxdiff": diff_Y.max(axis=1),
128
# Hypothesis strategy arguments for the general int8 FC test: n, m and k are
# each drawn from [1, 1024], plus a numpy RNG seed and quantize_bias.
# NOTE(review): the decorator opener, the `def` line and the strategy for `f`
# are not present in this copy — confirm against the upstream file.
n=st.integers(1, 1024),
129
m=st.integers(1, 1024),
130
k=st.integers(1, 1024),
132
rand_seed=st.integers(0, 65534),
133
# Only False is sampled, so every `if quantize_bias` expression below takes
# its else arm.
quantize_bias=st.sampled_from([False]),
135
# Per-example Hypothesis deadline of 50 seconds.
@settings(deadline=datetime.timedelta(seconds=50))
137
self, n, m, k, rand_seed, quantize_bias, f
140
f"n={n}, m={m}, k={k}, rand_seed={rand_seed}, quantize_bias={quantize_bias}"
142
np.random.seed(rand_seed)
143
workspace.ResetWorkspace()
146
# Random float32 data in [-ff, ff): X is (m, k), W is (n, k), b has n entries.
# NOTE(review): `ff` is not defined in the visible lines; presumably derived
# from the `f` parameter — confirm.
X_fp32 = np.random.uniform(-ff, ff, size=(m, k)).astype(np.float32)
147
W_fp32 = np.random.uniform(-ff, ff, size=(n, k)).astype(np.float32)
148
b_fp32 = np.random.uniform(-ff, ff, size=(n)).astype(np.float32)
150
# Input parameters come from X; output parameters come from the float32
# result X @ W.T + b computed with numpy.
X_scale, X_zero_point = self._get_scale_zp(X_fp32)
151
Y_fp32 = np.dot(X_fp32, W_fp32.T) + b_fp32
152
Y_scale, Y_zero_point = self._get_scale_zp(Y_fp32)
154
workspace.FeedBlob("X", X_fp32)
155
workspace.FeedBlob("W", W_fp32)
156
workspace.FeedBlob("b", b_fp32)
158
# NOTE(review): the operator created here is not visible; only its
# input/output blob lists and save_unpacked_weights=True survive.
workspace.RunOperatorOnce(
161
# The bias is only passed in, and "b_int32" only produced, when
# quantize_bias is set.
["W", "b"] if quantize_bias else ["W"],
162
["W_int8", "b_int32"] if quantize_bias else ["W_int8"],
164
save_unpacked_weights=True,
169
# Reference net: three ops appended in order (quantize, FC, dequantize),
# all using the *NNPI operator variants.
ref_net = core.Net("net")
170
ref_net.Int8QuantizeNNPI(
174
Y_zero_point=X_zero_point
176
ref_net.Int8FCFakeAcc32NNPI(
177
["X_int8", "W_int8", "b_int32" if quantize_bias else "b"],
180
# The FC output uses the zero point derived from Y_fp32.
Y_zero_point=Y_zero_point,
182
ref_net.Int8DequantizeNNPI(
186
ref_net.Proto().external_output.append("Y")
189
# Run the reference net once and keep its output "Y" for comparison.
workspace.RunNetOnce(ref_net)
190
Y_fbgemm = workspace.FetchBlob("Y")
193
# Rename the three ops in place to their non-NNPI type names.
# NOTE(review): presumably so the net handed to onnxifi_caffe2_net uses the
# standard op types — the call's positional arguments are not visible here.
ref_net.Proto().op[0].type = "Int8Quantize"
194
ref_net.Proto().op[1].type = "Int8FC"
195
ref_net.Proto().op[2].type = "Int8Dequantize"
196
net_onnxified = onnxifi_caffe2_net(
202
weight_names=["W_int8", "b_int32"] if quantize_bias else ["W_int8", "b"],
204
# Count the ops of type "Onnxifi" in the transformed net; exactly one is
# required by the assertion below.
num_onnxified_ops = sum(
205
1 if o.type == "Onnxifi" else 0 for o in net_onnxified.op
207
np.testing.assert_equal(num_onnxified_ops, 1)
208
# Execute the transformed net and fetch the same output blob "Y".
workspace.CreateNet(net_onnxified)
209
workspace.RunNet(net_onnxified.name)
210
Y_glow = workspace.FetchBlob("Y")
212
# On mismatch (np.allclose default tolerances) gather the absolute
# difference and dump debug information.
if not np.allclose(Y_glow, Y_fbgemm):
213
diff_Y = np.abs(Y_glow - Y_fbgemm)
214
print_test_debug_info(
224
"Y_fbgemm": Y_fbgemm,
227
# Largest absolute difference along axis 1.
"maxdiff": diff_Y.max(axis=1),
234
# Hypothesis strategy arguments for test_int8_small_input.
# NOTE(review): the decorator opener and the strategy for `n` are not present
# in this copy, and several call argument lines further down are absent as
# well — confirm against the upstream file.
rand_seed=st.integers(0, 65534)
236
# Per-example Hypothesis deadline of 10 seconds.
@settings(deadline=datetime.timedelta(seconds=10))
237
def test_int8_small_input(self, n, rand_seed):
238
print("n={}, rand_seed={}".format(n, rand_seed))
239
# Seed numpy so the printed (n, rand_seed) pair reproduces the input data.
np.random.seed(rand_seed)
240
workspace.ResetWorkspace()
242
# Small-magnitude, strictly positive input: uniform in [0.01, 0.03).
X_fp32 = np.random.uniform(0.01, 0.03, size=(n, n)).astype(np.float32)
243
# Identity weight and zero bias for the FC layer.
W_fp32 = np.identity(n, dtype=np.float32)
244
b_fp32 = np.zeros((n,), dtype=np.float32)
246
# Quantization parameters are derived from the input tensor itself.
X_scale, X_zero_point = self._get_scale_zp(X_fp32)
248
workspace.FeedBlob("X", X_fp32)
249
workspace.FeedBlob("W", W_fp32)
250
workspace.FeedBlob("b", b_fp32)
252
# NOTE(review): the operator created here is not visible; only the
# save_unpacked_weights=True argument survives in this copy.
workspace.RunOperatorOnce(
258
save_unpacked_weights=True,
263
# Reference net: three ops appended in order (quantize, FC, dequantize),
# all using the *NNPI operator variants.
ref_net = core.Net("net")
264
ref_net.Int8QuantizeNNPI(
268
Y_zero_point=X_zero_point
270
ref_net.Int8FCFakeAcc32NNPI(
271
["X_int8", "W_int8", "b"],
274
# The FC output reuses the input zero point.
Y_zero_point=X_zero_point,
276
ref_net.Int8DequantizeNNPI(
280
ref_net.Proto().external_output.append("Y")
283
# Run the reference net once and keep its output "Y" for comparison.
workspace.RunNetOnce(ref_net)
284
Y_fbgemm = workspace.FetchBlob("Y")
287
# Rename the three ops in place to their non-NNPI type names.
# NOTE(review): presumably so the net handed to onnxifi_caffe2_net uses the
# standard op types — the call's positional arguments are not visible here.
ref_net.Proto().op[0].type = "Int8Quantize"
288
ref_net.Proto().op[1].type = "Int8FC"
289
ref_net.Proto().op[2].type = "Int8Dequantize"
290
net_onnxified = onnxifi_caffe2_net(
296
weight_names=["W_int8", "b"],
298
# Count the ops of type "Onnxifi" in the transformed net; exactly one is
# required by the assertion below.
num_onnxified_ops = sum(
299
1 if o.type == "Onnxifi" else 0 for o in net_onnxified.op
301
np.testing.assert_equal(num_onnxified_ops, 1)
302
# Execute the transformed net and fetch the same output blob "Y".
workspace.CreateNet(net_onnxified)
303
workspace.RunNet(net_onnxified.name)
304
Y_glow = workspace.FetchBlob("Y")
306
# On mismatch (np.allclose default tolerances) gather the absolute
# difference and dump debug information.
if not np.allclose(Y_glow, Y_fbgemm):
307
diff_Y = np.abs(Y_glow - Y_fbgemm)
308
print_test_debug_info(
316
"Y_fbgemm": Y_fbgemm,
319
# Largest absolute difference along axis 1.
"maxdiff": diff_Y.max(axis=1),