16
#include "requantize.h"
17
#include "fused_activation.h"
21
// Quantize a float to a signed 8-bit value with round-to-nearest and
// symmetric saturation: results are clamped to [-127, 127] (never -128),
// matching ncnn's int8 convention.
static inline signed char float2int8(float v)
{
    const int q = static_cast<int>(round(v));
    if (q >= 127) return 127;
    if (q <= -127) return -127;
    return (signed char)q;
}
29
// Requantize layer: converts int32 accumulator blobs back to int8.
// Takes exactly one input blob and produces one output blob (see the
// single-blob forward(const Mat&, Mat&, ...) overload below), so the
// one_blob_only flag must be set. In-place operation is impossible
// because the input (int32) and output (int8) element sizes differ.
Requantize::Requantize()
{
    one_blob_only = true;
    support_inplace = false;
}
35
int Requantize::load_param(const ParamDict& pd)
42
scale_in_data_size = pd.get(0, 1);
43
scale_out_data_size = pd.get(1, 1);
44
bias_data_size = pd.get(2, 0);
45
activation_type = pd.get(3, 0);
46
activation_params = pd.get(4, Mat());
51
int Requantize::load_model(const ModelBin& mb)
53
scale_in_data = mb.load(scale_in_data_size, 1);
54
if (scale_in_data.empty())
57
scale_out_data = mb.load(scale_out_data_size, 1);
58
if (scale_out_data.empty())
63
bias_data = mb.load(bias_data_size, 1);
64
if (bias_data.empty())
71
int Requantize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
73
int dims = bottom_blob.dims;
77
int w = bottom_blob.w;
79
top_blob.create(w, (size_t)1u, opt.blob_allocator);
83
const int* intptr = bottom_blob;
84
signed char* ptr = top_blob;
86
if (scale_in_data_size == 1 && scale_out_data_size == 1)
88
const float scale_in = scale_in_data[0];
89
const float scale_out = scale_out_data[0];
91
if (bias_data_size == 0)
93
#pragma omp parallel for num_threads(opt.num_threads)
94
for (int i = 0; i < w; i++)
96
float v = intptr[i] * scale_in;
97
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
100
else if (bias_data_size == 1)
102
const float bias = bias_data[0];
104
#pragma omp parallel for num_threads(opt.num_threads)
105
for (int i = 0; i < w; i++)
107
float v = intptr[i] * scale_in + bias;
108
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
113
#pragma omp parallel for num_threads(opt.num_threads)
114
for (int i = 0; i < w; i++)
116
float v = intptr[i] * scale_in + bias_data[i];
117
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
121
else if (scale_in_data_size == 1 && scale_out_data_size > 1)
123
const float scale_in = scale_in_data[0];
125
if (bias_data_size == 0)
127
#pragma omp parallel for num_threads(opt.num_threads)
128
for (int i = 0; i < w; i++)
130
float v = intptr[i] * scale_in;
131
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
134
else if (bias_data_size == 1)
136
const float bias = bias_data[0];
138
#pragma omp parallel for num_threads(opt.num_threads)
139
for (int i = 0; i < w; i++)
141
float v = intptr[i] * scale_in + bias;
142
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
147
#pragma omp parallel for num_threads(opt.num_threads)
148
for (int i = 0; i < w; i++)
150
float v = intptr[i] * scale_in + bias_data[i];
151
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
155
else if (scale_in_data_size > 1 && scale_out_data_size == 1)
157
const float scale_out = scale_out_data[0];
159
if (bias_data_size == 0)
161
#pragma omp parallel for num_threads(opt.num_threads)
162
for (int i = 0; i < w; i++)
164
float v = intptr[i] * scale_in_data[i];
165
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
168
else if (bias_data_size == 1)
170
const float bias = bias_data[0];
172
#pragma omp parallel for num_threads(opt.num_threads)
173
for (int i = 0; i < w; i++)
175
float v = intptr[i] * scale_in_data[i] + bias;
176
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
181
#pragma omp parallel for num_threads(opt.num_threads)
182
for (int i = 0; i < w; i++)
184
float v = intptr[i] * scale_in_data[i] + bias_data[i];
185
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
191
if (bias_data_size == 0)
193
#pragma omp parallel for num_threads(opt.num_threads)
194
for (int i = 0; i < w; i++)
196
float v = intptr[i] * scale_in_data[i];
197
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
200
else if (bias_data_size == 1)
202
const float bias = bias_data[0];
204
#pragma omp parallel for num_threads(opt.num_threads)
205
for (int i = 0; i < w; i++)
207
float v = intptr[i] * scale_in_data[i] + bias;
208
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
213
#pragma omp parallel for num_threads(opt.num_threads)
214
for (int i = 0; i < w; i++)
216
float v = intptr[i] * scale_in_data[i] + bias_data[i];
217
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
225
int w = bottom_blob.w;
226
int h = bottom_blob.h;
228
top_blob.create(w, h, (size_t)1u, opt.blob_allocator);
229
if (top_blob.empty())
232
if (bias_data_size == 0)
234
#pragma omp parallel for num_threads(opt.num_threads)
235
for (int i = 0; i < h; i++)
237
const int* intptr = bottom_blob.row<const int>(i);
238
signed char* ptr = top_blob.row<signed char>(i);
240
const float scale_in = scale_in_data_size == 1 ? scale_in_data[0] : scale_in_data[i];
241
const float scale_out = scale_out_data_size == 1 ? scale_out_data[0] : scale_out_data[i];
243
for (int j = 0; j < w; j++)
245
float v = intptr[j] * scale_in;
246
ptr[j] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
252
#pragma omp parallel for num_threads(opt.num_threads)
253
for (int i = 0; i < h; i++)
255
const int* intptr = bottom_blob.row<const int>(i);
256
signed char* ptr = top_blob.row<signed char>(i);
258
const float scale_in = scale_in_data_size == 1 ? scale_in_data[0] : scale_in_data[i];
259
const float scale_out = scale_out_data_size == 1 ? scale_out_data[0] : scale_out_data[i];
260
const float bias = bias_data_size == 1 ? bias_data[0] : bias_data[i];
262
for (int j = 0; j < w; j++)
264
float v = intptr[j] * scale_in + bias;
265
ptr[j] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
273
int w = bottom_blob.w;
274
int h = bottom_blob.h;
275
int channels = bottom_blob.c;
278
top_blob.create(w, h, channels, (size_t)1u, opt.blob_allocator);
279
if (top_blob.empty())
282
if (bias_data_size == 0)
284
#pragma omp parallel for num_threads(opt.num_threads)
285
for (int q = 0; q < channels; q++)
287
const int* intptr = bottom_blob.channel(q);
288
signed char* ptr = top_blob.channel(q);
290
const float scale_in = scale_in_data_size == 1 ? scale_in_data[0] : scale_in_data[q];
291
const float scale_out = scale_out_data_size == 1 ? scale_out_data[0] : scale_out_data[q];
293
for (int i = 0; i < size; i++)
295
float v = intptr[i] * scale_in;
296
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
302
#pragma omp parallel for num_threads(opt.num_threads)
303
for (int q = 0; q < channels; q++)
305
const int* intptr = bottom_blob.channel(q);
306
signed char* ptr = top_blob.channel(q);
308
const float scale_in = scale_in_data_size == 1 ? scale_in_data[0] : scale_in_data[q];
309
const float scale_out = scale_out_data_size == 1 ? scale_out_data[0] : scale_out_data[q];
310
const float bias = bias_data_size == 1 ? bias_data[0] : bias_data[q];
312
for (int i = 0; i < size; i++)
314
float v = intptr[i] * scale_in + bias;
315
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);