16
#include "requantize.h"
17
#include "fused_activation.h"
21
// Quantize a float to a signed 8-bit value with round-to-nearest and
// symmetric saturation: results are clamped to [-127, 127] (never -128),
// matching ncnn's int8 convention.
static inline signed char float2int8(float v)
{
    const int q = static_cast<int>(round(v));
    if (q >= 127) return 127;
    if (q <= -127) return -127;
    return (signed char)q;
}
29
// Requantize layer: converts int32 accumulator blobs back to int8.
// Takes exactly one input blob and produces one output blob (see the
// single-blob forward(const Mat&, Mat&, ...) overload below), so the
// one_blob_only flag must be set. In-place operation is impossible
// because the input (int32) and output (int8) element sizes differ.
Requantize::Requantize()
{
    one_blob_only = true;
    support_inplace = false;
}
35
int Requantize::load_param(const ParamDict& pd)
42
scale_in_data_size = pd.get(0, 1);
43
scale_out_data_size = pd.get(1, 1);
44
bias_data_size = pd.get(2, 0);
45
activation_type = pd.get(3, 0);
46
activation_params = pd.get(4, Mat());
51
int Requantize::load_model(const ModelBin& mb)
53
scale_in_data = mb.load(scale_in_data_size, 1);
54
if (scale_in_data.empty())
57
scale_out_data = mb.load(scale_out_data_size, 1);
58
if (scale_out_data.empty())
63
bias_data = mb.load(bias_data_size, 1);
64
if (bias_data.empty())
71
int Requantize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
73
int dims = bottom_blob.dims;
77
int w = bottom_blob.w;
79
top_blob.create(w, (size_t)1u, opt.blob_allocator);
83
const int* intptr = bottom_blob;
84
signed char* ptr = top_blob;
86
if (scale_in_data_size == 1 && scale_out_data_size == 1)
88
const float scale_in = scale_in_data[0];
89
const float scale_out = scale_out_data[0];
91
if (bias_data_size == 0)
93
#pragma omp parallel for num_threads(opt.num_threads)
94
for (int i = 0; i < w; i++)
96
float v = intptr[i] * scale_in;
97
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
100
else if (bias_data_size == 1)
102
const float bias = bias_data[0];
104
#pragma omp parallel for num_threads(opt.num_threads)
105
for (int i = 0; i < w; i++)
107
float v = intptr[i] * scale_in + bias;
108
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
113
#pragma omp parallel for num_threads(opt.num_threads)
114
for (int i = 0; i < w; i++)
116
float v = intptr[i] * scale_in + bias_data[i];
117
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
121
else if (scale_in_data_size == 1 && scale_out_data_size > 1)
123
const float scale_in = scale_in_data[0];
125
if (bias_data_size == 0)
127
#pragma omp parallel for num_threads(opt.num_threads)
128
for (int i = 0; i < w; i++)
130
float v = intptr[i] * scale_in;
131
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
134
else if (bias_data_size == 1)
136
const float bias = bias_data[0];
138
#pragma omp parallel for num_threads(opt.num_threads)
139
for (int i = 0; i < w; i++)
141
float v = intptr[i] * scale_in + bias;
142
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
147
#pragma omp parallel for num_threads(opt.num_threads)
148
for (int i = 0; i < w; i++)
150
float v = intptr[i] * scale_in + bias_data[i];
151
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
155
else if (scale_in_data_size > 1 && scale_out_data_size == 1)
157
const float scale_out = scale_out_data[0];
159
if (bias_data_size == 0)
161
#pragma omp parallel for num_threads(opt.num_threads)
162
for (int i = 0; i < w; i++)
164
float v = intptr[i] * scale_in_data[i];
165
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
168
else if (bias_data_size == 1)
170
const float bias = bias_data[0];
172
#pragma omp parallel for num_threads(opt.num_threads)
173
for (int i = 0; i < w; i++)
175
float v = intptr[i] * scale_in_data[i] + bias;
176
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
181
#pragma omp parallel for num_threads(opt.num_threads)
182
for (int i = 0; i < w; i++)
184
float v = intptr[i] * scale_in_data[i] + bias_data[i];
185
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
191
if (bias_data_size == 0)
193
#pragma omp parallel for num_threads(opt.num_threads)
194
for (int i = 0; i < w; i++)
196
float v = intptr[i] * scale_in_data[i];
197
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
200
else if (bias_data_size == 1)
202
const float bias = bias_data[0];
204
#pragma omp parallel for num_threads(opt.num_threads)
205
for (int i = 0; i < w; i++)
207
float v = intptr[i] * scale_in_data[i] + bias;
208
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
213
#pragma omp parallel for num_threads(opt.num_threads)
214
for (int i = 0; i < w; i++)
216
float v = intptr[i] * scale_in_data[i] + bias_data[i];
217
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out_data[i]);
225
int w = bottom_blob.w;
226
int h = bottom_blob.h;
228
top_blob.create(w, h, (size_t)1u, opt.blob_allocator);
229
if (top_blob.empty())
232
if (bias_data_size == 0)
234
#pragma omp parallel for num_threads(opt.num_threads)
235
for (int i = 0; i < h; i++)
237
const int* intptr = bottom_blob.row<const int>(i);
238
signed char* ptr = top_blob.row<signed char>(i);
240
const float scale_in = scale_in_data_size == 1 ? scale_in_data[0] : scale_in_data[i];
241
const float scale_out = scale_out_data_size == 1 ? scale_out_data[0] : scale_out_data[i];
243
for (int j = 0; j < w; j++)
245
float v = intptr[j] * scale_in;
246
ptr[j] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
252
#pragma omp parallel for num_threads(opt.num_threads)
253
for (int i = 0; i < h; i++)
255
const int* intptr = bottom_blob.row<const int>(i);
256
signed char* ptr = top_blob.row<signed char>(i);
258
const float scale_in = scale_in_data_size == 1 ? scale_in_data[0] : scale_in_data[i];
259
const float scale_out = scale_out_data_size == 1 ? scale_out_data[0] : scale_out_data[i];
260
const float bias = bias_data_size == 1 ? bias_data[0] : bias_data[i];
262
for (int j = 0; j < w; j++)
264
float v = intptr[j] * scale_in + bias;
265
ptr[j] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
273
int w = bottom_blob.w;
274
int h = bottom_blob.h;
275
int channels = bottom_blob.c;
278
top_blob.create(w, h, channels, (size_t)1u, opt.blob_allocator);
279
if (top_blob.empty())
282
if (bias_data_size == 0)
284
#pragma omp parallel for num_threads(opt.num_threads)
285
for (int q = 0; q < channels; q++)
287
const int* intptr = bottom_blob.channel(q);
288
signed char* ptr = top_blob.channel(q);
290
const float scale_in = scale_in_data_size == 1 ? scale_in_data[0] : scale_in_data[q];
291
const float scale_out = scale_out_data_size == 1 ? scale_out_data[0] : scale_out_data[q];
293
for (int i = 0; i < size; i++)
295
float v = intptr[i] * scale_in;
296
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);
302
#pragma omp parallel for num_threads(opt.num_threads)
303
for (int q = 0; q < channels; q++)
305
const int* intptr = bottom_blob.channel(q);
306
signed char* ptr = top_blob.channel(q);
308
const float scale_in = scale_in_data_size == 1 ? scale_in_data[0] : scale_in_data[q];
309
const float scale_out = scale_out_data_size == 1 ? scale_out_data[0] : scale_out_data[q];
310
const float bias = bias_data_size == 1 ? bias_data[0] : bias_data[q];
312
for (int i = 0; i < size; i++)
314
float v = intptr[i] * scale_in + bias;
315
ptr[i] = float2int8(activation_ss(v, activation_type, activation_params) * scale_out);