// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "innerproduct.h"

#include "layer_type.h"

#include "fused_activation.h"

namespace ncnn {

InnerProduct::InnerProduct()
{
    one_blob_only = true;
    support_inplace = false;
}

int InnerProduct::load_param(const ParamDict& pd)
{
    num_output = pd.get(0, 0);
    bias_term = pd.get(1, 0);
    weight_data_size = pd.get(2, 0);
    int8_scale_term = pd.get(8, 0);
    activation_type = pd.get(9, 0);
    activation_params = pd.get(10, Mat());

    if (int8_scale_term)
    {
#if NCNN_INT8
        support_int8_storage = true;
#else
        NCNN_LOGE("please build ncnn with NCNN_INT8 enabled for int8 inference");
        return -1;
#endif
    }

    return 0;
}

int InnerProduct::load_model(const ModelBin& mb)
{
    weight_data = mb.load(weight_data_size, 0);
    if (weight_data.empty())
        return -100;

    if (bias_term)
    {
        bias_data = mb.load(num_output, 1);
        if (bias_data.empty())
            return -100;
    }

#if NCNN_INT8
    if (int8_scale_term)
    {
        weight_data_int8_scales = mb.load(num_output, 1);
        bottom_blob_int8_scales = mb.load(1, 1);
    }
#endif // NCNN_INT8

#if NCNN_INT8
    // runtime quantize the weight data
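    // fp32 weights loaded from the model are quantized to int8 once at load time,
    // using the per-output-channel scales in weight_data_int8_scales, so that
    // forward_int8() can consume weight_data directly as signed char.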
    if (weight_data.elemsize == (size_t)4u && int8_scale_term)
    {
        const int num_input = weight_data_size / num_output;

        Mat weight_data_r2 = weight_data.reshape(num_input, num_output);

        Mat weight_data_int8;

        Option opt_q;
        opt_q.num_threads = 1;
        opt_q.use_packing_layout = false;
        quantize_to_int8(weight_data_r2, weight_data_int8, weight_data_int8_scales, opt_q);
        if (weight_data_int8.empty())
            return -100;

        weight_data = weight_data_int8.reshape(weight_data_size);
    }
#endif // NCNN_INT8

    return 0;
}

int InnerProduct::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
#if NCNN_INT8
    if (opt.use_int8_inference && weight_data.elemsize == (size_t)1u)
    {
        return forward_int8(bottom_blob, top_blob, opt);
    }
#endif

    const int num_input = weight_data_size / num_output;

    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    size_t elemsize = bottom_blob.elemsize;
    int size = w * h;
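
    // Two execution paths: a 2D bottom blob whose width equals num_input is treated
    // as a batch of h row vectors (one matrix product per row); any other shape is
    // flattened and reduced to a single num_output-length vector.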
    if (bottom_blob.dims == 2 && w == num_input)
    {
        // gemm
        top_blob.create(num_output, h, elemsize, opt.blob_allocator);
        if (top_blob.empty())
            return -100;

        #pragma omp parallel for num_threads(opt.num_threads)
        for (int j = 0; j < h; j++)
        {
            const float* m = bottom_blob.row(j);
            float* outptr = top_blob.row(j);

            for (int p = 0; p < num_output; p++)
            {
                // weights for output channel p are stored as a contiguous row of w values
                const float* kptr = (const float*)weight_data + w * p;

                float sum = 0.f;

                if (bias_term)
                    sum = bias_data[p];

                for (int i = 0; i < w; i++)
                {
                    sum += m[i] * kptr[i];
                }

                outptr[p] = activation_ss(sum, activation_type, activation_params);
            }
        }

        return 0;
    }

    top_blob.create(num_output, elemsize, opt.blob_allocator);
    if (top_blob.empty())
        return -100;

    // num_output
    #pragma omp parallel for num_threads(opt.num_threads)
    for (int p = 0; p < num_output; p++)
    {
        float sum = 0.f;

        if (bias_term)
            sum = bias_data[p];

        // channels
        for (int q = 0; q < channels; q++)
        {
            const float* w = (const float*)weight_data + size * channels * p + size * q;
            const float* m = bottom_blob.channel(q);

            for (int i = 0; i < size; i++)
            {
                sum += m[i] * w[i];
            }
        }

        top_blob[p] = activation_ss(sum, activation_type, activation_params);
    }

    return 0;
}

#if NCNN_INT8
int InnerProduct::forward_int8(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
    const int num_input = weight_data_size / num_output;

    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    size_t elemsize = bottom_blob.elemsize;
    int size = w * h;
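
    // If the input arrives as fp32 (elemsize != 1), quantize it to int8 on the fly
    // with the blob scale bottom_blob_int8_scales[0] before running the integer kernel.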
    Mat bottom_blob_int8 = bottom_blob;
    if (elemsize != 1)
    {
        Option opt_g = opt;
        opt_g.blob_allocator = opt.workspace_allocator;
        opt_g.use_packing_layout = false;

        quantize_to_int8(bottom_blob, bottom_blob_int8, bottom_blob_int8_scales, opt_g);
    }

    if (bottom_blob.dims == 2 && w == num_input)
    {
        // gemm
        top_blob.create(num_output, h, 4u, opt.blob_allocator);
        if (top_blob.empty())
            return -100;

        #pragma omp parallel for num_threads(opt.num_threads)
        for (int j = 0; j < h; j++)
        {
            const signed char* m = bottom_blob_int8.row<signed char>(j);
            float* outptr = top_blob.row(j);

            for (int p = 0; p < num_output; p++)
            {
                const signed char* kptr = (const signed char*)weight_data + w * p;

                int sum = 0;

                for (int i = 0; i < w; i++)
                {
                    sum += m[i] * kptr[i];
                }

                // dequantize and relu
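                // Map the int32 accumulator back to fp32 with
                //   scale_in = 1 / (input_scale * per-output-channel weight_scale).
                // A zero weight scale would divide by zero, so the result is forced to 0 instead.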
                float scale_in;
                if (weight_data_int8_scales[p] == 0)
                    scale_in = 0;
                else
                    scale_in = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[p]);

                float sumfp32 = sum * scale_in;

                if (bias_term)
                    sumfp32 += bias_data[p];

                outptr[p] = activation_ss(sumfp32, activation_type, activation_params);
            }
        }

        return 0;
    }

    top_blob.create(num_output, 4u, opt.blob_allocator);
    if (top_blob.empty())
        return -100;

    // num_output
    #pragma omp parallel for num_threads(opt.num_threads)
    for (int p = 0; p < num_output; p++)
    {
        float* outptr = top_blob;

        int sum = 0;

        int offset = size * channels * p;

        // channels
        for (int q = 0; q < channels; q++)
        {
            const signed char* w = (const signed char*)weight_data + offset + size * q;
            const signed char* m = bottom_blob_int8.channel(q);

            for (int i = 0; i < size; i++)
            {
                sum += m[i] * w[i];
            }
        }

        // dequantize and relu, same scheme as the gemm path above
        float scale_in;
        if (weight_data_int8_scales[p] == 0)
            scale_in = 0;
        else
            scale_in = 1.f / (bottom_blob_int8_scales[0] * weight_data_int8_scales[p]);

        float sumfp32 = sum * scale_in;

        if (bias_term)
            sumfp32 += bias_data[p];

        outptr[p] = activation_ss(sumfp32, activation_type, activation_params);
    }

    return 0;
}
#endif // NCNN_INT8

} // namespace ncnn