1
// Tencent is pleased to support the open source community by making ncnn available.
3
// Copyright (C) 2018 THL A29 Limited, a Tencent company. All rights reserved.
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
8
// https://opensource.org/licenses/BSD-3-Clause
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
22
// Sets the support_inplace flag to false.
// NOTE(review): the enclosing definition is not visible in this excerpt.
support_inplace = false;
25
// Reads this layer's parameters from the parameter dictionary `pd`.
// The only statement visible here stores the result of pd.get(0, 1) in
// scale_data_size.
// NOTE(review): the braces and return statement of this function are not
// visible in this excerpt.
int Quantize::load_param(const ParamDict& pd)
27
// Looks up id 0; the second argument (1) is presumably the value used when
// the id is absent -- confirm against ParamDict::get.
scale_data_size = pd.get(0, 1);
32
// Loads the scale values from the model binary `mb` into scale_data.
// NOTE(review): the braces and return statements of this function are not
// visible in this excerpt.
int Quantize::load_model(const ModelBin& mb)
34
// Requests scale_data_size entries. The meaning of the second argument (1)
// is defined by ModelBin::load -- confirm there.
scale_data = mb.load(scale_data_size, 1);
35
// Guard for a load that produced no data.
// NOTE(review): the statement this condition guards is not visible here.
if (scale_data.empty())
41
// Converts a float to a signed 8-bit integer: round to nearest, then saturate.
//
// v: the value to quantize (callers pass the input already multiplied by
//    its scale).
// Returns round(v) limited to the symmetric range [-127, 127]; -128 is never
// produced. NaN maps to 0.
static inline signed char float2int8(float v)
{
    // Saturate while still in floating point. Converting a float whose
    // rounded value does not fit in int (huge magnitudes, +/-infinity) is
    // undefined behavior; on x86 it typically yields INT_MIN, which would
    // turn a very large positive input into -127 instead of 127.
    if (v >= 127.f) return 127;
    if (v <= -127.f) return -127;

    // NaN fails both comparisons above and cannot be converted to int
    // safely either; map it to a fixed, deterministic value.
    if (v != v) return 0;

    // Here -127 < v < 127, so the rounded value fits in signed char.
    return static_cast<signed char>(static_cast<int>(round(v)));
}
49
// Quantizes the float data in bottom_blob into signed 8-bit values written to
// top_blob. Every element is multiplied by a scale taken from scale_data and
// passed through float2int8.
//
// bottom_blob: input, read as float.
// top_blob:    output, created here and written as signed char.
// opt:         supplies the blob allocator and the OpenMP thread count.
//
// NOTE(review): this excerpt omits lines of the function (braces, the
// branching between the three sections below, return statements), so the
// comments describe only the statements that are visible.
int Quantize::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
51
// Dimensionality of the input; presumably selects which of the sections
// below runs -- the branch conditions are not visible here.
int dims = bottom_blob.dims;
55
// ---- Section using only the width w ----
int w = bottom_blob.w;
57
// Output of w elements, with 1 passed as the size_t argument (presumably the
// per-element byte size -- confirm against Mat::create).
top_blob.create(w, (size_t)1u, opt.blob_allocator);
61
const float* ptr = bottom_blob;
62
signed char* outptr = top_blob;
64
// A single scale value is applied to every element.
if (scale_data_size == 1)
66
const float scale = scale_data[0];
68
#pragma omp parallel for num_threads(opt.num_threads)
69
for (int i = 0; i < w; i++)
71
outptr[i] = float2int8(ptr[i] * scale);
76
// Variant with one scale value per element, indexed by the same i as the data.
// NOTE(review): presumably the else-branch of the check above -- the `else`
// itself is not visible here.
#pragma omp parallel for num_threads(opt.num_threads)
77
for (int i = 0; i < w; i++)
79
outptr[i] = float2int8(ptr[i] * scale_data[i]);
86
// ---- Section using width w and height h ----
int w = bottom_blob.w;
87
int h = bottom_blob.h;
89
top_blob.create(w, h, (size_t)1u, opt.blob_allocator);
93
// Rows are distributed across OpenMP threads.
#pragma omp parallel for num_threads(opt.num_threads)
94
for (int i = 0; i < h; i++)
96
const float* ptr0 = bottom_blob.row(i);
97
signed char* outptr0 = top_blob.row<signed char>(i);
99
// One shared scale when scale_data_size == 1, otherwise the scale for row i.
const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[i];
101
for (int j = 0; j < w; j++)
103
outptr0[j] = float2int8(ptr0[j] * scale);
110
// ---- Section using width w, height h and channel count ----
int w = bottom_blob.w;
111
int h = bottom_blob.h;
112
int channels = bottom_blob.c;
115
top_blob.create(w, h, channels, (size_t)1u, opt.blob_allocator);
116
// Guard for a failed allocation.
// NOTE(review): the guarded statement is not visible in this excerpt.
if (top_blob.empty())
119
// Channels are distributed across OpenMP threads.
#pragma omp parallel for num_threads(opt.num_threads)
120
for (int q = 0; q < channels; q++)
122
const float* ptr = bottom_blob.channel(q);
123
signed char* outptr = top_blob.channel(q);
125
// One shared scale when scale_data_size == 1, otherwise the scale for channel q.
const float scale = scale_data_size == 1 ? scale_data[0] : scale_data[q];
127
// NOTE(review): `size` is not declared in the visible lines; presumably the
// number of elements per channel (w * h) -- confirm.
for (int i = 0; i < size; i++)
129
outptr[i] = float2int8(ptr[i] * scale);