1
// Tencent is pleased to support the open source community by making ncnn available.
3
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
8
// https://opensource.org/licenses/BSD-3-Clause
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
18
#include "layer_type.h"
22
#if __ANDROID_API__ >= 26
23
#include <android/hardware_buffer.h>
24
#endif // __ANDROID_API__ >= 26
25
#endif // NCNN_PLATFORM_API
30
Mat Mat::clone(Allocator* _allocator) const
37
m.create(w, elemsize, elempack, _allocator);
39
m.create(w, h, elemsize, elempack, _allocator);
41
m.create(w, h, c, elemsize, elempack, _allocator);
43
m.create(w, h, d, c, elemsize, elempack, _allocator);
51
memcpy(m.data, data, total() * elemsize);
54
// copy by channel for different cstep
55
size_t size = (size_t)w * h * d * elemsize;
56
for (int i = 0; i < c; i++)
58
memcpy(m.channel(i), channel(i), size);
66
void Mat::clone_from(const ncnn::Mat& mat, Allocator* allocator)
68
*this = mat.clone(allocator);
71
Mat Mat::reshape(int _w, Allocator* _allocator) const
73
if (w * h * d * c != _w)
76
if (dims >= 3 && cstep != (size_t)w * h * d)
79
m.create(_w, elemsize, elempack, _allocator);
84
for (int i = 0; i < c; i++)
86
const void* ptr = (unsigned char*)data + i * cstep * elemsize;
87
void* mptr = (unsigned char*)m.data + (size_t)i * w * h * d * elemsize;
88
memcpy(mptr, ptr, (size_t)w * h * d * elemsize);
107
Mat Mat::reshape(int _w, int _h, Allocator* _allocator) const
109
if (w * h * d * c != _w * _h)
112
if (dims >= 3 && cstep != (size_t)w * h * d)
115
m.create(_w, _h, elemsize, elempack, _allocator);
120
for (int i = 0; i < c; i++)
122
const void* ptr = (unsigned char*)data + i * cstep * elemsize;
123
void* mptr = (unsigned char*)m.data + (size_t)i * w * h * d * elemsize;
124
memcpy(mptr, ptr, (size_t)w * h * d * elemsize);
138
m.cstep = (size_t)_w * _h;
143
Mat Mat::reshape(int _w, int _h, int _c, Allocator* _allocator) const
145
if (w * h * d * c != _w * _h * _c)
150
if ((size_t)_w * _h != alignSize((size_t)_w * _h * elemsize, 16) / elemsize)
153
m.create(_w, _h, _c, elemsize, elempack, _allocator);
158
for (int i = 0; i < _c; i++)
160
const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * elemsize;
161
void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
162
memcpy(mptr, ptr, (size_t)_w * _h * elemsize);
170
// flatten and then align
171
Mat tmp = reshape(_w * _h * _c, _allocator);
172
return tmp.reshape(_w, _h, _c, _allocator);
183
m.cstep = alignSize((size_t)_w * _h * elemsize, 16) / elemsize;
188
Mat Mat::reshape(int _w, int _h, int _d, int _c, Allocator* _allocator) const
190
if (w * h * d * c != _w * _h * _d * _c)
195
if ((size_t)_w * _h * _d != alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize)
198
m.create(_w, _h, _d, _c, elemsize, elempack, _allocator);
203
for (int i = 0; i < _c; i++)
205
const void* ptr = (unsigned char*)data + (size_t)i * _w * _h * _d * elemsize;
206
void* mptr = (unsigned char*)m.data + i * m.cstep * m.elemsize;
207
memcpy(mptr, ptr, (size_t)_w * _h * _d * elemsize);
215
// flatten and then align
216
Mat tmp = reshape(_w * _h * _d * _c, _allocator);
217
return tmp.reshape(_w, _h, _d, _c, _allocator);
228
m.cstep = alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize;
233
void Mat::create(int _w, size_t _elemsize, Allocator* _allocator)
235
if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
240
elemsize = _elemsize;
242
allocator = _allocator;
252
size_t totalsize = alignSize(total() * elemsize, 4);
256
data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
258
data = fastMalloc(totalsize + (int)sizeof(*refcount));
263
refcount = (int*)(((unsigned char*)data) + totalsize);
268
void Mat::create(int _w, int _h, size_t _elemsize, Allocator* _allocator)
270
if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
275
elemsize = _elemsize;
277
allocator = _allocator;
285
cstep = (size_t)w * h;
287
size_t totalsize = alignSize(total() * elemsize, 4);
291
data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
293
data = fastMalloc(totalsize + (int)sizeof(*refcount));
298
refcount = (int*)(((unsigned char*)data) + totalsize);
303
void Mat::create(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
305
if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
310
elemsize = _elemsize;
312
allocator = _allocator;
320
cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
322
size_t totalsize = alignSize(total() * elemsize, 4);
326
data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
328
data = fastMalloc(totalsize + (int)sizeof(*refcount));
333
refcount = (int*)(((unsigned char*)data) + totalsize);
338
void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator)
340
if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
345
elemsize = _elemsize;
347
allocator = _allocator;
355
cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
357
size_t totalsize = alignSize(total() * elemsize, 4);
361
data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
363
data = fastMalloc(totalsize + (int)sizeof(*refcount));
368
refcount = (int*)(((unsigned char*)data) + totalsize);
373
void Mat::create(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
375
if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
380
elemsize = _elemsize;
381
elempack = _elempack;
382
allocator = _allocator;
392
size_t totalsize = alignSize(total() * elemsize, 4);
396
data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
398
data = fastMalloc(totalsize + (int)sizeof(*refcount));
403
refcount = (int*)(((unsigned char*)data) + totalsize);
408
void Mat::create(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
410
if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
415
elemsize = _elemsize;
416
elempack = _elempack;
417
allocator = _allocator;
425
cstep = (size_t)w * h;
427
size_t totalsize = alignSize(total() * elemsize, 4);
431
data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
433
data = fastMalloc(totalsize + (int)sizeof(*refcount));
438
refcount = (int*)(((unsigned char*)data) + totalsize);
443
void Mat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
445
if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
450
elemsize = _elemsize;
451
elempack = _elempack;
452
allocator = _allocator;
460
cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
462
size_t totalsize = alignSize(total() * elemsize, 4);
466
data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
468
data = fastMalloc(totalsize + (int)sizeof(*refcount));
473
refcount = (int*)(((unsigned char*)data) + totalsize);
478
void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
480
if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
485
elemsize = _elemsize;
486
elempack = _elempack;
487
allocator = _allocator;
495
cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
497
size_t totalsize = alignSize(total() * elemsize, 4);
501
data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
503
data = fastMalloc(totalsize + (int)sizeof(*refcount));
508
refcount = (int*)(((unsigned char*)data) + totalsize);
513
void Mat::create_like(const Mat& m, Allocator* _allocator)
517
create(m.w, m.elemsize, m.elempack, _allocator);
519
create(m.w, m.h, m.elemsize, m.elempack, _allocator);
521
create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
523
create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
527
void Mat::create_like(const VkMat& m, Allocator* _allocator)
531
create(m.w, m.elemsize, m.elempack, _allocator);
533
create(m.w, m.h, m.elemsize, m.elempack, _allocator);
535
create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
537
create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
540
void Mat::create_like(const VkImageMat& im, Allocator* _allocator)
544
create(im.w, im.elemsize, im.elempack, _allocator);
546
create(im.w, im.h, im.elemsize, im.elempack, _allocator);
548
create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
550
create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
555
void VkMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
557
if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
562
elemsize = _elemsize;
564
allocator = _allocator;
576
size_t totalsize = alignSize(total() * elemsize, 4);
578
data = allocator->fastMalloc(totalsize);
583
refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
588
void VkMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
590
if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
595
elemsize = _elemsize;
597
allocator = _allocator;
609
size_t totalsize = alignSize(total() * elemsize, 4);
611
data = allocator->fastMalloc(totalsize);
616
refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
621
void VkMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
623
if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
628
elemsize = _elemsize;
630
allocator = _allocator;
638
cstep = alignSize(w * h * elemsize, 16) / elemsize;
642
size_t totalsize = alignSize(total() * elemsize, 4);
644
data = allocator->fastMalloc(totalsize);
649
refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
654
void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
656
if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
661
elemsize = _elemsize;
663
allocator = _allocator;
671
cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
675
size_t totalsize = alignSize(total() * elemsize, 4);
677
data = allocator->fastMalloc(totalsize);
682
refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
687
void VkMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
689
if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
694
elemsize = _elemsize;
695
elempack = _elempack;
696
allocator = _allocator;
708
size_t totalsize = alignSize(total() * elemsize, 4);
710
data = allocator->fastMalloc(totalsize);
715
refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
720
void VkMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
722
if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
727
elemsize = _elemsize;
728
elempack = _elempack;
729
allocator = _allocator;
741
size_t totalsize = alignSize(total() * elemsize, 4);
743
data = allocator->fastMalloc(totalsize);
748
refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
753
void VkMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
755
if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
760
elemsize = _elemsize;
761
elempack = _elempack;
762
allocator = _allocator;
770
cstep = alignSize(w * h * elemsize, 16) / elemsize;
774
size_t totalsize = alignSize(total() * elemsize, 4);
776
data = allocator->fastMalloc(totalsize);
781
refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
786
void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
788
if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
793
elemsize = _elemsize;
794
elempack = _elempack;
795
allocator = _allocator;
803
cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
807
size_t totalsize = alignSize(total() * elemsize, 4);
809
data = allocator->fastMalloc(totalsize);
814
refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
819
void VkMat::create_like(const Mat& m, VkAllocator* _allocator)
823
create(m.w, m.elemsize, m.elempack, _allocator);
825
create(m.w, m.h, m.elemsize, m.elempack, _allocator);
827
create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
829
create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
832
void VkMat::create_like(const VkMat& m, VkAllocator* _allocator)
836
create(m.w, m.elemsize, m.elempack, _allocator);
838
create(m.w, m.h, m.elemsize, m.elempack, _allocator);
840
create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
842
create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
845
void VkMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
849
create(im.w, im.elemsize, im.elempack, _allocator);
851
create(im.w, im.h, im.elemsize, im.elempack, _allocator);
853
create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
855
create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
858
void VkImageMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
860
if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
865
elemsize = _elemsize;
867
allocator = _allocator;
877
data = allocator->fastMalloc(w, h, c, elemsize, elempack);
882
refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
887
void VkImageMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
889
if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
894
elemsize = _elemsize;
896
allocator = _allocator;
906
data = allocator->fastMalloc(w, h, c, elemsize, elempack);
911
refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
916
void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
918
if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
923
elemsize = _elemsize;
925
allocator = _allocator;
935
data = allocator->fastMalloc(w, h, c, elemsize, elempack);
940
refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
945
void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
947
if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
952
elemsize = _elemsize;
954
allocator = _allocator;
964
// underlying image is 3d
965
data = allocator->fastMalloc(w, h * d, c, elemsize, elempack);
970
refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
975
void VkImageMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
977
if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
982
elemsize = _elemsize;
983
elempack = _elempack;
984
allocator = _allocator;
994
data = allocator->fastMalloc(w, h, c, elemsize, elempack);
999
refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
1004
void VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1006
if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
1011
elemsize = _elemsize;
1012
elempack = _elempack;
1013
allocator = _allocator;
1023
data = allocator->fastMalloc(w, h, c, elemsize, elempack);
1028
refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
1033
void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1035
if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
1040
elemsize = _elemsize;
1041
elempack = _elempack;
1042
allocator = _allocator;
1052
data = allocator->fastMalloc(w, h, c, elemsize, elempack);
1057
refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
1062
void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1064
if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
1069
elemsize = _elemsize;
1070
elempack = _elempack;
1071
allocator = _allocator;
1081
// underlying image is 3d
1082
data = allocator->fastMalloc(w, h * d, c, elemsize, elempack);
1087
refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
1092
void VkImageMat::create_like(const Mat& m, VkAllocator* _allocator)
1096
create(m.w, m.elemsize, m.elempack, _allocator);
1098
create(m.w, m.h, m.elemsize, m.elempack, _allocator);
1100
create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
1102
create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
1105
void VkImageMat::create_like(const VkMat& m, VkAllocator* _allocator)
1109
create(m.w, m.elemsize, m.elempack, _allocator);
1111
create(m.w, m.h, m.elemsize, m.elempack, _allocator);
1113
create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
1115
create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
1118
void VkImageMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
1120
int _dims = im.dims;
1122
create(im.w, im.elemsize, im.elempack, _allocator);
1124
create(im.w, im.h, im.elemsize, im.elempack, _allocator);
1126
create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
1128
create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
1130
#endif // NCNN_VULKAN
1132
void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_vals)
1136
if (mean_vals && !norm_vals)
1138
// subtract mean only
1139
op = create_layer(LayerType::Bias);
1147
weights[0] = Mat(c);
1148
for (int q = 0; q < c; q++)
1150
weights[0][q] = -mean_vals[q];
1153
op->load_model(ModelBinFromMatArray(weights));
1155
else if (!mean_vals && norm_vals)
1158
op = create_layer(LayerType::Scale);
1166
weights[0] = Mat(c);
1167
for (int q = 0; q < c; q++)
1169
weights[0][q] = norm_vals[q];
1172
op->load_model(ModelBinFromMatArray(weights));
1174
else if (mean_vals && norm_vals)
1176
// subtract mean and normalize
1177
op = create_layer(LayerType::Scale);
1186
weights[0] = Mat(c);
1187
weights[1] = Mat(c);
1188
for (int q = 0; q < c; q++)
1190
weights[0][q] = norm_vals[q];
1191
weights[1][q] = -mean_vals[q] * norm_vals[q];
1194
op->load_model(ModelBinFromMatArray(weights));
1196
else // if (!mean_vals && !norm_vals)
1202
opt.num_threads = 1; // TODO
1204
op->create_pipeline(opt);
1206
op->forward_inplace(*this, opt);
1208
op->destroy_pipeline(opt);
1213
Mat Mat::from_float16(const unsigned short* data, int size)
1215
Mat src(size, (void*)data, (size_t)2u);
1219
opt.num_threads = 1; // TODO
1220
cast_float16_to_float32(src, dst, opt);
1226
#if NCNN_PLATFORM_API
1227
#if __ANDROID_API__ >= 26
1228
VkImageMat VkImageMat::from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator)
1230
int width = allocator->width();
1231
int height = allocator->height();
1232
size_t elemsize = 4u; // elemsize for ahb is actually just a placeholder
1234
return VkImageMat(width, height, elemsize, allocator);
1236
#endif // __ANDROID_API__ >= 26
1237
#endif // NCNN_PLATFORM_API
1238
#endif // NCNN_VULKAN
1240
unsigned short float32_to_float16(float value)
1252
unsigned short sign = (tmp.u & 0x80000000) >> 31;
1253
unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
1254
unsigned int significand = tmp.u & 0x7FFFFF;
1256
// NCNN_LOGE("%d %d %d", sign, exponent, significand);
1259
unsigned short fp16;
1262
// zero or denormal, always underflow
1263
fp16 = (sign << 15) | (0x00 << 10) | 0x00;
1265
else if (exponent == 0xFF)
1268
fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
1273
short newexp = exponent + (-127 + 15);
1276
// overflow, return infinity
1277
fp16 = (sign << 15) | (0x1F << 10) | 0x00;
1279
else if (newexp <= 0)
1281
// Some normal fp32 cannot be expressed as normal fp16
1282
fp16 = (sign << 15) | (0x00 << 10) | 0x00;
1287
fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
1294
float float16_to_float32(unsigned short value)
1297
unsigned short sign = (value & 0x8000) >> 15;
1298
unsigned short exponent = (value & 0x7c00) >> 10;
1299
unsigned short significand = value & 0x03FF;
1301
// NCNN_LOGE("%d %d %d", sign, exponent, significand);
1311
if (significand == 0)
1314
tmp.u = (sign << 31);
1320
// find non-zero bit
1321
while ((significand & 0x200) == 0)
1327
significand &= 0x3FF;
1328
tmp.u = (sign << 31) | ((-exponent + (-15 + 127)) << 23) | (significand << 13);
1331
else if (exponent == 0x1F)
1334
tmp.u = (sign << 31) | (0xFF << 23) | (significand << 13);
1339
tmp.u = (sign << 31) | ((exponent + (-15 + 127)) << 23) | (significand << 13);
1345
void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt)
1347
Layer* padding = create_layer(LayerType::Padding);
1357
padding->load_param(pd);
1359
padding->create_pipeline(opt);
1361
padding->forward(src, dst, opt);
1363
padding->destroy_pipeline(opt);
1368
void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt)
1370
Layer* padding = create_layer(LayerType::Padding);
1382
padding->load_param(pd);
1384
padding->create_pipeline(opt);
1386
padding->forward(src, dst, opt);
1388
padding->destroy_pipeline(opt);
1393
void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt)
1395
if (left + right > src.w || top + bottom > src.h)
1397
NCNN_LOGE("copy_cut_border parameter error, top: %d, bottom: %d, left: %d, right: %d, src.w: %d, src.h: %d", top, bottom, left, right, src.w, src.h);
1400
Layer* crop = create_layer(LayerType::Crop);
1406
pd.set(3, src.w - left - right);
1407
pd.set(4, src.h - top - bottom);
1410
crop->load_param(pd);
1412
crop->create_pipeline(opt);
1414
crop->forward(src, dst, opt);
1416
crop->destroy_pipeline(opt);
1421
void copy_cut_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, const Option& opt)
1423
if (left + right > src.w || top + bottom > src.h || front + behind > src.d)
1425
NCNN_LOGE("copy_cut_border_3d parameter error, top: %d, bottom: %d, left: %d, right: %d, front: %d, behind: %d, src.w: %d, src.h: %d, src.d: %d", top, bottom, left, right, front, behind, src.w, src.h, src.d);
1428
Layer* crop = create_layer(LayerType::Crop);
1435
pd.set(3, src.w - left - right);
1436
pd.set(4, src.h - top - bottom);
1437
pd.set(14, src.d - front - behind);
1440
crop->load_param(pd);
1442
crop->create_pipeline(opt);
1444
crop->forward(src, dst, opt);
1446
crop->destroy_pipeline(opt);
1451
void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt)
1453
Layer* interp = create_layer(LayerType::Interp);
1460
interp->load_param(pd);
1462
interp->create_pipeline(opt);
1464
interp->forward(src, dst, opt);
1466
interp->destroy_pipeline(opt);
1471
void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt)
1473
Layer* interp = create_layer(LayerType::Interp);
1480
interp->load_param(pd);
1482
interp->create_pipeline(opt);
1484
interp->forward(src, dst, opt);
1486
interp->destroy_pipeline(opt);
1491
void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt)
1493
Layer* interp = create_layer(LayerType::Interp);
1500
interp->load_param(pd);
1502
interp->create_pipeline(opt);
1504
interp->forward(src, dst, opt);
1506
interp->destroy_pipeline(opt);
1511
void convert_packing(const Mat& src, Mat& dst, int _elempack, const Option& opt)
1513
Layer* packing = create_layer(LayerType::Packing);
1516
pd.set(0, _elempack);
1518
packing->load_param(pd);
1520
packing->create_pipeline(opt);
1522
packing->forward(src, dst, opt);
1524
packing->destroy_pipeline(opt);
1529
void flatten(const Mat& src, Mat& dst, const Option& opt)
1531
Layer* flatten = create_layer(LayerType::Flatten);
1535
flatten->load_param(pd);
1537
flatten->create_pipeline(opt);
1539
flatten->forward(src, dst, opt);
1541
flatten->destroy_pipeline(opt);
1546
void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt)
1548
Layer* cast = create_layer(LayerType::Cast);
1554
cast->load_param(pd);
1556
cast->create_pipeline(opt);
1558
cast->forward(src, dst, opt);
1560
cast->destroy_pipeline(opt);
1565
void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt)
1567
Layer* cast = create_layer(LayerType::Cast);
1573
cast->load_param(pd);
1575
cast->create_pipeline(opt);
1577
cast->forward(src, dst, opt);
1579
cast->destroy_pipeline(opt);
1584
void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt)
1586
Layer* cast = create_layer(LayerType::Cast);
1592
cast->load_param(pd);
1594
cast->create_pipeline(opt);
1596
cast->forward(src, dst, opt);
1598
cast->destroy_pipeline(opt);
1603
void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt)
1605
Layer* cast = create_layer(LayerType::Cast);
1611
cast->load_param(pd);
1613
cast->create_pipeline(opt);
1615
cast->forward(src, dst, opt);
1617
cast->destroy_pipeline(opt);
1622
void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt)
1624
Layer* cast = create_layer(LayerType::Cast);
1630
cast->load_param(pd);
1632
cast->create_pipeline(opt);
1634
cast->forward(src, dst, opt);
1636
cast->destroy_pipeline(opt);
1641
void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt)
1643
Layer* quantize = create_layer(LayerType::Quantize);
1646
pd.set(0, scale_data.w);
1648
quantize->load_param(pd);
1651
weights[0] = scale_data;
1653
quantize->load_model(ModelBinFromMatArray(weights));
1655
quantize->create_pipeline(opt);
1657
quantize->forward(src, dst, opt);
1659
quantize->destroy_pipeline(opt);
1664
void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt)
1666
Layer* dequantize = create_layer(LayerType::Dequantize);
1669
pd.set(0, scale_data.w);
1670
pd.set(1, bias_data.w);
1672
dequantize->load_param(pd);
1675
weights[0] = scale_data;
1676
weights[1] = bias_data;
1678
dequantize->load_model(ModelBinFromMatArray(weights));
1680
dequantize->create_pipeline(opt);
1682
dequantize->forward(src, dst, opt);
1684
dequantize->destroy_pipeline(opt);
1689
void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt)
1691
Layer* requantize = create_layer(LayerType::Requantize);
1694
pd.set(0, scale_in_data.w);
1695
pd.set(1, scale_out_data.w);
1696
pd.set(2, bias_data.w);
1697
pd.set(3, activation_type);
1698
pd.set(4, activation_params);
1700
requantize->load_param(pd);
1703
weights[0] = scale_in_data;
1704
weights[1] = scale_out_data;
1705
weights[2] = bias_data;
1707
requantize->load_model(ModelBinFromMatArray(weights));
1709
requantize->create_pipeline(opt);
1711
requantize->forward(src, dst, opt);
1713
requantize->destroy_pipeline(opt);