ncnn

Форк
0
/
mat.cpp 
1718 строк · 38.8 Кб
1
// Tencent is pleased to support the open source community by making ncnn available.
2
//
3
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
4
//
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
7
//
8
// https://opensource.org/licenses/BSD-3-Clause
9
//
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
14

15
#include "mat.h"
16

17
#include "layer.h"
18
#include "layer_type.h"
19

20
#if NCNN_VULKAN
21
#if NCNN_PLATFORM_API
22
#if __ANDROID_API__ >= 26
23
#include <android/hardware_buffer.h>
24
#endif // __ANDROID_API__ >= 26
25
#endif // NCNN_PLATFORM_API
26
#endif // NCNN_VULKAN
27

28
namespace ncnn {
29

30
// Produce a deep copy of this Mat.
// Returns an empty Mat when this one is empty, or when creating the destination
// leaves it empty. The destination is created with the same shape, elemsize and
// elempack, using _allocator.
Mat Mat::clone(Allocator* _allocator) const
{
    if (empty())
        return Mat();

    Mat copy;
    switch (dims)
    {
    case 1:
        copy.create(w, elemsize, elempack, _allocator);
        break;
    case 2:
        copy.create(w, h, elemsize, elempack, _allocator);
        break;
    case 3:
        copy.create(w, h, c, elemsize, elempack, _allocator);
        break;
    case 4:
        copy.create(w, h, d, c, elemsize, elempack, _allocator);
        break;
    default:
        break;
    }

    if (copy.empty())
        return copy;

    if (total() == 0)
        return copy;

    if (cstep == copy.cstep)
    {
        // identical channel stride: one bulk copy covers everything
        memcpy(copy.data, data, total() * elemsize);
        return copy;
    }

    // copy by channel for different cstep
    const size_t channel_bytes = (size_t)w * h * d * elemsize;
    for (int q = 0; q < c; q++)
    {
        memcpy(copy.channel(q), channel(q), channel_bytes);
    }

    return copy;
}
65

66
// Replace this Mat with the result of mat.clone(allocator).
void Mat::clone_from(const ncnn::Mat& mat, Allocator* allocator)
{
    this->operator=(mat.clone(allocator));
}
70

71
// Reinterpret this Mat as a 1-dimensional Mat of _w elements.
// Returns an empty Mat when the element counts differ (both products are
// evaluated in int, as written). When dims >= 3 and cstep differs from w*h*d,
// a new Mat is created with _allocator and each channel is copied into it
// back to back; otherwise a copy of this Mat with rewritten shape fields is
// returned.
Mat Mat::reshape(int _w, Allocator* _allocator) const
{
    if (w * h * d * c != _w)
        return Mat();

    const bool channels_have_gap = dims >= 3 && cstep != (size_t)w * h * d;
    if (!channels_have_gap)
    {
        // NOTE(review): presumably Mat's copy shares the underlying buffer — confirm in mat.h
        Mat view = *this;

        view.dims = 1;
        view.w = _w;
        view.h = 1;
        view.d = 1;
        view.c = 1;

        view.cstep = _w;

        return view;
    }

    Mat flat;
    flat.create(_w, elemsize, elempack, _allocator);
    if (flat.empty())
        return flat;

    // flatten: drop the gap between channels
    const size_t plane_bytes = (size_t)w * h * d * elemsize;
    const unsigned char* src_base = (const unsigned char*)data;
    unsigned char* dst_base = (unsigned char*)flat.data;
    for (int q = 0; q < c; q++)
    {
        memcpy(dst_base + q * plane_bytes, src_base + q * cstep * elemsize, plane_bytes);
    }

    return flat;
}
106

107
// Reinterpret this Mat as a 2-dimensional Mat of _w x _h elements.
// Returns an empty Mat when the element counts differ (both products are
// evaluated in int, as written). When dims >= 3 and cstep differs from w*h*d,
// a new Mat is created with _allocator and each channel is copied into it
// back to back; otherwise a copy of this Mat with rewritten shape fields is
// returned.
Mat Mat::reshape(int _w, int _h, Allocator* _allocator) const
{
    if (w * h * d * c != _w * _h)
        return Mat();

    const bool channels_have_gap = dims >= 3 && cstep != (size_t)w * h * d;
    if (!channels_have_gap)
    {
        // NOTE(review): presumably Mat's copy shares the underlying buffer — confirm in mat.h
        Mat view = *this;

        view.dims = 2;
        view.w = _w;
        view.h = _h;
        view.d = 1;
        view.c = 1;

        view.cstep = (size_t)_w * _h;

        return view;
    }

    Mat flat;
    flat.create(_w, _h, elemsize, elempack, _allocator);
    if (flat.empty())
        return flat;

    // flatten: drop the gap between channels
    const size_t plane_bytes = (size_t)w * h * d * elemsize;
    const unsigned char* src_base = (const unsigned char*)data;
    unsigned char* dst_base = (unsigned char*)flat.data;
    for (int q = 0; q < c; q++)
    {
        memcpy(dst_base + q * plane_bytes, src_base + q * cstep * elemsize, plane_bytes);
    }

    return flat;
}
142

143
// Reinterpret this Mat as a 3-dimensional Mat of _w x _h x _c elements.
// Returns an empty Mat when the element counts differ (both products are
// evaluated in int, as written).
//  - dims >= 3 with a different channel count: flatten to 1-D first, then
//    reshape that result.
//  - dims < 3 and _w*_h does not equal the channel stride derived via
//    alignSize(..., 16): create a new Mat with _allocator and copy channel by
//    channel.
//  - otherwise: return a copy of this Mat with rewritten shape fields.
Mat Mat::reshape(int _w, int _h, int _c, Allocator* _allocator) const
{
    if (w * h * d * c != _w * _h * _c)
        return Mat();

    if (dims >= 3)
    {
        if (c != _c)
        {
            // flatten and then align
            Mat flat = reshape(_w * _h * _c, _allocator);
            return flat.reshape(_w, _h, _c, _allocator);
        }
    }
    else
    {
        const size_t plane = (size_t)_w * _h;
        if (plane != alignSize(plane * elemsize, 16) / elemsize)
        {
            Mat padded;
            padded.create(_w, _h, _c, elemsize, elempack, _allocator);
            if (padded.empty())
                return padded;

            // align channel: source planes are contiguous, destination uses padded.cstep
            const size_t plane_bytes = plane * elemsize;
            const unsigned char* src_base = (const unsigned char*)data;
            unsigned char* dst_base = (unsigned char*)padded.data;
            for (int q = 0; q < _c; q++)
            {
                memcpy(dst_base + q * padded.cstep * padded.elemsize, src_base + q * plane_bytes, plane_bytes);
            }

            return padded;
        }
    }

    // NOTE(review): presumably Mat's copy shares the underlying buffer — confirm in mat.h
    Mat view = *this;

    view.dims = 3;
    view.w = _w;
    view.h = _h;
    view.d = 1;
    view.c = _c;

    view.cstep = alignSize((size_t)_w * _h * elemsize, 16) / elemsize;

    return view;
}
187

188
// Reinterpret this Mat as a 4-dimensional Mat of _w x _h x _d x _c elements.
// Returns an empty Mat when the element counts differ (both products are
// evaluated in int, as written).
//  - dims >= 3 with a different channel count: flatten to 1-D first, then
//    reshape that result.
//  - dims < 3 and _w*_h*_d does not equal the channel stride derived via
//    alignSize(..., 16): create a new Mat with _allocator and copy channel by
//    channel.
//  - otherwise: return a copy of this Mat with rewritten shape fields.
Mat Mat::reshape(int _w, int _h, int _d, int _c, Allocator* _allocator) const
{
    if (w * h * d * c != _w * _h * _d * _c)
        return Mat();

    if (dims >= 3)
    {
        if (c != _c)
        {
            // flatten and then align
            Mat flat = reshape(_w * _h * _d * _c, _allocator);
            return flat.reshape(_w, _h, _d, _c, _allocator);
        }
    }
    else
    {
        const size_t volume = (size_t)_w * _h * _d;
        if (volume != alignSize(volume * elemsize, 16) / elemsize)
        {
            Mat padded;
            padded.create(_w, _h, _d, _c, elemsize, elempack, _allocator);
            if (padded.empty())
                return padded;

            // align channel: source volumes are contiguous, destination uses padded.cstep
            const size_t volume_bytes = volume * elemsize;
            const unsigned char* src_base = (const unsigned char*)data;
            unsigned char* dst_base = (unsigned char*)padded.data;
            for (int q = 0; q < _c; q++)
            {
                memcpy(dst_base + q * padded.cstep * padded.elemsize, src_base + q * volume_bytes, volume_bytes);
            }

            return padded;
        }
    }

    // NOTE(review): presumably Mat's copy shares the underlying buffer — confirm in mat.h
    Mat view = *this;

    view.dims = 4;
    view.w = _w;
    view.h = _h;
    view.d = _d;
    view.c = _c;

    view.cstep = alignSize((size_t)_w * _h * _d * elemsize, 16) / elemsize;

    return view;
}
232

233
void Mat::create(int _w, size_t _elemsize, Allocator* _allocator)
234
{
235
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
236
        return;
237

238
    release();
239

240
    elemsize = _elemsize;
241
    elempack = 1;
242
    allocator = _allocator;
243

244
    dims = 1;
245
    w = _w;
246
    h = 1;
247
    d = 1;
248
    c = 1;
249

250
    cstep = w;
251

252
    size_t totalsize = alignSize(total() * elemsize, 4);
253
    if (totalsize > 0)
254
    {
255
        if (allocator)
256
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
257
        else
258
            data = fastMalloc(totalsize + (int)sizeof(*refcount));
259
    }
260

261
    if (data)
262
    {
263
        refcount = (int*)(((unsigned char*)data) + totalsize);
264
        *refcount = 1;
265
    }
266
}
267

268
void Mat::create(int _w, int _h, size_t _elemsize, Allocator* _allocator)
269
{
270
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
271
        return;
272

273
    release();
274

275
    elemsize = _elemsize;
276
    elempack = 1;
277
    allocator = _allocator;
278

279
    dims = 2;
280
    w = _w;
281
    h = _h;
282
    d = 1;
283
    c = 1;
284

285
    cstep = (size_t)w * h;
286

287
    size_t totalsize = alignSize(total() * elemsize, 4);
288
    if (totalsize > 0)
289
    {
290
        if (allocator)
291
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
292
        else
293
            data = fastMalloc(totalsize + (int)sizeof(*refcount));
294
    }
295

296
    if (data)
297
    {
298
        refcount = (int*)(((unsigned char*)data) + totalsize);
299
        *refcount = 1;
300
    }
301
}
302

303
void Mat::create(int _w, int _h, int _c, size_t _elemsize, Allocator* _allocator)
304
{
305
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
306
        return;
307

308
    release();
309

310
    elemsize = _elemsize;
311
    elempack = 1;
312
    allocator = _allocator;
313

314
    dims = 3;
315
    w = _w;
316
    h = _h;
317
    d = 1;
318
    c = _c;
319

320
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
321

322
    size_t totalsize = alignSize(total() * elemsize, 4);
323
    if (totalsize > 0)
324
    {
325
        if (allocator)
326
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
327
        else
328
            data = fastMalloc(totalsize + (int)sizeof(*refcount));
329
    }
330

331
    if (data)
332
    {
333
        refcount = (int*)(((unsigned char*)data) + totalsize);
334
        *refcount = 1;
335
    }
336
}
337

338
void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, Allocator* _allocator)
339
{
340
    if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
341
        return;
342

343
    release();
344

345
    elemsize = _elemsize;
346
    elempack = 1;
347
    allocator = _allocator;
348

349
    dims = 4;
350
    w = _w;
351
    h = _h;
352
    d = _d;
353
    c = _c;
354

355
    cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
356

357
    size_t totalsize = alignSize(total() * elemsize, 4);
358
    if (totalsize > 0)
359
    {
360
        if (allocator)
361
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
362
        else
363
            data = fastMalloc(totalsize + (int)sizeof(*refcount));
364
    }
365

366
    if (data)
367
    {
368
        refcount = (int*)(((unsigned char*)data) + totalsize);
369
        *refcount = 1;
370
    }
371
}
372

373
void Mat::create(int _w, size_t _elemsize, int _elempack, Allocator* _allocator)
374
{
375
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
376
        return;
377

378
    release();
379

380
    elemsize = _elemsize;
381
    elempack = _elempack;
382
    allocator = _allocator;
383

384
    dims = 1;
385
    w = _w;
386
    h = 1;
387
    d = 1;
388
    c = 1;
389

390
    cstep = w;
391

392
    size_t totalsize = alignSize(total() * elemsize, 4);
393
    if (totalsize > 0)
394
    {
395
        if (allocator)
396
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
397
        else
398
            data = fastMalloc(totalsize + (int)sizeof(*refcount));
399
    }
400

401
    if (data)
402
    {
403
        refcount = (int*)(((unsigned char*)data) + totalsize);
404
        *refcount = 1;
405
    }
406
}
407

408
void Mat::create(int _w, int _h, size_t _elemsize, int _elempack, Allocator* _allocator)
409
{
410
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
411
        return;
412

413
    release();
414

415
    elemsize = _elemsize;
416
    elempack = _elempack;
417
    allocator = _allocator;
418

419
    dims = 2;
420
    w = _w;
421
    h = _h;
422
    d = 1;
423
    c = 1;
424

425
    cstep = (size_t)w * h;
426

427
    size_t totalsize = alignSize(total() * elemsize, 4);
428
    if (totalsize > 0)
429
    {
430
        if (allocator)
431
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
432
        else
433
            data = fastMalloc(totalsize + (int)sizeof(*refcount));
434
    }
435

436
    if (data)
437
    {
438
        refcount = (int*)(((unsigned char*)data) + totalsize);
439
        *refcount = 1;
440
    }
441
}
442

443
void Mat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
444
{
445
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
446
        return;
447

448
    release();
449

450
    elemsize = _elemsize;
451
    elempack = _elempack;
452
    allocator = _allocator;
453

454
    dims = 3;
455
    w = _w;
456
    h = _h;
457
    d = 1;
458
    c = _c;
459

460
    cstep = alignSize((size_t)w * h * elemsize, 16) / elemsize;
461

462
    size_t totalsize = alignSize(total() * elemsize, 4);
463
    if (totalsize > 0)
464
    {
465
        if (allocator)
466
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
467
        else
468
            data = fastMalloc(totalsize + (int)sizeof(*refcount));
469
    }
470

471
    if (data)
472
    {
473
        refcount = (int*)(((unsigned char*)data) + totalsize);
474
        *refcount = 1;
475
    }
476
}
477

478
void Mat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, Allocator* _allocator)
479
{
480
    if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
481
        return;
482

483
    release();
484

485
    elemsize = _elemsize;
486
    elempack = _elempack;
487
    allocator = _allocator;
488

489
    dims = 4;
490
    w = _w;
491
    h = _h;
492
    d = _d;
493
    c = _c;
494

495
    cstep = alignSize((size_t)w * h * d * elemsize, 16) / elemsize;
496

497
    size_t totalsize = alignSize(total() * elemsize, 4);
498
    if (totalsize > 0)
499
    {
500
        if (allocator)
501
            data = allocator->fastMalloc(totalsize + (int)sizeof(*refcount));
502
        else
503
            data = fastMalloc(totalsize + (int)sizeof(*refcount));
504
    }
505

506
    if (data)
507
    {
508
        refcount = (int*)(((unsigned char*)data) + totalsize);
509
        *refcount = 1;
510
    }
511
}
512

513
void Mat::create_like(const Mat& m, Allocator* _allocator)
514
{
515
    int _dims = m.dims;
516
    if (_dims == 1)
517
        create(m.w, m.elemsize, m.elempack, _allocator);
518
    if (_dims == 2)
519
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
520
    if (_dims == 3)
521
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
522
    if (_dims == 4)
523
        create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
524
}
525

526
#if NCNN_VULKAN
527
void Mat::create_like(const VkMat& m, Allocator* _allocator)
528
{
529
    int _dims = m.dims;
530
    if (_dims == 1)
531
        create(m.w, m.elemsize, m.elempack, _allocator);
532
    if (_dims == 2)
533
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
534
    if (_dims == 3)
535
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
536
    if (_dims == 4)
537
        create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
538
}
539

540
void Mat::create_like(const VkImageMat& im, Allocator* _allocator)
541
{
542
    int _dims = im.dims;
543
    if (_dims == 1)
544
        create(im.w, im.elemsize, im.elempack, _allocator);
545
    if (_dims == 2)
546
        create(im.w, im.h, im.elemsize, im.elempack, _allocator);
547
    if (_dims == 3)
548
        create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
549
    if (_dims == 4)
550
        create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
551
}
552
#endif // NCNN_VULKAN
553

554
#if NCNN_VULKAN
555
void VkMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
556
{
557
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
558
        return;
559

560
    release();
561

562
    elemsize = _elemsize;
563
    elempack = 1;
564
    allocator = _allocator;
565

566
    dims = 1;
567
    w = _w;
568
    h = 1;
569
    d = 1;
570
    c = 1;
571

572
    cstep = w;
573

574
    if (total() > 0)
575
    {
576
        size_t totalsize = alignSize(total() * elemsize, 4);
577

578
        data = allocator->fastMalloc(totalsize);
579
    }
580

581
    if (data)
582
    {
583
        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
584
        *refcount = 1;
585
    }
586
}
587

588
void VkMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
589
{
590
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
591
        return;
592

593
    release();
594

595
    elemsize = _elemsize;
596
    elempack = 1;
597
    allocator = _allocator;
598

599
    dims = 2;
600
    w = _w;
601
    h = _h;
602
    d = 1;
603
    c = 1;
604

605
    cstep = w * h;
606

607
    if (total() > 0)
608
    {
609
        size_t totalsize = alignSize(total() * elemsize, 4);
610

611
        data = allocator->fastMalloc(totalsize);
612
    }
613

614
    if (data)
615
    {
616
        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
617
        *refcount = 1;
618
    }
619
}
620

621
void VkMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
622
{
623
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
624
        return;
625

626
    release();
627

628
    elemsize = _elemsize;
629
    elempack = 1;
630
    allocator = _allocator;
631

632
    dims = 3;
633
    w = _w;
634
    h = _h;
635
    d = 1;
636
    c = _c;
637

638
    cstep = alignSize(w * h * elemsize, 16) / elemsize;
639

640
    if (total() > 0)
641
    {
642
        size_t totalsize = alignSize(total() * elemsize, 4);
643

644
        data = allocator->fastMalloc(totalsize);
645
    }
646

647
    if (data)
648
    {
649
        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
650
        *refcount = 1;
651
    }
652
}
653

654
void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
655
{
656
    if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
657
        return;
658

659
    release();
660

661
    elemsize = _elemsize;
662
    elempack = 1;
663
    allocator = _allocator;
664

665
    dims = 4;
666
    w = _w;
667
    h = _h;
668
    d = _d;
669
    c = _c;
670

671
    cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
672

673
    if (total() > 0)
674
    {
675
        size_t totalsize = alignSize(total() * elemsize, 4);
676

677
        data = allocator->fastMalloc(totalsize);
678
    }
679

680
    if (data)
681
    {
682
        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
683
        *refcount = 1;
684
    }
685
}
686

687
void VkMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
688
{
689
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
690
        return;
691

692
    release();
693

694
    elemsize = _elemsize;
695
    elempack = _elempack;
696
    allocator = _allocator;
697

698
    dims = 1;
699
    w = _w;
700
    h = 1;
701
    d = 1;
702
    c = 1;
703

704
    cstep = w;
705

706
    if (total() > 0)
707
    {
708
        size_t totalsize = alignSize(total() * elemsize, 4);
709

710
        data = allocator->fastMalloc(totalsize);
711
    }
712

713
    if (data)
714
    {
715
        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
716
        *refcount = 1;
717
    }
718
}
719

720
void VkMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
721
{
722
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
723
        return;
724

725
    release();
726

727
    elemsize = _elemsize;
728
    elempack = _elempack;
729
    allocator = _allocator;
730

731
    dims = 2;
732
    w = _w;
733
    h = _h;
734
    d = 1;
735
    c = 1;
736

737
    cstep = w * h;
738

739
    if (total() > 0)
740
    {
741
        size_t totalsize = alignSize(total() * elemsize, 4);
742

743
        data = allocator->fastMalloc(totalsize);
744
    }
745

746
    if (data)
747
    {
748
        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
749
        *refcount = 1;
750
    }
751
}
752

753
void VkMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
754
{
755
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
756
        return;
757

758
    release();
759

760
    elemsize = _elemsize;
761
    elempack = _elempack;
762
    allocator = _allocator;
763

764
    dims = 3;
765
    w = _w;
766
    h = _h;
767
    d = 1;
768
    c = _c;
769

770
    cstep = alignSize(w * h * elemsize, 16) / elemsize;
771

772
    if (total() > 0)
773
    {
774
        size_t totalsize = alignSize(total() * elemsize, 4);
775

776
        data = allocator->fastMalloc(totalsize);
777
    }
778

779
    if (data)
780
    {
781
        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
782
        *refcount = 1;
783
    }
784
}
785

786
void VkMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
787
{
788
    if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
789
        return;
790

791
    release();
792

793
    elemsize = _elemsize;
794
    elempack = _elempack;
795
    allocator = _allocator;
796

797
    dims = 4;
798
    w = _w;
799
    h = _h;
800
    d = _d;
801
    c = _c;
802

803
    cstep = alignSize(w * h * d * elemsize, 16) / elemsize;
804

805
    if (total() > 0)
806
    {
807
        size_t totalsize = alignSize(total() * elemsize, 4);
808

809
        data = allocator->fastMalloc(totalsize);
810
    }
811

812
    if (data)
813
    {
814
        refcount = (int*)((unsigned char*)data + offsetof(VkBufferMemory, refcount));
815
        *refcount = 1;
816
    }
817
}
818

819
void VkMat::create_like(const Mat& m, VkAllocator* _allocator)
820
{
821
    int _dims = m.dims;
822
    if (_dims == 1)
823
        create(m.w, m.elemsize, m.elempack, _allocator);
824
    if (_dims == 2)
825
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
826
    if (_dims == 3)
827
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
828
    if (_dims == 4)
829
        create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
830
}
831

832
void VkMat::create_like(const VkMat& m, VkAllocator* _allocator)
833
{
834
    int _dims = m.dims;
835
    if (_dims == 1)
836
        create(m.w, m.elemsize, m.elempack, _allocator);
837
    if (_dims == 2)
838
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
839
    if (_dims == 3)
840
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
841
    if (_dims == 4)
842
        create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
843
}
844

845
void VkMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
846
{
847
    int _dims = im.dims;
848
    if (_dims == 1)
849
        create(im.w, im.elemsize, im.elempack, _allocator);
850
    if (_dims == 2)
851
        create(im.w, im.h, im.elemsize, im.elempack, _allocator);
852
    if (_dims == 3)
853
        create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
854
    if (_dims == 4)
855
        create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
856
}
857

858
void VkImageMat::create(int _w, size_t _elemsize, VkAllocator* _allocator)
859
{
860
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
861
        return;
862

863
    release();
864

865
    elemsize = _elemsize;
866
    elempack = 1;
867
    allocator = _allocator;
868

869
    dims = 1;
870
    w = _w;
871
    h = 1;
872
    d = 1;
873
    c = 1;
874

875
    if (total() > 0)
876
    {
877
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
878
    }
879

880
    if (data)
881
    {
882
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
883
        *refcount = 1;
884
    }
885
}
886

887
void VkImageMat::create(int _w, int _h, size_t _elemsize, VkAllocator* _allocator)
888
{
889
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
890
        return;
891

892
    release();
893

894
    elemsize = _elemsize;
895
    elempack = 1;
896
    allocator = _allocator;
897

898
    dims = 2;
899
    w = _w;
900
    h = _h;
901
    d = 1;
902
    c = 1;
903

904
    if (total() > 0)
905
    {
906
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
907
    }
908

909
    if (data)
910
    {
911
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
912
        *refcount = 1;
913
    }
914
}
915

916
void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, VkAllocator* _allocator)
917
{
918
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
919
        return;
920

921
    release();
922

923
    elemsize = _elemsize;
924
    elempack = 1;
925
    allocator = _allocator;
926

927
    dims = 3;
928
    w = _w;
929
    h = _h;
930
    d = 1;
931
    c = _c;
932

933
    if (total() > 0)
934
    {
935
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
936
    }
937

938
    if (data)
939
    {
940
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
941
        *refcount = 1;
942
    }
943
}
944

945
void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, VkAllocator* _allocator)
946
{
947
    if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == 1 && allocator == _allocator)
948
        return;
949

950
    release();
951

952
    elemsize = _elemsize;
953
    elempack = 1;
954
    allocator = _allocator;
955

956
    dims = 4;
957
    w = _w;
958
    h = _h;
959
    d = _d;
960
    c = _c;
961

962
    if (total() > 0)
963
    {
964
        // underlying image is 3d
965
        data = allocator->fastMalloc(w, h * d, c, elemsize, elempack);
966
    }
967

968
    if (data)
969
    {
970
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
971
        *refcount = 1;
972
    }
973
}
974

975
void VkImageMat::create(int _w, size_t _elemsize, int _elempack, VkAllocator* _allocator)
976
{
977
    if (dims == 1 && w == _w && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
978
        return;
979

980
    release();
981

982
    elemsize = _elemsize;
983
    elempack = _elempack;
984
    allocator = _allocator;
985

986
    dims = 1;
987
    w = _w;
988
    h = 1;
989
    d = 1;
990
    c = 1;
991

992
    if (total() > 0)
993
    {
994
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
995
    }
996

997
    if (data)
998
    {
999
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
1000
        *refcount = 1;
1001
    }
1002
}
1003

1004
void VkImageMat::create(int _w, int _h, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1005
{
1006
    if (dims == 2 && w == _w && h == _h && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
1007
        return;
1008

1009
    release();
1010

1011
    elemsize = _elemsize;
1012
    elempack = _elempack;
1013
    allocator = _allocator;
1014

1015
    dims = 2;
1016
    w = _w;
1017
    h = _h;
1018
    d = 1;
1019
    c = 1;
1020

1021
    if (total() > 0)
1022
    {
1023
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
1024
    }
1025

1026
    if (data)
1027
    {
1028
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
1029
        *refcount = 1;
1030
    }
1031
}
1032

1033
void VkImageMat::create(int _w, int _h, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1034
{
1035
    if (dims == 3 && w == _w && h == _h && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
1036
        return;
1037

1038
    release();
1039

1040
    elemsize = _elemsize;
1041
    elempack = _elempack;
1042
    allocator = _allocator;
1043

1044
    dims = 3;
1045
    w = _w;
1046
    h = _h;
1047
    d = 1;
1048
    c = _c;
1049

1050
    if (total() > 0)
1051
    {
1052
        data = allocator->fastMalloc(w, h, c, elemsize, elempack);
1053
    }
1054

1055
    if (data)
1056
    {
1057
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
1058
        *refcount = 1;
1059
    }
1060
}
1061

1062
void VkImageMat::create(int _w, int _h, int _d, int _c, size_t _elemsize, int _elempack, VkAllocator* _allocator)
1063
{
1064
    if (dims == 4 && w == _w && h == _h && d == _d && c == _c && elemsize == _elemsize && elempack == _elempack && allocator == _allocator)
1065
        return;
1066

1067
    release();
1068

1069
    elemsize = _elemsize;
1070
    elempack = _elempack;
1071
    allocator = _allocator;
1072

1073
    dims = 4;
1074
    w = _w;
1075
    h = _h;
1076
    d = _d;
1077
    c = _c;
1078

1079
    if (total() > 0)
1080
    {
1081
        // underlying image is 3d
1082
        data = allocator->fastMalloc(w, h * d, c, elemsize, elempack);
1083
    }
1084

1085
    if (data)
1086
    {
1087
        refcount = (int*)((unsigned char*)data + offsetof(VkImageMemory, refcount));
1088
        *refcount = 1;
1089
    }
1090
}
1091

1092
void VkImageMat::create_like(const Mat& m, VkAllocator* _allocator)
1093
{
1094
    int _dims = m.dims;
1095
    if (_dims == 1)
1096
        create(m.w, m.elemsize, m.elempack, _allocator);
1097
    if (_dims == 2)
1098
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
1099
    if (_dims == 3)
1100
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
1101
    if (_dims == 4)
1102
        create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
1103
}
1104

1105
void VkImageMat::create_like(const VkMat& m, VkAllocator* _allocator)
1106
{
1107
    int _dims = m.dims;
1108
    if (_dims == 1)
1109
        create(m.w, m.elemsize, m.elempack, _allocator);
1110
    if (_dims == 2)
1111
        create(m.w, m.h, m.elemsize, m.elempack, _allocator);
1112
    if (_dims == 3)
1113
        create(m.w, m.h, m.c, m.elemsize, m.elempack, _allocator);
1114
    if (_dims == 4)
1115
        create(m.w, m.h, m.d, m.c, m.elemsize, m.elempack, _allocator);
1116
}
1117

1118
void VkImageMat::create_like(const VkImageMat& im, VkAllocator* _allocator)
1119
{
1120
    int _dims = im.dims;
1121
    if (_dims == 1)
1122
        create(im.w, im.elemsize, im.elempack, _allocator);
1123
    if (_dims == 2)
1124
        create(im.w, im.h, im.elemsize, im.elempack, _allocator);
1125
    if (_dims == 3)
1126
        create(im.w, im.h, im.c, im.elemsize, im.elempack, _allocator);
1127
    if (_dims == 4)
1128
        create(im.w, im.h, im.d, im.c, im.elemsize, im.elempack, _allocator);
1129
}
1130
#endif // NCNN_VULKAN
1131

1132
void Mat::substract_mean_normalize(const float* mean_vals, const float* norm_vals)
1133
{
1134
    Layer* op;
1135

1136
    if (mean_vals && !norm_vals)
1137
    {
1138
        // substract mean only
1139
        op = create_layer(LayerType::Bias);
1140

1141
        ParamDict pd;
1142
        pd.set(0, c);
1143

1144
        op->load_param(pd);
1145

1146
        Mat weights[1];
1147
        weights[0] = Mat(c);
1148
        for (int q = 0; q < c; q++)
1149
        {
1150
            weights[0][q] = -mean_vals[q];
1151
        }
1152

1153
        op->load_model(ModelBinFromMatArray(weights));
1154
    }
1155
    else if (!mean_vals && norm_vals)
1156
    {
1157
        // normalize only
1158
        op = create_layer(LayerType::Scale);
1159

1160
        ParamDict pd;
1161
        pd.set(0, c);
1162

1163
        op->load_param(pd);
1164

1165
        Mat weights[1];
1166
        weights[0] = Mat(c);
1167
        for (int q = 0; q < c; q++)
1168
        {
1169
            weights[0][q] = norm_vals[q];
1170
        }
1171

1172
        op->load_model(ModelBinFromMatArray(weights));
1173
    }
1174
    else if (mean_vals && norm_vals)
1175
    {
1176
        // substract mean and normalize
1177
        op = create_layer(LayerType::Scale);
1178

1179
        ParamDict pd;
1180
        pd.set(0, c);
1181
        pd.set(1, 1);
1182

1183
        op->load_param(pd);
1184

1185
        Mat weights[2];
1186
        weights[0] = Mat(c);
1187
        weights[1] = Mat(c);
1188
        for (int q = 0; q < c; q++)
1189
        {
1190
            weights[0][q] = norm_vals[q];
1191
            weights[1][q] = -mean_vals[q] * norm_vals[q];
1192
        }
1193

1194
        op->load_model(ModelBinFromMatArray(weights));
1195
    }
1196
    else // if (!mean_vals && !norm_vals)
1197
    {
1198
        return;
1199
    }
1200

1201
    Option opt;
1202
    opt.num_threads = 1; // TODO
1203

1204
    op->create_pipeline(opt);
1205

1206
    op->forward_inplace(*this, opt);
1207

1208
    op->destroy_pipeline(opt);
1209

1210
    delete op;
1211
}
1212

1213
// Build a float32 Mat from `size` half-precision values stored at `data`.
// The input is wrapped as a 1-D Mat with 2-byte elements (no copy is made
// by this function itself) and converted with cast_float16_to_float32().
Mat Mat::from_float16(const unsigned short* data, int size)
{
    Option opt;
    opt.num_threads = 1; // TODO

    // view over the caller's fp16 buffer
    Mat fp16_view(size, (void*)data, (size_t)2u);

    Mat fp32;
    cast_float16_to_float32(fp16_view, fp32, opt);

    return fp32;
}
1224

1225
#if NCNN_VULKAN
1226
#if NCNN_PLATFORM_API
1227
#if __ANDROID_API__ >= 26
1228
// Create a 2-D VkImageMat sized from the hardware-buffer allocator's
// width() and height(), backed by that same allocator.
VkImageMat VkImageMat::from_android_hardware_buffer(VkAndroidHardwareBufferImageAllocator* allocator)
{
    // elemsize for ahb is actually just a placeholder
    size_t placeholder_elemsize = 4u;

    int ahb_w = allocator->width();
    int ahb_h = allocator->height();

    return VkImageMat(ahb_w, ahb_h, placeholder_elemsize, allocator);
}
1236
#endif // __ANDROID_API__ >= 26
1237
#endif // NCNN_PLATFORM_API
1238
#endif // NCNN_VULKAN
1239

1240
// Convert one IEEE-754 binary32 value to its binary16 bit pattern.
//
// value    the float to convert
// returns  the 16-bit half-precision encoding (1 : 5 : 10)
//
// Conversion rules:
//   - zero and fp32 denormals become signed zero
//   - infinity stays infinity; any NaN becomes a NaN with significand 0x200
//   - magnitudes too large for fp16 become signed infinity
//   - magnitudes below the smallest normal fp16 are flushed to signed zero
//   - the significand is truncated (low 13 bits dropped), not rounded
unsigned short float32_to_float16(float value)
{
    // Fetch the raw bit pattern with memcpy; reading the inactive member of
    // a union is undefined behavior in C++.
    unsigned int bits;
    memcpy(&bits, &value, sizeof(bits));

    // 1 : 8 : 23
    const unsigned int sign = bits >> 31;
    const unsigned int exponent = (bits >> 23) & 0xFFu;
    const unsigned int significand = bits & 0x7FFFFFu;

    // 1 : 5 : 10
    const unsigned int sign_bit = sign << 15;
    const unsigned int exp_all_ones = 0x1Fu << 10;

    if (exponent == 0)
    {
        // zero or denormal, always underflow
        return (unsigned short)sign_bit;
    }

    if (exponent == 0xFF)
    {
        // infinity or NaN
        return (unsigned short)(sign_bit | exp_all_ones | (significand ? 0x200u : 0x00u));
    }

    // normalized, rebias the exponent from 127 to 15
    const int newexp = (int)exponent + (-127 + 15);

    if (newexp >= 31)
    {
        // overflow, return infinity
        return (unsigned short)(sign_bit | exp_all_ones);
    }

    if (newexp <= 0)
    {
        // some normal fp32 cannot be expressed as normal fp16
        return (unsigned short)sign_bit;
    }

    // normal fp16
    return (unsigned short)(sign_bit | ((unsigned int)newexp << 10) | (significand >> 13));
}
1293

1294
// Convert one IEEE-754 binary16 bit pattern to a binary32 float.
//
// value    the 16-bit half-precision encoding (1 : 5 : 10)
// returns  the float with the same value; fp16 denormals are renormalized,
//          infinity and NaN keep their class (NaN payload shifted left by 13)
float float16_to_float32(unsigned short value)
{
    // 1 : 5 : 10
    // Fields are kept in unsigned int so that shifting the sign to bit 31
    // never goes through a signed int.
    const unsigned int sign = (value >> 15) & 0x1u;
    const unsigned int exponent = (value >> 10) & 0x1Fu;
    unsigned int significand = value & 0x03FFu;

    // 1 : 8 : 23
    unsigned int bits;
    if (exponent == 0)
    {
        if (significand == 0)
        {
            // zero
            bits = sign << 31;
        }
        else
        {
            // denormal: shift until the top significand bit (0x200) is set,
            // counting how far the exponent must drop
            unsigned int shift = 0;
            while ((significand & 0x200u) == 0)
            {
                significand <<= 1;
                shift++;
            }
            // one more shift turns that top bit into the implicit leading 1
            significand = (significand << 1) & 0x3FFu;
            bits = (sign << 31) | (((unsigned int)(-15 + 127) - shift) << 23) | (significand << 13);
        }
    }
    else if (exponent == 0x1F)
    {
        // infinity or NaN
        bits = (sign << 31) | (0xFFu << 23) | (significand << 13);
    }
    else
    {
        // normalized, rebias the exponent from 15 to 127
        bits = (sign << 31) | ((exponent + (unsigned int)(-15 + 127)) << 23) | (significand << 13);
    }

    // Reinterpret the bit pattern with memcpy; reading the inactive member
    // of a union is undefined behavior in C++.
    float result;
    memcpy(&result, &bits, sizeof(result));
    return result;
}
1344

1345
void copy_make_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int type, float v, const Option& opt)
1346
{
1347
    Layer* padding = create_layer(LayerType::Padding);
1348

1349
    ParamDict pd;
1350
    pd.set(0, top);
1351
    pd.set(1, bottom);
1352
    pd.set(2, left);
1353
    pd.set(3, right);
1354
    pd.set(4, type);
1355
    pd.set(5, v);
1356

1357
    padding->load_param(pd);
1358

1359
    padding->create_pipeline(opt);
1360

1361
    padding->forward(src, dst, opt);
1362

1363
    padding->destroy_pipeline(opt);
1364

1365
    delete padding;
1366
}
1367

1368
void copy_make_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, int type, float v, const Option& opt)
1369
{
1370
    Layer* padding = create_layer(LayerType::Padding);
1371

1372
    ParamDict pd;
1373
    pd.set(0, top);
1374
    pd.set(1, bottom);
1375
    pd.set(2, left);
1376
    pd.set(3, right);
1377
    pd.set(4, type);
1378
    pd.set(5, v);
1379
    pd.set(7, front);
1380
    pd.set(8, behind);
1381

1382
    padding->load_param(pd);
1383

1384
    padding->create_pipeline(opt);
1385

1386
    padding->forward(src, dst, opt);
1387

1388
    padding->destroy_pipeline(opt);
1389

1390
    delete padding;
1391
}
1392

1393
void copy_cut_border(const Mat& src, Mat& dst, int top, int bottom, int left, int right, const Option& opt)
1394
{
1395
    if (left + right > src.w || top + bottom > src.h)
1396
    {
1397
        NCNN_LOGE("copy_cut_border parameter error, top: %d, bottom: %d, left: %d, right: %d, src.w: %d, src.h: %d", top, bottom, left, right, src.w, src.h);
1398
        return;
1399
    }
1400
    Layer* crop = create_layer(LayerType::Crop);
1401

1402
    ParamDict pd;
1403
    pd.set(0, left);
1404
    pd.set(1, top);
1405
    pd.set(2, 0);
1406
    pd.set(3, src.w - left - right);
1407
    pd.set(4, src.h - top - bottom);
1408
    pd.set(5, -233);
1409

1410
    crop->load_param(pd);
1411

1412
    crop->create_pipeline(opt);
1413

1414
    crop->forward(src, dst, opt);
1415

1416
    crop->destroy_pipeline(opt);
1417

1418
    delete crop;
1419
}
1420

1421
void copy_cut_border_3d(const Mat& src, Mat& dst, int top, int bottom, int left, int right, int front, int behind, const Option& opt)
1422
{
1423
    if (left + right > src.w || top + bottom > src.h || front + behind > src.d)
1424
    {
1425
        NCNN_LOGE("copy_cut_border_3d parameter error, top: %d, bottom: %d, left: %d, right: %d, front: %d, behind: %d, src.w: %d, src.h: %d, src.d: %d", top, bottom, left, right, front, behind, src.w, src.h, src.d);
1426
        return;
1427
    }
1428
    Layer* crop = create_layer(LayerType::Crop);
1429

1430
    ParamDict pd;
1431
    pd.set(0, left);
1432
    pd.set(1, top);
1433
    pd.set(13, front);
1434
    pd.set(2, 0);
1435
    pd.set(3, src.w - left - right);
1436
    pd.set(4, src.h - top - bottom);
1437
    pd.set(14, src.d - front - behind);
1438
    pd.set(5, -233);
1439

1440
    crop->load_param(pd);
1441

1442
    crop->create_pipeline(opt);
1443

1444
    crop->forward(src, dst, opt);
1445

1446
    crop->destroy_pipeline(opt);
1447

1448
    delete crop;
1449
}
1450

1451
void resize_nearest(const Mat& src, Mat& dst, int w, int h, const Option& opt)
1452
{
1453
    Layer* interp = create_layer(LayerType::Interp);
1454

1455
    ParamDict pd;
1456
    pd.set(0, 1);
1457
    pd.set(3, h);
1458
    pd.set(4, w);
1459

1460
    interp->load_param(pd);
1461

1462
    interp->create_pipeline(opt);
1463

1464
    interp->forward(src, dst, opt);
1465

1466
    interp->destroy_pipeline(opt);
1467

1468
    delete interp;
1469
}
1470

1471
void resize_bilinear(const Mat& src, Mat& dst, int w, int h, const Option& opt)
1472
{
1473
    Layer* interp = create_layer(LayerType::Interp);
1474

1475
    ParamDict pd;
1476
    pd.set(0, 2);
1477
    pd.set(3, h);
1478
    pd.set(4, w);
1479

1480
    interp->load_param(pd);
1481

1482
    interp->create_pipeline(opt);
1483

1484
    interp->forward(src, dst, opt);
1485

1486
    interp->destroy_pipeline(opt);
1487

1488
    delete interp;
1489
}
1490

1491
void resize_bicubic(const Mat& src, Mat& dst, int w, int h, const Option& opt)
1492
{
1493
    Layer* interp = create_layer(LayerType::Interp);
1494

1495
    ParamDict pd;
1496
    pd.set(0, 3);
1497
    pd.set(3, h);
1498
    pd.set(4, w);
1499

1500
    interp->load_param(pd);
1501

1502
    interp->create_pipeline(opt);
1503

1504
    interp->forward(src, dst, opt);
1505

1506
    interp->destroy_pipeline(opt);
1507

1508
    delete interp;
1509
}
1510

1511
void convert_packing(const Mat& src, Mat& dst, int _elempack, const Option& opt)
1512
{
1513
    Layer* packing = create_layer(LayerType::Packing);
1514

1515
    ParamDict pd;
1516
    pd.set(0, _elempack);
1517

1518
    packing->load_param(pd);
1519

1520
    packing->create_pipeline(opt);
1521

1522
    packing->forward(src, dst, opt);
1523

1524
    packing->destroy_pipeline(opt);
1525

1526
    delete packing;
1527
}
1528

1529
void flatten(const Mat& src, Mat& dst, const Option& opt)
1530
{
1531
    Layer* flatten = create_layer(LayerType::Flatten);
1532

1533
    ParamDict pd;
1534

1535
    flatten->load_param(pd);
1536

1537
    flatten->create_pipeline(opt);
1538

1539
    flatten->forward(src, dst, opt);
1540

1541
    flatten->destroy_pipeline(opt);
1542

1543
    delete flatten;
1544
}
1545

1546
void cast_float32_to_float16(const Mat& src, Mat& dst, const Option& opt)
1547
{
1548
    Layer* cast = create_layer(LayerType::Cast);
1549

1550
    ParamDict pd;
1551
    pd.set(0, 1);
1552
    pd.set(1, 2);
1553

1554
    cast->load_param(pd);
1555

1556
    cast->create_pipeline(opt);
1557

1558
    cast->forward(src, dst, opt);
1559

1560
    cast->destroy_pipeline(opt);
1561

1562
    delete cast;
1563
}
1564

1565
void cast_float16_to_float32(const Mat& src, Mat& dst, const Option& opt)
1566
{
1567
    Layer* cast = create_layer(LayerType::Cast);
1568

1569
    ParamDict pd;
1570
    pd.set(0, 2);
1571
    pd.set(1, 1);
1572

1573
    cast->load_param(pd);
1574

1575
    cast->create_pipeline(opt);
1576

1577
    cast->forward(src, dst, opt);
1578

1579
    cast->destroy_pipeline(opt);
1580

1581
    delete cast;
1582
}
1583

1584
void cast_int8_to_float32(const Mat& src, Mat& dst, const Option& opt)
1585
{
1586
    Layer* cast = create_layer(LayerType::Cast);
1587

1588
    ParamDict pd;
1589
    pd.set(0, 3);
1590
    pd.set(1, 1);
1591

1592
    cast->load_param(pd);
1593

1594
    cast->create_pipeline(opt);
1595

1596
    cast->forward(src, dst, opt);
1597

1598
    cast->destroy_pipeline(opt);
1599

1600
    delete cast;
1601
}
1602

1603
void cast_float32_to_bfloat16(const Mat& src, Mat& dst, const Option& opt)
1604
{
1605
    Layer* cast = create_layer(LayerType::Cast);
1606

1607
    ParamDict pd;
1608
    pd.set(0, 1);
1609
    pd.set(1, 4);
1610

1611
    cast->load_param(pd);
1612

1613
    cast->create_pipeline(opt);
1614

1615
    cast->forward(src, dst, opt);
1616

1617
    cast->destroy_pipeline(opt);
1618

1619
    delete cast;
1620
}
1621

1622
void cast_bfloat16_to_float32(const Mat& src, Mat& dst, const Option& opt)
1623
{
1624
    Layer* cast = create_layer(LayerType::Cast);
1625

1626
    ParamDict pd;
1627
    pd.set(0, 4);
1628
    pd.set(1, 1);
1629

1630
    cast->load_param(pd);
1631

1632
    cast->create_pipeline(opt);
1633

1634
    cast->forward(src, dst, opt);
1635

1636
    cast->destroy_pipeline(opt);
1637

1638
    delete cast;
1639
}
1640

1641
void quantize_to_int8(const Mat& src, Mat& dst, const Mat& scale_data, const Option& opt)
1642
{
1643
    Layer* quantize = create_layer(LayerType::Quantize);
1644

1645
    ParamDict pd;
1646
    pd.set(0, scale_data.w);
1647

1648
    quantize->load_param(pd);
1649

1650
    Mat weights[1];
1651
    weights[0] = scale_data;
1652

1653
    quantize->load_model(ModelBinFromMatArray(weights));
1654

1655
    quantize->create_pipeline(opt);
1656

1657
    quantize->forward(src, dst, opt);
1658

1659
    quantize->destroy_pipeline(opt);
1660

1661
    delete quantize;
1662
}
1663

1664
void dequantize_from_int32(const Mat& src, Mat& dst, const Mat& scale_data, const Mat& bias_data, const Option& opt)
1665
{
1666
    Layer* dequantize = create_layer(LayerType::Dequantize);
1667

1668
    ParamDict pd;
1669
    pd.set(0, scale_data.w);
1670
    pd.set(1, bias_data.w);
1671

1672
    dequantize->load_param(pd);
1673

1674
    Mat weights[2];
1675
    weights[0] = scale_data;
1676
    weights[1] = bias_data;
1677

1678
    dequantize->load_model(ModelBinFromMatArray(weights));
1679

1680
    dequantize->create_pipeline(opt);
1681

1682
    dequantize->forward(src, dst, opt);
1683

1684
    dequantize->destroy_pipeline(opt);
1685

1686
    delete dequantize;
1687
}
1688

1689
void requantize_from_int32_to_int8(const Mat& src, Mat& dst, const Mat& scale_in_data, const Mat& scale_out_data, const Mat& bias_data, int activation_type, const Mat& activation_params, const Option& opt)
1690
{
1691
    Layer* requantize = create_layer(LayerType::Requantize);
1692

1693
    ParamDict pd;
1694
    pd.set(0, scale_in_data.w);
1695
    pd.set(1, scale_out_data.w);
1696
    pd.set(2, bias_data.w);
1697
    pd.set(3, activation_type);
1698
    pd.set(4, activation_params);
1699

1700
    requantize->load_param(pd);
1701

1702
    Mat weights[3];
1703
    weights[0] = scale_in_data;
1704
    weights[1] = scale_out_data;
1705
    weights[2] = bias_data;
1706

1707
    requantize->load_model(ModelBinFromMatArray(weights));
1708

1709
    requantize->create_pipeline(opt);
1710

1711
    requantize->forward(src, dst, opt);
1712

1713
    requantize->destroy_pipeline(opt);
1714

1715
    delete requantize;
1716
}
1717

1718
} // namespace ncnn
1719

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.