// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "testutil.h"
// Reference implementation: repack `a` to `out_elempack` with the naive
// (platform-independent) Packing layer and store the result in `b`.
// Used as the ground truth the optimized cpu/gpu paths are compared against.
// Returns 0 on success.
static int packing_cpu_naive(const ncnn::Mat& a, ncnn::Mat& b, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack); // param 0 = out_elempack

    std::vector<ncnn::Mat> weights(0); // Packing has no weights

    ncnn::Option opt;
    opt.num_threads = 1;

    ncnn::Layer* op = ncnn::create_layer_naive("Packing");

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    op->forward(a, b, opt);

    op->destroy_pipeline(opt);

    delete op;

    return 0;
}
// Test the optimized cpu Packing layer on fp32 data: repack `a` to
// `in_elempack`, then pack to `out_elempack` with both the naive reference
// and the optimized layer, and compare the results.
// Returns 0 on success, -1 on mismatch.
static int test_packing_cpu_fp32(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = false;
    opt.use_int8_inference = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_packing_layout = false;

    ncnn::Layer* op = ncnn::create_layer_cpu("Packing");

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    // bring the input to the requested source packing first
    ncnn::Mat ap;
    ncnn::convert_packing(a, ap, in_elempack, opt);

    // expected result from the naive reference
    ncnn::Mat b;
    packing_cpu_naive(ap, b, out_elempack);

    // actual result from the optimized cpu layer
    ncnn::Mat c;
    op->forward(ap, c, opt);

    op->destroy_pipeline(opt);

    delete op;

    if (CompareMat(b, c, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_cpu_fp32 failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}
// Test the optimized cpu Packing layer on fp16 storage data.
// Skips (returns 0) when the layer does not support fp16 storage.
// Returns 0 on success, -1 on mismatch.
static int test_packing_cpu_fp16(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = false;
    opt.use_int8_inference = false;
    opt.use_fp16_storage = true;
    opt.use_fp16_arithmetic = true;
    opt.use_packing_layout = false;

    ncnn::Layer* op = ncnn::create_layer_cpu("Packing");

    // nothing to test if this build's Packing has no fp16 storage path
    if (!op->support_fp16_storage)
    {
        delete op;
        return 0;
    }

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    // cast the fp32 input to fp16, then to the requested source packing
    ncnn::Mat a16;
    ncnn::cast_float32_to_float16(a, a16, opt);

    ncnn::Mat ap;
    ncnn::convert_packing(a16, ap, in_elempack, opt);

    // expected result from the naive reference
    ncnn::Mat b;
    packing_cpu_naive(ap, b, out_elempack);

    // actual result from the optimized cpu layer
    ncnn::Mat c;
    op->forward(ap, c, opt);

    op->destroy_pipeline(opt);

    delete op;

    // cast the optimized output back to fp32 for comparison
    ncnn::Mat c32;
    ncnn::cast_float16_to_float32(c, c32, opt);

    if (CompareMat(b, c32, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_cpu_fp16 failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}
// Test the optimized cpu Packing layer on int8 data. A fresh signed-int8 mat
// with the same shape as `a` is generated, repacked by both the naive
// reference and the optimized layer, and both results are cast to fp32 for
// comparison. Returns 0 on success, -1 on mismatch.
static int test_packing_cpu_int8(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = false;
    opt.use_int8_inference = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_packing_layout = false;

    ncnn::Layer* op = ncnn::create_layer_cpu("Packing");

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    // build an int8 input with the same shape as a
    ncnn::Mat a8;
    if (a.dims == 1) a8 = RandomS8Mat(a.w);
    if (a.dims == 2) a8 = RandomS8Mat(a.w, a.h);
    if (a.dims == 3) a8 = RandomS8Mat(a.w, a.h, a.c);
    if (a.dims == 4) a8 = RandomS8Mat(a.w, a.h, a.d, a.c);

    ncnn::Mat ap;
    ncnn::convert_packing(a8, ap, in_elempack, opt);

    // expected result from the naive reference
    ncnn::Mat b;
    packing_cpu_naive(ap, b, out_elempack);

    // actual result from the optimized cpu layer
    ncnn::Mat c;
    op->forward(ap, c, opt);

    op->destroy_pipeline(opt);

    delete op;

    // compare in fp32 space
    ncnn::Mat b32;
    ncnn::cast_int8_to_float32(b, b32, opt);

    ncnn::Mat c32;
    ncnn::cast_int8_to_float32(c, c32, opt);

    if (CompareMat(b32, c32, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_cpu_int8 failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}
// Run the cpu packing test for one (in_elempack, out_elempack) pair across
// all three storage types. Returns the first non-zero failure code, 0 if all pass.
static int test_packing_cpu(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    return 0
           || test_packing_cpu_fp32(a, in_elempack, out_elempack)
           || test_packing_cpu_fp16(a, in_elempack, out_elempack)
           || test_packing_cpu_int8(a, in_elempack, out_elempack);
}
// Test the vulkan Packing layer with buffer (VkMat) storage on both sides:
// upload packed input, run the layer on gpu, download, and compare against
// the naive cpu reference. Returns 0 on success, -1 on mismatch.
static int test_packing_gpu_buffer(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);
    pd.set(2, 1); // cast_type_from
    pd.set(3, 1); // cast_type_to
    pd.set(4, 0); // storage_type_from
    pd.set(5, 0); // storage_type_to

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_shader_pack8 = true;
    opt.use_image_storage = false;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    // downgrade options the device cannot honor
    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Layer* op = ncnn::create_layer_vulkan("Packing");

    op->vkdev = vkdev;

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat ap;
    ncnn::convert_packing(a, ap, in_elempack, opt);

    // expected result from the naive cpu reference
    ncnn::Mat b;
    packing_cpu_naive(ap, b, out_elempack);

    ncnn::Mat d;

    ncnn::VkCompute cmd(vkdev);

    // upload
    ncnn::VkMat a_gpu;
    cmd.record_clone(ap, a_gpu, opt);

    // forward
    ncnn::VkMat d_gpu;
    op->forward(a_gpu, d_gpu, cmd, opt);

    // download
    cmd.record_clone(d_gpu, d, opt);

    cmd.submit_and_wait();

    op->destroy_pipeline(opt);

    delete op;

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_gpu_buffer failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}
// Test the vulkan Packing layer with image (VkImageMat) storage on both
// sides. Same scheme as test_packing_gpu_buffer but with
// storage_type_from/to = 1 and image storage enabled.
// Returns 0 on success, -1 on mismatch.
static int test_packing_gpu_image(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::ParamDict pd;
    pd.set(0, out_elempack);
    pd.set(2, 1); // cast_type_from
    pd.set(3, 1); // cast_type_to
    pd.set(4, 1); // storage_type_from
    pd.set(5, 1); // storage_type_to

    std::vector<ncnn::Mat> weights(0);

    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_shader_pack8 = true;
    opt.use_image_storage = true;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    // downgrade options the device cannot honor
    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Layer* op = ncnn::create_layer_vulkan("Packing");

    op->vkdev = vkdev;

    op->load_param(pd);

    ncnn::ModelBinFromMatArray mb(weights.data());

    op->load_model(mb);

    op->create_pipeline(opt);

    ncnn::Mat ap;
    ncnn::convert_packing(a, ap, in_elempack, opt);

    // expected result from the naive cpu reference
    ncnn::Mat b;
    packing_cpu_naive(ap, b, out_elempack);

    ncnn::Mat d;

    ncnn::VkCompute cmd(vkdev);

    // upload
    ncnn::VkImageMat a_gpu;
    cmd.record_clone(ap, a_gpu, opt);

    // forward
    ncnn::VkImageMat d_gpu;
    op->forward(a_gpu, d_gpu, cmd, opt);

    // download
    cmd.record_clone(d_gpu, d, opt);

    cmd.submit_and_wait();

    op->destroy_pipeline(opt);

    delete op;

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_gpu_image failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}
// Test VulkanDevice::convert_packing from buffer (VkMat) storage to image
// (VkImageMat) storage, comparing against the naive cpu reference.
// Returns 0 on success, -1 on mismatch.
static int test_packing_gpu_buffer2image(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_shader_pack8 = true;
    opt.use_image_storage = true;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    // downgrade options the device cannot honor
    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Mat ap;
    ncnn::convert_packing(a, ap, in_elempack, opt);

    // expected result from the naive cpu reference
    ncnn::Mat b;
    packing_cpu_naive(ap, b, out_elempack);

    ncnn::Mat d;

    ncnn::VkCompute cmd(vkdev);

    // upload as buffer
    ncnn::VkMat a_gpu;
    cmd.record_clone(ap, a_gpu, opt);

    // repack buffer -> image on device
    ncnn::VkImageMat d_gpu;
    vkdev->convert_packing(a_gpu, d_gpu, out_elempack, cmd, opt);

    // download
    cmd.record_clone(d_gpu, d, opt);

    cmd.submit_and_wait();

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_gpu_buffer2image failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}
// Test VulkanDevice::convert_packing from image (VkImageMat) storage to
// buffer (VkMat) storage, comparing against the naive cpu reference.
// Returns 0 on success, -1 on mismatch.
static int test_packing_gpu_image2buffer(const ncnn::Mat& a, int in_elempack, int out_elempack)
{
    ncnn::Option opt;
    opt.num_threads = 1;
    opt.use_vulkan_compute = true;
    opt.use_int8_inference = false;
    opt.use_fp16_packed = false;
    opt.use_fp16_storage = false;
    opt.use_fp16_arithmetic = false;
    opt.use_int8_storage = false;
    opt.use_int8_arithmetic = false;
    opt.use_packing_layout = true;
    opt.use_shader_pack8 = true;
    opt.use_image_storage = true;

    ncnn::VulkanDevice* vkdev = ncnn::get_gpu_device();

    ncnn::VkAllocator* blob_vkallocator = vkdev->acquire_blob_allocator();
    ncnn::VkAllocator* staging_vkallocator = vkdev->acquire_staging_allocator();

    opt.blob_vkallocator = blob_vkallocator;
    opt.workspace_vkallocator = blob_vkallocator;
    opt.staging_vkallocator = staging_vkallocator;

    // downgrade options the device cannot honor
    if (!vkdev->info.support_fp16_packed()) opt.use_fp16_packed = false;
    if (!vkdev->info.support_fp16_storage()) opt.use_fp16_storage = false;

    ncnn::Mat ap;
    ncnn::convert_packing(a, ap, in_elempack, opt);

    // expected result from the naive cpu reference
    ncnn::Mat b;
    packing_cpu_naive(ap, b, out_elempack);

    ncnn::Mat d;

    ncnn::VkCompute cmd(vkdev);

    // upload as image
    ncnn::VkImageMat a_gpu;
    cmd.record_clone(ap, a_gpu, opt);

    // repack image -> buffer on device
    ncnn::VkMat d_gpu;
    vkdev->convert_packing(a_gpu, d_gpu, out_elempack, cmd, opt);

    // download
    cmd.record_clone(d_gpu, d, opt);

    cmd.submit_and_wait();

    vkdev->reclaim_blob_allocator(blob_vkallocator);
    vkdev->reclaim_staging_allocator(staging_vkallocator);

    if (CompareMat(b, d, 0.001) != 0)
    {
        fprintf(stderr, "test_packing_gpu_image2buffer failed a.dims=%d a=(%d %d %d %d) in_elempack=%d out_elempack=%d\n", a.dims, a.w, a.h, a.d, a.c, in_elempack, out_elempack);
        return -1;
    }

    return 0;
}
// Run the cpu packing tests over all interesting (in, out) elempack pairs,
// including pack 16 for AVX-512-capable builds.
// NOTE: the corrupted source listed (4, 8) twice and never covered the
// (8, 8) identity case; the third entry is fixed to (8, 8) to match the
// parallel gpu list.
static int test_packing_cpu(const ncnn::Mat& a)
{
    return 0
           || test_packing_cpu(a, 1, 1)
           || test_packing_cpu(a, 4, 4)
           || test_packing_cpu(a, 8, 8)
           || test_packing_cpu(a, 1, 4)
           || test_packing_cpu(a, 4, 1)
           || test_packing_cpu(a, 1, 8)
           || test_packing_cpu(a, 8, 1)
           || test_packing_cpu(a, 4, 8)
           || test_packing_cpu(a, 8, 4)
           || test_packing_cpu(a, 1, 16)
           || test_packing_cpu(a, 16, 1)
           || test_packing_cpu(a, 4, 16)
           || test_packing_cpu(a, 16, 4)
           || test_packing_cpu(a, 8, 16)
           || test_packing_cpu(a, 16, 8);
}
// Run all gpu packing tests (buffer, image, buffer->image, image->buffer)
// over every (in, out) elempack pair up to pack 8.
static int test_packing_gpu(const ncnn::Mat& a)
{
    return 0
           || test_packing_gpu_buffer(a, 1, 1)
           || test_packing_gpu_buffer(a, 4, 4)
           || test_packing_gpu_buffer(a, 8, 8)
           || test_packing_gpu_buffer(a, 1, 4)
           || test_packing_gpu_buffer(a, 4, 1)
           || test_packing_gpu_buffer(a, 1, 8)
           || test_packing_gpu_buffer(a, 8, 1)
           || test_packing_gpu_buffer(a, 4, 8)
           || test_packing_gpu_buffer(a, 8, 4)
           || test_packing_gpu_image(a, 1, 1)
           || test_packing_gpu_image(a, 4, 4)
           || test_packing_gpu_image(a, 8, 8)
           || test_packing_gpu_image(a, 1, 4)
           || test_packing_gpu_image(a, 4, 1)
           || test_packing_gpu_image(a, 1, 8)
           || test_packing_gpu_image(a, 8, 1)
           || test_packing_gpu_image(a, 4, 8)
           || test_packing_gpu_image(a, 8, 4)
           || test_packing_gpu_buffer2image(a, 1, 1)
           || test_packing_gpu_buffer2image(a, 4, 4)
           || test_packing_gpu_buffer2image(a, 8, 8)
           || test_packing_gpu_buffer2image(a, 1, 4)
           || test_packing_gpu_buffer2image(a, 4, 1)
           || test_packing_gpu_buffer2image(a, 1, 8)
           || test_packing_gpu_buffer2image(a, 8, 1)
           || test_packing_gpu_buffer2image(a, 4, 8)
           || test_packing_gpu_buffer2image(a, 8, 4)
           || test_packing_gpu_image2buffer(a, 1, 1)
           || test_packing_gpu_image2buffer(a, 4, 4)
           || test_packing_gpu_image2buffer(a, 8, 8)
           || test_packing_gpu_image2buffer(a, 1, 4)
           || test_packing_gpu_image2buffer(a, 4, 1)
           || test_packing_gpu_image2buffer(a, 1, 8)
           || test_packing_gpu_image2buffer(a, 8, 1)
           || test_packing_gpu_image2buffer(a, 4, 8)
           || test_packing_gpu_image2buffer(a, 8, 4);
}
static int test_packing_0()
585
ncnn::Mat a = RandomMat(9, 7, 10, 16);
586
ncnn::Mat b = RandomMat(9, 7, 10, 3);
588
|| test_packing_cpu(a)
589
|| test_packing_cpu(b)
591
|| test_packing_gpu(a)
596
static int test_packing_1()
598
ncnn::Mat a = RandomMat(9, 10, 16);
599
ncnn::Mat b = RandomMat(9, 10, 3);
601
|| test_packing_cpu(a)
602
|| test_packing_cpu(b)
604
|| test_packing_gpu(a)
609
static int test_packing_2()
611
ncnn::Mat a = RandomMat(19, 16);
613
|| test_packing_cpu(a)
615
|| test_packing_gpu(a)
620
static int test_packing_3()
622
ncnn::Mat a = RandomMat(80);
624
|| test_packing_cpu(a)
626
|| test_packing_gpu(a)