// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
15
#include "padding_vulkan.h"
17
#include "layer_shader_type.h"
21
// Constructs the Vulkan padding layer: flags Vulkan and image-storage support
// and clears every pipeline pointer.
Padding_vulkan::Padding_vulkan()
{
    support_vulkan = true;
    support_image_storage = true;

    // All pipeline pointers start out null. create_pipeline() only builds the
    // variants it needs, while destroy_pipeline() deletes every one of them,
    // so a pointer left uninitialized here would be deleted as garbage.
    pipeline_padding = 0;
    pipeline_padding_pack4 = 0;
    pipeline_padding_pack1to4 = 0;
    pipeline_padding_pack4to1 = 0;
    pipeline_padding_pack8 = 0;
    pipeline_padding_pack1to8 = 0;
    pipeline_padding_pack4to8 = 0;
    pipeline_padding_pack8to4 = 0;
    pipeline_padding_pack8to1 = 0;

    pipeline_padding_3d = 0;
    pipeline_padding_3d_pack4 = 0;
    pipeline_padding_3d_pack8 = 0;
}
41
int Padding_vulkan::create_pipeline(const Option& _opt)
44
const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
45
const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
48
if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
49
if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
50
if (shape.dims == 3 || shape.dims == 4) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
53
if (out_shape.dims == 1) out_elempack = opt.use_shader_pack8 && out_shape.w % 8 == 0 ? 8 : out_shape.w % 4 == 0 ? 4 : 1;
54
if (out_shape.dims == 2) out_elempack = opt.use_shader_pack8 && out_shape.h % 8 == 0 ? 8 : out_shape.h % 4 == 0 ? 4 : 1;
55
if (out_shape.dims == 3 || out_shape.dims == 4) out_elempack = opt.use_shader_pack8 && out_shape.c % 8 == 0 ? 8 : out_shape.c % 4 == 0 ? 4 : 1;
57
int offset_elempack = 1;
61
offset_elempack = elempack;
63
offset_elempack = opt.use_shader_pack8 && left % 8 == 0 ? 8 : left % 4 == 0 ? 4 : 1;
65
else if (shape.dims == 2)
68
offset_elempack = elempack;
70
offset_elempack = opt.use_shader_pack8 && top % 8 == 0 ? 8 : top % 4 == 0 ? 4 : 1;
72
else if (shape.dims == 3)
75
offset_elempack = elempack;
77
offset_elempack = opt.use_shader_pack8 && front % 8 == 0 ? 8 : front % 4 == 0 ? 4 : 1;
79
else // if (shape.dims == 4)
81
offset_elempack = elempack;
84
offset_elempack = std::min(offset_elempack, elempack);
88
if (opt.use_fp16_storage)
90
elemsize = elempack * 2u;
91
out_elemsize = out_elempack * 2u;
93
else if (opt.use_fp16_packed)
95
elemsize = elempack == 1 ? 4u : elempack * 2u;
96
out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
100
elemsize = elempack * 4u;
101
out_elemsize = out_elempack * 4u;
105
if (shape.dims == 1) shape_packed = Mat(shape.w / elempack, (void*)0, elemsize, elempack);
106
if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
107
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
108
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);
110
Mat out_shape_packed;
111
if (out_shape.dims == 1) out_shape_packed = Mat(out_shape.w / out_elempack, (void*)0, out_elemsize, out_elempack);
112
if (out_shape.dims == 2) out_shape_packed = Mat(out_shape.w, out_shape.h / out_elempack, (void*)0, out_elemsize, out_elempack);
113
if (out_shape.dims == 3) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
114
if (out_shape.dims == 4) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.d, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
116
Mat shape_unpacked = shape_packed;
117
if (one_blob_only && shape.dims != 0 && elempack > offset_elempack)
119
size_t offset_elemsize;
120
if (opt.use_fp16_storage)
122
offset_elemsize = offset_elempack * 2u;
124
else if (opt.use_fp16_packed)
126
offset_elemsize = offset_elempack == 1 ? 4u : offset_elempack * 2u;
130
offset_elemsize = offset_elempack * 4u;
133
if (shape.dims == 1) shape_unpacked = Mat(shape.w / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
134
if (shape.dims == 2) shape_unpacked = Mat(shape.w, shape.h / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
135
if (shape.dims == 3) shape_unpacked = Mat(shape.w, shape.h, shape.c / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
136
// if (shape.dims == 4) should never reach here
140
if (!vkdev->shape_support_image_storage(shape_packed) || !vkdev->shape_support_image_storage(shape_unpacked) || !vkdev->shape_support_image_storage(out_shape_packed))
142
support_image_storage = false;
143
opt.use_image_storage = false;
146
std::vector<vk_specialization_type> specializations(3 + 10);
147
specializations[0].i = type;
148
specializations[1].f = value;
149
specializations[2].i = per_channel_pad_data_size ? 1 : 0;
150
specializations[3 + 0].i = shape_unpacked.dims;
151
specializations[3 + 1].i = shape_unpacked.w;
152
specializations[3 + 2].i = shape_unpacked.h;
153
specializations[3 + 3].i = shape_unpacked.c;
154
specializations[3 + 4].i = shape_unpacked.cstep;
155
specializations[3 + 5].i = out_shape_packed.dims;
156
specializations[3 + 6].i = out_shape_packed.w;
157
specializations[3 + 7].i = out_shape_packed.h;
158
specializations[3 + 8].i = out_shape_packed.c;
159
specializations[3 + 9].i = out_shape_packed.cstep;
161
std::vector<vk_specialization_type> specializations_3d(3 + 12);
162
specializations_3d[0].i = type;
163
specializations_3d[1].f = value;
164
specializations_3d[2].i = per_channel_pad_data_size ? 1 : 0;
165
specializations_3d[3 + 0].i = shape_unpacked.dims;
166
specializations_3d[3 + 1].i = shape_unpacked.w;
167
specializations_3d[3 + 2].i = shape_unpacked.h;
168
specializations_3d[3 + 3].i = shape_unpacked.d;
169
specializations_3d[3 + 4].i = shape_unpacked.c;
170
specializations_3d[3 + 5].i = shape_unpacked.cstep;
171
specializations_3d[3 + 6].i = out_shape_packed.dims;
172
specializations_3d[3 + 7].i = out_shape_packed.w;
173
specializations_3d[3 + 8].i = out_shape_packed.h;
174
specializations_3d[3 + 9].i = out_shape_packed.d;
175
specializations_3d[3 + 10].i = out_shape_packed.c;
176
specializations_3d[3 + 11].i = out_shape_packed.cstep;
179
if (out_shape_packed.dims == 1)
181
local_size_xyz.w = std::min(64, out_shape_packed.w);
182
local_size_xyz.h = 1;
183
local_size_xyz.c = 1;
185
if (out_shape_packed.dims == 2)
187
local_size_xyz.w = std::min(8, out_shape_packed.w);
188
local_size_xyz.h = std::min(8, out_shape_packed.h);
189
local_size_xyz.c = 1;
191
if (out_shape_packed.dims == 3)
193
local_size_xyz.w = std::min(4, out_shape_packed.w);
194
local_size_xyz.h = std::min(4, out_shape_packed.h);
195
local_size_xyz.c = std::min(4, out_shape_packed.c);
197
if (out_shape_packed.dims == 4)
199
local_size_xyz.w = std::min(4, out_shape_packed.w);
200
local_size_xyz.h = std::min(4, out_shape_packed.h * out_shape_packed.d);
201
local_size_xyz.c = std::min(4, out_shape_packed.c);
205
if (out_shape.dims == 0 || (offset_elempack == 1 && out_elempack == 1))
207
pipeline_padding = new Pipeline(vkdev);
208
pipeline_padding->set_optimal_local_size_xyz(local_size_xyz);
209
pipeline_padding->create(LayerShaderType::padding, opt, specializations);
211
pipeline_padding_3d = new Pipeline(vkdev);
212
pipeline_padding_3d->set_optimal_local_size_xyz(local_size_xyz);
213
pipeline_padding_3d->create(LayerShaderType::padding_3d, opt, specializations_3d);
217
if (out_shape.dims == 0 || (offset_elempack == 4 && out_elempack == 4))
219
pipeline_padding_pack4 = new Pipeline(vkdev);
220
pipeline_padding_pack4->set_optimal_local_size_xyz(local_size_xyz);
221
pipeline_padding_pack4->create(LayerShaderType::padding_pack4, opt, specializations);
223
pipeline_padding_3d_pack4 = new Pipeline(vkdev);
224
pipeline_padding_3d_pack4->set_optimal_local_size_xyz(local_size_xyz);
225
pipeline_padding_3d_pack4->create(LayerShaderType::padding_3d_pack4, opt, specializations_3d);
229
if (out_shape.dims == 0 || (offset_elempack == 1 && out_elempack == 4))
231
pipeline_padding_pack1to4 = new Pipeline(vkdev);
232
pipeline_padding_pack1to4->set_optimal_local_size_xyz(local_size_xyz);
233
pipeline_padding_pack1to4->create(LayerShaderType::padding_pack1to4, opt, specializations);
237
if (out_shape.dims == 0 || (offset_elempack == 4 && out_elempack == 1))
239
pipeline_padding_pack4to1 = new Pipeline(vkdev);
240
pipeline_padding_pack4to1->set_optimal_local_size_xyz(local_size_xyz);
241
pipeline_padding_pack4to1->create(LayerShaderType::padding_pack4to1, opt, specializations);
245
if ((opt.use_shader_pack8 && out_shape.dims == 0) || (offset_elempack == 8 && out_elempack == 8))
247
pipeline_padding_pack8 = new Pipeline(vkdev);
248
pipeline_padding_pack8->set_optimal_local_size_xyz(local_size_xyz);
249
pipeline_padding_pack8->create(LayerShaderType::padding_pack8, opt, specializations);
251
pipeline_padding_3d_pack8 = new Pipeline(vkdev);
252
pipeline_padding_3d_pack8->set_optimal_local_size_xyz(local_size_xyz);
253
pipeline_padding_3d_pack8->create(LayerShaderType::padding_3d_pack8, opt, specializations_3d);
257
if ((opt.use_shader_pack8 && out_shape.dims == 0) || (offset_elempack == 1 && out_elempack == 8))
259
pipeline_padding_pack1to8 = new Pipeline(vkdev);
260
pipeline_padding_pack1to8->set_optimal_local_size_xyz(local_size_xyz);
261
pipeline_padding_pack1to8->create(LayerShaderType::padding_pack1to8, opt, specializations);
265
if ((opt.use_shader_pack8 && out_shape.dims == 0) || (offset_elempack == 4 && out_elempack == 8))
267
pipeline_padding_pack4to8 = new Pipeline(vkdev);
268
pipeline_padding_pack4to8->set_optimal_local_size_xyz(local_size_xyz);
269
pipeline_padding_pack4to8->create(LayerShaderType::padding_pack4to8, opt, specializations);
273
if ((opt.use_shader_pack8 && out_shape.dims == 0) || (offset_elempack == 8 && out_elempack == 4))
275
pipeline_padding_pack8to4 = new Pipeline(vkdev);
276
pipeline_padding_pack8to4->set_optimal_local_size_xyz(local_size_xyz);
277
pipeline_padding_pack8to4->create(LayerShaderType::padding_pack8to4, opt, specializations);
281
if ((opt.use_shader_pack8 && out_shape.dims == 0) || (offset_elempack == 8 && out_elempack == 1))
283
pipeline_padding_pack8to1 = new Pipeline(vkdev);
284
pipeline_padding_pack8to1->set_optimal_local_size_xyz(local_size_xyz);
285
pipeline_padding_pack8to1->create(LayerShaderType::padding_pack8to1, opt, specializations);
291
int Padding_vulkan::destroy_pipeline(const Option& /*opt*/)
293
delete pipeline_padding;
294
pipeline_padding = 0;
296
delete pipeline_padding_pack4;
297
pipeline_padding_pack4 = 0;
299
delete pipeline_padding_pack1to4;
300
pipeline_padding_pack1to4 = 0;
302
delete pipeline_padding_pack4to1;
303
pipeline_padding_pack4to1 = 0;
305
delete pipeline_padding_pack8;
306
pipeline_padding_pack8 = 0;
308
delete pipeline_padding_pack1to8;
309
pipeline_padding_pack1to8 = 0;
311
delete pipeline_padding_pack4to8;
312
pipeline_padding_pack4to8 = 0;
314
delete pipeline_padding_pack8to4;
315
pipeline_padding_pack8to4 = 0;
317
delete pipeline_padding_pack8to1;
318
pipeline_padding_pack8to1 = 0;
320
delete pipeline_padding_3d;
321
pipeline_padding_3d = 0;
323
delete pipeline_padding_3d_pack4;
324
pipeline_padding_3d_pack4 = 0;
326
delete pipeline_padding_3d_pack8;
327
pipeline_padding_3d_pack8 = 0;
332
// Uploads the per-channel pad values to the GPU.
//
// Does nothing when the layer has no per-channel pad data. Otherwise the data
// is repacked to the widest packing its length allows and recorded for upload
// either as an image or as a buffer, depending on image-storage support.
//
// cmd  transfer command recorder that receives the upload.
// opt  layer options (pack8 / image storage / lightmode switches).
// Returns 0.
int Padding_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
{
    if (per_channel_pad_data_size == 0)
        return 0;

    int elempack = opt.use_shader_pack8 && per_channel_pad_data_size % 8 == 0 ? 8 : per_channel_pad_data_size % 4 == 0 ? 4 : 1;

    Mat per_channel_pad_data_packed;
    convert_packing(per_channel_pad_data, per_channel_pad_data_packed, elempack, opt);

    if (support_image_storage && opt.use_image_storage)
    {
        cmd.record_upload(per_channel_pad_data_packed, per_channel_pad_data_gpu_image, opt);
    }
    else
    {
        cmd.record_upload(per_channel_pad_data_packed, per_channel_pad_data_gpu, opt);
    }

    // NOTE(review): the lightmode guard is assumed, following the usual ncnn
    // upload_model pattern - confirm against the upstream file
    if (opt.lightmode)
    {
        per_channel_pad_data.release();
    }

    return 0;
}
359
int Padding_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const
361
int dims = bottom_blob.dims;
362
int w = bottom_blob.w;
363
int h = bottom_blob.h;
364
int d = bottom_blob.d;
365
int channels = bottom_blob.c;
366
size_t elemsize = bottom_blob.elemsize;
367
int elempack = bottom_blob.elempack;
379
if (left == 0 && right == 0)
381
top_blob = bottom_blob;
385
outw = w * elempack + left + right;
386
out_elempack = opt.use_shader_pack8 && outw % 8 == 0 ? 8 : outw % 4 == 0 ? 4 : 1;
387
offset_elempack = left == 0 ? elempack : opt.use_shader_pack8 && left % 8 == 0 ? 8 : left % 4 == 0 ? 4 : 1;
391
if (top == 0 && bottom == 0 && left == 0 && right == 0)
393
top_blob = bottom_blob;
397
outw = w + left + right;
398
outh = h * elempack + top + bottom;
399
out_elempack = opt.use_shader_pack8 && outh % 8 == 0 ? 8 : outh % 4 == 0 ? 4 : 1;
400
offset_elempack = top == 0 ? elempack : opt.use_shader_pack8 && top % 8 == 0 ? 8 : top % 4 == 0 ? 4 : 1;
404
if (top == 0 && bottom == 0 && left == 0 && right == 0 && front == 0 && behind == 0)
406
top_blob = bottom_blob;
410
outw = w + left + right;
411
outh = h + top + bottom;
412
outc = channels * elempack + front + behind;
413
out_elempack = opt.use_shader_pack8 && outc % 8 == 0 ? 8 : outc % 4 == 0 ? 4 : 1;
414
offset_elempack = front == 0 ? elempack : opt.use_shader_pack8 && front % 8 == 0 ? 8 : front % 4 == 0 ? 4 : 1;
416
else // if (dims == 4)
418
if (top == 0 && bottom == 0 && left == 0 && right == 0 && front == 0 && behind == 0)
420
top_blob = bottom_blob;
424
outw = w + left + right;
425
outh = h + top + bottom;
426
outd = d + front + behind;
427
outc = channels * elempack;
428
out_elempack = elempack;
429
offset_elempack = elempack;
432
offset_elempack = std::min(offset_elempack, elempack);
434
size_t out_elemsize = elemsize / elempack * out_elempack;
436
if (opt.use_fp16_packed && !opt.use_fp16_storage)
438
if (out_elempack == 8) out_elemsize = 8 * 2u;
439
if (out_elempack == 4) out_elemsize = 4 * 2u;
440
if (out_elempack == 1) out_elemsize = 4u;
444
VkMat bottom_blob_unpacked = bottom_blob;
445
if (elempack > offset_elempack)
447
Option opt_pack1 = opt;
448
opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
450
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
455
top_blob.create(outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
459
top_blob.create(outw, outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
463
top_blob.create(outw, outh, outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
465
else // if (dims == 4)
467
top_blob.create(outw, outh, outd, outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
469
if (top_blob.empty())
472
std::vector<VkMat> bindings(3);
473
bindings[0] = bottom_blob_unpacked;
474
bindings[1] = top_blob;
475
bindings[2] = per_channel_pad_data_gpu;
479
std::vector<vk_constant_type> constants(15);
480
constants[0].i = bottom_blob_unpacked.dims;
481
constants[1].i = bottom_blob_unpacked.w;
482
constants[2].i = bottom_blob_unpacked.h;
483
constants[3].i = bottom_blob_unpacked.d;
484
constants[4].i = bottom_blob_unpacked.c;
485
constants[5].i = bottom_blob_unpacked.cstep;
486
constants[6].i = top_blob.dims;
487
constants[7].i = top_blob.w;
488
constants[8].i = top_blob.h;
489
constants[9].i = top_blob.d;
490
constants[10].i = top_blob.c;
491
constants[11].i = top_blob.cstep;
492
constants[12].i = left;
493
constants[13].i = top;
494
constants[14].i = front;
496
const Pipeline* pipeline = out_elempack == 8 ? pipeline_padding_3d_pack8
497
: out_elempack == 4 ? pipeline_padding_3d_pack4
498
: pipeline_padding_3d;
500
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
505
std::vector<vk_constant_type> constants(13);
506
constants[0].i = bottom_blob_unpacked.dims;
507
constants[1].i = bottom_blob_unpacked.w;
508
constants[2].i = bottom_blob_unpacked.h;
509
constants[3].i = bottom_blob_unpacked.c;
510
constants[4].i = bottom_blob_unpacked.cstep;
511
constants[5].i = top_blob.dims;
512
constants[6].i = top_blob.w;
513
constants[7].i = top_blob.h;
514
constants[8].i = top_blob.c;
515
constants[9].i = top_blob.cstep;
516
constants[10].i = left;
517
constants[11].i = top;
518
constants[12].i = front;
520
const Pipeline* pipeline = 0;
521
if (offset_elempack == 1 && out_elempack == 1)
523
pipeline = pipeline_padding;
525
else if (offset_elempack == 4 && out_elempack == 4)
527
pipeline = pipeline_padding_pack4;
529
else if (offset_elempack == 1 && out_elempack == 4)
531
pipeline = pipeline_padding_pack1to4;
533
else if (offset_elempack == 4 && out_elempack == 1)
535
pipeline = pipeline_padding_pack4to1;
537
else if (offset_elempack == 8 && out_elempack == 8)
539
pipeline = pipeline_padding_pack8;
541
else if (offset_elempack == 1 && out_elempack == 8)
543
pipeline = pipeline_padding_pack1to8;
545
else if (offset_elempack == 4 && out_elempack == 8)
547
pipeline = pipeline_padding_pack4to8;
549
else if (offset_elempack == 8 && out_elempack == 4)
551
pipeline = pipeline_padding_pack8to4;
553
else if (offset_elempack == 8 && out_elempack == 1)
555
pipeline = pipeline_padding_pack8to1;
558
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
563
int Padding_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
565
const VkMat& bottom_blob = bottom_blobs[0];
566
const VkMat& reference_blob = bottom_blobs[1];
568
VkMat& top_blob = top_blobs[0];
576
const int* param_data = reference_blob.mapped();
578
_top = param_data[0];
579
_bottom = param_data[1];
580
_left = param_data[2];
581
_right = param_data[3];
582
_front = param_data[4];
583
_behind = param_data[5];
586
int dims = bottom_blob.dims;
587
int w = bottom_blob.w;
588
int h = bottom_blob.h;
589
int d = bottom_blob.d;
590
int channels = bottom_blob.c;
591
size_t elemsize = bottom_blob.elemsize;
592
int elempack = bottom_blob.elempack;
604
if (_left == 0 && _right == 0)
606
top_blob = bottom_blob;
610
outw = w * elempack + _left + _right;
611
out_elempack = opt.use_shader_pack8 && outw % 8 == 0 ? 8 : outw % 4 == 0 ? 4 : 1;
612
offset_elempack = _left == 0 ? elempack : opt.use_shader_pack8 && _left % 8 == 0 ? 8 : _left % 4 == 0 ? 4 : 1;
616
if (_top == 0 && _bottom == 0 && _left == 0 && _right == 0)
618
top_blob = bottom_blob;
622
outw = w + _left + _right;
623
outh = h * elempack + _top + _bottom;
624
out_elempack = opt.use_shader_pack8 && outh % 8 == 0 ? 8 : outh % 4 == 0 ? 4 : 1;
625
offset_elempack = _top == 0 ? elempack : opt.use_shader_pack8 && _top % 8 == 0 ? 8 : _top % 4 == 0 ? 4 : 1;
629
if (_top == 0 && _bottom == 0 && _left == 0 && _right == 0 && _front == 0 && _behind == 0)
631
top_blob = bottom_blob;
635
outw = w + _left + _right;
636
outh = h + _top + _bottom;
637
outc = channels * elempack + _front + _behind;
638
out_elempack = opt.use_shader_pack8 && outc % 8 == 0 ? 8 : outc % 4 == 0 ? 4 : 1;
639
offset_elempack = _front == 0 ? elempack : opt.use_shader_pack8 && _front % 8 == 0 ? 8 : _front % 4 == 0 ? 4 : 1;
641
else // if (dims == 4)
643
if (_top == 0 && _bottom == 0 && _left == 0 && _right == 0 && _front == 0 && _behind == 0)
645
top_blob = bottom_blob;
649
outw = w + _left + _right;
650
outh = h + _top + _bottom;
651
outd = d + _front + _behind;
652
outc = channels * elempack;
653
out_elempack = elempack;
654
offset_elempack = elempack;
657
offset_elempack = std::min(offset_elempack, elempack);
659
size_t out_elemsize = elemsize / elempack * out_elempack;
661
if (opt.use_fp16_packed && !opt.use_fp16_storage)
663
if (out_elempack == 8) out_elemsize = 8 * 2u;
664
if (out_elempack == 4) out_elemsize = 4 * 2u;
665
if (out_elempack == 1) out_elemsize = 4u;
669
VkMat bottom_blob_unpacked = bottom_blob;
670
if (elempack > offset_elempack)
672
Option opt_pack1 = opt;
673
opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
675
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
680
top_blob.create(outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
684
top_blob.create(outw, outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
688
top_blob.create(outw, outh, outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
690
else // if (dims == 4)
692
top_blob.create(outw, outh, outd, outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
694
if (top_blob.empty())
697
std::vector<VkMat> bindings(3);
698
bindings[0] = bottom_blob_unpacked;
699
bindings[1] = top_blob;
700
bindings[2] = per_channel_pad_data_gpu;
704
std::vector<vk_constant_type> constants(15);
705
constants[0].i = bottom_blob_unpacked.dims;
706
constants[1].i = bottom_blob_unpacked.w;
707
constants[2].i = bottom_blob_unpacked.h;
708
constants[3].i = bottom_blob_unpacked.d;
709
constants[4].i = bottom_blob_unpacked.c;
710
constants[5].i = bottom_blob_unpacked.cstep;
711
constants[6].i = top_blob.dims;
712
constants[7].i = top_blob.w;
713
constants[8].i = top_blob.h;
714
constants[9].i = top_blob.d;
715
constants[10].i = top_blob.c;
716
constants[11].i = top_blob.cstep;
717
constants[12].i = _left;
718
constants[13].i = _top;
719
constants[14].i = _front;
721
const Pipeline* pipeline = out_elempack == 8 ? pipeline_padding_3d_pack8
722
: out_elempack == 4 ? pipeline_padding_3d_pack4
723
: pipeline_padding_3d;
725
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
730
std::vector<vk_constant_type> constants(13);
731
constants[0].i = bottom_blob_unpacked.dims;
732
constants[1].i = bottom_blob_unpacked.w;
733
constants[2].i = bottom_blob_unpacked.h;
734
constants[3].i = bottom_blob_unpacked.c;
735
constants[4].i = bottom_blob_unpacked.cstep;
736
constants[5].i = top_blob.dims;
737
constants[6].i = top_blob.w;
738
constants[7].i = top_blob.h;
739
constants[8].i = top_blob.c;
740
constants[9].i = top_blob.cstep;
741
constants[10].i = _left;
742
constants[11].i = _top;
743
constants[12].i = _front;
745
const Pipeline* pipeline = 0;
746
if (offset_elempack == 1 && out_elempack == 1)
748
pipeline = pipeline_padding;
750
else if (offset_elempack == 4 && out_elempack == 4)
752
pipeline = pipeline_padding_pack4;
754
else if (offset_elempack == 1 && out_elempack == 4)
756
pipeline = pipeline_padding_pack1to4;
758
else if (offset_elempack == 4 && out_elempack == 1)
760
pipeline = pipeline_padding_pack4to1;
762
else if (offset_elempack == 8 && out_elempack == 8)
764
pipeline = pipeline_padding_pack8;
766
else if (offset_elempack == 1 && out_elempack == 8)
768
pipeline = pipeline_padding_pack1to8;
770
else if (offset_elempack == 4 && out_elempack == 8)
772
pipeline = pipeline_padding_pack4to8;
774
else if (offset_elempack == 8 && out_elempack == 4)
776
pipeline = pipeline_padding_pack8to4;
778
else if (offset_elempack == 8 && out_elempack == 1)
780
pipeline = pipeline_padding_pack8to1;
783
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
788
int Padding_vulkan::forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const
790
int dims = bottom_blob.dims;
791
int w = bottom_blob.w;
792
int h = bottom_blob.h;
793
int d = bottom_blob.d;
794
int channels = bottom_blob.c;
795
size_t elemsize = bottom_blob.elemsize;
796
int elempack = bottom_blob.elempack;
808
if (left == 0 && right == 0)
810
top_blob = bottom_blob;
814
outw = w * elempack + left + right;
815
out_elempack = opt.use_shader_pack8 && outw % 8 == 0 ? 8 : outw % 4 == 0 ? 4 : 1;
816
offset_elempack = left == 0 ? elempack : opt.use_shader_pack8 && left % 8 == 0 ? 8 : left % 4 == 0 ? 4 : 1;
820
if (top == 0 && bottom == 0 && left == 0 && right == 0)
822
top_blob = bottom_blob;
826
outw = w + left + right;
827
outh = h * elempack + top + bottom;
828
out_elempack = opt.use_shader_pack8 && outh % 8 == 0 ? 8 : outh % 4 == 0 ? 4 : 1;
829
offset_elempack = top == 0 ? elempack : opt.use_shader_pack8 && top % 8 == 0 ? 8 : top % 4 == 0 ? 4 : 1;
833
if (top == 0 && bottom == 0 && left == 0 && right == 0 && front == 0 && behind == 0)
835
top_blob = bottom_blob;
839
outw = w + left + right;
840
outh = h + top + bottom;
841
outc = channels * elempack + front + behind;
842
out_elempack = opt.use_shader_pack8 && outc % 8 == 0 ? 8 : outc % 4 == 0 ? 4 : 1;
843
offset_elempack = front == 0 ? elempack : opt.use_shader_pack8 && front % 8 == 0 ? 8 : front % 4 == 0 ? 4 : 1;
845
else // if (dims == 4)
847
if (top == 0 && bottom == 0 && left == 0 && right == 0 && front == 0 && behind == 0)
849
top_blob = bottom_blob;
853
outw = w + left + right;
854
outh = h + top + bottom;
855
outd = d + front + behind;
856
outc = channels * elempack;
857
out_elempack = elempack;
858
offset_elempack = elempack;
861
offset_elempack = std::min(offset_elempack, elempack);
863
size_t out_elemsize = elemsize / elempack * out_elempack;
865
if (opt.use_fp16_packed && !opt.use_fp16_storage)
867
if (out_elempack == 8) out_elemsize = 8 * 2u;
868
if (out_elempack == 4) out_elemsize = 4 * 2u;
869
if (out_elempack == 1) out_elemsize = 4u;
873
VkImageMat bottom_blob_unpacked = bottom_blob;
874
if (elempack > offset_elempack)
876
Option opt_pack1 = opt;
877
opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
879
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
884
top_blob.create(outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
888
top_blob.create(outw, outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
892
top_blob.create(outw, outh, outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
894
else // if (dims == 4)
896
top_blob.create(outw, outh, outd, outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
898
if (top_blob.empty())
901
std::vector<VkImageMat> bindings(3);
902
bindings[0] = bottom_blob_unpacked;
903
bindings[1] = top_blob;
904
bindings[2] = per_channel_pad_data_gpu_image;
908
std::vector<vk_constant_type> constants(15);
909
constants[0].i = bottom_blob_unpacked.dims;
910
constants[1].i = bottom_blob_unpacked.w;
911
constants[2].i = bottom_blob_unpacked.h;
912
constants[3].i = bottom_blob_unpacked.d;
913
constants[4].i = bottom_blob_unpacked.c;
915
constants[6].i = top_blob.dims;
916
constants[7].i = top_blob.w;
917
constants[8].i = top_blob.h;
918
constants[9].i = top_blob.d;
919
constants[10].i = top_blob.c;
921
constants[12].i = left;
922
constants[13].i = top;
923
constants[14].i = front;
925
const Pipeline* pipeline = out_elempack == 8 ? pipeline_padding_3d_pack8
926
: out_elempack == 4 ? pipeline_padding_3d_pack4
927
: pipeline_padding_3d;
929
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
934
std::vector<vk_constant_type> constants(13);
935
constants[0].i = bottom_blob_unpacked.dims;
936
constants[1].i = bottom_blob_unpacked.w;
937
constants[2].i = bottom_blob_unpacked.h;
938
constants[3].i = bottom_blob_unpacked.c;
940
constants[5].i = top_blob.dims;
941
constants[6].i = top_blob.w;
942
constants[7].i = top_blob.h;
943
constants[8].i = top_blob.c;
945
constants[10].i = left;
946
constants[11].i = top;
947
constants[12].i = front;
949
const Pipeline* pipeline = 0;
950
if (offset_elempack == 1 && out_elempack == 1)
952
pipeline = pipeline_padding;
954
else if (offset_elempack == 4 && out_elempack == 4)
956
pipeline = pipeline_padding_pack4;
958
else if (offset_elempack == 1 && out_elempack == 4)
960
pipeline = pipeline_padding_pack1to4;
962
else if (offset_elempack == 4 && out_elempack == 1)
964
pipeline = pipeline_padding_pack4to1;
966
else if (offset_elempack == 8 && out_elempack == 8)
968
pipeline = pipeline_padding_pack8;
970
else if (offset_elempack == 1 && out_elempack == 8)
972
pipeline = pipeline_padding_pack1to8;
974
else if (offset_elempack == 4 && out_elempack == 8)
976
pipeline = pipeline_padding_pack4to8;
978
else if (offset_elempack == 8 && out_elempack == 4)
980
pipeline = pipeline_padding_pack8to4;
982
else if (offset_elempack == 8 && out_elempack == 1)
984
pipeline = pipeline_padding_pack8to1;
987
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
992
int Padding_vulkan::forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const
994
const VkImageMat& bottom_blob = bottom_blobs[0];
995
const VkImageMat& reference_blob = bottom_blobs[1];
997
VkImageMat& top_blob = top_blobs[0];
1006
const int* param_data = reference_blob.mapped();
1008
_top = param_data[0];
1009
_bottom = param_data[1];
1010
_left = param_data[2];
1011
_right = param_data[3];
1012
_front = param_data[4];
1013
_behind = param_data[5];
1016
int dims = bottom_blob.dims;
1017
int w = bottom_blob.w;
1018
int h = bottom_blob.h;
1019
int d = bottom_blob.d;
1020
int channels = bottom_blob.c;
1021
size_t elemsize = bottom_blob.elemsize;
1022
int elempack = bottom_blob.elempack;
1029
int offset_elempack;
1034
if (_left == 0 && _right == 0)
1036
top_blob = bottom_blob;
1040
outw = w * elempack + _left + _right;
1041
out_elempack = opt.use_shader_pack8 && outw % 8 == 0 ? 8 : outw % 4 == 0 ? 4 : 1;
1042
offset_elempack = _left == 0 ? elempack : opt.use_shader_pack8 && _left % 8 == 0 ? 8 : _left % 4 == 0 ? 4 : 1;
1046
if (_top == 0 && _bottom == 0 && _left == 0 && _right == 0)
1048
top_blob = bottom_blob;
1052
outw = w + _left + _right;
1053
outh = h * elempack + _top + _bottom;
1054
out_elempack = opt.use_shader_pack8 && outh % 8 == 0 ? 8 : outh % 4 == 0 ? 4 : 1;
1055
offset_elempack = _top == 0 ? elempack : opt.use_shader_pack8 && _top % 8 == 0 ? 8 : _top % 4 == 0 ? 4 : 1;
1059
if (_top == 0 && _bottom == 0 && _left == 0 && _right == 0 && _front == 0 && _behind == 0)
1061
top_blob = bottom_blob;
1065
outw = w + _left + _right;
1066
outh = h + _top + _bottom;
1067
outc = channels * elempack + _front + _behind;
1068
out_elempack = opt.use_shader_pack8 && outc % 8 == 0 ? 8 : outc % 4 == 0 ? 4 : 1;
1069
offset_elempack = _front == 0 ? elempack : opt.use_shader_pack8 && _front % 8 == 0 ? 8 : _front % 4 == 0 ? 4 : 1;
1071
else // if (dims == 4)
1073
if (_top == 0 && _bottom == 0 && _left == 0 && _right == 0 && _front == 0 && _behind == 0)
1075
top_blob = bottom_blob;
1079
outw = w + _left + _right;
1080
outh = h + _top + _bottom;
1081
outd = d + _front + _behind;
1082
outc = channels * elempack;
1083
out_elempack = elempack;
1084
offset_elempack = elempack;
1087
offset_elempack = std::min(offset_elempack, elempack);
1089
size_t out_elemsize = elemsize / elempack * out_elempack;
1091
if (opt.use_fp16_packed && !opt.use_fp16_storage)
1093
if (out_elempack == 8) out_elemsize = 8 * 2u;
1094
if (out_elempack == 4) out_elemsize = 4 * 2u;
1095
if (out_elempack == 1) out_elemsize = 4u;
1099
VkImageMat bottom_blob_unpacked = bottom_blob;
1100
if (elempack > offset_elempack)
1102
Option opt_pack1 = opt;
1103
opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
1105
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
1110
top_blob.create(outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1114
top_blob.create(outw, outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1118
top_blob.create(outw, outh, outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1120
else // if (dims == 4)
1122
top_blob.create(outw, outh, outd, outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1124
if (top_blob.empty())
1127
std::vector<VkImageMat> bindings(3);
1128
bindings[0] = bottom_blob_unpacked;
1129
bindings[1] = top_blob;
1130
bindings[2] = per_channel_pad_data_gpu_image;
1134
std::vector<vk_constant_type> constants(15);
1135
constants[0].i = bottom_blob_unpacked.dims;
1136
constants[1].i = bottom_blob_unpacked.w;
1137
constants[2].i = bottom_blob_unpacked.h;
1138
constants[3].i = bottom_blob_unpacked.d;
1139
constants[4].i = bottom_blob_unpacked.c;
1141
constants[6].i = top_blob.dims;
1142
constants[7].i = top_blob.w;
1143
constants[8].i = top_blob.h;
1144
constants[9].i = top_blob.d;
1145
constants[10].i = top_blob.c;
1146
constants[11].i = 0;
1147
constants[12].i = _left;
1148
constants[13].i = _top;
1149
constants[14].i = _front;
1151
const Pipeline* pipeline = out_elempack == 8 ? pipeline_padding_3d_pack8
1152
: out_elempack == 4 ? pipeline_padding_3d_pack4
1153
: pipeline_padding_3d;
1155
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1160
std::vector<vk_constant_type> constants(13);
1161
constants[0].i = bottom_blob_unpacked.dims;
1162
constants[1].i = bottom_blob_unpacked.w;
1163
constants[2].i = bottom_blob_unpacked.h;
1164
constants[3].i = bottom_blob_unpacked.c;
1166
constants[5].i = top_blob.dims;
1167
constants[6].i = top_blob.w;
1168
constants[7].i = top_blob.h;
1169
constants[8].i = top_blob.c;
1171
constants[10].i = _left;
1172
constants[11].i = _top;
1173
constants[12].i = _front;
1175
const Pipeline* pipeline = 0;
1176
if (offset_elempack == 1 && out_elempack == 1)
1178
pipeline = pipeline_padding;
1180
else if (offset_elempack == 4 && out_elempack == 4)
1182
pipeline = pipeline_padding_pack4;
1184
else if (offset_elempack == 1 && out_elempack == 4)
1186
pipeline = pipeline_padding_pack1to4;
1188
else if (offset_elempack == 4 && out_elempack == 1)
1190
pipeline = pipeline_padding_pack4to1;
1192
else if (offset_elempack == 8 && out_elempack == 8)
1194
pipeline = pipeline_padding_pack8;
1196
else if (offset_elempack == 1 && out_elempack == 8)
1198
pipeline = pipeline_padding_pack1to8;
1200
else if (offset_elempack == 4 && out_elempack == 8)
1202
pipeline = pipeline_padding_pack4to8;
1204
else if (offset_elempack == 8 && out_elempack == 4)
1206
pipeline = pipeline_padding_pack8to4;
1208
else if (offset_elempack == 8 && out_elempack == 1)
1210
pipeline = pipeline_padding_pack8to1;
1213
cmd.record_pipeline(pipeline, bindings, constants, top_blob);