// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
15
#include "pixelshuffle_vulkan.h"
17
#include "layer_shader_type.h"
21
// Constructs the layer: advertises Vulkan and image-storage support and clears
// every pipeline pointer so destroy_pipeline() can delete them unconditionally.
PixelShuffle_vulkan::PixelShuffle_vulkan()
{
    support_vulkan = true;
    support_image_storage = true;

    // Pipelines are created lazily in create_pipeline(); null until then.
    pipeline_pixelshuffle = 0;
    pipeline_pixelshuffle_pack4 = 0;
    pipeline_pixelshuffle_pack4to1 = 0;
    pipeline_pixelshuffle_pack8 = 0;
    pipeline_pixelshuffle_pack8to1 = 0;
    pipeline_pixelshuffle_pack8to4 = 0;
}
34
int PixelShuffle_vulkan::create_pipeline(const Option& _opt)
37
const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
38
const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
41
if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
42
if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
43
if (shape.dims == 3) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
46
if (out_shape.dims == 1) out_elempack = opt.use_shader_pack8 && out_shape.w % 8 == 0 ? 8 : out_shape.w % 4 == 0 ? 4 : 1;
47
if (out_shape.dims == 2) out_elempack = opt.use_shader_pack8 && out_shape.h % 8 == 0 ? 8 : out_shape.h % 4 == 0 ? 4 : 1;
48
if (out_shape.dims == 3) out_elempack = opt.use_shader_pack8 && out_shape.c % 8 == 0 ? 8 : out_shape.c % 4 == 0 ? 4 : 1;
52
if (opt.use_fp16_storage)
54
elemsize = elempack * 2u;
55
out_elemsize = out_elempack * 2u;
57
else if (opt.use_fp16_packed)
59
elemsize = elempack == 1 ? 4u : elempack * 2u;
60
out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
64
elemsize = elempack * 4u;
65
out_elemsize = out_elempack * 4u;
69
if (shape.dims == 1) shape_packed = Mat(shape.w / elempack, (void*)0, elemsize, elempack);
70
if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
71
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
74
if (out_shape.dims == 1) out_shape_packed = Mat(out_shape.w / out_elempack, (void*)0, out_elemsize, out_elempack);
75
if (out_shape.dims == 2) out_shape_packed = Mat(out_shape.w, out_shape.h / out_elempack, (void*)0, out_elemsize, out_elempack);
76
if (out_shape.dims == 3) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
79
if (!vkdev->shape_support_image_storage(shape_packed) || !vkdev->shape_support_image_storage(out_shape_packed))
81
support_image_storage = false;
82
opt.use_image_storage = false;
85
std::vector<vk_specialization_type> specializations(2 + 10);
86
specializations[0].i = upscale_factor;
87
specializations[1].i = mode;
88
specializations[2 + 0].i = shape_packed.dims;
89
specializations[2 + 1].i = shape_packed.w;
90
specializations[2 + 2].i = shape_packed.h;
91
specializations[2 + 3].i = shape_packed.c;
92
specializations[2 + 4].i = shape_packed.cstep;
93
specializations[2 + 5].i = out_shape_packed.dims;
94
specializations[2 + 6].i = out_shape_packed.w;
95
specializations[2 + 7].i = out_shape_packed.h;
96
specializations[2 + 8].i = out_shape_packed.c;
97
specializations[2 + 9].i = out_shape_packed.cstep;
99
Mat local_size_xyz_bottom; // pack4to1 and pack8to1
100
if (shape_packed.dims != 3)
102
local_size_xyz_bottom.w = std::min(4, shape_packed.w);
103
local_size_xyz_bottom.h = std::min(4, shape_packed.h);
104
local_size_xyz_bottom.c = std::min(4, shape_packed.c);
108
if (out_shape_packed.dims != 0)
110
local_size_xyz.w = std::min(4, out_shape_packed.w);
111
local_size_xyz.h = std::min(4, out_shape_packed.h);
112
local_size_xyz.c = std::min(4, out_shape_packed.c);
116
if (shape.dims == 0 || (elempack == 1 && out_elempack == 1))
118
pipeline_pixelshuffle = new Pipeline(vkdev);
119
pipeline_pixelshuffle->set_optimal_local_size_xyz(local_size_xyz);
120
pipeline_pixelshuffle->create(LayerShaderType::pixelshuffle, opt, specializations);
124
if (shape.dims == 0 || (elempack == 4 && out_elempack == 4))
126
pipeline_pixelshuffle_pack4 = new Pipeline(vkdev);
127
pipeline_pixelshuffle_pack4->set_optimal_local_size_xyz(local_size_xyz);
128
pipeline_pixelshuffle_pack4->create(LayerShaderType::pixelshuffle_pack4, opt, specializations);
132
if (shape.dims == 0 || (elempack == 4 && out_elempack == 1))
134
pipeline_pixelshuffle_pack4to1 = new Pipeline(vkdev);
135
pipeline_pixelshuffle_pack4to1->set_optimal_local_size_xyz(local_size_xyz_bottom);
136
pipeline_pixelshuffle_pack4to1->create(LayerShaderType::pixelshuffle_pack4to1, opt, specializations);
140
if ((opt.use_shader_pack8 && shape.dims == 0) || (elempack == 8 && out_elempack == 8))
142
pipeline_pixelshuffle_pack8 = new Pipeline(vkdev);
143
pipeline_pixelshuffle_pack8->set_optimal_local_size_xyz(local_size_xyz);
144
pipeline_pixelshuffle_pack8->create(LayerShaderType::pixelshuffle_pack8, opt, specializations);
148
if ((opt.use_shader_pack8 && shape.dims == 0) || (elempack == 8 && out_elempack == 1))
150
pipeline_pixelshuffle_pack8to1 = new Pipeline(vkdev);
151
pipeline_pixelshuffle_pack8to1->set_optimal_local_size_xyz(local_size_xyz_bottom);
152
pipeline_pixelshuffle_pack8to1->create(LayerShaderType::pixelshuffle_pack8to1, opt, specializations);
156
if ((opt.use_shader_pack8 && shape.dims == 0) || (elempack == 8 && out_elempack == 4))
158
pipeline_pixelshuffle_pack8to4 = new Pipeline(vkdev);
159
pipeline_pixelshuffle_pack8to4->set_optimal_local_size_xyz(local_size_xyz);
160
pipeline_pixelshuffle_pack8to4->create(LayerShaderType::pixelshuffle_pack8to4, opt, specializations);
166
int PixelShuffle_vulkan::destroy_pipeline(const Option& /*opt*/)
168
delete pipeline_pixelshuffle;
169
pipeline_pixelshuffle = 0;
171
delete pipeline_pixelshuffle_pack4;
172
pipeline_pixelshuffle_pack4 = 0;
174
delete pipeline_pixelshuffle_pack4to1;
175
pipeline_pixelshuffle_pack4to1 = 0;
177
delete pipeline_pixelshuffle_pack8;
178
pipeline_pixelshuffle_pack8 = 0;
180
delete pipeline_pixelshuffle_pack8to1;
181
pipeline_pixelshuffle_pack8to1 = 0;
183
delete pipeline_pixelshuffle_pack8to4;
184
pipeline_pixelshuffle_pack8to4 = 0;
189
int PixelShuffle_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const
191
int w = bottom_blob.w;
192
int h = bottom_blob.h;
193
int channels = bottom_blob.c;
194
size_t elemsize = bottom_blob.elemsize;
195
int elempack = bottom_blob.elempack;
197
int outw = w * upscale_factor;
198
int outh = h * upscale_factor;
199
int outc = channels * elempack / (upscale_factor * upscale_factor);
201
int out_elempack = opt.use_shader_pack8 && outc % 8 == 0 ? 8 : outc % 4 == 0 ? 4 : 1;
202
size_t out_elemsize = elemsize / elempack * out_elempack;
204
if (opt.use_fp16_packed && !opt.use_fp16_storage)
206
if (out_elempack == 8) out_elemsize = 8 * 2u;
207
if (out_elempack == 4) out_elemsize = 4 * 2u;
208
if (out_elempack == 1) out_elemsize = 4u;
211
top_blob.create(outw, outh, outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
212
if (top_blob.empty())
215
std::vector<VkMat> bindings(2);
216
bindings[0] = bottom_blob;
217
bindings[1] = top_blob;
219
std::vector<vk_constant_type> constants(10);
220
constants[0].i = bottom_blob.dims;
221
constants[1].i = bottom_blob.w;
222
constants[2].i = bottom_blob.h;
223
constants[3].i = bottom_blob.c;
224
constants[4].i = bottom_blob.cstep;
225
constants[5].i = top_blob.dims;
226
constants[6].i = top_blob.w;
227
constants[7].i = top_blob.h;
228
constants[8].i = top_blob.c;
229
constants[9].i = top_blob.cstep;
231
if (elempack == 1 && out_elempack == 1)
233
cmd.record_pipeline(pipeline_pixelshuffle, bindings, constants, top_blob);
235
else if (elempack == 4 && out_elempack == 4)
237
cmd.record_pipeline(pipeline_pixelshuffle_pack4, bindings, constants, top_blob);
239
else if (elempack == 4 && out_elempack == 1)
241
cmd.record_pipeline(pipeline_pixelshuffle_pack4to1, bindings, constants, bottom_blob);
243
else if (elempack == 8 && out_elempack == 8)
245
cmd.record_pipeline(pipeline_pixelshuffle_pack8, bindings, constants, top_blob);
247
else if (elempack == 8 && out_elempack == 1)
249
cmd.record_pipeline(pipeline_pixelshuffle_pack8to1, bindings, constants, bottom_blob);
251
else if (elempack == 8 && out_elempack == 4)
253
cmd.record_pipeline(pipeline_pixelshuffle_pack8to4, bindings, constants, top_blob);
259
int PixelShuffle_vulkan::forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const
261
int w = bottom_blob.w;
262
int h = bottom_blob.h;
263
int channels = bottom_blob.c;
264
size_t elemsize = bottom_blob.elemsize;
265
int elempack = bottom_blob.elempack;
267
int outw = w * upscale_factor;
268
int outh = h * upscale_factor;
269
int outc = channels * elempack / (upscale_factor * upscale_factor);
271
int out_elempack = opt.use_shader_pack8 && outc % 8 == 0 ? 8 : outc % 4 == 0 ? 4 : 1;
272
size_t out_elemsize = elemsize / elempack * out_elempack;
274
if (opt.use_fp16_packed && !opt.use_fp16_storage)
276
if (out_elempack == 8) out_elemsize = 8 * 2u;
277
if (out_elempack == 4) out_elemsize = 4 * 2u;
278
if (out_elempack == 1) out_elemsize = 4u;
281
top_blob.create(outw, outh, outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
282
if (top_blob.empty())
285
std::vector<VkImageMat> bindings(2);
286
bindings[0] = bottom_blob;
287
bindings[1] = top_blob;
289
std::vector<vk_constant_type> constants(10);
290
constants[0].i = bottom_blob.dims;
291
constants[1].i = bottom_blob.w;
292
constants[2].i = bottom_blob.h;
293
constants[3].i = bottom_blob.c;
294
constants[4].i = 0; //bottom_blob.cstep;
295
constants[5].i = top_blob.dims;
296
constants[6].i = top_blob.w;
297
constants[7].i = top_blob.h;
298
constants[8].i = top_blob.c;
299
constants[9].i = 0; //top_blob.cstep;
301
if (elempack == 1 && out_elempack == 1)
303
cmd.record_pipeline(pipeline_pixelshuffle, bindings, constants, top_blob);
305
else if (elempack == 4 && out_elempack == 4)
307
cmd.record_pipeline(pipeline_pixelshuffle_pack4, bindings, constants, top_blob);
309
else if (elempack == 4 && out_elempack == 1)
311
cmd.record_pipeline(pipeline_pixelshuffle_pack4to1, bindings, constants, bottom_blob);
313
else if (elempack == 8 && out_elempack == 8)
315
cmd.record_pipeline(pipeline_pixelshuffle_pack8, bindings, constants, top_blob);
317
else if (elempack == 8 && out_elempack == 1)
319
cmd.record_pipeline(pipeline_pixelshuffle_pack8to1, bindings, constants, bottom_blob);
321
else if (elempack == 8 && out_elempack == 4)
323
cmd.record_pipeline(pipeline_pixelshuffle_pack8to4, bindings, constants, top_blob);