1
// Tencent is pleased to support the open source community by making ncnn available.
3
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
8
// https://opensource.org/licenses/BSD-3-Clause
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
15
#include "priorbox_vulkan.h"
17
#include "layer_shader_type.h"
22
// Constructor: marks the layer as Vulkan-capable and clears both pipeline
// pointers so that neither pipeline is considered created yet.
// NOTE(review): the enclosing braces are not visible in this view of the file.
PriorBox_vulkan::PriorBox_vulkan()
24
support_vulkan = true;
26
// Both pipeline pointers start out null; create_pipeline() assigns them
// with `new Pipeline(vkdev)` and destroy_pipeline() deletes them.
pipeline_priorbox = 0;
27
pipeline_priorbox_mxnet = 0;
30
// Builds the two compute pipelines used by forward(): pipeline_priorbox
// (shader LayerShaderType::priorbox) and pipeline_priorbox_mxnet (shader
// LayerShaderType::priorbox_mxnet). Layer parameters and the packed bottom
// shape are passed to each pipeline as specialization constants.
//
// opt: supplies the packing / fp16 switches read below and is forwarded to
//      Pipeline::create().
//
// NOTE(review): braces, the declarations of elempack / elemsize /
// shape_packed, and the return statement are not visible in this view.
int PriorBox_vulkan::create_pipeline(const Option& opt)
32
// Shape hint: the first bottom shape when one is available, otherwise an
// empty Mat.
const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
35
// Pick the packing factor from the axis that matches the rank (w for 1-D,
// h for 2-D, c for 3-D): 8 when pack8 shaders are enabled and the axis is a
// multiple of 8, else 4 when it is a multiple of 4, else 1.
if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
36
if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
37
if (shape.dims == 3) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
40
// Element size of one packed element: 2 per lane with fp16 storage; with
// fp16 packed, 4 for an unpacked element and 2 per lane otherwise.
if (opt.use_fp16_storage)
42
elemsize = elempack * 2u;
44
else if (opt.use_fp16_packed)
46
elemsize = elempack == 1 ? 4u : elempack * 2u;
50
// NOTE(review): presumably the final `else` (fp32) branch; the `else`
// keyword itself is not visible here.
elemsize = elempack * 4u;
54
// Describe the packed shape: same rank as `shape`, with the packed axis
// divided by elempack. A null data pointer is passed, so only the
// dimensions and element layout are carried.
if (shape.dims == 1) shape_packed = Mat(shape.w / elempack, (void*)0, elemsize, elempack);
55
if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
56
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
60
// --- pipeline_priorbox ---
int num_min_size = min_sizes.w;
61
int num_max_size = max_sizes.w;
62
int num_aspect_ratio = aspect_ratios.w;
64
// Priors per location: one per (min_size, aspect_ratio) pair, plus one per
// min_size and one per max_size.
int num_prior = num_min_size * num_aspect_ratio + num_min_size + num_max_size;
66
// NOTE(review): adds the (min_size, aspect_ratio) count a second time;
// presumably guarded by a `flip` check that is not visible here — confirm.
num_prior += num_min_size * num_aspect_ratio;
68
// 11 layer parameters followed by 2 shape values.
std::vector<vk_specialization_type> specializations(11 + 2);
69
specializations[0].i = flip;
70
specializations[1].i = clip;
71
specializations[2].f = offset;
72
specializations[3].f = variances[0];
73
specializations[4].f = variances[1];
74
specializations[5].f = variances[2];
75
specializations[6].f = variances[3];
76
specializations[7].i = num_min_size;
77
specializations[8].i = num_max_size;
78
specializations[9].i = num_aspect_ratio;
79
specializations[10].i = num_prior;
80
specializations[11 + 0].i = shape_packed.w;
81
specializations[11 + 1].i = shape_packed.h;
83
pipeline_priorbox = new Pipeline(vkdev);
84
pipeline_priorbox->set_optimal_local_size_xyz();
85
pipeline_priorbox->create(LayerShaderType::priorbox, opt, specializations);
90
// --- pipeline_priorbox_mxnet ---
// NOTE(review): num_prior and specializations are declared again below, so
// this section must sit in its own scope; the braces are not visible here.
int num_sizes = min_sizes.w;
91
int num_ratios = aspect_ratios.w;
93
// Same prior count as the mxnet path in forward().
int num_prior = num_sizes - 1 + num_ratios;
95
// 5 layer parameters followed by 2 shape values.
std::vector<vk_specialization_type> specializations(5 + 2);
96
specializations[0].i = clip;
97
specializations[1].f = offset;
98
specializations[2].i = num_sizes;
99
specializations[3].i = num_ratios;
100
specializations[4].i = num_prior;
101
specializations[5 + 0].i = shape_packed.w;
102
specializations[5 + 1].i = shape_packed.h;
104
pipeline_priorbox_mxnet = new Pipeline(vkdev);
105
pipeline_priorbox_mxnet->set_optimal_local_size_xyz();
106
pipeline_priorbox_mxnet->create(LayerShaderType::priorbox_mxnet, opt, specializations);
112
// Deletes both pipelines allocated by create_pipeline() and resets the
// pointers to 0 afterwards. The Option argument is unused (its name is
// commented out).
// NOTE(review): braces and the return statement are not visible in this view.
int PriorBox_vulkan::destroy_pipeline(const Option& /*opt*/)
114
delete pipeline_priorbox;
115
pipeline_priorbox = 0;
117
delete pipeline_priorbox_mxnet;
118
pipeline_priorbox_mxnet = 0;
123
// Records uploads of the three parameter arrays (min_sizes, max_sizes,
// aspect_ratios) into their *_gpu counterparts on the transfer command `cmd`.
//
// cmd: transfer recorder that receives the upload commands.
// opt: forwarded unchanged to every record_upload() call.
//
// NOTE(review): braces, the return statement and any conditions around the
// individual statements are not visible in this view.
int PriorBox_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
125
cmd.record_upload(min_sizes, min_sizes_gpu, opt);
128
cmd.record_upload(max_sizes, max_sizes_gpu, opt);
130
cmd.record_upload(aspect_ratios, aspect_ratios_gpu, opt);
136
// Releases the host-side aspect_ratios Mat once its upload is recorded.
// NOTE(review): only this release is visible; whether it is conditional,
// or whether the other arrays are released too, cannot be told from here.
aspect_ratios.release();
142
// Records the compute dispatch that fills top_blobs[0] with prior boxes for
// the feature map in bottom_blobs[0]. Two paths are visible:
//   * an "mxnet style" path using pipeline_priorbox_mxnet, and
//   * a second path using pipeline_priorbox, which may read the image size
//     from bottom_blobs[1].
//
// bottom_blobs: [0] supplies the feature-map width/height; [1] is read for
//               the image width/height in the second path.
// top_blobs:    [0] is created here and bound as the output buffer.
// cmd:          compute recorder that receives the pipeline dispatch.
// opt:          supplies the fp16 switches and the blob allocator.
//
// NOTE(review): braces, several guards (see notes below), the declaration
// of `dispatcher`, and all return statements are not visible in this view.
int PriorBox_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
144
int w = bottom_blobs[0].w;
145
int h = bottom_blobs[0].h;
147
// mxnet path is selected when there is a single input, both image
// dimensions hold the value -233, and no max sizes were uploaded.
if (bottom_blobs.size() == 1 && image_width == -233 && image_height == -233 && max_sizes_gpu.empty())
149
// mxnet style _contrib_MultiBoxPrior
150
float step_w = step_width;
151
float step_h = step_height;
153
// NOTE(review): these overrides are presumably guarded by a check for an
// unset step; the guards are not visible here — confirm.
step_w = 1.f / (float)w;
155
step_h = 1.f / (float)h;
157
int num_sizes = min_sizes_gpu.w;
158
int num_ratios = aspect_ratios_gpu.w;
160
// Same formula as the mxnet section of create_pipeline().
int num_prior = num_sizes - 1 + num_ratios;
164
// NOTE(review): `elempack` is not declared in the visible lines of this
// function; its value on this path cannot be determined from here.
size_t elemsize = elempack * 4u;
165
// Either fp16 mode halves the per-lane element size.
if (opt.use_fp16_packed || opt.use_fp16_storage)
167
elemsize = elempack * 2u;
170
// Output holds 4 values per prior per location, divided by elempack.
VkMat& top_blob = top_blobs[0];
171
top_blob.create(4 * w * h * num_prior / elempack, elemsize, elempack, opt.blob_vkallocator);
172
// Allocation-failure check; the action taken is not visible here
// (presumably an error return).
if (top_blob.empty())
175
// Buffers bound to the shader: output, min sizes, aspect ratios.
std::vector<VkMat> bindings(3);
176
bindings[0] = top_blob;
177
bindings[1] = min_sizes_gpu;
178
bindings[2] = aspect_ratios_gpu;
180
// Four constants are passed; only [2] and [3] (the steps) are assigned in
// the visible lines.
std::vector<vk_constant_type> constants(4);
183
constants[2].f = step_w;
184
constants[3].f = step_h;
187
// Dispatch width is the number of sizes; other dispatcher dimensions are
// not visible here.
dispatcher.w = num_sizes;
191
cmd.record_pipeline(pipeline_priorbox_mxnet, bindings, constants, dispatcher);
196
// --- second path (pipeline_priorbox) ---
int image_w = image_width;
197
int image_h = image_height;
199
// NOTE(review): presumably only applied when the configured image size is
// unset; the guards are not visible here — confirm.
image_w = bottom_blobs[1].w;
201
image_h = bottom_blobs[1].h;
203
float step_w = step_width;
204
float step_h = step_height;
206
// Step computed as image size divided by feature-map size.
// NOTE(review): presumably guarded by an unset-step check that is not
// visible here.
step_w = (float)image_w / w;
208
step_h = (float)image_h / h;
210
int num_min_size = min_sizes_gpu.w;
211
int num_max_size = max_sizes_gpu.w;
212
int num_aspect_ratio = aspect_ratios_gpu.w;
214
// Same prior-count formula as the first section of create_pipeline().
int num_prior = num_min_size * num_aspect_ratio + num_min_size + num_max_size;
216
// NOTE(review): presumably guarded by a `flip` check that is not visible
// here — confirm.
num_prior += num_min_size * num_aspect_ratio;
218
size_t elemsize = 4u;
219
// NOTE(review): the body of this branch is not visible in this view.
if (opt.use_fp16_storage)
224
VkMat& top_blob = top_blobs[0];
225
// presumably a 2-row blob (width 4*w*h*num_prior, height 2) with
// elempack 1 — confirm the argument order against VkMat::create.
top_blob.create(4 * w * h * num_prior, 2, elemsize, 1, opt.blob_vkallocator);
226
// Allocation-failure check; the action taken is not visible here
// (presumably an error return).
if (top_blob.empty())
229
// Buffers bound to the shader: output, min sizes, max sizes, aspect ratios.
std::vector<VkMat> bindings(4);
230
bindings[0] = top_blob;
231
bindings[1] = min_sizes_gpu;
232
// When there are no max sizes, min_sizes_gpu is bound in that slot instead
// (presumably so the slot is never left without a buffer).
bindings[2] = num_max_size > 0 ? max_sizes_gpu : min_sizes_gpu;
233
bindings[3] = aspect_ratios_gpu;
235
// Six constants are passed; only [2]..[5] are assigned in the visible lines.
std::vector<vk_constant_type> constants(6);
238
// image_w / image_h are ints stored through the .f member, so they are
// converted on assignment.
constants[2].f = image_w;
239
constants[3].f = image_h;
240
constants[4].f = step_w;
241
constants[5].f = step_h;
244
// Dispatch width is the number of min sizes; other dispatcher dimensions
// are not visible here.
dispatcher.w = num_min_size;
248
cmd.record_pipeline(pipeline_priorbox, bindings, constants, dispatcher);