1
// Tencent is pleased to support the open source community by making ncnn available.
3
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
8
// https://opensource.org/licenses/BSD-3-Clause
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
18
// Require 16-bit types in storage buffers.
// NOTE(review): no enclosing #if is visible for this directive in this view — confirm.
#extension GL_EXT_shader_16bit_storage: require
20
// Require explicit float16 arithmetic types only when NCNN_fp16_arithmetic is set.
// NOTE(review): the matching #endif is not visible in this view — confirm it exists.
#if NCNN_fp16_arithmetic
21
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
24
// Specialization constants. IDs 0-4 carry the layer parameters; the shape
// constants follow, starting at shape_constant_id_offset.

// When clip == 1 the shader clamps every box it writes to [0, 1].
layout (constant_id = 0) const int clip = 0;
25
// Added to the gy / gz cell index before it is multiplied by the step.
layout (constant_id = 1) const float offset = 0;
26
// Upper bound for gx; gx is also the index used to read min_sizes_data.
layout (constant_id = 2) const int num_sizes = 0;
27
// Upper bound (exclusive) of the aspect-ratio loop, which starts at index 1.
layout (constant_id = 3) const int num_ratios = 0;
28
// Number of vec4 output slots per (gy, gz) cell, used as the stride in v_offset.
layout (constant_id = 4) const int num_prior = 0;
30
// First constant_id used for the shape constants below.
#define shape_constant_id_offset 5
31
// Shape constants, read in the shader body through psc(w) / psc(h).
// NOTE(review): psc() is defined outside this view; presumably it selects between
// these constants and the push-constant values — confirm.
layout (constant_id = shape_constant_id_offset + 0) const int w = 0;
32
layout (constant_id = shape_constant_id_offset + 1) const int h = 0;
34
// Storage buffers. The sfp / sfpvec4 element types are defined outside this view
// (presumably storage-precision scalar / 4-component vector types — confirm).

// binding 0: write-only output; one 4-component element is stored per generated box.
layout (binding = 0) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
35
// binding 1: read-only list of box sizes, indexed by gx.
layout (binding = 1) readonly buffer min_sizes { sfp min_sizes_data[]; };
36
// binding 2: read-only list of aspect ratios, indexed by the ratio loop counter.
layout (binding = 2) readonly buffer aspect_ratios { sfp aspect_ratios_data[]; };
38
layout (push_constant) uniform parameter
49
// Body of the shader entry point: each invocation writes the box for one
// (size index, column, row) triple and, in the loop at the end, the boxes for
// the remaining aspect ratios.
// NOTE(review): the function header, braces and some statements are not visible
// in this view; the comments below describe only the statements shown.

// Invocation coordinates: gx = size index, gy = column (bounded by w), gz = row (bounded by h).
int gx = int(gl_GlobalInvocationID.x);
50
int gy = int(gl_GlobalInvocationID.y);
51
int gz = int(gl_GlobalInvocationID.z);
53
// Bounds check for invocations outside the num_sizes x w x h range.
// NOTE(review): the statement guarded by this condition is not visible here
// (presumably an early return) — confirm.
if (gx >= num_sizes || gy >= psc(w) || gz >= psc(h))
56
// mxnet style _contrib_MultiBoxPrior
57
// Index of the output element: num_prior slots per (gy, gz) cell, gx selects the slot.
int v_offset = (gz * psc(w) + gy) * num_prior + gx;
59
// Box center: (cell index + offset) scaled by the per-axis step taken from p.
// NOTE(review): p is declared outside this view (presumably the push-constant block) — confirm.
afp center_x = (afp(gy) + afp(offset)) * afp(p.step_w);
60
afp center_y = (afp(gz) + afp(offset)) * afp(p.step_h);
61
// Center repeated as (x, y, x, y) so that adding (-cw, -ch, cw, ch) yields the two box corners.
afpvec4 center = afpvec4(center_x, center_y, center_x, center_y);
63
// ratio = 1, various sizes
64
// Size for this invocation is read at index gx.
afp size = buffer_ld1(min_sizes_data, gx);
65
// Half extents: the horizontal one is additionally scaled by h / w.
afp cw = size * afp(0.5f) * afp(psc(h)) / afp(psc(w));
66
afp ch = size * afp(0.5f);
68
// Box as (center - half extents, center + half extents).
afpvec4 box = center + afpvec4(-cw, -ch, cw, ch);
69
// Clamp all four coordinates to [0, 1] only when clip == 1.
box = clip == 1 ? clamp(box, afp(0.f), afp(1.f)) : box;
71
buffer_st4(top_blob_data, v_offset, box);
75
// various ratios, size = min_size = size[0]
// NOTE(review): cw / ch used below come from min_sizes_data[gx] above; whether that
// is size[0] depends on an enclosing condition that is not visible here — confirm.
76
// Iterates aspect ratios starting at index 1; index 0 is not read by this loop.
for (int pi = 1; pi < num_ratios; pi++)
78
afp v = buffer_ld1(aspect_ratios_data, pi);
79
// Scale the half extents by sqrt(ratio) horizontally and 1 / sqrt(ratio) vertically.
afp cwr = cw * sqrt(v);
80
afp chr = ch / sqrt(v);
82
afpvec4 box = center + afpvec4(-cwr, -chr, cwr, chr);
83
box = clip == 1 ? clamp(box, afp(0.f), afp(1.f)) : box;
85
// Stored at v_offset + num_sizes - 1 + pi.
// NOTE(review): v_offset already includes gx; verify this index against the
// enclosing condition (not visible here) so slots do not overlap or overrun num_prior.
buffer_st4(top_blob_data, v_offset + num_sizes - 1 + pi, box);