1
// Tencent is pleased to support the open source community by making ncnn available.
3
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
8
// https://opensource.org/licenses/BSD-3-Clause
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
18
// Require the GL_EXT_shader_16bit_storage extension.
// NOTE(review): this directive is unconditional in the lines visible here;
// confirm against the full file whether an enclosing #if was lost.
#extension GL_EXT_shader_16bit_storage: require
20
// The float16 arithmetic extension below is requested under NCNN_fp16_arithmetic.
// NOTE(review): no matching #endif is visible in this view - confirm.
#if NCNN_fp16_arithmetic
21
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
24
// Specialization constants 0..10: layer parameters. Every one defaults to 0
// in the shader text; the roles described below are taken from how the
// visible code uses each constant.

// flip: NOTE(review): declared here, but no use of it appears in the lines
// visible in this view - confirm where it is consumed.
layout (constant_id = 0) const int flip = 0;
25
// clip: when equal to 1, every box is clamped component-wise to [0, 1].
layout (constant_id = 1) const int clip = 0;
26
// offset: added to the grid cell index before multiplying by the step.
layout (constant_id = 2) const float offset = 0;
27
// variances_0..3: packed into one 4-component vector that is stored
// alongside every box.
layout (constant_id = 3) const float variances_0 = 0;
28
layout (constant_id = 4) const float variances_1 = 0;
29
layout (constant_id = 5) const float variances_2 = 0;
30
layout (constant_id = 6) const float variances_3 = 0;
31
// num_min_size: upper bound for the x invocation index.
layout (constant_id = 7) const int num_min_size = 0;
32
// num_max_size: the max-size box is computed only when this is > 0.
layout (constant_id = 8) const int num_max_size = 0;
33
// num_aspect_ratio: trip count of the aspect-ratio loop.
layout (constant_id = 9) const int num_aspect_ratio = 0;
34
// num_prior: multiplier applied to the linear cell index (gz * w + gy) when
// computing output offsets.
layout (constant_id = 10) const int num_prior = 0;
36
// Shape constants follow the 11 parameter constants above, so w and h take
// constant ids 11 and 12.
#define shape_constant_id_offset 11
37
// w, h: grid extent; always read through psc(...).
// NOTE(review): psc is defined outside this view - confirm its semantics.
layout (constant_id = shape_constant_id_offset + 0) const int w = 0;
38
layout (constant_id = shape_constant_id_offset + 1) const int h = 0;
41
// Output buffer (binding 0, write-only): receives one 4-component entry per
// box and one per variance vector.
// NOTE(review): top_blob is declared twice at binding 0 with different element
// types (vec4 and sfpvec4). Presumably these are alternatives selected by a
// preprocessor conditional that is not visible in this view - confirm.
layout (binding = 0) writeonly buffer top_blob { vec4 top_blob_data[]; };
43
layout (binding = 0) writeonly buffer top_blob { sfpvec4 top_blob_data[]; };
45
// Read-only scalar inputs (bindings 1..3). min_sizes_data and max_sizes_data
// are indexed by the x invocation index; aspect_ratios_data is indexed by the
// aspect-ratio loop counter.
layout (binding = 1) readonly buffer min_sizes { sfp min_sizes_data[]; };
46
layout (binding = 2) readonly buffer max_sizes { sfp max_sizes_data[]; };
47
layout (binding = 3) readonly buffer aspect_ratios { sfp aspect_ratios_data[]; };
49
layout (push_constant) uniform parameter
62
// Per-invocation setup for prior-box generation.
// NOTE(review): the enclosing function header and braces are not visible in
// this view.
//
// Invocation coordinates: gx selects the min_sizes entry, gy is bounded by
// psc(w) and gz by psc(h).
int gx = int(gl_GlobalInvocationID.x);
63
int gy = int(gl_GlobalInvocationID.y);
64
int gz = int(gl_GlobalInvocationID.z);
66
// Bounds condition for invocations outside num_min_size x w x h.
// NOTE(review): the statement guarded by this condition is not visible in
// this view (presumably an early return) - confirm.
if (gx >= num_min_size || gy >= psc(w) || gz >= psc(h))
69
// anchor and variance
70
// v_offset: output index of this invocation's first box, computed as
// (row gz, column gy) linearised, times num_prior, plus gx.
int v_offset = (gz * psc(w) + gy) * num_prior + gx;
71
// var_offset: index of the matching variance entry, w * h * num_prior
// elements after the box entry.
int var_offset = psc(w) * psc(h) * num_prior + v_offset;
73
// Box centre: (cell index + offset) * step, per axis.
afp center_x = (afp(gy) + afp(offset)) * afp(p.step_w);
74
afp center_y = (afp(gz) + afp(offset)) * afp(p.step_h);
75
// Replicated as (x, y, x, y) so one vector add yields both corners.
afpvec4 center = afpvec4(center_x, center_y, center_x, center_y);
77
// Component-wise reciprocal of the image size; multiplying a box by this
// scales x components by 1/image_w and y components by 1/image_h.
afpvec4 image_norm = afp(1.f) / afpvec4(p.image_w, p.image_h, p.image_w, p.image_h);
84
// Size value for this invocation, loaded from min_sizes_data at index gx.
afp min_size = buffer_ld1(min_sizes_data, gx);
86
// The four variance constants packed into one vector.
afpvec4 variances = afpvec4(afp(variances_0), afp(variances_1), afp(variances_2), afp(variances_3));
89
// Min-size box: a square whose side is min_size.
// NOTE(review): the declarations of box, box_w and box_h are not visible in
// this view.
box_w = box_h = min_size;
91
// Corners are centre -/+ half the extent, giving (x0, y0, x1, y1), then
// scaled by image_norm.
box = (center + afpvec4(-box_w, -box_h, box_w, box_h) * afp(0.5f)) * image_norm;
92
// With clip == 1 every component is clamped to [0, 1]; otherwise unchanged.
box = clip == 1 ? clamp(box, afp(0.f), afp(1.f)) : box;
95
// Store the box at v_offset and the variances at var_offset.
// NOTE(review): two store forms for the same data appear back to back (direct
// vec4 assignment, then buffer_st4). Presumably they are alternatives under a
// preprocessor conditional not visible in this view - confirm.
top_blob_data[v_offset] = vec4(box);
96
top_blob_data[var_offset] = vec4(variances);
98
buffer_st4(top_blob_data, v_offset, box);
99
buffer_st4(top_blob_data, var_offset, variances);
105
// Optional max-size box, evaluated only when num_max_size > 0.
// NOTE(review): the braces delimiting this branch are not visible in this
// view, so its exact extent cannot be confirmed from here.
if (num_max_size > 0)
107
// Size value loaded from max_sizes_data at the same index gx as min_size.
afp max_size = buffer_ld1(max_sizes_data, gx);
110
// Square box whose side is the geometric mean of min_size and max_size.
box_w = box_h = sqrt(min_size * max_size);
112
// Corners are centre -/+ half the extent, scaled by image_norm.
box = (center + afpvec4(-box_w, -box_h, box_w, box_h) * afp(0.5f)) * image_norm;
113
// With clip == 1 every component is clamped to [0, 1]; otherwise unchanged.
box = clip == 1 ? clamp(box, afp(0.f), afp(1.f)) : box;
116
// Store the box and the variances.
// NOTE(review): no update of v_offset / var_offset is visible in this view;
// as shown, these stores target the same indices as earlier stores. Confirm
// against the full file that the offsets advance between boxes.
// NOTE(review): the direct-assignment and buffer_st4 forms are presumably
// alternatives under a preprocessor conditional not visible here - confirm.
top_blob_data[v_offset] = vec4(box);
117
top_blob_data[var_offset] = vec4(variances);
119
buffer_st4(top_blob_data, v_offset, box);
120
buffer_st4(top_blob_data, var_offset, variances);
128
// One pass per aspect ratio.
// NOTE(review): the loop braces are not visible in this view, so the exact
// loop body extent cannot be confirmed from here.
for (int pi = 0; pi < num_aspect_ratio; pi++)
130
// Aspect ratio for this pass, loaded from aspect_ratios_data at index pi.
afp ar = buffer_ld1(aspect_ratios_data, pi);
132
// Width and height chosen so that box_w / box_h == ar and
// box_w * box_h == min_size * min_size.
box_w = min_size * sqrt(ar);
133
box_h = min_size / sqrt(ar);
135
// Box with extent (box_w, box_h): corners are centre -/+ half the extent,
// scaled by image_norm.
box = (center + afpvec4(-box_w, -box_h, box_w, box_h) * afp(0.5f)) * image_norm;
136
// With clip == 1 every component is clamped to [0, 1]; otherwise unchanged.
box = clip == 1 ? clamp(box, afp(0.f), afp(1.f)) : box;
139
// Store the box and the variances.
// NOTE(review): no update of v_offset / var_offset is visible in this view;
// confirm against the full file that the offsets advance between boxes.
// NOTE(review): the direct-assignment and buffer_st4 forms are presumably
// alternatives under a preprocessor conditional not visible here - confirm.
top_blob_data[v_offset] = vec4(box);
140
top_blob_data[var_offset] = vec4(variances);
142
buffer_st4(top_blob_data, v_offset, box);
143
buffer_st4(top_blob_data, var_offset, variances);
151
// Second box for the same ratio with width and height swapped.
// NOTE(review): no condition on the flip constant is visible in this view;
// confirm whether this swapped box is guarded by it in the full file.
box = (center + afpvec4(-box_h, -box_w, box_h, box_w) * afp(0.5f)) * image_norm;
152
box = clip == 1 ? clamp(box, afp(0.f), afp(1.f)) : box;
155
// Store the swapped box and the variances (same two store forms as above).
top_blob_data[v_offset] = vec4(box);
156
top_blob_data[var_offset] = vec4(variances);
158
buffer_st4(top_blob_data, v_offset, box);
159
buffer_st4(top_blob_data, var_offset, variances);