1
// Tencent is pleased to support the open source community by making ncnn available.
3
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
8
// https://opensource.org/licenses/BSD-3-Clause
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
18
// Require the GL_EXT_shader_16bit_storage extension for this shader.
#extension GL_EXT_shader_16bit_storage: require
20
// When NCNN_fp16_arithmetic is non-zero, additionally require the explicit
// float16 arithmetic types extension.
// NOTE(review): the matching #endif is not visible in this excerpt - confirm it exists.
#if NCNN_fp16_arithmetic
21
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
24
// Specialization constants (constant_id 0..2). Each one defaults to 0 here
// unless a value is supplied for it when the pipeline is created.

// eps: added to var before the square root when the scale coefficient is computed.
layout (constant_id = 0) const float eps = 0;
25
// affine: not referenced in the visible part of this shader.
// NOTE(review): presumably selects whether gamma/beta are applied - confirm.
layout (constant_id = 1) const int affine = 0;
26
// w: referenced as psc(w) in the x bounds check on the invocation index.
layout (constant_id = 2) const int w = 0;
29
// Resource declarations for bindings 0..4, given twice: once as images/samplers
// and once as storage buffers.
// NOTE(review): both groups reuse the same bindings and names, so they are
// presumably alternatives selected by a preprocessor conditional that is not
// visible in this excerpt - confirm.
// imfmtc4, unfp and sfpvec4 are not defined in this excerpt.

// --- image / sampler declarations ---
// binding 0: write-only 3D image that receives the computed coefficients.
layout (binding = 0, imfmtc4) writeonly uniform unfp image3D coeffs_blob;
30
// bindings 1-2: mean and var, sampled at highp precision.
layout (binding = 1) uniform highp sampler3D mean_blob;
31
layout (binding = 2) uniform highp sampler3D var_blob;
32
// bindings 3-4: gamma and beta samplers.
layout (binding = 3) uniform unfp sampler3D gamma_blob;
33
layout (binding = 4) uniform unfp sampler3D beta_blob;
35
// --- storage buffer declarations ---
// binding 0: write-only array of sfpvec4 that receives the computed coefficients.
layout (binding = 0) writeonly buffer coeffs_blob { sfpvec4 coeffs_blob_data[]; };
36
// bindings 1-2: read-only mean and var, stored as vec4 elements.
layout (binding = 1) readonly buffer mean_blob { vec4 mean_data[]; };
37
layout (binding = 2) readonly buffer var_blob { vec4 var_data[]; };
38
// bindings 3-4: read-only gamma and beta, stored as sfpvec4 elements.
layout (binding = 3) readonly buffer gamma_blob { sfpvec4 gamma_data[]; };
39
layout (binding = 4) readonly buffer beta_blob { sfpvec4 beta_data[]; };
42
layout (push_constant) uniform parameter
49
// Global invocation coordinates of this shader invocation, as signed ints.
int gx = int(gl_GlobalInvocationID.x);
50
int gy = int(gl_GlobalInvocationID.y);
51
int gz = int(gl_GlobalInvocationID.z);
53
// Guard: true for any invocation outside the psc(w) x 1 x 1 range, i.e. only
// invocations with gx < psc(w), gy == 0 and gz == 0 fail this test.
// NOTE(review): the guarded statement is not visible in this excerpt
// (presumably an early return) - confirm.
if (gx >= psc(w) || gy >= 1 || gz >= 1)
57
// Load the mean and var vec4 for element gx.
// NOTE(review): mean and var are each declared twice below, so the two pairs are
// presumably alternative branches of a preprocessor conditional that is not
// visible in this excerpt - confirm.

// Sampler form: fetch the texel at (gx, 0, 0), level of detail 0.
vec4 mean = texelFetch(mean_blob, ivec3(gx, 0, 0), 0);
58
vec4 var = texelFetch(var_blob, ivec3(gx, 0, 0), 0);
60
// Buffer form: read element gx directly.
vec4 mean = mean_data[gx];
61
vec4 var = var_data[gx];
68
// Compute the coefficient pair (a, b) from mean, var, eps and, in the second
// form, gamma and beta.
// NOTE(review): the declarations of a and b and the branch structure around the
// two assignments to a are not visible in this excerpt; the two forms are
// presumably selected by the affine constant - confirm.

// Form without gamma: a = 1 / sqrt(var + eps), component-wise.
a = 1.f / (sqrt(var + eps));
74
// Load gamma and beta for element gx, converted to vec4.
// Sampler form (image3d_ld4 is not defined in this excerpt):
vec4 gamma = vec4(image3d_ld4(gamma_blob, ivec3(gx, 0, 0)));
75
vec4 beta = vec4(image3d_ld4(beta_blob, ivec3(gx, 0, 0)));
77
// Buffer form (buffer_ld4 is not defined in this excerpt):
vec4 gamma = vec4(buffer_ld4(gamma_data, gx));
78
vec4 beta = vec4(buffer_ld4(beta_data, gx));
81
// Form with gamma/beta: a = gamma / sqrt(var + eps), component-wise.
a = gamma / (sqrt(var + eps));
82
// b = beta - mean * a, so algebraically
// a * x + b == gamma * (x - mean) / sqrt(var + eps) + beta.
b = - mean * a + beta;
86
// Write the coefficient pair for element gx as two adjacent entries:
// a at index gx*2 and b at index gx*2 + 1.
// NOTE(review): an image form and a buffer form are both present, presumably as
// alternative branches of a preprocessor conditional not visible here - confirm.

// Image form: store at x = gx*2 and x = gx*2 + 1, with y = z = 0.
imageStore(coeffs_blob, ivec3(gx*2, 0, 0), a);
87
imageStore(coeffs_blob, ivec3(gx*2 +1, 0, 0), b);
89
// Buffer form: convert through afpvec4 and store with buffer_st4
// (neither is defined in this excerpt).
buffer_st4(coeffs_blob_data, gx*2, afpvec4(a));
90
buffer_st4(coeffs_blob_data, gx*2 +1, afpvec4(b));