ncnn

Форк
0
/
interp_bicubic.comp 
179 строк · 6.4 Кб
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Optional 16-bit extensions, switched on by the NCNN_fp16_* build macros.
#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// Specialization constants: ids 0-4 describe the input blob, ids 5-9 the
// output blob. Every one defaults to 0.
#define shape_constant_id_offset 0
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

layout (constant_id = shape_constant_id_offset + 5) const int outdims = 0;
layout (constant_id = shape_constant_id_offset + 6) const int outw = 0;
layout (constant_id = shape_constant_id_offset + 7) const int outh = 0;
layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

// Binding 0 is the input and binding 1 the output, either as 3D images
// (NCNN_image_shader) or as flat storage buffers.
// NOTE(review): unfp, imfmtc1, sfp and sfpvec4 are not defined in this file;
// presumably they are substituted by ncnn's shader preprocessing - confirm.
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
#endif
// Per-output-column data, indexed by gx in main(): four weights and one
// source x offset.
layout (binding = 2) readonly buffer alpha_blob { sfpvec4 alpha_blob_data[]; };
layout (binding = 3) readonly buffer xofs_blob { int xofs_blob_data[]; };
// Per-output-row data, indexed by gy in main(): four weights and one
// source y offset.
layout (binding = 4) readonly buffer beta_blob { sfpvec4 beta_blob_data[]; };
layout (binding = 5) readonly buffer yofs_blob { int yofs_blob_data[]; };

// Push-constant block carrying the same ten shape fields as the
// specialization constants declared earlier in this file (input shape first,
// then output shape). main() reads these values only through psc(...).
// NOTE(review): psc is not defined in this file; presumably it chooses between
// the specialization constant and the matching p.<field> - confirm against
// ncnn's shader preamble.
layout (push_constant) uniform parameter
{
    int dims;
    int w;
    int h;
    int c;
    int cstep;

    int outdims;
    int outw;
    int outh;
    int outc;
    int outcstep;
} p;

// Bicubic interpolation: each invocation produces one output element at
// (gx, gy, gz) = gl_GlobalInvocationID.
//
// dims == 2: four horizontally adjacent inputs (columns sx-1 .. sx+2 of row
//            gy) are combined with the four weights alpha[gx].
// otherwise: a 4x4 input neighbourhood (rows sy-1 .. sy+2, columns
//            sx-1 .. sx+2) is combined with alpha[gx] along x and then
//            beta[gy] along y.
//
// sx and sy come from xofs_blob_data[gx] and yofs_blob_data[gy].
// NOTE(review): no input read is clamped or bounds-checked here; presumably
// the host fills xofs/yofs so that every tap stays inside the input - confirm.
// NOTE(review): psc, afp, afpvec4, afpmat4, buffer_ld1/ld4/st1 and
// image3d_ld1/st1 are not defined in this file; presumably ncnn's shader
// preamble provides them.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    // Invocations outside the output extent have nothing to write.
    if (gx >= psc(outw) || gy >= psc(outh) || gz >= psc(outc))
        return;

    if (psc(dims) == 2)
    {
        // Interpolation along x only: the source row is gy itself.
        int sx = xofs_blob_data[gx];

#if NCNN_image_shader
        afp b0 = image3d_ld1(bottom_blob, ivec3(sx - 1, gy, gz));
        afp b1 = image3d_ld1(bottom_blob, ivec3(sx + 0, gy, gz));
        afp b2 = image3d_ld1(bottom_blob, ivec3(sx + 1, gy, gz));
        afp b3 = image3d_ld1(bottom_blob, ivec3(sx + 2, gy, gz));
#else
        // Flat index of element (sx, gy, gz) in the input buffer.
        int v_offset_1 = gz * psc(cstep) + gy * psc(w) + sx;

        afp b0 = buffer_ld1(bottom_blob_data, v_offset_1 - 1);
        afp b1 = buffer_ld1(bottom_blob_data, v_offset_1 + 0);
        afp b2 = buffer_ld1(bottom_blob_data, v_offset_1 + 1);
        afp b3 = buffer_ld1(bottom_blob_data, v_offset_1 + 2);
#endif

        afpvec4 alpha = buffer_ld4(alpha_blob_data, gx);

        afpvec4 abcd = afpvec4(b0, b1, b2, b3);

        // Weighted sum of the four taps.
        afp v = dot(abcd, alpha);

#if NCNN_image_shader
        image3d_st1(top_blob, ivec3(gx, gy, gz), v);
#else
        const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

        buffer_st1(top_blob_data, gi, v);
#endif
        return;
    }

    int sx = xofs_blob_data[gx];
    int sy = yofs_blob_data[gy];

    // Load the 4x4 neighbourhood; a/b/c/d are rows sy-1, sy, sy+1, sy+2 and
    // the suffix 0..3 selects columns sx-1 .. sx+2.
#if NCNN_image_shader
    afp a0 = image3d_ld1(bottom_blob, ivec3(sx - 1, sy - 1, gz));
    afp a1 = image3d_ld1(bottom_blob, ivec3(sx + 0, sy - 1, gz));
    afp a2 = image3d_ld1(bottom_blob, ivec3(sx + 1, sy - 1, gz));
    afp a3 = image3d_ld1(bottom_blob, ivec3(sx + 2, sy - 1, gz));

    afp b0 = image3d_ld1(bottom_blob, ivec3(sx - 1, sy + 0, gz));
    afp b1 = image3d_ld1(bottom_blob, ivec3(sx + 0, sy + 0, gz));
    afp b2 = image3d_ld1(bottom_blob, ivec3(sx + 1, sy + 0, gz));
    afp b3 = image3d_ld1(bottom_blob, ivec3(sx + 2, sy + 0, gz));

    afp c0 = image3d_ld1(bottom_blob, ivec3(sx - 1, sy + 1, gz));
    afp c1 = image3d_ld1(bottom_blob, ivec3(sx + 0, sy + 1, gz));
    afp c2 = image3d_ld1(bottom_blob, ivec3(sx + 1, sy + 1, gz));
    afp c3 = image3d_ld1(bottom_blob, ivec3(sx + 2, sy + 1, gz));

    afp d0 = image3d_ld1(bottom_blob, ivec3(sx - 1, sy + 2, gz));
    afp d1 = image3d_ld1(bottom_blob, ivec3(sx + 0, sy + 2, gz));
    afp d2 = image3d_ld1(bottom_blob, ivec3(sx + 1, sy + 2, gz));
    afp d3 = image3d_ld1(bottom_blob, ivec3(sx + 2, sy + 2, gz));
#else
    // Flat indices of column sx in the four source rows.
    int v_offset_0 = gz * psc(cstep) + (sy - 1) * psc(w) + sx;
    int v_offset_1 = gz * psc(cstep) + (sy + 0) * psc(w) + sx;
    int v_offset_2 = gz * psc(cstep) + (sy + 1) * psc(w) + sx;
    int v_offset_3 = gz * psc(cstep) + (sy + 2) * psc(w) + sx;

    afp a0 = buffer_ld1(bottom_blob_data, v_offset_0 - 1);
    afp a1 = buffer_ld1(bottom_blob_data, v_offset_0 + 0);
    afp a2 = buffer_ld1(bottom_blob_data, v_offset_0 + 1);
    afp a3 = buffer_ld1(bottom_blob_data, v_offset_0 + 2);

    afp b0 = buffer_ld1(bottom_blob_data, v_offset_1 - 1);
    afp b1 = buffer_ld1(bottom_blob_data, v_offset_1 + 0);
    afp b2 = buffer_ld1(bottom_blob_data, v_offset_1 + 1);
    afp b3 = buffer_ld1(bottom_blob_data, v_offset_1 + 2);

    afp c0 = buffer_ld1(bottom_blob_data, v_offset_2 - 1);
    afp c1 = buffer_ld1(bottom_blob_data, v_offset_2 + 0);
    afp c2 = buffer_ld1(bottom_blob_data, v_offset_2 + 1);
    afp c3 = buffer_ld1(bottom_blob_data, v_offset_2 + 2);

    afp d0 = buffer_ld1(bottom_blob_data, v_offset_3 - 1);
    afp d1 = buffer_ld1(bottom_blob_data, v_offset_3 + 0);
    afp d2 = buffer_ld1(bottom_blob_data, v_offset_3 + 1);
    afp d3 = buffer_ld1(bottom_blob_data, v_offset_3 + 2);
#endif

    // Assuming afpmat4 behaves like a GLSL mat4, the constructor fills
    // column by column, so each source row (a, b, c, d) becomes one column.
    afpmat4 abcd0123 = afpmat4(
        a0, a1, a2, a3,
        b0, b1, b2, b3,
        c0, c1, c2, c3,
        d0, d1, d2, d3
    );

    afpvec4 alpha = buffer_ld4(alpha_blob_data, gx);

    // Under the same assumption, vector * matrix yields one dot product per
    // column: abcd holds each source row weighted by alpha.
    afpvec4 abcd = alpha * abcd0123;

    afpvec4 beta = buffer_ld4(beta_blob_data, gy);

    // Combine the four row results with the vertical weights.
    afp v = dot(abcd, beta);

#if NCNN_image_shader
    image3d_st1(top_blob, ivec3(gx, gy, gz), v);
#else
    const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

    buffer_st1(top_blob_data, gi, v);
#endif
}

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.