ncnn

Форк
0
/
interp_pack8.comp 
246 строк · 7.3 Кб
1
// Tencent is pleased to support the open source community by making ncnn available.
2
//
3
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
4
//
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
7
//
8
// https://opensource.org/licenses/BSD-3-Clause
9
//
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
14

15
#version 450
16

17
#if NCNN_fp16_storage
18
#extension GL_EXT_shader_16bit_storage: require
19
// Storage type for one eight-value element: two f16vec4 members, abcd declared first and efgh second.
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
20
#endif
21
#if NCNN_fp16_arithmetic
22
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
23
#endif
24

25
// Resampling mode tested in main(): 1 selects the nearest path, 2 selects the bilinear path.
layout (constant_id = 0) const int resize_type = 0;
26
// When 1, bilinear source coordinates are gx * scale; otherwise the (gx + 0.5) * scale - 0.5 mapping is used.
layout (constant_id = 1) const int align_corner = 0;
27

28
// First constant_id used by the shape constants below (ids 0 and 1 are taken above).
#define shape_constant_id_offset 2
29
// Source blob shape. All shape constants default to 0.
// NOTE(review): psc() is defined outside this view; presumably it chooses between these
// constants and the push-constant members of the same name -- confirm against the shared prelude.
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
30
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
31
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
32
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
33
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;
34

35
// Destination blob shape, same field order as the source shape above.
layout (constant_id = shape_constant_id_offset + 5) const int outdims = 0;
36
layout (constant_id = shape_constant_id_offset + 6) const int outw = 0;
37
layout (constant_id = shape_constant_id_offset + 7) const int outh = 0;
38
layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
39
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;
40

41
// Resource bindings: binding 0 is the source blob, binding 1 is the write-only destination blob.
#if NCNN_image_shader
42
// Image variant: the source is a 3D sampler, the destination a 3D storage image.
layout (binding = 0) uniform unfp sampler3D bottom_blob;
43
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
44
#else
45
// Buffer variant: both blobs are unsized arrays of sfpvec8, addressed by a flat element index.
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
46
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
47
#endif
48

49
// Push-constant block, accessed as `p`. The integer members carry the same names as the
// shape specialization constants; scale_x / scale_y are read directly by main().
layout (push_constant) uniform parameter
50
{
51
    // Source blob shape.
    int dims;
52
    int w;
53
    int h;
54
    int c;
55
    int cstep;
56

57
    // Destination blob shape.
    int outdims;
58
    int outw;
59
    int outh;
60
    int outc;
61
    int outcstep;
62

63
    // Factors that main() multiplies output x / y coordinates by to obtain source coordinates.
    float scale_x;
64
    float scale_y;
65
} p;
66

67
// Resize entry point. Each invocation handles the single output element at
// (gx, gy, gz) taken from gl_GlobalInvocationID and writes at most one element
// of top_blob. The source position is derived from p.scale_x / p.scale_y;
// resize_type selects nearest (1) or bilinear (2). Any other resize_type value
// falls through both branches and writes nothing for 2-D and higher inputs.
void main()
68
{
69
    int gx = int(gl_GlobalInvocationID.x);
70
    int gy = int(gl_GlobalInvocationID.y);
71
    int gz = int(gl_GlobalInvocationID.z);
72

73
    // Invocations that fall outside the output extent have nothing to write.
    if (gx >= psc(outw) || gy >= psc(outh) || gz >= psc(outc))
74
        return;
75

76
    // 1-D source: the output element at (gx, gy, gz) is a plain copy of source element gz.
    if (psc(dims) == 1)
77
    {
78
#if NCNN_image_shader
79
        image3d_cp8(top_blob, ivec3(gx, gy, gz), bottom_blob, ivec3(gz, 0, 0));
80
#else
81
        const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;
82

83
        buffer_cp8(top_blob_data, gi, bottom_blob_data, gz);
84
#endif
85

86
        return;
87
    }
88

89
    // 2-D source: only x is resampled; gy indexes the same row in source and destination.
    if (psc(dims) == 2)
90
    {
91
        if (resize_type == 1) // nearest
92
        {
93
            // Source column is floor(gx * scale_x), limited to the last column w - 1.
            int sx = min(int(floor(afp(gx) * afp(p.scale_x))), psc(w) - 1);
94

95
#if NCNN_image_shader
96
            image3d_cp8(top_blob, ivec3(gx, gy, gz), bottom_blob, ivec3(sx, gy, gz));
97
#else
98
            int v_offset = gz * psc(cstep) + gy * psc(w) + sx;
99

100
            const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;
101

102
            buffer_cp8(top_blob_data, gi, bottom_blob_data, v_offset);
103
#endif
104
        }
105
        if (resize_type == 2) // bilinear
106
        {
107
            // Fractional source x coordinate; the mapping depends on align_corner.
            afp fx;
108
            if (align_corner == 1)
109
            {
110
                fx = afp(gx) * afp(p.scale_x);
111
            }
112
            else
113
            {
114
                fx = (afp(gx) + afp(0.5f)) * afp(p.scale_x) - afp(0.5f);
115
            }
116

117
            // Split into the left tap index sx and the blend weight fx toward the right tap.
            int sx = int(floor(fx));
118

119
            fx -= afp(sx);
120

121
            // The right tap is read at sx + 1, so the left tap may be at most w - 2.
            int sx_max = psc(w) - 2;
122

123
            // At the borders the weight is pinned so the result equals the edge column:
            // left of the row -> column 0 (fx = 0); right of it -> column w - 1 (fx = 1).
            // NOTE(review): with w < 2, sx_max is negative and one of the two taps below
            // lies outside the row -- confirm this path is never dispatched with w < 2.
            if (sx < 0)
124
            {
125
                sx = 0;
126
                fx = afp(0.f);
127
            }
128
            else if (sx > sx_max)
129
            {
130
                sx = sx_max;
131
                fx = afp(1.f);
132
            }
133

134
#if NCNN_image_shader
135
            afpvec8 a0 = image3d_ld8(bottom_blob, ivec3(sx, gy, gz));
136
            afpvec8 a1 = image3d_ld8(bottom_blob, ivec3(sx + 1, gy, gz));
137
#else
138
            int v_offset_0 = gz * psc(cstep) + gy * psc(w) + sx;
139

140
            afpvec8 a0 = buffer_ld8(bottom_blob_data, v_offset_0);
141
            afpvec8 a1 = buffer_ld8(bottom_blob_data, v_offset_0 + 1);
142
#endif
143

144
            // Linear blend of the two taps, applied to both indexed parts ([0] and [1]) of afpvec8.
            afpvec8 res;
145
            res[0] = a0[0] * (afp(1.f) - fx) + a1[0] * fx;
146
            res[1] = a0[1] * (afp(1.f) - fx) + a1[1] * fx;
147

148
#if NCNN_image_shader
149
            image3d_st8(top_blob, ivec3(gx, gy, gz), res);
150
#else
151
            const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;
152

153
            buffer_st8(top_blob_data, gi, res);
154
#endif
155
        }
156

157
        return;
158
    }
159

160
    // Remaining case (dims is neither 1 nor 2): resample both x and y within channel gz.
    if (resize_type == 1) // nearest
161
    {
162
        // Source texel is floor(gxy * scale) per axis, limited to (w - 1, h - 1).
        afpvec2 gxy = afpvec2(gx, gy);
163
        ivec2 sxy_max = ivec2(psc(w) - 1, psc(h) - 1);
164
        ivec2 sxy = min(ivec2(floor(gxy * afpvec2(p.scale_x, p.scale_y))), sxy_max);
165

166
        int sx = sxy.r;
167
        int sy = sxy.g;
168

169
#if NCNN_image_shader
170
        image3d_cp8(top_blob, ivec3(gx, gy, gz), bottom_blob, ivec3(sx, sy, gz));
171
#else
172
        int v_offset = gz * psc(cstep) + sy * psc(w) + sx;
173

174
        const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;
175

176
        buffer_cp8(top_blob_data, gi, bottom_blob_data, v_offset);
177
#endif
178
    }
179
    if (resize_type == 2) // bilinear
180
    {
181
        // Fractional source coordinates for x and y; same align_corner rule as the 2-D path.
        afpvec2 gxy = afpvec2(gx, gy);
182
        afpvec2 fxy;
183
        if (align_corner == 1)
184
        {
185
            fxy = gxy * afpvec2(p.scale_x, p.scale_y);
186
        }
187
        else
188
        {
189
            fxy = (gxy + afp(0.5f)) * afpvec2(p.scale_x, p.scale_y) - afp(0.5f);
190
        }
191

192
        // Integer top-left tap and per-axis blend weights.
        ivec2 sxy = ivec2(floor(fxy));
193

194
        fxy -= afpvec2(sxy);
195

196
        // Taps are read at sxy and sxy + 1, so the top-left tap may be at most (w - 2, h - 2).
        ivec2 sxy_max = ivec2(psc(w) - 2, psc(h) - 2);
197

198
        // Record, per axis, whether the unclamped tap was out of range before clamping it.
        bvec2 underflow = lessThan(sxy, ivec2(0));
199
        bvec2 overflow = greaterThan(sxy, sxy_max);
200

201
        sxy = clamp(sxy, ivec2(0), sxy_max);
202

203
        // Pin the weight on clamped axes: 0 where the tap underflowed, 1 where it overflowed.
        // NOTE(review): with w < 2 or h < 2 the clamp bounds are inverted (max < 0) --
        // confirm this path is never dispatched with such shapes.
        fxy = mix(fxy, afpvec2(0.f), underflow);
204
        fxy = mix(fxy, afpvec2(1.f), overflow);
205

206
        int sx = sxy.r;
207
        int sy = sxy.g;
208

209
        // Load the 2x2 neighbourhood: a0/a1 from row sy, b0/b1 from row sy + 1.
#if NCNN_image_shader
210
        afpvec8 a0 = image3d_ld8(bottom_blob, ivec3(sx, sy, gz));
211
        afpvec8 a1 = image3d_ld8(bottom_blob, ivec3(sx + 1, sy, gz));
212
        afpvec8 b0 = image3d_ld8(bottom_blob, ivec3(sx, sy + 1, gz));
213
        afpvec8 b1 = image3d_ld8(bottom_blob, ivec3(sx + 1, sy + 1, gz));
214
#else
215
        int v_offset_0 = gz * psc(cstep) + sy * psc(w) + sx;
216
        int v_offset_1 = gz * psc(cstep) + (sy + 1) * psc(w) + sx;
217

218
        afpvec8 a0 = buffer_ld8(bottom_blob_data, v_offset_0);
219
        afpvec8 a1 = buffer_ld8(bottom_blob_data, v_offset_0 + 1);
220
        afpvec8 b0 = buffer_ld8(bottom_blob_data, v_offset_1);
221
        afpvec8 b1 = buffer_ld8(bottom_blob_data, v_offset_1 + 1);
222
#endif
223

224
        afp fx = fxy.r;
225
        afp fy = fxy.g;
226

227
        // Blend horizontally within each row first ...
        afpvec8 a;
228
        afpvec8 b;
229
        a[0] = a0[0] * (afp(1.f) - fx) + a1[0] * fx;
230
        a[1] = a0[1] * (afp(1.f) - fx) + a1[1] * fx;
231
        b[0] = b0[0] * (afp(1.f) - fx) + b1[0] * fx;
232
        b[1] = b0[1] * (afp(1.f) - fx) + b1[1] * fx;
233

234
        // ... then blend the two row results vertically.
        afpvec8 res;
235
        res[0] = a[0] * (afp(1.f) - fy) + b[0] * fy;
236
        res[1] = a[1] * (afp(1.f) - fy) + b[1] * fy;
237

238
#if NCNN_image_shader
239
        image3d_st8(top_blob, ivec3(gx, gy, gz), res);
240
#else
241
        const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;
242

243
        buffer_st8(top_blob_data, gi, res);
244
#endif
245
    }
246
}
247

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.