ncnn

pooling_pack8.comp
236 строк · 6.8 Кб
Перенос по словам
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// With fp16 storage enabled, an sfpvec8 is declared here as two f16vec4
// halves (abcd, efgh), i.e. eight half-precision values per element.
// NOTE(review): when NCNN_fp16_storage is off, sfpvec8 must come from
// outside this file -- confirm against the shader preprocessing step.
#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// Largest finite 32-bit float; main() uses its negation as the starting
// value of the max-pooling accumulator.
#define FLT_MAX 3.402823466e+38

// Pooling configuration, supplied as specialization constants.
layout (constant_id = 0) const int pooling_type = 0; // main() handles 0 (max) and 1 (average)
layout (constant_id = 1) const int kernel_w = 1; // pooling window width
layout (constant_id = 2) const int kernel_h = 1; // pooling window height
layout (constant_id = 3) const int stride_w = 1; // horizontal step between windows
layout (constant_id = 4) const int stride_h = 1; // vertical step between windows
// Padding widths; main() only consults them to exclude padded samples from
// the average when avgpool_count_include_pad == 0.
layout (constant_id = 5) const int pad_left = 0;
layout (constant_id = 6) const int pad_right = 0;
layout (constant_id = 7) const int pad_top = 0;
layout (constant_id = 8) const int pad_bottom = 0;
layout (constant_id = 9) const int global_pooling = 0; // not referenced in main()
layout (constant_id = 10) const int pad_mode = 0; // not referenced in main()
// 0: divide the sum by the number of samples outside the padding,
// 1: divide the sum by kernel_w * kernel_h.
layout (constant_id = 11) const int avgpool_count_include_pad = 0;

// Input blob shape. These are read through psc(), which is defined outside
// this file (presumably it falls back to the push constant of the same name
// when the specialization constant is 0 -- confirm).
#define shape_constant_id_offset 12
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int c = 0;
layout (constant_id = shape_constant_id_offset + 4) const int cstep = 0;

// Output blob shape.
layout (constant_id = shape_constant_id_offset + 5) const int outdims = 0;
layout (constant_id = shape_constant_id_offset + 6) const int outw = 0;
layout (constant_id = shape_constant_id_offset + 7) const int outh = 0;
layout (constant_id = shape_constant_id_offset + 8) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outcstep = 0;

// Input blob at binding 0 (read only) and output blob at binding 1 (write
// only). The image-shader build uses a 3D sampler and a 3D image; otherwise
// both are storage buffers of sfpvec8 elements.
// NOTE(review): unfp and imfmtc4 are not defined in this file -- presumably
// precision / image-format macros injected by the build; confirm.
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfpvec8 top_blob_data[]; };
#endif

// Runtime parameters passed as push constants. The shape fields mirror the
// shape specialization constants and are accessed via psc() in main().
layout (push_constant) uniform parameter
{
    // input blob shape
    int dims;
    int w;
    int h;
    int c;
    int cstep;

    // output blob shape
    int outdims;
    int outw;
    int outh;
    int outc;
    int outcstep;

    // Extra columns / rows at the right / bottom edge that main() excludes,
    // together with pad_right / pad_bottom, from the average when
    // avgpool_count_include_pad == 0.
    int wtailpad;
    int htailpad;
} p;

// Loads the packed 8-wide input value at column x, row y of channel slice z.
// The image build fetches from the 3D sampler; the buffer build indexes the
// storage buffer at z * cstep + y * w + x.
afpvec8 pooling_load8(int x, int y, int z)
{
#if NCNN_image_shader
    return image3d_ld8(bottom_blob, ivec3(x, y, z));
#else
    return buffer_ld8(bottom_blob_data, z * psc(cstep) + y * psc(w) + x);
#endif
}

// Computes one packed output element per invocation: the invocation id
// (gx, gy, gz) selects the output column, row and channel slice, and the
// kernel_w x kernel_h input window starting at (gx * stride_w, gy * stride_h)
// is reduced with max or average according to pooling_type.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    // invocations outside the output extent have nothing to write
    if (gx >= psc(outw) || gy >= psc(outh) || gz >= psc(outc))
        return;

    // top-left corner of this output element's window in input coordinates
    const int sx = gx * stride_w;
    const int sy = gy * stride_h;

    // Zero-initialised so that a defined value is stored even when none of
    // the pooling branches below matches.
    afpvec8 res = afpvec8(afpvec4(0.f), afpvec4(0.f));

    if (pooling_type == 0)
    {
        // max pooling over the full window
        res = afpvec8(afpvec4(-FLT_MAX), afpvec4(-FLT_MAX));

        for (int y = 0; y < kernel_h; y++)
        {
            for (int x = 0; x < kernel_w; x++)
            {
                afpvec8 v = pooling_load8(sx + x, sy + y, gz);
                res[0] = max(res[0], v[0]);
                res[1] = max(res[1], v[1]);
            }
        }
    }
    else if (pooling_type == 1 && avgpool_count_include_pad == 0)
    {
        // average pooling that ignores samples lying in the padding

        // first column / row past the unpadded region on the right / bottom
        const int x_limit = psc(w) - pad_right - p.wtailpad;
        const int y_limit = psc(h) - pad_bottom - p.htailpad;

        int area = 0;

        for (int y = 0; y < kernel_h; y++)
        {
            // rows above the unpadded region are skipped
            if (sy + y < pad_top)
                continue;

            // rows only increase, so nothing further can be inside
            if (sy + y >= y_limit)
                break;

            for (int x = 0; x < kernel_w; x++)
            {
                if (sx + x < pad_left)
                    continue;

                if (sx + x >= x_limit)
                    break;

                afpvec8 v = pooling_load8(sx + x, sy + y, gz);
                res[0] += v[0];
                res[1] += v[1];
                area += 1;
            }
        }

        // NOTE(review): area stays 0 when the whole window lies in padding,
        // which makes this a division by zero -- confirm that the host never
        // dispatches such a configuration.
        res[0] /= afp(area);
        res[1] /= afp(area);
    }
    else if (pooling_type == 1 && avgpool_count_include_pad == 1)
    {
        // average pooling over the full window, padding included
        for (int y = 0; y < kernel_h; y++)
        {
            for (int x = 0; x < kernel_w; x++)
            {
                afpvec8 v = pooling_load8(sx + x, sy + y, gz);
                res[0] += v[0];
                res[1] += v[1];
            }
        }

        afp area = afp(kernel_w * kernel_h);
        res[0] /= area;
        res[1] /= area;
    }

#if NCNN_image_shader
    image3d_st8(top_blob, ivec3(gx, gy, gz), res);
#else
    const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

    buffer_st8(top_blob_data, gi, res);
#endif
}
ncnn

Использование cookies