ncnn

Форк
0
/
permute_pack4to1.comp 
339 строк · 9.2 Кб
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#version 450

// Each extension is requested only when its NCNN_* build macro is non-zero.
// NOTE(review): these macros are not defined in this file; presumably they are
// injected when the shader is compiled -- confirm.
#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// Specialization constants. Every one of them defaults to 0.

// Selects the axis permutation; the tables inside main() list the values it
// handles for each input rank.
layout (constant_id = 0) const int order_type = 0;
// Not referenced by any code visible in this shader.
layout (constant_id = 1) const int bugihfa = 0;

// Shape constants start at this constant id.
#define shape_constant_id_offset 2

// Input shape: constant ids 2..7.
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int d = 0;
layout (constant_id = shape_constant_id_offset + 4) const int c = 0;
layout (constant_id = shape_constant_id_offset + 5) const int cstep = 0;

// Output shape: constant ids 8..13.
layout (constant_id = shape_constant_id_offset + 6) const int outdims = 0;
layout (constant_id = shape_constant_id_offset + 7) const int outw = 0;
layout (constant_id = shape_constant_id_offset + 8) const int outh = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outd = 0;
layout (constant_id = shape_constant_id_offset + 10) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 11) const int outcstep = 0;

// Resource bindings: binding 0 is the source blob, binding 1 the destination.
// With NCNN_image_shader set the blobs are a 3D sampler and a write-only 3D
// image; otherwise they are storage buffers whose source elements are
// 4-component (sfpvec4) and whose destination elements are scalar (sfp).
// NOTE(review): unfp, imfmtc1, sfp and sfpvec4 are not defined in this file;
// presumably they come from the ncnn shader prelude -- confirm.
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob { sfpvec4 bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
#endif

// Push-constant block carrying the same twelve shape fields as the shape
// specialization constants (input shape first, then output shape). main()
// reads shapes through psc(name) rather than touching p directly.
// NOTE(review): member order determines the block layout, so it presumably has
// to match what the host pushes -- confirm against the caller.
layout (push_constant) uniform parameter
{
    int dims;
    int w;
    int h;
    int d;
    int c;
    int cstep;

    int outdims;
    int outw;
    int outh;
    int outd;
    int outc;
    int outcstep;
} p;

// Entry point. Each invocation reads one 4-component input element at
// (gx, gy, gz) and writes its four components to four separate scalar output
// locations (x4, y4, z4), chosen by order_type.
//
// Invocations with gx >= w, gy >= h * d or gz >= c exit immediately.
//
// The axis that is expanded by 4 is gy when dims == 2 and gz otherwise.
// When dims is neither 2 nor 3 the 4-D branch runs: the input y index is split
// as gy = yd * h + yh, and every output y index is built as
// (slower index) * outh + (faster index).
//
// order_type values outside the tables below leave x4/y4/z4 unassigned;
// nothing in this shader validates order_type.
//
// NOTE(review): psc(), afpvec4, image3d_ld4, image3d_st1 and buffer_cp4to1 are
// not defined in this file. psc(x) presumably yields the specialization
// constant x when it is set and p.x otherwise -- confirm against the ncnn
// shader prelude.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    if (gx >= psc(w) || gy >= psc(h) * psc(d) || gz >= psc(c))
        return;

    // Offsets of the four components inside one packed element.
    const ivec4 lane = ivec4(0, 1, 2, 3);

    ivec4 x4;
    ivec4 y4;
    ivec4 z4;

    if (psc(dims) == 2)
    {
        // order_type
        // 0 = w h
        // 1 = h w

        // There is no channel axis for this rank: gz is forced to 0 so that
        // the source index computed at the end has no channel term either.
        gz = 0;
        z4 = ivec4(0);

        // Unpacked row indices covered by packed row gy.
        ivec4 h4 = gy * 4 + lane;

        if (order_type == 0)
        {
            x4 = ivec4(gx);
            y4 = h4;
        }
        else if (order_type == 1)
        {
            x4 = h4;
            y4 = ivec4(gx);
        }
    }
    else if (psc(dims) == 3)
    {
        // order_type
        // 0 = w h c
        // 1 = h w c
        // 2 = w c h
        // 3 = c w h
        // 4 = h c w
        // 5 = c h w

        // Unpacked channel indices covered by packed channel gz.
        ivec4 c4 = gz * 4 + lane;

        if (order_type == 0)
        {
            x4 = ivec4(gx);
            y4 = ivec4(gy);
            z4 = c4;
        }
        else if (order_type == 1)
        {
            x4 = ivec4(gy);
            y4 = ivec4(gx);
            z4 = c4;
        }
        else if (order_type == 2)
        {
            x4 = ivec4(gx);
            y4 = c4;
            z4 = ivec4(gy);
        }
        else if (order_type == 3)
        {
            x4 = c4;
            y4 = ivec4(gx);
            z4 = ivec4(gy);
        }
        else if (order_type == 4)
        {
            x4 = ivec4(gy);
            y4 = c4;
            z4 = ivec4(gx);
        }
        else if (order_type == 5)
        {
            x4 = c4;
            y4 = ivec4(gy);
            z4 = ivec4(gx);
        }
    }
    else // if (psc(dims) == 4)
    {
        // order_type
        // 0 = w h d c
        // 1 = h w d c
        // 2 = w d h c
        // 3 = d w h c
        // 4 = h d w c
        // 5 = d h w c
        // 6 = w h c d
        // 7 = h w c d
        // 8 = w c h d
        // 9 = c w h d
        //10 = h c w d
        //11 = c h w d
        //12 = w d c h
        //13 = d w c h
        //14 = w c d h
        //15 = c w d h
        //16 = d c w h
        //17 = c d w h
        //18 = h d c w
        //19 = d h c w
        //20 = h c d w
        //21 = c h d w
        //22 = d c h w
        //23 = c d h w

        // Split the folded input y index into its two components.
        int yd = gy / psc(h);
        int yh = gy % psc(h);

        // Unpacked channel indices covered by packed channel gz.
        ivec4 c4 = gz * 4 + lane;

        if (order_type == 0)
        {
            x4 = ivec4(gx);
            y4 = ivec4(gy);
            z4 = c4;
        }
        else if (order_type == 1)
        {
            x4 = ivec4(yh);
            y4 = ivec4(yd * psc(outh) + gx);
            z4 = c4;
        }
        else if (order_type == 2)
        {
            x4 = ivec4(gx);
            y4 = ivec4(yh * psc(outh) + yd);
            z4 = c4;
        }
        else if (order_type == 3)
        {
            x4 = ivec4(yd);
            y4 = ivec4(yh * psc(outh) + gx);
            z4 = c4;
        }
        else if (order_type == 4)
        {
            x4 = ivec4(yh);
            y4 = ivec4(gx * psc(outh) + yd);
            z4 = c4;
        }
        else if (order_type == 5)
        {
            x4 = ivec4(yd);
            y4 = ivec4(gx * psc(outh) + yh);
            z4 = c4;
        }
        else if (order_type == 6)
        {
            x4 = ivec4(gx);
            y4 = c4 * psc(outh) + yh;
            z4 = ivec4(yd);
        }
        else if (order_type == 7)
        {
            x4 = ivec4(yh);
            y4 = c4 * psc(outh) + gx;
            z4 = ivec4(yd);
        }
        else if (order_type == 8)
        {
            x4 = ivec4(gx);
            y4 = yh * psc(outh) + c4;
            z4 = ivec4(yd);
        }
        else if (order_type == 9)
        {
            x4 = c4;
            y4 = ivec4(yh * psc(outh) + gx);
            z4 = ivec4(yd);
        }
        else if (order_type == 10)
        {
            x4 = ivec4(yh);
            y4 = gx * psc(outh) + c4;
            z4 = ivec4(yd);
        }
        else if (order_type == 11)
        {
            x4 = c4;
            y4 = ivec4(gx * psc(outh) + yh);
            z4 = ivec4(yd);
        }
        else if (order_type == 12)
        {
            x4 = ivec4(gx);
            y4 = c4 * psc(outh) + yd;
            z4 = ivec4(yh);
        }
        else if (order_type == 13)
        {
            x4 = ivec4(yd);
            y4 = c4 * psc(outh) + gx;
            z4 = ivec4(yh);
        }
        else if (order_type == 14)
        {
            x4 = ivec4(gx);
            y4 = yd * psc(outh) + c4;
            z4 = ivec4(yh);
        }
        else if (order_type == 15)
        {
            x4 = c4;
            y4 = ivec4(yd * psc(outh) + gx);
            z4 = ivec4(yh);
        }
        else if (order_type == 16)
        {
            x4 = ivec4(yd);
            y4 = gx * psc(outh) + c4;
            z4 = ivec4(yh);
        }
        else if (order_type == 17)
        {
            x4 = c4;
            y4 = ivec4(gx * psc(outh) + yd);
            z4 = ivec4(yh);
        }
        else if (order_type == 18)
        {
            x4 = ivec4(yh);
            y4 = c4 * psc(outh) + yd;
            z4 = ivec4(gx);
        }
        else if (order_type == 19)
        {
            x4 = ivec4(yd);
            y4 = c4 * psc(outh) + yh;
            z4 = ivec4(gx);
        }
        else if (order_type == 20)
        {
            x4 = ivec4(yh);
            y4 = yd * psc(outh) + c4;
            z4 = ivec4(gx);
        }
        else if (order_type == 21)
        {
            x4 = c4;
            y4 = ivec4(yd * psc(outh) + yh);
            z4 = ivec4(gx);
        }
        else if (order_type == 22)
        {
            x4 = ivec4(yd);
            y4 = yh * psc(outh) + c4;
            z4 = ivec4(gx);
        }
        else if (order_type == 23)
        {
            x4 = c4;
            y4 = ivec4(yh * psc(outh) + yd);
            z4 = ivec4(gx);
        }
    }

#if NCNN_image_shader
    // Image path: load the 4-component texel, then store each component at its
    // own permuted coordinate.
    afpvec4 v = image3d_ld4(bottom_blob_3d, ivec3(gx, gy, gz));

    image3d_st1(top_blob_3d, ivec3(x4.r, y4.r, z4.r), v.r);
    image3d_st1(top_blob_3d, ivec3(x4.g, y4.g, z4.g), v.g);
    image3d_st1(top_blob_3d, ivec3(x4.b, y4.b, z4.b), v.b);
    image3d_st1(top_blob_3d, ivec3(x4.a, y4.a, z4.a), v.a);
#else
    // Buffer path: four linear destination offsets, one per component.
    ivec4 v_offset = z4 * psc(outcstep) + y4 * psc(outw) + x4;

    // Linear index of the packed source element.
    int gi = gz * psc(cstep) + gy * psc(w) + gx;

    buffer_cp4to1(top_blob_data, v_offset, bottom_blob_data, gi);
#endif
}

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.