ncnn

Форк
0
/
permute_pack8to1.comp 
442 строки · 12.0 Кб
1
// Tencent is pleased to support the open source community by making ncnn available.
2
//
3
// Copyright (C) 2020 THL A29 Limited, a Tencent company. All rights reserved.
4
//
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
7
//
8
// https://opensource.org/licenses/BSD-3-Clause
9
//
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
14

15
#version 450
16

17
// The two feature macros below are not defined in this file; they are
// expected to be supplied by whatever preprocesses/compiles the shader.
// When NCNN_fp16_storage is non-zero, 16-bit storage is required and the
// 8-element storage type is declared here as two f16vec4 halves.
#if NCNN_fp16_storage
18
#extension GL_EXT_shader_16bit_storage: require
19
// Eight half-precision components: lanes 0-3 in abcd, lanes 4-7 in efgh.
// NOTE(review): when NCNN_fp16_storage is off, sfpvec8 must be provided
// from elsewhere (it is still used by the bottom_blob buffer) -- confirm.
struct sfpvec8 { f16vec4 abcd; f16vec4 efgh; };
20
#endif
21
// When NCNN_fp16_arithmetic is non-zero, explicit float16 arithmetic types
// are required as well.
#if NCNN_fp16_arithmetic
22
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
23
#endif
24

25
// Specialization constant 0: selects which axis permutation main() applies.
// The meaning of each value depends on dims and is listed in the
// "order_type" comment tables inside main().
layout (constant_id = 0) const int order_type = 0;
26
// Specialization constant 1: not referenced anywhere in this shader as shown.
// NOTE(review): purpose is not visible from this file -- confirm with the
// host code before removing or renumbering.
layout (constant_id = 1) const int bugihfa = 0;
27

28
// Shape constants start at constant_id 2, right after the two constants above.
#define shape_constant_id_offset 2
29
// Input (bottom blob) shape. All default to 0.
// NOTE(review): main() reads these through psc(...), which is defined outside
// this file; presumably it falls back to the push-constant field of the same
// name when the specialization constant is 0 -- confirm.
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
30
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
31
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
32
layout (constant_id = shape_constant_id_offset + 3) const int d = 0;
33
layout (constant_id = shape_constant_id_offset + 4) const int c = 0;
34
layout (constant_id = shape_constant_id_offset + 5) const int cstep = 0;
35

36
// Output (top blob) shape, constant ids 8..13.
layout (constant_id = shape_constant_id_offset + 6) const int outdims = 0;
37
layout (constant_id = shape_constant_id_offset + 7) const int outw = 0;
38
layout (constant_id = shape_constant_id_offset + 8) const int outh = 0;
39
layout (constant_id = shape_constant_id_offset + 9) const int outd = 0;
40
layout (constant_id = shape_constant_id_offset + 10) const int outc = 0;
41
layout (constant_id = shape_constant_id_offset + 11) const int outcstep = 0;
42

43
// Resource bindings. Binding 0 is always the input, binding 1 the output.
// unfp, imfmtc1 and sfp are not defined in this file; they come from the
// surrounding shader build environment.
#if NCNN_image_shader
44
// Image path: input is sampled as a 3D texture ...
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
45
// ... and output is a write-only 3D image using the imfmtc1 format qualifier.
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
46
#else
47
// Buffer path: input is a read-only array of 8-element packs (sfpvec8) ...
layout (binding = 0) readonly buffer bottom_blob { sfpvec8 bottom_blob_data[]; };
48
// ... and output is a write-only array of single elements (sfp).
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
49
#endif
50

51
// Push-constant block, accessed as p.<field>. It carries the same twelve
// shape values, in the same order, as the shape specialization constants
// (dims..cstep for the input, outdims..outcstep for the output).
// Field order defines the push-constant memory layout, so it must stay in
// sync with whatever the host side pushes.
layout (push_constant) uniform parameter
52
{
53
    // Input (bottom blob) shape.
    int dims;
54
    int w;
55
    int h;
56
    int d;
57
    int c;
58
    int cstep;
59

60
    // Output (top blob) shape.
    int outdims;
61
    int outw;
62
    int outh;
63
    int outd;
64
    int outc;
65
    int outcstep;
66
} p;
67

68
// Permute entry point: each invocation takes the 8-element pack located at
// input coordinate (gx, gy, gz) and scatters its 8 lanes to 8 separate
// single-element output locations.
//
// Output coordinates are computed four lanes at a time:
//   (x4,  y4,  z4 )  -> destinations of lanes 0..3
//   (xx4, yy4, zz4)  -> destinations of lanes 4..7
// Whichever input axis is packed contributes "index * 8 + lane" to the output
// axis it is permuted onto; every other axis contributes a broadcast scalar,
// so the lanes-4..7 vector equals the lanes-0..3 vector on those axes and is
// "+ 4" on the packed axis.
//
// For dims == 4 the input y axis carries both d and h (gy = yd * h + yh), and
// the output y coordinate is likewise composed as "outd_index * outh + outh_index".
//
// NOTE(review): psc(), image3d_ld8(), image3d_st1() and buffer_cp8to1() are
// defined outside this file; the descriptions above assume they read the
// shape value / load 8 lanes / store 1 element / copy 8 lanes to 8 offsets
// as their names suggest -- confirm.
// NOTE(review): if order_type is outside the range handled for the current
// dims, the coordinate vectors are left uninitialized.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    // Drop invocations that fall outside the input extent (y spans h * d rows).
    if (gx >= psc(w) || gy >= psc(h) * psc(d) || gz >= psc(c))
        return;

    ivec4 x4;
    ivec4 xx4;
    ivec4 y4;
    ivec4 yy4;
    ivec4 z4;
    ivec4 zz4;

    if (psc(dims) == 2)
    {
        // order_type
        // 0 = w h
        // 1 = h w

        // 2D data has no channel axis; here the packed axis is y (gy * 8 + lane).
        gz = 0;
        z4 = ivec4(0);
        zz4 = z4;

        if (order_type == 0)
        {
            x4 = ivec4(gx);
            xx4 = x4;
            y4 = gy * 8 + ivec4(0, 1, 2, 3);
            yy4 = y4 + 4;
        }
        if (order_type == 1)
        {
            x4 = gy * 8 + ivec4(0, 1, 2, 3);
            xx4 = x4 + 4;
            y4 = ivec4(gx);
            yy4 = y4;
        }
    }
    else if (psc(dims) == 3)
    {
        // order_type
        // 0 = w h c
        // 1 = h w c
        // 2 = w c h
        // 3 = c w h
        // 4 = h c w
        // 5 = c h w

        // The packed axis is the channel axis (gz * 8 + lane).

        if (order_type == 0)
        {
            x4 = ivec4(gx);
            xx4 = x4;
            y4 = ivec4(gy);
            yy4 = y4;
            z4 = gz * 8 + ivec4(0, 1, 2, 3);
            zz4 = z4 + 4;
        }
        if (order_type == 1)
        {
            x4 = ivec4(gy);
            xx4 = x4;
            y4 = ivec4(gx);
            yy4 = y4;
            z4 = gz * 8 + ivec4(0, 1, 2, 3);
            zz4 = z4 + 4;
        }
        if (order_type == 2)
        {
            x4 = ivec4(gx);
            xx4 = x4;
            y4 = gz * 8 + ivec4(0, 1, 2, 3);
            yy4 = y4 + 4;
            z4 = ivec4(gy);
            // z is a broadcast axis here, so lanes 4..7 must land on the same
            // output channel (gy) as lanes 0..3; it must not take the packed
            // y coordinates.
            zz4 = z4;
        }
        if (order_type == 3)
        {
            x4 = gz * 8 + ivec4(0, 1, 2, 3);
            xx4 = x4 + 4;
            y4 = ivec4(gx);
            yy4 = y4;
            z4 = ivec4(gy);
            zz4 = z4;
        }
        if (order_type == 4)
        {
            x4 = ivec4(gy);
            xx4 = x4;
            y4 = gz * 8 + ivec4(0, 1, 2, 3);
            yy4 = y4 + 4;
            z4 = ivec4(gx);
            zz4 = z4;
        }
        if (order_type == 5)
        {
            x4 = gz * 8 + ivec4(0, 1, 2, 3);
            xx4 = x4 + 4;
            y4 = ivec4(gy);
            yy4 = y4;
            z4 = ivec4(gx);
            zz4 = z4;
        }
    }
    else // if (psc(dims) == 4)
    {
        // order_type
        // 0 = w h d c
        // 1 = h w d c
        // 2 = w d h c
        // 3 = d w h c
        // 4 = h d w c
        // 5 = d h w c
        // 6 = w h c d
        // 7 = h w c d
        // 8 = w c h d
        // 9 = c w h d
        //10 = h c w d
        //11 = c h w d
        //12 = w d c h
        //13 = d w c h
        //14 = w c d h
        //15 = c w d h
        //16 = d c w h
        //17 = c d w h
        //18 = h d c w
        //19 = d h c w
        //20 = h c d w
        //21 = c h d w
        //22 = d c h w
        //23 = c d h w

        // Split the flattened input row index into its depth and height parts.
        int yd = gy / psc(h);
        int yh = gy % psc(h);

        if (order_type == 0)
        {
            x4 = ivec4(gx);
            xx4 = x4;
            y4 = ivec4(gy);
            yy4 = y4;
            z4 = gz * 8 + ivec4(0, 1, 2, 3);
            zz4 = z4 + 4;
        }
        if (order_type == 1)
        {
            x4 = ivec4(yh);
            xx4 = x4;
            y4 = ivec4(yd * psc(outh) + gx);
            yy4 = y4;
            z4 = gz * 8 + ivec4(0, 1, 2, 3);
            zz4 = z4 + 4;
        }
        if (order_type == 2)
        {
            x4 = ivec4(gx);
            xx4 = x4;
            y4 = ivec4(yh * psc(outh) + yd);
            yy4 = y4;
            z4 = gz * 8 + ivec4(0, 1, 2, 3);
            zz4 = z4 + 4;
        }
        if (order_type == 3)
        {
            x4 = ivec4(yd);
            xx4 = x4;
            y4 = ivec4(yh * psc(outh) + gx);
            yy4 = y4;
            z4 = gz * 8 + ivec4(0, 1, 2, 3);
            zz4 = z4 + 4;
        }
        if (order_type == 4)
        {
            x4 = ivec4(yh);
            xx4 = x4;
            y4 = ivec4(gx * psc(outh) + yd);
            yy4 = y4;
            z4 = gz * 8 + ivec4(0, 1, 2, 3);
            zz4 = z4 + 4;
        }
        if (order_type == 5)
        {
            x4 = ivec4(yd);
            xx4 = x4;
            y4 = ivec4(gx * psc(outh) + yh);
            yy4 = y4;
            z4 = gz * 8 + ivec4(0, 1, 2, 3);
            zz4 = z4 + 4;
        }
        if (order_type == 6)
        {
            x4 = ivec4(gx);
            xx4 = x4;
            y4 = (gz * 8 + ivec4(0, 1, 2, 3)) * psc(outh) + yh;
            yy4 = (gz * 8 + ivec4(0, 1, 2, 3) + 4) * psc(outh) + yh;
            z4 = ivec4(yd);
            zz4 = z4;
        }
        if (order_type == 7)
        {
            x4 = ivec4(yh);
            xx4 = x4;
            y4 = (gz * 8 + ivec4(0, 1, 2, 3)) * psc(outh) + gx;
            yy4 = (gz * 8 + ivec4(0, 1, 2, 3) + 4) * psc(outh) + gx;
            z4 = ivec4(yd);
            zz4 = z4;
        }
        if (order_type == 8)
        {
            x4 = ivec4(gx);
            xx4 = x4;
            y4 = yh * psc(outh) + gz * 8 + ivec4(0, 1, 2, 3);
            yy4 = y4 + 4;
            z4 = ivec4(yd);
            zz4 = z4;
        }
        if (order_type == 9)
        {
            x4 = gz * 8 + ivec4(0, 1, 2, 3);
            xx4 = x4 + 4;
            y4 = ivec4(yh * psc(outh) + gx);
            yy4 = y4;
            z4 = ivec4(yd);
            zz4 = z4;
        }
        if (order_type == 10)
        {
            x4 = ivec4(yh);
            xx4 = x4;
            y4 = gx * psc(outh) + gz * 8 + ivec4(0, 1, 2, 3);
            yy4 = y4 + 4;
            z4 = ivec4(yd);
            zz4 = z4;
        }
        if (order_type == 11)
        {
            x4 = gz * 8 + ivec4(0, 1, 2, 3);
            xx4 = x4 + 4;
            y4 = ivec4(gx * psc(outh) + yh);
            yy4 = y4;
            z4 = ivec4(yd);
            zz4 = z4;
        }
        if (order_type == 12)
        {
            x4 = ivec4(gx);
            xx4 = x4;
            y4 = (gz * 8 + ivec4(0, 1, 2, 3)) * psc(outh) + yd;
            yy4 = (gz * 8 + ivec4(0, 1, 2, 3) + 4) * psc(outh) + yd;
            z4 = ivec4(yh);
            zz4 = z4;
        }
        if (order_type == 13)
        {
            x4 = ivec4(yd);
            xx4 = x4;
            y4 = (gz * 8 + ivec4(0, 1, 2, 3)) * psc(outh) + gx;
            yy4 = (gz * 8 + ivec4(0, 1, 2, 3) + 4) * psc(outh) + gx;
            z4 = ivec4(yh);
            zz4 = z4;
        }
        if (order_type == 14)
        {
            x4 = ivec4(gx);
            xx4 = x4;
            y4 = yd * psc(outh) + gz * 8 + ivec4(0, 1, 2, 3);
            yy4 = y4 + 4;
            z4 = ivec4(yh);
            zz4 = z4;
        }
        if (order_type == 15)
        {
            x4 = gz * 8 + ivec4(0, 1, 2, 3);
            xx4 = x4 + 4;
            y4 = ivec4(yd * psc(outh) + gx);
            yy4 = y4;
            z4 = ivec4(yh);
            zz4 = z4;
        }
        if (order_type == 16)
        {
            x4 = ivec4(yd);
            xx4 = x4;
            y4 = gx * psc(outh) + gz * 8 + ivec4(0, 1, 2, 3);
            yy4 = y4 + 4;
            z4 = ivec4(yh);
            zz4 = z4;
        }
        if (order_type == 17)
        {
            x4 = gz * 8 + ivec4(0, 1, 2, 3);
            xx4 = x4 + 4;
            y4 = ivec4(gx * psc(outh) + yd);
            yy4 = y4;
            z4 = ivec4(yh);
            zz4 = z4;
        }
        if (order_type == 18)
        {
            x4 = ivec4(yh);
            xx4 = x4;
            y4 = (gz * 8 + ivec4(0, 1, 2, 3)) * psc(outh) + yd;
            yy4 = (gz * 8 + ivec4(0, 1, 2, 3) + 4) * psc(outh) + yd;
            z4 = ivec4(gx);
            zz4 = z4;
        }
        if (order_type == 19)
        {
            x4 = ivec4(yd);
            xx4 = x4;
            y4 = (gz * 8 + ivec4(0, 1, 2, 3)) * psc(outh) + yh;
            yy4 = (gz * 8 + ivec4(0, 1, 2, 3) + 4) * psc(outh) + yh;
            z4 = ivec4(gx);
            zz4 = z4;
        }
        if (order_type == 20)
        {
            x4 = ivec4(yh);
            xx4 = x4;
            y4 = yd * psc(outh) + gz * 8 + ivec4(0, 1, 2, 3);
            yy4 = y4 + 4;
            z4 = ivec4(gx);
            zz4 = z4;
        }
        if (order_type == 21)
        {
            x4 = gz * 8 + ivec4(0, 1, 2, 3);
            xx4 = x4 + 4;
            y4 = ivec4(yd * psc(outh) + yh);
            yy4 = y4;
            z4 = ivec4(gx);
            zz4 = z4;
        }
        if (order_type == 22)
        {
            x4 = ivec4(yd);
            xx4 = x4;
            y4 = yh * psc(outh) + gz * 8 + ivec4(0, 1, 2, 3);
            yy4 = y4 + 4;
            z4 = ivec4(gx);
            zz4 = z4;
        }
        if (order_type == 23)
        {
            x4 = gz * 8 + ivec4(0, 1, 2, 3);
            xx4 = x4 + 4;
            y4 = ivec4(yh * psc(outh) + yd);
            yy4 = y4;
            z4 = ivec4(gx);
            zz4 = z4;
        }
    }

#if NCNN_image_shader
    // Image path: load the 8-lane pack once, then store each lane to its own texel.
    afpvec8 v = image3d_ld8(bottom_blob_3d, ivec3(gx, gy, gz));

    image3d_st1(top_blob_3d, ivec3(x4.r, y4.r, z4.r), v[0].r);
    image3d_st1(top_blob_3d, ivec3(x4.g, y4.g, z4.g), v[0].g);
    image3d_st1(top_blob_3d, ivec3(x4.b, y4.b, z4.b), v[0].b);
    image3d_st1(top_blob_3d, ivec3(x4.a, y4.a, z4.a), v[0].a);
    image3d_st1(top_blob_3d, ivec3(xx4.r, yy4.r, zz4.r), v[1].r);
    image3d_st1(top_blob_3d, ivec3(xx4.g, yy4.g, zz4.g), v[1].g);
    image3d_st1(top_blob_3d, ivec3(xx4.b, yy4.b, zz4.b), v[1].b);
    image3d_st1(top_blob_3d, ivec3(xx4.a, yy4.a, zz4.a), v[1].a);
#else
    // Buffer path: linearize the 8 output coordinates (z * outcstep + y * outw + x).
    ivec4 v_offset = z4 * psc(outcstep) + y4 * psc(outw) + x4;
    ivec4 vv_offset = zz4 * psc(outcstep) + yy4 * psc(outw) + xx4;

    // Linear index of the source pack in the input buffer.
    int gi = gz * psc(cstep) + gy * psc(w) + gx;

    buffer_cp8to1(top_blob_data, v_offset, vv_offset, bottom_blob_data, gi);
#endif
}
443

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.