ncnn

Форк
0
/
permute.comp 
333 строки · 7.6 Кб
1
// Tencent is pleased to support the open source community by making ncnn available.
2
//
3
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
4
//
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
7
//
8
// https://opensource.org/licenses/BSD-3-Clause
9
//
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
14

15
#version 450
16

17
#if NCNN_fp16_storage
18
#extension GL_EXT_shader_16bit_storage: require
19
#endif
20
#if NCNN_fp16_arithmetic
21
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
22
#endif
23

24
// Selects which axis permutation main() applies. The valid values for each
// blob rank are listed in the order_type tables inside main().
layout (constant_id = 0) const int order_type = 0;
// Not referenced by any code visible in this shader.
// NOTE(review): presumably a driver-workaround flag consumed by shared macros
// defined outside this file - confirm before removing.
layout (constant_id = 1) const int bugihfa = 0;
26

27
// Shape specialization constants start at id 2, right after order_type (0)
// and bugihfa (1).
#define shape_constant_id_offset 2
// Input blob shape. Every constant defaults to 0.
// NOTE(review): presumably 0 means "not specialized, use the push constant of
// the same name", resolved by the psc() macro defined outside this file - confirm.
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int d = 0;
layout (constant_id = shape_constant_id_offset + 4) const int c = 0;
layout (constant_id = shape_constant_id_offset + 5) const int cstep = 0;

// Output blob shape, same field order as the input shape above.
layout (constant_id = shape_constant_id_offset + 6) const int outdims = 0;
layout (constant_id = shape_constant_id_offset + 7) const int outw = 0;
layout (constant_id = shape_constant_id_offset + 8) const int outh = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outd = 0;
layout (constant_id = shape_constant_id_offset + 10) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 11) const int outcstep = 0;
41

42
// Resource bindings: binding 0 is the source blob, binding 1 the destination.
// unfp, sfp and imfmtc1 are defined outside this file.
#if NCNN_image_shader
// Image path: the source is sampled as a 3D texture, the destination is a
// write-only 3D image.
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc1) writeonly uniform unfp image3D top_blob_3d;
#else
// Buffer path: both blobs are flat arrays of sfp elements; the source is
// read-only and the destination write-only.
layout (binding = 0) readonly buffer bottom_blob { sfp bottom_blob_data[]; };
layout (binding = 1) writeonly buffer top_blob { sfp top_blob_data[]; };
#endif
49

50
// Push-constant copy of the blob shapes. The members mirror, name for name,
// the shape specialization constants declared above.
// NOTE(review): presumably read through psc(name) when the matching
// specialization constant is left at 0 - psc() is defined outside this file.
layout (push_constant) uniform parameter
{
    // Input blob shape.
    int dims;
    int w;
    int h;
    int d;
    int c;
    int cstep;

    // Output blob shape.
    int outdims;
    int outw;
    int outh;
    int outd;
    int outc;
    int outcstep;
} p;
66

67
// Permute (axis reorder) copy.
//
// Each invocation owns one output element at (gx, gy, gz), works out the
// source coordinate (x, y, z) that order_type maps onto it, and copies that
// single element from the bottom blob to the top blob.
//
// Reading the order_type tables below: output axes are ordered w, h, [d,] c,
// and entry k of a row names the input axis that becomes output axis k.
// For example "h w c" means output w is input h and output h is input w.
//
// psc(), image3d_cp1() and buffer_cp1() are defined outside this file.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    // Nothing to write outside the output extent. The y dimension of the
    // dispatch covers outh * outd rows.
    if (gx >= psc(outw) || gy >= psc(outh) * psc(outd) || gz >= psc(outc))
        return;

    // Source coordinate. Zero-initialized so that an order_type with no
    // matching case below reads element 0 instead of an undefined offset.
    int x = 0;
    int y = 0;
    int z = 0;

    if (psc(dims) == 2)
    {
        // order_type
        // 0 = w h
        // 1 = h w

        // A 2-D blob has a single plane on both sides.
        gz = 0;
        z = 0;

        if (order_type == 0)
        {
            x = gx;
            y = gy;
        }
        else if (order_type == 1)
        {
            x = gy;
            y = gx;
        }
    }
    else if (psc(dims) == 3)
    {
        // order_type
        // 0 = w h c
        // 1 = h w c
        // 2 = w c h
        // 3 = c w h
        // 4 = h c w
        // 5 = c h w

        if (order_type == 0)
        {
            x = gx;
            y = gy;
            z = gz;
        }
        else if (order_type == 1)
        {
            x = gy;
            y = gx;
            z = gz;
        }
        else if (order_type == 2)
        {
            x = gx;
            y = gz;
            z = gy;
        }
        else if (order_type == 3)
        {
            x = gy;
            y = gz;
            z = gx;
        }
        else if (order_type == 4)
        {
            x = gz;
            y = gx;
            z = gy;
        }
        else if (order_type == 5)
        {
            x = gz;
            y = gy;
            z = gx;
        }
    }
    else // if (psc(dims) == 4)
    {
        // order_type
        // 0 = w h d c
        // 1 = h w d c
        // 2 = w d h c
        // 3 = d w h c
        // 4 = h d w c
        // 5 = d h w c
        // 6 = w h c d
        // 7 = h w c d
        // 8 = w c h d
        // 9 = c w h d
        //10 = h c w d
        //11 = c h w d
        //12 = w d c h
        //13 = d w c h
        //14 = w c d h
        //15 = c w d h
        //16 = d c w h
        //17 = c d w h
        //18 = h d c w
        //19 = d h c w
        //20 = h c d w
        //21 = c h d w
        //22 = d c h w
        //23 = c d h w

        // gy packs two output indices: depth (yd) and row within depth (yh).
        int yd = gy / psc(outh);
        int yh = gy % psc(outh);

        // In every case below the source row is
        //   y = (input depth index) * h + (input row index)
        // while x is the input column index and z the input channel index.
        if (order_type == 0)
        {
            x = gx;
            y = yd * psc(h) + yh;
            z = gz;
        }
        else if (order_type == 1)
        {
            x = yh;
            y = yd * psc(h) + gx;
            z = gz;
        }
        else if (order_type == 2)
        {
            x = gx;
            y = yh * psc(h) + yd;
            z = gz;
        }
        else if (order_type == 3)
        {
            x = yh;
            y = gx * psc(h) + yd;
            z = gz;
        }
        else if (order_type == 4)
        {
            x = yd;
            y = yh * psc(h) + gx;
            z = gz;
        }
        else if (order_type == 5)
        {
            x = yd;
            y = gx * psc(h) + yh;
            z = gz;
        }
        else if (order_type == 6)
        {
            x = gx;
            y = gz * psc(h) + yh;
            z = yd;
        }
        else if (order_type == 7)
        {
            x = yh;
            y = gz * psc(h) + gx;
            z = yd;
        }
        else if (order_type == 8)
        {
            x = gx;
            y = gz * psc(h) + yd;
            z = yh;
        }
        else if (order_type == 9)
        {
            x = yh;
            y = gz * psc(h) + yd;
            z = gx;
        }
        else if (order_type == 10)
        {
            x = yd;
            y = gz * psc(h) + gx;
            z = yh;
        }
        else if (order_type == 11)
        {
            x = yd;
            y = gz * psc(h) + yh;
            z = gx;
        }
        else if (order_type == 12)
        {
            x = gx;
            y = yh * psc(h) + gz;
            z = yd;
        }
        else if (order_type == 13)
        {
            x = yh;
            y = gx * psc(h) + gz;
            z = yd;
        }
        else if (order_type == 14)
        {
            x = gx;
            y = yd * psc(h) + gz;
            z = yh;
        }
        else if (order_type == 15)
        {
            x = yh;
            y = yd * psc(h) + gz;
            z = gx;
        }
        else if (order_type == 16)
        {
            x = yd;
            y = gx * psc(h) + gz;
            z = yh;
        }
        else if (order_type == 17)
        {
            x = yd;
            y = yh * psc(h) + gz;
            z = gx;
        }
        else if (order_type == 18)
        {
            x = gz;
            y = yh * psc(h) + gx;
            z = yd;
        }
        else if (order_type == 19)
        {
            x = gz;
            y = gx * psc(h) + yh;
            z = yd;
        }
        else if (order_type == 20)
        {
            x = gz;
            y = yd * psc(h) + gx;
            z = yh;
        }
        else if (order_type == 21)
        {
            x = gz;
            y = yd * psc(h) + yh;
            z = gx;
        }
        else if (order_type == 22)
        {
            x = gz;
            y = gx * psc(h) + yd;
            z = yh;
        }
        else if (order_type == 23)
        {
            x = gz;
            y = yh * psc(h) + yd;
            z = gx;
        }
    }

#if NCNN_image_shader
    image3d_cp1(top_blob_3d, ivec3(gx, gy, gz), bottom_blob_3d, ivec3(x, y, z));
#else
    // Flat element offsets: plane stride is cstep / outcstep, row stride is
    // w / outw.
    int v_offset = z * psc(cstep) + y * psc(w) + x;

    const int gi = gz * psc(outcstep) + gy * psc(outw) + gx;

    buffer_cp1(top_blob_data, gi, bottom_blob_data, v_offset);
#endif
}
334

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.