ncnn

Форк
0
/
slice_pack1to4.comp 
203 строки · 7.4 Кб
1
// Tencent is pleased to support the open source community by making ncnn available.
2
//
3
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
4
//
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
7
//
8
// https://opensource.org/licenses/BSD-3-Clause
9
//
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
14

15
#version 450

// The 16-bit storage / arithmetic extensions are required only when the
// matching NCNN_* build flag evaluates to true.
#if NCNN_fp16_storage
#extension GL_EXT_shader_16bit_storage: require
#endif
#if NCNN_fp16_arithmetic
#extension GL_EXT_shader_explicit_arithmetic_types_float16: require
#endif

// Axis selector. main() adds dims to it when it is negative.
layout (constant_id = 0) const int axis = 0;

// Shape constants take the specialization IDs that follow `axis`.
// NOTE(review): a value of 0 presumably means "not specialized, read the push
// constant instead" via psc() - confirm against the psc() definition.
#define shape_constant_id_offset 1

// Shape of the blob that main() reads from.
layout (constant_id = shape_constant_id_offset + 0) const int dims = 0;
layout (constant_id = shape_constant_id_offset + 1) const int w = 0;
layout (constant_id = shape_constant_id_offset + 2) const int h = 0;
layout (constant_id = shape_constant_id_offset + 3) const int d = 0;
layout (constant_id = shape_constant_id_offset + 4) const int c = 0;
layout (constant_id = shape_constant_id_offset + 5) const int cstep = 0;

// Shape of the blob that main() writes to.
layout (constant_id = shape_constant_id_offset + 6) const int outdims = 0;
layout (constant_id = shape_constant_id_offset + 7) const int outw = 0;
layout (constant_id = shape_constant_id_offset + 8) const int outh = 0;
layout (constant_id = shape_constant_id_offset + 9) const int outd = 0;
layout (constant_id = shape_constant_id_offset + 10) const int outc = 0;
layout (constant_id = shape_constant_id_offset + 11) const int outcstep = 0;

// Binding 0 is the source blob, binding 1 the destination blob. The image
// variant samples a 3D texture and writes a 3D image; the buffer variant reads
// scalar elements (sfp) and writes 4-component elements (sfpvec4).
#if NCNN_image_shader
layout (binding = 0) uniform unfp sampler3D bottom_blob_3d;
layout (binding = 1, imfmtc4) writeonly uniform unfp image3D top_blob_3d;
#else
layout (binding = 0) readonly buffer bottom_blob
{
    sfp bottom_blob_data[];
};
layout (binding = 1) writeonly buffer top_blob
{
    sfpvec4 top_blob_data[];
};
#endif
48

49
// Push constants: the same shape fields as the specialization constants of the
// same names, followed by the offset that main() adds along the selected axis.
// Member order defines the memory layout and must not change.
layout (push_constant) uniform parameter
{
    // Shape of the blob that main() reads from.
    int dims;
    int w;
    int h;
    int d;
    int c;
    int cstep;

    // Shape of the blob that main() writes to.
    int outdims;
    int outw;
    int outh;
    int outd;
    int outc;
    int outcstep;

    // Added to the source coordinate along the selected axis.
    int offset;
} p;
67

68
// Each invocation produces one 4-component output element at (gx, gy, gz) by
// gathering four scalar source elements that are consecutive along the axis
// selected by `axis`, starting p.offset elements into that axis.
void main()
{
    int gx = int(gl_GlobalInvocationID.x);
    int gy = int(gl_GlobalInvocationID.y);
    int gz = int(gl_GlobalInvocationID.z);

    // Drop invocations outside the output extent; y spans outh * outd rows.
    if (gx >= psc(outw))
        return;
    if (gy >= psc(outh) * psc(outd))
        return;
    if (gz >= psc(outc))
        return;

    const int ndim = psc(dims);

    // A negative axis counts back from the number of dimensions.
    int pos_axis = axis;
    if (axis < 0)
        pos_axis = ndim + axis;

    // Component indices counted from x (fastest varying): `outer` is the last
    // axis of the blob, `along` is the axis selected by pos_axis.
    const int outer = ndim - 1;
    const int along = outer - pos_axis;

    // Source coordinate of the first of the four gathered elements.
    ivec3 src;

    if (ndim == 4)
    {
        // gy holds depth * outh + row; split it to get a 4-component coordinate.
        ivec4 coord = ivec4(gx, gy % psc(outh), gy / psc(outh), gz);
        coord[outer] *= 4;
        coord[along] += p.offset;

        // Fold depth back into y, this time using the input height.
        src = ivec3(coord.x, coord.y + coord.z * psc(h), coord.w);
    }
    else
    {
        src = ivec3(gx, gy, gz);
        src[outer] *= 4;
        src[along] += p.offset;
    }

#if NCNN_image_shader
    // NOTE(review): image3d_ld1 presumably fetches one scalar texel and
    // image3d_st4 stores one 4-component texel - confirm against their definitions.
    afpvec4 v;

    if (ndim == 1)
    {
        // Only x is meaningful for a 1-D blob.
        v.r = image3d_ld1(bottom_blob_3d, ivec3(src.x, 0, 0));
        v.g = image3d_ld1(bottom_blob_3d, ivec3(src.x + 1, 0, 0));
        v.b = image3d_ld1(bottom_blob_3d, ivec3(src.x + 2, 0, 0));
        v.a = image3d_ld1(bottom_blob_3d, ivec3(src.x + 3, 0, 0));
    }
    else if (ndim == 2)
    {
        if (pos_axis == 0)
        {
            // Gather along y.
            v.r = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, 0));
            v.g = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 1, 0));
            v.b = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 2, 0));
            v.a = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 3, 0));
        }
        else if (pos_axis == 1)
        {
            // Gather along x.
            v.r = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, 0));
            v.g = image3d_ld1(bottom_blob_3d, ivec3(src.x + 1, src.y, 0));
            v.b = image3d_ld1(bottom_blob_3d, ivec3(src.x + 2, src.y, 0));
            v.a = image3d_ld1(bottom_blob_3d, ivec3(src.x + 3, src.y, 0));
        }
    }
    else if (ndim == 3)
    {
        if (pos_axis == 0)
        {
            // Gather along z.
            v.r = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z));
            v.g = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z + 1));
            v.b = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z + 2));
            v.a = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z + 3));
        }
        else if (pos_axis == 1)
        {
            // Gather along y.
            v.r = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z));
            v.g = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 1, src.z));
            v.b = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 2, src.z));
            v.a = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 3, src.z));
        }
        else if (pos_axis == 2)
        {
            // Gather along x.
            v.r = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z));
            v.g = image3d_ld1(bottom_blob_3d, ivec3(src.x + 1, src.y, src.z));
            v.b = image3d_ld1(bottom_blob_3d, ivec3(src.x + 2, src.y, src.z));
            v.a = image3d_ld1(bottom_blob_3d, ivec3(src.x + 3, src.y, src.z));
        }
    }
    else // treated as 4-D: depth is folded into y
    {
        if (pos_axis == 0)
        {
            // Gather along z.
            v.r = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z));
            v.g = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z + 1));
            v.b = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z + 2));
            v.a = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z + 3));
        }
        else if (pos_axis == 1)
        {
            // Gather along depth: consecutive depth slices are outh rows apart in y.
            const int rows = psc(outh);
            v.r = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z));
            v.g = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + rows, src.z));
            v.b = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 2 * rows, src.z));
            v.a = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 3 * rows, src.z));
        }
        else if (pos_axis == 2)
        {
            // Gather along rows within one depth slice.
            v.r = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z));
            v.g = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 1, src.z));
            v.b = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 2, src.z));
            v.a = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y + 3, src.z));
        }
        else if (pos_axis == 3)
        {
            // Gather along x.
            v.r = image3d_ld1(bottom_blob_3d, ivec3(src.x, src.y, src.z));
            v.g = image3d_ld1(bottom_blob_3d, ivec3(src.x + 1, src.y, src.z));
            v.b = image3d_ld1(bottom_blob_3d, ivec3(src.x + 2, src.y, src.z));
            v.a = image3d_ld1(bottom_blob_3d, ivec3(src.x + 3, src.y, src.z));
        }
    }

    image3d_st4(top_blob_3d, ivec3(gx, gy, gz), v);
#else
    // Linear index of the output element.
    const int dst_index = gz * psc(outcstep) + gy * psc(outw) + gx;

    // Linear index of the first source element.
    const int first_index = src.z * psc(cstep) + src.y * psc(w) + src.x;

    // Element stride of each axis, indexed from x upwards.
    const ivec4 axis_strides = (ndim == 4)
        ? ivec4(1, psc(w), psc(w) * psc(h), psc(cstep))
        : ivec4(1, psc(w), psc(cstep), 0);

    // Four source indices, one stride apart along the selected axis.
    const int hop = axis_strides[along];
    ivec4 src_index = first_index + ivec4(0, 1, 2, 3) * hop;

    // NOTE(review): buffer_cp1to4 presumably copies the four scalars at
    // src_index into the single 4-component element at dst_index - confirm.
    buffer_cp1to4(top_blob_data, dst_index, bottom_blob_data, src_index);
#endif
}
204

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.