// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "batchnorm_vulkan.h"

#include "layer_shader_type.h"

namespace ncnn {

BatchNorm_vulkan::BatchNorm_vulkan()
{
    support_vulkan = true;
    support_image_storage = true;

    pipeline_batchnorm = 0;
    pipeline_batchnorm_pack4 = 0;
    pipeline_batchnorm_pack8 = 0;
}
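
// create_pipeline builds one compute pipeline per packing layout. elempack is
// the number of channels interleaved into one element: pack4/pack8 carry 4 or 8
// channels per texel/buffer slot, so channels must be divisible by 4 or 8.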
int BatchNorm_vulkan::create_pipeline(const Option& opt)
{
    const Mat& shape = top_shapes.empty() ? Mat() : top_shapes[0];

    int elempack = opt.use_shader_pack8 && channels % 8 == 0 ? 8 : channels % 4 == 0 ? 4 : 1;

    size_t elemsize;
    if (opt.use_fp16_storage)
    {
        elemsize = elempack * 2u;
    }
    else if (opt.use_fp16_packed)
    {
        elemsize = elempack == 1 ? 4u : elempack * 2u;
    }
    else
    {
        elemsize = elempack * 4u;
    }
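
    // shape_packed is the blob shape as the shader sees it: the packed axis
    // shrinks by elempack while elemsize grows to hold elempack scalars
    // (2 bytes each for fp16 storage, otherwise 4).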
    Mat shape_packed;
    if (shape.dims == 1) shape_packed = Mat(shape.w / elempack, (void*)0, elemsize, elempack);
    if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
    if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
    if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);
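
    // Specialization constants baked into the pipeline; dims is clamped to 3
    // because the shaders treat a 4D blob as 3D with h * d rows.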
    std::vector<vk_specialization_type> specializations(0 + 5);
    specializations[0 + 0].i = std::min(3, shape_packed.dims);
    specializations[0 + 1].i = shape_packed.w;
    specializations[0 + 2].i = shape_packed.h * shape_packed.d;
    specializations[0 + 3].i = shape_packed.c;
    specializations[0 + 4].i = shape_packed.cstep;
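
    // Pick a dispatch-friendly workgroup size from the packed shape; when the
    // shape is unknown at load time (dims == 0) the 4x4xN default below is kept.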
    Mat local_size_xyz(4, 4, std::min(4, channels / elempack), (void*)0);
    if (shape_packed.dims == 1)
    {
        local_size_xyz.w = std::min(64, shape_packed.w);
        local_size_xyz.h = 1;
        local_size_xyz.c = 1;
    }
    if (shape_packed.dims == 2)
    {
        local_size_xyz.w = std::min(8, shape_packed.w);
        local_size_xyz.h = std::min(8, shape_packed.h);
        local_size_xyz.c = 1;
    }
    if (shape_packed.dims == 3)
    {
        local_size_xyz.w = std::min(4, shape_packed.w);
        local_size_xyz.h = std::min(4, shape_packed.h);
        local_size_xyz.c = std::min(4, shape_packed.c);
    }
    if (shape_packed.dims == 4)
    {
        local_size_xyz.w = std::min(4, shape_packed.w);
        local_size_xyz.h = std::min(4, shape_packed.h * shape_packed.d);
        local_size_xyz.c = std::min(4, shape_packed.c);
    }
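
    // Only the variant matching the static shape is created; with no shape hint
    // (dims == 0) every variant is built so any packing can be dispatched at runtime.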
    // pack1
    if (shape.dims == 0 || elempack == 1)
    {
        pipeline_batchnorm = new Pipeline(vkdev);
        pipeline_batchnorm->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_batchnorm->create(LayerShaderType::batchnorm, opt, specializations);
    }

    // pack4
    if (shape.dims == 0 || elempack == 4)
    {
        pipeline_batchnorm_pack4 = new Pipeline(vkdev);
        pipeline_batchnorm_pack4->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_batchnorm_pack4->create(LayerShaderType::batchnorm_pack4, opt, specializations);
    }

    // pack8
    if ((opt.use_shader_pack8 && shape.dims == 0) || elempack == 8)
    {
        pipeline_batchnorm_pack8 = new Pipeline(vkdev);
        pipeline_batchnorm_pack8->set_optimal_local_size_xyz(local_size_xyz);
        pipeline_batchnorm_pack8->create(LayerShaderType::batchnorm_pack8, opt, specializations);
    }

    return 0;
}

int BatchNorm_vulkan::destroy_pipeline(const Option& /*opt*/)
{
    delete pipeline_batchnorm;
    pipeline_batchnorm = 0;

    delete pipeline_batchnorm_pack4;
    pipeline_batchnorm_pack4 = 0;

    delete pipeline_batchnorm_pack8;
    pipeline_batchnorm_pack8 = 0;

    return 0;
}
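
// Repack the per-channel a/b coefficients precomputed by the base BatchNorm
// layer and record their upload into buffer or image storage, matching the
// path forward_inplace will bind.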
int BatchNorm_vulkan::upload_model(VkTransfer& cmd, const Option& opt)
{
    int elempack = opt.use_shader_pack8 && channels % 8 == 0 ? 8 : channels % 4 == 0 ? 4 : 1;

    Mat a_data_packed;
    convert_packing(a_data, a_data_packed, elempack, opt);

    if (opt.use_image_storage)
    {
        cmd.record_upload(a_data_packed, a_data_gpu_image, opt);
    }
    else
    {
        cmd.record_upload(a_data_packed, a_data_gpu, opt);
    }

    Mat b_data_packed;
    convert_packing(b_data, b_data_packed, elempack, opt);

    if (opt.use_image_storage)
    {
        cmd.record_upload(b_data_packed, b_data_gpu_image, opt);
    }
    else
    {
        cmd.record_upload(b_data_packed, b_data_gpu, opt);
    }

    if (opt.lightmode)
    {
        // the CPU-side copies are no longer needed once the GPU owns the data
        a_data.release();
        b_data.release();
    }

    return 0;
}
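
// Buffer-storage path: batchnorm runs in place, so the blob is bound once and
// read-modify-written by the shader.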
int BatchNorm_vulkan::forward_inplace(VkMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
    int elempack = bottom_top_blob.elempack;

    std::vector<VkMat> bindings(3);
    bindings[0] = bottom_top_blob;
    bindings[1] = a_data_gpu;
    bindings[2] = b_data_gpu;

    std::vector<vk_constant_type> constants(5);
    constants[0].i = std::min(3, bottom_top_blob.dims);
    constants[1].i = bottom_top_blob.w;
    constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
    constants[3].i = bottom_top_blob.c;
    constants[4].i = bottom_top_blob.cstep;

    const Pipeline* pipeline = elempack == 8 ? pipeline_batchnorm_pack8
                               : elempack == 4 ? pipeline_batchnorm_pack4
                               : pipeline_batchnorm;

    cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);

    return 0;
}
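
// Image-storage path: an image cannot be bound for simultaneous sampled read
// and storage write, so the blob appears twice in the bindings; cstep has no
// meaning for images, hence the last constant is fixed to 0.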
int BatchNorm_vulkan::forward_inplace(VkImageMat& bottom_top_blob, VkCompute& cmd, const Option& /*opt*/) const
{
    int elempack = bottom_top_blob.elempack;

    std::vector<VkImageMat> bindings(4);
    bindings[0] = bottom_top_blob;
    bindings[1] = bottom_top_blob;
    bindings[2] = a_data_gpu_image;
    bindings[3] = b_data_gpu_image;

    std::vector<vk_constant_type> constants(5);
    constants[0].i = std::min(3, bottom_top_blob.dims);
    constants[1].i = bottom_top_blob.w;
    constants[2].i = bottom_top_blob.h * bottom_top_blob.d;
    constants[3].i = bottom_top_blob.c;
    constants[4].i = 0; //bottom_top_blob.cstep;

    const Pipeline* pipeline = elempack == 8 ? pipeline_batchnorm_pack8
                               : elempack == 4 ? pipeline_batchnorm_pack4
                               : pipeline_batchnorm;

    cmd.record_pipeline(pipeline, bindings, constants, bottom_top_blob);

    return 0;
}

} // namespace ncnn