1
// Tencent is pleased to support the open source community by making ncnn available.
3
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
8
// https://opensource.org/licenses/BSD-3-Clause
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
15
#include "crop_vulkan.h"
17
#include "layer_shader_type.h"
18
#include "layer_type.h"
22
// Constructor: marks the layer as supporting Vulkan and image storage, and
// clears the packed-variant pipeline handles assigned below.
// NOTE(review): no `pipeline_crop = 0;` is visible in these lines even though
// destroy_pipeline() deletes pipeline_crop - confirm it is initialised in a
// line that is not shown here.
Crop_vulkan::Crop_vulkan()
24
support_vulkan = true;
25
support_image_storage = true;
// Pipeline handles start out as 0 (no pipeline created yet).
28
pipeline_crop_pack4 = 0;
29
pipeline_crop_pack1to4 = 0;
30
pipeline_crop_pack4to1 = 0;
31
pipeline_crop_pack8 = 0;
32
pipeline_crop_pack1to8 = 0;
33
pipeline_crop_pack4to8 = 0;
34
pipeline_crop_pack8to4 = 0;
35
pipeline_crop_pack8to1 = 0;
38
// Creates the compute pipelines for the crop shaders.
// Visible steps: derive pack sizes from the input/output shape hints, derive the
// pack size implied by the crop offset, fill the specialization constants from
// the (possibly repacked) shape descriptors, pick a local workgroup size, then
// create the pipeline variants that the resolved pack sizes may need.
// NOTE(review): braces, several declarations (elempack, out_elempack, elemsize,
// out_elemsize, shape_packed, local_size_xyz) and the return statement are not
// visible in these lines - confirm against the full file.
int Crop_vulkan::create_pipeline(const Option& opt)
// Shape hints of the first bottom/top blob; an empty Mat is used when none is recorded.
40
const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
41
const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
// Input pack size: 8 when opt.use_shader_pack8 is set and the packed axis is a
// multiple of 8, else 4 when it is a multiple of 4, else 1.
// The packed axis is w for dims 1, h for dims 2 and c for dims 3/4.
44
if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
45
if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
46
if (shape.dims == 3 || shape.dims == 4) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
// Output pack size, chosen by the same rule from the output shape hint.
49
if (out_shape.dims == 1) out_elempack = opt.use_shader_pack8 && out_shape.w % 8 == 0 ? 8 : out_shape.w % 4 == 0 ? 4 : 1;
50
if (out_shape.dims == 2) out_elempack = opt.use_shader_pack8 && out_shape.h % 8 == 0 ? 8 : out_shape.h % 4 == 0 ? 4 : 1;
51
if (out_shape.dims == 3 || out_shape.dims == 4) out_elempack = opt.use_shader_pack8 && out_shape.c % 8 == 0 ? 8 : out_shape.c % 4 == 0 ? 4 : 1;
// offset_elempack: pack size that the crop start along the packed axis is aligned to.
53
int offset_elempack = 1;
54
bool numpy_style_slice = !starts.empty() && !ends.empty();
55
if (numpy_style_slice)
// starts/ends/axes form: begin from the input pack size, then recompute it from
// the start position whenever an entry targets axis 0.
57
offset_elempack = elempack;
59
const int* starts_ptr = starts;
60
const int* axes_ptr = axes;
62
int _axes[4] = {0, 1, 2, 3};
63
int num_axis = axes.w;
// NOTE(review): presumably guarded by an "axes is empty" check that is not visible here - confirm.
66
num_axis = shape.dims;
70
for (int i = 0; i < num_axis; i++)
72
int axis = axes_ptr[i];
// NOTE(review): presumably applied only to negative axes (guard not visible) - confirm.
74
axis = shape.dims + axis;
79
for (int i = 0; i < num_axis; i++)
81
int start = starts_ptr[i];
// A negative start is counted back from the end of the axis before its alignment is tested.
84
if (shape.dims == 1 && axis == 0)
86
int _woffset = start >= 0 ? start : shape.w + start;
87
offset_elempack = opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
89
if (shape.dims == 2 && axis == 0)
91
int _hoffset = start >= 0 ? start : shape.h + start;
92
offset_elempack = opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
94
if ((shape.dims == 3 || shape.dims == 4) && axis == 0)
96
int _coffset = start >= 0 ? start : shape.c + start;
97
offset_elempack = opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
// woffset / hoffset / coffset form: each pair below either keeps the input pack
// size or derives one from the offset's alignment.
// NOTE(review): the conditions selecting between the two assignments of each
// pair are not visible in these lines - confirm against the full file.
106
offset_elempack = elempack;
108
offset_elempack = opt.use_shader_pack8 && woffset % 8 == 0 ? 8 : woffset % 4 == 0 ? 4 : 1;
110
else if (shape.dims == 2)
113
offset_elempack = elempack;
115
offset_elempack = opt.use_shader_pack8 && hoffset % 8 == 0 ? 8 : hoffset % 4 == 0 ? 4 : 1;
117
else // if (shape.dims == 3 || shape.dims == 4)
120
offset_elempack = elempack;
122
offset_elempack = opt.use_shader_pack8 && coffset % 8 == 0 ? 8 : coffset % 4 == 0 ? 4 : 1;
// Never use a pack size larger than the input's own.
126
offset_elempack = std::min(offset_elempack, elempack);
// Bytes per packed element: 2 per lane with fp16 storage; with fp16 packed,
// 4 for pack1 and 2 per lane otherwise; the last pair uses 4 per lane
// (its guard is not visible here, presumably the remaining else branch).
130
if (opt.use_fp16_storage)
132
elemsize = elempack * 2u;
133
out_elemsize = out_elempack * 2u;
135
else if (opt.use_fp16_packed)
137
elemsize = elempack == 1 ? 4u : elempack * 2u;
138
out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
142
elemsize = elempack * 4u;
143
out_elemsize = out_elempack * 4u;
// Packed shape descriptors: the packed axis is divided by the pack size and
// the Mat is constructed with a null data pointer.
147
if (shape.dims == 1) shape_packed = Mat(shape.w / elempack, (void*)0, elemsize, elempack);
148
if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
149
if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
150
if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);
152
Mat out_shape_packed;
153
if (out_shape.dims == 1) out_shape_packed = Mat(out_shape.w / out_elempack, (void*)0, out_elemsize, out_elempack);
154
if (out_shape.dims == 2) out_shape_packed = Mat(out_shape.w, out_shape.h / out_elempack, (void*)0, out_elemsize, out_elempack);
155
if (out_shape.dims == 3) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
156
if (out_shape.dims == 4) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.d, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
// Input descriptor handed to the shader. It is re-described at offset_elempack
// when the layer has one input, the shape is known, input and output pack sizes
// match, and the crop offset is aligned to a smaller pack size. This is the
// same condition under which forward() repacks the input.
158
Mat shape_unpacked = shape_packed;
159
if (one_blob_only && shape.dims != 0 && elempack == out_elempack && elempack > offset_elempack)
161
size_t offset_elemsize;
162
if (opt.use_fp16_storage)
164
offset_elemsize = offset_elempack * 2u;
166
else if (opt.use_fp16_packed)
168
offset_elemsize = offset_elempack == 1 ? 4u : offset_elempack * 2u;
172
offset_elemsize = offset_elempack * 4u;
175
if (shape.dims == 1) shape_unpacked = Mat(shape.w / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
176
if (shape.dims == 2) shape_unpacked = Mat(shape.w, shape.h / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
177
if (shape.dims == 3) shape_unpacked = Mat(shape.w, shape.h, shape.c / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
178
if (shape.dims == 4) shape_unpacked = Mat(shape.w, shape.h, shape.d, shape.c / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
// 13 specialization constants: slot 0 carries vkdev->info.bug_implicit_fp16_arithmetic(),
// slots 1-6 the input descriptor (dims, w, h, d, c, cstep), slots 7-12 the output descriptor.
181
std::vector<vk_specialization_type> specializations(1 + 12);
182
specializations[0].i = vkdev->info.bug_implicit_fp16_arithmetic();
183
specializations[1 + 0].i = shape_unpacked.dims;
184
specializations[1 + 1].i = shape_unpacked.w;
185
specializations[1 + 2].i = shape_unpacked.h;
186
specializations[1 + 3].i = shape_unpacked.d;
187
specializations[1 + 4].i = shape_unpacked.c;
188
specializations[1 + 5].i = shape_unpacked.cstep;
189
specializations[1 + 6].i = out_shape_packed.dims;
190
specializations[1 + 7].i = out_shape_packed.w;
191
specializations[1 + 8].i = out_shape_packed.h;
192
specializations[1 + 9].i = out_shape_packed.d;
193
specializations[1 + 10].i = out_shape_packed.c;
194
specializations[1 + 11].i = out_shape_packed.cstep;
// Local size hint, clamped to the packed output extent:
// up to 64 for 1-D, 8x8 for 2-D, 4x4x4 for 3-D and 4-D (h * d is used as the second extent for 4-D).
197
if (out_shape_packed.dims == 1)
199
local_size_xyz.w = std::min(64, out_shape_packed.w);
200
local_size_xyz.h = 1;
201
local_size_xyz.c = 1;
203
if (out_shape_packed.dims == 2)
205
local_size_xyz.w = std::min(8, out_shape_packed.w);
206
local_size_xyz.h = std::min(8, out_shape_packed.h);
207
local_size_xyz.c = 1;
209
if (out_shape_packed.dims == 3)
211
local_size_xyz.w = std::min(4, out_shape_packed.w);
212
local_size_xyz.h = std::min(4, out_shape_packed.h);
213
local_size_xyz.c = std::min(4, out_shape_packed.c);
215
if (out_shape_packed.dims == 4)
217
local_size_xyz.w = std::min(4, out_shape_packed.w);
218
local_size_xyz.h = std::min(4, out_shape_packed.h * out_shape_packed.d);
219
local_size_xyz.c = std::min(4, out_shape_packed.c);
// Pipeline variants. Each is created when the output shape is unknown
// (out_shape.dims == 0) or when the resolved pack sizes match its condition;
// the pack8 variants additionally require opt.use_shader_pack8 in the unknown-shape case.
// pack1 -> pack1
223
if (out_shape.dims == 0 || out_elempack == 1)
225
pipeline_crop = new Pipeline(vkdev);
226
pipeline_crop->set_optimal_local_size_xyz(local_size_xyz);
227
pipeline_crop->create(LayerShaderType::crop, opt, specializations);
// pack4 -> pack4
231
if (out_shape.dims == 0 || out_elempack == 4)
233
pipeline_crop_pack4 = new Pipeline(vkdev);
234
pipeline_crop_pack4->set_optimal_local_size_xyz(local_size_xyz);
235
pipeline_crop_pack4->create(LayerShaderType::crop_pack4, opt, specializations);
// pack1 -> pack4
239
if (out_shape.dims == 0 || out_elempack == 4)
241
pipeline_crop_pack1to4 = new Pipeline(vkdev);
242
pipeline_crop_pack1to4->set_optimal_local_size_xyz(local_size_xyz);
243
pipeline_crop_pack1to4->create(LayerShaderType::crop_pack1to4, opt, specializations);
// pack4 -> pack1
247
if (out_shape.dims == 0 || out_elempack == 1)
249
pipeline_crop_pack4to1 = new Pipeline(vkdev);
250
pipeline_crop_pack4to1->set_optimal_local_size_xyz(local_size_xyz);
251
pipeline_crop_pack4to1->create(LayerShaderType::crop_pack4to1, opt, specializations);
// pack8 -> pack8
255
if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 8))
257
pipeline_crop_pack8 = new Pipeline(vkdev);
258
pipeline_crop_pack8->set_optimal_local_size_xyz(local_size_xyz);
259
pipeline_crop_pack8->create(LayerShaderType::crop_pack8, opt, specializations);
// pack1 -> pack8
263
if ((opt.use_shader_pack8 && out_shape.dims == 0) || out_elempack == 8)
265
pipeline_crop_pack1to8 = new Pipeline(vkdev);
266
pipeline_crop_pack1to8->set_optimal_local_size_xyz(local_size_xyz);
267
pipeline_crop_pack1to8->create(LayerShaderType::crop_pack1to8, opt, specializations);
// pack4 -> pack8
271
if ((opt.use_shader_pack8 && out_shape.dims == 0) || out_elempack == 8)
273
pipeline_crop_pack4to8 = new Pipeline(vkdev);
274
pipeline_crop_pack4to8->set_optimal_local_size_xyz(local_size_xyz);
275
pipeline_crop_pack4to8->create(LayerShaderType::crop_pack4to8, opt, specializations);
// pack8 -> pack4
279
if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 4))
281
pipeline_crop_pack8to4 = new Pipeline(vkdev);
282
pipeline_crop_pack8to4->set_optimal_local_size_xyz(local_size_xyz);
283
pipeline_crop_pack8to4->create(LayerShaderType::crop_pack8to4, opt, specializations);
// pack8 -> pack1
287
if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 1))
289
pipeline_crop_pack8to1 = new Pipeline(vkdev);
290
pipeline_crop_pack8to1->set_optimal_local_size_xyz(local_size_xyz);
291
pipeline_crop_pack8to1->create(LayerShaderType::crop_pack8to1, opt, specializations);
297
// Releases every pipeline created by create_pipeline(); the Option argument is unused.
// Each handle is deleted and then reset to 0. Deleting a handle that is still 0 is a no-op.
// NOTE(review): the reset of pipeline_crop after its delete and the return
// statement are not visible in these lines - confirm against the full file.
int Crop_vulkan::destroy_pipeline(const Option& /*opt*/)
299
delete pipeline_crop;
302
delete pipeline_crop_pack4;
303
pipeline_crop_pack4 = 0;
305
delete pipeline_crop_pack1to4;
306
pipeline_crop_pack1to4 = 0;
308
delete pipeline_crop_pack4to1;
309
pipeline_crop_pack4to1 = 0;
311
delete pipeline_crop_pack8;
312
pipeline_crop_pack8 = 0;
314
delete pipeline_crop_pack1to8;
315
pipeline_crop_pack1to8 = 0;
317
delete pipeline_crop_pack4to8;
318
pipeline_crop_pack4to8 = 0;
320
delete pipeline_crop_pack8to4;
321
pipeline_crop_pack8to4 = 0;
323
delete pipeline_crop_pack8to1;
324
pipeline_crop_pack8to1 = 0;
329
// Records a crop of bottom_blob into top_blob on cmd (VkMat storage, single input).
// The region comes from resolve_crop_roi(). A crop that covers the whole input
// simply assigns bottom_blob to top_blob; otherwise the output is allocated and
// the shader variant matching the input/offset/output pack sizes is recorded.
// NOTE(review): the per-dims dispatch conditions, the declarations of
// offset_elempack / out_elempack and the return statements are not visible in
// these lines - confirm against the full file.
int Crop_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const
331
int dims = bottom_blob.dims;
332
size_t elemsize = bottom_blob.elemsize;
333
int elempack = bottom_blob.elempack;
// Offsets and output extents, filled in by resolve_crop_roi().
335
int _woffset, _hoffset, _doffset, _coffset;
336
int _outw, _outh, _outd, _outc;
337
resolve_crop_roi(bottom_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
// Packed axis is w here (presumably the dims == 1 branch): the output extent is
// compared against the input's w times elempack.
344
if (_woffset == 0 && _outw == bottom_blob.w * elempack)
346
top_blob = bottom_blob;
// offset_elempack keeps the input pack size for a zero offset, otherwise follows
// the offset's alignment (8 only with opt.use_shader_pack8, else 4, else 1).
// out_elempack follows the alignment of the output extent.
350
offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
351
out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
// Packed axis is h here (presumably the dims == 2 branch).
355
if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
357
top_blob = bottom_blob;
361
offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
362
out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
// Packed axis is c here (presumably the dims == 3 branch).
366
if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
368
top_blob = bottom_blob;
372
offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
373
out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
375
else // if (dims == 4)
377
if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
379
top_blob = bottom_blob;
383
offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
384
out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
// Never use a pack size larger than the input's own.
387
offset_elempack = std::min(offset_elempack, elempack);
// Output element size: the input's bytes per lane times the output pack size.
389
size_t out_elemsize = elemsize / elempack * out_elempack;
// With fp16 packed but not fp16 storage: pack8/pack4 use 2 bytes per lane, pack1 uses 4 bytes.
391
if (opt.use_fp16_packed && !opt.use_fp16_storage)
393
if (out_elempack == 8) out_elemsize = 8 * 2u;
394
if (out_elempack == 4) out_elemsize = 4 * 2u;
395
if (out_elempack == 1) out_elemsize = 4u;
// When input and output share a pack size but the offset is aligned to a
// smaller one, convert the input to offset_elempack first; the converted copy
// is allocated from the workspace allocator.
399
VkMat bottom_blob_unpacked = bottom_blob;
400
if (elempack == out_elempack && elempack > offset_elempack)
402
Option opt_pack1 = opt;
403
opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
405
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
// Allocate the output; the packed axis extent is divided by out_elempack.
// NOTE(review): the dims conditions choosing among the first three create() calls are not visible here.
410
top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
414
top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
418
top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
420
else // if (dims == 4)
422
top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
// Allocation-failure check; presumably returns an error code (return not visible) - confirm.
424
if (top_blob.empty())
// Binding 0 is the (possibly repacked) input, binding 1 the output.
427
std::vector<VkMat> bindings(2);
428
bindings[0] = bottom_blob_unpacked;
429
bindings[1] = top_blob;
// 16 constants: 0-5 input dims/w/h/d/c/cstep, 6-11 output dims/w/h/d/c/cstep, 12-15 w/h/d/c offsets.
431
std::vector<vk_constant_type> constants(16);
432
constants[0].i = bottom_blob_unpacked.dims;
433
constants[1].i = bottom_blob_unpacked.w;
434
constants[2].i = bottom_blob_unpacked.h;
435
constants[3].i = bottom_blob_unpacked.d;
436
constants[4].i = bottom_blob_unpacked.c;
437
constants[5].i = bottom_blob_unpacked.cstep;
438
constants[6].i = top_blob.dims;
439
constants[7].i = top_blob.w;
440
constants[8].i = top_blob.h;
441
constants[9].i = top_blob.d;
442
constants[10].i = top_blob.c;
443
constants[11].i = top_blob.cstep;
444
constants[12].i = _woffset;
445
constants[13].i = _hoffset;
446
constants[14].i = _doffset;
447
constants[15].i = _coffset;
// Select the variant. When input and output pack sizes are equal (4 or 8),
// offset_elempack decides between the same-pack shader and an NtoM shader that
// reads the repacked input; otherwise the input/output pack pair decides.
449
const Pipeline* pipeline = 0;
450
if (elempack == 1 && out_elempack == 1)
452
pipeline = pipeline_crop;
454
else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
456
pipeline = pipeline_crop_pack4;
458
else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
460
pipeline = pipeline_crop_pack1to4;
462
else if (elempack == 1 && out_elempack == 4)
464
pipeline = pipeline_crop_pack1to4;
466
else if (elempack == 4 && out_elempack == 1)
468
pipeline = pipeline_crop_pack4to1;
470
else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
472
pipeline = pipeline_crop_pack8;
474
else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
476
pipeline = pipeline_crop_pack4to8;
478
else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
480
pipeline = pipeline_crop_pack1to8;
482
else if (elempack == 1 && out_elempack == 8)
484
pipeline = pipeline_crop_pack1to8;
486
else if (elempack == 4 && out_elempack == 8)
488
pipeline = pipeline_crop_pack4to8;
490
else if (elempack == 8 && out_elempack == 4)
492
pipeline = pipeline_crop_pack8to4;
494
else if (elempack == 8 && out_elempack == 1)
496
pipeline = pipeline_crop_pack8to1;
// Record the dispatch; top_blob is passed as the last argument of record_pipeline.
499
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
504
// Records a crop of bottom_blobs[0] into top_blobs[0] on cmd (VkMat storage),
// using bottom_blobs[1] as the reference that determines the crop region.
// After the region is resolved, the visible steps match the single-input VkMat
// overload: whole-input shortcut, pack-size selection, optional repacking,
// output allocation, constants, variant selection and dispatch.
// NOTE(review): the per-dims dispatch conditions, the declarations of
// offset_elempack / out_elempack and the return statements are not visible in
// these lines - confirm against the full file.
int Crop_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
506
const VkMat& bottom_blob = bottom_blobs[0];
507
const VkMat& reference_blob = bottom_blobs[1];
508
VkMat& top_blob = top_blobs[0];
510
int dims = bottom_blob.dims;
511
size_t elemsize = bottom_blob.elemsize;
512
int elempack = bottom_blob.elempack;
514
int _woffset, _hoffset, _doffset, _coffset;
515
int _outw, _outh, _outd, _outc;
// Two ways of resolving the region are visible: from the mapped contents of
// reference_blob read as ints, or from the shape of reference_blob.
// NOTE(review): the condition choosing between the two calls is not visible here - confirm.
518
resolve_crop_roi(bottom_blob.shape(), (const int*)reference_blob.mapped(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
522
resolve_crop_roi(bottom_blob.shape(), reference_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
// Packed axis is w here (presumably the dims == 1 branch): a crop covering the
// whole input assigns bottom_blob to top_blob.
530
if (_woffset == 0 && _outw == bottom_blob.w * elempack)
532
top_blob = bottom_blob;
// offset_elempack keeps the input pack size for a zero offset, otherwise follows
// the offset's alignment (8 only with opt.use_shader_pack8, else 4, else 1).
// out_elempack follows the alignment of the output extent.
536
offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
537
out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
// Packed axis is h here (presumably the dims == 2 branch).
541
if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
543
top_blob = bottom_blob;
547
offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
548
out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
// Packed axis is c here (presumably the dims == 3 branch).
552
if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
554
top_blob = bottom_blob;
558
offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
559
out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
561
else // if (dims == 4)
563
if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
565
top_blob = bottom_blob;
569
offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
570
out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
// Never use a pack size larger than the input's own.
573
offset_elempack = std::min(offset_elempack, elempack);
// Output element size: the input's bytes per lane times the output pack size.
575
size_t out_elemsize = elemsize / elempack * out_elempack;
// With fp16 packed but not fp16 storage: pack8/pack4 use 2 bytes per lane, pack1 uses 4 bytes.
577
if (opt.use_fp16_packed && !opt.use_fp16_storage)
579
if (out_elempack == 8) out_elemsize = 8 * 2u;
580
if (out_elempack == 4) out_elemsize = 4 * 2u;
581
if (out_elempack == 1) out_elemsize = 4u;
// When input and output share a pack size but the offset is aligned to a
// smaller one, convert the input to offset_elempack first; the converted copy
// is allocated from the workspace allocator.
585
VkMat bottom_blob_unpacked = bottom_blob;
586
if (elempack == out_elempack && elempack > offset_elempack)
588
Option opt_pack1 = opt;
589
opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
591
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
// Allocate the output; the packed axis extent is divided by out_elempack.
// NOTE(review): the dims conditions choosing among the first three create() calls are not visible here.
596
top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
600
top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
604
top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
606
else // if (dims == 4)
608
top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
// Allocation-failure check; presumably returns an error code (return not visible) - confirm.
610
if (top_blob.empty())
// Binding 0 is the (possibly repacked) input, binding 1 the output.
613
std::vector<VkMat> bindings(2);
614
bindings[0] = bottom_blob_unpacked;
615
bindings[1] = top_blob;
// 16 constants: 0-5 input dims/w/h/d/c/cstep, 6-11 output dims/w/h/d/c/cstep, 12-15 w/h/d/c offsets.
617
std::vector<vk_constant_type> constants(16);
618
constants[0].i = bottom_blob_unpacked.dims;
619
constants[1].i = bottom_blob_unpacked.w;
620
constants[2].i = bottom_blob_unpacked.h;
621
constants[3].i = bottom_blob_unpacked.d;
622
constants[4].i = bottom_blob_unpacked.c;
623
constants[5].i = bottom_blob_unpacked.cstep;
624
constants[6].i = top_blob.dims;
625
constants[7].i = top_blob.w;
626
constants[8].i = top_blob.h;
627
constants[9].i = top_blob.d;
628
constants[10].i = top_blob.c;
629
constants[11].i = top_blob.cstep;
630
constants[12].i = _woffset;
631
constants[13].i = _hoffset;
632
constants[14].i = _doffset;
633
constants[15].i = _coffset;
// Select the variant. When input and output pack sizes are equal (4 or 8),
// offset_elempack decides between the same-pack shader and an NtoM shader that
// reads the repacked input; otherwise the input/output pack pair decides.
635
const Pipeline* pipeline = 0;
636
if (elempack == 1 && out_elempack == 1)
638
pipeline = pipeline_crop;
640
else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
642
pipeline = pipeline_crop_pack4;
644
else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
646
pipeline = pipeline_crop_pack1to4;
648
else if (elempack == 1 && out_elempack == 4)
650
pipeline = pipeline_crop_pack1to4;
652
else if (elempack == 4 && out_elempack == 1)
654
pipeline = pipeline_crop_pack4to1;
656
else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
658
pipeline = pipeline_crop_pack8;
660
else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
662
pipeline = pipeline_crop_pack4to8;
664
else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
666
pipeline = pipeline_crop_pack1to8;
668
else if (elempack == 1 && out_elempack == 8)
670
pipeline = pipeline_crop_pack1to8;
672
else if (elempack == 4 && out_elempack == 8)
674
pipeline = pipeline_crop_pack4to8;
676
else if (elempack == 8 && out_elempack == 4)
678
pipeline = pipeline_crop_pack8to4;
680
else if (elempack == 8 && out_elempack == 1)
682
pipeline = pipeline_crop_pack8to1;
// Record the dispatch; top_blob is passed as the last argument of record_pipeline.
685
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
690
// Records a crop of bottom_blob into top_blob on cmd (VkImageMat storage, single input).
// The visible steps mirror the VkMat overload; the differences seen here are the
// VkImageMat types and that the two cstep constants are written as 0.
// NOTE(review): the per-dims dispatch conditions, the declarations of
// offset_elempack / out_elempack and the return statements are not visible in
// these lines - confirm against the full file.
int Crop_vulkan::forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const
692
int dims = bottom_blob.dims;
693
size_t elemsize = bottom_blob.elemsize;
694
int elempack = bottom_blob.elempack;
// Offsets and output extents, filled in by resolve_crop_roi().
696
int _woffset, _hoffset, _doffset, _coffset;
697
int _outw, _outh, _outd, _outc;
698
resolve_crop_roi(bottom_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
// Packed axis is w here (presumably the dims == 1 branch): a crop covering the
// whole input assigns bottom_blob to top_blob.
705
if (_woffset == 0 && _outw == bottom_blob.w * elempack)
707
top_blob = bottom_blob;
// offset_elempack keeps the input pack size for a zero offset, otherwise follows
// the offset's alignment (8 only with opt.use_shader_pack8, else 4, else 1).
// out_elempack follows the alignment of the output extent.
711
offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
712
out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
// Packed axis is h here (presumably the dims == 2 branch).
716
if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
718
top_blob = bottom_blob;
722
offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
723
out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
// Packed axis is c here (presumably the dims == 3 branch).
727
if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
729
top_blob = bottom_blob;
733
offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
734
out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
736
else // if (dims == 4)
738
if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
740
top_blob = bottom_blob;
744
offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
745
out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
// Never use a pack size larger than the input's own.
748
offset_elempack = std::min(offset_elempack, elempack);
// Output element size: the input's bytes per lane times the output pack size.
750
size_t out_elemsize = elemsize / elempack * out_elempack;
// With fp16 packed but not fp16 storage: pack8/pack4 use 2 bytes per lane, pack1 uses 4 bytes.
752
if (opt.use_fp16_packed && !opt.use_fp16_storage)
754
if (out_elempack == 8) out_elemsize = 8 * 2u;
755
if (out_elempack == 4) out_elemsize = 4 * 2u;
756
if (out_elempack == 1) out_elemsize = 4u;
// When input and output share a pack size but the offset is aligned to a
// smaller one, convert the input to offset_elempack first; the converted copy
// is allocated from the workspace allocator.
760
VkImageMat bottom_blob_unpacked = bottom_blob;
761
if (elempack == out_elempack && elempack > offset_elempack)
763
Option opt_pack1 = opt;
764
opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
766
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
// Allocate the output; the packed axis extent is divided by out_elempack.
// NOTE(review): the dims conditions choosing among the first three create() calls are not visible here.
771
top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
775
top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
779
top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
781
else // if (dims == 4)
783
top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
// Allocation-failure check; presumably returns an error code (return not visible) - confirm.
785
if (top_blob.empty())
// Binding 0 is the (possibly repacked) input, binding 1 the output.
788
std::vector<VkImageMat> bindings(2);
789
bindings[0] = bottom_blob_unpacked;
790
bindings[1] = top_blob;
// 16 constants: 0-4 input dims/w/h/d/c, 6-10 output dims/w/h/d/c, 12-15 w/h/d/c offsets.
// Slots 5 and 11 (cstep in the VkMat overloads) are written as 0 here.
792
std::vector<vk_constant_type> constants(16);
793
constants[0].i = bottom_blob_unpacked.dims;
794
constants[1].i = bottom_blob_unpacked.w;
795
constants[2].i = bottom_blob_unpacked.h;
796
constants[3].i = bottom_blob_unpacked.d;
797
constants[4].i = bottom_blob_unpacked.c;
798
constants[5].i = 0; //bottom_blob_unpacked.cstep;
799
constants[6].i = top_blob.dims;
800
constants[7].i = top_blob.w;
801
constants[8].i = top_blob.h;
802
constants[9].i = top_blob.d;
803
constants[10].i = top_blob.c;
804
constants[11].i = 0; //top_blob.cstep;
805
constants[12].i = _woffset;
806
constants[13].i = _hoffset;
807
constants[14].i = _doffset;
808
constants[15].i = _coffset;
// Select the variant. When input and output pack sizes are equal (4 or 8),
// offset_elempack decides between the same-pack shader and an NtoM shader that
// reads the repacked input; otherwise the input/output pack pair decides.
810
const Pipeline* pipeline = 0;
811
if (elempack == 1 && out_elempack == 1)
813
pipeline = pipeline_crop;
815
else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
817
pipeline = pipeline_crop_pack4;
819
else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
821
pipeline = pipeline_crop_pack1to4;
823
else if (elempack == 1 && out_elempack == 4)
825
pipeline = pipeline_crop_pack1to4;
827
else if (elempack == 4 && out_elempack == 1)
829
pipeline = pipeline_crop_pack4to1;
831
else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
833
pipeline = pipeline_crop_pack8;
835
else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
837
pipeline = pipeline_crop_pack4to8;
839
else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
841
pipeline = pipeline_crop_pack1to8;
843
else if (elempack == 1 && out_elempack == 8)
845
pipeline = pipeline_crop_pack1to8;
847
else if (elempack == 4 && out_elempack == 8)
849
pipeline = pipeline_crop_pack4to8;
851
else if (elempack == 8 && out_elempack == 4)
853
pipeline = pipeline_crop_pack8to4;
855
else if (elempack == 8 && out_elempack == 1)
857
pipeline = pipeline_crop_pack8to1;
// Record the dispatch; top_blob is passed as the last argument of record_pipeline.
860
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
865
// Records a crop of bottom_blobs[0] into top_blobs[0] on cmd (VkImageMat storage),
// using bottom_blobs[1] as the reference that determines the crop region.
// After the region is resolved, the visible steps match the single-input
// VkImageMat overload, including the two cstep constants being written as 0.
// NOTE(review): the per-dims dispatch conditions, the declarations of
// offset_elempack / out_elempack and the return statements are not visible in
// these lines - confirm against the full file.
int Crop_vulkan::forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const
867
const VkImageMat& bottom_blob = bottom_blobs[0];
868
const VkImageMat& reference_blob = bottom_blobs[1];
869
VkImageMat& top_blob = top_blobs[0];
871
int dims = bottom_blob.dims;
872
size_t elemsize = bottom_blob.elemsize;
873
int elempack = bottom_blob.elempack;
875
int _woffset, _hoffset, _doffset, _coffset;
876
int _outw, _outh, _outd, _outc;
// Two ways of resolving the region are visible: from the mapped contents of
// reference_blob read as ints, or from the shape of reference_blob.
// NOTE(review): the condition choosing between the two calls is not visible here - confirm.
879
resolve_crop_roi(bottom_blob.shape(), (const int*)reference_blob.mapped(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
883
resolve_crop_roi(bottom_blob.shape(), reference_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
// Packed axis is w here (presumably the dims == 1 branch): a crop covering the
// whole input assigns bottom_blob to top_blob.
891
if (_woffset == 0 && _outw == bottom_blob.w * elempack)
893
top_blob = bottom_blob;
// offset_elempack keeps the input pack size for a zero offset, otherwise follows
// the offset's alignment (8 only with opt.use_shader_pack8, else 4, else 1).
// out_elempack follows the alignment of the output extent.
897
offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
898
out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
// Packed axis is h here (presumably the dims == 2 branch).
902
if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
904
top_blob = bottom_blob;
908
offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
909
out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
// Packed axis is c here (presumably the dims == 3 branch).
913
if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
915
top_blob = bottom_blob;
919
offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
920
out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
922
else // if (dims == 4)
924
if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
926
top_blob = bottom_blob;
930
offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
931
out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
// Never use a pack size larger than the input's own.
934
offset_elempack = std::min(offset_elempack, elempack);
// Output element size: the input's bytes per lane times the output pack size.
936
size_t out_elemsize = elemsize / elempack * out_elempack;
// With fp16 packed but not fp16 storage: pack8/pack4 use 2 bytes per lane, pack1 uses 4 bytes.
938
if (opt.use_fp16_packed && !opt.use_fp16_storage)
940
if (out_elempack == 8) out_elemsize = 8 * 2u;
941
if (out_elempack == 4) out_elemsize = 4 * 2u;
942
if (out_elempack == 1) out_elemsize = 4u;
// When input and output share a pack size but the offset is aligned to a
// smaller one, convert the input to offset_elempack first; the converted copy
// is allocated from the workspace allocator.
946
VkImageMat bottom_blob_unpacked = bottom_blob;
947
if (elempack == out_elempack && elempack > offset_elempack)
949
Option opt_pack1 = opt;
950
opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
952
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
// Allocate the output; the packed axis extent is divided by out_elempack.
// NOTE(review): the dims conditions choosing among the first three create() calls are not visible here.
957
top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
961
top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
965
top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
967
else // if (dims == 4)
969
top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
// Allocation-failure check; presumably returns an error code (return not visible) - confirm.
971
if (top_blob.empty())
// Binding 0 is the (possibly repacked) input, binding 1 the output.
974
std::vector<VkImageMat> bindings(2);
975
bindings[0] = bottom_blob_unpacked;
976
bindings[1] = top_blob;
// 16 constants: 0-4 input dims/w/h/d/c, 6-10 output dims/w/h/d/c, 12-15 w/h/d/c offsets.
// Slots 5 and 11 (cstep in the VkMat overloads) are written as 0 here.
978
std::vector<vk_constant_type> constants(16);
979
constants[0].i = bottom_blob_unpacked.dims;
980
constants[1].i = bottom_blob_unpacked.w;
981
constants[2].i = bottom_blob_unpacked.h;
982
constants[3].i = bottom_blob_unpacked.d;
983
constants[4].i = bottom_blob_unpacked.c;
984
constants[5].i = 0; //bottom_blob_unpacked.cstep;
985
constants[6].i = top_blob.dims;
986
constants[7].i = top_blob.w;
987
constants[8].i = top_blob.h;
988
constants[9].i = top_blob.d;
989
constants[10].i = top_blob.c;
990
constants[11].i = 0; //top_blob.cstep;
991
constants[12].i = _woffset;
992
constants[13].i = _hoffset;
993
constants[14].i = _doffset;
994
constants[15].i = _coffset;
// Select the variant. When input and output pack sizes are equal (4 or 8),
// offset_elempack decides between the same-pack shader and an NtoM shader that
// reads the repacked input; otherwise the input/output pack pair decides.
996
const Pipeline* pipeline = 0;
997
if (elempack == 1 && out_elempack == 1)
999
pipeline = pipeline_crop;
1001
else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
1003
pipeline = pipeline_crop_pack4;
1005
else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
1007
pipeline = pipeline_crop_pack1to4;
1009
else if (elempack == 1 && out_elempack == 4)
1011
pipeline = pipeline_crop_pack1to4;
1013
else if (elempack == 4 && out_elempack == 1)
1015
pipeline = pipeline_crop_pack4to1;
1017
else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
1019
pipeline = pipeline_crop_pack8;
1021
else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
1023
pipeline = pipeline_crop_pack4to8;
1025
else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
1027
pipeline = pipeline_crop_pack1to8;
1029
else if (elempack == 1 && out_elempack == 8)
1031
pipeline = pipeline_crop_pack1to8;
1033
else if (elempack == 4 && out_elempack == 8)
1035
pipeline = pipeline_crop_pack4to8;
1037
else if (elempack == 8 && out_elempack == 4)
1039
pipeline = pipeline_crop_pack8to4;
1041
else if (elempack == 8 && out_elempack == 1)
1043
pipeline = pipeline_crop_pack8to1;
// Record the dispatch; top_blob is passed as the last argument of record_pipeline.
1046
cmd.record_pipeline(pipeline, bindings, constants, top_blob);