// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
15
#include "slice_vulkan.h"
17
#include "layer_shader_type.h"
21
Slice_vulkan::Slice_vulkan()
23
support_vulkan = true;
24
support_image_storage = true;
26
pipeline_slice[0] = 0;
27
pipeline_slice[1] = 0;
28
pipeline_slice_pack4[0] = 0;
29
pipeline_slice_pack4[1] = 0;
30
pipeline_slice_pack1to4[0] = 0;
31
pipeline_slice_pack1to4[1] = 0;
32
pipeline_slice_pack8[0] = 0;
33
pipeline_slice_pack8[1] = 0;
34
pipeline_slice_pack1to8[0] = 0;
35
pipeline_slice_pack1to8[1] = 0;
36
pipeline_slice_pack4to8[0] = 0;
37
pipeline_slice_pack4to8[1] = 0;
40
int Slice_vulkan::create_pipeline(const Option& opt)
42
const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
43
const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
44
int positive_axis = axis < 0 ? shape.dims + axis : axis;
47
if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
48
if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
49
if (shape.dims == 3 || shape.dims == 4) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
52
if (positive_axis == 0)
54
if (out_shape.dims == 1) out_elempack = opt.use_shader_pack8 && out_shape.w % 8 == 0 ? 8 : out_shape.w % 4 == 0 ? 4 : 1;
55
if (out_shape.dims == 2) out_elempack = opt.use_shader_pack8 && out_shape.h % 8 == 0 ? 8 : out_shape.h % 4 == 0 ? 4 : 1;
56
if (out_shape.dims == 3 || out_shape.dims == 4) out_elempack = opt.use_shader_pack8 && out_shape.c % 8 == 0 ? 8 : out_shape.c % 4 == 0 ? 4 : 1;
58
for (size_t b = 1; b < top_shapes.size(); b++)
60
const Mat& shape1 = top_shapes[b];
62
int out_elempack1 = 1;
63
if (shape1.dims == 1) out_elempack1 = opt.use_shader_pack8 && shape1.w % 8 == 0 ? 8 : shape1.w % 4 == 0 ? 4 : 1;
64
if (shape1.dims == 2) out_elempack1 = opt.use_shader_pack8 && shape1.h % 8 == 0 ? 8 : shape1.h % 4 == 0 ? 4 : 1;
65
if (shape1.dims == 3 || shape1.dims == 4) out_elempack1 = opt.use_shader_pack8 && shape1.c % 8 == 0 ? 8 : shape1.c % 4 == 0 ? 4 : 1;
67
out_elempack = std::min(out_elempack, out_elempack1);
72
out_elempack = elempack;
76
if (opt.use_fp16_storage)
78
out_elemsize = out_elempack * 2u;
80
else if (opt.use_fp16_packed)
82
out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
86
out_elemsize = out_elempack * 4u;
90
if (shape.dims == 1) shape_unpacked = Mat(shape.w / out_elempack, (void*)0, out_elemsize, out_elempack);
91
if (shape.dims == 2) shape_unpacked = Mat(shape.w, shape.h / out_elempack, (void*)0, out_elemsize, out_elempack);
92
if (shape.dims == 3) shape_unpacked = Mat(shape.w, shape.h, shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
93
if (shape.dims == 4) shape_unpacked = Mat(shape.w, shape.h, shape.d, shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
95
std::vector<vk_specialization_type> specializations(1 + 12);
96
specializations[0].i = axis;
97
specializations[1 + 0].i = shape_unpacked.dims;
98
specializations[1 + 1].i = shape_unpacked.w;
99
specializations[1 + 2].i = shape_unpacked.h;
100
specializations[1 + 3].i = shape_unpacked.d;
101
specializations[1 + 4].i = shape_unpacked.c;
102
specializations[1 + 5].i = shape_unpacked.cstep;
103
specializations[1 + 6].i = 0; // TODO handle out_shape_packed for slice2
104
specializations[1 + 7].i = 0;
105
specializations[1 + 8].i = 0;
106
specializations[1 + 9].i = 0;
107
specializations[1 + 10].i = 0;
108
specializations[1 + 11].i = 0;
110
Mat local_size_xyz; // TODO more precise group size guessed from shape_unpacked
111
if (shape_unpacked.dims == 1)
113
local_size_xyz.w = 64;
114
local_size_xyz.h = 1;
115
local_size_xyz.c = 1;
117
if (shape_unpacked.dims == 2)
119
local_size_xyz.w = 8;
120
local_size_xyz.h = 8;
121
local_size_xyz.c = 1;
123
if (shape_unpacked.dims == 3)
125
local_size_xyz.w = 4;
126
local_size_xyz.h = 4;
127
local_size_xyz.c = 4;
131
if (shape.dims == 0 || out_elempack == 1)
133
pipeline_slice[0] = new Pipeline(vkdev);
134
pipeline_slice[0]->set_optimal_local_size_xyz(local_size_xyz);
135
pipeline_slice[0]->create(LayerShaderType::slice, opt, specializations);
136
pipeline_slice[1] = new Pipeline(vkdev);
137
pipeline_slice[1]->set_optimal_local_size_xyz(local_size_xyz);
138
pipeline_slice[1]->create(LayerShaderType::slice, opt, specializations);
142
if (shape.dims == 0 || out_elempack == 4)
144
pipeline_slice_pack4[0] = new Pipeline(vkdev);
145
pipeline_slice_pack4[0]->set_optimal_local_size_xyz(local_size_xyz);
146
pipeline_slice_pack4[0]->create(LayerShaderType::slice_pack4, opt, specializations);
147
pipeline_slice_pack4[1] = new Pipeline(vkdev);
148
pipeline_slice_pack4[1]->set_optimal_local_size_xyz(local_size_xyz);
149
pipeline_slice_pack4[1]->create(LayerShaderType::slice_pack4, opt, specializations);
153
if ((positive_axis <= 0 && shape.dims == 0) || out_elempack == 1)
155
pipeline_slice_pack1to4[0] = new Pipeline(vkdev);
156
pipeline_slice_pack1to4[0]->set_optimal_local_size_xyz(local_size_xyz);
157
pipeline_slice_pack1to4[0]->create(LayerShaderType::slice_pack1to4, opt, specializations);
158
pipeline_slice_pack1to4[1] = new Pipeline(vkdev);
159
pipeline_slice_pack1to4[1]->set_optimal_local_size_xyz(local_size_xyz);
160
pipeline_slice_pack1to4[1]->create(LayerShaderType::slice_pack1to4, opt, specializations);
164
if (opt.use_shader_pack8 && (shape.dims == 0 || out_elempack == 8))
166
pipeline_slice_pack8[0] = new Pipeline(vkdev);
167
pipeline_slice_pack8[0]->set_optimal_local_size_xyz(local_size_xyz);
168
pipeline_slice_pack8[0]->create(LayerShaderType::slice_pack8, opt, specializations);
169
pipeline_slice_pack8[1] = new Pipeline(vkdev);
170
pipeline_slice_pack8[1]->set_optimal_local_size_xyz(local_size_xyz);
171
pipeline_slice_pack8[1]->create(LayerShaderType::slice_pack8, opt, specializations);
175
if (opt.use_shader_pack8 && ((positive_axis <= 0 && shape.dims == 0) || out_elempack == 1))
177
pipeline_slice_pack1to8[0] = new Pipeline(vkdev);
178
pipeline_slice_pack1to8[0]->set_optimal_local_size_xyz(local_size_xyz);
179
pipeline_slice_pack1to8[0]->create(LayerShaderType::slice_pack1to8, opt, specializations);
180
pipeline_slice_pack1to8[1] = new Pipeline(vkdev);
181
pipeline_slice_pack1to8[1]->set_optimal_local_size_xyz(local_size_xyz);
182
pipeline_slice_pack1to8[1]->create(LayerShaderType::slice_pack1to8, opt, specializations);
186
if (opt.use_shader_pack8 && ((positive_axis <= 0 && shape.dims == 0) || out_elempack == 4))
188
pipeline_slice_pack4to8[0] = new Pipeline(vkdev);
189
pipeline_slice_pack4to8[0]->set_optimal_local_size_xyz(local_size_xyz);
190
pipeline_slice_pack4to8[0]->create(LayerShaderType::slice_pack4to8, opt, specializations);
191
pipeline_slice_pack4to8[1] = new Pipeline(vkdev);
192
pipeline_slice_pack4to8[1]->set_optimal_local_size_xyz(local_size_xyz);
193
pipeline_slice_pack4to8[1]->create(LayerShaderType::slice_pack4to8, opt, specializations);
199
int Slice_vulkan::destroy_pipeline(const Option& /*opt*/)
201
delete pipeline_slice[0];
202
delete pipeline_slice[1];
203
pipeline_slice[0] = 0;
204
pipeline_slice[1] = 0;
206
delete pipeline_slice_pack4[0];
207
delete pipeline_slice_pack4[1];
208
pipeline_slice_pack4[0] = 0;
209
pipeline_slice_pack4[1] = 0;
211
delete pipeline_slice_pack1to4[0];
212
delete pipeline_slice_pack1to4[1];
213
pipeline_slice_pack1to4[0] = 0;
214
pipeline_slice_pack1to4[1] = 0;
216
delete pipeline_slice_pack8[0];
217
delete pipeline_slice_pack8[1];
218
pipeline_slice_pack8[0] = 0;
219
pipeline_slice_pack8[1] = 0;
221
delete pipeline_slice_pack1to8[0];
222
delete pipeline_slice_pack1to8[1];
223
pipeline_slice_pack1to8[0] = 0;
224
pipeline_slice_pack1to8[1] = 0;
226
delete pipeline_slice_pack4to8[0];
227
delete pipeline_slice_pack4to8[1];
228
pipeline_slice_pack4to8[0] = 0;
229
pipeline_slice_pack4to8[1] = 0;
234
int Slice_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
236
const VkMat& bottom_blob = bottom_blobs[0];
237
int dims = bottom_blob.dims;
238
size_t elemsize = bottom_blob.elemsize;
239
int elempack = bottom_blob.elempack;
240
const int* slices_ptr = slices;
241
const int* indices_ptr = indices;
242
int positive_axis = axis < 0 ? dims + axis : axis;
244
if (dims == 1) // positive_axis == 0
247
int w = bottom_blob.w * elempack;
249
for (size_t i = 0; i < top_blobs.size(); i++)
254
if (i == top_blobs.size() - 1)
260
int indice = indices_ptr[i];
261
int positive_indice = indice < 0 ? w + indice : indice;
262
slice = positive_indice - q;
267
slice = slices_ptr[i];
270
slice = static_cast<int>((w - q) / (top_blobs.size() - i));
274
int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
275
size_t out_elemsize = elemsize / elempack * out_elempack;
277
if (opt.use_fp16_packed && !opt.use_fp16_storage)
279
if (out_elempack == 8) out_elemsize = 8 * 2u;
280
if (out_elempack == 4) out_elemsize = 4 * 2u;
281
if (out_elempack == 1) out_elemsize = 4u;
284
VkMat& top_blob = top_blobs[i];
285
top_blob.create(slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
286
if (top_blob.empty())
292
int out_elempack = top_blobs[0].elempack;
293
for (size_t i = 0; i < top_blobs.size(); i++)
295
out_elempack = std::min(out_elempack, top_blobs[i].elempack);
298
VkMat bottom_blob_unpacked = bottom_blob;
299
if (elempack > out_elempack)
301
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
305
for (size_t i = 0; i < top_blobs.size(); i++)
307
VkMat& top_blob = top_blobs[i];
309
std::vector<VkMat> bindings(2);
310
bindings[0] = bottom_blob_unpacked;
311
bindings[1] = top_blob;
313
std::vector<vk_constant_type> constants(13);
314
constants[0].i = bottom_blob_unpacked.dims;
315
constants[1].i = bottom_blob_unpacked.w;
316
constants[2].i = bottom_blob_unpacked.h;
317
constants[3].i = bottom_blob_unpacked.d;
318
constants[4].i = bottom_blob_unpacked.c;
319
constants[5].i = bottom_blob_unpacked.cstep;
320
constants[6].i = top_blob.dims;
321
constants[7].i = top_blob.w;
322
constants[8].i = top_blob.h;
323
constants[9].i = top_blob.d;
324
constants[10].i = top_blob.c;
325
constants[11].i = top_blob.cstep;
326
constants[12].i = woffset;
328
const Pipeline* pipeline = 0;
329
if (out_elempack == 1 && top_blob.elempack == 1)
331
pipeline = pipeline_slice[i % 2];
333
else if (out_elempack == 4 && top_blob.elempack == 4)
335
pipeline = pipeline_slice_pack4[i % 2];
337
else if (out_elempack == 1 && top_blob.elempack == 4)
339
pipeline = pipeline_slice_pack1to4[i % 2];
341
else if (out_elempack == 8 && top_blob.elempack == 8)
343
pipeline = pipeline_slice_pack8[i % 2];
345
else if (out_elempack == 1 && top_blob.elempack == 8)
347
pipeline = pipeline_slice_pack1to8[i % 2];
349
else if (out_elempack == 4 && top_blob.elempack == 8)
351
pipeline = pipeline_slice_pack4to8[i % 2];
354
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
356
woffset += top_blob.w * top_blob.elempack / out_elempack;
362
if (dims == 2 && positive_axis == 0)
364
// slice image height
365
int w = bottom_blob.w;
366
int h = bottom_blob.h * elempack;
369
for (size_t i = 0; i < top_blobs.size(); i++)
374
if (i == top_blobs.size() - 1)
380
int indice = indices_ptr[i];
381
int positive_indice = indice < 0 ? h + indice : indice;
382
slice = positive_indice - q;
387
slice = slices_ptr[i];
390
slice = static_cast<int>((h - q) / (top_blobs.size() - i));
394
int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
395
size_t out_elemsize = elemsize / elempack * out_elempack;
397
if (opt.use_fp16_packed && !opt.use_fp16_storage)
399
if (out_elempack == 8) out_elemsize = 8 * 2u;
400
if (out_elempack == 4) out_elemsize = 4 * 2u;
401
if (out_elempack == 1) out_elemsize = 4u;
404
VkMat& top_blob = top_blobs[i];
405
top_blob.create(w, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
406
if (top_blob.empty())
412
int out_elempack = top_blobs[0].elempack;
413
for (size_t i = 0; i < top_blobs.size(); i++)
415
out_elempack = std::min(out_elempack, top_blobs[i].elempack);
418
VkMat bottom_blob_unpacked = bottom_blob;
419
if (elempack > out_elempack)
421
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
425
for (size_t i = 0; i < top_blobs.size(); i++)
427
VkMat& top_blob = top_blobs[i];
429
std::vector<VkMat> bindings(2);
430
bindings[0] = bottom_blob_unpacked;
431
bindings[1] = top_blob;
433
std::vector<vk_constant_type> constants(13);
434
constants[0].i = bottom_blob_unpacked.dims;
435
constants[1].i = bottom_blob_unpacked.w;
436
constants[2].i = bottom_blob_unpacked.h;
437
constants[3].i = bottom_blob_unpacked.d;
438
constants[4].i = bottom_blob_unpacked.c;
439
constants[5].i = bottom_blob_unpacked.cstep;
440
constants[6].i = top_blob.dims;
441
constants[7].i = top_blob.w;
442
constants[8].i = top_blob.h;
443
constants[9].i = top_blob.d;
444
constants[10].i = top_blob.c;
445
constants[11].i = top_blob.cstep;
446
constants[12].i = hoffset;
448
const Pipeline* pipeline = 0;
449
if (out_elempack == 1 && top_blob.elempack == 1)
451
pipeline = pipeline_slice[i % 2];
453
else if (out_elempack == 4 && top_blob.elempack == 4)
455
pipeline = pipeline_slice_pack4[i % 2];
457
else if (out_elempack == 1 && top_blob.elempack == 4)
459
pipeline = pipeline_slice_pack1to4[i % 2];
461
else if (out_elempack == 8 && top_blob.elempack == 8)
463
pipeline = pipeline_slice_pack8[i % 2];
465
else if (out_elempack == 1 && top_blob.elempack == 8)
467
pipeline = pipeline_slice_pack1to8[i % 2];
469
else if (out_elempack == 4 && top_blob.elempack == 8)
471
pipeline = pipeline_slice_pack4to8[i % 2];
474
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
476
hoffset += top_blob.h * top_blob.elempack / out_elempack;
482
if (dims == 2 && positive_axis == 1)
485
int w = bottom_blob.w;
486
int h = bottom_blob.h;
489
for (size_t i = 0; i < top_blobs.size(); i++)
494
if (i == top_blobs.size() - 1)
500
int indice = indices_ptr[i];
501
int positive_indice = indice < 0 ? w + indice : indice;
502
slice = positive_indice - q;
507
slice = slices_ptr[i];
510
slice = static_cast<int>((w - q) / (top_blobs.size() - i));
514
VkMat& top_blob = top_blobs[i];
515
top_blob.create(slice, h, elemsize, elempack, opt.blob_vkallocator);
516
if (top_blob.empty())
523
for (size_t i = 0; i < top_blobs.size(); i++)
525
VkMat& top_blob = top_blobs[i];
527
std::vector<VkMat> bindings(2);
528
bindings[0] = bottom_blob;
529
bindings[1] = top_blob;
531
std::vector<vk_constant_type> constants(13);
532
constants[0].i = bottom_blob.dims;
533
constants[1].i = bottom_blob.w;
534
constants[2].i = bottom_blob.h;
535
constants[3].i = bottom_blob.d;
536
constants[4].i = bottom_blob.c;
537
constants[5].i = bottom_blob.cstep;
538
constants[6].i = top_blob.dims;
539
constants[7].i = top_blob.w;
540
constants[8].i = top_blob.h;
541
constants[9].i = top_blob.d;
542
constants[10].i = top_blob.c;
543
constants[11].i = top_blob.cstep;
544
constants[12].i = woffset;
546
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
547
: elempack == 4 ? pipeline_slice_pack4[i % 2]
548
: pipeline_slice[i % 2];
550
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
552
woffset += top_blob.w;
558
if (dims == 3 && positive_axis == 0)
561
int w = bottom_blob.w;
562
int h = bottom_blob.h;
563
int channels = bottom_blob.c * elempack;
566
for (size_t i = 0; i < top_blobs.size(); i++)
571
if (i == top_blobs.size() - 1)
573
slice = channels - q;
577
int indice = indices_ptr[i];
578
int positive_indice = indice < 0 ? channels + indice : indice;
579
slice = positive_indice - q;
584
slice = slices_ptr[i];
587
slice = static_cast<int>((channels - q) / (top_blobs.size() - i));
591
int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
592
size_t out_elemsize = elemsize / elempack * out_elempack;
594
if (opt.use_fp16_packed && !opt.use_fp16_storage)
596
if (out_elempack == 8) out_elemsize = 8 * 2u;
597
if (out_elempack == 4) out_elemsize = 4 * 2u;
598
if (out_elempack == 1) out_elemsize = 4u;
601
VkMat& top_blob = top_blobs[i];
602
top_blob.create(w, h, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
603
if (top_blob.empty())
609
int out_elempack = top_blobs[0].elempack;
610
for (size_t i = 0; i < top_blobs.size(); i++)
612
out_elempack = std::min(out_elempack, top_blobs[i].elempack);
615
VkMat bottom_blob_unpacked = bottom_blob;
616
if (elempack > out_elempack)
618
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
622
for (size_t i = 0; i < top_blobs.size(); i++)
624
VkMat& top_blob = top_blobs[i];
626
std::vector<VkMat> bindings(2);
627
bindings[0] = bottom_blob_unpacked;
628
bindings[1] = top_blob;
630
std::vector<vk_constant_type> constants(13);
631
constants[0].i = bottom_blob_unpacked.dims;
632
constants[1].i = bottom_blob_unpacked.w;
633
constants[2].i = bottom_blob_unpacked.h;
634
constants[3].i = bottom_blob_unpacked.d;
635
constants[4].i = bottom_blob_unpacked.c;
636
constants[5].i = bottom_blob_unpacked.cstep;
637
constants[6].i = top_blob.dims;
638
constants[7].i = top_blob.w;
639
constants[8].i = top_blob.h;
640
constants[9].i = top_blob.d;
641
constants[10].i = top_blob.c;
642
constants[11].i = top_blob.cstep;
643
constants[12].i = coffset;
645
const Pipeline* pipeline = 0;
646
if (out_elempack == 1 && top_blob.elempack == 1)
648
pipeline = pipeline_slice[i % 2];
650
else if (out_elempack == 4 && top_blob.elempack == 4)
652
pipeline = pipeline_slice_pack4[i % 2];
654
else if (out_elempack == 1 && top_blob.elempack == 4)
656
pipeline = pipeline_slice_pack1to4[i % 2];
658
else if (out_elempack == 8 && top_blob.elempack == 8)
660
pipeline = pipeline_slice_pack8[i % 2];
662
else if (out_elempack == 1 && top_blob.elempack == 8)
664
pipeline = pipeline_slice_pack1to8[i % 2];
666
else if (out_elempack == 4 && top_blob.elempack == 8)
668
pipeline = pipeline_slice_pack4to8[i % 2];
671
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
673
coffset += top_blob.c * top_blob.elempack / out_elempack;
679
if (dims == 3 && positive_axis == 1)
682
int w = bottom_blob.w;
683
int h = bottom_blob.h;
684
int channels = bottom_blob.c;
687
for (size_t i = 0; i < top_blobs.size(); i++)
692
if (i == top_blobs.size() - 1)
698
int indice = indices_ptr[i];
699
int positive_indice = indice < 0 ? h + indice : indice;
700
slice = positive_indice - q;
705
slice = slices_ptr[i];
708
slice = static_cast<int>((h - q) / (top_blobs.size() - i));
712
VkMat& top_blob = top_blobs[i];
713
top_blob.create(w, slice, channels, elemsize, elempack, opt.blob_vkallocator);
714
if (top_blob.empty())
721
for (size_t i = 0; i < top_blobs.size(); i++)
723
VkMat& top_blob = top_blobs[i];
725
std::vector<VkMat> bindings(2);
726
bindings[0] = bottom_blob;
727
bindings[1] = top_blob;
729
std::vector<vk_constant_type> constants(13);
730
constants[0].i = bottom_blob.dims;
731
constants[1].i = bottom_blob.w;
732
constants[2].i = bottom_blob.h;
733
constants[3].i = bottom_blob.d;
734
constants[4].i = bottom_blob.c;
735
constants[5].i = bottom_blob.cstep;
736
constants[6].i = top_blob.dims;
737
constants[7].i = top_blob.w;
738
constants[8].i = top_blob.h;
739
constants[9].i = top_blob.d;
740
constants[10].i = top_blob.c;
741
constants[11].i = top_blob.cstep;
742
constants[12].i = hoffset;
744
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
745
: elempack == 4 ? pipeline_slice_pack4[i % 2]
746
: pipeline_slice[i % 2];
748
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
750
hoffset += top_blob.h;
756
if (dims == 3 && positive_axis == 2)
759
int w = bottom_blob.w;
760
int h = bottom_blob.h;
761
int channels = bottom_blob.c;
764
for (size_t i = 0; i < top_blobs.size(); i++)
769
if (i == top_blobs.size() - 1)
775
int indice = indices_ptr[i];
776
int positive_indice = indice < 0 ? w + indice : indice;
777
slice = positive_indice - q;
782
slice = slices_ptr[i];
785
slice = static_cast<int>((w - q) / (top_blobs.size() - i));
789
VkMat& top_blob = top_blobs[i];
790
top_blob.create(slice, h, channels, elemsize, elempack, opt.blob_vkallocator);
791
if (top_blob.empty())
798
for (size_t i = 0; i < top_blobs.size(); i++)
800
VkMat& top_blob = top_blobs[i];
802
std::vector<VkMat> bindings(2);
803
bindings[0] = bottom_blob;
804
bindings[1] = top_blob;
806
std::vector<vk_constant_type> constants(13);
807
constants[0].i = bottom_blob.dims;
808
constants[1].i = bottom_blob.w;
809
constants[2].i = bottom_blob.h;
810
constants[3].i = bottom_blob.d;
811
constants[4].i = bottom_blob.c;
812
constants[5].i = bottom_blob.cstep;
813
constants[6].i = top_blob.dims;
814
constants[7].i = top_blob.w;
815
constants[8].i = top_blob.h;
816
constants[9].i = top_blob.d;
817
constants[10].i = top_blob.c;
818
constants[11].i = top_blob.cstep;
819
constants[12].i = woffset;
821
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
822
: elempack == 4 ? pipeline_slice_pack4[i % 2]
823
: pipeline_slice[i % 2];
825
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
827
woffset += top_blob.w;
833
if (dims == 4 && positive_axis == 0)
835
int w = bottom_blob.w;
836
int h = bottom_blob.h;
837
int d = bottom_blob.d;
838
int channels = bottom_blob.c * elempack;
841
for (size_t i = 0; i < top_blobs.size(); i++)
846
if (i == top_blobs.size() - 1)
848
slice = channels - q;
852
int indice = indices_ptr[i];
853
int positive_indice = indice < 0 ? channels + indice : indice;
854
slice = positive_indice - q;
859
slice = slices_ptr[i];
862
slice = static_cast<int>((channels - q) / (top_blobs.size() - i));
866
int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
867
size_t out_elemsize = elemsize / elempack * out_elempack;
869
if (opt.use_fp16_packed && !opt.use_fp16_storage)
871
if (out_elempack == 8) out_elemsize = 8 * 2u;
872
if (out_elempack == 4) out_elemsize = 4 * 2u;
873
if (out_elempack == 1) out_elemsize = 4u;
876
VkMat& top_blob = top_blobs[i];
877
top_blob.create(w, h, d, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
878
if (top_blob.empty())
884
int out_elempack = top_blobs[0].elempack;
885
for (size_t i = 0; i < top_blobs.size(); i++)
887
out_elempack = std::min(out_elempack, top_blobs[i].elempack);
890
VkMat bottom_blob_unpacked = bottom_blob;
891
if (elempack > out_elempack)
893
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
897
for (size_t i = 0; i < top_blobs.size(); i++)
899
VkMat& top_blob = top_blobs[i];
901
std::vector<VkMat> bindings(2);
902
bindings[0] = bottom_blob_unpacked;
903
bindings[1] = top_blob;
905
std::vector<vk_constant_type> constants(13);
906
constants[0].i = bottom_blob_unpacked.dims;
907
constants[1].i = bottom_blob_unpacked.w;
908
constants[2].i = bottom_blob_unpacked.h;
909
constants[3].i = bottom_blob_unpacked.d;
910
constants[4].i = bottom_blob_unpacked.c;
911
constants[5].i = bottom_blob_unpacked.cstep;
912
constants[6].i = top_blob.dims;
913
constants[7].i = top_blob.w;
914
constants[8].i = top_blob.h;
915
constants[9].i = top_blob.d;
916
constants[10].i = top_blob.c;
917
constants[11].i = top_blob.cstep;
918
constants[12].i = coffset;
920
const Pipeline* pipeline = 0;
921
if (out_elempack == 1 && top_blob.elempack == 1)
923
pipeline = pipeline_slice[i % 2];
925
else if (out_elempack == 4 && top_blob.elempack == 4)
927
pipeline = pipeline_slice_pack4[i % 2];
929
else if (out_elempack == 1 && top_blob.elempack == 4)
931
pipeline = pipeline_slice_pack1to4[i % 2];
933
else if (out_elempack == 8 && top_blob.elempack == 8)
935
pipeline = pipeline_slice_pack8[i % 2];
937
else if (out_elempack == 1 && top_blob.elempack == 8)
939
pipeline = pipeline_slice_pack1to8[i % 2];
941
else if (out_elempack == 4 && top_blob.elempack == 8)
943
pipeline = pipeline_slice_pack4to8[i % 2];
946
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
948
coffset += top_blob.c * top_blob.elempack / out_elempack;
954
if (dims == 4 && positive_axis == 1)
956
int w = bottom_blob.w;
957
int h = bottom_blob.h;
958
int d = bottom_blob.d;
959
int channels = bottom_blob.c;
962
for (size_t i = 0; i < top_blobs.size(); i++)
967
if (i == top_blobs.size() - 1)
973
int indice = indices_ptr[i];
974
int positive_indice = indice < 0 ? d + indice : indice;
975
slice = positive_indice - q;
980
slice = slices_ptr[i];
983
slice = static_cast<int>((d - q) / (top_blobs.size() - i));
987
VkMat& top_blob = top_blobs[i];
988
top_blob.create(w, h, slice, channels, elemsize, elempack, opt.blob_vkallocator);
989
if (top_blob.empty())
996
for (size_t i = 0; i < top_blobs.size(); i++)
998
VkMat& top_blob = top_blobs[i];
1000
std::vector<VkMat> bindings(2);
1001
bindings[0] = bottom_blob;
1002
bindings[1] = top_blob;
1004
std::vector<vk_constant_type> constants(13);
1005
constants[0].i = bottom_blob.dims;
1006
constants[1].i = bottom_blob.w;
1007
constants[2].i = bottom_blob.h;
1008
constants[3].i = bottom_blob.d;
1009
constants[4].i = bottom_blob.c;
1010
constants[5].i = bottom_blob.cstep;
1011
constants[6].i = top_blob.dims;
1012
constants[7].i = top_blob.w;
1013
constants[8].i = top_blob.h;
1014
constants[9].i = top_blob.d;
1015
constants[10].i = top_blob.c;
1016
constants[11].i = top_blob.cstep;
1017
constants[12].i = doffset;
1019
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1020
: elempack == 4 ? pipeline_slice_pack4[i % 2]
1021
: pipeline_slice[i % 2];
1023
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1025
doffset += top_blob.d;
1031
if (dims == 4 && positive_axis == 2)
1033
int w = bottom_blob.w;
1034
int h = bottom_blob.h;
1035
int d = bottom_blob.d;
1036
int channels = bottom_blob.c;
1039
for (size_t i = 0; i < top_blobs.size(); i++)
1044
if (i == top_blobs.size() - 1)
1050
int indice = indices_ptr[i];
1051
int positive_indice = indice < 0 ? h + indice : indice;
1052
slice = positive_indice - q;
1057
slice = slices_ptr[i];
1060
slice = static_cast<int>((h - q) / (top_blobs.size() - i));
1064
VkMat& top_blob = top_blobs[i];
1065
top_blob.create(w, slice, d, channels, elemsize, elempack, opt.blob_vkallocator);
1066
if (top_blob.empty())
1073
for (size_t i = 0; i < top_blobs.size(); i++)
1075
VkMat& top_blob = top_blobs[i];
1077
std::vector<VkMat> bindings(2);
1078
bindings[0] = bottom_blob;
1079
bindings[1] = top_blob;
1081
std::vector<vk_constant_type> constants(13);
1082
constants[0].i = bottom_blob.dims;
1083
constants[1].i = bottom_blob.w;
1084
constants[2].i = bottom_blob.h;
1085
constants[3].i = bottom_blob.d;
1086
constants[4].i = bottom_blob.c;
1087
constants[5].i = bottom_blob.cstep;
1088
constants[6].i = top_blob.dims;
1089
constants[7].i = top_blob.w;
1090
constants[8].i = top_blob.h;
1091
constants[9].i = top_blob.d;
1092
constants[10].i = top_blob.c;
1093
constants[11].i = top_blob.cstep;
1094
constants[12].i = hoffset;
1096
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1097
: elempack == 4 ? pipeline_slice_pack4[i % 2]
1098
: pipeline_slice[i % 2];
1100
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1102
hoffset += top_blob.h;
1108
if (dims == 4 && positive_axis == 3)
1110
int w = bottom_blob.w;
1111
int h = bottom_blob.h;
1112
int d = bottom_blob.d;
1113
int channels = bottom_blob.c;
1116
for (size_t i = 0; i < top_blobs.size(); i++)
1121
if (i == top_blobs.size() - 1)
1127
int indice = indices_ptr[i];
1128
int positive_indice = indice < 0 ? w + indice : indice;
1129
slice = positive_indice - q;
1134
slice = slices_ptr[i];
1137
slice = static_cast<int>((w - q) / (top_blobs.size() - i));
1141
VkMat& top_blob = top_blobs[i];
1142
top_blob.create(slice, h, d, channels, elemsize, elempack, opt.blob_vkallocator);
1143
if (top_blob.empty())
1150
for (size_t i = 0; i < top_blobs.size(); i++)
1152
VkMat& top_blob = top_blobs[i];
1154
std::vector<VkMat> bindings(2);
1155
bindings[0] = bottom_blob;
1156
bindings[1] = top_blob;
1158
std::vector<vk_constant_type> constants(13);
1159
constants[0].i = bottom_blob.dims;
1160
constants[1].i = bottom_blob.w;
1161
constants[2].i = bottom_blob.h;
1162
constants[3].i = bottom_blob.d;
1163
constants[4].i = bottom_blob.c;
1164
constants[5].i = bottom_blob.cstep;
1165
constants[6].i = top_blob.dims;
1166
constants[7].i = top_blob.w;
1167
constants[8].i = top_blob.h;
1168
constants[9].i = top_blob.d;
1169
constants[10].i = top_blob.c;
1170
constants[11].i = top_blob.cstep;
1171
constants[12].i = woffset;
1173
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1174
: elempack == 4 ? pipeline_slice_pack4[i % 2]
1175
: pipeline_slice[i % 2];
1177
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1179
woffset += top_blob.w;
1188
int Slice_vulkan::forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const
1190
const VkImageMat& bottom_blob = bottom_blobs[0];
1191
int dims = bottom_blob.dims;
1192
size_t elemsize = bottom_blob.elemsize;
1193
int elempack = bottom_blob.elempack;
1194
const int* slices_ptr = slices;
1195
const int* indices_ptr = indices;
1196
int positive_axis = axis < 0 ? dims + axis : axis;
1198
if (dims == 1) // positive_axis == 0
1201
int w = bottom_blob.w * elempack;
1203
for (size_t i = 0; i < top_blobs.size(); i++)
1208
if (i == top_blobs.size() - 1)
1214
int indice = indices_ptr[i];
1215
int positive_indice = indice < 0 ? w + indice : indice;
1216
slice = positive_indice - q;
1221
slice = slices_ptr[i];
1224
slice = static_cast<int>((w - q) / (top_blobs.size() - i));
1228
int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
1229
size_t out_elemsize = elemsize / elempack * out_elempack;
1231
if (opt.use_fp16_packed && !opt.use_fp16_storage)
1233
if (out_elempack == 8) out_elemsize = 8 * 2u;
1234
if (out_elempack == 4) out_elemsize = 4 * 2u;
1235
if (out_elempack == 1) out_elemsize = 4u;
1238
VkImageMat& top_blob = top_blobs[i];
1239
top_blob.create(slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1240
if (top_blob.empty())
1246
int out_elempack = top_blobs[0].elempack;
1247
for (size_t i = 0; i < top_blobs.size(); i++)
1249
out_elempack = std::min(out_elempack, top_blobs[i].elempack);
1252
VkImageMat bottom_blob_unpacked = bottom_blob;
1253
if (elempack > out_elempack)
1255
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
1259
for (size_t i = 0; i < top_blobs.size(); i++)
1261
VkImageMat& top_blob = top_blobs[i];
1263
std::vector<VkImageMat> bindings(2);
1264
bindings[0] = bottom_blob_unpacked;
1265
bindings[1] = top_blob;
1267
std::vector<vk_constant_type> constants(13);
1268
constants[0].i = bottom_blob_unpacked.dims;
1269
constants[1].i = bottom_blob_unpacked.w;
1270
constants[2].i = bottom_blob_unpacked.h;
1271
constants[3].i = bottom_blob_unpacked.d;
1272
constants[4].i = bottom_blob_unpacked.c;
1273
constants[5].i = 0; //bottom_blob_unpacked.cstep;
1274
constants[6].i = top_blob.dims;
1275
constants[7].i = top_blob.w;
1276
constants[8].i = top_blob.h;
1277
constants[9].i = top_blob.d;
1278
constants[10].i = top_blob.c;
1279
constants[11].i = 0; //top_blob.cstep;
1280
constants[12].i = woffset;
1282
const Pipeline* pipeline = 0;
1283
if (out_elempack == 1 && top_blob.elempack == 1)
1285
pipeline = pipeline_slice[i % 2];
1287
else if (out_elempack == 4 && top_blob.elempack == 4)
1289
pipeline = pipeline_slice_pack4[i % 2];
1291
else if (out_elempack == 1 && top_blob.elempack == 4)
1293
pipeline = pipeline_slice_pack1to4[i % 2];
1295
else if (out_elempack == 8 && top_blob.elempack == 8)
1297
pipeline = pipeline_slice_pack8[i % 2];
1299
else if (out_elempack == 1 && top_blob.elempack == 8)
1301
pipeline = pipeline_slice_pack1to8[i % 2];
1303
else if (out_elempack == 4 && top_blob.elempack == 8)
1305
pipeline = pipeline_slice_pack4to8[i % 2];
1308
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1310
woffset += top_blob.w * top_blob.elempack / out_elempack;
1316
if (dims == 2 && positive_axis == 0)
1318
// slice image height
1319
int w = bottom_blob.w;
1320
int h = bottom_blob.h * elempack;
1323
for (size_t i = 0; i < top_blobs.size(); i++)
1328
if (i == top_blobs.size() - 1)
1334
int indice = indices_ptr[i];
1335
int positive_indice = indice < 0 ? h + indice : indice;
1336
slice = positive_indice - q;
1341
slice = slices_ptr[i];
1344
slice = static_cast<int>((h - q) / (top_blobs.size() - i));
1348
int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
1349
size_t out_elemsize = elemsize / elempack * out_elempack;
1351
if (opt.use_fp16_packed && !opt.use_fp16_storage)
1353
if (out_elempack == 8) out_elemsize = 8 * 2u;
1354
if (out_elempack == 4) out_elemsize = 4 * 2u;
1355
if (out_elempack == 1) out_elemsize = 4u;
1358
VkImageMat& top_blob = top_blobs[i];
1359
top_blob.create(w, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1360
if (top_blob.empty())
1366
int out_elempack = top_blobs[0].elempack;
1367
for (size_t i = 0; i < top_blobs.size(); i++)
1369
out_elempack = std::min(out_elempack, top_blobs[i].elempack);
1372
VkImageMat bottom_blob_unpacked = bottom_blob;
1373
if (elempack > out_elempack)
1375
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
1379
for (size_t i = 0; i < top_blobs.size(); i++)
1381
VkImageMat& top_blob = top_blobs[i];
1383
std::vector<VkImageMat> bindings(2);
1384
bindings[0] = bottom_blob_unpacked;
1385
bindings[1] = top_blob;
1387
std::vector<vk_constant_type> constants(13);
1388
constants[0].i = bottom_blob_unpacked.dims;
1389
constants[1].i = bottom_blob_unpacked.w;
1390
constants[2].i = bottom_blob_unpacked.h;
1391
constants[3].i = bottom_blob_unpacked.d;
1392
constants[4].i = bottom_blob_unpacked.c;
1393
constants[5].i = 0; //bottom_blob_unpacked.cstep;
1394
constants[6].i = top_blob.dims;
1395
constants[7].i = top_blob.w;
1396
constants[8].i = top_blob.h;
1397
constants[9].i = top_blob.d;
1398
constants[10].i = top_blob.c;
1399
constants[11].i = 0; //top_blob.cstep;
1400
constants[12].i = hoffset;
1402
const Pipeline* pipeline = 0;
1403
if (out_elempack == 1 && top_blob.elempack == 1)
1405
pipeline = pipeline_slice[i % 2];
1407
else if (out_elempack == 4 && top_blob.elempack == 4)
1409
pipeline = pipeline_slice_pack4[i % 2];
1411
else if (out_elempack == 1 && top_blob.elempack == 4)
1413
pipeline = pipeline_slice_pack1to4[i % 2];
1415
else if (out_elempack == 8 && top_blob.elempack == 8)
1417
pipeline = pipeline_slice_pack8[i % 2];
1419
else if (out_elempack == 1 && top_blob.elempack == 8)
1421
pipeline = pipeline_slice_pack1to8[i % 2];
1423
else if (out_elempack == 4 && top_blob.elempack == 8)
1425
pipeline = pipeline_slice_pack4to8[i % 2];
1428
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1430
hoffset += top_blob.h * top_blob.elempack / out_elempack;
1436
if (dims == 2 && positive_axis == 1)
1438
// slice image width
1439
int w = bottom_blob.w;
1440
int h = bottom_blob.h;
1443
for (size_t i = 0; i < top_blobs.size(); i++)
1448
if (i == top_blobs.size() - 1)
1454
int indice = indices_ptr[i];
1455
int positive_indice = indice < 0 ? w + indice : indice;
1456
slice = positive_indice - q;
1461
slice = slices_ptr[i];
1464
slice = static_cast<int>((w - q) / (top_blobs.size() - i));
1468
VkImageMat& top_blob = top_blobs[i];
1469
top_blob.create(slice, h, elemsize, elempack, opt.blob_vkallocator);
1470
if (top_blob.empty())
1477
for (size_t i = 0; i < top_blobs.size(); i++)
1479
VkImageMat& top_blob = top_blobs[i];
1481
std::vector<VkImageMat> bindings(2);
1482
bindings[0] = bottom_blob;
1483
bindings[1] = top_blob;
1485
std::vector<vk_constant_type> constants(13);
1486
constants[0].i = bottom_blob.dims;
1487
constants[1].i = bottom_blob.w;
1488
constants[2].i = bottom_blob.h;
1489
constants[3].i = bottom_blob.d;
1490
constants[4].i = bottom_blob.c;
1491
constants[5].i = 0; //bottom_blob.cstep;
1492
constants[6].i = top_blob.dims;
1493
constants[7].i = top_blob.w;
1494
constants[8].i = top_blob.h;
1495
constants[9].i = top_blob.d;
1496
constants[10].i = top_blob.c;
1497
constants[11].i = 0; //top_blob.cstep;
1498
constants[12].i = woffset;
1500
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1501
: elempack == 4 ? pipeline_slice_pack4[i % 2]
1502
: pipeline_slice[i % 2];
1504
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1506
woffset += top_blob.w;
1512
if (dims == 3 && positive_axis == 0)
1514
// slice dim channel
1515
int w = bottom_blob.w;
1516
int h = bottom_blob.h;
1517
int channels = bottom_blob.c * elempack;
1520
for (size_t i = 0; i < top_blobs.size(); i++)
1525
if (i == top_blobs.size() - 1)
1527
slice = channels - q;
1531
int indice = indices_ptr[i];
1532
int positive_indice = indice < 0 ? channels + indice : indice;
1533
slice = positive_indice - q;
1538
slice = slices_ptr[i];
1541
slice = static_cast<int>((channels - q) / (top_blobs.size() - i));
1545
int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
1546
size_t out_elemsize = elemsize / elempack * out_elempack;
1548
if (opt.use_fp16_packed && !opt.use_fp16_storage)
1550
if (out_elempack == 8) out_elemsize = 8 * 2u;
1551
if (out_elempack == 4) out_elemsize = 4 * 2u;
1552
if (out_elempack == 1) out_elemsize = 4u;
1555
VkImageMat& top_blob = top_blobs[i];
1556
top_blob.create(w, h, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1557
if (top_blob.empty())
1563
int out_elempack = top_blobs[0].elempack;
1564
for (size_t i = 0; i < top_blobs.size(); i++)
1566
out_elempack = std::min(out_elempack, top_blobs[i].elempack);
1569
VkImageMat bottom_blob_unpacked = bottom_blob;
1570
if (elempack > out_elempack)
1572
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
1576
for (size_t i = 0; i < top_blobs.size(); i++)
1578
VkImageMat& top_blob = top_blobs[i];
1580
std::vector<VkImageMat> bindings(2);
1581
bindings[0] = bottom_blob_unpacked;
1582
bindings[1] = top_blob;
1584
std::vector<vk_constant_type> constants(13);
1585
constants[0].i = bottom_blob_unpacked.dims;
1586
constants[1].i = bottom_blob_unpacked.w;
1587
constants[2].i = bottom_blob_unpacked.h;
1588
constants[3].i = bottom_blob_unpacked.d;
1589
constants[4].i = bottom_blob_unpacked.c;
1590
constants[5].i = 0; //bottom_blob_unpacked.cstep;
1591
constants[6].i = top_blob.dims;
1592
constants[7].i = top_blob.w;
1593
constants[8].i = top_blob.h;
1594
constants[9].i = top_blob.d;
1595
constants[10].i = top_blob.c;
1596
constants[11].i = 0; //top_blob.cstep;
1597
constants[12].i = coffset;
1599
const Pipeline* pipeline = 0;
1600
if (out_elempack == 1 && top_blob.elempack == 1)
1602
pipeline = pipeline_slice[i % 2];
1604
else if (out_elempack == 4 && top_blob.elempack == 4)
1606
pipeline = pipeline_slice_pack4[i % 2];
1608
else if (out_elempack == 1 && top_blob.elempack == 4)
1610
pipeline = pipeline_slice_pack1to4[i % 2];
1612
else if (out_elempack == 8 && top_blob.elempack == 8)
1614
pipeline = pipeline_slice_pack8[i % 2];
1616
else if (out_elempack == 1 && top_blob.elempack == 8)
1618
pipeline = pipeline_slice_pack1to8[i % 2];
1620
else if (out_elempack == 4 && top_blob.elempack == 8)
1622
pipeline = pipeline_slice_pack4to8[i % 2];
1625
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1627
coffset += top_blob.c * top_blob.elempack / out_elempack;
1633
if (dims == 3 && positive_axis == 1)
1636
int w = bottom_blob.w;
1637
int h = bottom_blob.h;
1638
int channels = bottom_blob.c;
1641
for (size_t i = 0; i < top_blobs.size(); i++)
1646
if (i == top_blobs.size() - 1)
1652
int indice = indices_ptr[i];
1653
int positive_indice = indice < 0 ? h + indice : indice;
1654
slice = positive_indice - q;
1659
slice = slices_ptr[i];
1662
slice = static_cast<int>((h - q) / (top_blobs.size() - i));
1666
VkImageMat& top_blob = top_blobs[i];
1667
top_blob.create(w, slice, channels, elemsize, elempack, opt.blob_vkallocator);
1668
if (top_blob.empty())
1675
for (size_t i = 0; i < top_blobs.size(); i++)
1677
VkImageMat& top_blob = top_blobs[i];
1679
std::vector<VkImageMat> bindings(2);
1680
bindings[0] = bottom_blob;
1681
bindings[1] = top_blob;
1683
std::vector<vk_constant_type> constants(13);
1684
constants[0].i = bottom_blob.dims;
1685
constants[1].i = bottom_blob.w;
1686
constants[2].i = bottom_blob.h;
1687
constants[3].i = bottom_blob.d;
1688
constants[4].i = bottom_blob.c;
1689
constants[5].i = 0; //bottom_blob.cstep;
1690
constants[6].i = top_blob.dims;
1691
constants[7].i = top_blob.w;
1692
constants[8].i = top_blob.h;
1693
constants[9].i = top_blob.d;
1694
constants[10].i = top_blob.c;
1695
constants[11].i = 0; //top_blob.cstep;
1696
constants[12].i = hoffset;
1698
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1699
: elempack == 4 ? pipeline_slice_pack4[i % 2]
1700
: pipeline_slice[i % 2];
1702
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1704
hoffset += top_blob.h;
1710
if (dims == 3 && positive_axis == 2)
1713
int w = bottom_blob.w;
1714
int h = bottom_blob.h;
1715
int channels = bottom_blob.c;
1718
for (size_t i = 0; i < top_blobs.size(); i++)
1723
if (i == top_blobs.size() - 1)
1729
int indice = indices_ptr[i];
1730
int positive_indice = indice < 0 ? w + indice : indice;
1731
slice = positive_indice - q;
1736
slice = slices_ptr[i];
1739
slice = static_cast<int>((w - q) / (top_blobs.size() - i));
1743
VkImageMat& top_blob = top_blobs[i];
1744
top_blob.create(slice, h, channels, elemsize, elempack, opt.blob_vkallocator);
1745
if (top_blob.empty())
1752
for (size_t i = 0; i < top_blobs.size(); i++)
1754
VkImageMat& top_blob = top_blobs[i];
1756
std::vector<VkImageMat> bindings(2);
1757
bindings[0] = bottom_blob;
1758
bindings[1] = top_blob;
1760
std::vector<vk_constant_type> constants(13);
1761
constants[0].i = bottom_blob.dims;
1762
constants[1].i = bottom_blob.w;
1763
constants[2].i = bottom_blob.h;
1764
constants[3].i = bottom_blob.d;
1765
constants[4].i = bottom_blob.c;
1766
constants[5].i = 0; //bottom_blob.cstep;
1767
constants[6].i = top_blob.dims;
1768
constants[7].i = top_blob.w;
1769
constants[8].i = top_blob.h;
1770
constants[9].i = top_blob.d;
1771
constants[10].i = top_blob.c;
1772
constants[11].i = 0; //top_blob.cstep;
1773
constants[12].i = woffset;
1775
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1776
: elempack == 4 ? pipeline_slice_pack4[i % 2]
1777
: pipeline_slice[i % 2];
1779
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1781
woffset += top_blob.w;
1787
if (dims == 4 && positive_axis == 0)
1789
int w = bottom_blob.w;
1790
int h = bottom_blob.h;
1791
int d = bottom_blob.d;
1792
int channels = bottom_blob.c * elempack;
1795
for (size_t i = 0; i < top_blobs.size(); i++)
1800
if (i == top_blobs.size() - 1)
1802
slice = channels - q;
1806
int indice = indices_ptr[i];
1807
int positive_indice = indice < 0 ? channels + indice : indice;
1808
slice = positive_indice - q;
1813
slice = slices_ptr[i];
1816
slice = static_cast<int>((channels - q) / (top_blobs.size() - i));
1820
int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
1821
size_t out_elemsize = elemsize / elempack * out_elempack;
1823
if (opt.use_fp16_packed && !opt.use_fp16_storage)
1825
if (out_elempack == 8) out_elemsize = 8 * 2u;
1826
if (out_elempack == 4) out_elemsize = 4 * 2u;
1827
if (out_elempack == 1) out_elemsize = 4u;
1830
VkImageMat& top_blob = top_blobs[i];
1831
top_blob.create(w, h, d, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1832
if (top_blob.empty())
1838
int out_elempack = top_blobs[0].elempack;
1839
for (size_t i = 0; i < top_blobs.size(); i++)
1841
out_elempack = std::min(out_elempack, top_blobs[i].elempack);
1844
VkImageMat bottom_blob_unpacked = bottom_blob;
1845
if (elempack > out_elempack)
1847
vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
1851
for (size_t i = 0; i < top_blobs.size(); i++)
1853
VkImageMat& top_blob = top_blobs[i];
1855
std::vector<VkImageMat> bindings(2);
1856
bindings[0] = bottom_blob_unpacked;
1857
bindings[1] = top_blob;
1859
std::vector<vk_constant_type> constants(13);
1860
constants[0].i = bottom_blob_unpacked.dims;
1861
constants[1].i = bottom_blob_unpacked.w;
1862
constants[2].i = bottom_blob_unpacked.h;
1863
constants[3].i = bottom_blob_unpacked.d;
1864
constants[4].i = bottom_blob_unpacked.c;
1865
constants[5].i = 0; //bottom_blob_unpacked.cstep;
1866
constants[6].i = top_blob.dims;
1867
constants[7].i = top_blob.w;
1868
constants[8].i = top_blob.h;
1869
constants[9].i = top_blob.d;
1870
constants[10].i = top_blob.c;
1871
constants[11].i = 0; //top_blob.cstep;
1872
constants[12].i = coffset;
1874
const Pipeline* pipeline = 0;
1875
if (out_elempack == 1 && top_blob.elempack == 1)
1877
pipeline = pipeline_slice[i % 2];
1879
else if (out_elempack == 4 && top_blob.elempack == 4)
1881
pipeline = pipeline_slice_pack4[i % 2];
1883
else if (out_elempack == 1 && top_blob.elempack == 4)
1885
pipeline = pipeline_slice_pack1to4[i % 2];
1887
else if (out_elempack == 8 && top_blob.elempack == 8)
1889
pipeline = pipeline_slice_pack8[i % 2];
1891
else if (out_elempack == 1 && top_blob.elempack == 8)
1893
pipeline = pipeline_slice_pack1to8[i % 2];
1895
else if (out_elempack == 4 && top_blob.elempack == 8)
1897
pipeline = pipeline_slice_pack4to8[i % 2];
1900
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1902
coffset += top_blob.c * top_blob.elempack / out_elempack;
1908
if (dims == 4 && positive_axis == 1)
1910
int w = bottom_blob.w;
1911
int h = bottom_blob.h;
1912
int d = bottom_blob.d;
1913
int channels = bottom_blob.c;
1916
for (size_t i = 0; i < top_blobs.size(); i++)
1921
if (i == top_blobs.size() - 1)
1927
int indice = indices_ptr[i];
1928
int positive_indice = indice < 0 ? d + indice : indice;
1929
slice = positive_indice - q;
1934
slice = slices_ptr[i];
1937
slice = static_cast<int>((d - q) / (top_blobs.size() - i));
1941
VkImageMat& top_blob = top_blobs[i];
1942
top_blob.create(w, h, slice, channels, elemsize, elempack, opt.blob_vkallocator);
1943
if (top_blob.empty())
1950
for (size_t i = 0; i < top_blobs.size(); i++)
1952
VkImageMat& top_blob = top_blobs[i];
1954
std::vector<VkImageMat> bindings(2);
1955
bindings[0] = bottom_blob;
1956
bindings[1] = top_blob;
1958
std::vector<vk_constant_type> constants(13);
1959
constants[0].i = bottom_blob.dims;
1960
constants[1].i = bottom_blob.w;
1961
constants[2].i = bottom_blob.h;
1962
constants[3].i = bottom_blob.d;
1963
constants[4].i = bottom_blob.c;
1964
constants[5].i = 0; //bottom_blob.cstep;
1965
constants[6].i = top_blob.dims;
1966
constants[7].i = top_blob.w;
1967
constants[8].i = top_blob.h;
1968
constants[9].i = top_blob.d;
1969
constants[10].i = top_blob.c;
1970
constants[11].i = 0; //top_blob.cstep;
1971
constants[12].i = doffset;
1973
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1974
: elempack == 4 ? pipeline_slice_pack4[i % 2]
1975
: pipeline_slice[i % 2];
1977
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1979
doffset += top_blob.d;
1985
if (dims == 4 && positive_axis == 2)
1987
int w = bottom_blob.w;
1988
int h = bottom_blob.h;
1989
int d = bottom_blob.d;
1990
int channels = bottom_blob.c;
1993
for (size_t i = 0; i < top_blobs.size(); i++)
1998
if (i == top_blobs.size() - 1)
2004
int indice = indices_ptr[i];
2005
int positive_indice = indice < 0 ? h + indice : indice;
2006
slice = positive_indice - q;
2011
slice = slices_ptr[i];
2014
slice = static_cast<int>((h - q) / (top_blobs.size() - i));
2018
VkImageMat& top_blob = top_blobs[i];
2019
top_blob.create(w, slice, d, channels, elemsize, elempack, opt.blob_vkallocator);
2020
if (top_blob.empty())
2027
for (size_t i = 0; i < top_blobs.size(); i++)
2029
VkImageMat& top_blob = top_blobs[i];
2031
std::vector<VkImageMat> bindings(2);
2032
bindings[0] = bottom_blob;
2033
bindings[1] = top_blob;
2035
std::vector<vk_constant_type> constants(13);
2036
constants[0].i = bottom_blob.dims;
2037
constants[1].i = bottom_blob.w;
2038
constants[2].i = bottom_blob.h;
2039
constants[3].i = bottom_blob.d;
2040
constants[4].i = bottom_blob.c;
2041
constants[5].i = 0; //bottom_blob.cstep;
2042
constants[6].i = top_blob.dims;
2043
constants[7].i = top_blob.w;
2044
constants[8].i = top_blob.h;
2045
constants[9].i = top_blob.d;
2046
constants[10].i = top_blob.c;
2047
constants[11].i = 0; //top_blob.cstep;
2048
constants[12].i = hoffset;
2050
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
2051
: elempack == 4 ? pipeline_slice_pack4[i % 2]
2052
: pipeline_slice[i % 2];
2054
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
2056
hoffset += top_blob.h;
2062
if (dims == 4 && positive_axis == 3)
2064
int w = bottom_blob.w;
2065
int h = bottom_blob.h;
2066
int d = bottom_blob.d;
2067
int channels = bottom_blob.c;
2070
for (size_t i = 0; i < top_blobs.size(); i++)
2075
if (i == top_blobs.size() - 1)
2081
int indice = indices_ptr[i];
2082
int positive_indice = indice < 0 ? w + indice : indice;
2083
slice = positive_indice - q;
2088
slice = slices_ptr[i];
2091
slice = static_cast<int>((w - q) / (top_blobs.size() - i));
2095
VkImageMat& top_blob = top_blobs[i];
2096
top_blob.create(slice, h, d, channels, elemsize, elempack, opt.blob_vkallocator);
2097
if (top_blob.empty())
2104
for (size_t i = 0; i < top_blobs.size(); i++)
2106
VkImageMat& top_blob = top_blobs[i];
2108
std::vector<VkImageMat> bindings(2);
2109
bindings[0] = bottom_blob;
2110
bindings[1] = top_blob;
2112
std::vector<vk_constant_type> constants(13);
2113
constants[0].i = bottom_blob.dims;
2114
constants[1].i = bottom_blob.w;
2115
constants[2].i = bottom_blob.h;
2116
constants[3].i = bottom_blob.d;
2117
constants[4].i = bottom_blob.c;
2118
constants[5].i = 0; //bottom_blob.cstep;
2119
constants[6].i = top_blob.dims;
2120
constants[7].i = top_blob.w;
2121
constants[8].i = top_blob.h;
2122
constants[9].i = top_blob.d;
2123
constants[10].i = top_blob.c;
2124
constants[11].i = 0; //top_blob.cstep;
2125
constants[12].i = woffset;
2127
const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
2128
: elempack == 4 ? pipeline_slice_pack4[i % 2]
2129
: pipeline_slice[i % 2];
2131
cmd.record_pipeline(pipeline, bindings, constants, top_blob);
2133
woffset += top_blob.w;