ncnn

Форк
0
/
slice_vulkan.cpp 
2142 строки · 70.3 Кб
1
// Tencent is pleased to support the open source community by making ncnn available.
2
//
3
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
4
//
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
7
//
8
// https://opensource.org/licenses/BSD-3-Clause
9
//
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
14

15
#include "slice_vulkan.h"
16

17
#include "layer_shader_type.h"
18

19
namespace ncnn {
20

21
// Constructs the layer: enables the Vulkan and image-storage flags and
// starts with every pipeline slot empty (null) until create_pipeline() runs.
Slice_vulkan::Slice_vulkan()
{
    support_vulkan = true;
    support_image_storage = true;

    // Each shader variant keeps two pipeline slots; clear both of them.
    for (int k = 0; k < 2; k++)
    {
        pipeline_slice[k] = 0;
        pipeline_slice_pack4[k] = 0;
        pipeline_slice_pack1to4[k] = 0;
        pipeline_slice_pack8[k] = 0;
        pipeline_slice_pack1to8[k] = 0;
        pipeline_slice_pack4to8[k] = 0;
    }
}
39

40
int Slice_vulkan::create_pipeline(const Option& opt)
41
{
42
    const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
43
    const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
44
    int positive_axis = axis < 0 ? shape.dims + axis : axis;
45

46
    int elempack = 1;
47
    if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
48
    if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
49
    if (shape.dims == 3 || shape.dims == 4) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
50

51
    int out_elempack = 1;
52
    if (positive_axis == 0)
53
    {
54
        if (out_shape.dims == 1) out_elempack = opt.use_shader_pack8 && out_shape.w % 8 == 0 ? 8 : out_shape.w % 4 == 0 ? 4 : 1;
55
        if (out_shape.dims == 2) out_elempack = opt.use_shader_pack8 && out_shape.h % 8 == 0 ? 8 : out_shape.h % 4 == 0 ? 4 : 1;
56
        if (out_shape.dims == 3 || out_shape.dims == 4) out_elempack = opt.use_shader_pack8 && out_shape.c % 8 == 0 ? 8 : out_shape.c % 4 == 0 ? 4 : 1;
57

58
        for (size_t b = 1; b < top_shapes.size(); b++)
59
        {
60
            const Mat& shape1 = top_shapes[b];
61

62
            int out_elempack1 = 1;
63
            if (shape1.dims == 1) out_elempack1 = opt.use_shader_pack8 && shape1.w % 8 == 0 ? 8 : shape1.w % 4 == 0 ? 4 : 1;
64
            if (shape1.dims == 2) out_elempack1 = opt.use_shader_pack8 && shape1.h % 8 == 0 ? 8 : shape1.h % 4 == 0 ? 4 : 1;
65
            if (shape1.dims == 3 || shape1.dims == 4) out_elempack1 = opt.use_shader_pack8 && shape1.c % 8 == 0 ? 8 : shape1.c % 4 == 0 ? 4 : 1;
66

67
            out_elempack = std::min(out_elempack, out_elempack1);
68
        }
69
    }
70
    else
71
    {
72
        out_elempack = elempack;
73
    }
74

75
    size_t out_elemsize;
76
    if (opt.use_fp16_storage)
77
    {
78
        out_elemsize = out_elempack * 2u;
79
    }
80
    else if (opt.use_fp16_packed)
81
    {
82
        out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
83
    }
84
    else
85
    {
86
        out_elemsize = out_elempack * 4u;
87
    }
88

89
    Mat shape_unpacked;
90
    if (shape.dims == 1) shape_unpacked = Mat(shape.w / out_elempack, (void*)0, out_elemsize, out_elempack);
91
    if (shape.dims == 2) shape_unpacked = Mat(shape.w, shape.h / out_elempack, (void*)0, out_elemsize, out_elempack);
92
    if (shape.dims == 3) shape_unpacked = Mat(shape.w, shape.h, shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
93
    if (shape.dims == 4) shape_unpacked = Mat(shape.w, shape.h, shape.d, shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
94

95
    std::vector<vk_specialization_type> specializations(1 + 12);
96
    specializations[0].i = axis;
97
    specializations[1 + 0].i = shape_unpacked.dims;
98
    specializations[1 + 1].i = shape_unpacked.w;
99
    specializations[1 + 2].i = shape_unpacked.h;
100
    specializations[1 + 3].i = shape_unpacked.d;
101
    specializations[1 + 4].i = shape_unpacked.c;
102
    specializations[1 + 5].i = shape_unpacked.cstep;
103
    specializations[1 + 6].i = 0; // TODO handle out_shape_packed for slice2
104
    specializations[1 + 7].i = 0;
105
    specializations[1 + 8].i = 0;
106
    specializations[1 + 9].i = 0;
107
    specializations[1 + 10].i = 0;
108
    specializations[1 + 11].i = 0;
109

110
    Mat local_size_xyz; // TODO more precise group size guessed from shape_unpacked
111
    if (shape_unpacked.dims == 1)
112
    {
113
        local_size_xyz.w = 64;
114
        local_size_xyz.h = 1;
115
        local_size_xyz.c = 1;
116
    }
117
    if (shape_unpacked.dims == 2)
118
    {
119
        local_size_xyz.w = 8;
120
        local_size_xyz.h = 8;
121
        local_size_xyz.c = 1;
122
    }
123
    if (shape_unpacked.dims == 3)
124
    {
125
        local_size_xyz.w = 4;
126
        local_size_xyz.h = 4;
127
        local_size_xyz.c = 4;
128
    }
129

130
    // pack1
131
    if (shape.dims == 0 || out_elempack == 1)
132
    {
133
        pipeline_slice[0] = new Pipeline(vkdev);
134
        pipeline_slice[0]->set_optimal_local_size_xyz(local_size_xyz);
135
        pipeline_slice[0]->create(LayerShaderType::slice, opt, specializations);
136
        pipeline_slice[1] = new Pipeline(vkdev);
137
        pipeline_slice[1]->set_optimal_local_size_xyz(local_size_xyz);
138
        pipeline_slice[1]->create(LayerShaderType::slice, opt, specializations);
139
    }
140

141
    // pack4
142
    if (shape.dims == 0 || out_elempack == 4)
143
    {
144
        pipeline_slice_pack4[0] = new Pipeline(vkdev);
145
        pipeline_slice_pack4[0]->set_optimal_local_size_xyz(local_size_xyz);
146
        pipeline_slice_pack4[0]->create(LayerShaderType::slice_pack4, opt, specializations);
147
        pipeline_slice_pack4[1] = new Pipeline(vkdev);
148
        pipeline_slice_pack4[1]->set_optimal_local_size_xyz(local_size_xyz);
149
        pipeline_slice_pack4[1]->create(LayerShaderType::slice_pack4, opt, specializations);
150
    }
151

152
    // pack1to4
153
    if ((positive_axis <= 0 && shape.dims == 0) || out_elempack == 1)
154
    {
155
        pipeline_slice_pack1to4[0] = new Pipeline(vkdev);
156
        pipeline_slice_pack1to4[0]->set_optimal_local_size_xyz(local_size_xyz);
157
        pipeline_slice_pack1to4[0]->create(LayerShaderType::slice_pack1to4, opt, specializations);
158
        pipeline_slice_pack1to4[1] = new Pipeline(vkdev);
159
        pipeline_slice_pack1to4[1]->set_optimal_local_size_xyz(local_size_xyz);
160
        pipeline_slice_pack1to4[1]->create(LayerShaderType::slice_pack1to4, opt, specializations);
161
    }
162

163
    // pack8
164
    if (opt.use_shader_pack8 && (shape.dims == 0 || out_elempack == 8))
165
    {
166
        pipeline_slice_pack8[0] = new Pipeline(vkdev);
167
        pipeline_slice_pack8[0]->set_optimal_local_size_xyz(local_size_xyz);
168
        pipeline_slice_pack8[0]->create(LayerShaderType::slice_pack8, opt, specializations);
169
        pipeline_slice_pack8[1] = new Pipeline(vkdev);
170
        pipeline_slice_pack8[1]->set_optimal_local_size_xyz(local_size_xyz);
171
        pipeline_slice_pack8[1]->create(LayerShaderType::slice_pack8, opt, specializations);
172
    }
173

174
    // pack1to8
175
    if (opt.use_shader_pack8 && ((positive_axis <= 0 && shape.dims == 0) || out_elempack == 1))
176
    {
177
        pipeline_slice_pack1to8[0] = new Pipeline(vkdev);
178
        pipeline_slice_pack1to8[0]->set_optimal_local_size_xyz(local_size_xyz);
179
        pipeline_slice_pack1to8[0]->create(LayerShaderType::slice_pack1to8, opt, specializations);
180
        pipeline_slice_pack1to8[1] = new Pipeline(vkdev);
181
        pipeline_slice_pack1to8[1]->set_optimal_local_size_xyz(local_size_xyz);
182
        pipeline_slice_pack1to8[1]->create(LayerShaderType::slice_pack1to8, opt, specializations);
183
    }
184

185
    // pack4to8
186
    if (opt.use_shader_pack8 && ((positive_axis <= 0 && shape.dims == 0) || out_elempack == 4))
187
    {
188
        pipeline_slice_pack4to8[0] = new Pipeline(vkdev);
189
        pipeline_slice_pack4to8[0]->set_optimal_local_size_xyz(local_size_xyz);
190
        pipeline_slice_pack4to8[0]->create(LayerShaderType::slice_pack4to8, opt, specializations);
191
        pipeline_slice_pack4to8[1] = new Pipeline(vkdev);
192
        pipeline_slice_pack4to8[1]->set_optimal_local_size_xyz(local_size_xyz);
193
        pipeline_slice_pack4to8[1]->create(LayerShaderType::slice_pack4to8, opt, specializations);
194
    }
195

196
    return 0;
197
}
198

199
int Slice_vulkan::destroy_pipeline(const Option& /*opt*/)
200
{
201
    delete pipeline_slice[0];
202
    delete pipeline_slice[1];
203
    pipeline_slice[0] = 0;
204
    pipeline_slice[1] = 0;
205

206
    delete pipeline_slice_pack4[0];
207
    delete pipeline_slice_pack4[1];
208
    pipeline_slice_pack4[0] = 0;
209
    pipeline_slice_pack4[1] = 0;
210

211
    delete pipeline_slice_pack1to4[0];
212
    delete pipeline_slice_pack1to4[1];
213
    pipeline_slice_pack1to4[0] = 0;
214
    pipeline_slice_pack1to4[1] = 0;
215

216
    delete pipeline_slice_pack8[0];
217
    delete pipeline_slice_pack8[1];
218
    pipeline_slice_pack8[0] = 0;
219
    pipeline_slice_pack8[1] = 0;
220

221
    delete pipeline_slice_pack1to8[0];
222
    delete pipeline_slice_pack1to8[1];
223
    pipeline_slice_pack1to8[0] = 0;
224
    pipeline_slice_pack1to8[1] = 0;
225

226
    delete pipeline_slice_pack4to8[0];
227
    delete pipeline_slice_pack4to8[1];
228
    pipeline_slice_pack4to8[0] = 0;
229
    pipeline_slice_pack4to8[1] = 0;
230

231
    return 0;
232
}
233

234
int Slice_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
235
{
236
    const VkMat& bottom_blob = bottom_blobs[0];
237
    int dims = bottom_blob.dims;
238
    size_t elemsize = bottom_blob.elemsize;
239
    int elempack = bottom_blob.elempack;
240
    const int* slices_ptr = slices;
241
    const int* indices_ptr = indices;
242
    int positive_axis = axis < 0 ? dims + axis : axis;
243

244
    if (dims == 1) // positive_axis == 0
245
    {
246
        // slice vector
247
        int w = bottom_blob.w * elempack;
248
        int q = 0;
249
        for (size_t i = 0; i < top_blobs.size(); i++)
250
        {
251
            int slice;
252
            if (indices_ptr)
253
            {
254
                if (i == top_blobs.size() - 1)
255
                {
256
                    slice = w - q;
257
                }
258
                else
259
                {
260
                    int indice = indices_ptr[i];
261
                    int positive_indice = indice < 0 ? w + indice : indice;
262
                    slice = positive_indice - q;
263
                }
264
            }
265
            else
266
            {
267
                slice = slices_ptr[i];
268
                if (slice == -233)
269
                {
270
                    slice = static_cast<int>((w - q) / (top_blobs.size() - i));
271
                }
272
            }
273

274
            int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
275
            size_t out_elemsize = elemsize / elempack * out_elempack;
276

277
            if (opt.use_fp16_packed && !opt.use_fp16_storage)
278
            {
279
                if (out_elempack == 8) out_elemsize = 8 * 2u;
280
                if (out_elempack == 4) out_elemsize = 4 * 2u;
281
                if (out_elempack == 1) out_elemsize = 4u;
282
            }
283

284
            VkMat& top_blob = top_blobs[i];
285
            top_blob.create(slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
286
            if (top_blob.empty())
287
                return -100;
288

289
            q += slice;
290
        }
291

292
        int out_elempack = top_blobs[0].elempack;
293
        for (size_t i = 0; i < top_blobs.size(); i++)
294
        {
295
            out_elempack = std::min(out_elempack, top_blobs[i].elempack);
296
        }
297

298
        VkMat bottom_blob_unpacked = bottom_blob;
299
        if (elempack > out_elempack)
300
        {
301
            vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
302
        }
303

304
        int woffset = 0;
305
        for (size_t i = 0; i < top_blobs.size(); i++)
306
        {
307
            VkMat& top_blob = top_blobs[i];
308

309
            std::vector<VkMat> bindings(2);
310
            bindings[0] = bottom_blob_unpacked;
311
            bindings[1] = top_blob;
312

313
            std::vector<vk_constant_type> constants(13);
314
            constants[0].i = bottom_blob_unpacked.dims;
315
            constants[1].i = bottom_blob_unpacked.w;
316
            constants[2].i = bottom_blob_unpacked.h;
317
            constants[3].i = bottom_blob_unpacked.d;
318
            constants[4].i = bottom_blob_unpacked.c;
319
            constants[5].i = bottom_blob_unpacked.cstep;
320
            constants[6].i = top_blob.dims;
321
            constants[7].i = top_blob.w;
322
            constants[8].i = top_blob.h;
323
            constants[9].i = top_blob.d;
324
            constants[10].i = top_blob.c;
325
            constants[11].i = top_blob.cstep;
326
            constants[12].i = woffset;
327

328
            const Pipeline* pipeline = 0;
329
            if (out_elempack == 1 && top_blob.elempack == 1)
330
            {
331
                pipeline = pipeline_slice[i % 2];
332
            }
333
            else if (out_elempack == 4 && top_blob.elempack == 4)
334
            {
335
                pipeline = pipeline_slice_pack4[i % 2];
336
            }
337
            else if (out_elempack == 1 && top_blob.elempack == 4)
338
            {
339
                pipeline = pipeline_slice_pack1to4[i % 2];
340
            }
341
            else if (out_elempack == 8 && top_blob.elempack == 8)
342
            {
343
                pipeline = pipeline_slice_pack8[i % 2];
344
            }
345
            else if (out_elempack == 1 && top_blob.elempack == 8)
346
            {
347
                pipeline = pipeline_slice_pack1to8[i % 2];
348
            }
349
            else if (out_elempack == 4 && top_blob.elempack == 8)
350
            {
351
                pipeline = pipeline_slice_pack4to8[i % 2];
352
            }
353

354
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
355

356
            woffset += top_blob.w * top_blob.elempack / out_elempack;
357
        }
358

359
        return 0;
360
    }
361

362
    if (dims == 2 && positive_axis == 0)
363
    {
364
        // slice image height
365
        int w = bottom_blob.w;
366
        int h = bottom_blob.h * elempack;
367

368
        int q = 0;
369
        for (size_t i = 0; i < top_blobs.size(); i++)
370
        {
371
            int slice;
372
            if (indices_ptr)
373
            {
374
                if (i == top_blobs.size() - 1)
375
                {
376
                    slice = h - q;
377
                }
378
                else
379
                {
380
                    int indice = indices_ptr[i];
381
                    int positive_indice = indice < 0 ? h + indice : indice;
382
                    slice = positive_indice - q;
383
                }
384
            }
385
            else
386
            {
387
                slice = slices_ptr[i];
388
                if (slice == -233)
389
                {
390
                    slice = static_cast<int>((h - q) / (top_blobs.size() - i));
391
                }
392
            }
393

394
            int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
395
            size_t out_elemsize = elemsize / elempack * out_elempack;
396

397
            if (opt.use_fp16_packed && !opt.use_fp16_storage)
398
            {
399
                if (out_elempack == 8) out_elemsize = 8 * 2u;
400
                if (out_elempack == 4) out_elemsize = 4 * 2u;
401
                if (out_elempack == 1) out_elemsize = 4u;
402
            }
403

404
            VkMat& top_blob = top_blobs[i];
405
            top_blob.create(w, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
406
            if (top_blob.empty())
407
                return -100;
408

409
            q += slice;
410
        }
411

412
        int out_elempack = top_blobs[0].elempack;
413
        for (size_t i = 0; i < top_blobs.size(); i++)
414
        {
415
            out_elempack = std::min(out_elempack, top_blobs[i].elempack);
416
        }
417

418
        VkMat bottom_blob_unpacked = bottom_blob;
419
        if (elempack > out_elempack)
420
        {
421
            vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
422
        }
423

424
        int hoffset = 0;
425
        for (size_t i = 0; i < top_blobs.size(); i++)
426
        {
427
            VkMat& top_blob = top_blobs[i];
428

429
            std::vector<VkMat> bindings(2);
430
            bindings[0] = bottom_blob_unpacked;
431
            bindings[1] = top_blob;
432

433
            std::vector<vk_constant_type> constants(13);
434
            constants[0].i = bottom_blob_unpacked.dims;
435
            constants[1].i = bottom_blob_unpacked.w;
436
            constants[2].i = bottom_blob_unpacked.h;
437
            constants[3].i = bottom_blob_unpacked.d;
438
            constants[4].i = bottom_blob_unpacked.c;
439
            constants[5].i = bottom_blob_unpacked.cstep;
440
            constants[6].i = top_blob.dims;
441
            constants[7].i = top_blob.w;
442
            constants[8].i = top_blob.h;
443
            constants[9].i = top_blob.d;
444
            constants[10].i = top_blob.c;
445
            constants[11].i = top_blob.cstep;
446
            constants[12].i = hoffset;
447

448
            const Pipeline* pipeline = 0;
449
            if (out_elempack == 1 && top_blob.elempack == 1)
450
            {
451
                pipeline = pipeline_slice[i % 2];
452
            }
453
            else if (out_elempack == 4 && top_blob.elempack == 4)
454
            {
455
                pipeline = pipeline_slice_pack4[i % 2];
456
            }
457
            else if (out_elempack == 1 && top_blob.elempack == 4)
458
            {
459
                pipeline = pipeline_slice_pack1to4[i % 2];
460
            }
461
            else if (out_elempack == 8 && top_blob.elempack == 8)
462
            {
463
                pipeline = pipeline_slice_pack8[i % 2];
464
            }
465
            else if (out_elempack == 1 && top_blob.elempack == 8)
466
            {
467
                pipeline = pipeline_slice_pack1to8[i % 2];
468
            }
469
            else if (out_elempack == 4 && top_blob.elempack == 8)
470
            {
471
                pipeline = pipeline_slice_pack4to8[i % 2];
472
            }
473

474
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
475

476
            hoffset += top_blob.h * top_blob.elempack / out_elempack;
477
        }
478

479
        return 0;
480
    }
481

482
    if (dims == 2 && positive_axis == 1)
483
    {
484
        // slice image width
485
        int w = bottom_blob.w;
486
        int h = bottom_blob.h;
487

488
        int q = 0;
489
        for (size_t i = 0; i < top_blobs.size(); i++)
490
        {
491
            int slice;
492
            if (indices_ptr)
493
            {
494
                if (i == top_blobs.size() - 1)
495
                {
496
                    slice = w - q;
497
                }
498
                else
499
                {
500
                    int indice = indices_ptr[i];
501
                    int positive_indice = indice < 0 ? w + indice : indice;
502
                    slice = positive_indice - q;
503
                }
504
            }
505
            else
506
            {
507
                slice = slices_ptr[i];
508
                if (slice == -233)
509
                {
510
                    slice = static_cast<int>((w - q) / (top_blobs.size() - i));
511
                }
512
            }
513

514
            VkMat& top_blob = top_blobs[i];
515
            top_blob.create(slice, h, elemsize, elempack, opt.blob_vkallocator);
516
            if (top_blob.empty())
517
                return -100;
518

519
            q += slice;
520
        }
521

522
        int woffset = 0;
523
        for (size_t i = 0; i < top_blobs.size(); i++)
524
        {
525
            VkMat& top_blob = top_blobs[i];
526

527
            std::vector<VkMat> bindings(2);
528
            bindings[0] = bottom_blob;
529
            bindings[1] = top_blob;
530

531
            std::vector<vk_constant_type> constants(13);
532
            constants[0].i = bottom_blob.dims;
533
            constants[1].i = bottom_blob.w;
534
            constants[2].i = bottom_blob.h;
535
            constants[3].i = bottom_blob.d;
536
            constants[4].i = bottom_blob.c;
537
            constants[5].i = bottom_blob.cstep;
538
            constants[6].i = top_blob.dims;
539
            constants[7].i = top_blob.w;
540
            constants[8].i = top_blob.h;
541
            constants[9].i = top_blob.d;
542
            constants[10].i = top_blob.c;
543
            constants[11].i = top_blob.cstep;
544
            constants[12].i = woffset;
545

546
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
547
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
548
                                       : pipeline_slice[i % 2];
549

550
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
551

552
            woffset += top_blob.w;
553
        }
554

555
        return 0;
556
    }
557

558
    if (dims == 3 && positive_axis == 0)
559
    {
560
        // slice dim channel
561
        int w = bottom_blob.w;
562
        int h = bottom_blob.h;
563
        int channels = bottom_blob.c * elempack;
564

565
        int q = 0;
566
        for (size_t i = 0; i < top_blobs.size(); i++)
567
        {
568
            int slice;
569
            if (indices_ptr)
570
            {
571
                if (i == top_blobs.size() - 1)
572
                {
573
                    slice = channels - q;
574
                }
575
                else
576
                {
577
                    int indice = indices_ptr[i];
578
                    int positive_indice = indice < 0 ? channels + indice : indice;
579
                    slice = positive_indice - q;
580
                }
581
            }
582
            else
583
            {
584
                slice = slices_ptr[i];
585
                if (slice == -233)
586
                {
587
                    slice = static_cast<int>((channels - q) / (top_blobs.size() - i));
588
                }
589
            }
590

591
            int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
592
            size_t out_elemsize = elemsize / elempack * out_elempack;
593

594
            if (opt.use_fp16_packed && !opt.use_fp16_storage)
595
            {
596
                if (out_elempack == 8) out_elemsize = 8 * 2u;
597
                if (out_elempack == 4) out_elemsize = 4 * 2u;
598
                if (out_elempack == 1) out_elemsize = 4u;
599
            }
600

601
            VkMat& top_blob = top_blobs[i];
602
            top_blob.create(w, h, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
603
            if (top_blob.empty())
604
                return -100;
605

606
            q += slice;
607
        }
608

609
        int out_elempack = top_blobs[0].elempack;
610
        for (size_t i = 0; i < top_blobs.size(); i++)
611
        {
612
            out_elempack = std::min(out_elempack, top_blobs[i].elempack);
613
        }
614

615
        VkMat bottom_blob_unpacked = bottom_blob;
616
        if (elempack > out_elempack)
617
        {
618
            vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
619
        }
620

621
        int coffset = 0;
622
        for (size_t i = 0; i < top_blobs.size(); i++)
623
        {
624
            VkMat& top_blob = top_blobs[i];
625

626
            std::vector<VkMat> bindings(2);
627
            bindings[0] = bottom_blob_unpacked;
628
            bindings[1] = top_blob;
629

630
            std::vector<vk_constant_type> constants(13);
631
            constants[0].i = bottom_blob_unpacked.dims;
632
            constants[1].i = bottom_blob_unpacked.w;
633
            constants[2].i = bottom_blob_unpacked.h;
634
            constants[3].i = bottom_blob_unpacked.d;
635
            constants[4].i = bottom_blob_unpacked.c;
636
            constants[5].i = bottom_blob_unpacked.cstep;
637
            constants[6].i = top_blob.dims;
638
            constants[7].i = top_blob.w;
639
            constants[8].i = top_blob.h;
640
            constants[9].i = top_blob.d;
641
            constants[10].i = top_blob.c;
642
            constants[11].i = top_blob.cstep;
643
            constants[12].i = coffset;
644

645
            const Pipeline* pipeline = 0;
646
            if (out_elempack == 1 && top_blob.elempack == 1)
647
            {
648
                pipeline = pipeline_slice[i % 2];
649
            }
650
            else if (out_elempack == 4 && top_blob.elempack == 4)
651
            {
652
                pipeline = pipeline_slice_pack4[i % 2];
653
            }
654
            else if (out_elempack == 1 && top_blob.elempack == 4)
655
            {
656
                pipeline = pipeline_slice_pack1to4[i % 2];
657
            }
658
            else if (out_elempack == 8 && top_blob.elempack == 8)
659
            {
660
                pipeline = pipeline_slice_pack8[i % 2];
661
            }
662
            else if (out_elempack == 1 && top_blob.elempack == 8)
663
            {
664
                pipeline = pipeline_slice_pack1to8[i % 2];
665
            }
666
            else if (out_elempack == 4 && top_blob.elempack == 8)
667
            {
668
                pipeline = pipeline_slice_pack4to8[i % 2];
669
            }
670

671
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
672

673
            coffset += top_blob.c * top_blob.elempack / out_elempack;
674
        }
675

676
        return 0;
677
    }
678

679
    if (dims == 3 && positive_axis == 1)
680
    {
681
        // slice dim height
682
        int w = bottom_blob.w;
683
        int h = bottom_blob.h;
684
        int channels = bottom_blob.c;
685

686
        int q = 0;
687
        for (size_t i = 0; i < top_blobs.size(); i++)
688
        {
689
            int slice;
690
            if (indices_ptr)
691
            {
692
                if (i == top_blobs.size() - 1)
693
                {
694
                    slice = h - q;
695
                }
696
                else
697
                {
698
                    int indice = indices_ptr[i];
699
                    int positive_indice = indice < 0 ? h + indice : indice;
700
                    slice = positive_indice - q;
701
                }
702
            }
703
            else
704
            {
705
                slice = slices_ptr[i];
706
                if (slice == -233)
707
                {
708
                    slice = static_cast<int>((h - q) / (top_blobs.size() - i));
709
                }
710
            }
711

712
            VkMat& top_blob = top_blobs[i];
713
            top_blob.create(w, slice, channels, elemsize, elempack, opt.blob_vkallocator);
714
            if (top_blob.empty())
715
                return -100;
716

717
            q += slice;
718
        }
719

720
        int hoffset = 0;
721
        for (size_t i = 0; i < top_blobs.size(); i++)
722
        {
723
            VkMat& top_blob = top_blobs[i];
724

725
            std::vector<VkMat> bindings(2);
726
            bindings[0] = bottom_blob;
727
            bindings[1] = top_blob;
728

729
            std::vector<vk_constant_type> constants(13);
730
            constants[0].i = bottom_blob.dims;
731
            constants[1].i = bottom_blob.w;
732
            constants[2].i = bottom_blob.h;
733
            constants[3].i = bottom_blob.d;
734
            constants[4].i = bottom_blob.c;
735
            constants[5].i = bottom_blob.cstep;
736
            constants[6].i = top_blob.dims;
737
            constants[7].i = top_blob.w;
738
            constants[8].i = top_blob.h;
739
            constants[9].i = top_blob.d;
740
            constants[10].i = top_blob.c;
741
            constants[11].i = top_blob.cstep;
742
            constants[12].i = hoffset;
743

744
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
745
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
746
                                       : pipeline_slice[i % 2];
747

748
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
749

750
            hoffset += top_blob.h;
751
        }
752

753
        return 0;
754
    }
755

756
    if (dims == 3 && positive_axis == 2)
757
    {
758
        // slice dim width
759
        int w = bottom_blob.w;
760
        int h = bottom_blob.h;
761
        int channels = bottom_blob.c;
762

763
        int q = 0;
764
        for (size_t i = 0; i < top_blobs.size(); i++)
765
        {
766
            int slice;
767
            if (indices_ptr)
768
            {
769
                if (i == top_blobs.size() - 1)
770
                {
771
                    slice = w - q;
772
                }
773
                else
774
                {
775
                    int indice = indices_ptr[i];
776
                    int positive_indice = indice < 0 ? w + indice : indice;
777
                    slice = positive_indice - q;
778
                }
779
            }
780
            else
781
            {
782
                slice = slices_ptr[i];
783
                if (slice == -233)
784
                {
785
                    slice = static_cast<int>((w - q) / (top_blobs.size() - i));
786
                }
787
            }
788

789
            VkMat& top_blob = top_blobs[i];
790
            top_blob.create(slice, h, channels, elemsize, elempack, opt.blob_vkallocator);
791
            if (top_blob.empty())
792
                return -100;
793

794
            q += slice;
795
        }
796

797
        int woffset = 0;
798
        for (size_t i = 0; i < top_blobs.size(); i++)
799
        {
800
            VkMat& top_blob = top_blobs[i];
801

802
            std::vector<VkMat> bindings(2);
803
            bindings[0] = bottom_blob;
804
            bindings[1] = top_blob;
805

806
            std::vector<vk_constant_type> constants(13);
807
            constants[0].i = bottom_blob.dims;
808
            constants[1].i = bottom_blob.w;
809
            constants[2].i = bottom_blob.h;
810
            constants[3].i = bottom_blob.d;
811
            constants[4].i = bottom_blob.c;
812
            constants[5].i = bottom_blob.cstep;
813
            constants[6].i = top_blob.dims;
814
            constants[7].i = top_blob.w;
815
            constants[8].i = top_blob.h;
816
            constants[9].i = top_blob.d;
817
            constants[10].i = top_blob.c;
818
            constants[11].i = top_blob.cstep;
819
            constants[12].i = woffset;
820

821
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
822
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
823
                                       : pipeline_slice[i % 2];
824

825
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
826

827
            woffset += top_blob.w;
828
        }
829

830
        return 0;
831
    }
832

833
    if (dims == 4 && positive_axis == 0)
834
    {
835
        int w = bottom_blob.w;
836
        int h = bottom_blob.h;
837
        int d = bottom_blob.d;
838
        int channels = bottom_blob.c * elempack;
839

840
        int q = 0;
841
        for (size_t i = 0; i < top_blobs.size(); i++)
842
        {
843
            int slice;
844
            if (indices_ptr)
845
            {
846
                if (i == top_blobs.size() - 1)
847
                {
848
                    slice = channels - q;
849
                }
850
                else
851
                {
852
                    int indice = indices_ptr[i];
853
                    int positive_indice = indice < 0 ? channels + indice : indice;
854
                    slice = positive_indice - q;
855
                }
856
            }
857
            else
858
            {
859
                slice = slices_ptr[i];
860
                if (slice == -233)
861
                {
862
                    slice = static_cast<int>((channels - q) / (top_blobs.size() - i));
863
                }
864
            }
865

866
            int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
867
            size_t out_elemsize = elemsize / elempack * out_elempack;
868

869
            if (opt.use_fp16_packed && !opt.use_fp16_storage)
870
            {
871
                if (out_elempack == 8) out_elemsize = 8 * 2u;
872
                if (out_elempack == 4) out_elemsize = 4 * 2u;
873
                if (out_elempack == 1) out_elemsize = 4u;
874
            }
875

876
            VkMat& top_blob = top_blobs[i];
877
            top_blob.create(w, h, d, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
878
            if (top_blob.empty())
879
                return -100;
880

881
            q += slice;
882
        }
883

884
        int out_elempack = top_blobs[0].elempack;
885
        for (size_t i = 0; i < top_blobs.size(); i++)
886
        {
887
            out_elempack = std::min(out_elempack, top_blobs[i].elempack);
888
        }
889

890
        VkMat bottom_blob_unpacked = bottom_blob;
891
        if (elempack > out_elempack)
892
        {
893
            vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
894
        }
895

896
        int coffset = 0;
897
        for (size_t i = 0; i < top_blobs.size(); i++)
898
        {
899
            VkMat& top_blob = top_blobs[i];
900

901
            std::vector<VkMat> bindings(2);
902
            bindings[0] = bottom_blob_unpacked;
903
            bindings[1] = top_blob;
904

905
            std::vector<vk_constant_type> constants(13);
906
            constants[0].i = bottom_blob_unpacked.dims;
907
            constants[1].i = bottom_blob_unpacked.w;
908
            constants[2].i = bottom_blob_unpacked.h;
909
            constants[3].i = bottom_blob_unpacked.d;
910
            constants[4].i = bottom_blob_unpacked.c;
911
            constants[5].i = bottom_blob_unpacked.cstep;
912
            constants[6].i = top_blob.dims;
913
            constants[7].i = top_blob.w;
914
            constants[8].i = top_blob.h;
915
            constants[9].i = top_blob.d;
916
            constants[10].i = top_blob.c;
917
            constants[11].i = top_blob.cstep;
918
            constants[12].i = coffset;
919

920
            const Pipeline* pipeline = 0;
921
            if (out_elempack == 1 && top_blob.elempack == 1)
922
            {
923
                pipeline = pipeline_slice[i % 2];
924
            }
925
            else if (out_elempack == 4 && top_blob.elempack == 4)
926
            {
927
                pipeline = pipeline_slice_pack4[i % 2];
928
            }
929
            else if (out_elempack == 1 && top_blob.elempack == 4)
930
            {
931
                pipeline = pipeline_slice_pack1to4[i % 2];
932
            }
933
            else if (out_elempack == 8 && top_blob.elempack == 8)
934
            {
935
                pipeline = pipeline_slice_pack8[i % 2];
936
            }
937
            else if (out_elempack == 1 && top_blob.elempack == 8)
938
            {
939
                pipeline = pipeline_slice_pack1to8[i % 2];
940
            }
941
            else if (out_elempack == 4 && top_blob.elempack == 8)
942
            {
943
                pipeline = pipeline_slice_pack4to8[i % 2];
944
            }
945

946
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
947

948
            coffset += top_blob.c * top_blob.elempack / out_elempack;
949
        }
950

951
        return 0;
952
    }
953

954
    if (dims == 4 && positive_axis == 1)
955
    {
956
        int w = bottom_blob.w;
957
        int h = bottom_blob.h;
958
        int d = bottom_blob.d;
959
        int channels = bottom_blob.c;
960

961
        int q = 0;
962
        for (size_t i = 0; i < top_blobs.size(); i++)
963
        {
964
            int slice;
965
            if (indices_ptr)
966
            {
967
                if (i == top_blobs.size() - 1)
968
                {
969
                    slice = d - q;
970
                }
971
                else
972
                {
973
                    int indice = indices_ptr[i];
974
                    int positive_indice = indice < 0 ? d + indice : indice;
975
                    slice = positive_indice - q;
976
                }
977
            }
978
            else
979
            {
980
                slice = slices_ptr[i];
981
                if (slice == -233)
982
                {
983
                    slice = static_cast<int>((d - q) / (top_blobs.size() - i));
984
                }
985
            }
986

987
            VkMat& top_blob = top_blobs[i];
988
            top_blob.create(w, h, slice, channels, elemsize, elempack, opt.blob_vkallocator);
989
            if (top_blob.empty())
990
                return -100;
991

992
            q += slice;
993
        }
994

995
        int doffset = 0;
996
        for (size_t i = 0; i < top_blobs.size(); i++)
997
        {
998
            VkMat& top_blob = top_blobs[i];
999

1000
            std::vector<VkMat> bindings(2);
1001
            bindings[0] = bottom_blob;
1002
            bindings[1] = top_blob;
1003

1004
            std::vector<vk_constant_type> constants(13);
1005
            constants[0].i = bottom_blob.dims;
1006
            constants[1].i = bottom_blob.w;
1007
            constants[2].i = bottom_blob.h;
1008
            constants[3].i = bottom_blob.d;
1009
            constants[4].i = bottom_blob.c;
1010
            constants[5].i = bottom_blob.cstep;
1011
            constants[6].i = top_blob.dims;
1012
            constants[7].i = top_blob.w;
1013
            constants[8].i = top_blob.h;
1014
            constants[9].i = top_blob.d;
1015
            constants[10].i = top_blob.c;
1016
            constants[11].i = top_blob.cstep;
1017
            constants[12].i = doffset;
1018

1019
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1020
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
1021
                                       : pipeline_slice[i % 2];
1022

1023
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1024

1025
            doffset += top_blob.d;
1026
        }
1027

1028
        return 0;
1029
    }
1030

1031
    if (dims == 4 && positive_axis == 2)
1032
    {
1033
        int w = bottom_blob.w;
1034
        int h = bottom_blob.h;
1035
        int d = bottom_blob.d;
1036
        int channels = bottom_blob.c;
1037

1038
        int q = 0;
1039
        for (size_t i = 0; i < top_blobs.size(); i++)
1040
        {
1041
            int slice;
1042
            if (indices_ptr)
1043
            {
1044
                if (i == top_blobs.size() - 1)
1045
                {
1046
                    slice = h - q;
1047
                }
1048
                else
1049
                {
1050
                    int indice = indices_ptr[i];
1051
                    int positive_indice = indice < 0 ? h + indice : indice;
1052
                    slice = positive_indice - q;
1053
                }
1054
            }
1055
            else
1056
            {
1057
                slice = slices_ptr[i];
1058
                if (slice == -233)
1059
                {
1060
                    slice = static_cast<int>((h - q) / (top_blobs.size() - i));
1061
                }
1062
            }
1063

1064
            VkMat& top_blob = top_blobs[i];
1065
            top_blob.create(w, slice, d, channels, elemsize, elempack, opt.blob_vkallocator);
1066
            if (top_blob.empty())
1067
                return -100;
1068

1069
            q += slice;
1070
        }
1071

1072
        int hoffset = 0;
1073
        for (size_t i = 0; i < top_blobs.size(); i++)
1074
        {
1075
            VkMat& top_blob = top_blobs[i];
1076

1077
            std::vector<VkMat> bindings(2);
1078
            bindings[0] = bottom_blob;
1079
            bindings[1] = top_blob;
1080

1081
            std::vector<vk_constant_type> constants(13);
1082
            constants[0].i = bottom_blob.dims;
1083
            constants[1].i = bottom_blob.w;
1084
            constants[2].i = bottom_blob.h;
1085
            constants[3].i = bottom_blob.d;
1086
            constants[4].i = bottom_blob.c;
1087
            constants[5].i = bottom_blob.cstep;
1088
            constants[6].i = top_blob.dims;
1089
            constants[7].i = top_blob.w;
1090
            constants[8].i = top_blob.h;
1091
            constants[9].i = top_blob.d;
1092
            constants[10].i = top_blob.c;
1093
            constants[11].i = top_blob.cstep;
1094
            constants[12].i = hoffset;
1095

1096
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1097
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
1098
                                       : pipeline_slice[i % 2];
1099

1100
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1101

1102
            hoffset += top_blob.h;
1103
        }
1104

1105
        return 0;
1106
    }
1107

1108
    if (dims == 4 && positive_axis == 3)
1109
    {
1110
        int w = bottom_blob.w;
1111
        int h = bottom_blob.h;
1112
        int d = bottom_blob.d;
1113
        int channels = bottom_blob.c;
1114

1115
        int q = 0;
1116
        for (size_t i = 0; i < top_blobs.size(); i++)
1117
        {
1118
            int slice;
1119
            if (indices_ptr)
1120
            {
1121
                if (i == top_blobs.size() - 1)
1122
                {
1123
                    slice = w - q;
1124
                }
1125
                else
1126
                {
1127
                    int indice = indices_ptr[i];
1128
                    int positive_indice = indice < 0 ? w + indice : indice;
1129
                    slice = positive_indice - q;
1130
                }
1131
            }
1132
            else
1133
            {
1134
                slice = slices_ptr[i];
1135
                if (slice == -233)
1136
                {
1137
                    slice = static_cast<int>((w - q) / (top_blobs.size() - i));
1138
                }
1139
            }
1140

1141
            VkMat& top_blob = top_blobs[i];
1142
            top_blob.create(slice, h, d, channels, elemsize, elempack, opt.blob_vkallocator);
1143
            if (top_blob.empty())
1144
                return -100;
1145

1146
            q += slice;
1147
        }
1148

1149
        int woffset = 0;
1150
        for (size_t i = 0; i < top_blobs.size(); i++)
1151
        {
1152
            VkMat& top_blob = top_blobs[i];
1153

1154
            std::vector<VkMat> bindings(2);
1155
            bindings[0] = bottom_blob;
1156
            bindings[1] = top_blob;
1157

1158
            std::vector<vk_constant_type> constants(13);
1159
            constants[0].i = bottom_blob.dims;
1160
            constants[1].i = bottom_blob.w;
1161
            constants[2].i = bottom_blob.h;
1162
            constants[3].i = bottom_blob.d;
1163
            constants[4].i = bottom_blob.c;
1164
            constants[5].i = bottom_blob.cstep;
1165
            constants[6].i = top_blob.dims;
1166
            constants[7].i = top_blob.w;
1167
            constants[8].i = top_blob.h;
1168
            constants[9].i = top_blob.d;
1169
            constants[10].i = top_blob.c;
1170
            constants[11].i = top_blob.cstep;
1171
            constants[12].i = woffset;
1172

1173
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1174
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
1175
                                       : pipeline_slice[i % 2];
1176

1177
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1178

1179
            woffset += top_blob.w;
1180
        }
1181

1182
        return 0;
1183
    }
1184

1185
    return 0;
1186
}
1187

1188
int Slice_vulkan::forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const
1189
{
1190
    const VkImageMat& bottom_blob = bottom_blobs[0];
1191
    int dims = bottom_blob.dims;
1192
    size_t elemsize = bottom_blob.elemsize;
1193
    int elempack = bottom_blob.elempack;
1194
    const int* slices_ptr = slices;
1195
    const int* indices_ptr = indices;
1196
    int positive_axis = axis < 0 ? dims + axis : axis;
1197

1198
    if (dims == 1) // positive_axis == 0
1199
    {
1200
        // slice vector
1201
        int w = bottom_blob.w * elempack;
1202
        int q = 0;
1203
        for (size_t i = 0; i < top_blobs.size(); i++)
1204
        {
1205
            int slice;
1206
            if (indices_ptr)
1207
            {
1208
                if (i == top_blobs.size() - 1)
1209
                {
1210
                    slice = w - q;
1211
                }
1212
                else
1213
                {
1214
                    int indice = indices_ptr[i];
1215
                    int positive_indice = indice < 0 ? w + indice : indice;
1216
                    slice = positive_indice - q;
1217
                }
1218
            }
1219
            else
1220
            {
1221
                slice = slices_ptr[i];
1222
                if (slice == -233)
1223
                {
1224
                    slice = static_cast<int>((w - q) / (top_blobs.size() - i));
1225
                }
1226
            }
1227

1228
            int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
1229
            size_t out_elemsize = elemsize / elempack * out_elempack;
1230

1231
            if (opt.use_fp16_packed && !opt.use_fp16_storage)
1232
            {
1233
                if (out_elempack == 8) out_elemsize = 8 * 2u;
1234
                if (out_elempack == 4) out_elemsize = 4 * 2u;
1235
                if (out_elempack == 1) out_elemsize = 4u;
1236
            }
1237

1238
            VkImageMat& top_blob = top_blobs[i];
1239
            top_blob.create(slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1240
            if (top_blob.empty())
1241
                return -100;
1242

1243
            q += slice;
1244
        }
1245

1246
        int out_elempack = top_blobs[0].elempack;
1247
        for (size_t i = 0; i < top_blobs.size(); i++)
1248
        {
1249
            out_elempack = std::min(out_elempack, top_blobs[i].elempack);
1250
        }
1251

1252
        VkImageMat bottom_blob_unpacked = bottom_blob;
1253
        if (elempack > out_elempack)
1254
        {
1255
            vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
1256
        }
1257

1258
        int woffset = 0;
1259
        for (size_t i = 0; i < top_blobs.size(); i++)
1260
        {
1261
            VkImageMat& top_blob = top_blobs[i];
1262

1263
            std::vector<VkImageMat> bindings(2);
1264
            bindings[0] = bottom_blob_unpacked;
1265
            bindings[1] = top_blob;
1266

1267
            std::vector<vk_constant_type> constants(13);
1268
            constants[0].i = bottom_blob_unpacked.dims;
1269
            constants[1].i = bottom_blob_unpacked.w;
1270
            constants[2].i = bottom_blob_unpacked.h;
1271
            constants[3].i = bottom_blob_unpacked.d;
1272
            constants[4].i = bottom_blob_unpacked.c;
1273
            constants[5].i = 0; //bottom_blob_unpacked.cstep;
1274
            constants[6].i = top_blob.dims;
1275
            constants[7].i = top_blob.w;
1276
            constants[8].i = top_blob.h;
1277
            constants[9].i = top_blob.d;
1278
            constants[10].i = top_blob.c;
1279
            constants[11].i = 0; //top_blob.cstep;
1280
            constants[12].i = woffset;
1281

1282
            const Pipeline* pipeline = 0;
1283
            if (out_elempack == 1 && top_blob.elempack == 1)
1284
            {
1285
                pipeline = pipeline_slice[i % 2];
1286
            }
1287
            else if (out_elempack == 4 && top_blob.elempack == 4)
1288
            {
1289
                pipeline = pipeline_slice_pack4[i % 2];
1290
            }
1291
            else if (out_elempack == 1 && top_blob.elempack == 4)
1292
            {
1293
                pipeline = pipeline_slice_pack1to4[i % 2];
1294
            }
1295
            else if (out_elempack == 8 && top_blob.elempack == 8)
1296
            {
1297
                pipeline = pipeline_slice_pack8[i % 2];
1298
            }
1299
            else if (out_elempack == 1 && top_blob.elempack == 8)
1300
            {
1301
                pipeline = pipeline_slice_pack1to8[i % 2];
1302
            }
1303
            else if (out_elempack == 4 && top_blob.elempack == 8)
1304
            {
1305
                pipeline = pipeline_slice_pack4to8[i % 2];
1306
            }
1307

1308
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1309

1310
            woffset += top_blob.w * top_blob.elempack / out_elempack;
1311
        }
1312

1313
        return 0;
1314
    }
1315

1316
    if (dims == 2 && positive_axis == 0)
1317
    {
1318
        // slice image height
1319
        int w = bottom_blob.w;
1320
        int h = bottom_blob.h * elempack;
1321

1322
        int q = 0;
1323
        for (size_t i = 0; i < top_blobs.size(); i++)
1324
        {
1325
            int slice;
1326
            if (indices_ptr)
1327
            {
1328
                if (i == top_blobs.size() - 1)
1329
                {
1330
                    slice = h - q;
1331
                }
1332
                else
1333
                {
1334
                    int indice = indices_ptr[i];
1335
                    int positive_indice = indice < 0 ? h + indice : indice;
1336
                    slice = positive_indice - q;
1337
                }
1338
            }
1339
            else
1340
            {
1341
                slice = slices_ptr[i];
1342
                if (slice == -233)
1343
                {
1344
                    slice = static_cast<int>((h - q) / (top_blobs.size() - i));
1345
                }
1346
            }
1347

1348
            int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
1349
            size_t out_elemsize = elemsize / elempack * out_elempack;
1350

1351
            if (opt.use_fp16_packed && !opt.use_fp16_storage)
1352
            {
1353
                if (out_elempack == 8) out_elemsize = 8 * 2u;
1354
                if (out_elempack == 4) out_elemsize = 4 * 2u;
1355
                if (out_elempack == 1) out_elemsize = 4u;
1356
            }
1357

1358
            VkImageMat& top_blob = top_blobs[i];
1359
            top_blob.create(w, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1360
            if (top_blob.empty())
1361
                return -100;
1362

1363
            q += slice;
1364
        }
1365

1366
        int out_elempack = top_blobs[0].elempack;
1367
        for (size_t i = 0; i < top_blobs.size(); i++)
1368
        {
1369
            out_elempack = std::min(out_elempack, top_blobs[i].elempack);
1370
        }
1371

1372
        VkImageMat bottom_blob_unpacked = bottom_blob;
1373
        if (elempack > out_elempack)
1374
        {
1375
            vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
1376
        }
1377

1378
        int hoffset = 0;
1379
        for (size_t i = 0; i < top_blobs.size(); i++)
1380
        {
1381
            VkImageMat& top_blob = top_blobs[i];
1382

1383
            std::vector<VkImageMat> bindings(2);
1384
            bindings[0] = bottom_blob_unpacked;
1385
            bindings[1] = top_blob;
1386

1387
            std::vector<vk_constant_type> constants(13);
1388
            constants[0].i = bottom_blob_unpacked.dims;
1389
            constants[1].i = bottom_blob_unpacked.w;
1390
            constants[2].i = bottom_blob_unpacked.h;
1391
            constants[3].i = bottom_blob_unpacked.d;
1392
            constants[4].i = bottom_blob_unpacked.c;
1393
            constants[5].i = 0; //bottom_blob_unpacked.cstep;
1394
            constants[6].i = top_blob.dims;
1395
            constants[7].i = top_blob.w;
1396
            constants[8].i = top_blob.h;
1397
            constants[9].i = top_blob.d;
1398
            constants[10].i = top_blob.c;
1399
            constants[11].i = 0; //top_blob.cstep;
1400
            constants[12].i = hoffset;
1401

1402
            const Pipeline* pipeline = 0;
1403
            if (out_elempack == 1 && top_blob.elempack == 1)
1404
            {
1405
                pipeline = pipeline_slice[i % 2];
1406
            }
1407
            else if (out_elempack == 4 && top_blob.elempack == 4)
1408
            {
1409
                pipeline = pipeline_slice_pack4[i % 2];
1410
            }
1411
            else if (out_elempack == 1 && top_blob.elempack == 4)
1412
            {
1413
                pipeline = pipeline_slice_pack1to4[i % 2];
1414
            }
1415
            else if (out_elempack == 8 && top_blob.elempack == 8)
1416
            {
1417
                pipeline = pipeline_slice_pack8[i % 2];
1418
            }
1419
            else if (out_elempack == 1 && top_blob.elempack == 8)
1420
            {
1421
                pipeline = pipeline_slice_pack1to8[i % 2];
1422
            }
1423
            else if (out_elempack == 4 && top_blob.elempack == 8)
1424
            {
1425
                pipeline = pipeline_slice_pack4to8[i % 2];
1426
            }
1427

1428
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1429

1430
            hoffset += top_blob.h * top_blob.elempack / out_elempack;
1431
        }
1432

1433
        return 0;
1434
    }
1435

1436
    if (dims == 2 && positive_axis == 1)
1437
    {
1438
        // slice image width
1439
        int w = bottom_blob.w;
1440
        int h = bottom_blob.h;
1441

1442
        int q = 0;
1443
        for (size_t i = 0; i < top_blobs.size(); i++)
1444
        {
1445
            int slice;
1446
            if (indices_ptr)
1447
            {
1448
                if (i == top_blobs.size() - 1)
1449
                {
1450
                    slice = w - q;
1451
                }
1452
                else
1453
                {
1454
                    int indice = indices_ptr[i];
1455
                    int positive_indice = indice < 0 ? w + indice : indice;
1456
                    slice = positive_indice - q;
1457
                }
1458
            }
1459
            else
1460
            {
1461
                slice = slices_ptr[i];
1462
                if (slice == -233)
1463
                {
1464
                    slice = static_cast<int>((w - q) / (top_blobs.size() - i));
1465
                }
1466
            }
1467

1468
            VkImageMat& top_blob = top_blobs[i];
1469
            top_blob.create(slice, h, elemsize, elempack, opt.blob_vkallocator);
1470
            if (top_blob.empty())
1471
                return -100;
1472

1473
            q += slice;
1474
        }
1475

1476
        int woffset = 0;
1477
        for (size_t i = 0; i < top_blobs.size(); i++)
1478
        {
1479
            VkImageMat& top_blob = top_blobs[i];
1480

1481
            std::vector<VkImageMat> bindings(2);
1482
            bindings[0] = bottom_blob;
1483
            bindings[1] = top_blob;
1484

1485
            std::vector<vk_constant_type> constants(13);
1486
            constants[0].i = bottom_blob.dims;
1487
            constants[1].i = bottom_blob.w;
1488
            constants[2].i = bottom_blob.h;
1489
            constants[3].i = bottom_blob.d;
1490
            constants[4].i = bottom_blob.c;
1491
            constants[5].i = 0; //bottom_blob.cstep;
1492
            constants[6].i = top_blob.dims;
1493
            constants[7].i = top_blob.w;
1494
            constants[8].i = top_blob.h;
1495
            constants[9].i = top_blob.d;
1496
            constants[10].i = top_blob.c;
1497
            constants[11].i = 0; //top_blob.cstep;
1498
            constants[12].i = woffset;
1499

1500
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1501
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
1502
                                       : pipeline_slice[i % 2];
1503

1504
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1505

1506
            woffset += top_blob.w;
1507
        }
1508

1509
        return 0;
1510
    }
1511

1512
    if (dims == 3 && positive_axis == 0)
1513
    {
1514
        // slice dim channel
1515
        int w = bottom_blob.w;
1516
        int h = bottom_blob.h;
1517
        int channels = bottom_blob.c * elempack;
1518

1519
        int q = 0;
1520
        for (size_t i = 0; i < top_blobs.size(); i++)
1521
        {
1522
            int slice;
1523
            if (indices_ptr)
1524
            {
1525
                if (i == top_blobs.size() - 1)
1526
                {
1527
                    slice = channels - q;
1528
                }
1529
                else
1530
                {
1531
                    int indice = indices_ptr[i];
1532
                    int positive_indice = indice < 0 ? channels + indice : indice;
1533
                    slice = positive_indice - q;
1534
                }
1535
            }
1536
            else
1537
            {
1538
                slice = slices_ptr[i];
1539
                if (slice == -233)
1540
                {
1541
                    slice = static_cast<int>((channels - q) / (top_blobs.size() - i));
1542
                }
1543
            }
1544

1545
            int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
1546
            size_t out_elemsize = elemsize / elempack * out_elempack;
1547

1548
            if (opt.use_fp16_packed && !opt.use_fp16_storage)
1549
            {
1550
                if (out_elempack == 8) out_elemsize = 8 * 2u;
1551
                if (out_elempack == 4) out_elemsize = 4 * 2u;
1552
                if (out_elempack == 1) out_elemsize = 4u;
1553
            }
1554

1555
            VkImageMat& top_blob = top_blobs[i];
1556
            top_blob.create(w, h, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1557
            if (top_blob.empty())
1558
                return -100;
1559

1560
            q += slice;
1561
        }
1562

1563
        int out_elempack = top_blobs[0].elempack;
1564
        for (size_t i = 0; i < top_blobs.size(); i++)
1565
        {
1566
            out_elempack = std::min(out_elempack, top_blobs[i].elempack);
1567
        }
1568

1569
        VkImageMat bottom_blob_unpacked = bottom_blob;
1570
        if (elempack > out_elempack)
1571
        {
1572
            vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
1573
        }
1574

1575
        int coffset = 0;
1576
        for (size_t i = 0; i < top_blobs.size(); i++)
1577
        {
1578
            VkImageMat& top_blob = top_blobs[i];
1579

1580
            std::vector<VkImageMat> bindings(2);
1581
            bindings[0] = bottom_blob_unpacked;
1582
            bindings[1] = top_blob;
1583

1584
            std::vector<vk_constant_type> constants(13);
1585
            constants[0].i = bottom_blob_unpacked.dims;
1586
            constants[1].i = bottom_blob_unpacked.w;
1587
            constants[2].i = bottom_blob_unpacked.h;
1588
            constants[3].i = bottom_blob_unpacked.d;
1589
            constants[4].i = bottom_blob_unpacked.c;
1590
            constants[5].i = 0; //bottom_blob_unpacked.cstep;
1591
            constants[6].i = top_blob.dims;
1592
            constants[7].i = top_blob.w;
1593
            constants[8].i = top_blob.h;
1594
            constants[9].i = top_blob.d;
1595
            constants[10].i = top_blob.c;
1596
            constants[11].i = 0; //top_blob.cstep;
1597
            constants[12].i = coffset;
1598

1599
            const Pipeline* pipeline = 0;
1600
            if (out_elempack == 1 && top_blob.elempack == 1)
1601
            {
1602
                pipeline = pipeline_slice[i % 2];
1603
            }
1604
            else if (out_elempack == 4 && top_blob.elempack == 4)
1605
            {
1606
                pipeline = pipeline_slice_pack4[i % 2];
1607
            }
1608
            else if (out_elempack == 1 && top_blob.elempack == 4)
1609
            {
1610
                pipeline = pipeline_slice_pack1to4[i % 2];
1611
            }
1612
            else if (out_elempack == 8 && top_blob.elempack == 8)
1613
            {
1614
                pipeline = pipeline_slice_pack8[i % 2];
1615
            }
1616
            else if (out_elempack == 1 && top_blob.elempack == 8)
1617
            {
1618
                pipeline = pipeline_slice_pack1to8[i % 2];
1619
            }
1620
            else if (out_elempack == 4 && top_blob.elempack == 8)
1621
            {
1622
                pipeline = pipeline_slice_pack4to8[i % 2];
1623
            }
1624

1625
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1626

1627
            coffset += top_blob.c * top_blob.elempack / out_elempack;
1628
        }
1629

1630
        return 0;
1631
    }
1632

1633
    if (dims == 3 && positive_axis == 1)
1634
    {
1635
        // slice dim height
1636
        int w = bottom_blob.w;
1637
        int h = bottom_blob.h;
1638
        int channels = bottom_blob.c;
1639

1640
        int q = 0;
1641
        for (size_t i = 0; i < top_blobs.size(); i++)
1642
        {
1643
            int slice;
1644
            if (indices_ptr)
1645
            {
1646
                if (i == top_blobs.size() - 1)
1647
                {
1648
                    slice = h - q;
1649
                }
1650
                else
1651
                {
1652
                    int indice = indices_ptr[i];
1653
                    int positive_indice = indice < 0 ? h + indice : indice;
1654
                    slice = positive_indice - q;
1655
                }
1656
            }
1657
            else
1658
            {
1659
                slice = slices_ptr[i];
1660
                if (slice == -233)
1661
                {
1662
                    slice = static_cast<int>((h - q) / (top_blobs.size() - i));
1663
                }
1664
            }
1665

1666
            VkImageMat& top_blob = top_blobs[i];
1667
            top_blob.create(w, slice, channels, elemsize, elempack, opt.blob_vkallocator);
1668
            if (top_blob.empty())
1669
                return -100;
1670

1671
            q += slice;
1672
        }
1673

1674
        int hoffset = 0;
1675
        for (size_t i = 0; i < top_blobs.size(); i++)
1676
        {
1677
            VkImageMat& top_blob = top_blobs[i];
1678

1679
            std::vector<VkImageMat> bindings(2);
1680
            bindings[0] = bottom_blob;
1681
            bindings[1] = top_blob;
1682

1683
            std::vector<vk_constant_type> constants(13);
1684
            constants[0].i = bottom_blob.dims;
1685
            constants[1].i = bottom_blob.w;
1686
            constants[2].i = bottom_blob.h;
1687
            constants[3].i = bottom_blob.d;
1688
            constants[4].i = bottom_blob.c;
1689
            constants[5].i = 0; //bottom_blob.cstep;
1690
            constants[6].i = top_blob.dims;
1691
            constants[7].i = top_blob.w;
1692
            constants[8].i = top_blob.h;
1693
            constants[9].i = top_blob.d;
1694
            constants[10].i = top_blob.c;
1695
            constants[11].i = 0; //top_blob.cstep;
1696
            constants[12].i = hoffset;
1697

1698
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1699
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
1700
                                       : pipeline_slice[i % 2];
1701

1702
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1703

1704
            hoffset += top_blob.h;
1705
        }
1706

1707
        return 0;
1708
    }
1709

1710
    if (dims == 3 && positive_axis == 2)
1711
    {
1712
        // slice dim width
1713
        int w = bottom_blob.w;
1714
        int h = bottom_blob.h;
1715
        int channels = bottom_blob.c;
1716

1717
        int q = 0;
1718
        for (size_t i = 0; i < top_blobs.size(); i++)
1719
        {
1720
            int slice;
1721
            if (indices_ptr)
1722
            {
1723
                if (i == top_blobs.size() - 1)
1724
                {
1725
                    slice = w - q;
1726
                }
1727
                else
1728
                {
1729
                    int indice = indices_ptr[i];
1730
                    int positive_indice = indice < 0 ? w + indice : indice;
1731
                    slice = positive_indice - q;
1732
                }
1733
            }
1734
            else
1735
            {
1736
                slice = slices_ptr[i];
1737
                if (slice == -233)
1738
                {
1739
                    slice = static_cast<int>((w - q) / (top_blobs.size() - i));
1740
                }
1741
            }
1742

1743
            VkImageMat& top_blob = top_blobs[i];
1744
            top_blob.create(slice, h, channels, elemsize, elempack, opt.blob_vkallocator);
1745
            if (top_blob.empty())
1746
                return -100;
1747

1748
            q += slice;
1749
        }
1750

1751
        int woffset = 0;
1752
        for (size_t i = 0; i < top_blobs.size(); i++)
1753
        {
1754
            VkImageMat& top_blob = top_blobs[i];
1755

1756
            std::vector<VkImageMat> bindings(2);
1757
            bindings[0] = bottom_blob;
1758
            bindings[1] = top_blob;
1759

1760
            std::vector<vk_constant_type> constants(13);
1761
            constants[0].i = bottom_blob.dims;
1762
            constants[1].i = bottom_blob.w;
1763
            constants[2].i = bottom_blob.h;
1764
            constants[3].i = bottom_blob.d;
1765
            constants[4].i = bottom_blob.c;
1766
            constants[5].i = 0; //bottom_blob.cstep;
1767
            constants[6].i = top_blob.dims;
1768
            constants[7].i = top_blob.w;
1769
            constants[8].i = top_blob.h;
1770
            constants[9].i = top_blob.d;
1771
            constants[10].i = top_blob.c;
1772
            constants[11].i = 0; //top_blob.cstep;
1773
            constants[12].i = woffset;
1774

1775
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1776
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
1777
                                       : pipeline_slice[i % 2];
1778

1779
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1780

1781
            woffset += top_blob.w;
1782
        }
1783

1784
        return 0;
1785
    }
1786

1787
    if (dims == 4 && positive_axis == 0)
1788
    {
1789
        int w = bottom_blob.w;
1790
        int h = bottom_blob.h;
1791
        int d = bottom_blob.d;
1792
        int channels = bottom_blob.c * elempack;
1793

1794
        int q = 0;
1795
        for (size_t i = 0; i < top_blobs.size(); i++)
1796
        {
1797
            int slice;
1798
            if (indices_ptr)
1799
            {
1800
                if (i == top_blobs.size() - 1)
1801
                {
1802
                    slice = channels - q;
1803
                }
1804
                else
1805
                {
1806
                    int indice = indices_ptr[i];
1807
                    int positive_indice = indice < 0 ? channels + indice : indice;
1808
                    slice = positive_indice - q;
1809
                }
1810
            }
1811
            else
1812
            {
1813
                slice = slices_ptr[i];
1814
                if (slice == -233)
1815
                {
1816
                    slice = static_cast<int>((channels - q) / (top_blobs.size() - i));
1817
                }
1818
            }
1819

1820
            int out_elempack = opt.use_shader_pack8 && slice % 8 == 0 ? 8 : slice % 4 == 0 ? 4 : 1;
1821
            size_t out_elemsize = elemsize / elempack * out_elempack;
1822

1823
            if (opt.use_fp16_packed && !opt.use_fp16_storage)
1824
            {
1825
                if (out_elempack == 8) out_elemsize = 8 * 2u;
1826
                if (out_elempack == 4) out_elemsize = 4 * 2u;
1827
                if (out_elempack == 1) out_elemsize = 4u;
1828
            }
1829

1830
            VkImageMat& top_blob = top_blobs[i];
1831
            top_blob.create(w, h, d, slice / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
1832
            if (top_blob.empty())
1833
                return -100;
1834

1835
            q += slice;
1836
        }
1837

1838
        int out_elempack = top_blobs[0].elempack;
1839
        for (size_t i = 0; i < top_blobs.size(); i++)
1840
        {
1841
            out_elempack = std::min(out_elempack, top_blobs[i].elempack);
1842
        }
1843

1844
        VkImageMat bottom_blob_unpacked = bottom_blob;
1845
        if (elempack > out_elempack)
1846
        {
1847
            vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, out_elempack, cmd, opt);
1848
        }
1849

1850
        int coffset = 0;
1851
        for (size_t i = 0; i < top_blobs.size(); i++)
1852
        {
1853
            VkImageMat& top_blob = top_blobs[i];
1854

1855
            std::vector<VkImageMat> bindings(2);
1856
            bindings[0] = bottom_blob_unpacked;
1857
            bindings[1] = top_blob;
1858

1859
            std::vector<vk_constant_type> constants(13);
1860
            constants[0].i = bottom_blob_unpacked.dims;
1861
            constants[1].i = bottom_blob_unpacked.w;
1862
            constants[2].i = bottom_blob_unpacked.h;
1863
            constants[3].i = bottom_blob_unpacked.d;
1864
            constants[4].i = bottom_blob_unpacked.c;
1865
            constants[5].i = 0; //bottom_blob_unpacked.cstep;
1866
            constants[6].i = top_blob.dims;
1867
            constants[7].i = top_blob.w;
1868
            constants[8].i = top_blob.h;
1869
            constants[9].i = top_blob.d;
1870
            constants[10].i = top_blob.c;
1871
            constants[11].i = 0; //top_blob.cstep;
1872
            constants[12].i = coffset;
1873

1874
            const Pipeline* pipeline = 0;
1875
            if (out_elempack == 1 && top_blob.elempack == 1)
1876
            {
1877
                pipeline = pipeline_slice[i % 2];
1878
            }
1879
            else if (out_elempack == 4 && top_blob.elempack == 4)
1880
            {
1881
                pipeline = pipeline_slice_pack4[i % 2];
1882
            }
1883
            else if (out_elempack == 1 && top_blob.elempack == 4)
1884
            {
1885
                pipeline = pipeline_slice_pack1to4[i % 2];
1886
            }
1887
            else if (out_elempack == 8 && top_blob.elempack == 8)
1888
            {
1889
                pipeline = pipeline_slice_pack8[i % 2];
1890
            }
1891
            else if (out_elempack == 1 && top_blob.elempack == 8)
1892
            {
1893
                pipeline = pipeline_slice_pack1to8[i % 2];
1894
            }
1895
            else if (out_elempack == 4 && top_blob.elempack == 8)
1896
            {
1897
                pipeline = pipeline_slice_pack4to8[i % 2];
1898
            }
1899

1900
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1901

1902
            coffset += top_blob.c * top_blob.elempack / out_elempack;
1903
        }
1904

1905
        return 0;
1906
    }
1907

1908
    if (dims == 4 && positive_axis == 1)
1909
    {
1910
        int w = bottom_blob.w;
1911
        int h = bottom_blob.h;
1912
        int d = bottom_blob.d;
1913
        int channels = bottom_blob.c;
1914

1915
        int q = 0;
1916
        for (size_t i = 0; i < top_blobs.size(); i++)
1917
        {
1918
            int slice;
1919
            if (indices_ptr)
1920
            {
1921
                if (i == top_blobs.size() - 1)
1922
                {
1923
                    slice = d - q;
1924
                }
1925
                else
1926
                {
1927
                    int indice = indices_ptr[i];
1928
                    int positive_indice = indice < 0 ? d + indice : indice;
1929
                    slice = positive_indice - q;
1930
                }
1931
            }
1932
            else
1933
            {
1934
                slice = slices_ptr[i];
1935
                if (slice == -233)
1936
                {
1937
                    slice = static_cast<int>((d - q) / (top_blobs.size() - i));
1938
                }
1939
            }
1940

1941
            VkImageMat& top_blob = top_blobs[i];
1942
            top_blob.create(w, h, slice, channels, elemsize, elempack, opt.blob_vkallocator);
1943
            if (top_blob.empty())
1944
                return -100;
1945

1946
            q += slice;
1947
        }
1948

1949
        int doffset = 0;
1950
        for (size_t i = 0; i < top_blobs.size(); i++)
1951
        {
1952
            VkImageMat& top_blob = top_blobs[i];
1953

1954
            std::vector<VkImageMat> bindings(2);
1955
            bindings[0] = bottom_blob;
1956
            bindings[1] = top_blob;
1957

1958
            std::vector<vk_constant_type> constants(13);
1959
            constants[0].i = bottom_blob.dims;
1960
            constants[1].i = bottom_blob.w;
1961
            constants[2].i = bottom_blob.h;
1962
            constants[3].i = bottom_blob.d;
1963
            constants[4].i = bottom_blob.c;
1964
            constants[5].i = 0; //bottom_blob.cstep;
1965
            constants[6].i = top_blob.dims;
1966
            constants[7].i = top_blob.w;
1967
            constants[8].i = top_blob.h;
1968
            constants[9].i = top_blob.d;
1969
            constants[10].i = top_blob.c;
1970
            constants[11].i = 0; //top_blob.cstep;
1971
            constants[12].i = doffset;
1972

1973
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
1974
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
1975
                                       : pipeline_slice[i % 2];
1976

1977
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1978

1979
            doffset += top_blob.d;
1980
        }
1981

1982
        return 0;
1983
    }
1984

1985
    if (dims == 4 && positive_axis == 2)
1986
    {
1987
        int w = bottom_blob.w;
1988
        int h = bottom_blob.h;
1989
        int d = bottom_blob.d;
1990
        int channels = bottom_blob.c;
1991

1992
        int q = 0;
1993
        for (size_t i = 0; i < top_blobs.size(); i++)
1994
        {
1995
            int slice;
1996
            if (indices_ptr)
1997
            {
1998
                if (i == top_blobs.size() - 1)
1999
                {
2000
                    slice = h - q;
2001
                }
2002
                else
2003
                {
2004
                    int indice = indices_ptr[i];
2005
                    int positive_indice = indice < 0 ? h + indice : indice;
2006
                    slice = positive_indice - q;
2007
                }
2008
            }
2009
            else
2010
            {
2011
                slice = slices_ptr[i];
2012
                if (slice == -233)
2013
                {
2014
                    slice = static_cast<int>((h - q) / (top_blobs.size() - i));
2015
                }
2016
            }
2017

2018
            VkImageMat& top_blob = top_blobs[i];
2019
            top_blob.create(w, slice, d, channels, elemsize, elempack, opt.blob_vkallocator);
2020
            if (top_blob.empty())
2021
                return -100;
2022

2023
            q += slice;
2024
        }
2025

2026
        int hoffset = 0;
2027
        for (size_t i = 0; i < top_blobs.size(); i++)
2028
        {
2029
            VkImageMat& top_blob = top_blobs[i];
2030

2031
            std::vector<VkImageMat> bindings(2);
2032
            bindings[0] = bottom_blob;
2033
            bindings[1] = top_blob;
2034

2035
            std::vector<vk_constant_type> constants(13);
2036
            constants[0].i = bottom_blob.dims;
2037
            constants[1].i = bottom_blob.w;
2038
            constants[2].i = bottom_blob.h;
2039
            constants[3].i = bottom_blob.d;
2040
            constants[4].i = bottom_blob.c;
2041
            constants[5].i = 0; //bottom_blob.cstep;
2042
            constants[6].i = top_blob.dims;
2043
            constants[7].i = top_blob.w;
2044
            constants[8].i = top_blob.h;
2045
            constants[9].i = top_blob.d;
2046
            constants[10].i = top_blob.c;
2047
            constants[11].i = 0; //top_blob.cstep;
2048
            constants[12].i = hoffset;
2049

2050
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
2051
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
2052
                                       : pipeline_slice[i % 2];
2053

2054
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
2055

2056
            hoffset += top_blob.h;
2057
        }
2058

2059
        return 0;
2060
    }
2061

2062
    if (dims == 4 && positive_axis == 3)
2063
    {
2064
        int w = bottom_blob.w;
2065
        int h = bottom_blob.h;
2066
        int d = bottom_blob.d;
2067
        int channels = bottom_blob.c;
2068

2069
        int q = 0;
2070
        for (size_t i = 0; i < top_blobs.size(); i++)
2071
        {
2072
            int slice;
2073
            if (indices_ptr)
2074
            {
2075
                if (i == top_blobs.size() - 1)
2076
                {
2077
                    slice = w - q;
2078
                }
2079
                else
2080
                {
2081
                    int indice = indices_ptr[i];
2082
                    int positive_indice = indice < 0 ? w + indice : indice;
2083
                    slice = positive_indice - q;
2084
                }
2085
            }
2086
            else
2087
            {
2088
                slice = slices_ptr[i];
2089
                if (slice == -233)
2090
                {
2091
                    slice = static_cast<int>((w - q) / (top_blobs.size() - i));
2092
                }
2093
            }
2094

2095
            VkImageMat& top_blob = top_blobs[i];
2096
            top_blob.create(slice, h, d, channels, elemsize, elempack, opt.blob_vkallocator);
2097
            if (top_blob.empty())
2098
                return -100;
2099

2100
            q += slice;
2101
        }
2102

2103
        int woffset = 0;
2104
        for (size_t i = 0; i < top_blobs.size(); i++)
2105
        {
2106
            VkImageMat& top_blob = top_blobs[i];
2107

2108
            std::vector<VkImageMat> bindings(2);
2109
            bindings[0] = bottom_blob;
2110
            bindings[1] = top_blob;
2111

2112
            std::vector<vk_constant_type> constants(13);
2113
            constants[0].i = bottom_blob.dims;
2114
            constants[1].i = bottom_blob.w;
2115
            constants[2].i = bottom_blob.h;
2116
            constants[3].i = bottom_blob.d;
2117
            constants[4].i = bottom_blob.c;
2118
            constants[5].i = 0; //bottom_blob.cstep;
2119
            constants[6].i = top_blob.dims;
2120
            constants[7].i = top_blob.w;
2121
            constants[8].i = top_blob.h;
2122
            constants[9].i = top_blob.d;
2123
            constants[10].i = top_blob.c;
2124
            constants[11].i = 0; //top_blob.cstep;
2125
            constants[12].i = woffset;
2126

2127
            const Pipeline* pipeline = elempack == 8 ? pipeline_slice_pack8[i % 2]
2128
                                       : elempack == 4 ? pipeline_slice_pack4[i % 2]
2129
                                       : pipeline_slice[i % 2];
2130

2131
            cmd.record_pipeline(pipeline, bindings, constants, top_blob);
2132

2133
            woffset += top_blob.w;
2134
        }
2135

2136
        return 0;
2137
    }
2138

2139
    return 0;
2140
}
2141

2142
} // namespace ncnn
2143

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.