ncnn

Форк
0
/
crop_vulkan.cpp 
1051 строка · 35.8 Кб
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

15
#include "crop_vulkan.h"
16

17
#include "layer_shader_type.h"
18
#include "layer_type.h"
19

20
namespace ncnn {
21

22
// Sets the support_vulkan and support_image_storage flags and leaves all nine
// pipeline slots null; create_pipeline() is what fills them in.
Crop_vulkan::Crop_vulkan()
{
    // pack1 / pack4 family
    pipeline_crop = pipeline_crop_pack4 = pipeline_crop_pack1to4 = pipeline_crop_pack4to1 = 0;

    // pack8 family
    pipeline_crop_pack8 = pipeline_crop_pack1to8 = pipeline_crop_pack4to8 = 0;
    pipeline_crop_pack8to4 = pipeline_crop_pack8to1 = 0;

    support_image_storage = true;
    support_vulkan = true;
}
37

38
int Crop_vulkan::create_pipeline(const Option& opt)
39
{
40
    const Mat& shape = bottom_shapes.empty() ? Mat() : bottom_shapes[0];
41
    const Mat& out_shape = top_shapes.empty() ? Mat() : top_shapes[0];
42

43
    int elempack = 1;
44
    if (shape.dims == 1) elempack = opt.use_shader_pack8 && shape.w % 8 == 0 ? 8 : shape.w % 4 == 0 ? 4 : 1;
45
    if (shape.dims == 2) elempack = opt.use_shader_pack8 && shape.h % 8 == 0 ? 8 : shape.h % 4 == 0 ? 4 : 1;
46
    if (shape.dims == 3 || shape.dims == 4) elempack = opt.use_shader_pack8 && shape.c % 8 == 0 ? 8 : shape.c % 4 == 0 ? 4 : 1;
47

48
    int out_elempack = 1;
49
    if (out_shape.dims == 1) out_elempack = opt.use_shader_pack8 && out_shape.w % 8 == 0 ? 8 : out_shape.w % 4 == 0 ? 4 : 1;
50
    if (out_shape.dims == 2) out_elempack = opt.use_shader_pack8 && out_shape.h % 8 == 0 ? 8 : out_shape.h % 4 == 0 ? 4 : 1;
51
    if (out_shape.dims == 3 || out_shape.dims == 4) out_elempack = opt.use_shader_pack8 && out_shape.c % 8 == 0 ? 8 : out_shape.c % 4 == 0 ? 4 : 1;
52

53
    int offset_elempack = 1;
54
    bool numpy_style_slice = !starts.empty() && !ends.empty();
55
    if (numpy_style_slice)
56
    {
57
        offset_elempack = elempack;
58

59
        const int* starts_ptr = starts;
60
        const int* axes_ptr = axes;
61

62
        int _axes[4] = {0, 1, 2, 3};
63
        int num_axis = axes.w;
64
        if (num_axis == 0)
65
        {
66
            num_axis = shape.dims;
67
        }
68
        else
69
        {
70
            for (int i = 0; i < num_axis; i++)
71
            {
72
                int axis = axes_ptr[i];
73
                if (axis < 0)
74
                    axis = shape.dims + axis;
75
                _axes[i] = axis;
76
            }
77
        }
78

79
        for (int i = 0; i < num_axis; i++)
80
        {
81
            int start = starts_ptr[i];
82
            int axis = _axes[i];
83

84
            if (shape.dims == 1 && axis == 0)
85
            {
86
                int _woffset = start >= 0 ? start : shape.w + start;
87
                offset_elempack = opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
88
            }
89
            if (shape.dims == 2 && axis == 0)
90
            {
91
                int _hoffset = start >= 0 ? start : shape.h + start;
92
                offset_elempack = opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
93
            }
94
            if ((shape.dims == 3 || shape.dims == 4) && axis == 0)
95
            {
96
                int _coffset = start >= 0 ? start : shape.c + start;
97
                offset_elempack = opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
98
            }
99
        }
100
    }
101
    else
102
    {
103
        if (shape.dims == 1)
104
        {
105
            if (woffset == 0)
106
                offset_elempack = elempack;
107
            else
108
                offset_elempack = opt.use_shader_pack8 && woffset % 8 == 0 ? 8 : woffset % 4 == 0 ? 4 : 1;
109
        }
110
        else if (shape.dims == 2)
111
        {
112
            if (hoffset == 0)
113
                offset_elempack = elempack;
114
            else
115
                offset_elempack = opt.use_shader_pack8 && hoffset % 8 == 0 ? 8 : hoffset % 4 == 0 ? 4 : 1;
116
        }
117
        else // if (shape.dims == 3 || shape.dims == 4)
118
        {
119
            if (coffset == 0)
120
                offset_elempack = elempack;
121
            else
122
                offset_elempack = opt.use_shader_pack8 && coffset % 8 == 0 ? 8 : coffset % 4 == 0 ? 4 : 1;
123
        }
124
    }
125

126
    offset_elempack = std::min(offset_elempack, elempack);
127

128
    size_t elemsize;
129
    size_t out_elemsize;
130
    if (opt.use_fp16_storage)
131
    {
132
        elemsize = elempack * 2u;
133
        out_elemsize = out_elempack * 2u;
134
    }
135
    else if (opt.use_fp16_packed)
136
    {
137
        elemsize = elempack == 1 ? 4u : elempack * 2u;
138
        out_elemsize = out_elempack == 1 ? 4u : out_elempack * 2u;
139
    }
140
    else
141
    {
142
        elemsize = elempack * 4u;
143
        out_elemsize = out_elempack * 4u;
144
    }
145

146
    Mat shape_packed;
147
    if (shape.dims == 1) shape_packed = Mat(shape.w / elempack, (void*)0, elemsize, elempack);
148
    if (shape.dims == 2) shape_packed = Mat(shape.w, shape.h / elempack, (void*)0, elemsize, elempack);
149
    if (shape.dims == 3) shape_packed = Mat(shape.w, shape.h, shape.c / elempack, (void*)0, elemsize, elempack);
150
    if (shape.dims == 4) shape_packed = Mat(shape.w, shape.h, shape.d, shape.c / elempack, (void*)0, elemsize, elempack);
151

152
    Mat out_shape_packed;
153
    if (out_shape.dims == 1) out_shape_packed = Mat(out_shape.w / out_elempack, (void*)0, out_elemsize, out_elempack);
154
    if (out_shape.dims == 2) out_shape_packed = Mat(out_shape.w, out_shape.h / out_elempack, (void*)0, out_elemsize, out_elempack);
155
    if (out_shape.dims == 3) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
156
    if (out_shape.dims == 4) out_shape_packed = Mat(out_shape.w, out_shape.h, out_shape.d, out_shape.c / out_elempack, (void*)0, out_elemsize, out_elempack);
157

158
    Mat shape_unpacked = shape_packed;
159
    if (one_blob_only && shape.dims != 0 && elempack == out_elempack && elempack > offset_elempack)
160
    {
161
        size_t offset_elemsize;
162
        if (opt.use_fp16_storage)
163
        {
164
            offset_elemsize = offset_elempack * 2u;
165
        }
166
        else if (opt.use_fp16_packed)
167
        {
168
            offset_elemsize = offset_elempack == 1 ? 4u : offset_elempack * 2u;
169
        }
170
        else
171
        {
172
            offset_elemsize = offset_elempack * 4u;
173
        }
174

175
        if (shape.dims == 1) shape_unpacked = Mat(shape.w / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
176
        if (shape.dims == 2) shape_unpacked = Mat(shape.w, shape.h / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
177
        if (shape.dims == 3) shape_unpacked = Mat(shape.w, shape.h, shape.c / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
178
        if (shape.dims == 4) shape_unpacked = Mat(shape.w, shape.h, shape.d, shape.c / offset_elempack, (void*)0, offset_elemsize, offset_elempack);
179
    }
180

181
    std::vector<vk_specialization_type> specializations(1 + 12);
182
    specializations[0].i = vkdev->info.bug_implicit_fp16_arithmetic();
183
    specializations[1 + 0].i = shape_unpacked.dims;
184
    specializations[1 + 1].i = shape_unpacked.w;
185
    specializations[1 + 2].i = shape_unpacked.h;
186
    specializations[1 + 3].i = shape_unpacked.d;
187
    specializations[1 + 4].i = shape_unpacked.c;
188
    specializations[1 + 5].i = shape_unpacked.cstep;
189
    specializations[1 + 6].i = out_shape_packed.dims;
190
    specializations[1 + 7].i = out_shape_packed.w;
191
    specializations[1 + 8].i = out_shape_packed.h;
192
    specializations[1 + 9].i = out_shape_packed.d;
193
    specializations[1 + 10].i = out_shape_packed.c;
194
    specializations[1 + 11].i = out_shape_packed.cstep;
195

196
    Mat local_size_xyz;
197
    if (out_shape_packed.dims == 1)
198
    {
199
        local_size_xyz.w = std::min(64, out_shape_packed.w);
200
        local_size_xyz.h = 1;
201
        local_size_xyz.c = 1;
202
    }
203
    if (out_shape_packed.dims == 2)
204
    {
205
        local_size_xyz.w = std::min(8, out_shape_packed.w);
206
        local_size_xyz.h = std::min(8, out_shape_packed.h);
207
        local_size_xyz.c = 1;
208
    }
209
    if (out_shape_packed.dims == 3)
210
    {
211
        local_size_xyz.w = std::min(4, out_shape_packed.w);
212
        local_size_xyz.h = std::min(4, out_shape_packed.h);
213
        local_size_xyz.c = std::min(4, out_shape_packed.c);
214
    }
215
    if (out_shape_packed.dims == 4)
216
    {
217
        local_size_xyz.w = std::min(4, out_shape_packed.w);
218
        local_size_xyz.h = std::min(4, out_shape_packed.h * out_shape_packed.d);
219
        local_size_xyz.c = std::min(4, out_shape_packed.c);
220
    }
221

222
    // pack1
223
    if (out_shape.dims == 0 || out_elempack == 1)
224
    {
225
        pipeline_crop = new Pipeline(vkdev);
226
        pipeline_crop->set_optimal_local_size_xyz(local_size_xyz);
227
        pipeline_crop->create(LayerShaderType::crop, opt, specializations);
228
    }
229

230
    // pack4
231
    if (out_shape.dims == 0 || out_elempack == 4)
232
    {
233
        pipeline_crop_pack4 = new Pipeline(vkdev);
234
        pipeline_crop_pack4->set_optimal_local_size_xyz(local_size_xyz);
235
        pipeline_crop_pack4->create(LayerShaderType::crop_pack4, opt, specializations);
236
    }
237

238
    // pack1to4
239
    if (out_shape.dims == 0 || out_elempack == 4)
240
    {
241
        pipeline_crop_pack1to4 = new Pipeline(vkdev);
242
        pipeline_crop_pack1to4->set_optimal_local_size_xyz(local_size_xyz);
243
        pipeline_crop_pack1to4->create(LayerShaderType::crop_pack1to4, opt, specializations);
244
    }
245

246
    // pack4to1
247
    if (out_shape.dims == 0 || out_elempack == 1)
248
    {
249
        pipeline_crop_pack4to1 = new Pipeline(vkdev);
250
        pipeline_crop_pack4to1->set_optimal_local_size_xyz(local_size_xyz);
251
        pipeline_crop_pack4to1->create(LayerShaderType::crop_pack4to1, opt, specializations);
252
    }
253

254
    // pack8
255
    if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 8))
256
    {
257
        pipeline_crop_pack8 = new Pipeline(vkdev);
258
        pipeline_crop_pack8->set_optimal_local_size_xyz(local_size_xyz);
259
        pipeline_crop_pack8->create(LayerShaderType::crop_pack8, opt, specializations);
260
    }
261

262
    // pack1to8
263
    if ((opt.use_shader_pack8 && out_shape.dims == 0) || out_elempack == 8)
264
    {
265
        pipeline_crop_pack1to8 = new Pipeline(vkdev);
266
        pipeline_crop_pack1to8->set_optimal_local_size_xyz(local_size_xyz);
267
        pipeline_crop_pack1to8->create(LayerShaderType::crop_pack1to8, opt, specializations);
268
    }
269

270
    // pack4to8
271
    if ((opt.use_shader_pack8 && out_shape.dims == 0) || out_elempack == 8)
272
    {
273
        pipeline_crop_pack4to8 = new Pipeline(vkdev);
274
        pipeline_crop_pack4to8->set_optimal_local_size_xyz(local_size_xyz);
275
        pipeline_crop_pack4to8->create(LayerShaderType::crop_pack4to8, opt, specializations);
276
    }
277

278
    // pack8to4
279
    if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 4))
280
    {
281
        pipeline_crop_pack8to4 = new Pipeline(vkdev);
282
        pipeline_crop_pack8to4->set_optimal_local_size_xyz(local_size_xyz);
283
        pipeline_crop_pack8to4->create(LayerShaderType::crop_pack8to4, opt, specializations);
284
    }
285

286
    // pack8to1
287
    if ((opt.use_shader_pack8 && out_shape.dims == 0) || (elempack == 8 && out_elempack == 1))
288
    {
289
        pipeline_crop_pack8to1 = new Pipeline(vkdev);
290
        pipeline_crop_pack8to1->set_optimal_local_size_xyz(local_size_xyz);
291
        pipeline_crop_pack8to1->create(LayerShaderType::crop_pack8to1, opt, specializations);
292
    }
293

294
    return 0;
295
}
296

297
int Crop_vulkan::destroy_pipeline(const Option& /*opt*/)
298
{
299
    delete pipeline_crop;
300
    pipeline_crop = 0;
301

302
    delete pipeline_crop_pack4;
303
    pipeline_crop_pack4 = 0;
304

305
    delete pipeline_crop_pack1to4;
306
    pipeline_crop_pack1to4 = 0;
307

308
    delete pipeline_crop_pack4to1;
309
    pipeline_crop_pack4to1 = 0;
310

311
    delete pipeline_crop_pack8;
312
    pipeline_crop_pack8 = 0;
313

314
    delete pipeline_crop_pack1to8;
315
    pipeline_crop_pack1to8 = 0;
316

317
    delete pipeline_crop_pack4to8;
318
    pipeline_crop_pack4to8 = 0;
319

320
    delete pipeline_crop_pack8to4;
321
    pipeline_crop_pack8to4 = 0;
322

323
    delete pipeline_crop_pack8to1;
324
    pipeline_crop_pack8to1 = 0;
325

326
    return 0;
327
}
328

329
int Crop_vulkan::forward(const VkMat& bottom_blob, VkMat& top_blob, VkCompute& cmd, const Option& opt) const
330
{
331
    int dims = bottom_blob.dims;
332
    size_t elemsize = bottom_blob.elemsize;
333
    int elempack = bottom_blob.elempack;
334

335
    int _woffset, _hoffset, _doffset, _coffset;
336
    int _outw, _outh, _outd, _outc;
337
    resolve_crop_roi(bottom_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
338

339
    int offset_elempack;
340
    int out_elempack;
341

342
    if (dims == 1)
343
    {
344
        if (_woffset == 0 && _outw == bottom_blob.w * elempack)
345
        {
346
            top_blob = bottom_blob;
347
            return 0;
348
        }
349

350
        offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
351
        out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
352
    }
353
    else if (dims == 2)
354
    {
355
        if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
356
        {
357
            top_blob = bottom_blob;
358
            return 0;
359
        }
360

361
        offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
362
        out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
363
    }
364
    else if (dims == 3)
365
    {
366
        if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
367
        {
368
            top_blob = bottom_blob;
369
            return 0;
370
        }
371

372
        offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
373
        out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
374
    }
375
    else // if (dims == 4)
376
    {
377
        if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
378
        {
379
            top_blob = bottom_blob;
380
            return 0;
381
        }
382

383
        offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
384
        out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
385
    }
386

387
    offset_elempack = std::min(offset_elempack, elempack);
388

389
    size_t out_elemsize = elemsize / elempack * out_elempack;
390

391
    if (opt.use_fp16_packed && !opt.use_fp16_storage)
392
    {
393
        if (out_elempack == 8) out_elemsize = 8 * 2u;
394
        if (out_elempack == 4) out_elemsize = 4 * 2u;
395
        if (out_elempack == 1) out_elemsize = 4u;
396
    }
397

398
    // unpacking
399
    VkMat bottom_blob_unpacked = bottom_blob;
400
    if (elempack == out_elempack && elempack > offset_elempack)
401
    {
402
        Option opt_pack1 = opt;
403
        opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
404

405
        vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
406
    }
407

408
    if (dims == 1)
409
    {
410
        top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
411
    }
412
    else if (dims == 2)
413
    {
414
        top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
415
    }
416
    else if (dims == 3)
417
    {
418
        top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
419
    }
420
    else // if (dims == 4)
421
    {
422
        top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
423
    }
424
    if (top_blob.empty())
425
        return -100;
426

427
    std::vector<VkMat> bindings(2);
428
    bindings[0] = bottom_blob_unpacked;
429
    bindings[1] = top_blob;
430

431
    std::vector<vk_constant_type> constants(16);
432
    constants[0].i = bottom_blob_unpacked.dims;
433
    constants[1].i = bottom_blob_unpacked.w;
434
    constants[2].i = bottom_blob_unpacked.h;
435
    constants[3].i = bottom_blob_unpacked.d;
436
    constants[4].i = bottom_blob_unpacked.c;
437
    constants[5].i = bottom_blob_unpacked.cstep;
438
    constants[6].i = top_blob.dims;
439
    constants[7].i = top_blob.w;
440
    constants[8].i = top_blob.h;
441
    constants[9].i = top_blob.d;
442
    constants[10].i = top_blob.c;
443
    constants[11].i = top_blob.cstep;
444
    constants[12].i = _woffset;
445
    constants[13].i = _hoffset;
446
    constants[14].i = _doffset;
447
    constants[15].i = _coffset;
448

449
    const Pipeline* pipeline = 0;
450
    if (elempack == 1 && out_elempack == 1)
451
    {
452
        pipeline = pipeline_crop;
453
    }
454
    else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
455
    {
456
        pipeline = pipeline_crop_pack4;
457
    }
458
    else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
459
    {
460
        pipeline = pipeline_crop_pack1to4;
461
    }
462
    else if (elempack == 1 && out_elempack == 4)
463
    {
464
        pipeline = pipeline_crop_pack1to4;
465
    }
466
    else if (elempack == 4 && out_elempack == 1)
467
    {
468
        pipeline = pipeline_crop_pack4to1;
469
    }
470
    else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
471
    {
472
        pipeline = pipeline_crop_pack8;
473
    }
474
    else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
475
    {
476
        pipeline = pipeline_crop_pack4to8;
477
    }
478
    else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
479
    {
480
        pipeline = pipeline_crop_pack1to8;
481
    }
482
    else if (elempack == 1 && out_elempack == 8)
483
    {
484
        pipeline = pipeline_crop_pack1to8;
485
    }
486
    else if (elempack == 4 && out_elempack == 8)
487
    {
488
        pipeline = pipeline_crop_pack4to8;
489
    }
490
    else if (elempack == 8 && out_elempack == 4)
491
    {
492
        pipeline = pipeline_crop_pack8to4;
493
    }
494
    else if (elempack == 8 && out_elempack == 1)
495
    {
496
        pipeline = pipeline_crop_pack8to1;
497
    }
498

499
    cmd.record_pipeline(pipeline, bindings, constants, top_blob);
500

501
    return 0;
502
}
503

504
int Crop_vulkan::forward(const std::vector<VkMat>& bottom_blobs, std::vector<VkMat>& top_blobs, VkCompute& cmd, const Option& opt) const
505
{
506
    const VkMat& bottom_blob = bottom_blobs[0];
507
    const VkMat& reference_blob = bottom_blobs[1];
508
    VkMat& top_blob = top_blobs[0];
509

510
    int dims = bottom_blob.dims;
511
    size_t elemsize = bottom_blob.elemsize;
512
    int elempack = bottom_blob.elempack;
513

514
    int _woffset, _hoffset, _doffset, _coffset;
515
    int _outw, _outh, _outd, _outc;
516
    if (woffset == -233)
517
    {
518
        resolve_crop_roi(bottom_blob.shape(), (const int*)reference_blob.mapped(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
519
    }
520
    else
521
    {
522
        resolve_crop_roi(bottom_blob.shape(), reference_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
523
    }
524

525
    int offset_elempack;
526
    int out_elempack;
527

528
    if (dims == 1)
529
    {
530
        if (_woffset == 0 && _outw == bottom_blob.w * elempack)
531
        {
532
            top_blob = bottom_blob;
533
            return 0;
534
        }
535

536
        offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
537
        out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
538
    }
539
    else if (dims == 2)
540
    {
541
        if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
542
        {
543
            top_blob = bottom_blob;
544
            return 0;
545
        }
546

547
        offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
548
        out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
549
    }
550
    else if (dims == 3)
551
    {
552
        if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
553
        {
554
            top_blob = bottom_blob;
555
            return 0;
556
        }
557

558
        offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
559
        out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
560
    }
561
    else // if (dims == 4)
562
    {
563
        if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
564
        {
565
            top_blob = bottom_blob;
566
            return 0;
567
        }
568

569
        offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
570
        out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
571
    }
572

573
    offset_elempack = std::min(offset_elempack, elempack);
574

575
    size_t out_elemsize = elemsize / elempack * out_elempack;
576

577
    if (opt.use_fp16_packed && !opt.use_fp16_storage)
578
    {
579
        if (out_elempack == 8) out_elemsize = 8 * 2u;
580
        if (out_elempack == 4) out_elemsize = 4 * 2u;
581
        if (out_elempack == 1) out_elemsize = 4u;
582
    }
583

584
    // unpacking
585
    VkMat bottom_blob_unpacked = bottom_blob;
586
    if (elempack == out_elempack && elempack > offset_elempack)
587
    {
588
        Option opt_pack1 = opt;
589
        opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
590

591
        vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
592
    }
593

594
    if (dims == 1)
595
    {
596
        top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
597
    }
598
    else if (dims == 2)
599
    {
600
        top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
601
    }
602
    else if (dims == 3)
603
    {
604
        top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
605
    }
606
    else // if (dims == 4)
607
    {
608
        top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
609
    }
610
    if (top_blob.empty())
611
        return -100;
612

613
    std::vector<VkMat> bindings(2);
614
    bindings[0] = bottom_blob_unpacked;
615
    bindings[1] = top_blob;
616

617
    std::vector<vk_constant_type> constants(16);
618
    constants[0].i = bottom_blob_unpacked.dims;
619
    constants[1].i = bottom_blob_unpacked.w;
620
    constants[2].i = bottom_blob_unpacked.h;
621
    constants[3].i = bottom_blob_unpacked.d;
622
    constants[4].i = bottom_blob_unpacked.c;
623
    constants[5].i = bottom_blob_unpacked.cstep;
624
    constants[6].i = top_blob.dims;
625
    constants[7].i = top_blob.w;
626
    constants[8].i = top_blob.h;
627
    constants[9].i = top_blob.d;
628
    constants[10].i = top_blob.c;
629
    constants[11].i = top_blob.cstep;
630
    constants[12].i = _woffset;
631
    constants[13].i = _hoffset;
632
    constants[14].i = _doffset;
633
    constants[15].i = _coffset;
634

635
    const Pipeline* pipeline = 0;
636
    if (elempack == 1 && out_elempack == 1)
637
    {
638
        pipeline = pipeline_crop;
639
    }
640
    else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
641
    {
642
        pipeline = pipeline_crop_pack4;
643
    }
644
    else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
645
    {
646
        pipeline = pipeline_crop_pack1to4;
647
    }
648
    else if (elempack == 1 && out_elempack == 4)
649
    {
650
        pipeline = pipeline_crop_pack1to4;
651
    }
652
    else if (elempack == 4 && out_elempack == 1)
653
    {
654
        pipeline = pipeline_crop_pack4to1;
655
    }
656
    else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
657
    {
658
        pipeline = pipeline_crop_pack8;
659
    }
660
    else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
661
    {
662
        pipeline = pipeline_crop_pack4to8;
663
    }
664
    else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
665
    {
666
        pipeline = pipeline_crop_pack1to8;
667
    }
668
    else if (elempack == 1 && out_elempack == 8)
669
    {
670
        pipeline = pipeline_crop_pack1to8;
671
    }
672
    else if (elempack == 4 && out_elempack == 8)
673
    {
674
        pipeline = pipeline_crop_pack4to8;
675
    }
676
    else if (elempack == 8 && out_elempack == 4)
677
    {
678
        pipeline = pipeline_crop_pack8to4;
679
    }
680
    else if (elempack == 8 && out_elempack == 1)
681
    {
682
        pipeline = pipeline_crop_pack8to1;
683
    }
684

685
    cmd.record_pipeline(pipeline, bindings, constants, top_blob);
686

687
    return 0;
688
}
689

690
int Crop_vulkan::forward(const VkImageMat& bottom_blob, VkImageMat& top_blob, VkCompute& cmd, const Option& opt) const
691
{
692
    int dims = bottom_blob.dims;
693
    size_t elemsize = bottom_blob.elemsize;
694
    int elempack = bottom_blob.elempack;
695

696
    int _woffset, _hoffset, _doffset, _coffset;
697
    int _outw, _outh, _outd, _outc;
698
    resolve_crop_roi(bottom_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
699

700
    int offset_elempack;
701
    int out_elempack;
702

703
    if (dims == 1)
704
    {
705
        if (_woffset == 0 && _outw == bottom_blob.w * elempack)
706
        {
707
            top_blob = bottom_blob;
708
            return 0;
709
        }
710

711
        offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
712
        out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
713
    }
714
    else if (dims == 2)
715
    {
716
        if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
717
        {
718
            top_blob = bottom_blob;
719
            return 0;
720
        }
721

722
        offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
723
        out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
724
    }
725
    else if (dims == 3)
726
    {
727
        if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
728
        {
729
            top_blob = bottom_blob;
730
            return 0;
731
        }
732

733
        offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
734
        out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
735
    }
736
    else // if (dims == 4)
737
    {
738
        if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
739
        {
740
            top_blob = bottom_blob;
741
            return 0;
742
        }
743

744
        offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
745
        out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
746
    }
747

748
    offset_elempack = std::min(offset_elempack, elempack);
749

750
    size_t out_elemsize = elemsize / elempack * out_elempack;
751

752
    if (opt.use_fp16_packed && !opt.use_fp16_storage)
753
    {
754
        if (out_elempack == 8) out_elemsize = 8 * 2u;
755
        if (out_elempack == 4) out_elemsize = 4 * 2u;
756
        if (out_elempack == 1) out_elemsize = 4u;
757
    }
758

759
    // unpacking
760
    VkImageMat bottom_blob_unpacked = bottom_blob;
761
    if (elempack == out_elempack && elempack > offset_elempack)
762
    {
763
        Option opt_pack1 = opt;
764
        opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
765

766
        vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
767
    }
768

769
    if (dims == 1)
770
    {
771
        top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
772
    }
773
    else if (dims == 2)
774
    {
775
        top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
776
    }
777
    else if (dims == 3)
778
    {
779
        top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
780
    }
781
    else // if (dims == 4)
782
    {
783
        top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
784
    }
785
    if (top_blob.empty())
786
        return -100;
787

788
    std::vector<VkImageMat> bindings(2);
789
    bindings[0] = bottom_blob_unpacked;
790
    bindings[1] = top_blob;
791

792
    std::vector<vk_constant_type> constants(16);
793
    constants[0].i = bottom_blob_unpacked.dims;
794
    constants[1].i = bottom_blob_unpacked.w;
795
    constants[2].i = bottom_blob_unpacked.h;
796
    constants[3].i = bottom_blob_unpacked.d;
797
    constants[4].i = bottom_blob_unpacked.c;
798
    constants[5].i = 0; //bottom_blob_unpacked.cstep;
799
    constants[6].i = top_blob.dims;
800
    constants[7].i = top_blob.w;
801
    constants[8].i = top_blob.h;
802
    constants[9].i = top_blob.d;
803
    constants[10].i = top_blob.c;
804
    constants[11].i = 0; //top_blob.cstep;
805
    constants[12].i = _woffset;
806
    constants[13].i = _hoffset;
807
    constants[14].i = _doffset;
808
    constants[15].i = _coffset;
809

810
    const Pipeline* pipeline = 0;
811
    if (elempack == 1 && out_elempack == 1)
812
    {
813
        pipeline = pipeline_crop;
814
    }
815
    else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
816
    {
817
        pipeline = pipeline_crop_pack4;
818
    }
819
    else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
820
    {
821
        pipeline = pipeline_crop_pack1to4;
822
    }
823
    else if (elempack == 1 && out_elempack == 4)
824
    {
825
        pipeline = pipeline_crop_pack1to4;
826
    }
827
    else if (elempack == 4 && out_elempack == 1)
828
    {
829
        pipeline = pipeline_crop_pack4to1;
830
    }
831
    else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
832
    {
833
        pipeline = pipeline_crop_pack8;
834
    }
835
    else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
836
    {
837
        pipeline = pipeline_crop_pack4to8;
838
    }
839
    else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
840
    {
841
        pipeline = pipeline_crop_pack1to8;
842
    }
843
    else if (elempack == 1 && out_elempack == 8)
844
    {
845
        pipeline = pipeline_crop_pack1to8;
846
    }
847
    else if (elempack == 4 && out_elempack == 8)
848
    {
849
        pipeline = pipeline_crop_pack4to8;
850
    }
851
    else if (elempack == 8 && out_elempack == 4)
852
    {
853
        pipeline = pipeline_crop_pack8to4;
854
    }
855
    else if (elempack == 8 && out_elempack == 1)
856
    {
857
        pipeline = pipeline_crop_pack8to1;
858
    }
859

860
    cmd.record_pipeline(pipeline, bindings, constants, top_blob);
861

862
    return 0;
863
}
864

865
int Crop_vulkan::forward(const std::vector<VkImageMat>& bottom_blobs, std::vector<VkImageMat>& top_blobs, VkCompute& cmd, const Option& opt) const
866
{
867
    const VkImageMat& bottom_blob = bottom_blobs[0];
868
    const VkImageMat& reference_blob = bottom_blobs[1];
869
    VkImageMat& top_blob = top_blobs[0];
870

871
    int dims = bottom_blob.dims;
872
    size_t elemsize = bottom_blob.elemsize;
873
    int elempack = bottom_blob.elempack;
874

875
    int _woffset, _hoffset, _doffset, _coffset;
876
    int _outw, _outh, _outd, _outc;
877
    if (woffset == -233)
878
    {
879
        resolve_crop_roi(bottom_blob.shape(), (const int*)reference_blob.mapped(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
880
    }
881
    else
882
    {
883
        resolve_crop_roi(bottom_blob.shape(), reference_blob.shape(), _woffset, _hoffset, _doffset, _coffset, _outw, _outh, _outd, _outc);
884
    }
885

886
    int offset_elempack;
887
    int out_elempack;
888

889
    if (dims == 1)
890
    {
891
        if (_woffset == 0 && _outw == bottom_blob.w * elempack)
892
        {
893
            top_blob = bottom_blob;
894
            return 0;
895
        }
896

897
        offset_elempack = _woffset == 0 ? elempack : opt.use_shader_pack8 && _woffset % 8 == 0 ? 8 : _woffset % 4 == 0 ? 4 : 1;
898
        out_elempack = opt.use_shader_pack8 && _outw % 8 == 0 ? 8 : _outw % 4 == 0 ? 4 : 1;
899
    }
900
    else if (dims == 2)
901
    {
902
        if (_woffset == 0 && _hoffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h * elempack)
903
        {
904
            top_blob = bottom_blob;
905
            return 0;
906
        }
907

908
        offset_elempack = _hoffset == 0 ? elempack : opt.use_shader_pack8 && _hoffset % 8 == 0 ? 8 : _hoffset % 4 == 0 ? 4 : 1;
909
        out_elempack = opt.use_shader_pack8 && _outh % 8 == 0 ? 8 : _outh % 4 == 0 ? 4 : 1;
910
    }
911
    else if (dims == 3)
912
    {
913
        if (_woffset == 0 && _hoffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outc == bottom_blob.c * elempack)
914
        {
915
            top_blob = bottom_blob;
916
            return 0;
917
        }
918

919
        offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
920
        out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
921
    }
922
    else // if (dims == 4)
923
    {
924
        if (_woffset == 0 && _hoffset == 0 && _doffset == 0 && _coffset == 0 && _outw == bottom_blob.w && _outh == bottom_blob.h && _outd == bottom_blob.d && _outc == bottom_blob.c * elempack)
925
        {
926
            top_blob = bottom_blob;
927
            return 0;
928
        }
929

930
        offset_elempack = _coffset == 0 ? elempack : opt.use_shader_pack8 && _coffset % 8 == 0 ? 8 : _coffset % 4 == 0 ? 4 : 1;
931
        out_elempack = opt.use_shader_pack8 && _outc % 8 == 0 ? 8 : _outc % 4 == 0 ? 4 : 1;
932
    }
933

934
    offset_elempack = std::min(offset_elempack, elempack);
935

936
    size_t out_elemsize = elemsize / elempack * out_elempack;
937

938
    if (opt.use_fp16_packed && !opt.use_fp16_storage)
939
    {
940
        if (out_elempack == 8) out_elemsize = 8 * 2u;
941
        if (out_elempack == 4) out_elemsize = 4 * 2u;
942
        if (out_elempack == 1) out_elemsize = 4u;
943
    }
944

945
    // unpacking
946
    VkImageMat bottom_blob_unpacked = bottom_blob;
947
    if (elempack == out_elempack && elempack > offset_elempack)
948
    {
949
        Option opt_pack1 = opt;
950
        opt_pack1.blob_vkallocator = opt.workspace_vkallocator;
951

952
        vkdev->convert_packing(bottom_blob, bottom_blob_unpacked, offset_elempack, cmd, opt_pack1);
953
    }
954

955
    if (dims == 1)
956
    {
957
        top_blob.create(_outw / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
958
    }
959
    else if (dims == 2)
960
    {
961
        top_blob.create(_outw, _outh / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
962
    }
963
    else if (dims == 3)
964
    {
965
        top_blob.create(_outw, _outh, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
966
    }
967
    else // if (dims == 4)
968
    {
969
        top_blob.create(_outw, _outh, _outd, _outc / out_elempack, out_elemsize, out_elempack, opt.blob_vkallocator);
970
    }
971
    if (top_blob.empty())
972
        return -100;
973

974
    std::vector<VkImageMat> bindings(2);
975
    bindings[0] = bottom_blob_unpacked;
976
    bindings[1] = top_blob;
977

978
    std::vector<vk_constant_type> constants(16);
979
    constants[0].i = bottom_blob_unpacked.dims;
980
    constants[1].i = bottom_blob_unpacked.w;
981
    constants[2].i = bottom_blob_unpacked.h;
982
    constants[3].i = bottom_blob_unpacked.d;
983
    constants[4].i = bottom_blob_unpacked.c;
984
    constants[5].i = 0; //bottom_blob_unpacked.cstep;
985
    constants[6].i = top_blob.dims;
986
    constants[7].i = top_blob.w;
987
    constants[8].i = top_blob.h;
988
    constants[9].i = top_blob.d;
989
    constants[10].i = top_blob.c;
990
    constants[11].i = 0; //top_blob.cstep;
991
    constants[12].i = _woffset;
992
    constants[13].i = _hoffset;
993
    constants[14].i = _doffset;
994
    constants[15].i = _coffset;
995

996
    const Pipeline* pipeline = 0;
997
    if (elempack == 1 && out_elempack == 1)
998
    {
999
        pipeline = pipeline_crop;
1000
    }
1001
    else if (elempack == 4 && offset_elempack == 4 && out_elempack == 4)
1002
    {
1003
        pipeline = pipeline_crop_pack4;
1004
    }
1005
    else if (elempack == 4 && offset_elempack == 1 && out_elempack == 4)
1006
    {
1007
        pipeline = pipeline_crop_pack1to4;
1008
    }
1009
    else if (elempack == 1 && out_elempack == 4)
1010
    {
1011
        pipeline = pipeline_crop_pack1to4;
1012
    }
1013
    else if (elempack == 4 && out_elempack == 1)
1014
    {
1015
        pipeline = pipeline_crop_pack4to1;
1016
    }
1017
    else if (elempack == 8 && offset_elempack == 8 && out_elempack == 8)
1018
    {
1019
        pipeline = pipeline_crop_pack8;
1020
    }
1021
    else if (elempack == 8 && offset_elempack == 4 && out_elempack == 8)
1022
    {
1023
        pipeline = pipeline_crop_pack4to8;
1024
    }
1025
    else if (elempack == 8 && offset_elempack == 1 && out_elempack == 8)
1026
    {
1027
        pipeline = pipeline_crop_pack1to8;
1028
    }
1029
    else if (elempack == 1 && out_elempack == 8)
1030
    {
1031
        pipeline = pipeline_crop_pack1to8;
1032
    }
1033
    else if (elempack == 4 && out_elempack == 8)
1034
    {
1035
        pipeline = pipeline_crop_pack4to8;
1036
    }
1037
    else if (elempack == 8 && out_elempack == 4)
1038
    {
1039
        pipeline = pipeline_crop_pack8to4;
1040
    }
1041
    else if (elempack == 8 && out_elempack == 1)
1042
    {
1043
        pipeline = pipeline_crop_pack8to1;
1044
    }
1045

1046
    cmd.record_pipeline(pipeline, bindings, constants, top_blob);
1047

1048
    return 0;
1049
}
1050

1051
} // namespace ncnn
1052

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.