// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include "deconvolution_riscv.h"
18
#include "layer_type.h"
21
#include <riscv_vector.h>
22
#endif // __riscv_vector
24
#include "riscv_activation.h"
25
#include "riscv_usability.h"
30
#include "deconvolution_packn.h"
31
#include "deconvolution_pack1ton.h"
32
#include "deconvolution_packnto1.h"
35
#include "deconvolution_fp16s.h"
36
#include "deconvolution_packn_fp16s.h"
37
#include "deconvolution_pack1ton_fp16s.h"
38
#include "deconvolution_packnto1_fp16s.h"
40
#endif // __riscv_vector
42

Deconvolution_riscv::Deconvolution_riscv()
{
#if __riscv_vector
    support_packing = true;
#if __riscv_zfh
    support_fp16_storage = true;
#endif
#endif // __riscv_vector
}

int Deconvolution_riscv::create_pipeline(const Option& opt)
{
    if (dynamic_weight)
        return 0;

#if __riscv_vector && __riscv_zfh
    if (opt.use_fp16_storage)
    {
        return create_pipeline_fp16s(opt);
    }
#endif

#if __riscv_vector
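    // packn = number of fp32 lanes in one RVV vector register. csrr_vlenb()
    // reads the vlenb CSR, the vector register width in bytes, so dividing by
    // sizeof(float) == 4 gives the lane count (e.g. packn = 4 when VLEN = 128).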
    const int packn = csrr_vlenb() / 4;
#endif

    const int maxk = kernel_w * kernel_h;
    int num_input = weight_data_size / maxk / num_output;

    Mat weight_data_transposed(weight_data.w);
    {
        float* pt = weight_data_transposed;
        const float* p = weight_data;

        for (int i = 0; i < num_input * num_output; i++)
        {
            for (int k = 0; k < maxk; k++)
            {
                pt[maxk - 1 - k] = p[k];
            }

            p += maxk;
            pt += maxk;
        }
    }
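
    // Writing pt[maxk - 1 - k] = p[k] flips each kw x kh window, i.e. rotates
    // the kernel by 180 degrees. With flipped weights the transposed
    // convolution can be computed as a plain correlation over the
    // (implicitly zero-inserted) input, which is the gather loop in forward().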

    int elempack = 1;
    int out_elempack = 1;
#if __riscv_vector
    if (opt.use_packing_layout)
    {
        elempack = num_input % packn == 0 ? packn : 1;
        out_elempack = num_output % packn == 0 ? packn : 1;
    }
#endif

    // src = kw-kh-inch-outch
    // dst = pb-pa-kw-kh-inch/pa-outch/pb
    {
        Mat weight_data_r2 = weight_data_transposed.reshape(maxk, num_input, num_output);

        weight_data_tm.create(maxk, num_input / elempack, num_output / out_elempack, (size_t)4u * elempack * out_elempack, elempack * out_elempack);

        for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack)
        {
            float* g00 = weight_data_tm.channel(q / out_elempack);

            for (int p = 0; p + (elempack - 1) < num_input; p += elempack)
            {
                for (int k = 0; k < maxk; k++)
                {
                    for (int i = 0; i < elempack; i++)
                    {
                        for (int j = 0; j < out_elempack; j++)
                        {
                            const float* k00 = weight_data_r2.channel(q + j).row(p + i);

                            g00[0] = k00[k];

                            g00++;
                        }
                    }
                }
            }
        }
    }
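
    // Example with packn = 4, elempack = 4, out_elempack = 4: for every kernel
    // position k, a 4x4 tile of weights w(q+j, p+i, k) is stored with the
    // output lane j innermost, then the input lane i, so a packed kernel can
    // broadcast one input lane and accumulate into packn output lanes at once.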

#if __riscv_vector
    // all elempack combinations share the packed weight layout prepared above
    if (elempack == packn && out_elempack == packn)
    {
    }

    if (elempack == 1 && out_elempack == packn)
    {
    }

    if (elempack == packn && out_elempack == 1)
    {
    }
#endif // __riscv_vector

    if (elempack == 1 && out_elempack == 1)
    {
    }

    if (opt.lightmode)
        weight_data.release();

    return 0;
}

int Deconvolution_riscv::destroy_pipeline(const Option& opt)
{
    return 0;
}

int Deconvolution_riscv::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
    int elembits = bottom_blob.elembits();

#if __riscv_vector && __riscv_zfh
    if (opt.use_fp16_storage && elembits == 16)
    {
        if (opt.use_fp16_arithmetic)
            return forward_fp16sa(bottom_blob, top_blob, opt);
        else
            return forward_fp16s(bottom_blob, top_blob, opt);
    }
#endif

#if __riscv_vector
    const int packn = csrr_vlenb() / 4;
#endif

    // deconvolve with NxN kernel
    // value = value + bias

    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    size_t elemsize = bottom_blob.elemsize;
    int elempack = bottom_blob.elempack;

    //     NCNN_LOGE("Deconvolution input %d x %d  pad = %d %d  ksize=%d %d  stride=%d %d", w, h, pad_w, pad_h, kernel_w, kernel_h, stride_w, stride_h);

    const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1;
    const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1;
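
    // Output size is the inverse of the convolution formula
    // w_out = (w_in - kernel_extent) / stride + 1, hence
    // outw = (w - 1) * stride_w + kernel_extent_w; output_pad_right/bottom
    // disambiguate sizes that the integer division would otherwise alias.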
    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
    int outh = (h - 1) * stride_h + kernel_extent_h + output_pad_bottom;
    int out_elempack = 1;
#if __riscv_vector
    if (opt.use_packing_layout)
    {
        out_elempack = num_output % packn == 0 ? packn : 1;
    }
#endif
    size_t out_elemsize = elemsize / elempack * out_elempack;
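
    // The kernels always produce the full, uncropped deconvolution output.
    // If padding or an explicit output_w/output_h is set, render into a
    // scratch blob first and let cut_padding() crop it into top_blob below.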
    Mat top_blob_bordered;
    if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || (output_w > 0 && output_h > 0))
    {
        top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_allocator);
    }
    else
    {
        top_blob_bordered = top_blob;
        top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
    }
    if (top_blob_bordered.empty())
        return -100;

    const int maxk = kernel_w * kernel_h;

#if __riscv_vector
    if (elempack == packn && out_elempack == packn)
    {
        deconvolution_packn_rvv(bottom_blob, top_blob_bordered, weight_data_tm, bias_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }

    if (elempack == 1 && out_elempack == packn)
    {
        deconvolution_pack1ton_rvv(bottom_blob, top_blob_bordered, weight_data_tm, bias_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }

    if (elempack == packn && out_elempack == 1)
    {
        deconvolution_packnto1_rvv(bottom_blob, top_blob_bordered, weight_data_tm, bias_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }
#endif // __riscv_vector
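
    // Naive scalar fallback, formulated as a gather: output pixel (i, j) and
    // kernel tap (y, x) read input sample sy = (i + y * dilation_h -
    // (kernel_extent_h - 1)) / stride_h (same for sx), and the tap only
    // contributes when that numerator is non-negative and divisible by the
    // stride, i.e. it lands on a real sample of the zero-inserted input.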
    if (elempack == 1 && out_elempack == 1)
    {
        #pragma omp parallel for num_threads(opt.num_threads)
        for (int p = 0; p < num_output; p++)
        {
            float* outptr = top_blob_bordered.channel(p);

            for (int i = 0; i < outh; i++)
            {
                for (int j = 0; j < outw; j++)
                {
                    float sum = 0.f;

                    if (bias_term)
                    {
                        sum = bias_data[p];
                    }

                    const float* kptr = (const float*)weight_data_tm.channel(p);

                    for (int q = 0; q < channels; q++)
                    {
                        const Mat m = bottom_blob.channel(q);

                        for (int y = 0; y < kernel_h; y++)
                        {
                            int sys = (i + y * dilation_h - (kernel_extent_h - 1));
                            if (sys < 0 || sys % stride_h != 0)
                                continue;

                            int sy = sys / stride_h;
                            if (sy >= h)
                                continue;

                            const float* sptr = m.row(sy);

                            for (int x = 0; x < kernel_w; x++)
                            {
                                int sxs = (j + x * dilation_w - (kernel_extent_w - 1));
                                if (sxs < 0 || sxs % stride_w != 0)
                                    continue;

                                int sx = sxs / stride_w;
                                if (sx >= w)
                                    continue;

                                float val = sptr[sx];

                                int k = y * kernel_w + x;

                                sum += val * kptr[k];
                            }
                        }

                        kptr += maxk;
                    }

                    sum = activation_ss(sum, activation_type, activation_params);

                    outptr[j] = sum;
                }

                outptr += outw;
            }
        }
    }

    cut_padding(top_blob_bordered, top_blob, opt);
    if (top_blob.empty())
        return -100;

    return 0;
}

int Deconvolution_riscv::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
{
    const Mat& bottom_blob = bottom_blobs[0];
    const Mat& _weight_data = bottom_blobs[1];
    Mat& top_blob = top_blobs[0];

    const int _num_input = bottom_blob.c * bottom_blob.elempack;
    const int _kernel_w = _weight_data.w;
    const int _kernel_h = _weight_data.h;
    const int _num_output = _weight_data.d * 1;
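
    // Dynamic weights arrive as extra inputs: bottom_blobs[1] is the weight
    // blob with w = kernel_w, h = kernel_h, d = num_output, c = num_input,
    // and bottom_blobs[2] (if bias_term) is the bias.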

    Mat weight_data_flattened;
    flatten(_weight_data, weight_data_flattened, opt);
    if (weight_data_flattened.empty())
        return -100;

#if NCNN_RVV
    if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && weight_data_flattened.elembits() == 16)
    {
        Mat weight_data_flattened_fp32;
        cast_float16_to_float32(weight_data_flattened, weight_data_flattened_fp32, opt);
        weight_data_flattened = weight_data_flattened_fp32;
    }
#endif // NCNN_RVV

    // weight_data_flattened as pack1
    weight_data_flattened.w *= weight_data_flattened.elempack;
    weight_data_flattened.elemsize /= weight_data_flattened.elempack;
    weight_data_flattened.elempack = 1;

    // transpose group-inch/group-outch/group-kh-kw to group-outch/group-inch/group-kh-kw
    // group is always 1 for Deconvolution (DeconvolutionDepthWise handles groups)
    Mat weight_data_transposed;
    {
        weight_data_transposed.create(_kernel_w * _kernel_h * _num_output * _num_input / 1, 4u, opt.workspace_allocator);
        if (weight_data_transposed.empty())
            return -100;

        const int outch_g = _num_output / 1;
        const int inch_g = _num_input / 1;
        const int maxk = _kernel_h * _kernel_w;

        for (int g = 0; g < 1; g++)
        {
            // reorder weight from inch-outch to outch-inch
            float* wg2 = (float*)weight_data_transposed + g * outch_g * inch_g * maxk;
            const float* wg = (const float*)weight_data_flattened + g * inch_g * outch_g * maxk;
            for (int i = 0; i < outch_g; i++)
            {
                for (int j = 0; j < inch_g; j++)
                {
                    for (int k = 0; k < maxk; k++)
                    {
                        wg2[(i * inch_g + j) * maxk + k] = wg[(j * outch_g + i) * maxk + k];
                    }
                }
            }
        }
    }

    Mat bias_data_flattened;
    if (bias_term)
    {
        const Mat& _bias_data = bottom_blobs[2];
        flatten(_bias_data, bias_data_flattened, opt);
        if (bias_data_flattened.empty())
            return -100;

#if NCNN_RVV
        if (opt.use_fp16_storage && cpu_support_riscv_v() && cpu_support_riscv_zfh() && bias_data_flattened.elembits() == 16)
        {
            Mat bias_data_flattened_fp32;
            cast_float16_to_float32(bias_data_flattened, bias_data_flattened_fp32, opt);
            bias_data_flattened = bias_data_flattened_fp32;
        }
#endif // NCNN_RVV

        // bias_data_flattened as pack1
        bias_data_flattened.w *= bias_data_flattened.elempack;
        bias_data_flattened.elemsize /= bias_data_flattened.elempack;
        bias_data_flattened.elempack = 1;
    }
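
    // With weights only known at runtime, fall back to the reference CPU
    // Deconvolution layer: rebuild its ParamDict from this layer's parameters,
    // hand it the transposed weights, and run it for this single call.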
    ncnn::Layer* op = ncnn::create_layer_cpu(ncnn::LayerType::Deconvolution);

    ncnn::ParamDict pd;
    pd.set(0, _num_output);
    pd.set(1, _kernel_w);
    pd.set(11, _kernel_h);
    pd.set(2, dilation_w);
    pd.set(12, dilation_h);
    pd.set(3, stride_w);
    pd.set(13, stride_h);
    pd.set(4, pad_left);
    pd.set(15, pad_right);
    pd.set(14, pad_top);
    pd.set(16, pad_bottom);
    pd.set(18, output_pad_right);
    pd.set(19, output_pad_bottom);
    pd.set(20, output_w);
    pd.set(21, output_h);
    pd.set(5, bias_term);
    pd.set(6, weight_data_transposed.w);
    pd.set(9, activation_type);
    pd.set(10, activation_params);

    op->load_param(pd);

    ncnn::Mat weights[2];
    weights[0] = weight_data_transposed;
    weights[1] = bias_data_flattened;

    op->load_model(ncnn::ModelBinFromMatArray(weights));

    op->create_pipeline(opt);

    op->forward(bottom_blob, top_blob, opt);

    op->destroy_pipeline(opt);

    delete op;

    return 0;
}

#if __riscv_vector && __riscv_zfh
int Deconvolution_riscv::create_pipeline_fp16s(const Option& opt)
{
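    // For fp16 storage one vector register holds vlenb / 2 half-precision
    // lanes, so packn here is twice the fp32 packn used in create_pipeline().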
    const int packn = csrr_vlenb() / 2;

    const int maxk = kernel_w * kernel_h;
    const int num_input = weight_data_size / maxk / num_output;

    int elempack = 1;
    int out_elempack = 1;

    if (opt.use_packing_layout)
    {
        elempack = num_input % packn == 0 ? packn : 1;
        out_elempack = num_output % packn == 0 ? packn : 1;
    }

    Mat weight_data_transposed(weight_data.w);
    {
        float* pt = weight_data_transposed;
        const float* p = weight_data;

        for (int i = 0; i < num_input * num_output; i++)
        {
            for (int k = 0; k < maxk; k++)
            {
                pt[maxk - 1 - k] = p[k];
            }

            p += maxk;
            pt += maxk;
        }
    }

    // src = kw-kh-inch-outch
    // dst = pb-pa-kw-kh-inch/pa-outch/pb
    {
        Mat weight_data_r2 = weight_data_transposed.reshape(maxk, num_input, num_output);

        weight_data_tm.create(maxk, num_input / elempack, num_output / out_elempack, (size_t)2u * elempack * out_elempack, elempack * out_elempack);

        for (int q = 0; q + (out_elempack - 1) < num_output; q += out_elempack)
        {
            __fp16* g00 = weight_data_tm.channel(q / out_elempack);

            for (int p = 0; p + (elempack - 1) < num_input; p += elempack)
            {
                for (int k = 0; k < maxk; k++)
                {
                    for (int i = 0; i < elempack; i++)
                    {
                        for (int j = 0; j < out_elempack; j++)
                        {
                            const float* k00 = weight_data_r2.channel(q + j).row(p + i);

                            g00[0] = (__fp16)k00[k];

                            g00++;
                        }
                    }
                }
            }
        }
    }

    // as in create_pipeline(), every elempack pair uses the same packed layout
    if (elempack == packn && out_elempack == packn)
    {
    }

    if (elempack == 1 && out_elempack == packn)
    {
    }

    if (elempack == packn && out_elempack == 1)
    {
    }

    if (elempack == 1 && out_elempack == 1)
    {
    }
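
    // Keep a half-precision copy of the bias: the fp16sa kernels consume
    // bias_data_fp16, while forward_fp16s keeps the fp32 bias_data.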
    ncnn::cast_float32_to_float16(bias_data, bias_data_fp16, opt);

    if (opt.lightmode)
        weight_data.release();

    return 0;
}

int Deconvolution_riscv::forward_fp16s(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
    const int packn = csrr_vlenb() / 2;

    // deconvolve with NxN kernel
    // value = value + bias

    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    size_t elemsize = bottom_blob.elemsize;
    int elempack = bottom_blob.elempack;

    //     NCNN_LOGE("Deconvolution input %d x %d  pad = %d %d  ksize=%d %d  stride=%d %d", w, h, pad_w, pad_h, kernel_w, kernel_h, stride_w, stride_h);

    const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1;
    const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1;

    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
    int outh = (h - 1) * stride_h + kernel_extent_h + output_pad_bottom;
    int out_elempack = (opt.use_packing_layout && num_output % packn == 0) ? packn : 1;
    size_t out_elemsize = elemsize / elempack * out_elempack;

    Mat top_blob_bordered;
    if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || (output_w > 0 && output_h > 0))
    {
        top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_allocator);
    }
    else
    {
        top_blob_bordered = top_blob;
        top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
    }
    if (top_blob_bordered.empty())
        return -100;

    if (elempack == packn && out_elempack == packn)
    {
        deconvolution_packn_fp16s_rvv(bottom_blob, top_blob_bordered, weight_data_tm, bias_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }

    if (elempack == 1 && out_elempack == packn)
    {
        deconvolution_pack1ton_fp16s_rvv(bottom_blob, top_blob_bordered, weight_data_tm, bias_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }

    if (elempack == packn && out_elempack == 1)
    {
        deconvolution_packnto1_fp16s_rvv(bottom_blob, top_blob_bordered, weight_data_tm, bias_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }

    if (elempack == 1 && out_elempack == 1)
    {
        deconvolution_fp16s(bottom_blob, top_blob_bordered, weight_data_tm, bias_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }

    cut_padding(top_blob_bordered, top_blob, opt);
    if (top_blob.empty())
        return -100;

    return 0;
}
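
// forward_fp16s stores activations in fp16 but accumulates in fp32, while
// forward_fp16sa (chosen when opt.use_fp16_arithmetic is set) also computes
// in fp16, trading a little precision for higher throughput.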
int Deconvolution_riscv::forward_fp16sa(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
{
    const int packn = csrr_vlenb() / 2;

    // deconvolve with NxN kernel
    // value = value + bias

    int w = bottom_blob.w;
    int h = bottom_blob.h;
    int channels = bottom_blob.c;
    size_t elemsize = bottom_blob.elemsize;
    int elempack = bottom_blob.elempack;

    //     NCNN_LOGE("Deconvolution input %d x %d  pad = %d %d  ksize=%d %d  stride=%d %d", w, h, pad_w, pad_h, kernel_w, kernel_h, stride_w, stride_h);

    const int kernel_extent_w = dilation_w * (kernel_w - 1) + 1;
    const int kernel_extent_h = dilation_h * (kernel_h - 1) + 1;

    int outw = (w - 1) * stride_w + kernel_extent_w + output_pad_right;
    int outh = (h - 1) * stride_h + kernel_extent_h + output_pad_bottom;
    int out_elempack = (opt.use_packing_layout && num_output % packn == 0) ? packn : 1;
    size_t out_elemsize = elemsize / elempack * out_elempack;

    Mat top_blob_bordered;
    if (pad_left > 0 || pad_right > 0 || pad_top > 0 || pad_bottom > 0 || (output_w > 0 && output_h > 0))
    {
        top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.workspace_allocator);
    }
    else
    {
        top_blob_bordered = top_blob;
        top_blob_bordered.create(outw, outh, num_output / out_elempack, out_elemsize, out_elempack, opt.blob_allocator);
    }
    if (top_blob_bordered.empty())
        return -100;

    if (elempack == packn && out_elempack == packn)
    {
        deconvolution_packn_fp16sa_rvv(bottom_blob, top_blob_bordered, weight_data_tm, bias_data_fp16, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }

    if (elempack == 1 && out_elempack == packn)
    {
        deconvolution_pack1ton_fp16sa_rvv(bottom_blob, top_blob_bordered, weight_data_tm, bias_data_fp16, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }

    if (elempack == packn && out_elempack == 1)
    {
        deconvolution_packnto1_fp16sa_rvv(bottom_blob, top_blob_bordered, weight_data_tm, bias_data_fp16, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }

    if (elempack == 1 && out_elempack == 1)
    {
        // no dedicated fp16sa scalar path; fall back to the fp16s kernel with the fp32 bias
        deconvolution_fp16s(bottom_blob, top_blob_bordered, weight_data_tm, bias_data, kernel_w, kernel_h, dilation_w, dilation_h, stride_w, stride_h, activation_type, activation_params, opt);
    }

    cut_padding(top_blob_bordered, top_blob, opt);
    if (top_blob.empty())
        return -100;

    return 0;
}
#endif // __riscv_vector && __riscv_zfh

} // namespace ncnn