ncnn

interp.cpp
673 строки · 18.7 Кб
Перенос по словам
1
// Tencent is pleased to support the open source community by making ncnn available.
2
//
3
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
4
//
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
7
//
8
// https://opensource.org/licenses/BSD-3-Clause
9
//
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
14

15
#include "interp.h"
16

17
namespace ncnn {
18

19
// Set the layer traits this operator starts with.
Interp::Interp()
{
    // in-place execution is not supported by this layer
    support_inplace = false;

    // single-input mode by default; load_param() switches it off
    // when dynamic_target_size is enabled
    one_blob_only = true;
}
24

25
int Interp::load_param(const ParamDict& pd)
26
{
27
    resize_type = pd.get(0, 0);
28
    height_scale = pd.get(1, 1.f);
29
    width_scale = pd.get(2, 1.f);
30
    output_height = pd.get(3, 0);
31
    output_width = pd.get(4, 0);
32
    dynamic_target_size = pd.get(5, 0);
33
    align_corner = pd.get(6, 0);
34

35
    if (resize_type < 0 || resize_type > 3)
36
    {
37
        NCNN_LOGE("unsupported resize type %d", resize_type);
38
        return -1;
39
    }
40

41
    if (dynamic_target_size == 1)
42
    {
43
        one_blob_only = false;
44
    }
45

46
    return 0;
47
}
48

49
// Precompute, for every output position along one axis, the source index and
// the two linear interpolation weights.
//
//   w             source extent along the axis
//   outw          output extent along the axis
//   xofs          [outw]     receives the index of the first of the two taps
//   alpha         [outw * 2] receives the weights for taps xofs[dx] and xofs[dx] + 1
//   align_corner  non-zero: map the first/last output samples exactly onto the
//                 first/last source samples; zero: half-pixel centre mapping
//
// NOTE(review): for w == 1 the clamp below yields xofs == -1 with weights
// (0, 1), so consumers still touch element -1 (scaled by zero). Callers are
// expected to pass w >= 2 -- confirm.
#if defined(__GNUC__) && defined(__powerpc__) && defined(__ALTIVEC__)
// NOTE gcc altivec optimized version produce wrong result
// so I have to disable vectorize here  --- nihui
__attribute__((optimize("no-tree-vectorize")))
#endif
static void
linear_coeffs(int w, int outw, int* xofs, float* alpha, int align_corner)
{
    double scale = (double)w / outw;
    if (align_corner)
    {
        // A single output sample has no "last corner" to align with; the naive
        // (w - 1) / (outw - 1) would divide by zero and turn fx into NaN, and
        // converting floor(NaN) to int is undefined. Use a zero step so the
        // sample maps onto source index 0.
        scale = outw > 1 ? (double)(w - 1) / (outw - 1) : 0.0;
    }

    for (int dx = 0; dx < outw; dx++)
    {
        float fx = (float)((dx + 0.5) * scale - 0.5);
        if (align_corner)
        {
            fx = static_cast<float>(dx * scale);
        }

        // split into integer tap index and fractional weight
        int sx = static_cast<int>(floor(fx));
        fx -= sx;

        // clamp to the left border
        if (sx < 0)
        {
            sx = 0;
            fx = 0.f;
        }
        // clamp to the right border: keep both taps (sx, sx + 1) in range and
        // put all weight on the last source sample
        if (sx >= w - 1)
        {
            sx = w - 2;
            fx = 1.f;
        }

        xofs[dx] = sx;

        alpha[dx * 2] = 1.f - fx;
        alpha[dx * 2 + 1] = fx;
    }
}
91

92
static void resize_bilinear_image(const Mat& src, Mat& dst, float* alpha, int* xofs, float* beta, int* yofs)
93
{
94
    int w = dst.w;
95
    int h = dst.h;
96

97
    // loop body
98
    Mat rowsbuf0(w);
99
    Mat rowsbuf1(w);
100
    float* rows0 = rowsbuf0;
101
    float* rows1 = rowsbuf1;
102

103
    int prev_sy1 = -2;
104

105
    for (int dy = 0; dy < h; dy++)
106
    {
107
        int sy = yofs[dy];
108

109
        if (sy == prev_sy1)
110
        {
111
            // reuse all rows
112
        }
113
        else if (sy == prev_sy1 + 1)
114
        {
115
            // hresize one row
116
            float* rows0_old = rows0;
117
            rows0 = rows1;
118
            rows1 = rows0_old;
119
            const float* S1 = src.row(sy + 1);
120

121
            const float* alphap = alpha;
122
            float* rows1p = rows1;
123
            for (int dx = 0; dx < w; dx++)
124
            {
125
                int sx = xofs[dx];
126
                const float* S1p = S1 + sx;
127

128
                float a0 = alphap[0];
129
                float a1 = alphap[1];
130
                rows1p[dx] = S1p[0] * a0 + S1p[1] * a1;
131

132
                alphap += 2;
133
            }
134
        }
135
        else
136
        {
137
            // hresize two rows
138
            const float* S0 = src.row(sy);
139
            const float* S1 = src.row(sy + 1);
140

141
            const float* alphap = alpha;
142
            float* rows0p = rows0;
143
            float* rows1p = rows1;
144
            for (int dx = 0; dx < w; dx++)
145
            {
146
                int sx = xofs[dx];
147
                const float* S0p = S0 + sx;
148
                const float* S1p = S1 + sx;
149

150
                float a0 = alphap[0];
151
                float a1 = alphap[1];
152
                rows0p[dx] = S0p[0] * a0 + S0p[1] * a1;
153
                rows1p[dx] = S1p[0] * a0 + S1p[1] * a1;
154

155
                alphap += 2;
156
            }
157
        }
158

159
        prev_sy1 = sy;
160

161
        // vresize
162
        float b0 = beta[0];
163
        float b1 = beta[1];
164

165
        float* rows0p = rows0;
166
        float* rows1p = rows1;
167
        float* Dp = dst.row(dy);
168
        for (int dx = 0; dx < w; dx++)
169
        {
170
            //             D[x] = rows0[x]*b0 + rows1[x]*b1;
171
            *Dp++ = *rows0p++ * b0 + *rows1p++ * b1;
172
        }
173

174
        beta += 2;
175
    }
176
}
177

178
// Compute the four cubic-convolution weights for a sample that lies at
// fractional offset fx past the second of four consecutive taps.
//
//   fx      fractional position
//   coeffs  [4] receives the weights for the taps at -1, 0, +1 and +2
//
// The last weight is derived from the other three so that all four sum to 1.
static inline void interpolate_cubic(float fx, float* coeffs)
{
    // sharpness parameter of the cubic kernel
    const float A = -0.75f;

    // distances from the sample to the first three taps
    const float d_outer = fx + 1;
    const float d_left = fx;
    const float d_right = 1 - fx;

    const float w_outer = A * d_outer * d_outer * d_outer - 5 * A * d_outer * d_outer + 8 * A * d_outer - 4 * A;
    const float w_left = (A + 2) * d_left * d_left * d_left - (A + 3) * d_left * d_left + 1;
    const float w_right = (A + 2) * d_right * d_right * d_right - (A + 3) * d_right * d_right + 1;

    coeffs[0] = w_outer;
    coeffs[1] = w_left;
    coeffs[2] = w_right;
    coeffs[3] = 1.f - w_outer - w_left - w_right;
}
192

193
static void cubic_coeffs(int w, int outw, int* xofs, float* alpha, int align_corner)
194
{
195
    double scale = (double)w / outw;
196
    if (align_corner)
197
    {
198
        scale = (double)(w - 1) / (outw - 1);
199
    }
200

201
    for (int dx = 0; dx < outw; dx++)
202
    {
203
        float fx = (float)((dx + 0.5) * scale - 0.5);
204
        if (align_corner)
205
        {
206
            fx = static_cast<float>(dx * scale);
207
        }
208

209
        int sx = static_cast<int>(floor(fx));
210
        fx -= sx;
211

212
        interpolate_cubic(fx, alpha + dx * 4);
213

214
        if (sx <= -1)
215
        {
216
            sx = 1;
217
            alpha[dx * 4 + 0] = 1.f - alpha[dx * 4 + 3];
218
            alpha[dx * 4 + 1] = alpha[dx * 4 + 3];
219
            alpha[dx * 4 + 2] = 0.f;
220
            alpha[dx * 4 + 3] = 0.f;
221
        }
222
        if (sx == 0)
223
        {
224
            sx = 1;
225
            alpha[dx * 4 + 0] = alpha[dx * 4 + 0] + alpha[dx * 4 + 1];
226
            alpha[dx * 4 + 1] = alpha[dx * 4 + 2];
227
            alpha[dx * 4 + 2] = alpha[dx * 4 + 3];
228
            alpha[dx * 4 + 3] = 0.f;
229
        }
230
        if (sx == w - 2)
231
        {
232
            sx = w - 3;
233
            alpha[dx * 4 + 3] = alpha[dx * 4 + 2] + alpha[dx * 4 + 3];
234
            alpha[dx * 4 + 2] = alpha[dx * 4 + 1];
235
            alpha[dx * 4 + 1] = alpha[dx * 4 + 0];
236
            alpha[dx * 4 + 0] = 0.f;
237
        }
238
        if (sx >= w - 1)
239
        {
240
            sx = w - 3;
241
            alpha[dx * 4 + 3] = 1.f - alpha[dx * 4 + 0];
242
            alpha[dx * 4 + 2] = alpha[dx * 4 + 0];
243
            alpha[dx * 4 + 1] = 0.f;
244
            alpha[dx * 4 + 0] = 0.f;
245
        }
246

247
        xofs[dx] = sx;
248
    }
249
}
250

251
static void resize_bicubic_image(const Mat& src, Mat& dst, float* alpha, int* xofs, float* beta, int* yofs)
252
{
253
    int w = dst.w;
254
    int h = dst.h;
255

256
    // loop body
257
    Mat rowsbuf0(w);
258
    Mat rowsbuf1(w);
259
    Mat rowsbuf2(w);
260
    Mat rowsbuf3(w);
261
    float* rows0 = rowsbuf0;
262
    float* rows1 = rowsbuf1;
263
    float* rows2 = rowsbuf2;
264
    float* rows3 = rowsbuf3;
265

266
    int prev_sy1 = -3;
267

268
    for (int dy = 0; dy < h; dy++)
269
    {
270
        int sy = yofs[dy];
271

272
        if (sy == prev_sy1)
273
        {
274
            // reuse all rows
275
        }
276
        else if (sy == prev_sy1 + 1)
277
        {
278
            // hresize one row
279
            float* rows0_old = rows0;
280
            rows0 = rows1;
281
            rows1 = rows2;
282
            rows2 = rows3;
283
            rows3 = rows0_old;
284
            const float* S3 = src.row(sy + 2);
285

286
            const float* alphap = alpha;
287
            float* rows3p = rows3;
288
            for (int dx = 0; dx < w; dx++)
289
            {
290
                int sx = xofs[dx];
291
                const float* S3p = S3 + sx;
292

293
                float a0 = alphap[0];
294
                float a1 = alphap[1];
295
                float a2 = alphap[2];
296
                float a3 = alphap[3];
297
                rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
298

299
                alphap += 4;
300
            }
301
        }
302
        else if (sy == prev_sy1 + 2)
303
        {
304
            // hresize two rows
305
            float* rows0_old = rows0;
306
            float* rows1_old = rows1;
307
            rows0 = rows2;
308
            rows1 = rows3;
309
            rows2 = rows0_old;
310
            rows3 = rows1_old;
311
            const float* S2 = src.row(sy + 1);
312
            const float* S3 = src.row(sy + 2);
313

314
            const float* alphap = alpha;
315
            float* rows2p = rows2;
316
            float* rows3p = rows3;
317
            for (int dx = 0; dx < w; dx++)
318
            {
319
                int sx = xofs[dx];
320
                const float* S2p = S2 + sx;
321
                const float* S3p = S3 + sx;
322

323
                float a0 = alphap[0];
324
                float a1 = alphap[1];
325
                float a2 = alphap[2];
326
                float a3 = alphap[3];
327
                rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
328
                rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
329

330
                alphap += 4;
331
            }
332
        }
333
        else if (sy == prev_sy1 + 3)
334
        {
335
            // hresize three rows
336
            float* rows0_old = rows0;
337
            float* rows1_old = rows1;
338
            float* rows2_old = rows2;
339
            rows0 = rows3;
340
            rows1 = rows0_old;
341
            rows2 = rows1_old;
342
            rows3 = rows2_old;
343
            const float* S1 = src.row(sy);
344
            const float* S2 = src.row(sy + 1);
345
            const float* S3 = src.row(sy + 2);
346

347
            const float* alphap = alpha;
348
            float* rows1p = rows1;
349
            float* rows2p = rows2;
350
            float* rows3p = rows3;
351
            for (int dx = 0; dx < w; dx++)
352
            {
353
                int sx = xofs[dx];
354
                const float* S1p = S1 + sx;
355
                const float* S2p = S2 + sx;
356
                const float* S3p = S3 + sx;
357

358
                float a0 = alphap[0];
359
                float a1 = alphap[1];
360
                float a2 = alphap[2];
361
                float a3 = alphap[3];
362
                rows1p[dx] = S1p[-1] * a0 + S1p[0] * a1 + S1p[1] * a2 + S1p[2] * a3;
363
                rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
364
                rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
365

366
                alphap += 4;
367
            }
368
        }
369
        else
370
        {
371
            // hresize four rows
372
            const float* S0 = src.row(sy - 1);
373
            const float* S1 = src.row(sy);
374
            const float* S2 = src.row(sy + 1);
375
            const float* S3 = src.row(sy + 2);
376

377
            const float* alphap = alpha;
378
            float* rows0p = rows0;
379
            float* rows1p = rows1;
380
            float* rows2p = rows2;
381
            float* rows3p = rows3;
382
            for (int dx = 0; dx < w; dx++)
383
            {
384
                int sx = xofs[dx];
385
                const float* S0p = S0 + sx;
386
                const float* S1p = S1 + sx;
387
                const float* S2p = S2 + sx;
388
                const float* S3p = S3 + sx;
389

390
                float a0 = alphap[0];
391
                float a1 = alphap[1];
392
                float a2 = alphap[2];
393
                float a3 = alphap[3];
394
                rows0p[dx] = S0p[-1] * a0 + S0p[0] * a1 + S0p[1] * a2 + S0p[2] * a3;
395
                rows1p[dx] = S1p[-1] * a0 + S1p[0] * a1 + S1p[1] * a2 + S1p[2] * a3;
396
                rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
397
                rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
398

399
                alphap += 4;
400
            }
401
        }
402

403
        prev_sy1 = sy;
404

405
        // vresize
406
        float b0 = beta[0];
407
        float b1 = beta[1];
408
        float b2 = beta[2];
409
        float b3 = beta[3];
410

411
        float* rows0p = rows0;
412
        float* rows1p = rows1;
413
        float* rows2p = rows2;
414
        float* rows3p = rows3;
415
        float* Dp = dst.row(dy);
416
        for (int dx = 0; dx < w; dx++)
417
        {
418
            //             D[x] = rows0[x]*b0 + rows1[x]*b1 + rows2[x]*b2 + rows3[x]*b3;
419
            *Dp++ = *rows0p++ * b0 + *rows1p++ * b1 + *rows2p++ * b2 + *rows3p++ * b3;
420
        }
421

422
        beta += 4;
423
    }
424
}
425

426
int Interp::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
427
{
428
    int w = bottom_blob.w;
429
    int h = bottom_blob.h;
430

431
    int outw = output_width;
432
    int outh = output_height;
433
    if (bottom_blob.dims == 1)
434
    {
435
        w = 1;
436
        h = 1;
437
    }
438
    if (outw == 0 || outh == 0)
439
    {
440
        outw = static_cast<int>(w * width_scale);
441
        outh = static_cast<int>(h * height_scale);
442
    }
443

444
    Mat reference_blob;
445
    reference_blob.w = outw;
446
    reference_blob.h = outh;
447

448
    std::vector<Mat> bottom_blobs(2);
449
    bottom_blobs[0] = bottom_blob;
450
    bottom_blobs[1] = reference_blob;
451

452
    std::vector<Mat> top_blobs(1);
453

454
    int ret = forward(bottom_blobs, top_blobs, opt);
455

456
    top_blob = top_blobs[0];
457

458
    return ret;
459
}
460

461
int Interp::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
462
{
463
    const Mat& bottom_blob = bottom_blobs[0];
464
    const Mat& reference_blob = bottom_blobs[1];
465
    Mat& top_blob = top_blobs[0];
466

467
    int w = bottom_blob.w;
468
    int h = bottom_blob.h;
469
    int channels = bottom_blob.c;
470
    int dims = bottom_blob.dims;
471
    size_t elemsize = bottom_blob.elemsize;
472

473
    int outw = reference_blob.w;
474
    int outh = reference_blob.h;
475

476
    if (dims == 1)
477
    {
478
        // special case for 2d resize on flattened blob
479
        top_blob.create(outw, outh, w, elemsize, opt.blob_allocator);
480
        if (top_blob.empty())
481
            return -100;
482

483
        #pragma omp parallel for num_threads(opt.num_threads)
484
        for (int q = 0; q < w; q++)
485
        {
486
            Mat top_blob_c = top_blob.channel(q);
487
            const float v = bottom_blob[q];
488
            top_blob_c.fill(v);
489
        }
490

491
        return 0;
492
    }
493

494
    if (dims == 2)
495
    {
496
        if (outw == w)
497
        {
498
            top_blob = bottom_blob;
499
            return 0;
500
        }
501

502
        top_blob.create(outw, h, elemsize, opt.blob_allocator);
503
        if (top_blob.empty())
504
            return -100;
505

506
        if (resize_type == 1) // nearest
507
        {
508
            const float ws = output_width ? w / (float)outw : 1.f / width_scale;
509

510
            #pragma omp parallel for num_threads(opt.num_threads)
511
            for (int y = 0; y < h; y++)
512
            {
513
                const float* ptr = bottom_blob.row(y);
514
                float* outptr = top_blob.row(y);
515
                for (int x = 0; x < outw; x++)
516
                {
517
                    int in_x = std::min((int)(x * ws), (w - 1));
518
                    *outptr++ = ptr[in_x];
519
                }
520
            }
521
        }
522

523
        if (resize_type == 2) // bilinear
524
        {
525
            int* buf = new int[outw + outw * 2];
526

527
            int* xofs = buf;
528
            float* alpha = (float*)(buf + outw);
529

530
            linear_coeffs(w, outw, xofs, alpha, align_corner);
531

532
            #pragma omp parallel for num_threads(opt.num_threads)
533
            for (int y = 0; y < h; y++)
534
            {
535
                const float* ptr = bottom_blob.row(y);
536
                float* outptr = top_blob.row(y);
537
                const float* alphap = alpha;
538

539
                for (int x = 0; x < outw; x++)
540
                {
541
                    int sx = xofs[x];
542
                    const float* Sp = ptr + sx;
543
                    float a0 = alphap[0];
544
                    float a1 = alphap[1];
545
                    *outptr++ = Sp[0] * a0 + Sp[1] * a1;
546
                    alphap += 2;
547
                }
548
            }
549

550
            delete[] buf;
551
        }
552

553
        if (resize_type == 3) // bicubic
554
        {
555
            int* buf = new int[outw + outw * 4];
556

557
            int* xofs = buf;
558
            float* alpha = (float*)(buf + outw);
559

560
            cubic_coeffs(w, outw, xofs, alpha, align_corner);
561

562
            #pragma omp parallel for num_threads(opt.num_threads)
563
            for (int y = 0; y < h; y++)
564
            {
565
                const float* ptr = bottom_blob.row(y);
566
                float* outptr = top_blob.row(y);
567
                const float* alphap = alpha;
568

569
                for (int x = 0; x < outw; x++)
570
                {
571
                    int sx = xofs[x];
572
                    const float* Sp = ptr + sx;
573
                    float a0 = alphap[0];
574
                    float a1 = alphap[1];
575
                    float a2 = alphap[2];
576
                    float a3 = alphap[3];
577
                    *outptr++ = Sp[-1] * a0 + Sp[0] * a1 + Sp[1] * a2 + Sp[2] * a3;
578
                    alphap += 4;
579
                }
580
            }
581

582
            delete[] buf;
583
        }
584

585
        return 0;
586
    }
587

588
    if (outw == w && outh == h)
589
    {
590
        top_blob = bottom_blob;
591
        return 0;
592
    }
593

594
    top_blob.create(outw, outh, channels, elemsize, opt.blob_allocator);
595
    if (top_blob.empty())
596
        return -100;
597

598
    if (resize_type == 1) // nearest
599
    {
600
        const float hs = output_height ? h / (float)outh : 1.f / height_scale;
601
        const float ws = output_width ? w / (float)outw : 1.f / width_scale;
602

603
        #pragma omp parallel for num_threads(opt.num_threads)
604
        for (int q = 0; q < channels; q++)
605
        {
606
            const float* ptr = bottom_blob.channel(q);
607
            float* outptr = top_blob.channel(q);
608
            for (int y = 0; y < outh; y++)
609
            {
610
                int in_y = std::min((int)(y * hs), (h - 1));
611
                for (int x = 0; x < outw; x++)
612
                {
613
                    int in_x = std::min((int)(x * ws), (w - 1));
614
                    *outptr++ = ptr[in_y * w + in_x];
615
                }
616
            }
617
        }
618
    }
619

620
    if (resize_type == 2) // bilinear
621
    {
622
        int* buf = new int[outw + outh + outw * 2 + outh * 2];
623

624
        int* xofs = buf;        //new int[outw];
625
        int* yofs = buf + outw; //new int[outh];
626

627
        float* alpha = (float*)(buf + outw + outh);           //new float[outw * 2];
628
        float* beta = (float*)(buf + outw + outh + outw * 2); //new float[outh * 2];
629

630
        linear_coeffs(w, outw, xofs, alpha, align_corner);
631
        linear_coeffs(h, outh, yofs, beta, align_corner);
632

633
        #pragma omp parallel for num_threads(opt.num_threads)
634
        for (int q = 0; q < channels; ++q)
635
        {
636
            const Mat src = bottom_blob.channel(q);
637
            Mat dst = top_blob.channel(q);
638

639
            resize_bilinear_image(src, dst, alpha, xofs, beta, yofs);
640
        }
641

642
        delete[] buf;
643
    }
644

645
    if (resize_type == 3) // bicubic
646
    {
647
        int* buf = new int[outw + outh + outw * 4 + outh * 4];
648

649
        int* xofs = buf;        //new int[outw];
650
        int* yofs = buf + outw; //new int[outh];
651

652
        float* alpha = (float*)(buf + outw + outh);           //new float[outw * 4];
653
        float* beta = (float*)(buf + outw + outh + outw * 4); //new float[outh * 4];
654

655
        cubic_coeffs(w, outw, xofs, alpha, align_corner);
656
        cubic_coeffs(h, outh, yofs, beta, align_corner);
657

658
        #pragma omp parallel for num_threads(opt.num_threads)
659
        for (int q = 0; q < channels; q++)
660
        {
661
            const Mat src = bottom_blob.channel(q);
662
            Mat dst = top_blob.channel(q);
663

664
            resize_bicubic_image(src, dst, alpha, xofs, beta, yofs);
665
        }
666

667
        delete[] buf;
668
    }
669

670
    return 0;
671
}
672

673
} // namespace ncnn
674
ncnn

Использование cookies