1
// Tencent is pleased to support the open source community by making ncnn available.
3
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
8
// https://opensource.org/licenses/BSD-3-Clause
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
22
// Sets the support_inplace flag to false for this layer.
// NOTE(review): presumably the body of the Interp constructor, whose header
// is not visible here — confirm.
support_inplace = false;
25
// Loads the layer configuration from pd. Each field is fetched by numeric id
// with the default shown in parentheses:
//   0 resize_type (0), 1 height_scale (1.f), 2 width_scale (1.f),
//   3 output_height (0), 4 output_width (0), 5 dynamic_target_size (0),
//   6 align_corner (0).
// forward() treats resize_type 1 as nearest, 2 as bilinear and 3 as bicubic.
int Interp::load_param(const ParamDict& pd)
27
resize_type = pd.get(0, 0);
28
height_scale = pd.get(1, 1.f);
29
width_scale = pd.get(2, 1.f);
30
output_height = pd.get(3, 0);
31
output_width = pd.get(4, 0);
32
dynamic_target_size = pd.get(5, 0);
33
align_corner = pd.get(6, 0);
35
// Only resize types 0..3 are accepted; anything else is reported via the log.
if (resize_type < 0 || resize_type > 3)
37
NCNN_LOGE("unsupported resize type %d", resize_type);
41
// NOTE(review): presumably the target size then arrives as a second input
// blob (see the multi-blob forward overload), hence one_blob_only is cleared
// — confirm.
if (dynamic_target_size == 1)
43
one_blob_only = false;
49
// Builds the per-output-position linear interpolation weights for resampling
// a line of w samples to outw samples.
//   w            source length
//   outw         destination length
//   xofs         NOTE(review): presumably receives the source offset sx for
//                each dx; the store is not visible here — confirm
//   alpha        receives two weights per output position, at alpha[dx * 2]
//                and alpha[dx * 2 + 1]; they are written as (1 - fx, fx)
//   align_corner selects between the two coordinate mappings below
//                (NOTE(review): the guarding conditions are not visible here)
#if defined(__GNUC__) && defined(__powerpc__) && defined(__ALTIVEC__)
50
// NOTE: the gcc AltiVec-optimized build of this function produces wrong results,
51
// so tree vectorization has to be disabled here --- nihui
52
__attribute__((optimize("no-tree-vectorize")))
55
linear_coeffs(int w, int outw, int* xofs, float* alpha, int align_corner)
57
// Default mapping: plain size ratio, computed in double precision.
double scale = (double)w / outw;
60
// Alternative mapping where the first and last samples of source and
// destination coincide.
// NOTE(review): for outw == 1 this divides by zero in double arithmetic —
// confirm whether a guard exists elsewhere.
scale = (double)(w - 1) / (outw - 1);
63
for (int dx = 0; dx < outw; dx++)
65
// Pixel-centre mapping: destination centre (dx + 0.5) mapped back to source.
float fx = (float)((dx + 0.5) * scale - 0.5);
68
// Corner-aligned mapping: destination index scaled directly.
fx = static_cast<float>(dx * scale);
71
// Integer source position at or below fx.
int sx = static_cast<int>(floor(fx));
87
// The weight pair is complementary (sums to one).
alpha[dx * 2] = 1.f - fx;
88
alpha[dx * 2 + 1] = fx;
92
// Resizes src into dst in two separable steps: source rows are interpolated
// horizontally with the alpha weight pairs into the row buffers rows0/rows1,
// and each destination row is then a vertical blend of those two buffers.
//   alpha/xofs  horizontal weights (two per output column) and source offsets
//   beta/yofs   vertical counterparts
// NOTE(review): w, h, sx, sy, prev_sy1, b0/b1 and the rowsbuf* storage are
// declared on lines not visible here; presumably w/h are the dst dimensions
// and b0/b1 come from beta — confirm.
static void resize_bilinear_image(const Mat& src, Mat& dst, float* alpha, int* xofs, float* beta, int* yofs)
100
float* rows0 = rowsbuf0;
101
float* rows1 = rowsbuf1;
105
for (int dy = 0; dy < h; dy++)
113
// The source row pair moved down by exactly one row relative to the previous
// iteration: only the row at sy + 1 is interpolated again, into rows1.
else if (sy == prev_sy1 + 1)
116
// NOTE(review): presumably the start of a rows0/rows1 swap so the previously
// computed row is reused; the remaining swap statements are not visible here.
float* rows0_old = rows0;
119
const float* S1 = src.row(sy + 1);
121
const float* alphap = alpha;
122
float* rows1p = rows1;
123
for (int dx = 0; dx < w; dx++)
126
const float* S1p = S1 + sx;
128
float a0 = alphap[0];
129
float a1 = alphap[1];
130
// Two-tap horizontal blend of neighbouring source samples.
rows1p[dx] = S1p[0] * a0 + S1p[1] * a1;
138
// General case: both source rows (sy and sy + 1) are interpolated
// horizontally in a single pass.
const float* S0 = src.row(sy);
139
const float* S1 = src.row(sy + 1);
141
const float* alphap = alpha;
142
float* rows0p = rows0;
143
float* rows1p = rows1;
144
for (int dx = 0; dx < w; dx++)
147
const float* S0p = S0 + sx;
148
const float* S1p = S1 + sx;
150
float a0 = alphap[0];
151
float a1 = alphap[1];
152
rows0p[dx] = S0p[0] * a0 + S0p[1] * a1;
153
rows1p[dx] = S1p[0] * a0 + S1p[1] * a1;
165
// Vertical step: blend the two horizontally resized rows into dst row dy.
float* rows0p = rows0;
166
float* rows1p = rows1;
167
float* Dp = dst.row(dy);
168
for (int dx = 0; dx < w; dx++)
170
// D[x] = rows0[x]*b0 + rows1[x]*b1;
171
*Dp++ = *rows0p++ * b0 + *rows1p++ * b1;
178
// Writes the four cubic-convolution interpolation weights for the offset fx
// into coeffs[0..3].
//   fx      position of the sample point relative to the second tap
//   coeffs  output array of at least four floats
// NOTE(review): fx0, fx1 and fx2 are defined on lines not visible here;
// presumably fx + 1, fx and 1 - fx (the distances to the first three taps)
// — confirm.
static inline void interpolate_cubic(float fx, float* coeffs)
180
// Kernel sharpness parameter of the cubic convolution kernel.
const float A = -0.75f;
185
// float fx3 = 2 - fx;
187
// Outer-lobe polynomial of the kernel, evaluated at fx0.
coeffs[0] = A * fx0 * fx0 * fx0 - 5 * A * fx0 * fx0 + 8 * A * fx0 - 4 * A;
188
// Inner-lobe polynomial of the kernel, evaluated at fx1 and fx2.
coeffs[1] = (A + 2) * fx1 * fx1 * fx1 - (A + 3) * fx1 * fx1 + 1;
189
coeffs[2] = (A + 2) * fx2 * fx2 * fx2 - (A + 3) * fx2 * fx2 + 1;
190
// The last weight is derived from the others so that the four sum to one,
// which is why fx3 above is never needed.
coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
193
// Builds the per-output-position cubic interpolation weights for resampling
// a line of w samples to outw samples.
//   w            source length
//   outw         destination length
//   xofs         NOTE(review): presumably receives the source offset sx for
//                each dx; the store is not visible here — confirm
//   alpha        receives four weights per output position, starting at
//                alpha[dx * 4]
//   align_corner selects between the two coordinate mappings below
//                (NOTE(review): the guarding conditions are not visible here)
static void cubic_coeffs(int w, int outw, int* xofs, float* alpha, int align_corner)
195
// Default mapping: plain size ratio, computed in double precision.
double scale = (double)w / outw;
198
// Alternative mapping where the first and last samples coincide.
// NOTE(review): for outw == 1 this divides by zero in double arithmetic —
// confirm whether a guard exists elsewhere.
scale = (double)(w - 1) / (outw - 1);
201
for (int dx = 0; dx < outw; dx++)
203
// Pixel-centre mapping.
float fx = (float)((dx + 0.5) * scale - 0.5);
206
// Corner-aligned mapping.
fx = static_cast<float>(dx * scale);
209
int sx = static_cast<int>(floor(fx));
212
// Regular four-tap weights for this output position.
interpolate_cubic(fx, alpha + dx * 4);
217
// The four groups below redistribute the weights onto fewer taps.
// NOTE(review): presumably border handling for sx near 0 or near w - 1; the
// guarding conditions are not visible here — confirm.
//
// Group 1: collapse onto taps 0 and 1 using the old tap-3 weight; taps 2 and
// 3 become zero.
alpha[dx * 4 + 0] = 1.f - alpha[dx * 4 + 3];
218
alpha[dx * 4 + 1] = alpha[dx * 4 + 3];
219
alpha[dx * 4 + 2] = 0.f;
220
alpha[dx * 4 + 3] = 0.f;
225
// Group 2: merge taps 0 and 1 into tap 0, shift the rest down by one, zero
// tap 3.
alpha[dx * 4 + 0] = alpha[dx * 4 + 0] + alpha[dx * 4 + 1];
226
alpha[dx * 4 + 1] = alpha[dx * 4 + 2];
227
alpha[dx * 4 + 2] = alpha[dx * 4 + 3];
228
alpha[dx * 4 + 3] = 0.f;
233
// Group 3: merge taps 2 and 3 into tap 3, shift the rest up by one, zero
// tap 0. The assignment order matters: each source slot is read before it is
// overwritten.
alpha[dx * 4 + 3] = alpha[dx * 4 + 2] + alpha[dx * 4 + 3];
234
alpha[dx * 4 + 2] = alpha[dx * 4 + 1];
235
alpha[dx * 4 + 1] = alpha[dx * 4 + 0];
236
alpha[dx * 4 + 0] = 0.f;
241
// Group 4: collapse onto taps 2 and 3 using the old tap-0 weight; taps 0 and
// 1 become zero.
alpha[dx * 4 + 3] = 1.f - alpha[dx * 4 + 0];
242
alpha[dx * 4 + 2] = alpha[dx * 4 + 0];
243
alpha[dx * 4 + 1] = 0.f;
244
alpha[dx * 4 + 0] = 0.f;
251
// Resizes src into dst in two separable steps: source rows are interpolated
// horizontally with four alpha weights per output column into the row buffers
// rows0..rows3, and each destination row is then a vertical four-way blend of
// those buffers.
//   alpha/xofs  horizontal weights (four per output column) and source offsets
//   beta/yofs   vertical counterparts
// Consecutive destination rows often need overlapping source rows, so the
// branches below interpolate only the rows that are new relative to the
// previous iteration.
// NOTE(review): w, h, sx, sy, prev_sy1, b0..b3 and the rowsbuf* storage are
// declared on lines not visible here; presumably w/h are the dst dimensions
// and b0..b3 come from beta — confirm.
static void resize_bicubic_image(const Mat& src, Mat& dst, float* alpha, int* xofs, float* beta, int* yofs)
261
float* rows0 = rowsbuf0;
262
float* rows1 = rowsbuf1;
263
float* rows2 = rowsbuf2;
264
float* rows3 = rowsbuf3;
268
for (int dy = 0; dy < h; dy++)
276
// Source window moved down by one row: only the row at sy + 2 is
// interpolated, into rows3.
else if (sy == prev_sy1 + 1)
279
// NOTE(review): presumably the start of a buffer rotation that keeps the
// three rows still in use; the remaining statements are not visible here.
float* rows0_old = rows0;
284
const float* S3 = src.row(sy + 2);
286
const float* alphap = alpha;
287
float* rows3p = rows3;
288
for (int dx = 0; dx < w; dx++)
291
const float* S3p = S3 + sx;
293
float a0 = alphap[0];
294
float a1 = alphap[1];
295
float a2 = alphap[2];
296
float a3 = alphap[3];
297
// Four-tap horizontal blend over source samples sx - 1 .. sx + 2.
rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
302
// Source window moved down by two rows: rows at sy + 1 and sy + 2 are
// interpolated, into rows2 and rows3.
else if (sy == prev_sy1 + 2)
305
float* rows0_old = rows0;
306
float* rows1_old = rows1;
311
const float* S2 = src.row(sy + 1);
312
const float* S3 = src.row(sy + 2);
314
const float* alphap = alpha;
315
float* rows2p = rows2;
316
float* rows3p = rows3;
317
for (int dx = 0; dx < w; dx++)
320
const float* S2p = S2 + sx;
321
const float* S3p = S3 + sx;
323
float a0 = alphap[0];
324
float a1 = alphap[1];
325
float a2 = alphap[2];
326
float a3 = alphap[3];
327
rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
328
rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
333
// Source window moved down by three rows: only one previously computed row
// can be kept; rows at sy, sy + 1 and sy + 2 are interpolated.
else if (sy == prev_sy1 + 3)
335
// hresize three rows
336
float* rows0_old = rows0;
337
float* rows1_old = rows1;
338
float* rows2_old = rows2;
343
const float* S1 = src.row(sy);
344
const float* S2 = src.row(sy + 1);
345
const float* S3 = src.row(sy + 2);
347
const float* alphap = alpha;
348
float* rows1p = rows1;
349
float* rows2p = rows2;
350
float* rows3p = rows3;
351
for (int dx = 0; dx < w; dx++)
354
const float* S1p = S1 + sx;
355
const float* S2p = S2 + sx;
356
const float* S3p = S3 + sx;
358
float a0 = alphap[0];
359
float a1 = alphap[1];
360
float a2 = alphap[2];
361
float a3 = alphap[3];
362
rows1p[dx] = S1p[-1] * a0 + S1p[0] * a1 + S1p[1] * a2 + S1p[2] * a3;
363
rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
364
rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
372
// General case: nothing can be reused, so all four source rows
// (sy - 1 .. sy + 2) are interpolated horizontally.
// NOTE(review): row sy - 1 and sample index -1 are read unguarded here;
// presumably made safe by how xofs/yofs and the weights are prepared —
// confirm.
const float* S0 = src.row(sy - 1);
373
const float* S1 = src.row(sy);
374
const float* S2 = src.row(sy + 1);
375
const float* S3 = src.row(sy + 2);
377
const float* alphap = alpha;
378
float* rows0p = rows0;
379
float* rows1p = rows1;
380
float* rows2p = rows2;
381
float* rows3p = rows3;
382
for (int dx = 0; dx < w; dx++)
385
const float* S0p = S0 + sx;
386
const float* S1p = S1 + sx;
387
const float* S2p = S2 + sx;
388
const float* S3p = S3 + sx;
390
float a0 = alphap[0];
391
float a1 = alphap[1];
392
float a2 = alphap[2];
393
float a3 = alphap[3];
394
rows0p[dx] = S0p[-1] * a0 + S0p[0] * a1 + S0p[1] * a2 + S0p[2] * a3;
395
rows1p[dx] = S1p[-1] * a0 + S1p[0] * a1 + S1p[1] * a2 + S1p[2] * a3;
396
rows2p[dx] = S2p[-1] * a0 + S2p[0] * a1 + S2p[1] * a2 + S2p[2] * a3;
397
rows3p[dx] = S3p[-1] * a0 + S3p[0] * a1 + S3p[1] * a2 + S3p[2] * a3;
411
// Vertical step: blend the four horizontally resized rows into dst row dy.
float* rows0p = rows0;
412
float* rows1p = rows1;
413
float* rows2p = rows2;
414
float* rows3p = rows3;
415
float* Dp = dst.row(dy);
416
for (int dx = 0; dx < w; dx++)
418
// D[x] = rows0[x]*b0 + rows1[x]*b1 + rows2[x]*b2 + rows3[x]*b3;
419
*Dp++ = *rows0p++ * b0 + *rows1p++ * b1 + *rows2p++ * b2 + *rows3p++ * b3;
426
// Single-input entry point. Works out the target size, records it in the w/h
// fields of a reference blob, and delegates to the multi-blob overload with
// {bottom_blob, reference_blob} as inputs; the first output of that call
// becomes top_blob.
// NOTE(review): the declaration of reference_blob and the return statements
// are on lines not visible here.
int Interp::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
428
int w = bottom_blob.w;
429
int h = bottom_blob.h;
431
// Start from the explicitly configured output size.
int outw = output_width;
432
int outh = output_height;
433
// NOTE(review): the body of this 1-D special case is not visible here.
if (bottom_blob.dims == 1)
438
// If either explicit dimension is unset (0), both are derived from the
// scale factors instead; the float product is truncated toward zero.
if (outw == 0 || outh == 0)
440
outw = static_cast<int>(w * width_scale);
441
outh = static_cast<int>(h * height_scale);
445
// Only the w/h fields of the reference blob are set here; the multi-blob
// overload reads the target size from them.
reference_blob.w = outw;
446
reference_blob.h = outh;
448
std::vector<Mat> bottom_blobs(2);
449
bottom_blobs[0] = bottom_blob;
450
bottom_blobs[1] = reference_blob;
452
std::vector<Mat> top_blobs(1);
454
int ret = forward(bottom_blobs, top_blobs, opt);
456
top_blob = top_blobs[0];
461
// Multi-input entry point. bottom_blobs[0] is the data to resize and
// bottom_blobs[1] supplies the target size through its w/h fields; the result
// is written to top_blobs[0]. The resampling method is chosen by resize_type
// (1 nearest, 2 bilinear, 3 bicubic).
// NOTE(review): the dispatch on dims, the return statements, several local
// declarations (xofs, sx) and the release of the buf allocations are on lines
// not visible here.
int Interp::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
463
const Mat& bottom_blob = bottom_blobs[0];
464
const Mat& reference_blob = bottom_blobs[1];
465
Mat& top_blob = top_blobs[0];
467
int w = bottom_blob.w;
468
int h = bottom_blob.h;
469
int channels = bottom_blob.c;
470
int dims = bottom_blob.dims;
471
size_t elemsize = bottom_blob.elemsize;
473
// Target size comes from the reference blob.
int outw = reference_blob.w;
474
int outh = reference_blob.h;
478
// special case for 2d resize on flattened blob
// The output gets w channels of outw x outh; each input element q is read as
// a scalar v for channel q.
// NOTE(review): presumably v is broadcast over the whole channel; the fill is
// not visible here — confirm.
479
top_blob.create(outw, outh, w, elemsize, opt.blob_allocator);
480
if (top_blob.empty())
483
#pragma omp parallel for num_threads(opt.num_threads)
484
for (int q = 0; q < w; q++)
486
Mat top_blob_c = top_blob.channel(q);
487
const float v = bottom_blob[q];
498
// The input blob is assigned to the output unchanged.
// NOTE(review): guarding condition not visible here; presumably outw == w.
top_blob = bottom_blob;
502
// The output keeps h rows and gets outw columns, so the branches below
// resample along the width only, one row at a time.
top_blob.create(outw, h, elemsize, opt.blob_allocator);
503
if (top_blob.empty())
506
if (resize_type == 1) // nearest
508
// ws maps an output column back to a source column: taken from the explicit
// output_width when it is set, otherwise the inverse of width_scale.
const float ws = output_width ? w / (float)outw : 1.f / width_scale;
510
#pragma omp parallel for num_threads(opt.num_threads)
511
for (int y = 0; y < h; y++)
513
const float* ptr = bottom_blob.row(y);
514
float* outptr = top_blob.row(y);
515
for (int x = 0; x < outw; x++)
517
// Truncate to a source column and clamp to the last valid one.
int in_x = std::min((int)(x * ws), (w - 1));
518
*outptr++ = ptr[in_x];
523
if (resize_type == 2) // bilinear
525
// One allocation holds outw int offsets followed by outw * 2 float weights;
// the float region lives in int-typed storage, so the sizing assumes
// sizeof(float) == sizeof(int).
// NOTE(review): confirm a matching delete[] exists on every path.
int* buf = new int[outw + outw * 2];
528
float* alpha = (float*)(buf + outw);
530
linear_coeffs(w, outw, xofs, alpha, align_corner);
532
#pragma omp parallel for num_threads(opt.num_threads)
533
for (int y = 0; y < h; y++)
535
const float* ptr = bottom_blob.row(y);
536
float* outptr = top_blob.row(y);
537
const float* alphap = alpha;
539
for (int x = 0; x < outw; x++)
542
const float* Sp = ptr + sx;
543
float a0 = alphap[0];
544
float a1 = alphap[1];
545
// Two-tap blend of neighbouring source samples.
*outptr++ = Sp[0] * a0 + Sp[1] * a1;
553
if (resize_type == 3) // bicubic
555
// Same layout as the bilinear case, with four weights per output column.
// NOTE(review): confirm a matching delete[] exists on every path.
int* buf = new int[outw + outw * 4];
558
float* alpha = (float*)(buf + outw);
560
cubic_coeffs(w, outw, xofs, alpha, align_corner);
562
#pragma omp parallel for num_threads(opt.num_threads)
563
for (int y = 0; y < h; y++)
565
const float* ptr = bottom_blob.row(y);
566
float* outptr = top_blob.row(y);
567
const float* alphap = alpha;
569
for (int x = 0; x < outw; x++)
572
const float* Sp = ptr + sx;
573
float a0 = alphap[0];
574
float a1 = alphap[1];
575
float a2 = alphap[2];
576
float a3 = alphap[3];
577
// Four-tap blend over source samples sx - 1 .. sx + 2.
*outptr++ = Sp[-1] * a0 + Sp[0] * a1 + Sp[1] * a2 + Sp[2] * a3;
588
// Same size in and out: the input blob is assigned to the output unchanged.
if (outw == w && outh == h)
590
top_blob = bottom_blob;
594
// Multi-channel case: every channel is resized independently to outw x outh.
top_blob.create(outw, outh, channels, elemsize, opt.blob_allocator);
595
if (top_blob.empty())
598
if (resize_type == 1) // nearest
600
// hs/ws map output coordinates back to source coordinates: taken from the
// explicit output size when it is set, otherwise the inverse of the scale.
const float hs = output_height ? h / (float)outh : 1.f / height_scale;
601
const float ws = output_width ? w / (float)outw : 1.f / width_scale;
603
#pragma omp parallel for num_threads(opt.num_threads)
604
for (int q = 0; q < channels; q++)
606
const float* ptr = bottom_blob.channel(q);
607
float* outptr = top_blob.channel(q);
608
for (int y = 0; y < outh; y++)
610
// Truncate to a source row and clamp to the last valid one.
int in_y = std::min((int)(y * hs), (h - 1));
611
for (int x = 0; x < outw; x++)
613
int in_x = std::min((int)(x * ws), (w - 1));
614
// The channel is indexed as a dense row-major array with row stride w.
*outptr++ = ptr[in_y * w + in_x];
620
if (resize_type == 2) // bilinear
622
// One allocation partitioned as: xofs | yofs | alpha | beta. The float
// regions live in int-typed storage, so the sizing assumes
// sizeof(float) == sizeof(int).
// NOTE(review): confirm a matching delete[] exists on every path.
int* buf = new int[outw + outh + outw * 2 + outh * 2];
624
int* xofs = buf; //new int[outw];
625
int* yofs = buf + outw; //new int[outh];
627
float* alpha = (float*)(buf + outw + outh); //new float[outw * 2];
628
float* beta = (float*)(buf + outw + outh + outw * 2); //new float[outh * 2];
630
// The coefficient tables are built once and shared read-only by all channels.
linear_coeffs(w, outw, xofs, alpha, align_corner);
631
linear_coeffs(h, outh, yofs, beta, align_corner);
633
#pragma omp parallel for num_threads(opt.num_threads)
634
for (int q = 0; q < channels; ++q)
636
const Mat src = bottom_blob.channel(q);
637
Mat dst = top_blob.channel(q);
639
resize_bilinear_image(src, dst, alpha, xofs, beta, yofs);
645
if (resize_type == 3) // bicubic
647
// Same partitioning as the bilinear case, with four weights per position.
// NOTE(review): confirm a matching delete[] exists on every path.
int* buf = new int[outw + outh + outw * 4 + outh * 4];
649
int* xofs = buf; //new int[outw];
650
int* yofs = buf + outw; //new int[outh];
652
float* alpha = (float*)(buf + outw + outh); //new float[outw * 4];
653
float* beta = (float*)(buf + outw + outh + outw * 4); //new float[outh * 4];
655
cubic_coeffs(w, outw, xofs, alpha, align_corner);
656
cubic_coeffs(h, outh, yofs, beta, align_corner);
658
#pragma omp parallel for num_threads(opt.num_threads)
659
for (int q = 0; q < channels; q++)
661
const Mat src = bottom_blob.channel(q);
662
Mat dst = top_blob.channel(q);
664
resize_bicubic_image(src, dst, alpha, xofs, beta, yofs);