ncnn

gridsample.cpp
581 строка · 22.0 Кб
Перенос по словам
1
// Tencent is pleased to support the open source community by making ncnn available.
2
//
3
// Copyright (C) 2023 THL A29 Limited, a Tencent company. All rights reserved.
4
//
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
7
//
8
// https://opensource.org/licenses/BSD-3-Clause
9
//
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
14

15
#include "gridsample.h"
16

17
namespace ncnn {
18

19
// Construct the layer and declare how it consumes its blobs.
GridSample::GridSample()
{
    // forward() reads two bottom blobs: the input feature map and the sampling grid.
    one_blob_only = false;
    // forward() creates a separate top blob instead of overwriting its input.
    support_inplace = false;
}
24

25
int GridSample::load_param(const ParamDict& pd)
26
{
27
    sample_type = pd.get(0, 1);
28
    padding_mode = pd.get(1, 1);
29
    align_corner = pd.get(2, 0);
30
    permute_fusion = pd.get(3, 0);
31

32
    if (sample_type < 1 || sample_type > 3)
33
    {
34
        NCNN_LOGE("unsupported sample type %d", sample_type);
35
        return -1;
36
    }
37

38
    if (padding_mode < 1 || padding_mode > 3)
39
    {
40
        NCNN_LOGE("unsupported padding mode %d", padding_mode);
41
        return -1;
42
    }
43

44
    return 0;
45
}
46

47
// Restore a normalized location to the actual image location.
//   When align_corner is true:
//     Normalized location (-1, -1) points to the top-left pixel.
//     Normalized location (1, 1) points to the bottom-right pixel.
//   When align_corner is false [default]:
//     Normalized location (-1, -1) points to the top-left pixel minus half a
//     pixel in both directions, i.e. (-0.5, -0.5) in actual image space.
//     Normalized location (1, 1) points to the bottom-right pixel plus half a
//     pixel in both directions, i.e. (H - 0.5, W - 0.5) in actual image space.
//
// w            : size of the image along the axis being converted
// coordx       : normalized coordinate along that axis
// align_corner : non-zero selects the align-corners mapping
// Returns the coordinate in pixel units; it is not clamped to the image.
static float grid_sample_unormalize(int w, float coordx, int align_corner)
{
    return align_corner ? (coordx + 1) / 2.f * (w - 1) : ((coordx + 1) * w - 1) / 2.f;
}
60

61
// Clamp x to the closed interval [0, border].
static float border_coord(float x, float border)
{
    return std::min(border, std::max(x, 0.0f));
}
65

66
// Reflect x back into the closed interval [0, high].
//
// The coordinate is mirrored at 0 and at high as many times as needed, so the
// mapping is periodic with period 2 * high. Folding by that period first keeps
// coordinates that lie more than one image width outside the range from
// producing a negative result.
//
// x    : coordinate to reflect
// high : upper end of the valid range; a non-positive value describes an
//        empty span, for which the only valid location, 0, is returned
// Returns the reflected coordinate in [0, high].
static float reflect_coord(float x, int high)
{
    if (high <= 0)
        return 0.f;

    // mirror at 0
    x = fabs(x);

    // fold into a single period [0, 2 * high) so that one mirror at high is enough
    x = fmod(x, 2.f * high);

    // mirror at high
    x = high - fabs(x - high);
    return x;
}
72

73
static float compute_coord(float sx, int w, int padding_mode, int align_corner)
74
{
75
    if (padding_mode == 2) // border
76
    {
77
        sx = border_coord(sx, w - 1);
78
    }
79
    else if (padding_mode == 3) // reflection
80
    {
81
        if (align_corner)
82
        {
83
            sx = reflect_coord(sx, w - 1);
84
        }
85
        else
86
        {
87
            sx = reflect_coord(sx + 0.5, w) - 0.5;
88
            sx = border_coord(sx, w - 1);
89
        }
90
    }
91

92
    return sx;
93
}
94

95
static bool in_bounds(const Mat& image, int x, int y)
96
{
97
    return x >= 0 && y >= 0 && x < image.w && y < image.h;
98
}
99

100
static bool in_bounds(const Mat& image, int x, int y, int z)
101
{
102
    return x >= 0 && y >= 0 && z >= 0 && x < image.w && y < image.h && z < image.c;
103
}
104

105
static float get_value_bounded(const Mat& image, int x, int y)
106
{
107
    return in_bounds(image, x, y) ? image.row(y)[x] : 0.f;
108
}
109

110
static float get_value_bounded(const Mat& image, int x, int y, int z)
111
{
112
    return in_bounds(image, x, y, z) ? image.depth(z).row(y)[x] : 0.f;
113
}
114

115
static float get_value_bounded(const Mat& image, int x, int y, int padding_mode, int align_corner)
116
{
117
    x = compute_coord(x, image.w, padding_mode, align_corner);
118
    y = compute_coord(y, image.h, padding_mode, align_corner);
119

120
    return get_value_bounded(image, x, y);
121
}
122

123
// Compute the four cubic interpolation weights for a fractional offset.
//
// fx     : offset of the sample from the second of the four taps
// coeffs : output array of 4 weights, one per tap in order; the last weight is
//          derived from the other three so that the four sum to 1
static inline void interpolate_cubic(float fx, float* coeffs)
{
    const float A = -0.75f;

    // distance of the sample from the first three taps
    float fx0 = fx + 1;
    float fx1 = fx;
    float fx2 = 1 - fx;
    // float fx3 = 2 - fx;

    coeffs[0] = A * fx0 * fx0 * fx0 - 5 * A * fx0 * fx0 + 8 * A * fx0 - 4 * A;
    coeffs[1] = (A + 2) * fx1 * fx1 * fx1 - (A + 3) * fx1 * fx1 + 1;
    coeffs[2] = (A + 2) * fx2 * fx2 * fx2 - (A + 3) * fx2 * fx2 + 1;
    coeffs[3] = 1.f - coeffs[0] - coeffs[1] - coeffs[2];
}
137

138
int GridSample::forward(const std::vector<Mat>& bottom_blobs, std::vector<Mat>& top_blobs, const Option& opt) const
139
{
140
    const Mat& bottom_blob = bottom_blobs[0];
141
    const Mat& grid = bottom_blobs[1];
142
    Mat& top_blob = top_blobs[0];
143

144
    int w = bottom_blob.w;
145
    int h = bottom_blob.h;
146
    int d = bottom_blob.d;
147
    int channels = bottom_blob.c;
148
    int dims = bottom_blob.dims;
149
    size_t elemsize = bottom_blob.elemsize;
150

151
    if (dims == 3)
152
    {
153
        int outw = permute_fusion == 0 ? grid.h : grid.w;
154
        int outh = permute_fusion == 0 ? grid.c : grid.h;
155

156
        top_blob.create(outw, outh, channels, elemsize, opt.blob_allocator);
157

158
        Mat offset_blob;
159
        offset_blob.create(outw, outh, grid.c, elemsize, opt.workspace_allocator);
160

161
        if (top_blob.empty() || offset_blob.empty())
162
            return -100;
163

164
        //pre-calculate all interpolation offsets for each x y, unpack grid on-the-fly
165
        if (permute_fusion == 0)
166
        {
167
            float* offsetptr_x = offset_blob.channel(0);
168
            float* offsetptr_y = offset_blob.channel(1);
169

170
            for (int y = 0; y < outh; y++)
171
            {
172
                const float* gridptr = grid.channel(y);
173
                for (int x = 0; x < outw; x++)
174
                {
175
                    float sample_x = gridptr[0];
176
                    float sample_y = gridptr[1];
177

178
                    sample_x = grid_sample_unormalize(w, sample_x, align_corner);
179
                    sample_y = grid_sample_unormalize(h, sample_y, align_corner);
180

181
                    *offsetptr_x = sample_x;
182
                    *offsetptr_y = sample_y;
183

184
                    gridptr += 2;
185
                    offsetptr_x++;
186
                    offsetptr_y++;
187
                }
188
            }
189
        }
190
        else
191
        {
192
            const float* gridptr_x = grid.channel(0);
193
            const float* gridptr_y = grid.channel(1);
194
            float* offsetptr_x = offset_blob.channel(0);
195
            float* offsetptr_y = offset_blob.channel(1);
196

197
            for (int y = 0; y < outh; y++)
198
            {
199
                for (int x = 0; x < outw; x++)
200
                {
201
                    float sample_x = *gridptr_x;
202
                    float sample_y = *gridptr_y;
203

204
                    sample_x = grid_sample_unormalize(w, sample_x, align_corner);
205
                    sample_y = grid_sample_unormalize(h, sample_y, align_corner);
206

207
                    *offsetptr_x = sample_x;
208
                    *offsetptr_y = sample_y;
209

210
                    gridptr_x++;
211
                    gridptr_y++;
212
                    offsetptr_x++;
213
                    offsetptr_y++;
214
                }
215
            }
216
        }
217

218
        if (sample_type == Interpolation_BILINEAR) // bilinear
219
        {
220
            #pragma omp parallel for num_threads(opt.num_threads)
221
            for (int q = 0; q < channels; q++)
222
            {
223
                const Mat image = bottom_blob.channel(q);
224
                float* outptr = top_blob.channel(q);
225
                const float* offsetptr_x = offset_blob.channel(0);
226
                const float* offsetptr_y = offset_blob.channel(1);
227

228
                for (int y = 0; y < outh; y++)
229
                {
230
                    for (int x = 0; x < outw; x++)
231
                    {
232
                        float sample_x = *offsetptr_x;
233
                        float sample_y = *offsetptr_y;
234

235
                        // bilinear interpolate
236
                        float v;
237
                        {
238
                            sample_x = compute_coord(sample_x, w, padding_mode, align_corner);
239
                            sample_y = compute_coord(sample_y, h, padding_mode, align_corner);
240
                            int x0 = floor(sample_x);
241
                            int y0 = floor(sample_y);
242
                            int x1 = x0 + 1;
243
                            int y1 = y0 + 1;
244

245
                            float v00 = get_value_bounded(image, x0, y0);
246
                            float v01 = get_value_bounded(image, x1, y0);
247
                            float v10 = get_value_bounded(image, x0, y1);
248
                            float v11 = get_value_bounded(image, x1, y1);
249

250
                            float alpha = sample_x - x0;
251
                            float beta = sample_y - y0;
252

253
                            float v0 = v00 * (1 - alpha) + v01 * alpha;
254
                            float v1 = v10 * (1 - alpha) + v11 * alpha;
255

256
                            v = v0 * (1 - beta) + v1 * beta;
257
                        }
258

259
                        outptr[0] = v;
260
                        outptr += 1;
261

262
                        offsetptr_x++;
263
                        offsetptr_y++;
264
                    }
265
                }
266
            }
267
        }
268
        else if (sample_type == Interpolation_NEAREST) // nearest
269
        {
270
            #pragma omp parallel for num_threads(opt.num_threads)
271
            for (int q = 0; q < channels; q++)
272
            {
273
                const Mat image = bottom_blob.channel(q);
274
                float* outptr = top_blob.channel(q);
275
                const float* offsetptr_x = offset_blob.channel(0);
276
                const float* offsetptr_y = offset_blob.channel(1);
277

278
                for (int y = 0; y < outh; y++)
279
                {
280
                    for (int x = 0; x < outw; x++)
281
                    {
282
                        float sample_x = *offsetptr_x;
283
                        float sample_y = *offsetptr_y;
284
                        sample_x = compute_coord(sample_x, w, padding_mode, align_corner);
285
                        sample_y = compute_coord(sample_y, h, padding_mode, align_corner);
286

287
                        int x0 = static_cast<int>(floor(sample_x + 0.5f));
288
                        int y0 = static_cast<int>(floor(sample_y + 0.5f));
289

290
                        float v = get_value_bounded(image, x0, y0);
291

292
                        outptr[0] = v;
293
                        outptr += 1;
294

295
                        offsetptr_x++;
296
                        offsetptr_y++;
297
                    }
298
                }
299
            }
300
        }
301
        else if (sample_type == Interpolation_BICUBIC) // bicubic
302
        {
303
            #pragma omp parallel for num_threads(opt.num_threads)
304
            for (int q = 0; q < channels; q++)
305
            {
306
                const Mat image = bottom_blob.channel(q);
307
                float* outptr = top_blob.channel(q);
308
                const float* offsetptr_x = offset_blob.channel(0);
309
                const float* offsetptr_y = offset_blob.channel(1);
310

311
                for (int y = 0; y < outh; y++)
312
                {
313
                    for (int x = 0; x < outw; x++)
314
                    {
315
                        float sample_x = *offsetptr_x;
316
                        float sample_y = *offsetptr_y;
317

318
                        // bicubic interpolate
319
                        float v;
320
                        {
321
                            int x1 = (int)floorf(sample_x);
322
                            int y1 = (int)floorf(sample_y);
323
                            int x0 = x1 - 1;
324
                            int y0 = y1 - 1;
325
                            int x2 = x1 + 1;
326
                            int y2 = y1 + 1;
327
                            int x3 = x1 + 2;
328
                            int y3 = y1 + 2;
329

330
                            float v00 = get_value_bounded(image, x0, y0, padding_mode, align_corner);
331
                            float v01 = get_value_bounded(image, x1, y0, padding_mode, align_corner);
332
                            float v02 = get_value_bounded(image, x2, y0, padding_mode, align_corner);
333
                            float v03 = get_value_bounded(image, x3, y0, padding_mode, align_corner);
334
                            float v10 = get_value_bounded(image, x0, y1, padding_mode, align_corner);
335
                            float v11 = get_value_bounded(image, x1, y1, padding_mode, align_corner);
336
                            float v12 = get_value_bounded(image, x2, y1, padding_mode, align_corner);
337
                            float v13 = get_value_bounded(image, x3, y1, padding_mode, align_corner);
338
                            float v20 = get_value_bounded(image, x0, y2, padding_mode, align_corner);
339
                            float v21 = get_value_bounded(image, x1, y2, padding_mode, align_corner);
340
                            float v22 = get_value_bounded(image, x2, y2, padding_mode, align_corner);
341
                            float v23 = get_value_bounded(image, x3, y2, padding_mode, align_corner);
342
                            float v30 = get_value_bounded(image, x0, y3, padding_mode, align_corner);
343
                            float v31 = get_value_bounded(image, x1, y3, padding_mode, align_corner);
344
                            float v32 = get_value_bounded(image, x2, y3, padding_mode, align_corner);
345
                            float v33 = get_value_bounded(image, x3, y3, padding_mode, align_corner);
346

347
                            float x_coeffs[4];
348
                            float y_coeffs[4];
349
                            interpolate_cubic(sample_x - x1, x_coeffs);
350
                            interpolate_cubic(sample_y - y1, y_coeffs);
351

352
                            float v0 = v00 * x_coeffs[0] + v01 * x_coeffs[1] + v02 * x_coeffs[2] + v03 * x_coeffs[3];
353
                            float v1 = v10 * x_coeffs[0] + v11 * x_coeffs[1] + v12 * x_coeffs[2] + v13 * x_coeffs[3];
354
                            float v2 = v20 * x_coeffs[0] + v21 * x_coeffs[1] + v22 * x_coeffs[2] + v23 * x_coeffs[3];
355
                            float v3 = v30 * x_coeffs[0] + v31 * x_coeffs[1] + v32 * x_coeffs[2] + v33 * x_coeffs[3];
356

357
                            v = v0 * y_coeffs[0] + v1 * y_coeffs[1] + v2 * y_coeffs[2] + v3 * y_coeffs[3];
358
                        }
359

360
                        outptr[0] = v;
361
                        outptr += 1;
362

363
                        offsetptr_x++;
364
                        offsetptr_y++;
365
                    }
366
                }
367
            }
368
        }
369
    }
370

371
    if (dims == 4)
372
    {
373
        int outw = permute_fusion == 0 ? grid.h : grid.w;
374
        int outh = permute_fusion == 0 ? grid.d : grid.h;
375
        int outd = permute_fusion == 0 ? grid.c : grid.d;
376

377
        top_blob.create(outw, outh, outd, channels, elemsize, opt.blob_allocator);
378

379
        Mat offset_blob;
380
        offset_blob.create(outw, outh, outd, grid.c, elemsize, opt.workspace_allocator);
381

382
        if (top_blob.empty() || offset_blob.empty())
383
            return -100;
384

385
        //pre-calculate all interpolation offsets for each x y, unpack grid on-the-fly
386
        if (permute_fusion == 0)
387
        {
388
            float* offsetptr_x = offset_blob.channel(0);
389
            float* offsetptr_y = offset_blob.channel(1);
390
            float* offsetptr_z = offset_blob.channel(2);
391

392
            for (int z = 0; z < outd; z++)
393
            {
394
                const float* gridptr = grid.channel(z);
395
                for (int y = 0; y < outh; y++)
396
                {
397
                    for (int x = 0; x < outw; x++)
398
                    {
399
                        float sample_x = gridptr[0];
400
                        float sample_y = gridptr[1];
401
                        float sample_z = gridptr[2];
402

403
                        sample_x = grid_sample_unormalize(w, sample_x, align_corner);
404
                        sample_x = compute_coord(sample_x, w, padding_mode, align_corner);
405

406
                        sample_y = grid_sample_unormalize(h, sample_y, align_corner);
407
                        sample_y = compute_coord(sample_y, h, padding_mode, align_corner);
408

409
                        sample_z = grid_sample_unormalize(d, sample_z, align_corner);
410
                        sample_z = compute_coord(sample_z, d, padding_mode, align_corner);
411

412
                        *offsetptr_x = sample_x;
413
                        *offsetptr_y = sample_y;
414
                        *offsetptr_z = sample_z;
415

416
                        gridptr += 3;
417
                        offsetptr_x++;
418
                        offsetptr_y++;
419
                        offsetptr_z++;
420
                    }
421
                }
422
            }
423
        }
424
        else
425
        {
426
            const float* gridptr_x = grid.channel(0);
427
            const float* gridptr_y = grid.channel(1);
428
            const float* gridptr_z = grid.channel(2);
429
            float* offsetptr_x = offset_blob.channel(0);
430
            float* offsetptr_y = offset_blob.channel(1);
431
            float* offsetptr_z = offset_blob.channel(2);
432

433
            for (int z = 0; z < outd; z++)
434
            {
435
                for (int y = 0; y < outh; y++)
436
                {
437
                    for (int x = 0; x < outw; x++)
438
                    {
439
                        float sample_x = *gridptr_x;
440
                        float sample_y = *gridptr_y;
441
                        float sample_z = *gridptr_z;
442

443
                        sample_x = grid_sample_unormalize(w, sample_x, align_corner);
444
                        sample_x = compute_coord(sample_x, w, padding_mode, align_corner);
445

446
                        sample_y = grid_sample_unormalize(h, sample_y, align_corner);
447
                        sample_y = compute_coord(sample_y, h, padding_mode, align_corner);
448

449
                        sample_z = grid_sample_unormalize(d, sample_z, align_corner);
450
                        sample_z = compute_coord(sample_z, d, padding_mode, align_corner);
451

452
                        *offsetptr_x = sample_x;
453
                        *offsetptr_y = sample_y;
454
                        *offsetptr_z = sample_z;
455

456
                        gridptr_x++;
457
                        gridptr_y++;
458
                        gridptr_z++;
459
                        offsetptr_x++;
460
                        offsetptr_y++;
461
                        offsetptr_z++;
462
                    }
463
                }
464
            }
465
        }
466

467
        if (sample_type == Interpolation_BILINEAR) // bilinear
468
        {
469
            #pragma omp parallel for num_threads(opt.num_threads)
470
            for (int q = 0; q < channels; q++)
471
            {
472
                const Mat image = bottom_blob.channel(q);
473
                float* outptr = top_blob.channel(q);
474
                const float* offsetptr_x = offset_blob.channel(0);
475
                const float* offsetptr_y = offset_blob.channel(1);
476
                const float* offsetptr_z = offset_blob.channel(2);
477

478
                for (int z = 0; z < outd; z++)
479
                {
480
                    for (int y = 0; y < outh; y++)
481
                    {
482
                        for (int x = 0; x < outw; x++)
483
                        {
484
                            float sample_x = *offsetptr_x;
485
                            float sample_y = *offsetptr_y;
486
                            float sample_z = *offsetptr_z;
487

488
                            // bilinear interpolate
489
                            float v;
490
                            {
491
                                int x0 = (int)floor(sample_x);
492
                                int y0 = (int)floor(sample_y);
493
                                int z0 = (int)floor(sample_z);
494
                                int x1 = x0 + 1;
495
                                int y1 = y0 + 1;
496
                                int z1 = z0 + 1;
497

498
                                float v000 = get_value_bounded(image, x0, y0, z0);
499
                                float v001 = get_value_bounded(image, x1, y0, z0);
500
                                float v010 = get_value_bounded(image, x0, y1, z0);
501
                                float v011 = get_value_bounded(image, x1, y1, z0);
502
                                float v100 = get_value_bounded(image, x0, y0, z1);
503
                                float v101 = get_value_bounded(image, x1, y0, z1);
504
                                float v110 = get_value_bounded(image, x0, y1, z1);
505
                                float v111 = get_value_bounded(image, x1, y1, z1);
506

507
                                float alpha = sample_x - x0;
508
                                float beta = sample_y - y0;
509
                                float gamma = sample_z - z0;
510

511
                                float v00 = v000 * (1 - alpha) + v001 * alpha;
512
                                float v01 = v010 * (1 - alpha) + v011 * alpha;
513
                                float v10 = v100 * (1 - alpha) + v101 * alpha;
514
                                float v11 = v110 * (1 - alpha) + v111 * alpha;
515

516
                                float v0 = v00 * (1 - beta) + v01 * beta;
517
                                float v1 = v10 * (1 - beta) + v11 * beta;
518

519
                                v = v0 * (1 - gamma) + v1 * gamma;
520
                            }
521

522
                            outptr[0] = v;
523
                            outptr += 1;
524

525
                            offsetptr_x++;
526
                            offsetptr_y++;
527
                            offsetptr_z++;
528
                        }
529
                    }
530
                }
531
            }
532
        }
533
        else if (sample_type == Interpolation_NEAREST) // nearest
534
        {
535
            #pragma omp parallel for num_threads(opt.num_threads)
536
            for (int q = 0; q < channels; q++)
537
            {
538
                const Mat image = bottom_blob.channel(q);
539
                float* outptr = top_blob.channel(q);
540
                const float* offsetptr_x = offset_blob.channel(0);
541
                const float* offsetptr_y = offset_blob.channel(1);
542
                const float* offsetptr_z = offset_blob.channel(2);
543

544
                for (int z = 0; z < outd; z++)
545
                {
546
                    for (int y = 0; y < outh; y++)
547
                    {
548
                        for (int x = 0; x < outw; x++)
549
                        {
550
                            float sample_x = *offsetptr_x;
551
                            float sample_y = *offsetptr_y;
552
                            float sample_z = *offsetptr_z;
553

554
                            int x0 = static_cast<int>(floor(sample_x + 0.5f));
555
                            int y0 = static_cast<int>(floor(sample_y + 0.5f));
556
                            int z0 = static_cast<int>(floor(sample_z + 0.5f));
557

558
                            float v = get_value_bounded(image, x0, y0, z0);
559

560
                            outptr[0] = v;
561
                            outptr += 1;
562

563
                            offsetptr_x++;
564
                            offsetptr_y++;
565
                            offsetptr_z++;
566
                        }
567
                    }
568
                }
569
            }
570
        }
571
        else if (sample_type == 3)
572
        {
573
            NCNN_LOGE("unsupported bicubic when dims == 4");
574
            return -1;
575
        }
576
    }
577

578
    return 0;
579
}
580

581
} // namespace ncnn
582
ncnn

Использование cookies