// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
24
support_inplace = false;
27
int Pooling3D::load_param(const ParamDict& pd)
29
pooling_type = pd.get(0, 0);
30
kernel_w = pd.get(1, 0);
31
kernel_h = pd.get(11, kernel_w);
32
kernel_d = pd.get(21, kernel_w);
33
stride_w = pd.get(2, 1);
34
stride_h = pd.get(12, stride_w);
35
stride_d = pd.get(22, stride_w);
36
pad_left = pd.get(3, 0);
37
pad_right = pd.get(14, pad_left);
38
pad_top = pd.get(13, pad_left);
39
pad_bottom = pd.get(15, pad_top);
40
pad_front = pd.get(23, pad_left);
41
pad_behind = pd.get(16, pad_front);
42
global_pooling = pd.get(4, 0);
43
pad_mode = pd.get(5, 0);
44
avgpool_count_include_pad = pd.get(6, 0);
45
adaptive_pooling = pd.get(7, 0);
47
out_h = pd.get(18, out_w);
48
out_d = pd.get(28, out_w);
53
int Pooling3D::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
55
// max value in NxN window
56
// avg value in NxN window
58
int w = bottom_blob.w;
59
int h = bottom_blob.h;
60
int d = bottom_blob.d;
61
int channels = bottom_blob.c;
62
size_t elemsize = bottom_blob.elemsize;
64
// NCNN_LOGE("Pooling input %d x %d pad = %d %d %d %d ksize=%d %d stride=%d %d", w, h, pad_left, pad_right, pad_top, pad_bottom, kernel_w, kernel_h, stride_w, stride_h);
67
top_blob.create(channels, elemsize, opt.blob_allocator);
73
if (pooling_type == PoolMethod_MAX)
75
#pragma omp parallel for num_threads(opt.num_threads)
76
for (int q = 0; q < channels; q++)
78
const float* ptr = bottom_blob.channel(q);
80
float max_value = ptr[0];
81
for (int i = 0; i < size; i++)
83
max_value = std::max(max_value, ptr[i]);
86
top_blob[q] = max_value;
89
else if (pooling_type == PoolMethod_AVE)
91
#pragma omp parallel for num_threads(opt.num_threads)
92
for (int q = 0; q < channels; q++)
94
const float* ptr = bottom_blob.channel(q);
97
for (int i = 0; i < size; i++)
102
top_blob[q] = sum / size;
109
if (adaptive_pooling)
111
int _out_w = out_w == -233 ? w : out_w;
112
int _out_h = out_h == -233 ? h : out_h;
113
int _out_d = out_d == -233 ? d : out_d;
115
if (_out_w == w && _out_h == h && _out_d == d)
117
top_blob = bottom_blob;
121
top_blob.create(_out_w, _out_h, _out_d, channels, elemsize, opt.blob_allocator);
122
if (top_blob.empty())
125
if (pooling_type == PoolMethod_MAX)
127
#pragma omp parallel for num_threads(opt.num_threads)
128
for (int q = 0; q < channels; q++)
130
const float* inptr = bottom_blob.channel(q);
131
float* outptr = top_blob.channel(q);
133
for (int z = 0; z < _out_d; z++)
136
const int id0 = d * z / _out_d;
138
const int id1 = (d * (z + 1) + _out_d - 1) / _out_d;
139
for (int i = 0; i < _out_h; i++)
142
const int ih0 = h * i / _out_h;
144
const int ih1 = (h * (i + 1) + _out_h - 1) / _out_h;
145
for (int j = 0; j < _out_w; j++)
148
const int iw0 = w * j / _out_w;
150
const int iw1 = (w * (j + 1) + _out_w - 1) / _out_w;
152
float max_value = inptr[id0 * w * h + ih0 * w + iw0];
154
for (int id = id0; id < id1; id++)
156
for (int ih = ih0; ih < ih1; ih++)
158
for (int iw = iw0; iw < iw1; iw++)
160
max_value = std::max(max_value, inptr[id * w * h + ih * w + iw]);
165
outptr[j] = max_value;
173
else if (pooling_type == PoolMethod_AVE)
175
#pragma omp parallel for num_threads(opt.num_threads)
176
for (int q = 0; q < channels; q++)
178
const float* inptr = bottom_blob.channel(q);
179
float* outptr = top_blob.channel(q);
181
for (int z = 0; z < _out_d; z++)
184
const int id0 = d * z / _out_d;
186
const int id1 = (d * (z + 1) + _out_d - 1) / _out_d;
188
for (int i = 0; i < _out_h; i++)
191
const int ih0 = h * i / _out_h;
193
const int ih1 = (h * (i + 1) + _out_h - 1) / _out_h;
195
for (int j = 0; j < _out_w; j++)
198
const int iw0 = w * j / _out_w;
200
const int iw1 = (w * (j + 1) + _out_w - 1) / _out_w;
204
for (int id = id0; id < id1; id++)
206
for (int ih = ih0; ih < ih1; ih++)
208
for (int iw = iw0; iw < iw1; iw++)
210
sum += inptr[id * w * h + ih * w + iw];
215
outptr[j] = sum / hk / wk / dk;
227
Mat bottom_blob_bordered;
228
Option opt_pad = opt;
229
opt_pad.use_packing_layout = false;
230
make_padding(bottom_blob, bottom_blob_bordered, opt_pad);
231
if (bottom_blob_bordered.empty())
234
w = bottom_blob_bordered.w;
235
h = bottom_blob_bordered.h;
236
d = bottom_blob_bordered.d;
238
int outw = (w - kernel_w) / stride_w + 1;
239
int outh = (h - kernel_h) / stride_h + 1;
240
int outd = (d - kernel_d) / stride_d + 1;
242
top_blob.create(outw, outh, outd, channels, elemsize);
243
if (top_blob.empty())
246
const int maxk = kernel_w * kernel_h * kernel_d;
249
std::vector<int> _space_ofs(maxk);
250
int* space_ofs = &_space_ofs[0];
254
int gap0 = w - kernel_w;
255
int gap1 = h * w - w * kernel_h;
256
for (int z = 0; z < kernel_d; z++)
258
for (int i = 0; i < kernel_h; i++)
260
for (int j = 0; j < kernel_w; j++)
272
if (pooling_type == PoolMethod_MAX)
274
#pragma omp parallel for num_threads(opt.num_threads)
275
for (int q = 0; q < channels; q++)
277
const Mat m = bottom_blob_bordered.channel(q);
278
float* outptr = top_blob.channel(q);
279
for (int z = 0; z < outd; z++)
281
for (int i = 0; i < outh; i++)
283
for (int j = 0; j < outw; j++)
285
const float* sptr = m.depth(z * stride_d).row(i * stride_h) + j * stride_w;
287
float max_value = sptr[0];
289
for (int l = 0; l < maxk; l++)
291
float val = sptr[space_ofs[l]];
292
max_value = std::max(max_value, val);
295
outptr[j] = max_value;
303
else if (pooling_type == PoolMethod_AVE)
305
if (avgpool_count_include_pad == 0)
311
if (pad_mode == 0) // full padding
313
wtailpad = bottom_blob_bordered.w - bottom_blob.w - pad_left - pad_right;
314
htailpad = bottom_blob_bordered.h - bottom_blob.h - pad_top - pad_bottom;
315
dtailpad = bottom_blob_bordered.d - bottom_blob.d - pad_front - pad_behind;
318
#pragma omp parallel for num_threads(opt.num_threads)
319
for (int q = 0; q < channels; q++)
321
const Mat m = bottom_blob_bordered.channel(q);
322
float* outptr = top_blob.channel(q);
324
for (int z = 0; z < outd; z++)
326
int sz0 = z * stride_d;
328
for (int i = 0; i < outh; i++)
330
int sy0 = i * stride_h;
332
for (int j = 0; j < outw; j++)
334
int sx0 = j * stride_w;
338
for (int kd = 0; kd < kernel_d; kd++)
345
if (sz >= d - pad_behind - dtailpad)
348
for (int ki = 0; ki < kernel_h; ki++)
355
if (sy >= h - pad_bottom - htailpad)
358
for (int kj = 0; kj < kernel_w; kj++)
365
if (sx >= w - pad_right - wtailpad)
368
float val = m.depth(sz).row(sy)[sx];
375
outptr[j] = sum / area;
383
else // if (avgpool_count_include_pad == 1)
385
#pragma omp parallel for num_threads(opt.num_threads)
386
for (int q = 0; q < channels; q++)
388
const Mat m = bottom_blob_bordered.channel(q);
389
float* outptr = top_blob.channel(q);
391
for (int z = 0; z < outd; z++)
393
for (int i = 0; i < outh; i++)
395
for (int j = 0; j < outw; j++)
397
const float* sptr = m.depth(z * stride_d).row(i * stride_h) + j * stride_w;
401
for (int l = 0; l < maxk; l++)
403
float val = sptr[space_ofs[l]];
407
outptr[j] = sum / maxk;
420
void Pooling3D::make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const
422
int w = bottom_blob.w;
423
int h = bottom_blob.h;
424
int d = bottom_blob.d;
426
bottom_blob_bordered = bottom_blob;
428
float pad_value = 0.f;
429
if (pooling_type == PoolMethod_MAX)
431
pad_value = bottom_blob.elemsize == 1 ? -128.f : -FLT_MAX;
433
else if (pooling_type == PoolMethod_AVE)
442
if (pad_mode == 0) // full padding
444
int wtail = (w + pad_left + pad_right - kernel_w) % stride_w;
445
int htail = (h + pad_top + pad_bottom - kernel_h) % stride_h;
446
int dtail = (d + pad_front + pad_behind - kernel_d) % stride_d;
448
wtailpad = stride_w - wtail;
450
htailpad = stride_h - htail;
452
dtailpad = stride_d - dtail;
455
opt_b.blob_allocator = opt.workspace_allocator;
456
copy_make_border_3d(bottom_blob, bottom_blob_bordered, pad_top, pad_bottom + htailpad, pad_left, pad_right + wtailpad, pad_front, pad_behind + dtailpad, BORDER_CONSTANT, pad_value, opt_b);
458
else if (pad_mode == 1) // valid padding
461
opt_b.blob_allocator = opt.workspace_allocator;
462
copy_make_border_3d(bottom_blob, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, pad_front, pad_behind, BORDER_CONSTANT, pad_value, opt_b);
464
else if (pad_mode == 2) // tensorflow padding=SAME or onnx padding=SAME_UPPER
466
int wpad = kernel_w + (w - 1) / stride_w * stride_w - w;
467
int hpad = kernel_h + (h - 1) / stride_h * stride_h - h;
468
int dpad = kernel_d + (d - 1) / stride_d * stride_d - d;
469
if (wpad > 0 || hpad > 0 || dpad > 0)
472
opt_b.blob_allocator = opt.workspace_allocator;
473
copy_make_border_3d(bottom_blob, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, dpad / 2, dpad - dpad / 2, BORDER_CONSTANT, pad_value, opt_b);
476
else if (pad_mode == 3) // onnx padding=SAME_LOWER
478
int wpad = kernel_w + (w - 1) / stride_w * stride_w - w;
479
int hpad = kernel_h + (h - 1) / stride_h * stride_h - h;
480
int dpad = kernel_d + (d - 1) / stride_d * stride_d - d;
481
if (wpad > 0 || hpad > 0 || dpad > 0)
484
opt_b.blob_allocator = opt.workspace_allocator;
485
copy_make_border_3d(bottom_blob, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, dpad / 2, dpad - dpad / 2, BORDER_CONSTANT, pad_value, opt_b);