// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
24
support_inplace = false;
27
int Pooling3D::load_param(const ParamDict& pd)
29
pooling_type = pd.get(0, 0);
30
kernel_w = pd.get(1, 0);
31
kernel_h = pd.get(11, kernel_w);
32
kernel_d = pd.get(21, kernel_w);
33
stride_w = pd.get(2, 1);
34
stride_h = pd.get(12, stride_w);
35
stride_d = pd.get(22, stride_w);
36
pad_left = pd.get(3, 0);
37
pad_right = pd.get(14, pad_left);
38
pad_top = pd.get(13, pad_left);
39
pad_bottom = pd.get(15, pad_top);
40
pad_front = pd.get(23, pad_left);
41
pad_behind = pd.get(16, pad_front);
42
global_pooling = pd.get(4, 0);
43
pad_mode = pd.get(5, 0);
44
avgpool_count_include_pad = pd.get(6, 0);
45
adaptive_pooling = pd.get(7, 0);
47
out_h = pd.get(18, out_w);
48
out_d = pd.get(28, out_w);
53
int Pooling3D::forward(const Mat& bottom_blob, Mat& top_blob, const Option& opt) const
55
// max value in NxN window
56
// avg value in NxN window
58
int w = bottom_blob.w;
59
int h = bottom_blob.h;
60
int d = bottom_blob.d;
61
int channels = bottom_blob.c;
62
size_t elemsize = bottom_blob.elemsize;
64
// NCNN_LOGE("Pooling input %d x %d pad = %d %d %d %d ksize=%d %d stride=%d %d", w, h, pad_left, pad_right, pad_top, pad_bottom, kernel_w, kernel_h, stride_w, stride_h);
67
top_blob.create(channels, elemsize, opt.blob_allocator);
73
if (pooling_type == PoolMethod_MAX)
75
#pragma omp parallel for num_threads(opt.num_threads)
76
for (int q = 0; q < channels; q++)
78
const float* ptr = bottom_blob.channel(q);
80
float max_value = ptr[0];
81
for (int i = 0; i < size; i++)
83
max_value = std::max(max_value, ptr[i]);
86
top_blob[q] = max_value;
89
else if (pooling_type == PoolMethod_AVE)
91
#pragma omp parallel for num_threads(opt.num_threads)
92
for (int q = 0; q < channels; q++)
94
const float* ptr = bottom_blob.channel(q);
97
for (int i = 0; i < size; i++)
102
top_blob[q] = sum / size;
109
if (adaptive_pooling)
111
int _out_w = out_w == -233 ? w : out_w;
112
int _out_h = out_h == -233 ? h : out_h;
113
int _out_d = out_d == -233 ? d : out_d;
115
if (_out_w == w && _out_h == h && _out_d == d)
117
top_blob = bottom_blob;
121
top_blob.create(_out_w, _out_h, _out_d, channels, elemsize, opt.blob_allocator);
122
if (top_blob.empty())
125
if (pooling_type == PoolMethod_MAX)
127
#pragma omp parallel for num_threads(opt.num_threads)
128
for (int q = 0; q < channels; q++)
130
const float* inptr = bottom_blob.channel(q);
131
float* outptr = top_blob.channel(q);
133
for (int z = 0; z < _out_d; z++)
136
const int id0 = d * z / _out_d;
138
const int id1 = (d * (z + 1) + _out_d - 1) / _out_d;
139
for (int i = 0; i < _out_h; i++)
142
const int ih0 = h * i / _out_h;
144
const int ih1 = (h * (i + 1) + _out_h - 1) / _out_h;
145
for (int j = 0; j < _out_w; j++)
148
const int iw0 = w * j / _out_w;
150
const int iw1 = (w * (j + 1) + _out_w - 1) / _out_w;
152
float max_value = inptr[id0 * w * h + ih0 * w + iw0];
154
for (int id = id0; id < id1; id++)
156
for (int ih = ih0; ih < ih1; ih++)
158
for (int iw = iw0; iw < iw1; iw++)
160
max_value = std::max(max_value, inptr[id * w * h + ih * w + iw]);
165
outptr[j] = max_value;
173
else if (pooling_type == PoolMethod_AVE)
175
#pragma omp parallel for num_threads(opt.num_threads)
176
for (int q = 0; q < channels; q++)
178
const float* inptr = bottom_blob.channel(q);
179
float* outptr = top_blob.channel(q);
181
for (int z = 0; z < _out_d; z++)
184
const int id0 = d * z / _out_d;
186
const int id1 = (d * (z + 1) + _out_d - 1) / _out_d;
188
for (int i = 0; i < _out_h; i++)
191
const int ih0 = h * i / _out_h;
193
const int ih1 = (h * (i + 1) + _out_h - 1) / _out_h;
195
for (int j = 0; j < _out_w; j++)
198
const int iw0 = w * j / _out_w;
200
const int iw1 = (w * (j + 1) + _out_w - 1) / _out_w;
204
for (int id = id0; id < id1; id++)
206
for (int ih = ih0; ih < ih1; ih++)
208
for (int iw = iw0; iw < iw1; iw++)
210
sum += inptr[id * w * h + ih * w + iw];
215
outptr[j] = sum / hk / wk / dk;
227
Mat bottom_blob_bordered;
228
Option opt_pad = opt;
229
opt_pad.use_packing_layout = false;
230
make_padding(bottom_blob, bottom_blob_bordered, opt_pad);
231
if (bottom_blob_bordered.empty())
234
w = bottom_blob_bordered.w;
235
h = bottom_blob_bordered.h;
236
d = bottom_blob_bordered.d;
238
int outw = (w - kernel_w) / stride_w + 1;
239
int outh = (h - kernel_h) / stride_h + 1;
240
int outd = (d - kernel_d) / stride_d + 1;
242
top_blob.create(outw, outh, outd, channels, elemsize);
243
if (top_blob.empty())
246
const int maxk = kernel_w * kernel_h * kernel_d;
249
std::vector<int> _space_ofs(maxk);
250
int* space_ofs = &_space_ofs[0];
254
int gap0 = w - kernel_w;
255
int gap1 = h * w - w * kernel_h;
256
for (int z = 0; z < kernel_d; z++)
258
for (int i = 0; i < kernel_h; i++)
260
for (int j = 0; j < kernel_w; j++)
272
if (pooling_type == PoolMethod_MAX)
274
#pragma omp parallel for num_threads(opt.num_threads)
275
for (int q = 0; q < channels; q++)
277
const Mat m = bottom_blob_bordered.channel(q);
278
float* outptr = top_blob.channel(q);
279
for (int z = 0; z < outd; z++)
281
for (int i = 0; i < outh; i++)
283
for (int j = 0; j < outw; j++)
285
const float* sptr = m.depth(z * stride_d).row(i * stride_h) + j * stride_w;
287
float max_value = sptr[0];
289
for (int l = 0; l < maxk; l++)
291
float val = sptr[space_ofs[l]];
292
max_value = std::max(max_value, val);
295
outptr[j] = max_value;
303
else if (pooling_type == PoolMethod_AVE)
305
if (avgpool_count_include_pad == 0)
311
if (pad_mode == 0) // full padding
313
wtailpad = bottom_blob_bordered.w - bottom_blob.w - pad_left - pad_right;
314
htailpad = bottom_blob_bordered.h - bottom_blob.h - pad_top - pad_bottom;
315
dtailpad = bottom_blob_bordered.d - bottom_blob.d - pad_front - pad_behind;
318
#pragma omp parallel for num_threads(opt.num_threads)
319
for (int q = 0; q < channels; q++)
321
const Mat m = bottom_blob_bordered.channel(q);
322
float* outptr = top_blob.channel(q);
324
for (int z = 0; z < outd; z++)
326
int sz0 = z * stride_d;
328
for (int i = 0; i < outh; i++)
330
int sy0 = i * stride_h;
332
for (int j = 0; j < outw; j++)
334
int sx0 = j * stride_w;
338
for (int kd = 0; kd < kernel_d; kd++)
345
if (sz >= d - pad_behind - dtailpad)
348
for (int ki = 0; ki < kernel_h; ki++)
355
if (sy >= h - pad_bottom - htailpad)
358
for (int kj = 0; kj < kernel_w; kj++)
365
if (sx >= w - pad_right - wtailpad)
368
float val = m.depth(sz).row(sy)[sx];
375
outptr[j] = sum / area;
383
else // if (avgpool_count_include_pad == 1)
385
#pragma omp parallel for num_threads(opt.num_threads)
386
for (int q = 0; q < channels; q++)
388
const Mat m = bottom_blob_bordered.channel(q);
389
float* outptr = top_blob.channel(q);
391
for (int z = 0; z < outd; z++)
393
for (int i = 0; i < outh; i++)
395
for (int j = 0; j < outw; j++)
397
const float* sptr = m.depth(z * stride_d).row(i * stride_h) + j * stride_w;
401
for (int l = 0; l < maxk; l++)
403
float val = sptr[space_ofs[l]];
407
outptr[j] = sum / maxk;
420
void Pooling3D::make_padding(const Mat& bottom_blob, Mat& bottom_blob_bordered, const Option& opt) const
422
int w = bottom_blob.w;
423
int h = bottom_blob.h;
424
int d = bottom_blob.d;
426
bottom_blob_bordered = bottom_blob;
428
float pad_value = 0.f;
429
if (pooling_type == PoolMethod_MAX)
431
pad_value = bottom_blob.elemsize == 1 ? -128.f : -FLT_MAX;
433
else if (pooling_type == PoolMethod_AVE)
442
if (pad_mode == 0) // full padding
444
int wtail = (w + pad_left + pad_right - kernel_w) % stride_w;
445
int htail = (h + pad_top + pad_bottom - kernel_h) % stride_h;
446
int dtail = (d + pad_front + pad_behind - kernel_d) % stride_d;
448
wtailpad = stride_w - wtail;
450
htailpad = stride_h - htail;
452
dtailpad = stride_d - dtail;
455
opt_b.blob_allocator = opt.workspace_allocator;
456
copy_make_border_3d(bottom_blob, bottom_blob_bordered, pad_top, pad_bottom + htailpad, pad_left, pad_right + wtailpad, pad_front, pad_behind + dtailpad, BORDER_CONSTANT, pad_value, opt_b);
458
else if (pad_mode == 1) // valid padding
461
opt_b.blob_allocator = opt.workspace_allocator;
462
copy_make_border_3d(bottom_blob, bottom_blob_bordered, pad_top, pad_bottom, pad_left, pad_right, pad_front, pad_behind, BORDER_CONSTANT, pad_value, opt_b);
464
else if (pad_mode == 2) // tensorflow padding=SAME or onnx padding=SAME_UPPER
466
int wpad = kernel_w + (w - 1) / stride_w * stride_w - w;
467
int hpad = kernel_h + (h - 1) / stride_h * stride_h - h;
468
int dpad = kernel_d + (d - 1) / stride_d * stride_d - d;
469
if (wpad > 0 || hpad > 0 || dpad > 0)
472
opt_b.blob_allocator = opt.workspace_allocator;
473
copy_make_border_3d(bottom_blob, bottom_blob_bordered, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, dpad / 2, dpad - dpad / 2, BORDER_CONSTANT, pad_value, opt_b);
476
else if (pad_mode == 3) // onnx padding=SAME_LOWER
478
int wpad = kernel_w + (w - 1) / stride_w * stride_w - w;
479
int hpad = kernel_h + (h - 1) / stride_h * stride_h - h;
480
int dpad = kernel_d + (d - 1) / stride_d * stride_d - d;
481
if (wpad > 0 || hpad > 0 || dpad > 0)
484
opt_b.blob_allocator = opt.workspace_allocator;
485
copy_make_border_3d(bottom_blob, bottom_blob_bordered, hpad - hpad / 2, hpad / 2, wpad - wpad / 2, wpad / 2, dpad / 2, dpad - dpad / 2, BORDER_CONSTANT, pad_value, opt_b);