1
// Tencent is pleased to support the open source community by making ncnn available.
3
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
5
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6
// in compliance with the License. You may obtain a copy of the License at
8
// https://opensource.org/licenses/BSD-3-Clause
10
// Unless required by applicable law or agreed to in writing, software distributed
11
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
13
// specific language governing permissions and limitations under the License.
22
// Sets the support_inplace flag to true.
// NOTE(review): the enclosing definition header is not visible in this excerpt;
// presumably this statement sits in the GELU constructor -- confirm against the full file.
support_inplace = true;
25
// Reads this layer's configuration from the parameter dictionary `pd`.
// NOTE(review): the body's braces and return statement are not visible in this excerpt.
int GELU::load_param(const ParamDict& pd)
27
// Stores the value of pd.get(0, 0) in fast_gelu.
// NOTE(review): presumably the first argument is the parameter id and the second is
// the value used when that id is absent -- confirm against the ParamDict declaration.
// fast_gelu presumably selects the tanh approximation in forward_inplace; the code
// that reads it is not visible in this excerpt.
fast_gelu = pd.get(0, 0);
32
// Applies the GELU activation to bottom_top_blob element by element, overwriting
// each value with its activated result (ptr[i] is both read and written).
//
// Two loop nests are visible below: the first uses the tanh-based approximation,
// the second uses the erfc-based form.
//
// NOTE(review): this excerpt is missing lines -- the braces, the declaration of
// `size`, the condition that chooses between the two loop nests, and the return
// statement. Presumably size = w * h * d and the choice is made on fast_gelu;
// confirm against the full file.
int GELU::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
34
// Blob extents read from the Mat; w, h and d are not used by any line visible here
// (presumably they feed the missing `size` computation).
int w = bottom_top_blob.w;
35
int h = bottom_top_blob.h;
36
int d = bottom_top_blob.d;
37
int channels = bottom_top_blob.c;
42
// Tanh-approximation loop nest.
// The channel loop is split across opt.num_threads OpenMP threads.
#pragma omp parallel for num_threads(opt.num_threads)
43
for (int q = 0; q < channels; q++)
45
// Pointer to the float data of channel q.
float* ptr = bottom_top_blob.channel(q);
47
// `size` is the per-channel element count; its declaration is not visible here.
for (int i = 0; i < size; i++)
49
// y = 0.5x * (1 + tanh(sqrt(2/Pi) * (x + 0.044715x^3)))
// 0.79788452f is sqrt(2/Pi) rounded to float precision; x^3 is written as
// repeated multiplication.
50
ptr[i] = 0.5f * ptr[i] * (1.0f + tanhf(0.79788452f * (ptr[i] + 0.044715f * ptr[i] * ptr[i] * ptr[i])));
56
// erfc-based loop nest, with the same channel-parallel structure as above.
#pragma omp parallel for num_threads(opt.num_threads)
57
for (int q = 0; q < channels; q++)
59
// Pointer to the float data of channel q.
float* ptr = bottom_top_blob.channel(q);
61
for (int i = 0; i < size; i++)
63
// y = x * P(X <= x) where X ~ N(0, 1)
// P(X <= x) = 0.5 * erfc(-x / sqrt(2)); 0.70710678f is 1/sqrt(2) rounded to
// float precision.
64
ptr[i] = 0.5f * ptr[i] * erfcf(-0.70710678f * ptr[i]);