1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #ifndef X86_ACTIVATION_H
16 #define X86_ACTIVATION_H
17
18 #include <math.h>
19 #include "mat.h"
20
activation_ss(float v,int activation_type,const ncnn::Mat & activation_params)21 static inline float activation_ss(float v, int activation_type, const ncnn::Mat& activation_params)
22 {
23 if (activation_type == 1)
24 {
25 v = fmax(v, 0.f);
26 }
27 else if (activation_type == 2)
28 {
29 float slope = activation_params[0];
30 v = v > 0.f ? v : v * slope;
31 }
32 else if (activation_type == 3)
33 {
34 float min = activation_params[0];
35 float max = activation_params[1];
36 if (v < min)
37 v = min;
38 if (v > max)
39 v = max;
40 }
41 else if (activation_type == 4)
42 {
43 v = 1.f / (1.f + exp(-v));
44 }
45 else if (activation_type == 5)
46 {
47 v = v * tanh(log(exp(v) + 1.f));
48 }
49
50 return v;
51 }
52
53 #if __SSE2__
54 #include <emmintrin.h>
55 #include "sse_mathfun.h"
56
sigmoid_sse(__m128 inputs)57 static inline __m128 sigmoid_sse(__m128 inputs)
58 {
59 const __m128 one = _mm_set1_ps(1.0f);
60 return _mm_div_ps(one, _mm_add_ps(one, exp_ps(_mm_sub_ps(_mm_setzero_ps(), inputs))));
61 }
62
tanh_sse(__m128 inputs)63 static inline __m128 tanh_sse(__m128 inputs)
64 {
65 const __m128 one = _mm_set1_ps(1.0f);
66 const __m128 two = _mm_set1_ps(2.0f);
67 return _mm_sub_ps(_mm_mul_ps(sigmoid_sse(_mm_mul_ps(inputs, two)), two), one);
68 }
69
mish_sse(__m128 inputs)70 static inline __m128 mish_sse(__m128 inputs)
71 {
72 return _mm_mul_ps(inputs, tanh_sse(log_ps(_mm_add_ps(exp_ps(inputs), _mm_set1_ps(1.f)))));
73 }
74
// Element-wise absolute value of 4 packed floats.
//
// Implemented by clearing the IEEE-754 sign bit of every lane. Compared with
// max(0 - x, x) this is a single bitwise operation and also maps -0.0f to
// +0.0f (the max form keeps the sign bit, because the max instruction returns
// its second operand when both operands are zero).
static inline __m128 abs_sse(__m128 inputs)
{
    // -0.0f has only the sign bit set, so it serves as the sign mask.
    const __m128 sign_mask = _mm_set1_ps(-0.0f);
    // andnot computes (~sign_mask) & inputs, i.e. drops the sign bit.
    return _mm_andnot_ps(sign_mask, inputs);
}
79
// Leaky ReLU on 4 packed floats: max(0, x) + slope * min(0, x).
static inline __m128 lrelu_sse(__m128 inputs, float slope)
{
    const __m128 zero = _mm_setzero_ps();
    const __m128 positive_part = _mm_max_ps(zero, inputs);
    const __m128 scaled_negative = _mm_mul_ps(_mm_set1_ps(slope), _mm_min_ps(zero, inputs));
    return _mm_add_ps(positive_part, scaled_negative);
}
86
// Parametric ReLU on 4 packed floats: max(0, x) + alpha * min(0, x),
// with a separate alpha supplied for each lane.
static inline __m128 prelu_sse(__m128 inputs, __m128 alphas)
{
    const __m128 zero = _mm_setzero_ps();
    const __m128 positive_part = _mm_max_ps(zero, inputs);
    const __m128 scaled_negative = _mm_mul_ps(alphas, _mm_min_ps(zero, inputs));
    return _mm_add_ps(positive_part, scaled_negative);
}
93
activation_sse(__m128 _v,int activation_type,const ncnn::Mat & activation_params)94 static inline __m128 activation_sse(__m128 _v, int activation_type, const ncnn::Mat& activation_params)
95 {
96 // Process fused activations
97 if (activation_type == 1)
98 {
99 // Relu
100 return _mm_max_ps(_v, _mm_setzero_ps());
101 }
102 else if (activation_type == 2)
103 {
104 // Leaky relu
105 return lrelu_sse(_v, activation_params[0]);
106 }
107 else if (activation_type == 3)
108 {
109 // min max clip
110 __m128 min = _mm_set1_ps(activation_params[0]);
111 __m128 max = _mm_set1_ps(activation_params[1]);
112 return _mm_min_ps(_mm_max_ps(_v, min), max);
113 }
114 else if (activation_type == 4)
115 {
116 // Sigmoid
117 return sigmoid_sse(_v);
118 }
119 else if (activation_type == 5)
120 {
121 return mish_sse(_v);
122 }
123
124 return _v;
125 }
126
127 #if __AVX__
128 #include <immintrin.h>
129 #include "avx_mathfun.h"
130
sigmoid_avx(__m256 inputs)131 static inline __m256 sigmoid_avx(__m256 inputs)
132 {
133 const __m256 one = _mm256_set1_ps(1.0f);
134 return _mm256_div_ps(one, _mm256_add_ps(one, exp256_ps(_mm256_sub_ps(_mm256_setzero_ps(), inputs))));
135 }
136
tanh_avx(__m256 inputs)137 static inline __m256 tanh_avx(__m256 inputs)
138 {
139 const __m256 one = _mm256_set1_ps(1.0f);
140 const __m256 two = _mm256_set1_ps(2.0f);
141 return _mm256_fmsub_ps(sigmoid_avx(_mm256_mul_ps(inputs, two)), two, one);
142 }
143
mish_avx(__m256 inputs)144 static inline __m256 mish_avx(__m256 inputs)
145 {
146 return _mm256_mul_ps(inputs, tanh_avx(log256_ps(_mm256_add_ps(exp256_ps(inputs), _mm256_set1_ps(1.f)))));
147 }
148
// Element-wise absolute value of 8 packed floats.
//
// Implemented by clearing the IEEE-754 sign bit of every lane. Compared with
// max(0 - x, x) this is a single bitwise operation and also maps -0.0f to
// +0.0f (the max form keeps the sign bit, because the max instruction returns
// its second operand when both operands are zero).
static inline __m256 abs_avx(__m256 inputs)
{
    // -0.0f has only the sign bit set, so it serves as the sign mask.
    const __m256 sign_mask = _mm256_set1_ps(-0.0f);
    // andnot computes (~sign_mask) & inputs, i.e. drops the sign bit.
    return _mm256_andnot_ps(sign_mask, inputs);
}
153
// Leaky ReLU on 8 packed floats: max(0, x) + slope * min(0, x).
static inline __m256 lrelu_avx(__m256 inputs, float slope)
{
    const __m256 zero = _mm256_setzero_ps();
    const __m256 positive_part = _mm256_max_ps(zero, inputs);
    const __m256 scaled_negative = _mm256_mul_ps(_mm256_set1_ps(slope), _mm256_min_ps(zero, inputs));
    return _mm256_add_ps(positive_part, scaled_negative);
}
160
// Parametric ReLU on 8 packed floats: max(0, x) + alpha * min(0, x),
// with a separate alpha supplied for each lane.
static inline __m256 prelu_avx(__m256 inputs, __m256 alphas)
{
    const __m256 zero = _mm256_setzero_ps();
    const __m256 positive_part = _mm256_max_ps(zero, inputs);
    const __m256 scaled_negative = _mm256_mul_ps(alphas, _mm256_min_ps(zero, inputs));
    return _mm256_add_ps(positive_part, scaled_negative);
}
167
activation_avx(__m256 _v,int activation_type,const ncnn::Mat & activation_params)168 static inline __m256 activation_avx(__m256 _v, int activation_type, const ncnn::Mat& activation_params)
169 {
170 // Process fused activations
171 if (activation_type == 1)
172 {
173 // Relu
174 return _mm256_max_ps(_v, _mm256_setzero_ps());
175 }
176 else if (activation_type == 2)
177 {
178 // Leaky relu
179 return lrelu_avx(_v, activation_params[0]);
180 }
181 else if (activation_type == 3)
182 {
183 // min max clip
184 __m256 min = _mm256_set1_ps(activation_params[0]);
185 __m256 max = _mm256_set1_ps(activation_params[1]);
186 return _mm256_min_ps(_mm256_max_ps(_v, min), max);
187 }
188 else if (activation_type == 4)
189 {
190 // Sigmoid
191 return sigmoid_avx(_v);
192 }
193 else if (activation_type == 5)
194 {
195 return mish_avx(_v);
196 }
197
198 return _v;
199 }
200 #endif // __AVX__
201 #endif // __SSE2__
202
203 #endif // X86_ACTIVATION_H
204