1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef X86_ACTIVATION_H
16 #define X86_ACTIVATION_H
17 
18 #include <math.h>
19 #include "mat.h"
20 
activation_ss(float v,int activation_type,const ncnn::Mat & activation_params)21 static inline float activation_ss(float v, int activation_type, const ncnn::Mat& activation_params)
22 {
23     if (activation_type == 1)
24     {
25         v = fmax(v, 0.f);
26     }
27     else if (activation_type == 2)
28     {
29         float slope = activation_params[0];
30         v = v > 0.f ? v : v * slope;
31     }
32     else if (activation_type == 3)
33     {
34         float min = activation_params[0];
35         float max = activation_params[1];
36         if (v < min)
37             v = min;
38         if (v > max)
39             v = max;
40     }
41     else if (activation_type == 4)
42     {
43         v = 1.f / (1.f + exp(-v));
44     }
45     else if (activation_type == 5)
46     {
47         v = v * tanh(log(exp(v) + 1.f));
48     }
49 
50     return v;
51 }
52 
53 #if __SSE2__
54 #include <emmintrin.h>
55 #include "sse_mathfun.h"
56 
sigmoid_sse(__m128 inputs)57 static inline __m128 sigmoid_sse(__m128 inputs)
58 {
59     const __m128 one = _mm_set1_ps(1.0f);
60     return _mm_div_ps(one, _mm_add_ps(one, exp_ps(_mm_sub_ps(_mm_setzero_ps(), inputs))));
61 }
62 
tanh_sse(__m128 inputs)63 static inline __m128 tanh_sse(__m128 inputs)
64 {
65     const __m128 one = _mm_set1_ps(1.0f);
66     const __m128 two = _mm_set1_ps(2.0f);
67     return _mm_sub_ps(_mm_mul_ps(sigmoid_sse(_mm_mul_ps(inputs, two)), two), one);
68 }
69 
mish_sse(__m128 inputs)70 static inline __m128 mish_sse(__m128 inputs)
71 {
72     return _mm_mul_ps(inputs, tanh_sse(log_ps(_mm_add_ps(exp_ps(inputs), _mm_set1_ps(1.f)))));
73 }
74 
// Absolute value of four packed floats.
//
// Implemented by clearing the sign bit of every lane. The previous
// max(0 - x, x) form returned -0.0f for a -0.0f input, because MAXPS yields
// its second operand when both operands are zero.
static inline __m128 abs_sse(__m128 inputs)
{
    // -0.0f has only the sign bit set; andnot computes (~sign_mask) & inputs
    const __m128 sign_mask = _mm_set1_ps(-0.0f);
    return _mm_andnot_ps(sign_mask, inputs);
}
79 
// Leaky relu on four packed floats: max(0, x) + slope * min(0, x) per lane,
// so positive lanes pass through and negative lanes are scaled by slope.
static inline __m128 lrelu_sse(__m128 inputs, float slope)
{
    const __m128 zero = _mm_setzero_ps();
    const __m128 negative_part = _mm_min_ps(zero, inputs);
    const __m128 positive_part = _mm_max_ps(zero, inputs);
    const __m128 scaled_negative = _mm_mul_ps(_mm_set1_ps(slope), negative_part);
    return _mm_add_ps(positive_part, scaled_negative);
}
86 
// Parametric relu on four packed floats: max(0, x) + alpha * min(0, x),
// where each lane uses the slope held in the same lane of alphas.
static inline __m128 prelu_sse(__m128 inputs, __m128 alphas)
{
    const __m128 zero = _mm_setzero_ps();
    const __m128 negative_part = _mm_min_ps(zero, inputs);
    const __m128 positive_part = _mm_max_ps(zero, inputs);
    const __m128 scaled_negative = _mm_mul_ps(alphas, negative_part);
    return _mm_add_ps(positive_part, scaled_negative);
}
93 
activation_sse(__m128 _v,int activation_type,const ncnn::Mat & activation_params)94 static inline __m128 activation_sse(__m128 _v, int activation_type, const ncnn::Mat& activation_params)
95 {
96     // Process fused activations
97     if (activation_type == 1)
98     {
99         // Relu
100         return _mm_max_ps(_v, _mm_setzero_ps());
101     }
102     else if (activation_type == 2)
103     {
104         // Leaky relu
105         return lrelu_sse(_v, activation_params[0]);
106     }
107     else if (activation_type == 3)
108     {
109         // min max clip
110         __m128 min = _mm_set1_ps(activation_params[0]);
111         __m128 max = _mm_set1_ps(activation_params[1]);
112         return _mm_min_ps(_mm_max_ps(_v, min), max);
113     }
114     else if (activation_type == 4)
115     {
116         // Sigmoid
117         return sigmoid_sse(_v);
118     }
119     else if (activation_type == 5)
120     {
121         return mish_sse(_v);
122     }
123 
124     return _v;
125 }
126 
127 #if __AVX__
128 #include <immintrin.h>
129 #include "avx_mathfun.h"
130 
sigmoid_avx(__m256 inputs)131 static inline __m256 sigmoid_avx(__m256 inputs)
132 {
133     const __m256 one = _mm256_set1_ps(1.0f);
134     return _mm256_div_ps(one, _mm256_add_ps(one, exp256_ps(_mm256_sub_ps(_mm256_setzero_ps(), inputs))));
135 }
136 
tanh_avx(__m256 inputs)137 static inline __m256 tanh_avx(__m256 inputs)
138 {
139     const __m256 one = _mm256_set1_ps(1.0f);
140     const __m256 two = _mm256_set1_ps(2.0f);
141     return _mm256_fmsub_ps(sigmoid_avx(_mm256_mul_ps(inputs, two)), two, one);
142 }
143 
mish_avx(__m256 inputs)144 static inline __m256 mish_avx(__m256 inputs)
145 {
146     return _mm256_mul_ps(inputs, tanh_avx(log256_ps(_mm256_add_ps(exp256_ps(inputs), _mm256_set1_ps(1.f)))));
147 }
148 
// Absolute value of eight packed floats.
//
// Implemented by clearing the sign bit of every lane. The previous
// max(0 - x, x) form returned -0.0f for a -0.0f input, because VMAXPS yields
// its second operand when both operands are zero.
static inline __m256 abs_avx(__m256 inputs)
{
    // -0.0f has only the sign bit set; andnot computes (~sign_mask) & inputs
    const __m256 sign_mask = _mm256_set1_ps(-0.0f);
    return _mm256_andnot_ps(sign_mask, inputs);
}
153 
// Leaky relu on eight packed floats: max(0, x) + slope * min(0, x) per lane,
// so positive lanes pass through and negative lanes are scaled by slope.
static inline __m256 lrelu_avx(__m256 inputs, float slope)
{
    const __m256 zero = _mm256_setzero_ps();
    const __m256 negative_part = _mm256_min_ps(zero, inputs);
    const __m256 positive_part = _mm256_max_ps(zero, inputs);
    const __m256 scaled_negative = _mm256_mul_ps(_mm256_set1_ps(slope), negative_part);
    return _mm256_add_ps(positive_part, scaled_negative);
}
160 
// Parametric relu on eight packed floats: max(0, x) + alpha * min(0, x),
// where each lane uses the slope held in the same lane of alphas.
static inline __m256 prelu_avx(__m256 inputs, __m256 alphas)
{
    const __m256 zero = _mm256_setzero_ps();
    const __m256 negative_part = _mm256_min_ps(zero, inputs);
    const __m256 positive_part = _mm256_max_ps(zero, inputs);
    const __m256 scaled_negative = _mm256_mul_ps(alphas, negative_part);
    return _mm256_add_ps(positive_part, scaled_negative);
}
167 
activation_avx(__m256 _v,int activation_type,const ncnn::Mat & activation_params)168 static inline __m256 activation_avx(__m256 _v, int activation_type, const ncnn::Mat& activation_params)
169 {
170     // Process fused activations
171     if (activation_type == 1)
172     {
173         // Relu
174         return _mm256_max_ps(_v, _mm256_setzero_ps());
175     }
176     else if (activation_type == 2)
177     {
178         // Leaky relu
179         return lrelu_avx(_v, activation_params[0]);
180     }
181     else if (activation_type == 3)
182     {
183         // min max clip
184         __m256 min = _mm256_set1_ps(activation_params[0]);
185         __m256 max = _mm256_set1_ps(activation_params[1]);
186         return _mm256_min_ps(_mm256_max_ps(_v, min), max);
187     }
188     else if (activation_type == 4)
189     {
190         // Sigmoid
191         return sigmoid_avx(_v);
192     }
193     else if (activation_type == 5)
194     {
195         return mish_avx(_v);
196     }
197 
198     return _v;
199 }
200 #endif // __AVX__
201 #endif // __SSE2__
202 
203 #endif // X86_ACTIVATION_H
204