1 #pragma once
2 
3 #include <cmath>
4 
5 /*
6 ** Fast Math Approximations to various Functions
7 **
8 ** Documentation on each one
9 */
10 
11 /*
12 ** These are directly copied from JUCE6 modules/juce_dsp/mathos/juce_FastMathApproximations.h
13 **
14 ** Since JUCE6 is GPL3 and Surge is GPL3 that copy is fine, but I did want to explicitly
15 ** acknowledge it
16 */
17 
18 namespace Surge
19 {
20 namespace DSP
21 {
22 
23 // JUCE6 Pade approximation of sin valid from -PI to PI with max error of 1e-5 and average error of
24 // 5e-7
fastsin(float x)25 inline float fastsin(float x) noexcept
26 {
27     auto x2 = x * x;
28     auto numerator = -x * (-(float)11511339840 +
29                            x2 * ((float)1640635920 + x2 * (-(float)52785432 + x2 * (float)479249)));
30     auto denominator =
31         (float)11511339840 + x2 * ((float)277920720 + x2 * ((float)3177720 + x2 * (float)18361));
32     return numerator / denominator;
33 }
34 
fastsinSSE(__m128 x)35 inline __m128 fastsinSSE(__m128 x) noexcept
36 {
37 #define M(a, b) _mm_mul_ps(a, b)
38 #define A(a, b) _mm_add_ps(a, b)
39 #define S(a, b) _mm_sub_ps(a, b)
40 #define F(a) _mm_set_ps1(a)
41 #define C(x) __m128 m##x = F((float)x)
42 
43     /*
44     auto numerator = -x * (-(float)11511339840 +
45                            x2 * ((float)1640635920 + x2 * (-(float)52785432 + x2 * (float)479249)));
46     auto denominator =
47         (float)11511339840 + x2 * ((float)277920720 + x2 * ((float)3177720 + x2 * (float)18361));
48         */
49     C(11511339840);
50     C(1640635920);
51     C(52785432);
52     C(479249);
53     C(277920720);
54     C(3177720);
55     C(18361);
56     auto mnegone = F(-1);
57 
58     auto x2 = M(x, x);
59 
60     auto num = M(mnegone,
61                  M(x, S(M(x2, A(m1640635920, M(x2, S(M(x2, m479249), m52785432)))), m11511339840)));
62     auto den = A(m11511339840, M(x2, A(m277920720, M(x2, A(m3177720, M(x2, m18361))))));
63 
64 #undef C
65 #undef M
66 #undef A
67 #undef S
68 #undef F
69     return _mm_div_ps(num, den);
70 }
71 
72 // JUCE6 Pade approximation of cos valid from -PI to PI with max error of 1e-5 and average error of
73 // 5e-7
fastcos(float x)74 inline float fastcos(float x) noexcept
75 {
76     auto x2 = x * x;
77     auto numerator = -(-(float)39251520 + x2 * ((float)18471600 + x2 * (-1075032 + 14615 * x2)));
78     auto denominator = (float)39251520 + x2 * (1154160 + x2 * (16632 + x2 * 127));
79     return numerator / denominator;
80 }
81 
fastcosSSE(__m128 x)82 inline __m128 fastcosSSE(__m128 x) noexcept
83 {
84 #define M(a, b) _mm_mul_ps(a, b)
85 #define A(a, b) _mm_add_ps(a, b)
86 #define S(a, b) _mm_sub_ps(a, b)
87 #define F(a) _mm_set_ps1(a)
88 #define C(x) __m128 m##x = F((float)x)
89 
90     // auto x2 = x * x;
91     auto x2 = M(x, x);
92 
93     C(39251520);
94     C(18471600);
95     C(1075032);
96     C(14615);
97     C(1154160);
98     C(16632);
99     C(127);
100 
101     // auto numerator = -(-(float)39251520 + x2 * ((float)18471600 + x2 * (-1075032 + 14615 * x2)));
102     auto num = S(m39251520, M(x2, A(m18471600, M(x2, S(M(m14615, x2), m1075032)))));
103 
104     // auto denominator = (float)39251520 + x2 * (1154160 + x2 * (16632 + x2 * 127));
105     auto den = A(m39251520, M(x2, A(m1154160, M(x2, A(m16632, M(x2, m127))))));
106 #undef C
107 #undef M
108 #undef A
109 #undef S
110 #undef F
111     return _mm_div_ps(num, den);
112 }
113 
114 /*
115 ** Push x into -PI, PI periodically. There is probably a faster way to do this
116 */
clampToPiRange(float x)117 inline float clampToPiRange(float x)
118 {
119     if (x <= M_PI && x >= -M_PI)
120         return x;
121     float y = x + M_PI; // so now I am 0,2PI
122 
123     // float p = fmod( y, 2.0 * M_PI );
124 
125     constexpr float oo2p = 1.0 / (2.0 * M_PI);
126     float p = y - 2.0 * M_PI * (int)(y * oo2p);
127 
128     if (p < 0)
129         p += 2.0 * M_PI;
130     return p - M_PI;
131 }
132 
clampToPiRangeSSE(__m128 x)133 inline __m128 clampToPiRangeSSE(__m128 x)
134 {
135     const auto mpi = _mm_set1_ps(M_PI);
136     const auto m2pi = _mm_set1_ps(2.0 * M_PI);
137     const auto oo2p = _mm_set1_ps(1.0 / (2.0 * M_PI));
138     const auto mz = _mm_setzero_ps();
139 
140     auto y = _mm_add_ps(x, mpi);
141     auto yip = _mm_cvtepi32_ps(_mm_cvttps_epi32(_mm_mul_ps(y, oo2p)));
142     auto p = _mm_sub_ps(y, _mm_mul_ps(m2pi, yip));
143     auto off = _mm_and_ps(_mm_cmplt_ps(p, mz), m2pi);
144     p = _mm_add_ps(p, off);
145 
146     return _mm_sub_ps(p, mpi);
147 }
148 
149 /*
150 ** Valid in range -5, 5
151 */
fasttanh(float x)152 inline float fasttanh(float x) noexcept
153 {
154     auto x2 = x * x;
155     auto numerator = x * (135135 + x2 * (17325 + x2 * (378 + x2)));
156     auto denominator = 135135 + x2 * (62370 + x2 * (3150 + 28 * x2));
157     return numerator / denominator;
158 }
159 
160 // Valid in range (-PI/2, PI/2)
fasttan(float x)161 inline float fasttan(float x) noexcept
162 {
163     auto x2 = x * x;
164     auto numerator = x * (-135135 + x2 * (17325 + x2 * (-378 + x2)));
165     auto denominator = -135135 + x2 * (62370 + x2 * (-3150 + 28 * x2));
166     return numerator / denominator;
167 }
168 
fasttanhSSE(__m128 x)169 inline __m128 fasttanhSSE(__m128 x)
170 {
171     static const __m128 m135135 = _mm_set_ps1(135135), m17325 = _mm_set_ps1(17325),
172                         m378 = _mm_set_ps1(378), m62370 = _mm_set_ps1(62370),
173                         m3150 = _mm_set_ps1(3150), m28 = _mm_set_ps1(28);
174 
175 #define M(a, b) _mm_mul_ps(a, b)
176 #define A(a, b) _mm_add_ps(a, b)
177 
178     auto x2 = M(x, x);
179     auto num = M(x, A(m135135, M(x2, A(m17325, M(x2, A(m378, x2))))));
180     auto den = A(m135135, M(x2, A(m62370, M(x2, A(m3150, M(m28, x2))))));
181 
182 #undef M
183 #undef A
184 
185     return _mm_div_ps(num, den);
186 }
187 
fasttanhSSEclamped(__m128 x)188 inline __m128 fasttanhSSEclamped(__m128 x)
189 {
190     auto xc = _mm_min_ps(_mm_set_ps1(5), _mm_max_ps(_mm_set_ps1(-5), x));
191     return fasttanhSSE(xc);
192 }
193 
194 /*
195 ** Valid in range -6, 4
196 */
fastexp(float x)197 inline float fastexp(float x) noexcept
198 {
199     auto numerator = 1680 + x * (840 + x * (180 + x * (20 + x)));
200     auto denominator = 1680 + x * (-840 + x * (180 + x * (-20 + x)));
201     return numerator / denominator;
202 }
203 
fastexpSSE(__m128 x)204 inline __m128 fastexpSSE(__m128 x) noexcept
205 {
206 #define M(a, b) _mm_mul_ps(a, b)
207 #define A(a, b) _mm_add_ps(a, b)
208 #define F(a) _mm_set_ps1(a)
209 
210     static const __m128 m1680 = F(1680), m840 = F(840), mneg840 = F(-840), m180 = F(180),
211                         m20 = F(20), mneg20 = F(-20);
212 
213     auto num = A(m1680, M(x, A(m840, M(x, A(m180, M(x, A(m20, x)))))));
214     auto den = A(m1680, M(x, A(mneg840, M(x, A(m180, M(x, A(mneg20, x)))))));
215 
216     return _mm_div_ps(num, den);
217 
218 #undef M
219 #undef A
220 #undef F
221 }
222 
223 } // namespace DSP
224 } // namespace Surge
225