1 #include <glm/gtc/type_precision.hpp>
2 #include <glm/gtx/fast_trigonometry.hpp>
3 #include <glm/gtx/integer.hpp>
4 #include <glm/gtx/common.hpp>
5 #include <glm/gtc/constants.hpp>
6 #include <glm/gtc/ulp.hpp>
7 #include <glm/gtc/vec1.hpp>
8 #include <glm/trigonometric.hpp>
9 #include <cmath>
10 #include <ctime>
11 #include <cstdio>
12 #include <vector>
13
14 namespace fastCos
15 {
perf(bool NextFloat)16 int perf(bool NextFloat)
17 {
18 const float begin = -glm::pi<float>();
19 const float end = glm::pi<float>();
20 float result = 0.f;
21
22 const std::clock_t timestamp1 = std::clock();
23 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
24 result = glm::fastCos(i);
25
26 const std::clock_t timestamp2 = std::clock();
27 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
28 result = glm::cos(i);
29
30 const std::clock_t timestamp3 = std::clock();
31 const std::clock_t time_fast = timestamp2 - timestamp1;
32 const std::clock_t time_default = timestamp3 - timestamp2;
33 std::printf("fastCos Time %d clocks\n", static_cast<unsigned int>(time_fast));
34 std::printf("cos Time %d clocks\n", static_cast<unsigned int>(time_default));
35
36 return time_fast <= time_default ? 0 : 1;
37 }
38 }//namespace fastCos
39
40 namespace fastSin
41 {
42 /*
43 float sin(float x) {
44 float temp;
45 temp = (x + M_PI) / ((2 * M_PI) - M_PI);
46 return limited_sin((x + M_PI) - ((2 * M_PI) - M_PI) * temp));
47 }
48 */
49
perf(bool NextFloat)50 int perf(bool NextFloat)
51 {
52 const float begin = -glm::pi<float>();
53 const float end = glm::pi<float>();
54 float result = 0.f;
55
56 const std::clock_t timestamp1 = std::clock();
57 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
58 result = glm::fastSin(i);
59
60 const std::clock_t timestamp2 = std::clock();
61 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
62 result = glm::sin(i);
63
64 const std::clock_t timestamp3 = std::clock();
65 const std::clock_t time_fast = timestamp2 - timestamp1;
66 const std::clock_t time_default = timestamp3 - timestamp2;
67 std::printf("fastSin Time %d clocks\n", static_cast<unsigned int>(time_fast));
68 std::printf("sin Time %d clocks\n", static_cast<unsigned int>(time_default));
69
70 return time_fast <= time_default ? 0 : 1;
71 }
72 }//namespace fastSin
73
74 namespace fastTan
75 {
perf(bool NextFloat)76 int perf(bool NextFloat)
77 {
78 const float begin = -glm::pi<float>();
79 const float end = glm::pi<float>();
80 float result = 0.f;
81
82 const std::clock_t timestamp1 = std::clock();
83 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
84 result = glm::fastTan(i);
85
86 const std::clock_t timestamp2 = std::clock();
87 for (float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
88 result = glm::tan(i);
89
90 const std::clock_t timestamp3 = std::clock();
91 const std::clock_t time_fast = timestamp2 - timestamp1;
92 const std::clock_t time_default = timestamp3 - timestamp2;
93 std::printf("fastTan Time %d clocks\n", static_cast<unsigned int>(time_fast));
94 std::printf("tan Time %d clocks\n", static_cast<unsigned int>(time_default));
95
96 return time_fast <= time_default ? 0 : 1;
97 }
98 }//namespace fastTan
99
100 namespace fastAcos
101 {
perf(bool NextFloat)102 int perf(bool NextFloat)
103 {
104 const float begin = -glm::pi<float>();
105 const float end = glm::pi<float>();
106 float result = 0.f;
107
108 const std::clock_t timestamp1 = std::clock();
109 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
110 result = glm::fastAcos(i);
111
112 const std::clock_t timestamp2 = std::clock();
113 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
114 result = glm::acos(i);
115
116 const std::clock_t timestamp3 = std::clock();
117 const std::clock_t time_fast = timestamp2 - timestamp1;
118 const std::clock_t time_default = timestamp3 - timestamp2;
119
120 std::printf("fastAcos Time %d clocks\n", static_cast<unsigned int>(time_fast));
121 std::printf("acos Time %d clocks\n", static_cast<unsigned int>(time_default));
122
123 return time_fast <= time_default ? 0 : 1;
124 }
125 }//namespace fastAcos
126
127 namespace fastAsin
128 {
perf(bool NextFloat)129 int perf(bool NextFloat)
130 {
131 const float begin = -glm::pi<float>();
132 const float end = glm::pi<float>();
133 float result = 0.f;
134 const std::clock_t timestamp1 = std::clock();
135 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
136 result = glm::fastAsin(i);
137 const std::clock_t timestamp2 = std::clock();
138 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
139 result = glm::asin(i);
140 const std::clock_t timestamp3 = std::clock();
141 const std::clock_t time_fast = timestamp2 - timestamp1;
142 const std::clock_t time_default = timestamp3 - timestamp2;
143 std::printf("fastAsin Time %d clocks\n", static_cast<unsigned int>(time_fast));
144 std::printf("asin Time %d clocks\n", static_cast<unsigned int>(time_default));
145
146 return time_fast <= time_default ? 0 : 1;
147 }
148 }//namespace fastAsin
149
150 namespace fastAtan
151 {
perf(bool NextFloat)152 int perf(bool NextFloat)
153 {
154 const float begin = -glm::pi<float>();
155 const float end = glm::pi<float>();
156 float result = 0.f;
157 const std::clock_t timestamp1 = std::clock();
158 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
159 result = glm::fastAtan(i);
160 const std::clock_t timestamp2 = std::clock();
161 for(float i = begin; i < end; i = NextFloat ? glm::next_float(i) : i += 0.1f)
162 result = glm::atan(i);
163 const std::clock_t timestamp3 = std::clock();
164 const std::clock_t time_fast = timestamp2 - timestamp1;
165 const std::clock_t time_default = timestamp3 - timestamp2;
166 std::printf("fastAtan Time %d clocks\n", static_cast<unsigned int>(time_fast));
167 std::printf("atan Time %d clocks\n", static_cast<unsigned int>(time_default));
168
169 return time_fast <= time_default ? 0 : 1;
170 }
171 }//namespace fastAtan
172
173 namespace taylorCos
174 {
175 glm::vec4 const AngleShift(0.0f, glm::pi<float>() * 0.5f, glm::pi<float>() * 1.0f, glm::pi<float>() * 1.5f);
176
177 template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
taylorSeriesNewCos(vecType<T,P> const & x)178 GLM_FUNC_QUALIFIER vecType<T, P> taylorSeriesNewCos(vecType<T, P> const & x)
179 {
180 vecType<T, P> const Powed2(x * x);
181 vecType<T, P> const Powed4(Powed2 * Powed2);
182 vecType<T, P> const Powed6(Powed4 * Powed2);
183 vecType<T, P> const Powed8(Powed4 * Powed4);
184
185 return static_cast<T>(1)
186 - Powed2 * static_cast<T>(0.5)
187 + Powed4 * static_cast<T>(0.04166666666666666666666666666667)
188 - Powed6 * static_cast<T>(0.00138888888888888888888888888889)
189 + Powed8 * static_cast<T>(2.4801587301587301587301587301587e-5);
190 }
191
192 template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
taylorSeriesNewCos6(vecType<T,P> const & x)193 GLM_FUNC_QUALIFIER vecType<T, P> taylorSeriesNewCos6(vecType<T, P> const & x)
194 {
195 vecType<T, P> const Powed2(x * x);
196 vecType<T, P> const Powed4(Powed2 * Powed2);
197 vecType<T, P> const Powed6(Powed4 * Powed2);
198
199 return static_cast<T>(1)
200 - Powed2 * static_cast<T>(0.5)
201 + Powed4 * static_cast<T>(0.04166666666666666666666666666667)
202 - Powed6 * static_cast<T>(0.00138888888888888888888888888889);
203 }
204
205 template <glm::precision P, template <typename, glm::precision> class vecType>
fastAbs(vecType<float,P> x)206 GLM_FUNC_QUALIFIER vecType<float, P> fastAbs(vecType<float, P> x)
207 {
208 int* Pointer = reinterpret_cast<int*>(&x[0]);
209 Pointer[0] &= 0x7fffffff;
210 Pointer[1] &= 0x7fffffff;
211 Pointer[2] &= 0x7fffffff;
212 Pointer[3] &= 0x7fffffff;
213 return x;
214 }
215
216 template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
fastCosNew(vecType<T,P> const & x)217 GLM_FUNC_QUALIFIER vecType<T, P> fastCosNew(vecType<T, P> const & x)
218 {
219 vecType<T, P> const Angle0_PI(fastAbs(fmod(x + glm::pi<T>(), glm::two_pi<T>()) - glm::pi<T>()));
220 return taylorSeriesNewCos6(x);
221 /*
222 vecType<bool, P> const FirstQuarterPi(lessThanEqual(Angle0_PI, vecType<T, P>(glm::half_pi<T>())));
223
224 vecType<T, P> const RevertAngle(mix(vecType<T, P>(glm::pi<T>()), vecType<T, P>(0), FirstQuarterPi));
225 vecType<T, P> const ReturnSign(mix(vecType<T, P>(-1), vecType<T, P>(1), FirstQuarterPi));
226 vecType<T, P> const SectionAngle(RevertAngle - Angle0_PI);
227
228 return ReturnSign * taylorSeriesNewCos(SectionAngle);
229 */
230 }
231
perf_fastCosNew(float Begin,float End,std::size_t Samples)232 int perf_fastCosNew(float Begin, float End, std::size_t Samples)
233 {
234 std::vector<glm::vec4> Results;
235 Results.resize(Samples);
236
237 float Steps = (End - Begin) / Samples;
238
239 std::clock_t const TimeStampBegin = std::clock();
240
241 for(std::size_t i = 0; i < Samples; ++i)
242 Results[i] = fastCosNew(AngleShift + glm::vec4(Begin + Steps * i));
243
244 std::clock_t const TimeStampEnd = std::clock();
245
246 std::printf("fastCosNew %ld clocks\n", TimeStampEnd - TimeStampBegin);
247
248 int Error = 0;
249 for(std::size_t i = 0; i < Samples; ++i)
250 Error += Results[i].x >= -1.0f && Results[i].x <= 1.0f ? 0 : 1;
251 return Error;
252 }
253
254 template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
deterministic_fmod(vecType<T,P> const & x,T y)255 GLM_FUNC_QUALIFIER vecType<T, P> deterministic_fmod(vecType<T, P> const & x, T y)
256 {
257 return x - y * trunc(x / y);
258 }
259
260 template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
fastCosDeterminisctic(vecType<T,P> const & x)261 GLM_FUNC_QUALIFIER vecType<T, P> fastCosDeterminisctic(vecType<T, P> const & x)
262 {
263 vecType<T, P> const Angle0_PI(abs(deterministic_fmod(x + glm::pi<T>(), glm::two_pi<T>()) - glm::pi<T>()));
264 vecType<bool, P> const FirstQuarterPi(lessThanEqual(Angle0_PI, vecType<T, P>(glm::half_pi<T>())));
265
266 vecType<T, P> const RevertAngle(mix(vecType<T, P>(glm::pi<T>()), vecType<T, P>(0), FirstQuarterPi));
267 vecType<T, P> const ReturnSign(mix(vecType<T, P>(-1), vecType<T, P>(1), FirstQuarterPi));
268 vecType<T, P> const SectionAngle(RevertAngle - Angle0_PI);
269
270 return ReturnSign * taylorSeriesNewCos(SectionAngle);
271 }
272
perf_fastCosDeterminisctic(float Begin,float End,std::size_t Samples)273 int perf_fastCosDeterminisctic(float Begin, float End, std::size_t Samples)
274 {
275 std::vector<glm::vec4> Results;
276 Results.resize(Samples);
277
278 float Steps = (End - Begin) / Samples;
279
280 std::clock_t const TimeStampBegin = std::clock();
281
282 for(std::size_t i = 0; i < Samples; ++i)
283 Results[i] = taylorCos::fastCosDeterminisctic(AngleShift + glm::vec4(Begin + Steps * i));
284
285 std::clock_t const TimeStampEnd = std::clock();
286
287 std::printf("fastCosDeterminisctic %ld clocks\n", TimeStampEnd - TimeStampBegin);
288
289 int Error = 0;
290 for(std::size_t i = 0; i < Samples; ++i)
291 Error += Results[i].x >= -1.0f && Results[i].x <= 1.0f ? 0 : 1;
292 return Error;
293 }
294
295 template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
taylorSeriesRefCos(vecType<T,P> const & x)296 GLM_FUNC_QUALIFIER vecType<T, P> taylorSeriesRefCos(vecType<T, P> const & x)
297 {
298 return static_cast<T>(1)
299 - (x * x) / glm::factorial(static_cast<T>(2))
300 + (x * x * x * x) / glm::factorial(static_cast<T>(4))
301 - (x * x * x * x * x * x) / glm::factorial(static_cast<T>(6))
302 + (x * x * x * x * x * x * x * x) / glm::factorial(static_cast<T>(8));
303 }
304
305 template <typename T, glm::precision P, template <typename, glm::precision> class vecType>
fastRefCos(vecType<T,P> const & x)306 GLM_FUNC_QUALIFIER vecType<T, P> fastRefCos(vecType<T, P> const & x)
307 {
308 vecType<T, P> const Angle0_PI(glm::abs(fmod(x + glm::pi<T>(), glm::two_pi<T>()) - glm::pi<T>()));
309 // return taylorSeriesRefCos(Angle0_PI);
310
311 vecType<bool, P> const FirstQuarterPi(lessThanEqual(Angle0_PI, vecType<T, P>(glm::half_pi<T>())));
312
313 vecType<T, P> const RevertAngle(mix(vecType<T, P>(glm::pi<T>()), vecType<T, P>(0), FirstQuarterPi));
314 vecType<T, P> const ReturnSign(mix(vecType<T, P>(-1), vecType<T, P>(1), FirstQuarterPi));
315 vecType<T, P> const SectionAngle(RevertAngle - Angle0_PI);
316
317 return ReturnSign * taylorSeriesRefCos(SectionAngle);
318 }
319
perf_fastCosRef(float Begin,float End,std::size_t Samples)320 int perf_fastCosRef(float Begin, float End, std::size_t Samples)
321 {
322 std::vector<glm::vec4> Results;
323 Results.resize(Samples);
324
325 float Steps = (End - Begin) / Samples;
326
327 std::clock_t const TimeStampBegin = std::clock();
328
329 for(std::size_t i = 0; i < Samples; ++i)
330 Results[i] = taylorCos::fastRefCos(AngleShift + glm::vec4(Begin + Steps * i));
331
332 std::clock_t const TimeStampEnd = std::clock();
333
334 std::printf("fastCosRef %ld clocks\n", TimeStampEnd - TimeStampBegin);
335
336 int Error = 0;
337 for(std::size_t i = 0; i < Samples; ++i)
338 Error += Results[i].x >= -1.0f && Results[i].x <= 1.0f ? 0 : 1;
339 return Error;
340 }
341
perf_fastCosOld(float Begin,float End,std::size_t Samples)342 int perf_fastCosOld(float Begin, float End, std::size_t Samples)
343 {
344 std::vector<glm::vec4> Results;
345 Results.resize(Samples);
346
347 float Steps = (End - Begin) / Samples;
348
349 std::clock_t const TimeStampBegin = std::clock();
350
351 for(std::size_t i = 0; i < Samples; ++i)
352 Results[i] = glm::fastCos(AngleShift + glm::vec4(Begin + Steps * i));
353
354 std::clock_t const TimeStampEnd = std::clock();
355
356 std::printf("fastCosOld %ld clocks\n", TimeStampEnd - TimeStampBegin);
357
358 int Error = 0;
359 for(std::size_t i = 0; i < Samples; ++i)
360 Error += Results[i].x >= -1.0f && Results[i].x <= 1.0f ? 0 : 1;
361 return Error;
362 }
363
perf_cos(float Begin,float End,std::size_t Samples)364 int perf_cos(float Begin, float End, std::size_t Samples)
365 {
366 std::vector<glm::vec4> Results;
367 Results.resize(Samples);
368
369 float Steps = (End - Begin) / Samples;
370
371 std::clock_t const TimeStampBegin = std::clock();
372
373 for(std::size_t i = 0; i < Samples; ++i)
374 Results[i] = glm::cos(AngleShift + glm::vec4(Begin + Steps * i));
375
376 std::clock_t const TimeStampEnd = std::clock();
377
378 std::printf("cos %ld clocks\n", TimeStampEnd - TimeStampBegin);
379
380 int Error = 0;
381 for(std::size_t i = 0; i < Samples; ++i)
382 Error += Results[i].x >= -1.0f && Results[i].x <= 1.0f ? 0 : 1;
383 return Error;
384 }
385
perf(std::size_t const Samples)386 int perf(std::size_t const Samples)
387 {
388 int Error = 0;
389
390 float const Begin = -glm::pi<float>();
391 float const End = glm::pi<float>();
392
393 Error += perf_cos(Begin, End, Samples);
394 Error += perf_fastCosOld(Begin, End, Samples);
395 Error += perf_fastCosRef(Begin, End, Samples);
396 //Error += perf_fastCosNew(Begin, End, Samples);
397 Error += perf_fastCosDeterminisctic(Begin, End, Samples);
398
399 return Error;
400 }
401
test()402 int test()
403 {
404 int Error = 0;
405
406 //for(float Angle = -4.0f * glm::pi<float>(); Angle < 4.0f * glm::pi<float>(); Angle += 0.1f)
407 //for(float Angle = -720.0f; Angle < 720.0f; Angle += 0.1f)
408 for(float Angle = 0.0f; Angle < 180.0f; Angle += 0.1f)
409 {
410 float const modAngle = std::fmod(glm::abs(Angle), 360.f);
411 assert(modAngle >= 0.0f && modAngle <= 360.f);
412 float const radAngle = glm::radians(modAngle);
413 float const Cos0 = std::cos(radAngle);
414
415 float const Cos1 = taylorCos::fastRefCos(glm::fvec1(radAngle)).x;
416 Error += glm::abs(Cos1 - Cos0) < 0.1f ? 0 : 1;
417
418 float const Cos2 = taylorCos::fastCosNew(glm::fvec1(radAngle)).x;
419 //Error += glm::abs(Cos2 - Cos0) < 0.1f ? 0 : 1;
420
421 assert(!Error);
422 }
423
424 return Error;
425 }
426 }//namespace taylorCos
427
main()428 int main()
429 {
430 int Error(0);
431
432 Error += ::taylorCos::test();
433 Error += ::taylorCos::perf(1000);
434
435 # ifdef NDEBUG
436 ::fastCos::perf(false);
437 ::fastSin::perf(false);
438 ::fastTan::perf(false);
439 ::fastAcos::perf(false);
440 ::fastAsin::perf(false);
441 ::fastAtan::perf(false);
442 # endif//NDEBUG
443
444 return Error;
445 }
446