1 
2 //
3 // This source file is part of appleseed.
4 // Visit https://appleseedhq.net/ for additional information and resources.
5 //
6 // This software is released under the MIT license.
7 //
8 // Copyright (c) 2010-2013 Francois Beaune, Jupiter Jazz Limited
9 // Copyright (c) 2014-2018 Francois Beaune, The appleseedhq Organization
10 //
11 // Permission is hereby granted, free of charge, to any person obtaining a copy
12 // of this software and associated documentation files (the "Software"), to deal
13 // in the Software without restriction, including without limitation the rights
14 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 // copies of the Software, and to permit persons to whom the Software is
16 // furnished to do so, subject to the following conditions:
17 //
18 // The above copyright notice and this permission notice shall be included in
19 // all copies or substantial portions of the Software.
20 //
21 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 // THE SOFTWARE.
28 //
29 
30 #pragma once
31 
32 // appleseed.foundation headers.
33 #include "foundation/platform/arch.h"
34 #ifdef APPLESEED_USE_SSE
35 #include "foundation/platform/sse.h"
36 #endif
37 #include "foundation/platform/types.h"
38 
39 // Standard headers.
40 #include <algorithm>
41 #include <cassert>
42 #include <cmath>
43 #include <cstddef>
44 #ifdef _MSC_VER
45 #include <cstdlib>
46 #include <intrin.h>
47 #endif
48 #include <limits>
49 
50 namespace foundation
51 {
52 
53 //
54 // Constants.
55 //
56 
57 // Mathematical constants.
Pi()58 template <typename T> inline T Pi()                 { return static_cast<T>(3.1415926535897932); }
TwoPi()59 template <typename T> inline T TwoPi()              { return static_cast<T>(6.2831853071795865); }  // 2 * Pi
FourPi()60 template <typename T> inline T FourPi()             { return static_cast<T>(12.566370614359173); }  // 4 * Pi
HalfPi()61 template <typename T> inline T HalfPi()             { return static_cast<T>(1.5707963267948966); }  // Pi / 2
PiOverFour()62 template <typename T> inline T PiOverFour()         { return static_cast<T>(0.7853981633974483); }  // Pi / 4
RcpPi()63 template <typename T> inline T RcpPi()              { return static_cast<T>(0.3183098861837907); }  // 1 / Pi
TwoOverPi()64 template <typename T> inline T TwoOverPi()          { return static_cast<T>(0.6366197723675813); }  // 2 / Pi
FourOverPi()65 template <typename T> inline T FourOverPi()         { return static_cast<T>(1.2732395447351627); }  // 4 / Pi
RcpHalfPi()66 template <typename T> inline T RcpHalfPi()          { return static_cast<T>(0.6366197723675813); }  // 1 / (Pi/2)
RcpTwoPi()67 template <typename T> inline T RcpTwoPi()           { return static_cast<T>(0.1591549430918953); }  // 1 / (2 * Pi) = 0.5 / Pi
RcpFourPi()68 template <typename T> inline T RcpFourPi()          { return static_cast<T>(0.0795774715459477); }  // 1 / (4 * Pi)
SqrtPi()69 template <typename T> inline T SqrtPi()             { return static_cast<T>(1.7724538509055160); }  // sqrt(Pi)
PiSquare()70 template <typename T> inline T PiSquare()           { return static_cast<T>(9.8696044010893586); }  // Pi^2
FourPiSquare()71 template <typename T> inline T FourPiSquare()       { return static_cast<T>(39.478417604357434); }  // 4 * Pi^2
RcpPiSquare()72 template <typename T> inline T RcpPiSquare()        { return static_cast<T>(0.1013211836423378); }  // 1 / (Pi^2) = (1 / Pi)^2
RcpFourPiSquare()73 template <typename T> inline T RcpFourPiSquare()    { return static_cast<T>(0.0253302959105844); }  // 1 / (4 * Pi^2)
FourOverPiSquare()74 template <typename T> inline T FourOverPiSquare()   { return static_cast<T>(0.4052847345693511); }  // 1 / (4 * Pi^2)
SqrtTwo()75 template <typename T> inline T SqrtTwo()            { return static_cast<T>(1.4142135623730950); }  // sqrt(2)
RcpSqrtTwo()76 template <typename T> inline T RcpSqrtTwo()         { return static_cast<T>(0.7071067811865475); }  // 1 / sqrt(2) = sqrt(2) / 2
SqrtThree()77 template <typename T> inline T SqrtThree()          { return static_cast<T>(1.7320508075688773); }  // sqrt(3)
GoldenRatio()78 template <typename T> inline T GoldenRatio()        { return static_cast<T>(1.6180339887498948); }  // (1 + sqrt(5)) / 2
Ln10()79 template <typename T> inline T Ln10()               { return static_cast<T>(2.3025850929940457); }  // ln(10)
80 
81 //
82 // The four floating point constants below were determined with the following program:
83 //
84 //   #include <cmath>
85 //   #include <cstdint>
86 //   #include <iomanip>
87 //   #include <iostream>
88 //   #include <limits>
89 //
90 //   template <typename Target, typename Source>
91 //   Target binary_cast(Source s)
92 //   {
93 //       union { Source m_source; Target m_target; } u;
94 //       u.m_source = s;
95 //       return u.m_target;
96 //   }
97 //
98 //   template <typename Float, typename UInt>
99 //   Float compute_rcp_power_of_two()
100 //   {
101 //       Float x = std::pow(Float(2.0), std::numeric_limits<UInt>::digits);
102 //
103 //       while (true)
104 //       {
105 //           const Float rcp_x = Float(1.0) / x;
106 //
107 //           if (std::numeric_limits<UInt>::max() * rcp_x < Float(1.0))
108 //               return rcp_x;
109 //
110 //           const UInt x_bits = binary_cast<UInt>(x);
111 //           x = binary_cast<Float>(x_bits + 1);
112 //       }
113 //   }
114 //
115 //   int main()
116 //   {
117 //       std::cout << std::setprecision(9) << compute_rcp_power_of_two<float, std::uint32_t>() << std::endl;
118 //       std::cout << std::setprecision(17) << compute_rcp_power_of_two<double, std::uint32_t>() << std::endl;
119 //       std::cout << std::setprecision(9) << compute_rcp_power_of_two<float, std::uint64_t>() << std::endl;
120 //       std::cout << std::setprecision(17) << compute_rcp_power_of_two<double, std::uint64_t>() << std::endl;
121 //   }
122 //
123 // Run this code on Coliru:
124 //
125 //   http://coliru.stacked-crooked.com/a/05d58a1b19118b8e
126 //
127 // Output:
128 //
129 //   2.32830616e-10
130 //   2.3283064365386963e-10
131 //   5.42101022e-20
132 //   5.421010862427521e-20
133 //
134 
135 // Return a constant that, when multiplied by 2^32 - 1 (0xFFFFFFFF), equals the largest value strictly smaller than 1.0.
136 template <typename T> inline T Rcp2Pow32();
137 template <> inline float Rcp2Pow32<float>()         { return 2.32830616e-10f; }
138 template <> inline double Rcp2Pow32<double>()       { return 2.3283064365386963e-10; }
139 
140 // Return a constant that, when multiplied by 2^64 - 1 (0xFFFFFFFFFFFFFFFF), equals the largest value strictly smaller than 1.0.
141 template <typename T> inline T Rcp2Pow64();
142 template <> inline float Rcp2Pow64<float>()         { return 5.42101022e-20f; }
143 template <> inline double Rcp2Pow64<double>()       { return 5.421010862427521e-20; }
144 
145 
146 //
147 // Conversion operations.
148 //
149 
150 // Convert an angle from degrees to radians.
151 template <typename T>
152 T deg_to_rad(const T angle);
153 
154 // Convert an angle from radians to degrees.
155 template <typename T>
156 T rad_to_deg(const T angle);
157 
158 
159 //
160 // Arithmetic operations.
161 //
162 
163 // Return the absolute value of the argument. This function complements
164 // the standard function std::abs() which is not necessary available for
165 // compiler-specific types (e.g. __int64).
166 template <typename T>
167 T abs(const T x);
168 
169 // Return the square of the argument.
170 template <typename T>
171 T square(const T x);
172 
173 // Return the cube of the argument.
174 template <typename T>
175 T cube(const T x);
176 
177 // Return 1 / x.
178 template <typename T>
179 T rcp(const T x);
180 
181 // Return 1 / x or eps if the absolute value of x is less than eps.
182 template <typename T>
183 T safe_rcp(const T x, const T eps);
184 
185 // Return the square root of x or 0 if x is negative.
186 template <typename T>
187 T safe_sqrt(const T x);
188 
189 // Compile-time exponentiation of the form x^p where p >= 0.
190 // Note: swapped template arguments to allow writing pow_int<3>(3.14).
191 template <size_t P, typename T>
192 T pow_int(const T x);
193 
194 // Runtime exponentiation of the form x^p where p >= 0.
195 template <typename T>
196 T pow_int(const T x, size_t p);
197 
198 // Return the smallest power of 2 larger than a given integer x (x > 0).
199 template <typename T> T next_pow2(T x);
200 template <> int64 next_pow2<int64>(int64 x);
201 template <> uint64 next_pow2<uint64>(uint64 x);
202 
203 // Return true if a given integer x is a power of 2.
204 template <typename T>
205 bool is_pow2(const T x);
206 
207 // Return the base-2 logarithm of a given integer.
208 template <typename T>
209 T log2_int(T x);
210 
211 // Return the log in a given base of a given scalar.
212 template <typename T>
213 T log(const T x, const T base);
214 
215 // Return the next given power of a given scalar.
216 template <typename T>
217 T next_power(const T x, const T base);
218 
219 // Round n (n >= 0) to the next multiple of m (m > 0).
220 template <typename T>
221 T next_multiple(const T n, const T m);
222 
223 // Round n (n >= 0) to the previous multiple of m (m > 0).
224 template <typename T>
225 T prev_multiple(const T n, const T m);
226 
227 // Return the factorial of a given integer.
228 template <typename T>
229 T factorial(T x);
230 
231 // Return the binomial coefficient (n, k).
232 template <typename T>
233 T binomial(const T n, const T k);
234 
235 // Clamp the argument to [low, high].
236 template <typename T>
237 T clamp(const T x, const T low, const T high);
238 
239 // Clamp the argument to [0, 1].
240 template <typename T>
241 T saturate(const T x);
242 
243 // Wrap the argument back to [0, 1).
244 template <typename T>
245 T wrap(const T x);
246 
247 // Normalize an angle into [0, 2*Pi).
248 template <typename T>
249 T normalize_angle(const T angle);
250 
251 // Semantically identical to static_cast<Int>(x).
252 template <typename Int, typename T>
253 Int truncate(const T x);
254 
255 // Round x to the nearest integer with Round Half Away from Zero tie breaking rule.
256 // Reference: http://en.wikipedia.org/wiki/Rounding#Round_half_away_from_zero.
257 template <typename Int, typename T>
258 Int round(const T x);
259 
260 // Semantically identical to std::floor().
261 template <typename T>
262 T fast_floor(const T x);
263 
264 // Semantically identical to std::ceil().
265 template <typename T>
266 T fast_ceil(const T x);
267 
268 // Return the fractional part of x, defined as x - std::floor(x).
269 // The returned value is always in [0, 1) even when x is negative.
270 // Passing negative values to this function may give surprising results,
271 // e.g. frac(-4.2) will return 0.8. You may want to use frac(std::abs(x))
272 // which for x = -4.2 will return 0.2.
273 template <typename T>
274 T frac(const T x);
275 
276 // Return the integer and fractional parts of x.
277 template <typename T, typename I>
278 T floor_frac(const T x, I& int_part);
279 
280 // Compute a % n or fmod(a, n) and always return a non-negative value.
281 template <typename T>
282 T mod(const T a, const T n);
283 
284 // Rotate an unsigned integer left or right by a given number of bits.
285 // Reference: https://stackoverflow.com/a/776523/393756
286 uint32 rotl32(const uint32 n, unsigned int shift);
287 uint64 rotl64(const uint64 n, unsigned int shift);
288 uint32 rotr32(const uint32 n, unsigned int shift);
289 uint64 rotr64(const uint64 n, unsigned int shift);
290 
291 // linearstep() returns 0 for x < a, 1 for x > b, and generates
292 // a linear transition from 0 to 1 between x = a and x = b.
293 template <typename T>
294 T linearstep(const T a, const T b, const T x);
295 
296 // smoothstep() returns 0 for x < a, 1 for x > b, and generates
297 // a smooth, C-infinite transition from 0 to 1 between x = a and
298 // x = b. The function has zero first derivatives at both x = a
299 // and x = b.
300 template <typename T>
301 T smoothstep(const T a, const T b, const T x);
302 
303 // mix() returns a for x < 0, b for x > 1, and performs a linear
304 // blend between values a and b when x is between 0 and 1.
305 template <typename T, typename U>
306 T mix(const T a, const T b, const U x);
307 
308 // lerp() returns the linear interpolation (1 - x) * a + x * b.
309 template <typename T, typename U>
310 T lerp(const T a, const T b, const U x);
311 
312 // inverse_lerp() returns the relative position of x between a and b,
313 // i.e (x - a) / (b - a). It is the inverse operation of lerp().
314 template <typename T, typename U>
315 T inverse_lerp(const T a, const T b, const U x);
316 
317 // fit() remaps a variable x from the range [min_x, max_x] to the
318 // range [min_y, max_y]. When x is outside the [min_x, max_x] range,
319 // a linear extrapolation outside the [min_y, max_y] range is used.
320 template <typename In, typename Out = In, typename Interpolant = Out>
321 Out fit(
322     const In x,
323     const In min_x,
324     const In max_x,
325     const Out min_y,
326     const Out max_y);
327 
328 
329 //
330 // Robust floating-point tests.
331 //
332 // todo: implement feq_ulp() and fz_ulp(), to compare scalars with precision
333 // expressed in ulp. Most probably an integer based comparison. Reference:
334 // http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm.
335 //
336 
337 // Default epsilon values for floating-point tests.
338 template <typename T> T default_eps();                      // intentionally left unimplemented
339 template <> inline float default_eps<float>()               { return 1.0e-6f; }
340 template <> inline double default_eps<double>()             { return 1.0e-14; }
341 template <> inline long double default_eps<long double>()   { return 1.0e-30L; }
342 
343 // Allow using custom epsilon values in template code.
344 template <typename T> T make_eps(const float feps, const double deps);
make_eps(const float feps,const double)345 template <> inline float make_eps(const float feps, const double /*deps*/)  { return feps; }
make_eps(const float,const double deps)346 template <> inline double make_eps(const float /*feps*/, const double deps) { return deps; }
347 
348 // Approximate equality tests.
349 template <typename T> bool feq(const T lhs, const T rhs);
350 template <typename T> bool feq(const T lhs, const T rhs, const T eps);
351 bool feq(const int lhs, const int rhs);
352 
353 // Approximate zero tests.
354 template <typename T> bool fz(const T lhs);
355 template <typename T> bool fz(const T lhs, const T eps);
356 bool fz(const int lhs);
357 
358 
359 //
360 // Miscellaneous.
361 //
362 
363 // Return the minimum signed finite value for a given type.
signed_min()364 template <typename T> T signed_min()        { return std::numeric_limits<T>::min(); }
signed_min()365 template <> inline float signed_min()       { return -std::numeric_limits<float>::max(); }
signed_min()366 template <> inline double signed_min()      { return -std::numeric_limits<double>::max(); }
signed_min()367 template <> inline long double signed_min() { return -std::numeric_limits<long double>::max(); }
368 
369 
370 //
371 // Conversion operations implementation.
372 //
373 
374 template <>
deg_to_rad(const float angle)375 inline float deg_to_rad(const float angle)
376 {
377     return angle * (Pi<float>() / 180.0f);
378 }
379 
380 template <>
deg_to_rad(const double angle)381 inline double deg_to_rad(const double angle)
382 {
383     return angle * (Pi<double>() / 180.0);
384 }
385 
386 template <>
deg_to_rad(const long double angle)387 inline long double deg_to_rad(const long double angle)
388 {
389     return angle * (Pi<long double>() / 180.0);
390 }
391 
392 template <>
rad_to_deg(const float angle)393 inline float rad_to_deg(const float angle)
394 {
395     return angle * (180.0f / Pi<float>());
396 }
397 
398 template <>
rad_to_deg(const double angle)399 inline double rad_to_deg(const double angle)
400 {
401     return angle * (180.0 / Pi<double>());
402 }
403 
404 template <>
rad_to_deg(const long double angle)405 inline long double rad_to_deg(const long double angle)
406 {
407     return angle * (180.0 / Pi<long double>());
408 }
409 
410 
411 //
412 // Arithmetic operations implementation.
413 //
414 
415 template <typename T>
abs(const T x)416 inline T abs(const T x)
417 {
418     return x < T(0) ? -x : x;
419 }
420 
421 template <typename T>
square(const T x)422 inline T square(const T x)
423 {
424     return x * x;
425 }
426 
427 template <typename T>
cube(const T x)428 inline T cube(const T x)
429 {
430     return x * x * x;
431 }
432 
433 template <typename T>
rcp(const T x)434 inline T rcp(const T x)
435 {
436     return T(1.0) / x;
437 }
438 
439 template <typename T>
safe_rcp(const T x,const T eps)440 inline T safe_rcp(const T x, const T eps)
441 {
442     return std::abs(x) < eps ? eps : T(1.0) / x;
443 }
444 
445 template <typename T>
safe_sqrt(const T x)446 inline T safe_sqrt(const T x)
447 {
448     return std::sqrt(std::max(x, T(0.0)));
449 }
450 
451 template <typename T, size_t P>
452 struct PowIntHelper
453 {
evalPowIntHelper454     static T eval(const T x)
455     {
456         // Reference: http://en.wikipedia.org/wiki/Exponentiation_by_squaring
457         if (P % 2 == 0)
458             return PowIntHelper<T, P / 2>::eval(x * x);
459         else
460             return x * PowIntHelper<T, (P - 1) / 2>::eval(x * x);
461     }
462 };
463 
464 template <typename T>
465 struct PowIntHelper<T, 0>
466 {
467     static T eval(const T x)
468     {
469         return T(1);
470     }
471 };
472 
473 template <size_t P, typename T>
474 inline T pow_int(const T x)
475 {
476     return PowIntHelper<T, P>::eval(x);
477 }
478 
479 template <typename T>
480 inline T pow_int(T x, size_t p)
481 {
482     // Reference: http://en.wikipedia.org/wiki/Exponentiation_by_squaring
483 
484     T y = T(1);
485 
486     while (p)
487     {
488         if (p % 2 == 0)
489         {
490             x *= x;
491             p /= 2;
492         }
493         else
494         {
495             y *= x;
496             x *= x;
497             p = (p - 1) / 2;
498         }
499     }
500 
501     return y;
502 }
503 
504 template <typename T>
505 inline T next_pow2(T x)
506 {
507     assert(x > 0);
508     --x;
509     x |= x >> 16;
510     x |= x >> 8;
511     x |= x >> 4;
512     x |= x >> 2;
513     x |= x >> 1;
514     return x + 1;
515 }
516 
517 template <>
518 inline int64 next_pow2<int64>(int64 x)
519 {
520     assert(x > 0);
521     --x;
522     x |= x >> 32;
523     x |= x >> 16;
524     x |= x >> 8;
525     x |= x >> 4;
526     x |= x >> 2;
527     x |= x >> 1;
528     return x + 1;
529 }
530 
531 template <>
532 inline uint64 next_pow2<uint64>(uint64 x)
533 {
534     assert(x > 0);
535     --x;
536     x |= x >> 32;
537     x |= x >> 16;
538     x |= x >> 8;
539     x |= x >> 4;
540     x |= x >> 2;
541     x |= x >> 1;
542     return x + 1;
543 }
544 
545 template <typename T>
546 inline bool is_pow2(const T x)
547 {
548     return (x & (x - 1)) == 0;
549 }
550 
551 template <typename T>
552 inline T log2_int(T x)
553 {
554     assert(x > 0);
555 
556     T n = 0;
557 
558     while (x >>= 1)
559         ++n;
560 
561     return n;
562 }
563 
564 // Visual C++.
565 #if defined _MSC_VER
566 
567 template <>
568 inline uint32 log2_int(const uint32 x)
569 {
570     assert(x > 0);
571 
572     unsigned long index;
573     _BitScanReverse(&index, x);
574 
575     return static_cast<uint32>(index);
576 }
577 
578 #ifdef APPLESEED_ARCH64
579 
580 template <>
581 inline uint64 log2_int(const uint64 x)
582 {
583     assert(x > 0);
584 
585     unsigned long index;
586     _BitScanReverse64(&index, x);
587 
588     return static_cast<uint64>(index);
589 }
590 
591 #endif
592 
593 // gcc.
594 #elif defined __GNUC__
595 
596 template <>
597 inline unsigned int log2_int(const unsigned int x)
598 {
599     assert(x > 0);
600     return 8 * sizeof(unsigned int) - __builtin_clz(x) - 1;
601 }
602 
603 template <>
604 inline unsigned long log2_int(const unsigned long x)
605 {
606     assert(x > 0);
607     return 8 * sizeof(unsigned long) - __builtin_clzl(x) - 1;
608 }
609 
610 #endif
611 
612 template <typename T>
613 inline T log(const T x, const T base)
614 {
615     return std::log(x) / std::log(base);
616 }
617 
618 template <typename T>
619 inline T next_power(const T x, const T base)
620 {
621     return std::pow(base, fast_ceil(log(x, base)));
622 }
623 
624 template <typename T>
625 inline T next_multiple(const T n, const T m)
626 {
627     assert(n >= 0);
628     assert(m > 0);
629     return (n + m - 1) / m * m;
630 }
631 
632 template <typename T>
633 inline T prev_multiple(const T n, const T m)
634 {
635     assert(n >= 0);
636     assert(m > 0);
637     return n - n % m;
638 }
639 
640 template <typename T>
641 inline T factorial(T x)
642 {
643     assert(x >= 0);
644 
645     T fac = 1;
646 
647     while (x > 1)
648     {
649         fac *= x;
650         --x;
651     }
652 
653     return fac;
654 }
655 
656 template <typename T>
657 inline T binomial(const T n, const T k)
658 {
659     assert(k <= n);
660     return factorial(n) / (factorial(k) * factorial(n - k));
661 }
662 
663 template <typename T>
664 inline T clamp(const T x, const T low, const T high)
665 {
666     assert(low <= high);
667     return x < low ? low :
668            x > high ? high :
669            x;
670 }
671 
672 template <typename T>
673 inline T saturate(const T x)
674 {
675     return clamp(x, T(0.0), T(1.0));
676 }
677 
678 template <typename T>
679 inline T wrap(const T x)
680 {
681     const T y = std::fmod(x, T(1.0));
682     return y < T(0.0) ? y + T(1.0) : y;
683 }
684 
685 template <typename T>
686 inline T normalize_angle(const T angle)
687 {
688     const T a = std::fmod(angle, TwoPi<T>());
689     return a < T(0.0) ? a + TwoPi<T>() : a;
690 }
691 
692 template <typename Int, typename T>
693 inline Int truncate(const T x)
694 {
695     return static_cast<Int>(x);
696 }
697 
698 #ifdef APPLESEED_USE_SSE
699 
700 template <>
701 inline int8 truncate<int8>(const float x)
702 {
703     return static_cast<int8>(_mm_cvttss_si32(_mm_load_ss(&x)));
704 }
705 
706 template <>
707 inline int16 truncate<int16>(const float x)
708 {
709     return static_cast<int16>(_mm_cvttss_si32(_mm_load_ss(&x)));
710 }
711 
712 template <>
713 inline int32 truncate<int32>(const float x)
714 {
715     return static_cast<int32>(_mm_cvttss_si32(_mm_load_ss(&x)));
716 }
717 
718 template <>
719 inline int64 truncate<int64>(const float x)
720 {
721     return static_cast<int64>(_mm_cvttss_si32(_mm_load_ss(&x)));
722 }
723 
724 template <>
725 inline int8 truncate<int8>(const double x)
726 {
727     return static_cast<int8>(_mm_cvttsd_si32(_mm_load_sd(&x)));
728 }
729 
730 template <>
731 inline int16 truncate<int16>(const double x)
732 {
733     return static_cast<int16>(_mm_cvttsd_si32(_mm_load_sd(&x)));
734 }
735 
736 template <>
737 inline int32 truncate<int32>(const double x)
738 {
739     return static_cast<int32>(_mm_cvttsd_si32(_mm_load_sd(&x)));
740 }
741 
742 template <>
743 inline int64 truncate<int64>(const double x)
744 {
745     return static_cast<int64>(_mm_cvttsd_si32(_mm_load_sd(&x)));
746 }
747 
748 #endif
749 
750 template <typename Int, typename T>
751 inline Int round(const T x)
752 {
753     return truncate<Int>(x < T(0.0) ? x - T(0.5) : x + T(0.5));
754 }
755 
756 template <typename T>
757 inline T fast_floor(const T x)
758 {
759     return std::floor(x);
760 }
761 
762 template <typename T>
763 inline T fast_ceil(const T x)
764 {
765     return std::ceil(x);
766 }
767 
768 #ifdef APPLESEED_USE_SSE42
769 
770 template <>
771 inline float fast_floor(const float x)
772 {
773     M128Fields f;
774     f.m128 = _mm_floor_ss(_mm_set1_ps(x), _mm_set1_ps(x));
775     return f.f32[0];
776 }
777 
778 template <>
779 inline double fast_floor(const double x)
780 {
781     M128Fields f;
782     f.m128d = _mm_floor_sd(_mm_set1_pd(x), _mm_set1_pd(x));
783     return f.f64[0];
784 }
785 
786 template <>
787 inline float fast_ceil(const float x)
788 {
789     M128Fields f;
790     f.m128 = _mm_ceil_ss(_mm_set1_ps(x), _mm_set1_ps(x));
791     return f.f32[0];
792 }
793 
794 template <>
795 inline double fast_ceil(const double x)
796 {
797     M128Fields f;
798     f.m128d = _mm_ceil_sd(_mm_set1_pd(x), _mm_set1_pd(x));
799     return f.f64[0];
800 }
801 
802 #endif
803 
804 template <typename T>
805 inline T frac(const T x)
806 {
807     const T f = x - fast_floor(x);
808     assert(f >= T(0.0));
809     assert(f < T(1.0));
810     return f;
811 }
812 
813 template <typename T, typename I>
814 inline T floor_frac(const T x, I& int_part)
815 {
816     const T f = fast_floor(x);
817     int_part = static_cast<I>(f);
818 
819     const T frac_part = x - f;
820     assert(frac_part >= T(0.0));
821     assert(frac_part < T(1.0));
822 
823     return frac_part;
824 }
825 
826 template <typename T>
827 inline T mod(const T a, const T n)
828 {
829     const T m = a % n;
830     return m < 0 ? n + m : m;
831 }
832 
833 template <>
834 inline float mod(const float a, const float n)
835 {
836     const float m = std::fmod(a, n);
837     return m < 0.0f ? n + m : m;
838 }
839 
840 template <>
841 inline double mod(const double a, const double n)
842 {
843     const double m = std::fmod(a, n);
844     return m < 0.0 ? n + m : m;
845 }
846 
847 #pragma warning (push)
848 #pragma warning (disable : 4146)    // unary minus operator applied to unsigned type, result still unsigned
849 
850 inline uint32 rotl32(const uint32 n, unsigned int shift)
851 {
852     const unsigned int Mask = 8 * sizeof(n) - 1;
853     assert(shift <= Mask);
854 
855 #ifdef _MSC_VER
856     return _rotl(n, shift);
857 #else
858     shift &= Mask;
859     return (n << shift) | (n >> ((-shift) & Mask));
860 #endif
861 }
862 
863 inline uint64 rotl64(const uint64 n, unsigned int shift)
864 {
865     const unsigned int Mask = 8 * sizeof(n) - 1;
866     assert(shift <= Mask);
867 
868 #ifdef _MSC_VER
869     return _rotl64(n, shift);
870 #else
871     shift &= Mask;
872     return (n << shift) | (n >> ((-shift) & Mask));
873 #endif
874 }
875 
876 inline uint32 rotr32(const uint32 n, unsigned int shift)
877 {
878     const unsigned int Mask = 8 * sizeof(n) - 1;
879     assert(shift <= Mask);
880 
881 #ifdef _MSC_VER
882     return _rotr(n, shift);
883 #else
884     shift &= Mask;
885     return (n >> shift) | (n << ((-shift) & Mask));
886 #endif
887 }
888 
889 inline uint64 rotr64(const uint64 n, unsigned int shift)
890 {
891     const unsigned int Mask = 8 * sizeof(n) - 1;
892     assert(shift <= Mask);
893 
894 #ifdef _MSC_VER
895     return _rotr64(n, shift);
896 #else
897     shift &= Mask;
898     return (n >> shift) | (n << ((-shift) & Mask));
899 #endif
900 }
901 
902 #pragma warning (pop)
903 
904 template <typename T>
905 inline T linearstep(const T a, const T b, const T x)
906 {
907     assert(a < b);
908     return x <= a ? T(0.0) :
909            x >= b ? T(1.0) :
910            (x - a) / (b - a);
911 }
912 
913 template <typename T>
914 inline T smoothstep(const T a, const T b, const T x)
915 {
916     assert(a < b);
917 
918     if (x <= a) return T(0.0);
919     if (x >= b) return T(1.0);
920 
921     const T y = (x - a) / (b - a);
922     return y * y * (T(3.0) - y - y);
923 }
924 
925 template <typename T, typename U>
926 inline T mix(const T a, const T b, const U x)
927 {
928     return x <= U(0.0) ? a :
929            x >= U(1.0) ? b :
930            lerp(a, b, x);
931 }
932 
933 template <typename T, typename U>
934 inline T lerp(const T a, const T b, const U x)
935 {
936     return (U(1.0) - x) * a + x * b;
937 }
938 
939 template <typename T, typename U>
940 inline T inverse_lerp(const T a, const T b, const U x)
941 {
942     assert(a != b);
943     return (x - a) / (b - a);
944 }
945 
946 template <typename In, typename Out, typename Interpolant>
947 inline Out fit(
948     const In x,
949     const In min_x,
950     const In max_x,
951     const Out min_y,
952     const Out max_y)
953 {
954     assert(min_x != max_x);
955     const Interpolant k = Interpolant(x - min_x) / Interpolant(max_x - min_x);
956     const Out result = min_y * (Interpolant(1.0) - k) + max_y * k;
957     return result;
958 }
959 
960 
961 //
962 // Robust floating-point tests implementation.
963 //
964 
965 template <typename T>
966 inline bool feq(const T lhs, const T rhs)
967 {
968     return feq(lhs, rhs, default_eps<T>());
969 }
970 
971 template <typename T>
972 inline bool feq(const T lhs, const T rhs, const T eps)
973 {
974     // Handle case where lhs is exactly +0.0 or -0.0.
975     if (lhs == T(0.0))
976         return std::abs(rhs) < eps;
977 
978     // Handle case where rhs is exactly +0.0 or -0.0.
979     if (rhs == T(0.0))
980         return std::abs(lhs) < eps;
981 
982     const T abs_lhs = std::abs(lhs);
983     const T abs_rhs = std::abs(rhs);
984 
985     // No equality if lhs/rhs overflows.
986     if (abs_rhs < T(1.0) &&
987         abs_lhs > abs_rhs * std::numeric_limits<T>::max())
988         return false;
989 
990     // No equality if lhs/rhs underflows.
991     if (abs_rhs > T(1.0) &&
992         abs_lhs < abs_rhs * std::numeric_limits<T>::min())
993         return false;
994 
995     // There is equality if the ratio lhs/rhs is close enough to 1.
996     const T ratio = lhs / rhs;
997     return ratio >= T(1.0) - eps && ratio <= T(1.0) + eps;
998 }
999 
1000 inline bool feq(const int lhs, const int rhs)
1001 {
1002     return lhs == rhs;
1003 }
1004 
1005 template <typename T>
1006 inline bool fz(const T lhs)
1007 {
1008     return fz(lhs, default_eps<T>());
1009 }
1010 
1011 template <typename T>
1012 inline bool fz(const T lhs, const T eps)
1013 {
1014     return std::abs(lhs) < eps;
1015 }
1016 
1017 inline bool fz(const int lhs)
1018 {
1019     return lhs == 0;
1020 }
1021 
1022 }   // namespace foundation
1023