1 /*
2  * Copyright 2011-2013 Blender Foundation
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef __UTIL_MATH_H__
18 #define __UTIL_MATH_H__
19 
20 /* Math
21  *
22  * Basic math functions on scalar and vector types. This header is used by
23  * both the kernel code when compiled as C++, and other C++ non-kernel code. */
24 
25 #ifndef __KERNEL_GPU__
26 #  include <cmath>
27 #endif
28 
29 #ifndef __KERNEL_OPENCL__
30 #  include <float.h>
31 #  include <math.h>
32 #  include <stdio.h>
33 #endif /* __KERNEL_OPENCL__ */
34 
35 #include "util/util_types.h"
36 
37 CCL_NAMESPACE_BEGIN
38 
39 /* Float Pi variations */
40 
41 /* Division */
42 #ifndef M_PI_F
43 #  define M_PI_F (3.1415926535897932f) /* pi */
44 #endif
45 #ifndef M_PI_2_F
46 #  define M_PI_2_F (1.5707963267948966f) /* pi/2 */
47 #endif
48 #ifndef M_PI_4_F
49 #  define M_PI_4_F (0.7853981633974830f) /* pi/4 */
50 #endif
51 #ifndef M_1_PI_F
52 #  define M_1_PI_F (0.3183098861837067f) /* 1/pi */
53 #endif
54 #ifndef M_2_PI_F
55 #  define M_2_PI_F (0.6366197723675813f) /* 2/pi */
56 #endif
57 #ifndef M_1_2PI_F
58 #  define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */
59 #endif
60 #ifndef M_SQRT_PI_8_F
61 #  define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */
62 #endif
63 #ifndef M_LN_2PI_F
64 #  define M_LN_2PI_F (1.8378770664093454f) /* ln(2*pi) */
65 #endif
66 
67 /* Multiplication */
68 #ifndef M_2PI_F
69 #  define M_2PI_F (6.2831853071795864f) /* 2*pi */
70 #endif
71 #ifndef M_4PI_F
72 #  define M_4PI_F (12.566370614359172f) /* 4*pi */
73 #endif
74 
75 /* Float sqrt variations */
76 #ifndef M_SQRT2_F
77 #  define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */
78 #endif
79 #ifndef M_LN2_F
80 #  define M_LN2_F (0.6931471805599453f) /* ln(2) */
81 #endif
82 #ifndef M_LN10_F
83 #  define M_LN10_F (2.3025850929940457f) /* ln(10) */
84 #endif
85 
86 /* Scalar */
87 
88 #ifdef _WIN32
89 #  ifndef __KERNEL_OPENCL__
fmaxf(float a,float b)90 ccl_device_inline float fmaxf(float a, float b)
91 {
92   return (a > b) ? a : b;
93 }
94 
fminf(float a,float b)95 ccl_device_inline float fminf(float a, float b)
96 {
97   return (a < b) ? a : b;
98 }
99 #  endif /* !__KERNEL_OPENCL__ */
100 #endif   /* _WIN32 */
101 
102 #ifndef __KERNEL_GPU__
103 using std::isfinite;
104 using std::isnan;
105 using std::sqrt;
106 
abs(int x)107 ccl_device_inline int abs(int x)
108 {
109   return (x > 0) ? x : -x;
110 }
111 
max(int a,int b)112 ccl_device_inline int max(int a, int b)
113 {
114   return (a > b) ? a : b;
115 }
116 
min(int a,int b)117 ccl_device_inline int min(int a, int b)
118 {
119   return (a < b) ? a : b;
120 }
121 
max(float a,float b)122 ccl_device_inline float max(float a, float b)
123 {
124   return (a > b) ? a : b;
125 }
126 
min(float a,float b)127 ccl_device_inline float min(float a, float b)
128 {
129   return (a < b) ? a : b;
130 }
131 
max(double a,double b)132 ccl_device_inline double max(double a, double b)
133 {
134   return (a > b) ? a : b;
135 }
136 
min(double a,double b)137 ccl_device_inline double min(double a, double b)
138 {
139   return (a < b) ? a : b;
140 }
141 
142 /* These 2 guys are templated for usage with registers data.
143  *
144  * NOTE: Since this is CPU-only functions it is ok to use references here.
145  * But for other devices we'll need to be careful about this.
146  */
147 
min4(const T & a,const T & b,const T & c,const T & d)148 template<typename T> ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d)
149 {
150   return min(min(a, b), min(c, d));
151 }
152 
max4(const T & a,const T & b,const T & c,const T & d)153 template<typename T> ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d)
154 {
155   return max(max(a, b), max(c, d));
156 }
157 #endif /* __KERNEL_GPU__ */
158 
min4(float a,float b,float c,float d)159 ccl_device_inline float min4(float a, float b, float c, float d)
160 {
161   return min(min(a, b), min(c, d));
162 }
163 
max4(float a,float b,float c,float d)164 ccl_device_inline float max4(float a, float b, float c, float d)
165 {
166   return max(max(a, b), max(c, d));
167 }
168 
169 #ifndef __KERNEL_OPENCL__
170 /* Int/Float conversion */
171 
as_int(uint i)172 ccl_device_inline int as_int(uint i)
173 {
174   union {
175     uint ui;
176     int i;
177   } u;
178   u.ui = i;
179   return u.i;
180 }
181 
as_uint(int i)182 ccl_device_inline uint as_uint(int i)
183 {
184   union {
185     uint ui;
186     int i;
187   } u;
188   u.i = i;
189   return u.ui;
190 }
191 
as_uint(float f)192 ccl_device_inline uint as_uint(float f)
193 {
194   union {
195     uint i;
196     float f;
197   } u;
198   u.f = f;
199   return u.i;
200 }
201 
__float_as_int(float f)202 ccl_device_inline int __float_as_int(float f)
203 {
204   union {
205     int i;
206     float f;
207   } u;
208   u.f = f;
209   return u.i;
210 }
211 
__int_as_float(int i)212 ccl_device_inline float __int_as_float(int i)
213 {
214   union {
215     int i;
216     float f;
217   } u;
218   u.i = i;
219   return u.f;
220 }
221 
__float_as_uint(float f)222 ccl_device_inline uint __float_as_uint(float f)
223 {
224   union {
225     uint i;
226     float f;
227   } u;
228   u.f = f;
229   return u.i;
230 }
231 
__uint_as_float(uint i)232 ccl_device_inline float __uint_as_float(uint i)
233 {
234   union {
235     uint i;
236     float f;
237   } u;
238   u.i = i;
239   return u.f;
240 }
241 
__float4_as_int4(float4 f)242 ccl_device_inline int4 __float4_as_int4(float4 f)
243 {
244 #  ifdef __KERNEL_SSE__
245   return int4(_mm_castps_si128(f.m128));
246 #  else
247   return make_int4(
248       __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w));
249 #  endif
250 }
251 
__int4_as_float4(int4 i)252 ccl_device_inline float4 __int4_as_float4(int4 i)
253 {
254 #  ifdef __KERNEL_SSE__
255   return float4(_mm_castsi128_ps(i.m128));
256 #  else
257   return make_float4(
258       __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w));
259 #  endif
260 }
261 #endif /* __KERNEL_OPENCL__ */
262 
263 /* Versions of functions which are safe for fast math. */
isnan_safe(float f)264 ccl_device_inline bool isnan_safe(float f)
265 {
266   unsigned int x = __float_as_uint(f);
267   return (x << 1) > 0xff000000u;
268 }
269 
isfinite_safe(float f)270 ccl_device_inline bool isfinite_safe(float f)
271 {
272   /* By IEEE 754 rule, 2*Inf equals Inf */
273   unsigned int x = __float_as_uint(f);
274   return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u);
275 }
276 
ensure_finite(float v)277 ccl_device_inline float ensure_finite(float v)
278 {
279   return isfinite_safe(v) ? v : 0.0f;
280 }
281 
282 #ifndef __KERNEL_OPENCL__
clamp(int a,int mn,int mx)283 ccl_device_inline int clamp(int a, int mn, int mx)
284 {
285   return min(max(a, mn), mx);
286 }
287 
clamp(float a,float mn,float mx)288 ccl_device_inline float clamp(float a, float mn, float mx)
289 {
290   return min(max(a, mn), mx);
291 }
292 
mix(float a,float b,float t)293 ccl_device_inline float mix(float a, float b, float t)
294 {
295   return a + t * (b - a);
296 }
297 
smoothstep(float edge0,float edge1,float x)298 ccl_device_inline float smoothstep(float edge0, float edge1, float x)
299 {
300   float result;
301   if (x < edge0)
302     result = 0.0f;
303   else if (x >= edge1)
304     result = 1.0f;
305   else {
306     float t = (x - edge0) / (edge1 - edge0);
307     result = (3.0f - 2.0f * t) * (t * t);
308   }
309   return result;
310 }
311 
312 #endif /* __KERNEL_OPENCL__ */
313 
314 #ifndef __KERNEL_CUDA__
saturate(float a)315 ccl_device_inline float saturate(float a)
316 {
317   return clamp(a, 0.0f, 1.0f);
318 }
319 #endif /* __KERNEL_CUDA__ */
320 
float_to_int(float f)321 ccl_device_inline int float_to_int(float f)
322 {
323   return (int)f;
324 }
325 
floor_to_int(float f)326 ccl_device_inline int floor_to_int(float f)
327 {
328   return float_to_int(floorf(f));
329 }
330 
quick_floor_to_int(float x)331 ccl_device_inline int quick_floor_to_int(float x)
332 {
333   return float_to_int(x) - ((x < 0) ? 1 : 0);
334 }
335 
floorfrac(float x,int * i)336 ccl_device_inline float floorfrac(float x, int *i)
337 {
338   *i = quick_floor_to_int(x);
339   return x - *i;
340 }
341 
ceil_to_int(float f)342 ccl_device_inline int ceil_to_int(float f)
343 {
344   return float_to_int(ceilf(f));
345 }
346 
fractf(float x)347 ccl_device_inline float fractf(float x)
348 {
349   return x - floorf(x);
350 }
351 
352 /* Adapted from godotengine math_funcs.h. */
wrapf(float value,float max,float min)353 ccl_device_inline float wrapf(float value, float max, float min)
354 {
355   float range = max - min;
356   return (range != 0.0f) ? value - (range * floorf((value - min) / range)) : min;
357 }
358 
pingpongf(float a,float b)359 ccl_device_inline float pingpongf(float a, float b)
360 {
361   return (b != 0.0f) ? fabsf(fractf((a - b) / (b * 2.0f)) * b * 2.0f - b) : 0.0f;
362 }
363 
smoothminf(float a,float b,float k)364 ccl_device_inline float smoothminf(float a, float b, float k)
365 {
366   if (k != 0.0f) {
367     float h = fmaxf(k - fabsf(a - b), 0.0f) / k;
368     return fminf(a, b) - h * h * h * k * (1.0f / 6.0f);
369   }
370   else {
371     return fminf(a, b);
372   }
373 }
374 
signf(float f)375 ccl_device_inline float signf(float f)
376 {
377   return (f < 0.0f) ? -1.0f : 1.0f;
378 }
379 
nonzerof(float f,float eps)380 ccl_device_inline float nonzerof(float f, float eps)
381 {
382   if (fabsf(f) < eps)
383     return signf(f) * eps;
384   else
385     return f;
386 }
387 
388 /* Signum function testing for zero. Matches GLSL and OSL functions. */
compatible_signf(float f)389 ccl_device_inline float compatible_signf(float f)
390 {
391   if (f == 0.0f) {
392     return 0.0f;
393   }
394   else {
395     return signf(f);
396   }
397 }
398 
smoothstepf(float f)399 ccl_device_inline float smoothstepf(float f)
400 {
401   float ff = f * f;
402   return (3.0f * ff - 2.0f * ff * f);
403 }
404 
mod(int x,int m)405 ccl_device_inline int mod(int x, int m)
406 {
407   return (x % m + m) % m;
408 }
409 
float2_to_float3(const float2 a)410 ccl_device_inline float3 float2_to_float3(const float2 a)
411 {
412   return make_float3(a.x, a.y, 0.0f);
413 }
414 
float4_to_float3(const float4 a)415 ccl_device_inline float3 float4_to_float3(const float4 a)
416 {
417   return make_float3(a.x, a.y, a.z);
418 }
419 
float3_to_float4(const float3 a)420 ccl_device_inline float4 float3_to_float4(const float3 a)
421 {
422   return make_float4(a.x, a.y, a.z, 1.0f);
423 }
424 
inverse_lerp(float a,float b,float x)425 ccl_device_inline float inverse_lerp(float a, float b, float x)
426 {
427   return (x - a) / (b - a);
428 }
429 
430 /* Cubic interpolation between b and c, a and d are the previous and next point. */
cubic_interp(float a,float b,float c,float d,float x)431 ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x)
432 {
433   return 0.5f *
434              (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x +
435               (c - a)) *
436              x +
437          b;
438 }
439 
440 CCL_NAMESPACE_END
441 
442 #include "util/util_math_int2.h"
443 #include "util/util_math_int3.h"
444 #include "util/util_math_int4.h"
445 
446 #include "util/util_math_float2.h"
447 #include "util/util_math_float3.h"
448 #include "util/util_math_float4.h"
449 
450 #include "util/util_rect.h"
451 
452 CCL_NAMESPACE_BEGIN
453 
454 #ifndef __KERNEL_OPENCL__
455 /* Interpolation */
456 
lerp(const A & a,const A & b,const B & t)457 template<class A, class B> A lerp(const A &a, const A &b, const B &t)
458 {
459   return (A)(a * ((B)1 - t) + b * t);
460 }
461 
462 #endif /* __KERNEL_OPENCL__ */
463 
464 /* Triangle */
465 
466 #ifndef __KERNEL_OPENCL__
triangle_area(const float3 & v1,const float3 & v2,const float3 & v3)467 ccl_device_inline float triangle_area(const float3 &v1, const float3 &v2, const float3 &v3)
468 #else
469 ccl_device_inline float triangle_area(const float3 v1, const float3 v2, const float3 v3)
470 #endif
471 {
472   return len(cross(v3 - v2, v1 - v2)) * 0.5f;
473 }
474 
475 /* Orthonormal vectors */
476 
make_orthonormals(const float3 N,float3 * a,float3 * b)477 ccl_device_inline void make_orthonormals(const float3 N, float3 *a, float3 *b)
478 {
479 #if 0
480   if (fabsf(N.y) >= 0.999f) {
481     *a = make_float3(1, 0, 0);
482     *b = make_float3(0, 0, 1);
483     return;
484   }
485   if (fabsf(N.z) >= 0.999f) {
486     *a = make_float3(1, 0, 0);
487     *b = make_float3(0, 1, 0);
488     return;
489   }
490 #endif
491 
492   if (N.x != N.y || N.x != N.z)
493     *a = make_float3(N.z - N.y, N.x - N.z, N.y - N.x);  //(1,1,1)x N
494   else
495     *a = make_float3(N.z - N.y, N.x + N.z, -N.y - N.x);  //(-1,1,1)x N
496 
497   *a = normalize(*a);
498   *b = cross(N, *a);
499 }
500 
501 /* Color division */
502 
safe_invert_color(float3 a)503 ccl_device_inline float3 safe_invert_color(float3 a)
504 {
505   float x, y, z;
506 
507   x = (a.x != 0.0f) ? 1.0f / a.x : 0.0f;
508   y = (a.y != 0.0f) ? 1.0f / a.y : 0.0f;
509   z = (a.z != 0.0f) ? 1.0f / a.z : 0.0f;
510 
511   return make_float3(x, y, z);
512 }
513 
safe_divide_color(float3 a,float3 b)514 ccl_device_inline float3 safe_divide_color(float3 a, float3 b)
515 {
516   float x, y, z;
517 
518   x = (b.x != 0.0f) ? a.x / b.x : 0.0f;
519   y = (b.y != 0.0f) ? a.y / b.y : 0.0f;
520   z = (b.z != 0.0f) ? a.z / b.z : 0.0f;
521 
522   return make_float3(x, y, z);
523 }
524 
safe_divide_even_color(float3 a,float3 b)525 ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b)
526 {
527   float x, y, z;
528 
529   x = (b.x != 0.0f) ? a.x / b.x : 0.0f;
530   y = (b.y != 0.0f) ? a.y / b.y : 0.0f;
531   z = (b.z != 0.0f) ? a.z / b.z : 0.0f;
532 
533   /* try to get gray even if b is zero */
534   if (b.x == 0.0f) {
535     if (b.y == 0.0f) {
536       x = z;
537       y = z;
538     }
539     else if (b.z == 0.0f) {
540       x = y;
541       z = y;
542     }
543     else
544       x = 0.5f * (y + z);
545   }
546   else if (b.y == 0.0f) {
547     if (b.z == 0.0f) {
548       y = x;
549       z = x;
550     }
551     else
552       y = 0.5f * (x + z);
553   }
554   else if (b.z == 0.0f) {
555     z = 0.5f * (x + y);
556   }
557 
558   return make_float3(x, y, z);
559 }
560 
561 /* Rotation of point around axis and angle */
562 
rotate_around_axis(float3 p,float3 axis,float angle)563 ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle)
564 {
565   float costheta = cosf(angle);
566   float sintheta = sinf(angle);
567   float3 r;
568 
569   r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) +
570         (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) +
571         (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z);
572 
573   r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) +
574         ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) +
575         (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z);
576 
577   r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) +
578         (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) +
579         ((costheta + (1 - costheta) * axis.z * axis.z) * p.z);
580 
581   return r;
582 }
583 
584 /* NaN-safe math ops */
585 
safe_sqrtf(float f)586 ccl_device_inline float safe_sqrtf(float f)
587 {
588   return sqrtf(max(f, 0.0f));
589 }
590 
inversesqrtf(float f)591 ccl_device_inline float inversesqrtf(float f)
592 {
593   return (f > 0.0f) ? 1.0f / sqrtf(f) : 0.0f;
594 }
595 
safe_asinf(float a)596 ccl_device float safe_asinf(float a)
597 {
598   return asinf(clamp(a, -1.0f, 1.0f));
599 }
600 
safe_acosf(float a)601 ccl_device float safe_acosf(float a)
602 {
603   return acosf(clamp(a, -1.0f, 1.0f));
604 }
605 
compatible_powf(float x,float y)606 ccl_device float compatible_powf(float x, float y)
607 {
608 #ifdef __KERNEL_GPU__
609   if (y == 0.0f) /* x^0 -> 1, including 0^0 */
610     return 1.0f;
611 
612   /* GPU pow doesn't accept negative x, do manual checks here */
613   if (x < 0.0f) {
614     if (fmodf(-y, 2.0f) == 0.0f)
615       return powf(-x, y);
616     else
617       return -powf(-x, y);
618   }
619   else if (x == 0.0f)
620     return 0.0f;
621 #endif
622   return powf(x, y);
623 }
624 
safe_powf(float a,float b)625 ccl_device float safe_powf(float a, float b)
626 {
627   if (UNLIKELY(a < 0.0f && b != float_to_int(b)))
628     return 0.0f;
629 
630   return compatible_powf(a, b);
631 }
632 
safe_divide(float a,float b)633 ccl_device float safe_divide(float a, float b)
634 {
635   return (b != 0.0f) ? a / b : 0.0f;
636 }
637 
safe_logf(float a,float b)638 ccl_device float safe_logf(float a, float b)
639 {
640   if (UNLIKELY(a <= 0.0f || b <= 0.0f))
641     return 0.0f;
642 
643   return safe_divide(logf(a), logf(b));
644 }
645 
safe_modulo(float a,float b)646 ccl_device float safe_modulo(float a, float b)
647 {
648   return (b != 0.0f) ? fmodf(a, b) : 0.0f;
649 }
650 
sqr(float a)651 ccl_device_inline float sqr(float a)
652 {
653   return a * a;
654 }
655 
pow20(float a)656 ccl_device_inline float pow20(float a)
657 {
658   return sqr(sqr(sqr(sqr(a)) * a));
659 }
660 
pow22(float a)661 ccl_device_inline float pow22(float a)
662 {
663   return sqr(a * sqr(sqr(sqr(a)) * a));
664 }
665 
beta(float x,float y)666 ccl_device_inline float beta(float x, float y)
667 {
668 #ifndef __KERNEL_OPENCL__
669   return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y));
670 #else
671   return expf(lgamma(x) + lgamma(y) - lgamma(x + y));
672 #endif
673 }
674 
xor_signmask(float x,int y)675 ccl_device_inline float xor_signmask(float x, int y)
676 {
677   return __int_as_float(__float_as_int(x) ^ y);
678 }
679 
bits_to_01(uint bits)680 ccl_device float bits_to_01(uint bits)
681 {
682   return bits * (1.0f / (float)0xFFFFFFFF);
683 }
684 
count_leading_zeros(uint x)685 ccl_device_inline uint count_leading_zeros(uint x)
686 {
687 #if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
688   return __clz(x);
689 #elif defined(__KERNEL_OPENCL__)
690   return clz(x);
691 #else
692   assert(x != 0);
693 #  ifdef _MSC_VER
694   unsigned long leading_zero = 0;
695   _BitScanReverse(&leading_zero, x);
696   return (31 - leading_zero);
697 #  else
698   return __builtin_clz(x);
699 #  endif
700 #endif
701 }
702 
count_trailing_zeros(uint x)703 ccl_device_inline uint count_trailing_zeros(uint x)
704 {
705 #if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
706   return (__ffs(x) - 1);
707 #elif defined(__KERNEL_OPENCL__)
708   return (31 - count_leading_zeros(x & -x));
709 #else
710   assert(x != 0);
711 #  ifdef _MSC_VER
712   unsigned long ctz = 0;
713   _BitScanForward(&ctz, x);
714   return ctz;
715 #  else
716   return __builtin_ctz(x);
717 #  endif
718 #endif
719 }
720 
find_first_set(uint x)721 ccl_device_inline uint find_first_set(uint x)
722 {
723 #if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__)
724   return __ffs(x);
725 #elif defined(__KERNEL_OPENCL__)
726   return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0;
727 #else
728 #  ifdef _MSC_VER
729   return (x != 0) ? (32 - count_leading_zeros(x & (-x))) : 0;
730 #  else
731   return __builtin_ffs(x);
732 #  endif
733 #endif
734 }
735 
736 /* projections */
map_to_tube(const float3 co)737 ccl_device_inline float2 map_to_tube(const float3 co)
738 {
739   float len, u, v;
740   len = sqrtf(co.x * co.x + co.y * co.y);
741   if (len > 0.0f) {
742     u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f;
743     v = (co.z + 1.0f) * 0.5f;
744   }
745   else {
746     u = v = 0.0f;
747   }
748   return make_float2(u, v);
749 }
750 
map_to_sphere(const float3 co)751 ccl_device_inline float2 map_to_sphere(const float3 co)
752 {
753   float l = len(co);
754   float u, v;
755   if (l > 0.0f) {
756     if (UNLIKELY(co.x == 0.0f && co.y == 0.0f)) {
757       u = 0.0f; /* othwise domain error */
758     }
759     else {
760       u = (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f;
761     }
762     v = 1.0f - safe_acosf(co.z / l) / M_PI_F;
763   }
764   else {
765     u = v = 0.0f;
766   }
767   return make_float2(u, v);
768 }
769 
770 /* Compares two floats.
771  * Returns true if their absolute difference is smaller than abs_diff (for numbers near zero)
772  * or their relative difference is less than ulp_diff ULPs.
773  * Based on
774  * https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
775  */
776 
compare_floats(float a,float b,float abs_diff,int ulp_diff)777 ccl_device_inline float compare_floats(float a, float b, float abs_diff, int ulp_diff)
778 {
779   if (fabsf(a - b) < abs_diff) {
780     return true;
781   }
782 
783   if ((a < 0.0f) != (b < 0.0f)) {
784     return false;
785   }
786 
787   return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff);
788 }
789 
790 /* Calculate the angle between the two vectors a and b.
791  * The usual approach acos(dot(a, b)) has severe precision issues for small angles,
792  * which are avoided by this method.
793  * Based on "Mangled Angles" from https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf
794  */
precise_angle(float3 a,float3 b)795 ccl_device_inline float precise_angle(float3 a, float3 b)
796 {
797   return 2.0f * atan2f(len(a - b), len(a + b));
798 }
799 
800 CCL_NAMESPACE_END
801 
802 #endif /* __UTIL_MATH_H__ */
803