1 // Copyright Contributors to the Open Shading Language project. 2 // SPDX-License-Identifier: BSD-3-Clause 3 // https://github.com/AcademySoftwareFoundation/OpenShadingLanguage 4 5 // clang-format off 6 7 #pragma once 8 9 #include <initializer_list> 10 #include <utility> 11 #include <OSL/oslconfig.h> 12 #include <OSL/oslversion.h> 13 #include <OpenImageIO/fmath.h> 14 15 16 OSL_NAMESPACE_ENTER 17 18 19 // Shortcut notation for enable_if trickery 20 # define DUAL_REQUIRES(...) \ 21 typename std::enable_if<(__VA_ARGS__), bool>::type = true 22 23 24 25 // To generically access elements or partials of a Dual, use a ConstIndex<#>. 26 // NOTE: Each ConstIndex<#> (ConstIndex<0>, ConstIndex<1>, ConstIndex<2>, ...) 27 // is a different type and known at compile time. A regular loop index will 28 // not work to access elements or partials. For non-generic code, the methods 29 // .val(), .dx(), .dy(), .dz() should be used. 30 // For generic code, the macro "OSL_INDEX_LOOP(__INDEX_NAME, __COUNT, ...)" 31 // is provided to execute a code block (...) with __INDEX_NAME correctly typed 32 // for each ConstIndex<#> within the # range [0-__COUNT). 33 // In practice, this means changing 34 // for(int i=0; i < elements;++i) { 35 // elem(i) += value; 36 // } 37 // to 38 // OSL_INDEX_LOOP(i, elements, { 39 // elem(i) += value; 40 // }); 41 42 43 // If possible (C++14+) use generic lambda based to loop to repeat code 44 // sequences with compile time known ConstIndex. Otherwise for c++11, 45 // a more limited manually unrolled loop to a fixed maximum __COUNT 46 // works because dead code elimination prunes extraneous iterations. 47 #if (OSL_CPLUSPLUS_VERSION >= 14) && !defined(__GNUC__) 48 // explanation of passing code block as macro argument to handle 49 // nested comma operators that might break up the code block into 50 // multiple macro arguments 51 // https://mort.coffee/home/obscure-c-features/ 52 // 53 // The macro's variable argument list populates the body of the functor lambda. 54 // NOTE: generic lambda parameter __INDEX_NAME means that the functor itself 55 // is a template that accepts any type. The static_foreach will call the 56 // templated functor with different types of ConstIndex<#> where # is [0-__COUNT) 57 #define OSL_INDEX_LOOP(__INDEX_NAME, __COUNT, ...) \ 58 { \ 59 auto functor = [&](auto __INDEX_NAME) { \ 60 __VA_ARGS__; \ 61 }; \ 62 static_foreach<ConstIndex, __COUNT>(functor); \ 63 } 64 65 #else 66 namespace pvt { 67 template<typename T> 68 static OSL_HOSTDEVICE constexpr T static_min(T a, T b) { 69 return (b < a) ? b : a; 70 } 71 } 72 // explanation of passing code block as macro argument to handle 73 // nested comma operators that might break up the code block into 74 // multiple macro arguments 75 // https://mort.coffee/home/obscure-c-features/ 76 // 77 // We rely on dead code elimination to quickly get rid of the code emitted 78 // for out of range indices, but we do take care to not generate any ConstIndex<#> 79 // that would be out of range using the static_min helper 80 #define OSL_INDEX_LOOP(__INDEX_NAME, __COUNT, ...) \ 81 static_assert((__COUNT) < 4, "macro based expansion must be repeated to support higher __COUNT values"); \ 82 { ConstIndex<0> __INDEX_NAME; __VA_ARGS__ ; } \ 83 if ((__COUNT) > 1) { ConstIndex<pvt::static_min(1, (__COUNT)-1)> __INDEX_NAME; __VA_ARGS__ ; } \ 84 if ((__COUNT) > 2) { ConstIndex<pvt::static_min(2, (__COUNT)-1)> __INDEX_NAME; __VA_ARGS__ ; } \ 85 if ((__COUNT) > 3) { ConstIndex<pvt::static_min(3, (__COUNT)-1)> __INDEX_NAME; __VA_ARGS__ ; } 86 #endif 87 88 /// Dual numbers are used to represent values and their derivatives. 89 /// 90 /// The method is summarized in: 91 /// Piponi, Dan, "Automatic Differentiation, C++ Templates, 92 /// and Photogrammetry," Journal of Graphics, GPU, and Game 93 /// Tools (JGT), Vol 9, No 4, pp. 41-55 (2004). 94 /// 95 /// 96 97 // Default DualStorage can handle any number of partials by storing elements 98 // in an array. In practice the array subscript operation can require 99 // standard data layout so that the base address + index works. This 100 // can inhibit other transformations useful for vectorization such as 101 // storing data members in a Structure Of Arrays (SOA) layout 102 template<class T, int PARTIALS> 103 class DualStorage 104 { 105 public: 106 T m_elem[PARTIALS+1]; 107 108 template<int N> elem(ConstIndex<N>)109 OSL_HOSTDEVICE constexpr const T& elem (ConstIndex<N>) const { return m_elem[N]; } 110 template<int N> elem(ConstIndex<N>)111 OSL_HOSTDEVICE T& elem (ConstIndex<N>) { return m_elem[N]; } 112 }; 113 114 115 // Specialize DualStorage for specific number of partials and store elements 116 // as individual data members. Because each ConstIndex<#> is its own type and 117 // identifies a specific element, we can overload the elem function for each 118 // specific ConstIndex<#> and return the corresponding data member. 119 // The main benefit is better enabling of SROA (Scalar Replacement of Aggregates) 120 // and other transformations that allow each data member to be 121 // optimized independently, kept in vector registers vs. scatter back to the 122 // stack, and more. 123 template<class T> 124 class DualStorage<T, 1> 125 { 126 public: 127 T m_val; 128 T m_dx; 129 130 // To better enable Scalar Replacement of Aggregates and other 131 // transformations, CLANG has easier time if the per element 132 // constructors declared. DualStorage()133 OSL_HOSTDEVICE OSL_CONSTEXPR14 DualStorage() {} DualStorage(const T & val,const T & dx)134 OSL_HOSTDEVICE OSL_CONSTEXPR14 DualStorage(const T & val, const T & dx) 135 : m_val(val) 136 , m_dx(dx) 137 {} DualStorage(const DualStorage & other)138 OSL_HOSTDEVICE OSL_CONSTEXPR14 DualStorage(const DualStorage &other) 139 : m_val(other.m_val) 140 , m_dx(other.m_dx) 141 {} 142 elem(ConstIndex<0>)143 OSL_HOSTDEVICE constexpr const T& elem (ConstIndex<0>) const { return m_val; } elem(ConstIndex<1>)144 OSL_HOSTDEVICE constexpr const T& elem (ConstIndex<1>) const { return m_dx; } elem(ConstIndex<0>)145 OSL_HOSTDEVICE T& elem (ConstIndex<0>) { return m_val; } elem(ConstIndex<1>)146 OSL_HOSTDEVICE T& elem (ConstIndex<1>) { return m_dx; } 147 }; 148 149 150 // Specialize layout to be explicit data members vs. array 151 template<class T> 152 class DualStorage<T, 2> 153 { 154 public: 155 T m_val; 156 T m_dx; 157 T m_dy; 158 159 // To better enable Scalar Replacement of Aggregates and other 160 // transformations, CLANG has easier time if the per element 161 // constructors declared. DualStorage()162 OSL_HOSTDEVICE OSL_CONSTEXPR14 DualStorage() {} DualStorage(const T & val,const T & dx,const T & dy)163 OSL_HOSTDEVICE OSL_CONSTEXPR14 DualStorage(const T & val, const T & dx, const T & dy) 164 : m_val(val) 165 , m_dx(dx) 166 , m_dy(dy) 167 {} DualStorage(const DualStorage & other)168 OSL_HOSTDEVICE OSL_CONSTEXPR14 DualStorage(const DualStorage &other) 169 : m_val(other.m_val) 170 , m_dx(other.m_dx) 171 , m_dy(other.m_dy) 172 {} 173 elem(ConstIndex<0>)174 OSL_HOSTDEVICE constexpr const T& elem (ConstIndex<0>) const { return m_val; } elem(ConstIndex<1>)175 OSL_HOSTDEVICE constexpr const T& elem (ConstIndex<1>) const { return m_dx; } elem(ConstIndex<2>)176 OSL_HOSTDEVICE constexpr const T& elem (ConstIndex<2>) const { return m_dy; } elem(ConstIndex<0>)177 OSL_HOSTDEVICE T& elem (ConstIndex<0>) { return m_val; } elem(ConstIndex<1>)178 OSL_HOSTDEVICE T& elem (ConstIndex<1>) { return m_dx; } elem(ConstIndex<2>)179 OSL_HOSTDEVICE T& elem (ConstIndex<2>) { return m_dy; } 180 }; 181 182 183 template<class T> 184 class DualStorage<T, 3> 185 { 186 public: 187 T m_val; 188 T m_dx; 189 T m_dy; 190 T m_dz; 191 192 // To better enable Scalar Replacement of Aggregates and other 193 // transformations, CLANG has easier time if the per element 194 // constructors declared. DualStorage()195 OSL_HOSTDEVICE OSL_CONSTEXPR14 DualStorage() {} DualStorage(const T & val,const T & dx,const T & dy,const T & dz)196 OSL_HOSTDEVICE OSL_CONSTEXPR14 DualStorage(const T & val, const T & dx, const T & dy, const T & dz) 197 : m_val(val) 198 , m_dx(dx) 199 , m_dy(dy) 200 , m_dz(dz) 201 {} 202 DualStorage(const DualStorage & other)203 OSL_HOSTDEVICE OSL_CONSTEXPR14 DualStorage(const DualStorage &other) 204 : m_val(other.m_val) 205 , m_dx(other.m_dx) 206 , m_dy(other.m_dy) 207 , m_dz(other.dz) 208 {} 209 elem(ConstIndex<0>)210 OSL_HOSTDEVICE constexpr const T& elem (ConstIndex<0>) const { return m_val; } elem(ConstIndex<1>)211 OSL_HOSTDEVICE constexpr const T& elem (ConstIndex<1>) const { return m_dx; } elem(ConstIndex<2>)212 OSL_HOSTDEVICE constexpr const T& elem (ConstIndex<2>) const { return m_dy; } elem(ConstIndex<3>)213 OSL_HOSTDEVICE constexpr const T& elem (ConstIndex<3>) const { return m_dz; } elem(ConstIndex<0>)214 OSL_HOSTDEVICE T& elem (ConstIndex<0>) { return m_val; } elem(ConstIndex<1>)215 OSL_HOSTDEVICE T& elem (ConstIndex<1>) { return m_dx; } elem(ConstIndex<2>)216 OSL_HOSTDEVICE T& elem (ConstIndex<2>) { return m_dy; } elem(ConstIndex<3>)217 OSL_HOSTDEVICE T& elem (ConstIndex<3>) { return m_dz; } 218 }; 219 220 221 222 // Single generic implementation of Dual can handle any number of partials 223 // delegating access to element storage to the DualStorage base class 224 template< 225 class T, // Base data type 226 int PARTIALS=1 // Number of dimentions of partial derivs 227 > 228 class Dual : public DualStorage<T, PARTIALS> { 229 static const int elements = PARTIALS+1; // main value + partials 230 static_assert (PARTIALS>=1, "Can't have a Dual with 0 partials"); 231 public: 232 using value_type = T; zero()233 static OSL_FORCEINLINE OSL_HOSTDEVICE OSL_CONSTEXPR14 T zero() { return T(0.0); } 234 235 using DualStorage<T, PARTIALS>::elem; 236 template<int N> partial(ConstIndex<N>)237 OSL_HOSTDEVICE constexpr const T partial (ConstIndex<N>) const { return elem(ConstIndex<N+1>()); } 238 template<int N> partial(ConstIndex<N>)239 OSL_HOSTDEVICE T& partial (ConstIndex<N>) { return elem(ConstIndex<N+1>()); } 240 241 /// Default ctr leaves everything uninitialized 242 /// Dual()243 OSL_HOSTDEVICE OSL_CONSTEXPR14 Dual () 244 : DualStorage<T, PARTIALS>() 245 {} 246 247 /// Construct a Dual from just a real value (derivs set to 0) 248 /// Dual(const T & x)249 OSL_HOSTDEVICE OSL_CONSTEXPR14 Dual (const T &x) 250 : DualStorage<T, PARTIALS>() 251 { 252 val() = x; 253 OSL_INDEX_LOOP(i, PARTIALS, { 254 partial(i) = zero(); 255 }); 256 } 257 258 /// Copy constructor from another Dual of same dimension and different, 259 /// but castable, data type. 260 template<class F> Dual(const Dual<F,PARTIALS> & x)261 OSL_HOSTDEVICE OSL_CONSTEXPR14 Dual (const Dual<F,PARTIALS> &x) 262 : DualStorage<T, PARTIALS>() 263 { 264 OSL_INDEX_LOOP(i, elements, { 265 elem(i) = T(x.elem(i)); 266 }); 267 } 268 269 //OSL_HOSTDEVICE OSL_CONSTEXPR14 Dual (const Dual &x) = default; Dual(const Dual & x)270 OSL_HOSTDEVICE OSL_CONSTEXPR14 Dual (const Dual &x) 271 : DualStorage<T, PARTIALS>(x) 272 {} 273 274 /// Construct a Dual from a real and infinitesimals. 275 /// 276 template<typename... DerivListT> Dual(const T & x,const DerivListT &...derivs)277 OSL_HOSTDEVICE constexpr Dual (const T &x, const DerivListT & ...derivs) 278 : DualStorage<T, PARTIALS>{ x, static_cast<T>(derivs)...} 279 { 280 static_assert(sizeof...(DerivListT)==PARTIALS, "Wrong number of initializers"); 281 } 282 283 protected: 284 template<int... IntListT, typename... ValueListT> 285 OSL_HOSTDEVICE OSL_FORCEINLINE void set_expander(pvt::int_sequence<IntListT...>,const ValueListT &...values)286 set_expander (pvt::int_sequence<IntListT...> /*indices*/, const ValueListT & ...values) 287 { 288 __OSL_EXPAND_PARAMETER_PACKS( elem(ConstIndex<IntListT>()) = values ); 289 } 290 public: 291 292 template<typename... ValueListT> set(const ValueListT &...values)293 OSL_HOSTDEVICE void set (const ValueListT & ...values) 294 { 295 static_assert(sizeof...(ValueListT)==elements, "Wrong number of initializers"); 296 297 set_expander(pvt::make_int_sequence<elements>(), values...); 298 } 299 Dual(std::initializer_list<T> vals)300 OSL_HOSTDEVICE OSL_CONSTEXPR14 Dual (std::initializer_list<T> vals) { 301 #if OIIO_CPLUSPLUS_VERSION >= 14 302 static_assert (vals.size() == elements, "Wrong number of initializers"); 303 #endif 304 OSL_INDEX_LOOP(i, elements, { 305 elem(i) = vals.begin()[i]; 306 }); 307 } 308 309 /// Return the real value. val()310 OSL_HOSTDEVICE constexpr const T& val () const { return elem(ConstIndex<0>()); } val()311 OSL_HOSTDEVICE T& val () { return elem(ConstIndex<0>()); } 312 313 /// Return the partial derivative with respect to x. dx()314 OSL_HOSTDEVICE constexpr const T& dx () const { return elem(ConstIndex<1>()); } dx()315 OSL_HOSTDEVICE T& dx () { return elem(ConstIndex<1>()); } 316 317 /// Return the partial derivative with respect to y. 318 template<typename ThisType = Dual, typename std::enable_if<ThisType::elements==3, int>::type = 0> 319 OSL_HOSTDEVICE constexpr const T & dy()320 dy () const { 321 return elem(ConstIndex<2>()); 322 } 323 template<typename ThisType = Dual, typename std::enable_if<ThisType::elements==3, int>::type = 0> dy()324 OSL_HOSTDEVICE T& dy () { 325 return elem(ConstIndex<2>()); 326 } 327 328 /// Return the partial derivative with respect to z. 329 /// Only valid if there are at least 3 partial dimensions. 330 template<typename ThisType = Dual, typename std::enable_if<ThisType::elements==4, int>::type = 0> dz()331 OSL_HOSTDEVICE constexpr const T& dz () const { 332 return elem(ConstIndex<3>()); 333 } 334 template<typename ThisType = Dual, typename std::enable_if<ThisType::elements==4, int>::type = 0> dz()335 OSL_HOSTDEVICE T& dz () { 336 return elem(ConstIndex<3>()); 337 } 338 339 /// Clear the derivatives; leave the value alone. clear_d()340 OSL_HOSTDEVICE void clear_d () { 341 OSL_INDEX_LOOP(i, PARTIALS, { 342 partial(i) = zero(); 343 }); 344 } 345 346 /// Assignment of a real (the derivs are implicitly 0). 347 OSL_HOSTDEVICE const Dual & operator= (const T &x) { 348 val() = x; 349 OSL_INDEX_LOOP(i, PARTIALS, { 350 partial(i) = zero(); 351 }); 352 return *this; 353 } 354 355 OSL_HOSTDEVICE const Dual & operator= (const Dual &other) { 356 OSL_INDEX_LOOP(i, elements, { 357 elem(i) = other.elem(i); 358 }); 359 return *this; 360 } 361 362 363 /// Stream output. Format as: "val[dx,dy,...]" 364 /// 365 friend std::ostream& operator<< (std::ostream &out, const Dual &x) { 366 out << x.val() << "["; 367 OSL_INDEX_LOOP(i, PARTIALS, { 368 out << (x.partial(i)) << ((i < PARTIALS-1) ? ',' : ']'); 369 }); 370 return out; 371 } 372 373 }; 374 375 376 377 /// is_Dual<TYPE>::value is true for Dual, false for everything else. 378 /// You can also evaluate an is_Dual<T> as a bool. 379 template<class T> struct is_Dual : std::false_type {}; 380 template<class T, int P> struct is_Dual<Dual<T,P>> : std::true_type {}; 381 382 /// Dualify<T>::type returns Dual<T> if T is not a dual, or just T if it's 383 /// already a dual. 384 template<class T> struct UnDual { typedef T type; }; 385 template<class T, int P> struct UnDual<Dual<T,P>> { typedef T type; }; 386 387 /// Dualify<T,P>::type returns Dual<T,P> if T is not a dual, or just T if 388 /// it's already a dual. 389 template<class T, int P=1> struct Dualify { 390 typedef Dual<typename UnDual<T>::type, P> type; 391 }; 392 393 394 395 /// Define Dual2<T> as a Dual<T,2> -- a T-based quantity with x and y 396 /// partial derivatives. 397 template<class T> using Dual2 = Dual<T,2>; 398 399 400 401 /// Addition of duals. 402 /// 403 template<class T, int P> 404 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> operator+ (const Dual<T,P> &a, const Dual<T,P> &b) 405 { 406 Dual<T,P> result = a; 407 OSL_INDEX_LOOP(i, P+1, { 408 result.elem(i) += b.elem(i); 409 }); 410 return result; 411 } 412 413 414 template<class T, int P> 415 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> operator+ (const Dual<T,P> &a, const T &b) 416 { 417 Dual<T,P> result = a; 418 result.val() += b; 419 return result; 420 } 421 422 423 template<class T, int P> 424 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> operator+ (const T &a, const Dual<T,P> &b) 425 { 426 Dual<T,P> result = b; 427 result.val() += a; 428 return result; 429 } 430 431 432 template<class T, int P> 433 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P>& operator+= (Dual<T,P> &a, const Dual<T,P> &b) 434 { 435 OSL_INDEX_LOOP(i, P+1, { 436 a.elem(i) += b.elem(i); 437 }); 438 return a; 439 } 440 441 442 template<class T, int P> 443 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P>& operator+= (Dual<T,P> &a, const T &b) 444 { 445 a.val() += b; 446 return a; 447 } 448 449 450 /// Subtraction of duals. 451 /// 452 template<class T, int P> 453 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> operator- (const Dual<T,P> &a, const Dual<T,P> &b) 454 { 455 Dual<T,P> result; 456 OSL_INDEX_LOOP(i, P+1, { 457 result.elem(i) = a.elem(i) - b.elem(i); 458 }); 459 return result; 460 } 461 462 463 template<class T, int P> 464 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> operator- (const Dual<T,P> &a, const T &b) 465 { 466 Dual<T,P> result = a; 467 result.val() -= b; 468 return result; 469 } 470 471 472 template<class T, int P> 473 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> operator- (const T &a, const Dual<T,P> &b) 474 { 475 Dual<T,P> result; 476 result.val() = a - b.val(); 477 OSL_INDEX_LOOP(i, P, { 478 result.partial(i) = -b.partial(i); 479 }); 480 return result; 481 } 482 483 484 template<class T, int P> 485 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P>& operator-= (Dual<T,P> &a, const Dual<T,P> &b) 486 { 487 OSL_INDEX_LOOP(i, P+1, { 488 a.elem(i) -= b.elem(i); 489 }); 490 return a; 491 } 492 493 494 template<class T, int P> 495 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P>& operator-= (Dual<T,P> &a, const T &b) 496 { 497 a.val() -= b; 498 return a; 499 } 500 501 502 503 /// Negation of duals. 504 /// 505 template<class T, int P> 506 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> operator- (const Dual<T,P> &a) 507 { 508 Dual<T,P> result; 509 OSL_INDEX_LOOP(i, P+1, { 510 result.elem(i) = -a.elem(i); 511 }); 512 return result; 513 } 514 515 516 /// Multiplication of duals. This will work for any Dual<T>*Dual<S> 517 /// where T*S is meaningful. 518 template<class T, int P, class S> 519 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 auto 520 operator* (const Dual<T,P> &a, const Dual<S,P> &b) -> Dual<decltype(a.val()*b.val()),P> 521 { 522 // Use the chain rule 523 Dual<decltype(a.val()*b.val()),P> result; 524 result.val() = a.val() * b.val(); 525 OSL_INDEX_LOOP(i, P, { 526 result.partial(i) = a.val()*b.partial(i) + a.partial(i)*b.val(); 527 }); 528 return result; 529 } 530 531 532 /// Multiplication of dual by a non-dual scalar. This will work for any 533 /// Dual<T> * S where T*S is meaningful. 534 template<class T, int P, class S, 535 DUAL_REQUIRES(is_Dual<S>::value == false)> 536 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 auto 537 operator* (const Dual<T,P> &a, const S &b) -> Dual<decltype(a.val()*b),P> 538 { 539 Dual<decltype(a.val()*b),P> result; 540 OSL_INDEX_LOOP(i, P+1, { 541 result.elem(i) = a.elem(i) * b; 542 }); 543 return result; 544 } 545 546 547 /// Multiplication of dual by scalar in place. 548 /// 549 template<class T, int P, class S, 550 DUAL_REQUIRES(is_Dual<S>::value == false)> 551 OSL_HOSTDEVICE OSL_FORCEINLINE const Dual<T,P>& operator*= (Dual<T,P> &a, const S &b) 552 { 553 OSL_INDEX_LOOP(i, P+1, { 554 a.elem(i) *= b; 555 }); 556 return a; 557 } 558 559 560 561 /// Multiplication of dual by a non-dual scalar. This will work for any 562 /// Dual<T> * S where T*S is meaningful. 563 template<class T, int P, class S, 564 DUAL_REQUIRES(is_Dual<S>::value == false)> 565 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 auto 566 operator* (const S &b, const Dual<T,P> &a) -> Dual<decltype(a.val()*b),P> 567 { 568 return a*b; 569 } 570 571 572 573 /// Division of duals. 574 /// 575 template<class T, int P> 576 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> operator/ (const Dual<T,P> &a, const Dual<T,P> &b) 577 { 578 T bvalinv = T(1) / b.val(); 579 T aval_bval = a.val() / b.val(); 580 Dual<T,P> result; 581 result.val() = aval_bval; 582 OSL_INDEX_LOOP(i, P, { 583 result.partial(i) = bvalinv * (a.partial(i) - aval_bval * b.partial(i)); 584 }); 585 return result; 586 } 587 588 589 /// Division of dual by scalar. 590 /// 591 template<class T, int P> 592 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> operator/ (const Dual<T,P> &a, const T &b) 593 { 594 T bvalinv = T(1) / b; 595 T aval_bval = a.val() / b; 596 Dual<T,P> result; 597 result.val() = aval_bval; 598 OSL_INDEX_LOOP(i, P, { 599 result.partial(i) = bvalinv * a.partial(i); 600 }); 601 return result; 602 } 603 604 605 /// Division of scalar by dual. 606 /// 607 template<class T, int P> 608 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> operator/ (const T &aval, const Dual<T,P> &b) 609 { 610 T bvalinv = T(1) / b.val(); 611 T aval_bval = aval / b.val(); 612 Dual<T,P> result; 613 result.val() = aval_bval; 614 OSL_INDEX_LOOP(i, P, { 615 result.partial(i) = bvalinv * ( - aval_bval * b.partial(i)); 616 }); 617 return result; 618 } 619 620 621 622 623 template<class T, int P> 624 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator< (const Dual<T,P> &a, const Dual<T,P> &b) { 625 return a.val() < b.val(); 626 } 627 628 template<class T, int P> 629 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator< (const Dual<T,P> &a, const T &b) { 630 return a.val() < b; 631 } 632 633 template<class T, int P> 634 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator< (const T &a, const Dual<T,P> &b) { 635 return a < b.val(); 636 } 637 638 template<class T, int P> 639 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator> (const Dual<T,P> &a, const Dual<T,P> &b) { 640 return a.val() > b.val(); 641 } 642 643 template<class T, int P> 644 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator> (const Dual<T,P> &a, const T &b) { 645 return a.val() > b; 646 } 647 648 template<class T, int P> 649 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator> (const T &a, const Dual<T,P> &b) { 650 return a > b.val(); 651 } 652 653 template<class T, int P> 654 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator<= (const Dual<T,P> &a, const Dual<T,P> &b) { 655 return a.val() <= b.val(); 656 } 657 658 template<class T, int P> 659 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator<= (const Dual<T,P> &a, const T &b) { 660 return a.val() <= b; 661 } 662 663 template<class T, int P> 664 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator<= (const T &a, const Dual<T,P> &b) { 665 return a <= b.val(); 666 } 667 668 template<class T, int P> 669 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator>= (const Dual<T,P> &a, const Dual<T,P> &b) { 670 return a.val() >= b.val(); 671 } 672 673 template<class T, int P> 674 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator>= (const Dual<T,P> &a, const T &b) { 675 return a.val() >= b; 676 } 677 678 template<class T, int P> 679 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool operator>= (const T &a, const Dual<T,P> &b) { 680 return a >= b.val(); 681 } 682 683 684 685 // Eliminate the derivatives of a number, works for scalar as well as Dual. 686 template<class T> OSL_HOSTDEVICE OSL_FORCEINLINE constexpr const T& 687 removeDerivatives (const T &x) { return x; } 688 689 template<class T, int P> OSL_HOSTDEVICE OSL_FORCEINLINE constexpr const T& 690 removeDerivatives (const Dual<T,P> &x) { return x.val(); } 691 692 693 // Get the x derivative (or 0 for a non-Dual) 694 template<class T> OSL_HOSTDEVICE OSL_FORCEINLINE constexpr const T& 695 getXDerivative (const T & /*x*/) { return T(0); } 696 697 template<class T, int P> OSL_HOSTDEVICE OSL_FORCEINLINE constexpr const T& 698 getXDerivative (const Dual<T,P> &x) { return x.dx(); } 699 700 701 // Get the y derivative (or 0 for a non-Dual) 702 template<class T> OSL_HOSTDEVICE OSL_FORCEINLINE constexpr const T& 703 getYDerivative (const T & /*x*/) { return T(0); } 704 705 template<class T, int P> OSL_HOSTDEVICE OSL_FORCEINLINE constexpr const T& 706 getYDerivative (const Dual<T,P> &x) { return x.dy(); } 707 708 709 // Simple templated "copy" function 710 template<class T> OSL_HOSTDEVICE OSL_FORCEINLINE void 711 assignment (T &a, const T &b) { a = b; } 712 template<class T, int P> OSL_HOSTDEVICE OSL_FORCEINLINE void 713 assignment (T &a, const Dual<T,P> &b) { a = b.val(); } 714 715 // Templated value equality. For scalars, it's the same as regular ==. 716 // For Dual's, this only tests the value, not the derivatives. This solves 717 // a pesky source of confusion about whether operator== of Duals ought to 718 // return if just their value is equal or if the whole struct (including 719 // derivs) are equal. 720 template<class T> 721 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool equalVal (const T &x, const T &y) { 722 return x == y; 723 } 724 725 template<class T, int P> 726 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool equalVal (const Dual<T,P> &x, const Dual<T,P> &y) { 727 return x.val() == y.val(); 728 } 729 730 template<class T, int P> 731 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool equalVal (const Dual<T,P> &x, const T &y) { 732 return x.val() == y; 733 } 734 735 template<class T, int P> 736 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr bool equalVal (const T &x, const Dual<T,P> &y) { 737 return x == y.val(); 738 } 739 740 741 742 /// equalAll is the same as equalVal generally, but for two Duals of the same 743 /// type, they also compare derivs. 744 template<class T> 745 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 bool equalAll (const T &x, const T &y) { 746 return equalVal(x, y); 747 } 748 749 template<class T, int P> 750 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 bool equalAll (const Dual<T,P> &x, const Dual<T,P> &y) { 751 bool r = true; 752 OSL_INDEX_LOOP(i, P+1, { 753 r &= (x.elem(i) == y.elem(i)); 754 }); 755 return r; 756 } 757 758 759 760 761 // Helper for constructing the result of a Dual function of one variable, 762 // given the scalar version and its derivative. Suppose you have scalar 763 // function f(scalar), then the dual function F(<u,u'>) is defined as: 764 // F(<u,u'>) = < f(u), f'(u)*u' > 765 template<class T, int P> 766 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> 767 dualfunc (const Dual<T,P>& u, const T& f_val, const T& df_val) 768 { 769 Dual<T,P> result; 770 result.val() = f_val; 771 OSL_INDEX_LOOP(i, P, { 772 result.partial(i) = df_val * u.partial(i); 773 }); 774 return result; 775 } 776 777 778 // Helper for constructing the result of a Dual function of two variables, 779 // given the scalar version and its derivative. In general, the dual-form of 780 // the primitive function 'f(u,v)' is: 781 // F(<u,u'>, <v,v'>) = < f(u,v), dfdu(u,v)u' + dfdv(u,v)v' > 782 // (from http://en.wikipedia.org/wiki/Automatic_differentiation) 783 template<class T, int P> 784 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> 785 dualfunc (const Dual<T,P>& u, const Dual<T,P>& v, 786 const T& f_val, const T& dfdu_val, const T& dfdv_val) 787 { 788 Dual<T,P> result; 789 result.val() = f_val; 790 OSL_INDEX_LOOP(i, P, { 791 result.partial(i) = dfdu_val * u.partial(i) + dfdv_val * v.partial(i); 792 }); 793 return result; 794 } 795 796 797 // Helper for construction the result of a Dual function of three variables. 798 template<class T, int P> 799 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> 800 dualfunc (const Dual<T,P>& u, const Dual<T,P>& v, const Dual<T,P>& w, 801 const T& f_val, const T& dfdu_val, const T& dfdv_val, const T& dfdw_val) 802 { 803 Dual<T,P> result; 804 result.val() = f_val; 805 OSL_INDEX_LOOP(i, P, { 806 result.partial(i) = dfdu_val * u.partial(i) + dfdv_val * v.partial(i) + dfdw_val * w.partial(i); 807 }); 808 return result; 809 } 810 811 812 813 /// Fast negation of duals, in cases where you aren't too picky about the 814 /// difference between +0 and -0. (Courtesy of Alex Wells, Intel) Please see 815 /// OIIO's fast_math.h fast_neg for a more full explanation of why this is 816 /// faster. 817 template<class T, int P> 818 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> fast_neg (const Dual<T,P> &a) 819 { 820 Dual<T,P> result; 821 OSL_INDEX_LOOP(i, P+1, { 822 result.elem(i) = T(0) - a.elem(i); 823 }); 824 return result; 825 } 826 827 828 // f(x) = cos(x), f'(x) = -sin(x) 829 template<class T, int P> 830 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> cos (const Dual<T,P> &a) 831 { 832 T sina, cosa; 833 OIIO::sincos(a.val(), &sina, &cosa); 834 return dualfunc (a, cosa, -sina); 835 } 836 837 template<class T, int P> 838 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_cos(const Dual<T,P> &a) 839 { 840 T sina, cosa; 841 OIIO::fast_sincos (a.val(), &sina, &cosa); 842 return dualfunc (a, cosa, -sina); 843 } 844 845 // f(x) = sin(x), f'(x) = cos(x) 846 template<class T, int P> 847 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> sin (const Dual<T,P> &a) 848 { 849 T sina, cosa; 850 OIIO::sincos(a.val(), &sina, &cosa); 851 return dualfunc (a, sina, cosa); 852 } 853 854 template<class T, int P> 855 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_sin(const Dual<T,P> &a) 856 { 857 T sina, cosa; 858 OIIO::fast_sincos (a.val(), &sina, &cosa); 859 return dualfunc (a, sina, cosa); 860 } 861 862 template<class T, int P> 863 OSL_HOSTDEVICE OSL_FORCEINLINE void sincos(const Dual<T,P> &a, Dual<T,P> *sine, Dual<T,P> *cosine) 864 { 865 T sina, cosa; 866 OIIO::sincos(a.val(), &sina, &cosa); 867 *cosine = dualfunc (a, cosa, -sina); 868 *sine = dualfunc (a, sina, cosa); 869 } 870 871 template<class T, int P> 872 OSL_HOSTDEVICE OSL_FORCEINLINE void fast_sincos(const Dual<T,P> &a, Dual<T,P> *sine, Dual<T,P> *cosine) 873 { 874 T sina, cosa; 875 OIIO::fast_sincos(a.val(), &sina, &cosa); 876 *cosine = dualfunc (a, cosa, -sina); 877 *sine = dualfunc (a, sina, cosa); 878 } 879 880 // f(x) = tan(x), f'(x) = sec^2(x) 881 template<class T, int P> 882 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> tan (const Dual<T,P> &a) 883 { 884 T tana = std::tan (a.val()); 885 T cosa = std::cos (a.val()); 886 T sec2a = T(1)/(cosa*cosa); 887 return dualfunc (a, tana, sec2a); 888 } 889 890 template<class T, int P> 891 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_tan(const Dual<T,P> &a) 892 { 893 T tana = OIIO::fast_tan (a.val()); 894 T cosa = OIIO::fast_cos (a.val()); 895 T sec2a = 1 / (cosa * cosa); 896 return dualfunc (a, tana, sec2a); 897 } 898 899 // f(x) = cosh(x), f'(x) = sinh(x) 900 template<class T, int P> 901 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> cosh (const Dual<T,P> &a) 902 { 903 T f = std::cosh(a.val()); 904 T df = std::sinh(a.val()); 905 return dualfunc (a, f, df); 906 } 907 908 template<class T, int P> 909 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_cosh(const Dual<T,P> &a) 910 { 911 T f = OIIO::fast_cosh(a.val()); 912 T df = OIIO::fast_sinh(a.val()); 913 return dualfunc (a, f, df); 914 } 915 916 917 // f(x) = sinh(x), f'(x) = cosh(x) 918 template<class T, int P> 919 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> sinh (const Dual<T,P> &a) 920 { 921 T f = std::sinh(a.val()); 922 T df = std::cosh(a.val()); 923 return dualfunc (a, f, df); 924 } 925 926 template<class T, int P> 927 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_sinh(const Dual<T,P> &a) 928 { 929 T f = OIIO::fast_sinh(a.val()); 930 T df = OIIO::fast_cosh(a.val()); 931 return dualfunc (a, f, df); 932 } 933 934 // f(x) = tanh(x), f'(x) = sech^2(x) 935 template<class T, int P> 936 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> tanh (const Dual<T,P> &a) 937 { 938 T tanha = std::tanh(a.val()); 939 T cosha = std::cosh(a.val()); 940 T sech2a = T(1)/(cosha*cosha); 941 return dualfunc (a, tanha, sech2a); 942 } 943 944 template<class T, int P> 945 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_tanh(const Dual<T,P> &a) 946 { 947 T tanha = OIIO::fast_tanh(a.val()); 948 T cosha = OIIO::fast_cosh(a.val()); 949 T sech2a = T(1) / (cosha * cosha); 950 return dualfunc (a, tanha, sech2a); 951 } 952 953 // f(x) = acos(x), f'(x) = -1/(sqrt(1 - x^2)) 954 template<class T, int P> 955 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> safe_acos (const Dual<T,P> &a) 956 { 957 if (a.val() >= T(1)) 958 return Dual<T,P> (T(0)); 959 if (a.val() <= T(-1)) 960 return Dual<T,P> (T(M_PI)); 961 T f = std::acos (a.val()); 962 T df = -T(1) / std::sqrt (T(1) - a.val()*a.val()); 963 return dualfunc (a, f, df); 964 } 965 966 template<class T, int P> 967 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_acos(const Dual<T,P> &a) 968 { 969 T f = OIIO::fast_acos(a.val()); 970 T df = fabsf(a.val()) < 1.0f ? -1.0f / sqrtf(1.0f - a.val() * a.val()) : 0.0f; 971 return dualfunc (a, f, df); 972 } 973 974 // f(x) = asin(x), f'(x) = 1/(sqrt(1 - x^2)) 975 template<class T, int P> 976 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> safe_asin (const Dual<T,P> &a) 977 { 978 if (a.val() >= T(1)) 979 return Dual<T,P> (T(M_PI/2)); 980 if (a.val() <= T(-1)) 981 return Dual<T,P> (T(-M_PI/2)); 982 T f = std::asin (a.val()); 983 T df = T(1) / std::sqrt (T(1) - a.val()*a.val()); 984 return dualfunc (a, f, df); 985 } 986 987 template<class T, int P> 988 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_asin(const Dual<T,P> &a) 989 { 990 T f = OIIO::fast_asin(a.val()); 991 T df = fabsf(a.val()) < 1.0f ? 1.0f / sqrtf(1.0f - a.val() * a.val()) : 0.0f; 992 return dualfunc (a, f, df); 993 } 994 995 996 // f(x) = atan(x), f'(x) = 1/(1 + x^2) 997 template<class T, int P> 998 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> atan (const Dual<T,P> &a) 999 { 1000 T f = std::atan (a.val()); 1001 T df = T(1) / (T(1) + a.val()*a.val()); 1002 return dualfunc (a, f, df); 1003 } 1004 1005 template<class T, int P> 1006 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_atan(const Dual<T,P> &a) 1007 { 1008 T f = OIIO::fast_atan(a.val()); 1009 T df = 1.0f / (1.0f + a.val() * a.val()); 1010 return dualfunc (a, f, df); 1011 } 1012 1013 // f(x,y) = atan2(y,x); f'(x) = y x' / (x^2 + y^2), 1014 // f'(y) = -x y' / (x^2 + y^2) 1015 // reference: http://en.wikipedia.org/wiki/Atan2 1016 // (above link has other formulations) 1017 template<class T, int P> 1018 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> atan2 (const Dual<T,P> &y, const Dual<T,P> &x) 1019 { 1020 T atan2xy = std::atan2 (y.val(), x.val()); 1021 T denom = (x.val() == T(0) && y.val() == T(0)) ? T(0) : T(1) / (x.val()*x.val() + y.val()*y.val()); 1022 return dualfunc (y, x, atan2xy, -x.val()*denom, y.val()*denom); 1023 } 1024 1025 template<class T, int P> 1026 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_atan2(const Dual<T,P> &y, const Dual<T,P> &x) 1027 { 1028 T atan2xy = OIIO::fast_atan2(y.val(), x.val()); 1029 T denom = (x.val() == 0 && y.val() == 0) ? 0.0f : 1.0f / (x.val() * x.val() + y.val() * y.val()); 1030 return dualfunc (y, x, atan2xy, -x.val()*denom, y.val()*denom); 1031 } 1032 1033 // f(x) = log(a), f'(x) = 1/x 1034 // (log base e) 1035 template<class T, int P> 1036 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> safe_log (const Dual<T,P> &a) 1037 { 1038 T f = OIIO::safe_log(a.val()); 1039 T df = a.val() < std::numeric_limits<T>::min() ? T(0) : T(1) / a.val(); 1040 return dualfunc (a, f, df); 1041 } 1042 1043 template<class T, int P> 1044 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_log(const Dual<T,P> &a) 1045 { 1046 T f = OIIO::fast_log(a.val()); 1047 T df = a.val() < std::numeric_limits<float>::min() ? 0.0f : 1.0f / a.val(); 1048 return dualfunc (a, f, df); 1049 } 1050 1051 1052 // to compute pow(u,v), we need the dual-form representation of 1053 // the pow() operator. In general, the dual-form of the primitive 1054 // function 'g' is: 1055 // g(<u,u'>, <v,v'>) = < g(u,v), dgdu(u,v)u' + dgdv(u,v)v' > 1056 // (from http://en.wikipedia.org/wiki/Automatic_differentiation) 1057 // so, pow(u,v) = < u^v, vu^(v-1) u' + log(u)u^v v' > 1058 template<class T, int P> 1059 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> safe_pow (const Dual<T,P> &u, const Dual<T,P> &v) 1060 { 1061 // NOTE: this function won't return exactly the same as pow(x,y) because we 1062 // use the identity u^v=u * u^(v-1) which does not hold in all cases for our 1063 // "safe" variant (nor does it hold in general in floating point arithmetic). 1064 T powuvm1 = OIIO::safe_pow(u.val(), v.val() - T(1)); 1065 T powuv = powuvm1 * u.val(); 1066 T logu = u.val() > 0 ? OIIO::safe_log(u.val()) : T(0); 1067 return dualfunc (u, v, powuv, v.val()*powuvm1, logu*powuv); 1068 } 1069 // Fallthrough to OIIO::safe_pow for floats and vectors 1070 using OIIO::safe_pow; 1071 1072 template<class T, int P> 1073 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_safe_pow(const Dual<T,P> &u, const Dual<T,P> &v) 1074 { 1075 // NOTE: same issue as above (fast_safe_pow does even more clamping) 1076 T powuvm1 = OIIO::fast_safe_pow (u.val(), v.val() - 1.0f); 1077 T powuv = powuvm1 * u.val(); 1078 T logu = u.val() > 0 ? OIIO::fast_log(u.val()) : 0.0f; 1079 return dualfunc (u, v, powuv, v.val()*powuvm1, logu*powuv); 1080 } 1081 1082 // f(x) = log2(x), f'(x) = 1/(x*log2) 1083 // (log base 2) 1084 template<class T, int P> 1085 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> safe_log2 (const Dual<T,P> &a) 1086 { 1087 T f = safe_log2(a.val()); 1088 T df = a.val() < std::numeric_limits<T>::min() ? T(0) : T(1) / (a.val() * T(M_LN2)); 1089 return dualfunc (a, f, df); 1090 } 1091 1092 template<class T, int P> 1093 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_log2(const Dual<T,P> &a) 1094 { 1095 T f = OIIO::fast_log2(a.val()); 1096 T aln2 = a.val() * float(M_LN2); 1097 T df = aln2 < std::numeric_limits<float>::min() ? 0.0f : 1.0f / aln2; 1098 return dualfunc (a, f, df); 1099 } 1100 1101 // f(x) = log10(x), f'(x) = 1/(x*log10) 1102 // (log base 10) 1103 template<class T, int P> 1104 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> safe_log10 (const Dual<T,P> &a) 1105 { 1106 T f = safe_log10(a.val()); 1107 T df = a.val() < std::numeric_limits<T>::min() ? T(0) : T(1) / (a.val() * T(M_LN10)); 1108 return dualfunc (a, f, df); 1109 } 1110 1111 template<class T, int P> 1112 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_log10(const Dual<T,P> &a) 1113 { 1114 T f = OIIO::fast_log10(a.val()); 1115 T aln10 = a.val() * float(M_LN10); 1116 T df = aln10 < std::numeric_limits<float>::min() ? 0.0f : 1.0f / aln10; 1117 return dualfunc (a, f, df); 1118 } 1119 1120 // f(x) = e^x, f'(x) = e^x 1121 template<class T, int P> 1122 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> exp (const Dual<T,P> &a) 1123 { 1124 T f = std::exp(a.val()); 1125 return dualfunc (a, f, f); 1126 } 1127 1128 template<class T, int P> 1129 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_exp(const Dual<T,P> &a) 1130 { 1131 T f = OIIO::fast_exp(a.val()); 1132 return dualfunc (a, f, f); 1133 } 1134 1135 // f(x) = 2^x, f'(x) = (2^x)*log(2) 1136 template<class T, int P> 1137 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> exp2 (const Dual<T,P> &a) 1138 { 1139 using std::exp2; 1140 T f = exp2(a.val()); 1141 return dualfunc (a, f, f*T(M_LN2)); 1142 } 1143 1144 template<class T, int P> 1145 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_exp2(const Dual<T,P> &a) 1146 { 1147 T f = OIIO::fast_exp2(float(a.val())); 1148 return dualfunc (a, f, f*T(M_LN2)); 1149 } 1150 1151 1152 // f(x) = e^x - 1, f'(x) = e^x 1153 template<class T, int P> 1154 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> expm1 (const Dual<T,P> &a) 1155 { 1156 using std::expm1; 1157 T f = expm1(a.val()); 1158 T df = std::exp (a.val()); 1159 return dualfunc (a, f, df); 1160 } 1161 1162 template<class T, int P> 1163 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_expm1(const Dual<T,P> &a) 1164 { 1165 T f = OIIO::fast_expm1(a.val()); 1166 T df = OIIO::fast_exp (a.val()); 1167 return dualfunc (a, f, df); 1168 } 1169 1170 // f(x) = erf(x), f'(x) = (2e^(-x^2))/sqrt(pi) 1171 template<class T, int P> 1172 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> erf (const Dual<T,P> &a) 1173 { 1174 using std::erf; 1175 T f = erf (a.val()); 1176 const T two_over_sqrt_pi = T(1.128379167095512573896158903); 1177 T df = std::exp (-a.val() * a.val()) * two_over_sqrt_pi; 1178 return dualfunc (a, f, df); 1179 } 1180 1181 template<class T, int P> 1182 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_erf(const Dual<T,P> &a) 1183 { 1184 T f = OIIO::fast_erf (float(a.val())); // float version! 1185 const T two_over_sqrt_pi = 1.128379167095512573896158903f; 1186 T df = OIIO::fast_exp(-a.val() * a.val()) * two_over_sqrt_pi; 1187 return dualfunc (a, f, df); 1188 } 1189 1190 // f(x) = erfc(x), f'(x) = -(2e^(-x^2))/sqrt(pi) 1191 template<class T, int P> 1192 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> erfc (const Dual<T,P> &a) 1193 { 1194 using std::erfc; 1195 T f = erfc (a.val()); // float version! 1196 const T two_over_sqrt_pi = -T(1.128379167095512573896158903); 1197 T df = std::exp (-a.val() * a.val()) * two_over_sqrt_pi; 1198 return dualfunc (a, f, df); 1199 } 1200 1201 template<class T, int P> 1202 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_erfc(const Dual<T,P> &a) 1203 { 1204 T f = OIIO::fast_erfc (float(a.val())); // float version! 1205 const T two_over_sqrt_pi = -1.128379167095512573896158903f; 1206 T df = OIIO::fast_exp(-a.val() * a.val()) * two_over_sqrt_pi; 1207 return dualfunc (a, f, df); 1208 } 1209 1210 1211 // f(x) = sqrt(x), f'(x) = 1/(2*sqrt(x)) 1212 template<class T, int P> 1213 OSL_HOSTDEVICE OSL_FORCEINLINE OSL_CONSTEXPR14 Dual<T,P> sqrt (const Dual<T,P> &a) 1214 { 1215 Dual<T,P> result; 1216 if (OSL_LIKELY(a.val() > T(0))) { 1217 T f = std::sqrt(a.val()); 1218 T df = T(0.5) / f; 1219 result = dualfunc (a, f, df); 1220 } else { 1221 result = Dual<T,P> (T(0)); 1222 } 1223 return result; 1224 } 1225 1226 // f(x) = 1/sqrt(x), f'(x) = -1/(2*x^(3/2)) 1227 template<class T, int P> 1228 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> inversesqrt (const Dual<T,P> &a) 1229 { 1230 // do we want to print an error message? 1231 Dual<T,P> result; 1232 if (OSL_LIKELY(a.val() > T(0))) { 1233 T f = T(1)/std::sqrt(a.val()); 1234 T df = T(-0.5)*f/a.val(); 1235 result = dualfunc (a, f, df); 1236 } else { 1237 result = Dual<T,P> (T(0)); 1238 } 1239 return result; 1240 } 1241 1242 // f(x) = cbrt(x), f'(x) = 1/(3*x^(2/3)) 1243 template<class T, int P> 1244 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> cbrt (const Dual<T,P> &a) 1245 { 1246 if (OSL_LIKELY(a.val() != T(0))) { 1247 T f = std::cbrt(a.val()); 1248 T df = T(1) / (T(3) * f * f); 1249 return dualfunc(a, f, df); 1250 } 1251 return Dual<T,P> (T(0)); 1252 } 1253 1254 template<class T, int P> 1255 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fast_cbrt (const Dual<T,P> &a) 1256 { 1257 if (OSL_LIKELY(a.val() != T(0))) { 1258 T f = OIIO::fast_cbrt(float(a.val())); // float version! 1259 T df = T(1) / (T(3) * f * f); 1260 return dualfunc(a, f, df); 1261 } 1262 return Dual<T,P> (T(0)); 1263 } 1264 1265 // (fx) = x*(1-a) + y*a, f'(x) = (1-a)x' + (y - x)*a' + a*y' 1266 template<class T, int P> 1267 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> mix (const Dual<T,P> &x, const Dual<T,P> &y, const Dual<T,P> &a) 1268 { 1269 T mixval = x.val()*(T(1)-a.val()) + y.val()*a.val(); 1270 return dualfunc (x, y, a, mixval, T(1)-a.val(), a.val(), y.val() - x.val()); 1271 } 1272 1273 template<class T, int P> 1274 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> fabs (const Dual<T,P> &x) 1275 { 1276 return x.val() >= T(0) ? x : -x; 1277 } 1278 1279 1280 #ifdef OIIO_FMATH_HAS_SAFE_FMOD 1281 using OIIO::safe_fmod; 1282 #else 1283 OSL_FORCEINLINE OSL_HOSTDEVICE float safe_fmod (float a, float b) 1284 { 1285 if (OSL_LIKELY(b != 0.0f)) { 1286 #if 0 1287 return std::fmod (a,b); 1288 // std::fmod was getting called serially instead of vectorizing, so 1289 // we will just do the calculation ourselves 1290 #else 1291 // This formulation is equivalent but much faster in our benchmarks, 1292 // also vectorizes better. 1293 // The floating-point remainder of the division operation 1294 // a/b is a - N*b, where N = a/b with its fractional part truncated. 1295 int N = static_cast<int>(a/b); 1296 return a - N*b; 1297 #endif 1298 } 1299 return 0.0f; 1300 } 1301 #endif 1302 1303 template<class T, int P> 1304 OSL_FORCEINLINE OSL_HOSTDEVICE Dual<T,P> 1305 safe_fmod (const Dual<T,P>& a, const Dual<T,P>& b) 1306 { 1307 Dual<T,P> result = a; 1308 result.val() = safe_fmod(a.val(), b.val()); 1309 return result; 1310 } 1311 1312 1313 1314 OSL_HOSTDEVICE OSL_FORCEINLINE float smoothstep(float e0, float e1, float x) { 1315 if (x < e0) return 0.0f; 1316 else if (x >= e1) return 1.0f; 1317 else { 1318 float t = (x - e0)/(e1 - e0); 1319 return (3.0f-2.0f*t)*(t*t); 1320 } 1321 } 1322 1323 // f(t) = (3-2t)t^2, t = (x-e0)/(e1-e0) 1324 template<class T, int P> 1325 OSL_HOSTDEVICE OSL_FORCEINLINE Dual<T,P> smoothstep (const Dual<T,P> &e0, const Dual<T,P> &e1, const Dual<T,P> &x) 1326 { 1327 if (x.val() < e0.val()) { 1328 return Dual<T,P> (T(0)); 1329 } 1330 else if (x.val() >= e1.val()) { 1331 return Dual<T,P> (T(1)); 1332 } 1333 Dual<T,P> t = (x - e0)/(e1-e0); 1334 return (T(3) - T(2)*t)*t*t; 1335 } 1336 1337 1338 #ifdef __CUDA_ARCH__ 1339 template<> inline OSL_HOSTDEVICE OSL_CONSTEXPR14 float3 Dual<float3, 2>::zero() { 1340 return float3{0.f, 0.f, 0.f}; 1341 } 1342 #endif 1343 1344 1345 // ceil(Dual) loses derivatives 1346 template<class T, int P> 1347 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr float ceil (const Dual<T,P> &x) 1348 { 1349 return std::ceil(x.val()); 1350 } 1351 1352 1353 // floor(Dual) loses derivatives 1354 template<class T, int P> 1355 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr float floor (const Dual<T,P> &x) 1356 { 1357 return std::floor(x.val()); 1358 } 1359 1360 1361 // floor, cast to an int. 1362 template<class T, int P> 1363 OSL_HOSTDEVICE OSL_FORCEINLINE constexpr int 1364 ifloor (const Dual<T,P> &x) 1365 { 1366 return (int)floor(x); 1367 } 1368 1369 1370 OSL_NAMESPACE_EXIT 1371