1a9ac8606Spatrick /*===---- __clang_hip_math.h - Device-side HIP math support ----------------=== 2ec727ea7Spatrick * 3ec727ea7Spatrick * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4ec727ea7Spatrick * See https://llvm.org/LICENSE.txt for license information. 5ec727ea7Spatrick * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6ec727ea7Spatrick * 7ec727ea7Spatrick *===-----------------------------------------------------------------------=== 8ec727ea7Spatrick */ 9ec727ea7Spatrick #ifndef __CLANG_HIP_MATH_H__ 10ec727ea7Spatrick #define __CLANG_HIP_MATH_H__ 11ec727ea7Spatrick 12a9ac8606Spatrick #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) 13a9ac8606Spatrick #error "This file is for HIP and OpenMP AMDGCN device compilation only." 14a9ac8606Spatrick #endif 15a9ac8606Spatrick 16a9ac8606Spatrick #if !defined(__HIPCC_RTC__) 17a9ac8606Spatrick #if defined(__cplusplus) 18ec727ea7Spatrick #include <algorithm> 19a9ac8606Spatrick #endif 20ec727ea7Spatrick #include <limits.h> 21ec727ea7Spatrick #include <stdint.h> 22a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 23a9ac8606Spatrick #include <omp.h> 24a9ac8606Spatrick #endif 25a9ac8606Spatrick #endif // !defined(__HIPCC_RTC__) 26ec727ea7Spatrick 27ec727ea7Spatrick #pragma push_macro("__DEVICE__") 28ec727ea7Spatrick 29a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 30a9ac8606Spatrick #define __DEVICE__ static inline __attribute__((always_inline, nothrow)) 31a9ac8606Spatrick #else 32a9ac8606Spatrick #define __DEVICE__ static __device__ inline __attribute__((always_inline)) 33a9ac8606Spatrick #endif 34a9ac8606Spatrick 35a9ac8606Spatrick // A few functions return bool type starting only in C++11. 36a9ac8606Spatrick #pragma push_macro("__RETURN_TYPE") 37a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 38a9ac8606Spatrick #define __RETURN_TYPE int 39a9ac8606Spatrick #else 40a9ac8606Spatrick #if defined(__cplusplus) 41ec727ea7Spatrick #define __RETURN_TYPE bool 42a9ac8606Spatrick #else 43a9ac8606Spatrick #define __RETURN_TYPE int 44a9ac8606Spatrick #endif 45a9ac8606Spatrick #endif // __OPENMP_AMDGCN__ 46a9ac8606Spatrick 47a9ac8606Spatrick #if defined (__cplusplus) && __cplusplus < 201103L 48a9ac8606Spatrick // emulate static_assert on type sizes 49a9ac8606Spatrick template<bool> 50a9ac8606Spatrick struct __compare_result{}; 51a9ac8606Spatrick template<> 52a9ac8606Spatrick struct __compare_result<true> { 53a9ac8606Spatrick static const __device__ bool valid; 54a9ac8606Spatrick }; 55ec727ea7Spatrick 56ec727ea7Spatrick __DEVICE__ 57a9ac8606Spatrick void __suppress_unused_warning(bool b){}; 58a9ac8606Spatrick template <unsigned int S, unsigned int T> 59a9ac8606Spatrick __DEVICE__ void __static_assert_equal_size() { 60a9ac8606Spatrick __suppress_unused_warning(__compare_result<S == T>::valid); 61a9ac8606Spatrick } 62a9ac8606Spatrick 63a9ac8606Spatrick #define __static_assert_type_size_equal(A, B) \ 64a9ac8606Spatrick __static_assert_equal_size<A,B>() 65a9ac8606Spatrick 66a9ac8606Spatrick #else 67a9ac8606Spatrick #define __static_assert_type_size_equal(A,B) \ 68a9ac8606Spatrick static_assert((A) == (B), "") 69a9ac8606Spatrick 70a9ac8606Spatrick #endif 71a9ac8606Spatrick 72a9ac8606Spatrick __DEVICE__ 73*12c85518Srobert uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) { 74ec727ea7Spatrick uint64_t __r = 0; 75*12c85518Srobert while (*__tagp != '\0') { 76ec727ea7Spatrick char __tmp = *__tagp; 77ec727ea7Spatrick 78ec727ea7Spatrick if (__tmp >= '0' && __tmp <= '7') 79ec727ea7Spatrick __r = (__r * 8u) + __tmp - '0'; 80ec727ea7Spatrick else 81ec727ea7Spatrick return 0; 82ec727ea7Spatrick 83ec727ea7Spatrick ++__tagp; 84ec727ea7Spatrick } 85ec727ea7Spatrick 86ec727ea7Spatrick return __r; 87ec727ea7Spatrick } 88ec727ea7Spatrick 89ec727ea7Spatrick __DEVICE__ 90*12c85518Srobert uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) { 91ec727ea7Spatrick uint64_t __r = 0; 92*12c85518Srobert while (*__tagp != '\0') { 93ec727ea7Spatrick char __tmp = *__tagp; 94ec727ea7Spatrick 95ec727ea7Spatrick if (__tmp >= '0' && __tmp <= '9') 96ec727ea7Spatrick __r = (__r * 10u) + __tmp - '0'; 97ec727ea7Spatrick else 98ec727ea7Spatrick return 0; 99ec727ea7Spatrick 100ec727ea7Spatrick ++__tagp; 101ec727ea7Spatrick } 102ec727ea7Spatrick 103ec727ea7Spatrick return __r; 104ec727ea7Spatrick } 105ec727ea7Spatrick 106ec727ea7Spatrick __DEVICE__ 107*12c85518Srobert uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) { 108ec727ea7Spatrick uint64_t __r = 0; 109*12c85518Srobert while (*__tagp != '\0') { 110ec727ea7Spatrick char __tmp = *__tagp; 111ec727ea7Spatrick 112ec727ea7Spatrick if (__tmp >= '0' && __tmp <= '9') 113ec727ea7Spatrick __r = (__r * 16u) + __tmp - '0'; 114ec727ea7Spatrick else if (__tmp >= 'a' && __tmp <= 'f') 115ec727ea7Spatrick __r = (__r * 16u) + __tmp - 'a' + 10; 116ec727ea7Spatrick else if (__tmp >= 'A' && __tmp <= 'F') 117ec727ea7Spatrick __r = (__r * 16u) + __tmp - 'A' + 10; 118ec727ea7Spatrick else 119ec727ea7Spatrick return 0; 120ec727ea7Spatrick 121ec727ea7Spatrick ++__tagp; 122ec727ea7Spatrick } 123ec727ea7Spatrick 124ec727ea7Spatrick return __r; 125ec727ea7Spatrick } 126ec727ea7Spatrick 127ec727ea7Spatrick __DEVICE__ 128*12c85518Srobert uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) { 129ec727ea7Spatrick if (*__tagp == '0') { 130ec727ea7Spatrick ++__tagp; 131ec727ea7Spatrick 132ec727ea7Spatrick if (*__tagp == 'x' || *__tagp == 'X') 133ec727ea7Spatrick return __make_mantissa_base16(__tagp); 134ec727ea7Spatrick else 135ec727ea7Spatrick return __make_mantissa_base8(__tagp); 136ec727ea7Spatrick } 137ec727ea7Spatrick 138ec727ea7Spatrick return __make_mantissa_base10(__tagp); 139ec727ea7Spatrick } 140ec727ea7Spatrick 141ec727ea7Spatrick // BEGIN FLOAT 142a9ac8606Spatrick #if defined(__cplusplus) 143ec727ea7Spatrick __DEVICE__ 144a9ac8606Spatrick int abs(int __x) { 145a9ac8606Spatrick int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); 146a9ac8606Spatrick return (__x ^ __sgn) - __sgn; 147ec727ea7Spatrick } 148ec727ea7Spatrick __DEVICE__ 149a9ac8606Spatrick long labs(long __x) { 150a9ac8606Spatrick long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); 151a9ac8606Spatrick return (__x ^ __sgn) - __sgn; 152a9ac8606Spatrick } 153ec727ea7Spatrick __DEVICE__ 154a9ac8606Spatrick long long llabs(long long __x) { 155a9ac8606Spatrick long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); 156a9ac8606Spatrick return (__x ^ __sgn) - __sgn; 157a9ac8606Spatrick } 158a9ac8606Spatrick #endif 159a9ac8606Spatrick 160ec727ea7Spatrick __DEVICE__ 161a9ac8606Spatrick float acosf(float __x) { return __ocml_acos_f32(__x); } 162a9ac8606Spatrick 163ec727ea7Spatrick __DEVICE__ 164a9ac8606Spatrick float acoshf(float __x) { return __ocml_acosh_f32(__x); } 165a9ac8606Spatrick 166ec727ea7Spatrick __DEVICE__ 167a9ac8606Spatrick float asinf(float __x) { return __ocml_asin_f32(__x); } 168a9ac8606Spatrick 169ec727ea7Spatrick __DEVICE__ 170a9ac8606Spatrick float asinhf(float __x) { return __ocml_asinh_f32(__x); } 171a9ac8606Spatrick 172ec727ea7Spatrick __DEVICE__ 173a9ac8606Spatrick float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } 174a9ac8606Spatrick 175ec727ea7Spatrick __DEVICE__ 176a9ac8606Spatrick float atanf(float __x) { return __ocml_atan_f32(__x); } 177a9ac8606Spatrick 178ec727ea7Spatrick __DEVICE__ 179a9ac8606Spatrick float atanhf(float __x) { return __ocml_atanh_f32(__x); } 180a9ac8606Spatrick 181ec727ea7Spatrick __DEVICE__ 182a9ac8606Spatrick float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } 183a9ac8606Spatrick 184ec727ea7Spatrick __DEVICE__ 185a9ac8606Spatrick float ceilf(float __x) { return __ocml_ceil_f32(__x); } 186a9ac8606Spatrick 187ec727ea7Spatrick __DEVICE__ 188a9ac8606Spatrick float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); } 189a9ac8606Spatrick 190ec727ea7Spatrick __DEVICE__ 191a9ac8606Spatrick float cosf(float __x) { return __ocml_cos_f32(__x); } 192a9ac8606Spatrick 193ec727ea7Spatrick __DEVICE__ 194a9ac8606Spatrick float coshf(float __x) { return __ocml_cosh_f32(__x); } 195a9ac8606Spatrick 196ec727ea7Spatrick __DEVICE__ 197a9ac8606Spatrick float cospif(float __x) { return __ocml_cospi_f32(__x); } 198a9ac8606Spatrick 199ec727ea7Spatrick __DEVICE__ 200a9ac8606Spatrick float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } 201a9ac8606Spatrick 202ec727ea7Spatrick __DEVICE__ 203a9ac8606Spatrick float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } 204a9ac8606Spatrick 205ec727ea7Spatrick __DEVICE__ 206a9ac8606Spatrick float erfcf(float __x) { return __ocml_erfc_f32(__x); } 207a9ac8606Spatrick 208ec727ea7Spatrick __DEVICE__ 209a9ac8606Spatrick float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); } 210a9ac8606Spatrick 211a9ac8606Spatrick __DEVICE__ 212a9ac8606Spatrick float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } 213a9ac8606Spatrick 214a9ac8606Spatrick __DEVICE__ 215a9ac8606Spatrick float erff(float __x) { return __ocml_erf_f32(__x); } 216a9ac8606Spatrick 217a9ac8606Spatrick __DEVICE__ 218a9ac8606Spatrick float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } 219a9ac8606Spatrick 220a9ac8606Spatrick __DEVICE__ 221a9ac8606Spatrick float exp10f(float __x) { return __ocml_exp10_f32(__x); } 222a9ac8606Spatrick 223a9ac8606Spatrick __DEVICE__ 224a9ac8606Spatrick float exp2f(float __x) { return __ocml_exp2_f32(__x); } 225a9ac8606Spatrick 226a9ac8606Spatrick __DEVICE__ 227a9ac8606Spatrick float expf(float __x) { return __ocml_exp_f32(__x); } 228a9ac8606Spatrick 229a9ac8606Spatrick __DEVICE__ 230a9ac8606Spatrick float expm1f(float __x) { return __ocml_expm1_f32(__x); } 231a9ac8606Spatrick 232a9ac8606Spatrick __DEVICE__ 233*12c85518Srobert float fabsf(float __x) { return __builtin_fabsf(__x); } 234a9ac8606Spatrick 235a9ac8606Spatrick __DEVICE__ 236a9ac8606Spatrick float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } 237a9ac8606Spatrick 238a9ac8606Spatrick __DEVICE__ 239a9ac8606Spatrick float fdividef(float __x, float __y) { return __x / __y; } 240a9ac8606Spatrick 241a9ac8606Spatrick __DEVICE__ 242a9ac8606Spatrick float floorf(float __x) { return __ocml_floor_f32(__x); } 243a9ac8606Spatrick 244a9ac8606Spatrick __DEVICE__ 245a9ac8606Spatrick float fmaf(float __x, float __y, float __z) { 246ec727ea7Spatrick return __ocml_fma_f32(__x, __y, __z); 247ec727ea7Spatrick } 248a9ac8606Spatrick 249ec727ea7Spatrick __DEVICE__ 250a9ac8606Spatrick float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); } 251a9ac8606Spatrick 252ec727ea7Spatrick __DEVICE__ 253a9ac8606Spatrick float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); } 254a9ac8606Spatrick 255ec727ea7Spatrick __DEVICE__ 256a9ac8606Spatrick float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } 257a9ac8606Spatrick 258ec727ea7Spatrick __DEVICE__ 259a9ac8606Spatrick float frexpf(float __x, int *__nptr) { 260ec727ea7Spatrick int __tmp; 261a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 262a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 263a9ac8606Spatrick #endif 264ec727ea7Spatrick float __r = 265ec727ea7Spatrick __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp); 266ec727ea7Spatrick *__nptr = __tmp; 267ec727ea7Spatrick 268ec727ea7Spatrick return __r; 269ec727ea7Spatrick } 270a9ac8606Spatrick 271ec727ea7Spatrick __DEVICE__ 272a9ac8606Spatrick float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } 273a9ac8606Spatrick 274ec727ea7Spatrick __DEVICE__ 275a9ac8606Spatrick int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } 276a9ac8606Spatrick 277ec727ea7Spatrick __DEVICE__ 278a9ac8606Spatrick __RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); } 279a9ac8606Spatrick 280ec727ea7Spatrick __DEVICE__ 281a9ac8606Spatrick __RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); } 282a9ac8606Spatrick 283ec727ea7Spatrick __DEVICE__ 284a9ac8606Spatrick __RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); } 285a9ac8606Spatrick 286ec727ea7Spatrick __DEVICE__ 287a9ac8606Spatrick float j0f(float __x) { return __ocml_j0_f32(__x); } 288a9ac8606Spatrick 289ec727ea7Spatrick __DEVICE__ 290a9ac8606Spatrick float j1f(float __x) { return __ocml_j1_f32(__x); } 291a9ac8606Spatrick 292ec727ea7Spatrick __DEVICE__ 293a9ac8606Spatrick float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication 294ec727ea7Spatrick // and the Miller & Brown algorithm 295ec727ea7Spatrick // for linear recurrences to get O(log n) steps, but it's unclear if 296ec727ea7Spatrick // it'd be beneficial in this case. 297ec727ea7Spatrick if (__n == 0) 298ec727ea7Spatrick return j0f(__x); 299ec727ea7Spatrick if (__n == 1) 300ec727ea7Spatrick return j1f(__x); 301ec727ea7Spatrick 302ec727ea7Spatrick float __x0 = j0f(__x); 303ec727ea7Spatrick float __x1 = j1f(__x); 304ec727ea7Spatrick for (int __i = 1; __i < __n; ++__i) { 305ec727ea7Spatrick float __x2 = (2 * __i) / __x * __x1 - __x0; 306ec727ea7Spatrick __x0 = __x1; 307ec727ea7Spatrick __x1 = __x2; 308ec727ea7Spatrick } 309ec727ea7Spatrick 310ec727ea7Spatrick return __x1; 311ec727ea7Spatrick } 312a9ac8606Spatrick 313ec727ea7Spatrick __DEVICE__ 314a9ac8606Spatrick float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); } 315a9ac8606Spatrick 316ec727ea7Spatrick __DEVICE__ 317a9ac8606Spatrick float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } 318a9ac8606Spatrick 319ec727ea7Spatrick __DEVICE__ 320a9ac8606Spatrick long long int llrintf(float __x) { return __ocml_rint_f32(__x); } 321a9ac8606Spatrick 322ec727ea7Spatrick __DEVICE__ 323a9ac8606Spatrick long long int llroundf(float __x) { return __ocml_round_f32(__x); } 324a9ac8606Spatrick 325ec727ea7Spatrick __DEVICE__ 326a9ac8606Spatrick float log10f(float __x) { return __ocml_log10_f32(__x); } 327a9ac8606Spatrick 328ec727ea7Spatrick __DEVICE__ 329a9ac8606Spatrick float log1pf(float __x) { return __ocml_log1p_f32(__x); } 330a9ac8606Spatrick 331ec727ea7Spatrick __DEVICE__ 332a9ac8606Spatrick float log2f(float __x) { return __ocml_log2_f32(__x); } 333a9ac8606Spatrick 334ec727ea7Spatrick __DEVICE__ 335a9ac8606Spatrick float logbf(float __x) { return __ocml_logb_f32(__x); } 336a9ac8606Spatrick 337ec727ea7Spatrick __DEVICE__ 338a9ac8606Spatrick float logf(float __x) { return __ocml_log_f32(__x); } 339a9ac8606Spatrick 340ec727ea7Spatrick __DEVICE__ 341a9ac8606Spatrick long int lrintf(float __x) { return __ocml_rint_f32(__x); } 342a9ac8606Spatrick 343ec727ea7Spatrick __DEVICE__ 344a9ac8606Spatrick long int lroundf(float __x) { return __ocml_round_f32(__x); } 345a9ac8606Spatrick 346ec727ea7Spatrick __DEVICE__ 347a9ac8606Spatrick float modff(float __x, float *__iptr) { 348ec727ea7Spatrick float __tmp; 349a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 350a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 351a9ac8606Spatrick #endif 352ec727ea7Spatrick float __r = 353ec727ea7Spatrick __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); 354ec727ea7Spatrick *__iptr = __tmp; 355ec727ea7Spatrick return __r; 356ec727ea7Spatrick } 357a9ac8606Spatrick 358ec727ea7Spatrick __DEVICE__ 359*12c85518Srobert float nanf(const char *__tagp __attribute__((nonnull))) { 360ec727ea7Spatrick union { 361ec727ea7Spatrick float val; 362ec727ea7Spatrick struct ieee_float { 363a9ac8606Spatrick unsigned int mantissa : 22; 364a9ac8606Spatrick unsigned int quiet : 1; 365a9ac8606Spatrick unsigned int exponent : 8; 366a9ac8606Spatrick unsigned int sign : 1; 367ec727ea7Spatrick } bits; 368ec727ea7Spatrick } __tmp; 369a9ac8606Spatrick __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); 370ec727ea7Spatrick 371ec727ea7Spatrick __tmp.bits.sign = 0u; 372ec727ea7Spatrick __tmp.bits.exponent = ~0u; 373ec727ea7Spatrick __tmp.bits.quiet = 1u; 374ec727ea7Spatrick __tmp.bits.mantissa = __make_mantissa(__tagp); 375ec727ea7Spatrick 376ec727ea7Spatrick return __tmp.val; 377ec727ea7Spatrick } 378a9ac8606Spatrick 379ec727ea7Spatrick __DEVICE__ 380a9ac8606Spatrick float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); } 381a9ac8606Spatrick 382ec727ea7Spatrick __DEVICE__ 383a9ac8606Spatrick float nextafterf(float __x, float __y) { 384ec727ea7Spatrick return __ocml_nextafter_f32(__x, __y); 385ec727ea7Spatrick } 386a9ac8606Spatrick 387ec727ea7Spatrick __DEVICE__ 388a9ac8606Spatrick float norm3df(float __x, float __y, float __z) { 389ec727ea7Spatrick return __ocml_len3_f32(__x, __y, __z); 390ec727ea7Spatrick } 391a9ac8606Spatrick 392ec727ea7Spatrick __DEVICE__ 393a9ac8606Spatrick float norm4df(float __x, float __y, float __z, float __w) { 394ec727ea7Spatrick return __ocml_len4_f32(__x, __y, __z, __w); 395ec727ea7Spatrick } 396a9ac8606Spatrick 397ec727ea7Spatrick __DEVICE__ 398a9ac8606Spatrick float normcdff(float __x) { return __ocml_ncdf_f32(__x); } 399a9ac8606Spatrick 400ec727ea7Spatrick __DEVICE__ 401a9ac8606Spatrick float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } 402a9ac8606Spatrick 403ec727ea7Spatrick __DEVICE__ 404a9ac8606Spatrick float normf(int __dim, 405ec727ea7Spatrick const float *__a) { // TODO: placeholder until OCML adds support. 406ec727ea7Spatrick float __r = 0; 407ec727ea7Spatrick while (__dim--) { 408ec727ea7Spatrick __r += __a[0] * __a[0]; 409ec727ea7Spatrick ++__a; 410ec727ea7Spatrick } 411ec727ea7Spatrick 412ec727ea7Spatrick return __ocml_sqrt_f32(__r); 413ec727ea7Spatrick } 414a9ac8606Spatrick 415ec727ea7Spatrick __DEVICE__ 416a9ac8606Spatrick float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } 417a9ac8606Spatrick 418ec727ea7Spatrick __DEVICE__ 419a9ac8606Spatrick float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); } 420a9ac8606Spatrick 421ec727ea7Spatrick __DEVICE__ 422a9ac8606Spatrick float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } 423a9ac8606Spatrick 424a9ac8606Spatrick __DEVICE__ 425a9ac8606Spatrick float remainderf(float __x, float __y) { 426ec727ea7Spatrick return __ocml_remainder_f32(__x, __y); 427ec727ea7Spatrick } 428a9ac8606Spatrick 429ec727ea7Spatrick __DEVICE__ 430a9ac8606Spatrick float remquof(float __x, float __y, int *__quo) { 431ec727ea7Spatrick int __tmp; 432a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 433a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 434a9ac8606Spatrick #endif 435ec727ea7Spatrick float __r = __ocml_remquo_f32( 436ec727ea7Spatrick __x, __y, (__attribute__((address_space(5))) int *)&__tmp); 437ec727ea7Spatrick *__quo = __tmp; 438ec727ea7Spatrick 439ec727ea7Spatrick return __r; 440ec727ea7Spatrick } 441a9ac8606Spatrick 442ec727ea7Spatrick __DEVICE__ 443a9ac8606Spatrick float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); } 444a9ac8606Spatrick 445ec727ea7Spatrick __DEVICE__ 446a9ac8606Spatrick float rintf(float __x) { return __ocml_rint_f32(__x); } 447a9ac8606Spatrick 448ec727ea7Spatrick __DEVICE__ 449a9ac8606Spatrick float rnorm3df(float __x, float __y, float __z) { 450ec727ea7Spatrick return __ocml_rlen3_f32(__x, __y, __z); 451ec727ea7Spatrick } 452ec727ea7Spatrick 453ec727ea7Spatrick __DEVICE__ 454a9ac8606Spatrick float rnorm4df(float __x, float __y, float __z, float __w) { 455ec727ea7Spatrick return __ocml_rlen4_f32(__x, __y, __z, __w); 456ec727ea7Spatrick } 457a9ac8606Spatrick 458ec727ea7Spatrick __DEVICE__ 459a9ac8606Spatrick float rnormf(int __dim, 460ec727ea7Spatrick const float *__a) { // TODO: placeholder until OCML adds support. 461ec727ea7Spatrick float __r = 0; 462ec727ea7Spatrick while (__dim--) { 463ec727ea7Spatrick __r += __a[0] * __a[0]; 464ec727ea7Spatrick ++__a; 465ec727ea7Spatrick } 466ec727ea7Spatrick 467ec727ea7Spatrick return __ocml_rsqrt_f32(__r); 468ec727ea7Spatrick } 469a9ac8606Spatrick 470ec727ea7Spatrick __DEVICE__ 471a9ac8606Spatrick float roundf(float __x) { return __ocml_round_f32(__x); } 472a9ac8606Spatrick 473ec727ea7Spatrick __DEVICE__ 474a9ac8606Spatrick float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } 475a9ac8606Spatrick 476ec727ea7Spatrick __DEVICE__ 477a9ac8606Spatrick float scalblnf(float __x, long int __n) { 478ec727ea7Spatrick return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n) 479ec727ea7Spatrick : __ocml_scalb_f32(__x, __n); 480ec727ea7Spatrick } 481ec727ea7Spatrick 482a9ac8606Spatrick __DEVICE__ 483a9ac8606Spatrick float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); } 484a9ac8606Spatrick 485a9ac8606Spatrick __DEVICE__ 486a9ac8606Spatrick __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); } 487a9ac8606Spatrick 488a9ac8606Spatrick __DEVICE__ 489a9ac8606Spatrick void sincosf(float __x, float *__sinptr, float *__cosptr) { 490a9ac8606Spatrick float __tmp; 491a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 492a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 493a9ac8606Spatrick #endif 494ec727ea7Spatrick *__sinptr = 495ec727ea7Spatrick __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); 496ec727ea7Spatrick *__cosptr = __tmp; 497ec727ea7Spatrick } 498ec727ea7Spatrick 499a9ac8606Spatrick __DEVICE__ 500a9ac8606Spatrick void sincospif(float __x, float *__sinptr, float *__cosptr) { 501a9ac8606Spatrick float __tmp; 502a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 503a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 504a9ac8606Spatrick #endif 505ec727ea7Spatrick *__sinptr = __ocml_sincospi_f32( 506ec727ea7Spatrick __x, (__attribute__((address_space(5))) float *)&__tmp); 507ec727ea7Spatrick *__cosptr = __tmp; 508ec727ea7Spatrick } 509a9ac8606Spatrick 510ec727ea7Spatrick __DEVICE__ 511a9ac8606Spatrick float sinf(float __x) { return __ocml_sin_f32(__x); } 512a9ac8606Spatrick 513ec727ea7Spatrick __DEVICE__ 514a9ac8606Spatrick float sinhf(float __x) { return __ocml_sinh_f32(__x); } 515a9ac8606Spatrick 516ec727ea7Spatrick __DEVICE__ 517a9ac8606Spatrick float sinpif(float __x) { return __ocml_sinpi_f32(__x); } 518a9ac8606Spatrick 519ec727ea7Spatrick __DEVICE__ 520a9ac8606Spatrick float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } 521a9ac8606Spatrick 522ec727ea7Spatrick __DEVICE__ 523a9ac8606Spatrick float tanf(float __x) { return __ocml_tan_f32(__x); } 524a9ac8606Spatrick 525ec727ea7Spatrick __DEVICE__ 526a9ac8606Spatrick float tanhf(float __x) { return __ocml_tanh_f32(__x); } 527a9ac8606Spatrick 528ec727ea7Spatrick __DEVICE__ 529a9ac8606Spatrick float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } 530a9ac8606Spatrick 531ec727ea7Spatrick __DEVICE__ 532a9ac8606Spatrick float truncf(float __x) { return __ocml_trunc_f32(__x); } 533a9ac8606Spatrick 534ec727ea7Spatrick __DEVICE__ 535a9ac8606Spatrick float y0f(float __x) { return __ocml_y0_f32(__x); } 536a9ac8606Spatrick 537ec727ea7Spatrick __DEVICE__ 538a9ac8606Spatrick float y1f(float __x) { return __ocml_y1_f32(__x); } 539a9ac8606Spatrick 540ec727ea7Spatrick __DEVICE__ 541a9ac8606Spatrick float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication 542ec727ea7Spatrick // and the Miller & Brown algorithm 543ec727ea7Spatrick // for linear recurrences to get O(log n) steps, but it's unclear if 544ec727ea7Spatrick // it'd be beneficial in this case. Placeholder until OCML adds 545ec727ea7Spatrick // support. 546ec727ea7Spatrick if (__n == 0) 547ec727ea7Spatrick return y0f(__x); 548ec727ea7Spatrick if (__n == 1) 549ec727ea7Spatrick return y1f(__x); 550ec727ea7Spatrick 551ec727ea7Spatrick float __x0 = y0f(__x); 552ec727ea7Spatrick float __x1 = y1f(__x); 553ec727ea7Spatrick for (int __i = 1; __i < __n; ++__i) { 554ec727ea7Spatrick float __x2 = (2 * __i) / __x * __x1 - __x0; 555ec727ea7Spatrick __x0 = __x1; 556ec727ea7Spatrick __x1 = __x2; 557ec727ea7Spatrick } 558ec727ea7Spatrick 559ec727ea7Spatrick return __x1; 560ec727ea7Spatrick } 561ec727ea7Spatrick 562ec727ea7Spatrick // BEGIN INTRINSICS 563a9ac8606Spatrick 564ec727ea7Spatrick __DEVICE__ 565a9ac8606Spatrick float __cosf(float __x) { return __ocml_native_cos_f32(__x); } 566a9ac8606Spatrick 567ec727ea7Spatrick __DEVICE__ 568a9ac8606Spatrick float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } 569a9ac8606Spatrick 570ec727ea7Spatrick __DEVICE__ 571a9ac8606Spatrick float __expf(float __x) { return __ocml_native_exp_f32(__x); } 572a9ac8606Spatrick 573ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 574ec727ea7Spatrick __DEVICE__ 575a9ac8606Spatrick float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } 576a9ac8606Spatrick __DEVICE__ 577a9ac8606Spatrick float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } 578a9ac8606Spatrick __DEVICE__ 579a9ac8606Spatrick float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } 580a9ac8606Spatrick __DEVICE__ 581a9ac8606Spatrick float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); } 582a9ac8606Spatrick #else 583a9ac8606Spatrick __DEVICE__ 584a9ac8606Spatrick float __fadd_rn(float __x, float __y) { return __x + __y; } 585ec727ea7Spatrick #endif 586a9ac8606Spatrick 587ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 588ec727ea7Spatrick __DEVICE__ 589a9ac8606Spatrick float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } 590ec727ea7Spatrick __DEVICE__ 591a9ac8606Spatrick float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } 592ec727ea7Spatrick __DEVICE__ 593a9ac8606Spatrick float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } 594a9ac8606Spatrick __DEVICE__ 595a9ac8606Spatrick float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } 596a9ac8606Spatrick #else 597a9ac8606Spatrick __DEVICE__ 598a9ac8606Spatrick float __fdiv_rn(float __x, float __y) { return __x / __y; } 599ec727ea7Spatrick #endif 600a9ac8606Spatrick 601ec727ea7Spatrick __DEVICE__ 602a9ac8606Spatrick float __fdividef(float __x, float __y) { return __x / __y; } 603a9ac8606Spatrick 604ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 605ec727ea7Spatrick __DEVICE__ 606a9ac8606Spatrick float __fmaf_rd(float __x, float __y, float __z) { 607ec727ea7Spatrick return __ocml_fma_rtn_f32(__x, __y, __z); 608ec727ea7Spatrick } 609ec727ea7Spatrick __DEVICE__ 610a9ac8606Spatrick float __fmaf_rn(float __x, float __y, float __z) { 611a9ac8606Spatrick return __ocml_fma_rte_f32(__x, __y, __z); 612ec727ea7Spatrick } 613ec727ea7Spatrick __DEVICE__ 614a9ac8606Spatrick float __fmaf_ru(float __x, float __y, float __z) { 615ec727ea7Spatrick return __ocml_fma_rtp_f32(__x, __y, __z); 616ec727ea7Spatrick } 617ec727ea7Spatrick __DEVICE__ 618a9ac8606Spatrick float __fmaf_rz(float __x, float __y, float __z) { 619ec727ea7Spatrick return __ocml_fma_rtz_f32(__x, __y, __z); 620ec727ea7Spatrick } 621a9ac8606Spatrick #else 622ec727ea7Spatrick __DEVICE__ 623a9ac8606Spatrick float __fmaf_rn(float __x, float __y, float __z) { 624a9ac8606Spatrick return __ocml_fma_f32(__x, __y, __z); 625ec727ea7Spatrick } 626ec727ea7Spatrick #endif 627a9ac8606Spatrick 628ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 629ec727ea7Spatrick __DEVICE__ 630a9ac8606Spatrick float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } 631ec727ea7Spatrick __DEVICE__ 632a9ac8606Spatrick float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } 633ec727ea7Spatrick __DEVICE__ 634a9ac8606Spatrick float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); } 635a9ac8606Spatrick __DEVICE__ 636a9ac8606Spatrick float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } 637a9ac8606Spatrick #else 638a9ac8606Spatrick __DEVICE__ 639a9ac8606Spatrick float __fmul_rn(float __x, float __y) { return __x * __y; } 640ec727ea7Spatrick #endif 641a9ac8606Spatrick 642ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 643ec727ea7Spatrick __DEVICE__ 644a9ac8606Spatrick float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); } 645ec727ea7Spatrick __DEVICE__ 646a9ac8606Spatrick float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); } 647a9ac8606Spatrick __DEVICE__ 648a9ac8606Spatrick float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); } 649a9ac8606Spatrick __DEVICE__ 650a9ac8606Spatrick float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); } 651a9ac8606Spatrick #else 652a9ac8606Spatrick __DEVICE__ 653a9ac8606Spatrick float __frcp_rn(float __x) { return 1.0f / __x; } 654ec727ea7Spatrick #endif 655a9ac8606Spatrick 656ec727ea7Spatrick __DEVICE__ 657a9ac8606Spatrick float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); } 658a9ac8606Spatrick 659ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 660ec727ea7Spatrick __DEVICE__ 661a9ac8606Spatrick float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } 662ec727ea7Spatrick __DEVICE__ 663a9ac8606Spatrick float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } 664a9ac8606Spatrick __DEVICE__ 665a9ac8606Spatrick float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } 666a9ac8606Spatrick __DEVICE__ 667a9ac8606Spatrick float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } 668a9ac8606Spatrick #else 669a9ac8606Spatrick __DEVICE__ 670a9ac8606Spatrick float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } 671a9ac8606Spatrick #endif 672a9ac8606Spatrick 673ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 674ec727ea7Spatrick __DEVICE__ 675a9ac8606Spatrick float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } 676ec727ea7Spatrick __DEVICE__ 677a9ac8606Spatrick float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } 678ec727ea7Spatrick __DEVICE__ 679a9ac8606Spatrick float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } 680a9ac8606Spatrick __DEVICE__ 681a9ac8606Spatrick float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } 682a9ac8606Spatrick #else 683a9ac8606Spatrick __DEVICE__ 684a9ac8606Spatrick float __fsub_rn(float __x, float __y) { return __x - __y; } 685ec727ea7Spatrick #endif 686a9ac8606Spatrick 687ec727ea7Spatrick __DEVICE__ 688a9ac8606Spatrick float __log10f(float __x) { return __ocml_native_log10_f32(__x); } 689a9ac8606Spatrick 690ec727ea7Spatrick __DEVICE__ 691a9ac8606Spatrick float __log2f(float __x) { return __ocml_native_log2_f32(__x); } 692a9ac8606Spatrick 693ec727ea7Spatrick __DEVICE__ 694a9ac8606Spatrick float __logf(float __x) { return __ocml_native_log_f32(__x); } 695a9ac8606Spatrick 696ec727ea7Spatrick __DEVICE__ 697a9ac8606Spatrick float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } 698a9ac8606Spatrick 699ec727ea7Spatrick __DEVICE__ 700a9ac8606Spatrick float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); } 701a9ac8606Spatrick 702ec727ea7Spatrick __DEVICE__ 703a9ac8606Spatrick void __sincosf(float __x, float *__sinptr, float *__cosptr) { 704ec727ea7Spatrick *__sinptr = __ocml_native_sin_f32(__x); 705ec727ea7Spatrick *__cosptr = __ocml_native_cos_f32(__x); 706ec727ea7Spatrick } 707a9ac8606Spatrick 708ec727ea7Spatrick __DEVICE__ 709a9ac8606Spatrick float __sinf(float __x) { return __ocml_native_sin_f32(__x); } 710a9ac8606Spatrick 711ec727ea7Spatrick __DEVICE__ 712a9ac8606Spatrick float __tanf(float __x) { return __ocml_tan_f32(__x); } 713ec727ea7Spatrick // END INTRINSICS 714ec727ea7Spatrick // END FLOAT 715ec727ea7Spatrick 716ec727ea7Spatrick // BEGIN DOUBLE 717ec727ea7Spatrick __DEVICE__ 718a9ac8606Spatrick double acos(double __x) { return __ocml_acos_f64(__x); } 719a9ac8606Spatrick 720ec727ea7Spatrick __DEVICE__ 721a9ac8606Spatrick double acosh(double __x) { return __ocml_acosh_f64(__x); } 722a9ac8606Spatrick 723ec727ea7Spatrick __DEVICE__ 724a9ac8606Spatrick double asin(double __x) { return __ocml_asin_f64(__x); } 725a9ac8606Spatrick 726ec727ea7Spatrick __DEVICE__ 727a9ac8606Spatrick double asinh(double __x) { return __ocml_asinh_f64(__x); } 728a9ac8606Spatrick 729ec727ea7Spatrick __DEVICE__ 730a9ac8606Spatrick double atan(double __x) { return __ocml_atan_f64(__x); } 731a9ac8606Spatrick 732ec727ea7Spatrick __DEVICE__ 733a9ac8606Spatrick double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); } 734a9ac8606Spatrick 735ec727ea7Spatrick __DEVICE__ 736a9ac8606Spatrick double atanh(double __x) { return __ocml_atanh_f64(__x); } 737a9ac8606Spatrick 738ec727ea7Spatrick __DEVICE__ 739a9ac8606Spatrick double cbrt(double __x) { return __ocml_cbrt_f64(__x); } 740a9ac8606Spatrick 741ec727ea7Spatrick __DEVICE__ 742a9ac8606Spatrick double ceil(double __x) { return __ocml_ceil_f64(__x); } 743a9ac8606Spatrick 744ec727ea7Spatrick __DEVICE__ 745a9ac8606Spatrick double copysign(double __x, double __y) { 746ec727ea7Spatrick return __ocml_copysign_f64(__x, __y); 747ec727ea7Spatrick } 748a9ac8606Spatrick 749ec727ea7Spatrick __DEVICE__ 750a9ac8606Spatrick double cos(double __x) { return __ocml_cos_f64(__x); } 751a9ac8606Spatrick 752ec727ea7Spatrick __DEVICE__ 753a9ac8606Spatrick double cosh(double __x) { return __ocml_cosh_f64(__x); } 754a9ac8606Spatrick 755ec727ea7Spatrick __DEVICE__ 756a9ac8606Spatrick double cospi(double __x) { return __ocml_cospi_f64(__x); } 757a9ac8606Spatrick 758ec727ea7Spatrick __DEVICE__ 759a9ac8606Spatrick double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } 760a9ac8606Spatrick 761ec727ea7Spatrick __DEVICE__ 762a9ac8606Spatrick double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } 763a9ac8606Spatrick 764ec727ea7Spatrick __DEVICE__ 765a9ac8606Spatrick double erf(double __x) { return __ocml_erf_f64(__x); } 766a9ac8606Spatrick 767ec727ea7Spatrick __DEVICE__ 768a9ac8606Spatrick double erfc(double __x) { return __ocml_erfc_f64(__x); } 769a9ac8606Spatrick 770ec727ea7Spatrick __DEVICE__ 771a9ac8606Spatrick double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } 772a9ac8606Spatrick 773ec727ea7Spatrick __DEVICE__ 774a9ac8606Spatrick double erfcx(double __x) { return __ocml_erfcx_f64(__x); } 775a9ac8606Spatrick 776ec727ea7Spatrick __DEVICE__ 777a9ac8606Spatrick double erfinv(double __x) { return __ocml_erfinv_f64(__x); } 778a9ac8606Spatrick 779ec727ea7Spatrick __DEVICE__ 780a9ac8606Spatrick double exp(double __x) { return __ocml_exp_f64(__x); } 781a9ac8606Spatrick 782ec727ea7Spatrick __DEVICE__ 783a9ac8606Spatrick double exp10(double __x) { return __ocml_exp10_f64(__x); } 784a9ac8606Spatrick 785ec727ea7Spatrick __DEVICE__ 786a9ac8606Spatrick double exp2(double __x) { return __ocml_exp2_f64(__x); } 787a9ac8606Spatrick 788ec727ea7Spatrick __DEVICE__ 789a9ac8606Spatrick double expm1(double __x) { return __ocml_expm1_f64(__x); } 790a9ac8606Spatrick 791ec727ea7Spatrick __DEVICE__ 792*12c85518Srobert double fabs(double __x) { return __builtin_fabs(__x); } 793a9ac8606Spatrick 794ec727ea7Spatrick __DEVICE__ 795a9ac8606Spatrick double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } 796a9ac8606Spatrick 797ec727ea7Spatrick __DEVICE__ 798a9ac8606Spatrick double floor(double __x) { return __ocml_floor_f64(__x); } 799a9ac8606Spatrick 800ec727ea7Spatrick __DEVICE__ 801a9ac8606Spatrick double fma(double __x, double __y, double __z) { 802ec727ea7Spatrick return __ocml_fma_f64(__x, __y, __z); 803ec727ea7Spatrick } 804a9ac8606Spatrick 805ec727ea7Spatrick __DEVICE__ 806a9ac8606Spatrick double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); } 807a9ac8606Spatrick 808ec727ea7Spatrick __DEVICE__ 809a9ac8606Spatrick double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); } 810a9ac8606Spatrick 811ec727ea7Spatrick __DEVICE__ 812a9ac8606Spatrick double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } 813a9ac8606Spatrick 814ec727ea7Spatrick __DEVICE__ 815a9ac8606Spatrick double frexp(double __x, int *__nptr) { 816ec727ea7Spatrick int __tmp; 817a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 818a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 819a9ac8606Spatrick #endif 820ec727ea7Spatrick double __r = 821ec727ea7Spatrick __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp); 822ec727ea7Spatrick *__nptr = __tmp; 823ec727ea7Spatrick return __r; 824ec727ea7Spatrick } 825a9ac8606Spatrick 826ec727ea7Spatrick __DEVICE__ 827a9ac8606Spatrick double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); } 828a9ac8606Spatrick 829ec727ea7Spatrick __DEVICE__ 830a9ac8606Spatrick int ilogb(double __x) { return __ocml_ilogb_f64(__x); } 831a9ac8606Spatrick 832ec727ea7Spatrick __DEVICE__ 833a9ac8606Spatrick __RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); } 834a9ac8606Spatrick 835ec727ea7Spatrick __DEVICE__ 836a9ac8606Spatrick __RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); } 837a9ac8606Spatrick 838ec727ea7Spatrick __DEVICE__ 839a9ac8606Spatrick __RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); } 840a9ac8606Spatrick 841ec727ea7Spatrick __DEVICE__ 842a9ac8606Spatrick double j0(double __x) { return __ocml_j0_f64(__x); } 843a9ac8606Spatrick 844ec727ea7Spatrick __DEVICE__ 845a9ac8606Spatrick double j1(double __x) { return __ocml_j1_f64(__x); } 846a9ac8606Spatrick 847ec727ea7Spatrick __DEVICE__ 848a9ac8606Spatrick double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication 849ec727ea7Spatrick // and the Miller & Brown algorithm 850ec727ea7Spatrick // for linear recurrences to get O(log n) steps, but it's unclear if 851ec727ea7Spatrick // it'd be beneficial in this case. Placeholder until OCML adds 852ec727ea7Spatrick // support. 853ec727ea7Spatrick if (__n == 0) 854a9ac8606Spatrick return j0(__x); 855ec727ea7Spatrick if (__n == 1) 856a9ac8606Spatrick return j1(__x); 857ec727ea7Spatrick 858a9ac8606Spatrick double __x0 = j0(__x); 859a9ac8606Spatrick double __x1 = j1(__x); 860ec727ea7Spatrick for (int __i = 1; __i < __n; ++__i) { 861ec727ea7Spatrick double __x2 = (2 * __i) / __x * __x1 - __x0; 862ec727ea7Spatrick __x0 = __x1; 863ec727ea7Spatrick __x1 = __x2; 864ec727ea7Spatrick } 865ec727ea7Spatrick return __x1; 866ec727ea7Spatrick } 867a9ac8606Spatrick 868ec727ea7Spatrick __DEVICE__ 869a9ac8606Spatrick double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); } 870a9ac8606Spatrick 871ec727ea7Spatrick __DEVICE__ 872a9ac8606Spatrick double lgamma(double __x) { return __ocml_lgamma_f64(__x); } 873a9ac8606Spatrick 874ec727ea7Spatrick __DEVICE__ 875a9ac8606Spatrick long long int llrint(double __x) { return __ocml_rint_f64(__x); } 876a9ac8606Spatrick 877ec727ea7Spatrick __DEVICE__ 878a9ac8606Spatrick long long int llround(double __x) { return __ocml_round_f64(__x); } 879a9ac8606Spatrick 880ec727ea7Spatrick __DEVICE__ 881a9ac8606Spatrick double log(double __x) { return __ocml_log_f64(__x); } 882a9ac8606Spatrick 883ec727ea7Spatrick __DEVICE__ 884a9ac8606Spatrick double log10(double __x) { return __ocml_log10_f64(__x); } 885a9ac8606Spatrick 886ec727ea7Spatrick __DEVICE__ 887a9ac8606Spatrick double log1p(double __x) { return __ocml_log1p_f64(__x); } 888a9ac8606Spatrick 889ec727ea7Spatrick __DEVICE__ 890a9ac8606Spatrick double log2(double __x) { return __ocml_log2_f64(__x); } 891a9ac8606Spatrick 892ec727ea7Spatrick __DEVICE__ 893a9ac8606Spatrick double logb(double __x) { return __ocml_logb_f64(__x); } 894a9ac8606Spatrick 895ec727ea7Spatrick __DEVICE__ 896a9ac8606Spatrick long int lrint(double __x) { return __ocml_rint_f64(__x); } 897a9ac8606Spatrick 898ec727ea7Spatrick __DEVICE__ 899a9ac8606Spatrick long int lround(double __x) { return __ocml_round_f64(__x); } 900a9ac8606Spatrick 901ec727ea7Spatrick __DEVICE__ 902a9ac8606Spatrick double modf(double __x, double *__iptr) { 903ec727ea7Spatrick double __tmp; 904a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 905a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 906a9ac8606Spatrick #endif 907ec727ea7Spatrick double __r = 908ec727ea7Spatrick __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); 909ec727ea7Spatrick *__iptr = __tmp; 910ec727ea7Spatrick 911ec727ea7Spatrick return __r; 912ec727ea7Spatrick } 913a9ac8606Spatrick 914ec727ea7Spatrick __DEVICE__ 915a9ac8606Spatrick double nan(const char *__tagp) { 916ec727ea7Spatrick #if !_WIN32 917ec727ea7Spatrick union { 918ec727ea7Spatrick double val; 919ec727ea7Spatrick struct ieee_double { 920ec727ea7Spatrick uint64_t mantissa : 51; 921ec727ea7Spatrick uint32_t quiet : 1; 922ec727ea7Spatrick uint32_t exponent : 11; 923ec727ea7Spatrick uint32_t sign : 1; 924ec727ea7Spatrick } bits; 925ec727ea7Spatrick } __tmp; 926a9ac8606Spatrick __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); 927ec727ea7Spatrick 928ec727ea7Spatrick __tmp.bits.sign = 0u; 929ec727ea7Spatrick __tmp.bits.exponent = ~0u; 930ec727ea7Spatrick __tmp.bits.quiet = 1u; 931ec727ea7Spatrick __tmp.bits.mantissa = __make_mantissa(__tagp); 932ec727ea7Spatrick 933ec727ea7Spatrick return __tmp.val; 934ec727ea7Spatrick #else 935a9ac8606Spatrick __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double)); 936a9ac8606Spatrick uint64_t __val = __make_mantissa(__tagp); 937a9ac8606Spatrick __val |= 0xFFF << 51; 938a9ac8606Spatrick return *reinterpret_cast<double *>(&__val); 939ec727ea7Spatrick #endif 940ec727ea7Spatrick } 941a9ac8606Spatrick 942ec727ea7Spatrick __DEVICE__ 943a9ac8606Spatrick double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); } 944a9ac8606Spatrick 945ec727ea7Spatrick __DEVICE__ 946a9ac8606Spatrick double nextafter(double __x, double __y) { 947ec727ea7Spatrick return __ocml_nextafter_f64(__x, __y); 948ec727ea7Spatrick } 949a9ac8606Spatrick 950ec727ea7Spatrick __DEVICE__ 951a9ac8606Spatrick double norm(int __dim, 952ec727ea7Spatrick const double *__a) { // TODO: placeholder until OCML adds support. 953ec727ea7Spatrick double __r = 0; 954ec727ea7Spatrick while (__dim--) { 955ec727ea7Spatrick __r += __a[0] * __a[0]; 956ec727ea7Spatrick ++__a; 957ec727ea7Spatrick } 958ec727ea7Spatrick 959ec727ea7Spatrick return __ocml_sqrt_f64(__r); 960ec727ea7Spatrick } 961a9ac8606Spatrick 962ec727ea7Spatrick __DEVICE__ 963a9ac8606Spatrick double norm3d(double __x, double __y, double __z) { 964ec727ea7Spatrick return __ocml_len3_f64(__x, __y, __z); 965ec727ea7Spatrick } 966a9ac8606Spatrick 967ec727ea7Spatrick __DEVICE__ 968a9ac8606Spatrick double norm4d(double __x, double __y, double __z, double __w) { 969ec727ea7Spatrick return __ocml_len4_f64(__x, __y, __z, __w); 970ec727ea7Spatrick } 971a9ac8606Spatrick 972ec727ea7Spatrick __DEVICE__ 973a9ac8606Spatrick double normcdf(double __x) { return __ocml_ncdf_f64(__x); } 974a9ac8606Spatrick 975ec727ea7Spatrick __DEVICE__ 976a9ac8606Spatrick double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } 977a9ac8606Spatrick 978ec727ea7Spatrick __DEVICE__ 979a9ac8606Spatrick double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } 980a9ac8606Spatrick 981ec727ea7Spatrick __DEVICE__ 982a9ac8606Spatrick double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); } 983a9ac8606Spatrick 984ec727ea7Spatrick __DEVICE__ 985a9ac8606Spatrick double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } 986a9ac8606Spatrick 987a9ac8606Spatrick __DEVICE__ 988a9ac8606Spatrick double remainder(double __x, double __y) { 989ec727ea7Spatrick return __ocml_remainder_f64(__x, __y); 990ec727ea7Spatrick } 991a9ac8606Spatrick 992ec727ea7Spatrick __DEVICE__ 993a9ac8606Spatrick double remquo(double __x, double __y, int *__quo) { 994ec727ea7Spatrick int __tmp; 995a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 996a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 997a9ac8606Spatrick #endif 998ec727ea7Spatrick double __r = __ocml_remquo_f64( 999ec727ea7Spatrick __x, __y, (__attribute__((address_space(5))) int *)&__tmp); 1000ec727ea7Spatrick *__quo = __tmp; 1001ec727ea7Spatrick 1002ec727ea7Spatrick return __r; 1003ec727ea7Spatrick } 1004a9ac8606Spatrick 1005ec727ea7Spatrick __DEVICE__ 1006a9ac8606Spatrick double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); } 1007a9ac8606Spatrick 1008ec727ea7Spatrick __DEVICE__ 1009a9ac8606Spatrick double rint(double __x) { return __ocml_rint_f64(__x); } 1010a9ac8606Spatrick 1011ec727ea7Spatrick __DEVICE__ 1012a9ac8606Spatrick double rnorm(int __dim, 1013ec727ea7Spatrick const double *__a) { // TODO: placeholder until OCML adds support. 1014ec727ea7Spatrick double __r = 0; 1015ec727ea7Spatrick while (__dim--) { 1016ec727ea7Spatrick __r += __a[0] * __a[0]; 1017ec727ea7Spatrick ++__a; 1018ec727ea7Spatrick } 1019ec727ea7Spatrick 1020ec727ea7Spatrick return __ocml_rsqrt_f64(__r); 1021ec727ea7Spatrick } 1022a9ac8606Spatrick 1023ec727ea7Spatrick __DEVICE__ 1024a9ac8606Spatrick double rnorm3d(double __x, double __y, double __z) { 1025ec727ea7Spatrick return __ocml_rlen3_f64(__x, __y, __z); 1026ec727ea7Spatrick } 1027a9ac8606Spatrick 1028ec727ea7Spatrick __DEVICE__ 1029a9ac8606Spatrick double rnorm4d(double __x, double __y, double __z, double __w) { 1030ec727ea7Spatrick return __ocml_rlen4_f64(__x, __y, __z, __w); 1031ec727ea7Spatrick } 1032a9ac8606Spatrick 1033ec727ea7Spatrick __DEVICE__ 1034a9ac8606Spatrick double round(double __x) { return __ocml_round_f64(__x); } 1035a9ac8606Spatrick 1036ec727ea7Spatrick __DEVICE__ 1037a9ac8606Spatrick double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } 1038a9ac8606Spatrick 1039ec727ea7Spatrick __DEVICE__ 1040a9ac8606Spatrick double scalbln(double __x, long int __n) { 1041ec727ea7Spatrick return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n) 1042ec727ea7Spatrick : __ocml_scalb_f64(__x, __n); 1043ec727ea7Spatrick } 1044ec727ea7Spatrick __DEVICE__ 1045a9ac8606Spatrick double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); } 1046a9ac8606Spatrick 1047ec727ea7Spatrick __DEVICE__ 1048a9ac8606Spatrick __RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); } 1049a9ac8606Spatrick 1050ec727ea7Spatrick __DEVICE__ 1051a9ac8606Spatrick double sin(double __x) { return __ocml_sin_f64(__x); } 1052a9ac8606Spatrick 1053ec727ea7Spatrick __DEVICE__ 1054a9ac8606Spatrick void sincos(double __x, double *__sinptr, double *__cosptr) { 1055ec727ea7Spatrick double __tmp; 1056a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 1057a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 1058a9ac8606Spatrick #endif 1059ec727ea7Spatrick *__sinptr = __ocml_sincos_f64( 1060ec727ea7Spatrick __x, (__attribute__((address_space(5))) double *)&__tmp); 1061ec727ea7Spatrick *__cosptr = __tmp; 1062ec727ea7Spatrick } 1063a9ac8606Spatrick 1064ec727ea7Spatrick __DEVICE__ 1065a9ac8606Spatrick void sincospi(double __x, double *__sinptr, double *__cosptr) { 1066ec727ea7Spatrick double __tmp; 1067a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__ 1068a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 1069a9ac8606Spatrick #endif 1070ec727ea7Spatrick *__sinptr = __ocml_sincospi_f64( 1071ec727ea7Spatrick __x, (__attribute__((address_space(5))) double *)&__tmp); 1072ec727ea7Spatrick *__cosptr = __tmp; 1073ec727ea7Spatrick } 1074a9ac8606Spatrick 1075ec727ea7Spatrick __DEVICE__ 1076a9ac8606Spatrick double sinh(double __x) { return __ocml_sinh_f64(__x); } 1077a9ac8606Spatrick 1078ec727ea7Spatrick __DEVICE__ 1079a9ac8606Spatrick double sinpi(double __x) { return __ocml_sinpi_f64(__x); } 1080a9ac8606Spatrick 1081ec727ea7Spatrick __DEVICE__ 1082a9ac8606Spatrick double sqrt(double __x) { return __ocml_sqrt_f64(__x); } 1083a9ac8606Spatrick 1084ec727ea7Spatrick __DEVICE__ 1085a9ac8606Spatrick double tan(double __x) { return __ocml_tan_f64(__x); } 1086a9ac8606Spatrick 1087ec727ea7Spatrick __DEVICE__ 1088a9ac8606Spatrick double tanh(double __x) { return __ocml_tanh_f64(__x); } 1089a9ac8606Spatrick 1090ec727ea7Spatrick __DEVICE__ 1091a9ac8606Spatrick double tgamma(double __x) { return __ocml_tgamma_f64(__x); } 1092a9ac8606Spatrick 1093ec727ea7Spatrick __DEVICE__ 1094a9ac8606Spatrick double trunc(double __x) { return __ocml_trunc_f64(__x); } 1095a9ac8606Spatrick 1096ec727ea7Spatrick __DEVICE__ 1097a9ac8606Spatrick double y0(double __x) { return __ocml_y0_f64(__x); } 1098a9ac8606Spatrick 1099ec727ea7Spatrick __DEVICE__ 1100a9ac8606Spatrick double y1(double __x) { return __ocml_y1_f64(__x); } 1101a9ac8606Spatrick 1102ec727ea7Spatrick __DEVICE__ 1103a9ac8606Spatrick double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication 1104ec727ea7Spatrick // and the Miller & Brown algorithm 1105ec727ea7Spatrick // for linear recurrences to get O(log n) steps, but it's unclear if 1106ec727ea7Spatrick // it'd be beneficial in this case. Placeholder until OCML adds 1107ec727ea7Spatrick // support. 1108ec727ea7Spatrick if (__n == 0) 1109a9ac8606Spatrick return y0(__x); 1110ec727ea7Spatrick if (__n == 1) 1111a9ac8606Spatrick return y1(__x); 1112ec727ea7Spatrick 1113a9ac8606Spatrick double __x0 = y0(__x); 1114a9ac8606Spatrick double __x1 = y1(__x); 1115ec727ea7Spatrick for (int __i = 1; __i < __n; ++__i) { 1116ec727ea7Spatrick double __x2 = (2 * __i) / __x * __x1 - __x0; 1117ec727ea7Spatrick __x0 = __x1; 1118ec727ea7Spatrick __x1 = __x2; 1119ec727ea7Spatrick } 1120ec727ea7Spatrick 1121ec727ea7Spatrick return __x1; 1122ec727ea7Spatrick } 1123ec727ea7Spatrick 1124ec727ea7Spatrick // BEGIN INTRINSICS 1125ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 1126ec727ea7Spatrick __DEVICE__ 1127a9ac8606Spatrick double __dadd_rd(double __x, double __y) { 1128ec727ea7Spatrick return __ocml_add_rtn_f64(__x, __y); 1129ec727ea7Spatrick } 1130ec727ea7Spatrick __DEVICE__ 1131a9ac8606Spatrick double __dadd_rn(double __x, double __y) { 1132a9ac8606Spatrick return __ocml_add_rte_f64(__x, __y); 1133a9ac8606Spatrick } 1134ec727ea7Spatrick __DEVICE__ 1135a9ac8606Spatrick double __dadd_ru(double __x, double __y) { 1136ec727ea7Spatrick return __ocml_add_rtp_f64(__x, __y); 1137ec727ea7Spatrick } 1138ec727ea7Spatrick __DEVICE__ 1139a9ac8606Spatrick double __dadd_rz(double __x, double __y) { 1140ec727ea7Spatrick return __ocml_add_rtz_f64(__x, __y); 1141ec727ea7Spatrick } 1142a9ac8606Spatrick #else 1143ec727ea7Spatrick __DEVICE__ 1144a9ac8606Spatrick double __dadd_rn(double __x, double __y) { return __x + __y; } 1145ec727ea7Spatrick #endif 1146a9ac8606Spatrick 1147ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 1148ec727ea7Spatrick __DEVICE__ 1149a9ac8606Spatrick double __ddiv_rd(double __x, double __y) { 1150a9ac8606Spatrick return __ocml_div_rtn_f64(__x, __y); 1151a9ac8606Spatrick } 1152a9ac8606Spatrick __DEVICE__ 1153a9ac8606Spatrick double __ddiv_rn(double __x, double __y) { 1154a9ac8606Spatrick return __ocml_div_rte_f64(__x, __y); 1155a9ac8606Spatrick } 1156a9ac8606Spatrick __DEVICE__ 1157a9ac8606Spatrick double __ddiv_ru(double __x, double __y) { 1158ec727ea7Spatrick return __ocml_div_rtp_f64(__x, __y); 1159ec727ea7Spatrick } 1160ec727ea7Spatrick __DEVICE__ 1161a9ac8606Spatrick double __ddiv_rz(double __x, double __y) { 1162ec727ea7Spatrick return __ocml_div_rtz_f64(__x, __y); 1163ec727ea7Spatrick } 1164a9ac8606Spatrick #else 1165ec727ea7Spatrick __DEVICE__ 1166a9ac8606Spatrick double __ddiv_rn(double __x, double __y) { return __x / __y; } 1167ec727ea7Spatrick #endif 1168a9ac8606Spatrick 1169ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 1170ec727ea7Spatrick __DEVICE__ 1171a9ac8606Spatrick double __dmul_rd(double __x, double __y) { 1172a9ac8606Spatrick return __ocml_mul_rtn_f64(__x, __y); 1173a9ac8606Spatrick } 1174a9ac8606Spatrick __DEVICE__ 1175a9ac8606Spatrick double __dmul_rn(double __x, double __y) { 1176a9ac8606Spatrick return __ocml_mul_rte_f64(__x, __y); 1177a9ac8606Spatrick } 1178a9ac8606Spatrick __DEVICE__ 1179a9ac8606Spatrick double __dmul_ru(double __x, double __y) { 1180ec727ea7Spatrick return __ocml_mul_rtp_f64(__x, __y); 1181ec727ea7Spatrick } 1182ec727ea7Spatrick __DEVICE__ 1183a9ac8606Spatrick double __dmul_rz(double __x, double __y) { 1184ec727ea7Spatrick return __ocml_mul_rtz_f64(__x, __y); 1185ec727ea7Spatrick } 1186a9ac8606Spatrick #else 1187ec727ea7Spatrick __DEVICE__ 1188a9ac8606Spatrick double __dmul_rn(double __x, double __y) { return __x * __y; } 1189ec727ea7Spatrick #endif 1190a9ac8606Spatrick 1191ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 1192ec727ea7Spatrick __DEVICE__ 1193a9ac8606Spatrick double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); } 1194ec727ea7Spatrick __DEVICE__ 1195a9ac8606Spatrick double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); } 1196ec727ea7Spatrick __DEVICE__ 1197a9ac8606Spatrick double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); } 1198a9ac8606Spatrick __DEVICE__ 1199a9ac8606Spatrick double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); } 1200a9ac8606Spatrick #else 1201a9ac8606Spatrick __DEVICE__ 1202a9ac8606Spatrick double __drcp_rn(double __x) { return 1.0 / __x; } 1203ec727ea7Spatrick #endif 1204a9ac8606Spatrick 1205ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 1206ec727ea7Spatrick __DEVICE__ 1207a9ac8606Spatrick double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } 1208ec727ea7Spatrick __DEVICE__ 1209a9ac8606Spatrick double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); } 1210ec727ea7Spatrick __DEVICE__ 1211a9ac8606Spatrick double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); } 1212a9ac8606Spatrick __DEVICE__ 1213a9ac8606Spatrick double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } 1214a9ac8606Spatrick #else 1215a9ac8606Spatrick __DEVICE__ 1216a9ac8606Spatrick double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); } 1217a9ac8606Spatrick #endif 1218a9ac8606Spatrick 1219a9ac8606Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 1220a9ac8606Spatrick __DEVICE__ 1221a9ac8606Spatrick double __dsub_rd(double __x, double __y) { 1222ec727ea7Spatrick return __ocml_sub_rtn_f64(__x, __y); 1223ec727ea7Spatrick } 1224ec727ea7Spatrick __DEVICE__ 1225a9ac8606Spatrick double __dsub_rn(double __x, double __y) { 1226a9ac8606Spatrick return __ocml_sub_rte_f64(__x, __y); 1227a9ac8606Spatrick } 1228ec727ea7Spatrick __DEVICE__ 1229a9ac8606Spatrick double __dsub_ru(double __x, double __y) { 1230ec727ea7Spatrick return __ocml_sub_rtp_f64(__x, __y); 1231ec727ea7Spatrick } 1232ec727ea7Spatrick __DEVICE__ 1233a9ac8606Spatrick double __dsub_rz(double __x, double __y) { 1234ec727ea7Spatrick return __ocml_sub_rtz_f64(__x, __y); 1235ec727ea7Spatrick } 1236a9ac8606Spatrick #else 1237ec727ea7Spatrick __DEVICE__ 1238a9ac8606Spatrick double __dsub_rn(double __x, double __y) { return __x - __y; } 1239ec727ea7Spatrick #endif 1240a9ac8606Spatrick 1241ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS 1242ec727ea7Spatrick __DEVICE__ 1243a9ac8606Spatrick double __fma_rd(double __x, double __y, double __z) { 1244a9ac8606Spatrick return __ocml_fma_rtn_f64(__x, __y, __z); 1245a9ac8606Spatrick } 1246a9ac8606Spatrick __DEVICE__ 1247a9ac8606Spatrick double __fma_rn(double __x, double __y, double __z) { 1248a9ac8606Spatrick return __ocml_fma_rte_f64(__x, __y, __z); 1249a9ac8606Spatrick } 1250a9ac8606Spatrick __DEVICE__ 1251a9ac8606Spatrick double __fma_ru(double __x, double __y, double __z) { 1252ec727ea7Spatrick return __ocml_fma_rtp_f64(__x, __y, __z); 1253ec727ea7Spatrick } 1254ec727ea7Spatrick __DEVICE__ 1255a9ac8606Spatrick double __fma_rz(double __x, double __y, double __z) { 1256ec727ea7Spatrick return __ocml_fma_rtz_f64(__x, __y, __z); 1257ec727ea7Spatrick } 1258a9ac8606Spatrick #else 1259a9ac8606Spatrick __DEVICE__ 1260a9ac8606Spatrick double __fma_rn(double __x, double __y, double __z) { 1261a9ac8606Spatrick return __ocml_fma_f64(__x, __y, __z); 1262a9ac8606Spatrick } 1263ec727ea7Spatrick #endif 1264ec727ea7Spatrick // END INTRINSICS 1265ec727ea7Spatrick // END DOUBLE 1266ec727ea7Spatrick 1267a9ac8606Spatrick // C only macros 1268a9ac8606Spatrick #if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L 1269a9ac8606Spatrick #define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x) 1270a9ac8606Spatrick #define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x) 1271a9ac8606Spatrick #define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x) 1272a9ac8606Spatrick #define signbit(__x) \ 1273a9ac8606Spatrick _Generic((__x), float : __signbitf, double : __signbit)(__x) 1274a9ac8606Spatrick #endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L 1275ec727ea7Spatrick 1276ec727ea7Spatrick #if defined(__cplusplus) 1277a9ac8606Spatrick template <class T> __DEVICE__ T min(T __arg1, T __arg2) { 1278ec727ea7Spatrick return (__arg1 < __arg2) ? __arg1 : __arg2; 1279ec727ea7Spatrick } 1280ec727ea7Spatrick 1281a9ac8606Spatrick template <class T> __DEVICE__ T max(T __arg1, T __arg2) { 1282ec727ea7Spatrick return (__arg1 > __arg2) ? __arg1 : __arg2; 1283ec727ea7Spatrick } 1284ec727ea7Spatrick 1285a9ac8606Spatrick __DEVICE__ int min(int __arg1, int __arg2) { 1286ec727ea7Spatrick return (__arg1 < __arg2) ? __arg1 : __arg2; 1287ec727ea7Spatrick } 1288a9ac8606Spatrick __DEVICE__ int max(int __arg1, int __arg2) { 1289ec727ea7Spatrick return (__arg1 > __arg2) ? __arg1 : __arg2; 1290ec727ea7Spatrick } 1291ec727ea7Spatrick 1292ec727ea7Spatrick __DEVICE__ 1293a9ac8606Spatrick float max(float __x, float __y) { return fmaxf(__x, __y); } 1294ec727ea7Spatrick 1295ec727ea7Spatrick __DEVICE__ 1296a9ac8606Spatrick double max(double __x, double __y) { return fmax(__x, __y); } 1297ec727ea7Spatrick 1298ec727ea7Spatrick __DEVICE__ 1299a9ac8606Spatrick float min(float __x, float __y) { return fminf(__x, __y); } 1300ec727ea7Spatrick 1301ec727ea7Spatrick __DEVICE__ 1302a9ac8606Spatrick double min(double __x, double __y) { return fmin(__x, __y); } 1303ec727ea7Spatrick 1304a9ac8606Spatrick #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) 1305ec727ea7Spatrick __host__ inline static int min(int __arg1, int __arg2) { 1306ec727ea7Spatrick return std::min(__arg1, __arg2); 1307ec727ea7Spatrick } 1308ec727ea7Spatrick 1309ec727ea7Spatrick __host__ inline static int max(int __arg1, int __arg2) { 1310ec727ea7Spatrick return std::max(__arg1, __arg2); 1311ec727ea7Spatrick } 1312a9ac8606Spatrick #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) 1313a9ac8606Spatrick #endif 1314ec727ea7Spatrick 1315ec727ea7Spatrick #pragma pop_macro("__DEVICE__") 1316ec727ea7Spatrick #pragma pop_macro("__RETURN_TYPE") 1317ec727ea7Spatrick 1318ec727ea7Spatrick #endif // __CLANG_HIP_MATH_H__ 1319