1a9ac8606Spatrick /*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
2ec727ea7Spatrick  *
3ec727ea7Spatrick  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ec727ea7Spatrick  * See https://llvm.org/LICENSE.txt for license information.
5ec727ea7Spatrick  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ec727ea7Spatrick  *
7ec727ea7Spatrick  *===-----------------------------------------------------------------------===
8ec727ea7Spatrick  */
9ec727ea7Spatrick #ifndef __CLANG_HIP_MATH_H__
10ec727ea7Spatrick #define __CLANG_HIP_MATH_H__
11ec727ea7Spatrick 
12a9ac8606Spatrick #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
13a9ac8606Spatrick #error "This file is for HIP and OpenMP AMDGCN device compilation only."
14a9ac8606Spatrick #endif
15a9ac8606Spatrick 
16a9ac8606Spatrick #if !defined(__HIPCC_RTC__)
17a9ac8606Spatrick #if defined(__cplusplus)
18ec727ea7Spatrick #include <algorithm>
19a9ac8606Spatrick #endif
20ec727ea7Spatrick #include <limits.h>
21ec727ea7Spatrick #include <stdint.h>
22a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
23a9ac8606Spatrick #include <omp.h>
24a9ac8606Spatrick #endif
25a9ac8606Spatrick #endif // !defined(__HIPCC_RTC__)
26ec727ea7Spatrick 
27ec727ea7Spatrick #pragma push_macro("__DEVICE__")
28ec727ea7Spatrick 
29a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
30a9ac8606Spatrick #define __DEVICE__ static inline __attribute__((always_inline, nothrow))
31a9ac8606Spatrick #else
32a9ac8606Spatrick #define __DEVICE__ static __device__ inline __attribute__((always_inline))
33a9ac8606Spatrick #endif
34a9ac8606Spatrick 
35a9ac8606Spatrick // A few functions return bool type starting only in C++11.
36a9ac8606Spatrick #pragma push_macro("__RETURN_TYPE")
37a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
38a9ac8606Spatrick #define __RETURN_TYPE int
39a9ac8606Spatrick #else
40a9ac8606Spatrick #if defined(__cplusplus)
41ec727ea7Spatrick #define __RETURN_TYPE bool
42a9ac8606Spatrick #else
43a9ac8606Spatrick #define __RETURN_TYPE int
44a9ac8606Spatrick #endif
45a9ac8606Spatrick #endif // __OPENMP_AMDGCN__
46a9ac8606Spatrick 
47a9ac8606Spatrick #if defined (__cplusplus) && __cplusplus < 201103L
48a9ac8606Spatrick // emulate static_assert on type sizes
49a9ac8606Spatrick template<bool>
50a9ac8606Spatrick struct __compare_result{};
51a9ac8606Spatrick template<>
52a9ac8606Spatrick struct __compare_result<true> {
53a9ac8606Spatrick   static const __device__ bool valid;
54a9ac8606Spatrick };
55ec727ea7Spatrick 
56ec727ea7Spatrick __DEVICE__
57a9ac8606Spatrick void __suppress_unused_warning(bool b){};
58a9ac8606Spatrick template <unsigned int S, unsigned int T>
59a9ac8606Spatrick __DEVICE__ void __static_assert_equal_size() {
60a9ac8606Spatrick   __suppress_unused_warning(__compare_result<S == T>::valid);
61a9ac8606Spatrick }
62a9ac8606Spatrick 
63a9ac8606Spatrick #define __static_assert_type_size_equal(A, B) \
64a9ac8606Spatrick   __static_assert_equal_size<A,B>()
65a9ac8606Spatrick 
66a9ac8606Spatrick #else
67a9ac8606Spatrick #define __static_assert_type_size_equal(A,B) \
68a9ac8606Spatrick   static_assert((A) == (B), "")
69a9ac8606Spatrick 
70a9ac8606Spatrick #endif
71a9ac8606Spatrick 
72a9ac8606Spatrick __DEVICE__
73*12c85518Srobert uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) {
74ec727ea7Spatrick   uint64_t __r = 0;
75*12c85518Srobert   while (*__tagp != '\0') {
76ec727ea7Spatrick     char __tmp = *__tagp;
77ec727ea7Spatrick 
78ec727ea7Spatrick     if (__tmp >= '0' && __tmp <= '7')
79ec727ea7Spatrick       __r = (__r * 8u) + __tmp - '0';
80ec727ea7Spatrick     else
81ec727ea7Spatrick       return 0;
82ec727ea7Spatrick 
83ec727ea7Spatrick     ++__tagp;
84ec727ea7Spatrick   }
85ec727ea7Spatrick 
86ec727ea7Spatrick   return __r;
87ec727ea7Spatrick }
88ec727ea7Spatrick 
89ec727ea7Spatrick __DEVICE__
90*12c85518Srobert uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) {
91ec727ea7Spatrick   uint64_t __r = 0;
92*12c85518Srobert   while (*__tagp != '\0') {
93ec727ea7Spatrick     char __tmp = *__tagp;
94ec727ea7Spatrick 
95ec727ea7Spatrick     if (__tmp >= '0' && __tmp <= '9')
96ec727ea7Spatrick       __r = (__r * 10u) + __tmp - '0';
97ec727ea7Spatrick     else
98ec727ea7Spatrick       return 0;
99ec727ea7Spatrick 
100ec727ea7Spatrick     ++__tagp;
101ec727ea7Spatrick   }
102ec727ea7Spatrick 
103ec727ea7Spatrick   return __r;
104ec727ea7Spatrick }
105ec727ea7Spatrick 
106ec727ea7Spatrick __DEVICE__
107*12c85518Srobert uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) {
108ec727ea7Spatrick   uint64_t __r = 0;
109*12c85518Srobert   while (*__tagp != '\0') {
110ec727ea7Spatrick     char __tmp = *__tagp;
111ec727ea7Spatrick 
112ec727ea7Spatrick     if (__tmp >= '0' && __tmp <= '9')
113ec727ea7Spatrick       __r = (__r * 16u) + __tmp - '0';
114ec727ea7Spatrick     else if (__tmp >= 'a' && __tmp <= 'f')
115ec727ea7Spatrick       __r = (__r * 16u) + __tmp - 'a' + 10;
116ec727ea7Spatrick     else if (__tmp >= 'A' && __tmp <= 'F')
117ec727ea7Spatrick       __r = (__r * 16u) + __tmp - 'A' + 10;
118ec727ea7Spatrick     else
119ec727ea7Spatrick       return 0;
120ec727ea7Spatrick 
121ec727ea7Spatrick     ++__tagp;
122ec727ea7Spatrick   }
123ec727ea7Spatrick 
124ec727ea7Spatrick   return __r;
125ec727ea7Spatrick }
126ec727ea7Spatrick 
127ec727ea7Spatrick __DEVICE__
128*12c85518Srobert uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
129ec727ea7Spatrick   if (*__tagp == '0') {
130ec727ea7Spatrick     ++__tagp;
131ec727ea7Spatrick 
132ec727ea7Spatrick     if (*__tagp == 'x' || *__tagp == 'X')
133ec727ea7Spatrick       return __make_mantissa_base16(__tagp);
134ec727ea7Spatrick     else
135ec727ea7Spatrick       return __make_mantissa_base8(__tagp);
136ec727ea7Spatrick   }
137ec727ea7Spatrick 
138ec727ea7Spatrick   return __make_mantissa_base10(__tagp);
139ec727ea7Spatrick }
140ec727ea7Spatrick 
141ec727ea7Spatrick // BEGIN FLOAT
142a9ac8606Spatrick #if defined(__cplusplus)
143ec727ea7Spatrick __DEVICE__
144a9ac8606Spatrick int abs(int __x) {
145a9ac8606Spatrick   int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1);
146a9ac8606Spatrick   return (__x ^ __sgn) - __sgn;
147ec727ea7Spatrick }
148ec727ea7Spatrick __DEVICE__
149a9ac8606Spatrick long labs(long __x) {
150a9ac8606Spatrick   long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1);
151a9ac8606Spatrick   return (__x ^ __sgn) - __sgn;
152a9ac8606Spatrick }
153ec727ea7Spatrick __DEVICE__
154a9ac8606Spatrick long long llabs(long long __x) {
155a9ac8606Spatrick   long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1);
156a9ac8606Spatrick   return (__x ^ __sgn) - __sgn;
157a9ac8606Spatrick }
158a9ac8606Spatrick #endif
159a9ac8606Spatrick 
160ec727ea7Spatrick __DEVICE__
161a9ac8606Spatrick float acosf(float __x) { return __ocml_acos_f32(__x); }
162a9ac8606Spatrick 
163ec727ea7Spatrick __DEVICE__
164a9ac8606Spatrick float acoshf(float __x) { return __ocml_acosh_f32(__x); }
165a9ac8606Spatrick 
166ec727ea7Spatrick __DEVICE__
167a9ac8606Spatrick float asinf(float __x) { return __ocml_asin_f32(__x); }
168a9ac8606Spatrick 
169ec727ea7Spatrick __DEVICE__
170a9ac8606Spatrick float asinhf(float __x) { return __ocml_asinh_f32(__x); }
171a9ac8606Spatrick 
172ec727ea7Spatrick __DEVICE__
173a9ac8606Spatrick float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
174a9ac8606Spatrick 
175ec727ea7Spatrick __DEVICE__
176a9ac8606Spatrick float atanf(float __x) { return __ocml_atan_f32(__x); }
177a9ac8606Spatrick 
178ec727ea7Spatrick __DEVICE__
179a9ac8606Spatrick float atanhf(float __x) { return __ocml_atanh_f32(__x); }
180a9ac8606Spatrick 
181ec727ea7Spatrick __DEVICE__
182a9ac8606Spatrick float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
183a9ac8606Spatrick 
184ec727ea7Spatrick __DEVICE__
185a9ac8606Spatrick float ceilf(float __x) { return __ocml_ceil_f32(__x); }
186a9ac8606Spatrick 
187ec727ea7Spatrick __DEVICE__
188a9ac8606Spatrick float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); }
189a9ac8606Spatrick 
190ec727ea7Spatrick __DEVICE__
191a9ac8606Spatrick float cosf(float __x) { return __ocml_cos_f32(__x); }
192a9ac8606Spatrick 
193ec727ea7Spatrick __DEVICE__
194a9ac8606Spatrick float coshf(float __x) { return __ocml_cosh_f32(__x); }
195a9ac8606Spatrick 
196ec727ea7Spatrick __DEVICE__
197a9ac8606Spatrick float cospif(float __x) { return __ocml_cospi_f32(__x); }
198a9ac8606Spatrick 
199ec727ea7Spatrick __DEVICE__
200a9ac8606Spatrick float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
201a9ac8606Spatrick 
202ec727ea7Spatrick __DEVICE__
203a9ac8606Spatrick float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
204a9ac8606Spatrick 
205ec727ea7Spatrick __DEVICE__
206a9ac8606Spatrick float erfcf(float __x) { return __ocml_erfc_f32(__x); }
207a9ac8606Spatrick 
208ec727ea7Spatrick __DEVICE__
209a9ac8606Spatrick float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
210a9ac8606Spatrick 
211a9ac8606Spatrick __DEVICE__
212a9ac8606Spatrick float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
213a9ac8606Spatrick 
214a9ac8606Spatrick __DEVICE__
215a9ac8606Spatrick float erff(float __x) { return __ocml_erf_f32(__x); }
216a9ac8606Spatrick 
217a9ac8606Spatrick __DEVICE__
218a9ac8606Spatrick float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
219a9ac8606Spatrick 
220a9ac8606Spatrick __DEVICE__
221a9ac8606Spatrick float exp10f(float __x) { return __ocml_exp10_f32(__x); }
222a9ac8606Spatrick 
223a9ac8606Spatrick __DEVICE__
224a9ac8606Spatrick float exp2f(float __x) { return __ocml_exp2_f32(__x); }
225a9ac8606Spatrick 
226a9ac8606Spatrick __DEVICE__
227a9ac8606Spatrick float expf(float __x) { return __ocml_exp_f32(__x); }
228a9ac8606Spatrick 
229a9ac8606Spatrick __DEVICE__
230a9ac8606Spatrick float expm1f(float __x) { return __ocml_expm1_f32(__x); }
231a9ac8606Spatrick 
232a9ac8606Spatrick __DEVICE__
233*12c85518Srobert float fabsf(float __x) { return __builtin_fabsf(__x); }
234a9ac8606Spatrick 
235a9ac8606Spatrick __DEVICE__
236a9ac8606Spatrick float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
237a9ac8606Spatrick 
238a9ac8606Spatrick __DEVICE__
239a9ac8606Spatrick float fdividef(float __x, float __y) { return __x / __y; }
240a9ac8606Spatrick 
241a9ac8606Spatrick __DEVICE__
242a9ac8606Spatrick float floorf(float __x) { return __ocml_floor_f32(__x); }
243a9ac8606Spatrick 
244a9ac8606Spatrick __DEVICE__
245a9ac8606Spatrick float fmaf(float __x, float __y, float __z) {
246ec727ea7Spatrick   return __ocml_fma_f32(__x, __y, __z);
247ec727ea7Spatrick }
248a9ac8606Spatrick 
249ec727ea7Spatrick __DEVICE__
250a9ac8606Spatrick float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); }
251a9ac8606Spatrick 
252ec727ea7Spatrick __DEVICE__
253a9ac8606Spatrick float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); }
254a9ac8606Spatrick 
255ec727ea7Spatrick __DEVICE__
256a9ac8606Spatrick float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
257a9ac8606Spatrick 
258ec727ea7Spatrick __DEVICE__
259a9ac8606Spatrick float frexpf(float __x, int *__nptr) {
260ec727ea7Spatrick   int __tmp;
261a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
262a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
263a9ac8606Spatrick #endif
264ec727ea7Spatrick   float __r =
265ec727ea7Spatrick       __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
266ec727ea7Spatrick   *__nptr = __tmp;
267ec727ea7Spatrick 
268ec727ea7Spatrick   return __r;
269ec727ea7Spatrick }
270a9ac8606Spatrick 
271ec727ea7Spatrick __DEVICE__
272a9ac8606Spatrick float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
273a9ac8606Spatrick 
274ec727ea7Spatrick __DEVICE__
275a9ac8606Spatrick int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
276a9ac8606Spatrick 
277ec727ea7Spatrick __DEVICE__
278a9ac8606Spatrick __RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); }
279a9ac8606Spatrick 
280ec727ea7Spatrick __DEVICE__
281a9ac8606Spatrick __RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); }
282a9ac8606Spatrick 
283ec727ea7Spatrick __DEVICE__
284a9ac8606Spatrick __RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); }
285a9ac8606Spatrick 
286ec727ea7Spatrick __DEVICE__
287a9ac8606Spatrick float j0f(float __x) { return __ocml_j0_f32(__x); }
288a9ac8606Spatrick 
289ec727ea7Spatrick __DEVICE__
290a9ac8606Spatrick float j1f(float __x) { return __ocml_j1_f32(__x); }
291a9ac8606Spatrick 
292ec727ea7Spatrick __DEVICE__
293a9ac8606Spatrick float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
294ec727ea7Spatrick                                 // and the Miller & Brown algorithm
295ec727ea7Spatrick   //       for linear recurrences to get O(log n) steps, but it's unclear if
296ec727ea7Spatrick   //       it'd be beneficial in this case.
297ec727ea7Spatrick   if (__n == 0)
298ec727ea7Spatrick     return j0f(__x);
299ec727ea7Spatrick   if (__n == 1)
300ec727ea7Spatrick     return j1f(__x);
301ec727ea7Spatrick 
302ec727ea7Spatrick   float __x0 = j0f(__x);
303ec727ea7Spatrick   float __x1 = j1f(__x);
304ec727ea7Spatrick   for (int __i = 1; __i < __n; ++__i) {
305ec727ea7Spatrick     float __x2 = (2 * __i) / __x * __x1 - __x0;
306ec727ea7Spatrick     __x0 = __x1;
307ec727ea7Spatrick     __x1 = __x2;
308ec727ea7Spatrick   }
309ec727ea7Spatrick 
310ec727ea7Spatrick   return __x1;
311ec727ea7Spatrick }
312a9ac8606Spatrick 
313ec727ea7Spatrick __DEVICE__
314a9ac8606Spatrick float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); }
315a9ac8606Spatrick 
316ec727ea7Spatrick __DEVICE__
317a9ac8606Spatrick float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
318a9ac8606Spatrick 
319ec727ea7Spatrick __DEVICE__
320a9ac8606Spatrick long long int llrintf(float __x) { return __ocml_rint_f32(__x); }
321a9ac8606Spatrick 
322ec727ea7Spatrick __DEVICE__
323a9ac8606Spatrick long long int llroundf(float __x) { return __ocml_round_f32(__x); }
324a9ac8606Spatrick 
325ec727ea7Spatrick __DEVICE__
326a9ac8606Spatrick float log10f(float __x) { return __ocml_log10_f32(__x); }
327a9ac8606Spatrick 
328ec727ea7Spatrick __DEVICE__
329a9ac8606Spatrick float log1pf(float __x) { return __ocml_log1p_f32(__x); }
330a9ac8606Spatrick 
331ec727ea7Spatrick __DEVICE__
332a9ac8606Spatrick float log2f(float __x) { return __ocml_log2_f32(__x); }
333a9ac8606Spatrick 
334ec727ea7Spatrick __DEVICE__
335a9ac8606Spatrick float logbf(float __x) { return __ocml_logb_f32(__x); }
336a9ac8606Spatrick 
337ec727ea7Spatrick __DEVICE__
338a9ac8606Spatrick float logf(float __x) { return __ocml_log_f32(__x); }
339a9ac8606Spatrick 
340ec727ea7Spatrick __DEVICE__
341a9ac8606Spatrick long int lrintf(float __x) { return __ocml_rint_f32(__x); }
342a9ac8606Spatrick 
343ec727ea7Spatrick __DEVICE__
344a9ac8606Spatrick long int lroundf(float __x) { return __ocml_round_f32(__x); }
345a9ac8606Spatrick 
346ec727ea7Spatrick __DEVICE__
347a9ac8606Spatrick float modff(float __x, float *__iptr) {
348ec727ea7Spatrick   float __tmp;
349a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
350a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
351a9ac8606Spatrick #endif
352ec727ea7Spatrick   float __r =
353ec727ea7Spatrick       __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
354ec727ea7Spatrick   *__iptr = __tmp;
355ec727ea7Spatrick   return __r;
356ec727ea7Spatrick }
357a9ac8606Spatrick 
358ec727ea7Spatrick __DEVICE__
359*12c85518Srobert float nanf(const char *__tagp __attribute__((nonnull))) {
360ec727ea7Spatrick   union {
361ec727ea7Spatrick     float val;
362ec727ea7Spatrick     struct ieee_float {
363a9ac8606Spatrick       unsigned int mantissa : 22;
364a9ac8606Spatrick       unsigned int quiet : 1;
365a9ac8606Spatrick       unsigned int exponent : 8;
366a9ac8606Spatrick       unsigned int sign : 1;
367ec727ea7Spatrick     } bits;
368ec727ea7Spatrick   } __tmp;
369a9ac8606Spatrick   __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
370ec727ea7Spatrick 
371ec727ea7Spatrick   __tmp.bits.sign = 0u;
372ec727ea7Spatrick   __tmp.bits.exponent = ~0u;
373ec727ea7Spatrick   __tmp.bits.quiet = 1u;
374ec727ea7Spatrick   __tmp.bits.mantissa = __make_mantissa(__tagp);
375ec727ea7Spatrick 
376ec727ea7Spatrick   return __tmp.val;
377ec727ea7Spatrick }
378a9ac8606Spatrick 
379ec727ea7Spatrick __DEVICE__
380a9ac8606Spatrick float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); }
381a9ac8606Spatrick 
382ec727ea7Spatrick __DEVICE__
383a9ac8606Spatrick float nextafterf(float __x, float __y) {
384ec727ea7Spatrick   return __ocml_nextafter_f32(__x, __y);
385ec727ea7Spatrick }
386a9ac8606Spatrick 
387ec727ea7Spatrick __DEVICE__
388a9ac8606Spatrick float norm3df(float __x, float __y, float __z) {
389ec727ea7Spatrick   return __ocml_len3_f32(__x, __y, __z);
390ec727ea7Spatrick }
391a9ac8606Spatrick 
392ec727ea7Spatrick __DEVICE__
393a9ac8606Spatrick float norm4df(float __x, float __y, float __z, float __w) {
394ec727ea7Spatrick   return __ocml_len4_f32(__x, __y, __z, __w);
395ec727ea7Spatrick }
396a9ac8606Spatrick 
397ec727ea7Spatrick __DEVICE__
398a9ac8606Spatrick float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
399a9ac8606Spatrick 
400ec727ea7Spatrick __DEVICE__
401a9ac8606Spatrick float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
402a9ac8606Spatrick 
403ec727ea7Spatrick __DEVICE__
404a9ac8606Spatrick float normf(int __dim,
405ec727ea7Spatrick             const float *__a) { // TODO: placeholder until OCML adds support.
406ec727ea7Spatrick   float __r = 0;
407ec727ea7Spatrick   while (__dim--) {
408ec727ea7Spatrick     __r += __a[0] * __a[0];
409ec727ea7Spatrick     ++__a;
410ec727ea7Spatrick   }
411ec727ea7Spatrick 
412ec727ea7Spatrick   return __ocml_sqrt_f32(__r);
413ec727ea7Spatrick }
414a9ac8606Spatrick 
415ec727ea7Spatrick __DEVICE__
416a9ac8606Spatrick float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
417a9ac8606Spatrick 
418ec727ea7Spatrick __DEVICE__
419a9ac8606Spatrick float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
420a9ac8606Spatrick 
421ec727ea7Spatrick __DEVICE__
422a9ac8606Spatrick float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
423a9ac8606Spatrick 
424a9ac8606Spatrick __DEVICE__
425a9ac8606Spatrick float remainderf(float __x, float __y) {
426ec727ea7Spatrick   return __ocml_remainder_f32(__x, __y);
427ec727ea7Spatrick }
428a9ac8606Spatrick 
429ec727ea7Spatrick __DEVICE__
430a9ac8606Spatrick float remquof(float __x, float __y, int *__quo) {
431ec727ea7Spatrick   int __tmp;
432a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
433a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
434a9ac8606Spatrick #endif
435ec727ea7Spatrick   float __r = __ocml_remquo_f32(
436ec727ea7Spatrick       __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
437ec727ea7Spatrick   *__quo = __tmp;
438ec727ea7Spatrick 
439ec727ea7Spatrick   return __r;
440ec727ea7Spatrick }
441a9ac8606Spatrick 
442ec727ea7Spatrick __DEVICE__
443a9ac8606Spatrick float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
444a9ac8606Spatrick 
445ec727ea7Spatrick __DEVICE__
446a9ac8606Spatrick float rintf(float __x) { return __ocml_rint_f32(__x); }
447a9ac8606Spatrick 
448ec727ea7Spatrick __DEVICE__
449a9ac8606Spatrick float rnorm3df(float __x, float __y, float __z) {
450ec727ea7Spatrick   return __ocml_rlen3_f32(__x, __y, __z);
451ec727ea7Spatrick }
452ec727ea7Spatrick 
453ec727ea7Spatrick __DEVICE__
454a9ac8606Spatrick float rnorm4df(float __x, float __y, float __z, float __w) {
455ec727ea7Spatrick   return __ocml_rlen4_f32(__x, __y, __z, __w);
456ec727ea7Spatrick }
457a9ac8606Spatrick 
458ec727ea7Spatrick __DEVICE__
459a9ac8606Spatrick float rnormf(int __dim,
460ec727ea7Spatrick              const float *__a) { // TODO: placeholder until OCML adds support.
461ec727ea7Spatrick   float __r = 0;
462ec727ea7Spatrick   while (__dim--) {
463ec727ea7Spatrick     __r += __a[0] * __a[0];
464ec727ea7Spatrick     ++__a;
465ec727ea7Spatrick   }
466ec727ea7Spatrick 
467ec727ea7Spatrick   return __ocml_rsqrt_f32(__r);
468ec727ea7Spatrick }
469a9ac8606Spatrick 
470ec727ea7Spatrick __DEVICE__
471a9ac8606Spatrick float roundf(float __x) { return __ocml_round_f32(__x); }
472a9ac8606Spatrick 
473ec727ea7Spatrick __DEVICE__
474a9ac8606Spatrick float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
475a9ac8606Spatrick 
476ec727ea7Spatrick __DEVICE__
477a9ac8606Spatrick float scalblnf(float __x, long int __n) {
478ec727ea7Spatrick   return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n)
479ec727ea7Spatrick                          : __ocml_scalb_f32(__x, __n);
480ec727ea7Spatrick }
481ec727ea7Spatrick 
482a9ac8606Spatrick __DEVICE__
483a9ac8606Spatrick float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); }
484a9ac8606Spatrick 
485a9ac8606Spatrick __DEVICE__
486a9ac8606Spatrick __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); }
487a9ac8606Spatrick 
488a9ac8606Spatrick __DEVICE__
489a9ac8606Spatrick void sincosf(float __x, float *__sinptr, float *__cosptr) {
490a9ac8606Spatrick   float __tmp;
491a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
492a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
493a9ac8606Spatrick #endif
494ec727ea7Spatrick   *__sinptr =
495ec727ea7Spatrick       __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
496ec727ea7Spatrick   *__cosptr = __tmp;
497ec727ea7Spatrick }
498ec727ea7Spatrick 
499a9ac8606Spatrick __DEVICE__
500a9ac8606Spatrick void sincospif(float __x, float *__sinptr, float *__cosptr) {
501a9ac8606Spatrick   float __tmp;
502a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
503a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
504a9ac8606Spatrick #endif
505ec727ea7Spatrick   *__sinptr = __ocml_sincospi_f32(
506ec727ea7Spatrick       __x, (__attribute__((address_space(5))) float *)&__tmp);
507ec727ea7Spatrick   *__cosptr = __tmp;
508ec727ea7Spatrick }
509a9ac8606Spatrick 
510ec727ea7Spatrick __DEVICE__
511a9ac8606Spatrick float sinf(float __x) { return __ocml_sin_f32(__x); }
512a9ac8606Spatrick 
513ec727ea7Spatrick __DEVICE__
514a9ac8606Spatrick float sinhf(float __x) { return __ocml_sinh_f32(__x); }
515a9ac8606Spatrick 
516ec727ea7Spatrick __DEVICE__
517a9ac8606Spatrick float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
518a9ac8606Spatrick 
519ec727ea7Spatrick __DEVICE__
520a9ac8606Spatrick float sqrtf(float __x) { return __ocml_sqrt_f32(__x); }
521a9ac8606Spatrick 
522ec727ea7Spatrick __DEVICE__
523a9ac8606Spatrick float tanf(float __x) { return __ocml_tan_f32(__x); }
524a9ac8606Spatrick 
525ec727ea7Spatrick __DEVICE__
526a9ac8606Spatrick float tanhf(float __x) { return __ocml_tanh_f32(__x); }
527a9ac8606Spatrick 
528ec727ea7Spatrick __DEVICE__
529a9ac8606Spatrick float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
530a9ac8606Spatrick 
531ec727ea7Spatrick __DEVICE__
532a9ac8606Spatrick float truncf(float __x) { return __ocml_trunc_f32(__x); }
533a9ac8606Spatrick 
534ec727ea7Spatrick __DEVICE__
535a9ac8606Spatrick float y0f(float __x) { return __ocml_y0_f32(__x); }
536a9ac8606Spatrick 
537ec727ea7Spatrick __DEVICE__
538a9ac8606Spatrick float y1f(float __x) { return __ocml_y1_f32(__x); }
539a9ac8606Spatrick 
540ec727ea7Spatrick __DEVICE__
541a9ac8606Spatrick float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
542ec727ea7Spatrick                                 // and the Miller & Brown algorithm
543ec727ea7Spatrick   //       for linear recurrences to get O(log n) steps, but it's unclear if
544ec727ea7Spatrick   //       it'd be beneficial in this case. Placeholder until OCML adds
545ec727ea7Spatrick   //       support.
546ec727ea7Spatrick   if (__n == 0)
547ec727ea7Spatrick     return y0f(__x);
548ec727ea7Spatrick   if (__n == 1)
549ec727ea7Spatrick     return y1f(__x);
550ec727ea7Spatrick 
551ec727ea7Spatrick   float __x0 = y0f(__x);
552ec727ea7Spatrick   float __x1 = y1f(__x);
553ec727ea7Spatrick   for (int __i = 1; __i < __n; ++__i) {
554ec727ea7Spatrick     float __x2 = (2 * __i) / __x * __x1 - __x0;
555ec727ea7Spatrick     __x0 = __x1;
556ec727ea7Spatrick     __x1 = __x2;
557ec727ea7Spatrick   }
558ec727ea7Spatrick 
559ec727ea7Spatrick   return __x1;
560ec727ea7Spatrick }
561ec727ea7Spatrick 
562ec727ea7Spatrick // BEGIN INTRINSICS
563a9ac8606Spatrick 
564ec727ea7Spatrick __DEVICE__
565a9ac8606Spatrick float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
566a9ac8606Spatrick 
567ec727ea7Spatrick __DEVICE__
568a9ac8606Spatrick float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); }
569a9ac8606Spatrick 
570ec727ea7Spatrick __DEVICE__
571a9ac8606Spatrick float __expf(float __x) { return __ocml_native_exp_f32(__x); }
572a9ac8606Spatrick 
573ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
574ec727ea7Spatrick __DEVICE__
575a9ac8606Spatrick float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
576a9ac8606Spatrick __DEVICE__
577a9ac8606Spatrick float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
578a9ac8606Spatrick __DEVICE__
579a9ac8606Spatrick float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
580a9ac8606Spatrick __DEVICE__
581a9ac8606Spatrick float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
582a9ac8606Spatrick #else
583a9ac8606Spatrick __DEVICE__
584a9ac8606Spatrick float __fadd_rn(float __x, float __y) { return __x + __y; }
585ec727ea7Spatrick #endif
586a9ac8606Spatrick 
587ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
588ec727ea7Spatrick __DEVICE__
589a9ac8606Spatrick float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
590ec727ea7Spatrick __DEVICE__
591a9ac8606Spatrick float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
592ec727ea7Spatrick __DEVICE__
593a9ac8606Spatrick float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
594a9ac8606Spatrick __DEVICE__
595a9ac8606Spatrick float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
596a9ac8606Spatrick #else
597a9ac8606Spatrick __DEVICE__
598a9ac8606Spatrick float __fdiv_rn(float __x, float __y) { return __x / __y; }
599ec727ea7Spatrick #endif
600a9ac8606Spatrick 
601ec727ea7Spatrick __DEVICE__
602a9ac8606Spatrick float __fdividef(float __x, float __y) { return __x / __y; }
603a9ac8606Spatrick 
604ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
605ec727ea7Spatrick __DEVICE__
606a9ac8606Spatrick float __fmaf_rd(float __x, float __y, float __z) {
607ec727ea7Spatrick   return __ocml_fma_rtn_f32(__x, __y, __z);
608ec727ea7Spatrick }
609ec727ea7Spatrick __DEVICE__
610a9ac8606Spatrick float __fmaf_rn(float __x, float __y, float __z) {
611a9ac8606Spatrick   return __ocml_fma_rte_f32(__x, __y, __z);
612ec727ea7Spatrick }
613ec727ea7Spatrick __DEVICE__
614a9ac8606Spatrick float __fmaf_ru(float __x, float __y, float __z) {
615ec727ea7Spatrick   return __ocml_fma_rtp_f32(__x, __y, __z);
616ec727ea7Spatrick }
617ec727ea7Spatrick __DEVICE__
618a9ac8606Spatrick float __fmaf_rz(float __x, float __y, float __z) {
619ec727ea7Spatrick   return __ocml_fma_rtz_f32(__x, __y, __z);
620ec727ea7Spatrick }
621a9ac8606Spatrick #else
622ec727ea7Spatrick __DEVICE__
623a9ac8606Spatrick float __fmaf_rn(float __x, float __y, float __z) {
624a9ac8606Spatrick   return __ocml_fma_f32(__x, __y, __z);
625ec727ea7Spatrick }
626ec727ea7Spatrick #endif
627a9ac8606Spatrick 
628ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
629ec727ea7Spatrick __DEVICE__
630a9ac8606Spatrick float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
631ec727ea7Spatrick __DEVICE__
632a9ac8606Spatrick float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
633ec727ea7Spatrick __DEVICE__
634a9ac8606Spatrick float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
635a9ac8606Spatrick __DEVICE__
636a9ac8606Spatrick float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
637a9ac8606Spatrick #else
638a9ac8606Spatrick __DEVICE__
639a9ac8606Spatrick float __fmul_rn(float __x, float __y) { return __x * __y; }
640ec727ea7Spatrick #endif
641a9ac8606Spatrick 
642ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
643ec727ea7Spatrick __DEVICE__
644a9ac8606Spatrick float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
645ec727ea7Spatrick __DEVICE__
646a9ac8606Spatrick float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
647a9ac8606Spatrick __DEVICE__
648a9ac8606Spatrick float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
649a9ac8606Spatrick __DEVICE__
650a9ac8606Spatrick float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
651a9ac8606Spatrick #else
652a9ac8606Spatrick __DEVICE__
653a9ac8606Spatrick float __frcp_rn(float __x) { return 1.0f / __x; }
654ec727ea7Spatrick #endif
655a9ac8606Spatrick 
656ec727ea7Spatrick __DEVICE__
657a9ac8606Spatrick float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); }
658a9ac8606Spatrick 
659ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
660ec727ea7Spatrick __DEVICE__
661a9ac8606Spatrick float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
662ec727ea7Spatrick __DEVICE__
663a9ac8606Spatrick float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
664a9ac8606Spatrick __DEVICE__
665a9ac8606Spatrick float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
666a9ac8606Spatrick __DEVICE__
667a9ac8606Spatrick float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
668a9ac8606Spatrick #else
669a9ac8606Spatrick __DEVICE__
670a9ac8606Spatrick float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
671a9ac8606Spatrick #endif
672a9ac8606Spatrick 
673ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
674ec727ea7Spatrick __DEVICE__
675a9ac8606Spatrick float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
676ec727ea7Spatrick __DEVICE__
677a9ac8606Spatrick float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
678ec727ea7Spatrick __DEVICE__
679a9ac8606Spatrick float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
680a9ac8606Spatrick __DEVICE__
681a9ac8606Spatrick float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
682a9ac8606Spatrick #else
683a9ac8606Spatrick __DEVICE__
684a9ac8606Spatrick float __fsub_rn(float __x, float __y) { return __x - __y; }
685ec727ea7Spatrick #endif
686a9ac8606Spatrick 
687ec727ea7Spatrick __DEVICE__
688a9ac8606Spatrick float __log10f(float __x) { return __ocml_native_log10_f32(__x); }
689a9ac8606Spatrick 
690ec727ea7Spatrick __DEVICE__
691a9ac8606Spatrick float __log2f(float __x) { return __ocml_native_log2_f32(__x); }
692a9ac8606Spatrick 
693ec727ea7Spatrick __DEVICE__
694a9ac8606Spatrick float __logf(float __x) { return __ocml_native_log_f32(__x); }
695a9ac8606Spatrick 
696ec727ea7Spatrick __DEVICE__
697a9ac8606Spatrick float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
698a9ac8606Spatrick 
699ec727ea7Spatrick __DEVICE__
700a9ac8606Spatrick float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
701a9ac8606Spatrick 
702ec727ea7Spatrick __DEVICE__
703a9ac8606Spatrick void __sincosf(float __x, float *__sinptr, float *__cosptr) {
704ec727ea7Spatrick   *__sinptr = __ocml_native_sin_f32(__x);
705ec727ea7Spatrick   *__cosptr = __ocml_native_cos_f32(__x);
706ec727ea7Spatrick }
707a9ac8606Spatrick 
708ec727ea7Spatrick __DEVICE__
709a9ac8606Spatrick float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
710a9ac8606Spatrick 
711ec727ea7Spatrick __DEVICE__
712a9ac8606Spatrick float __tanf(float __x) { return __ocml_tan_f32(__x); }
713ec727ea7Spatrick // END INTRINSICS
714ec727ea7Spatrick // END FLOAT
715ec727ea7Spatrick 
716ec727ea7Spatrick // BEGIN DOUBLE
717ec727ea7Spatrick __DEVICE__
718a9ac8606Spatrick double acos(double __x) { return __ocml_acos_f64(__x); }
719a9ac8606Spatrick 
720ec727ea7Spatrick __DEVICE__
721a9ac8606Spatrick double acosh(double __x) { return __ocml_acosh_f64(__x); }
722a9ac8606Spatrick 
723ec727ea7Spatrick __DEVICE__
724a9ac8606Spatrick double asin(double __x) { return __ocml_asin_f64(__x); }
725a9ac8606Spatrick 
726ec727ea7Spatrick __DEVICE__
727a9ac8606Spatrick double asinh(double __x) { return __ocml_asinh_f64(__x); }
728a9ac8606Spatrick 
729ec727ea7Spatrick __DEVICE__
730a9ac8606Spatrick double atan(double __x) { return __ocml_atan_f64(__x); }
731a9ac8606Spatrick 
732ec727ea7Spatrick __DEVICE__
733a9ac8606Spatrick double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
734a9ac8606Spatrick 
735ec727ea7Spatrick __DEVICE__
736a9ac8606Spatrick double atanh(double __x) { return __ocml_atanh_f64(__x); }
737a9ac8606Spatrick 
738ec727ea7Spatrick __DEVICE__
739a9ac8606Spatrick double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
740a9ac8606Spatrick 
741ec727ea7Spatrick __DEVICE__
742a9ac8606Spatrick double ceil(double __x) { return __ocml_ceil_f64(__x); }
743a9ac8606Spatrick 
744ec727ea7Spatrick __DEVICE__
745a9ac8606Spatrick double copysign(double __x, double __y) {
746ec727ea7Spatrick   return __ocml_copysign_f64(__x, __y);
747ec727ea7Spatrick }
748a9ac8606Spatrick 
749ec727ea7Spatrick __DEVICE__
750a9ac8606Spatrick double cos(double __x) { return __ocml_cos_f64(__x); }
751a9ac8606Spatrick 
752ec727ea7Spatrick __DEVICE__
753a9ac8606Spatrick double cosh(double __x) { return __ocml_cosh_f64(__x); }
754a9ac8606Spatrick 
755ec727ea7Spatrick __DEVICE__
756a9ac8606Spatrick double cospi(double __x) { return __ocml_cospi_f64(__x); }
757a9ac8606Spatrick 
758ec727ea7Spatrick __DEVICE__
759a9ac8606Spatrick double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
760a9ac8606Spatrick 
761ec727ea7Spatrick __DEVICE__
762a9ac8606Spatrick double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
763a9ac8606Spatrick 
764ec727ea7Spatrick __DEVICE__
765a9ac8606Spatrick double erf(double __x) { return __ocml_erf_f64(__x); }
766a9ac8606Spatrick 
767ec727ea7Spatrick __DEVICE__
768a9ac8606Spatrick double erfc(double __x) { return __ocml_erfc_f64(__x); }
769a9ac8606Spatrick 
770ec727ea7Spatrick __DEVICE__
771a9ac8606Spatrick double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
772a9ac8606Spatrick 
773ec727ea7Spatrick __DEVICE__
774a9ac8606Spatrick double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
775a9ac8606Spatrick 
776ec727ea7Spatrick __DEVICE__
777a9ac8606Spatrick double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
778a9ac8606Spatrick 
779ec727ea7Spatrick __DEVICE__
780a9ac8606Spatrick double exp(double __x) { return __ocml_exp_f64(__x); }
781a9ac8606Spatrick 
782ec727ea7Spatrick __DEVICE__
783a9ac8606Spatrick double exp10(double __x) { return __ocml_exp10_f64(__x); }
784a9ac8606Spatrick 
785ec727ea7Spatrick __DEVICE__
786a9ac8606Spatrick double exp2(double __x) { return __ocml_exp2_f64(__x); }
787a9ac8606Spatrick 
788ec727ea7Spatrick __DEVICE__
789a9ac8606Spatrick double expm1(double __x) { return __ocml_expm1_f64(__x); }
790a9ac8606Spatrick 
791ec727ea7Spatrick __DEVICE__
792*12c85518Srobert double fabs(double __x) { return __builtin_fabs(__x); }
793a9ac8606Spatrick 
794ec727ea7Spatrick __DEVICE__
795a9ac8606Spatrick double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
796a9ac8606Spatrick 
797ec727ea7Spatrick __DEVICE__
798a9ac8606Spatrick double floor(double __x) { return __ocml_floor_f64(__x); }
799a9ac8606Spatrick 
800ec727ea7Spatrick __DEVICE__
801a9ac8606Spatrick double fma(double __x, double __y, double __z) {
802ec727ea7Spatrick   return __ocml_fma_f64(__x, __y, __z);
803ec727ea7Spatrick }
804a9ac8606Spatrick 
805ec727ea7Spatrick __DEVICE__
806a9ac8606Spatrick double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); }
807a9ac8606Spatrick 
808ec727ea7Spatrick __DEVICE__
809a9ac8606Spatrick double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); }
810a9ac8606Spatrick 
811ec727ea7Spatrick __DEVICE__
812a9ac8606Spatrick double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
813a9ac8606Spatrick 
814ec727ea7Spatrick __DEVICE__
815a9ac8606Spatrick double frexp(double __x, int *__nptr) {
816ec727ea7Spatrick   int __tmp;
817a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
818a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
819a9ac8606Spatrick #endif
820ec727ea7Spatrick   double __r =
821ec727ea7Spatrick       __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
822ec727ea7Spatrick   *__nptr = __tmp;
823ec727ea7Spatrick   return __r;
824ec727ea7Spatrick }
825a9ac8606Spatrick 
826ec727ea7Spatrick __DEVICE__
827a9ac8606Spatrick double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
828a9ac8606Spatrick 
829ec727ea7Spatrick __DEVICE__
830a9ac8606Spatrick int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
831a9ac8606Spatrick 
832ec727ea7Spatrick __DEVICE__
833a9ac8606Spatrick __RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); }
834a9ac8606Spatrick 
835ec727ea7Spatrick __DEVICE__
836a9ac8606Spatrick __RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); }
837a9ac8606Spatrick 
838ec727ea7Spatrick __DEVICE__
839a9ac8606Spatrick __RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); }
840a9ac8606Spatrick 
841ec727ea7Spatrick __DEVICE__
842a9ac8606Spatrick double j0(double __x) { return __ocml_j0_f64(__x); }
843a9ac8606Spatrick 
844ec727ea7Spatrick __DEVICE__
845a9ac8606Spatrick double j1(double __x) { return __ocml_j1_f64(__x); }
846a9ac8606Spatrick 
847ec727ea7Spatrick __DEVICE__
848a9ac8606Spatrick double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
849ec727ea7Spatrick                                  // and the Miller & Brown algorithm
850ec727ea7Spatrick   //       for linear recurrences to get O(log n) steps, but it's unclear if
851ec727ea7Spatrick   //       it'd be beneficial in this case. Placeholder until OCML adds
852ec727ea7Spatrick   //       support.
853ec727ea7Spatrick   if (__n == 0)
854a9ac8606Spatrick     return j0(__x);
855ec727ea7Spatrick   if (__n == 1)
856a9ac8606Spatrick     return j1(__x);
857ec727ea7Spatrick 
858a9ac8606Spatrick   double __x0 = j0(__x);
859a9ac8606Spatrick   double __x1 = j1(__x);
860ec727ea7Spatrick   for (int __i = 1; __i < __n; ++__i) {
861ec727ea7Spatrick     double __x2 = (2 * __i) / __x * __x1 - __x0;
862ec727ea7Spatrick     __x0 = __x1;
863ec727ea7Spatrick     __x1 = __x2;
864ec727ea7Spatrick   }
865ec727ea7Spatrick   return __x1;
866ec727ea7Spatrick }
867a9ac8606Spatrick 
868ec727ea7Spatrick __DEVICE__
869a9ac8606Spatrick double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); }
870a9ac8606Spatrick 
871ec727ea7Spatrick __DEVICE__
872a9ac8606Spatrick double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
873a9ac8606Spatrick 
874ec727ea7Spatrick __DEVICE__
875a9ac8606Spatrick long long int llrint(double __x) { return __ocml_rint_f64(__x); }
876a9ac8606Spatrick 
877ec727ea7Spatrick __DEVICE__
878a9ac8606Spatrick long long int llround(double __x) { return __ocml_round_f64(__x); }
879a9ac8606Spatrick 
880ec727ea7Spatrick __DEVICE__
881a9ac8606Spatrick double log(double __x) { return __ocml_log_f64(__x); }
882a9ac8606Spatrick 
883ec727ea7Spatrick __DEVICE__
884a9ac8606Spatrick double log10(double __x) { return __ocml_log10_f64(__x); }
885a9ac8606Spatrick 
886ec727ea7Spatrick __DEVICE__
887a9ac8606Spatrick double log1p(double __x) { return __ocml_log1p_f64(__x); }
888a9ac8606Spatrick 
889ec727ea7Spatrick __DEVICE__
890a9ac8606Spatrick double log2(double __x) { return __ocml_log2_f64(__x); }
891a9ac8606Spatrick 
892ec727ea7Spatrick __DEVICE__
893a9ac8606Spatrick double logb(double __x) { return __ocml_logb_f64(__x); }
894a9ac8606Spatrick 
895ec727ea7Spatrick __DEVICE__
896a9ac8606Spatrick long int lrint(double __x) { return __ocml_rint_f64(__x); }
897a9ac8606Spatrick 
898ec727ea7Spatrick __DEVICE__
899a9ac8606Spatrick long int lround(double __x) { return __ocml_round_f64(__x); }
900a9ac8606Spatrick 
901ec727ea7Spatrick __DEVICE__
902a9ac8606Spatrick double modf(double __x, double *__iptr) {
903ec727ea7Spatrick   double __tmp;
904a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
905a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
906a9ac8606Spatrick #endif
907ec727ea7Spatrick   double __r =
908ec727ea7Spatrick       __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
909ec727ea7Spatrick   *__iptr = __tmp;
910ec727ea7Spatrick 
911ec727ea7Spatrick   return __r;
912ec727ea7Spatrick }
913a9ac8606Spatrick 
914ec727ea7Spatrick __DEVICE__
915a9ac8606Spatrick double nan(const char *__tagp) {
916ec727ea7Spatrick #if !_WIN32
917ec727ea7Spatrick   union {
918ec727ea7Spatrick     double val;
919ec727ea7Spatrick     struct ieee_double {
920ec727ea7Spatrick       uint64_t mantissa : 51;
921ec727ea7Spatrick       uint32_t quiet : 1;
922ec727ea7Spatrick       uint32_t exponent : 11;
923ec727ea7Spatrick       uint32_t sign : 1;
924ec727ea7Spatrick     } bits;
925ec727ea7Spatrick   } __tmp;
926a9ac8606Spatrick   __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
927ec727ea7Spatrick 
928ec727ea7Spatrick   __tmp.bits.sign = 0u;
929ec727ea7Spatrick   __tmp.bits.exponent = ~0u;
930ec727ea7Spatrick   __tmp.bits.quiet = 1u;
931ec727ea7Spatrick   __tmp.bits.mantissa = __make_mantissa(__tagp);
932ec727ea7Spatrick 
933ec727ea7Spatrick   return __tmp.val;
934ec727ea7Spatrick #else
935a9ac8606Spatrick   __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
936a9ac8606Spatrick   uint64_t __val = __make_mantissa(__tagp);
937a9ac8606Spatrick   __val |= 0xFFF << 51;
938a9ac8606Spatrick   return *reinterpret_cast<double *>(&__val);
939ec727ea7Spatrick #endif
940ec727ea7Spatrick }
941a9ac8606Spatrick 
942ec727ea7Spatrick __DEVICE__
943a9ac8606Spatrick double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); }
944a9ac8606Spatrick 
945ec727ea7Spatrick __DEVICE__
946a9ac8606Spatrick double nextafter(double __x, double __y) {
947ec727ea7Spatrick   return __ocml_nextafter_f64(__x, __y);
948ec727ea7Spatrick }
949a9ac8606Spatrick 
950ec727ea7Spatrick __DEVICE__
951a9ac8606Spatrick double norm(int __dim,
952ec727ea7Spatrick             const double *__a) { // TODO: placeholder until OCML adds support.
953ec727ea7Spatrick   double __r = 0;
954ec727ea7Spatrick   while (__dim--) {
955ec727ea7Spatrick     __r += __a[0] * __a[0];
956ec727ea7Spatrick     ++__a;
957ec727ea7Spatrick   }
958ec727ea7Spatrick 
959ec727ea7Spatrick   return __ocml_sqrt_f64(__r);
960ec727ea7Spatrick }
961a9ac8606Spatrick 
962ec727ea7Spatrick __DEVICE__
963a9ac8606Spatrick double norm3d(double __x, double __y, double __z) {
964ec727ea7Spatrick   return __ocml_len3_f64(__x, __y, __z);
965ec727ea7Spatrick }
966a9ac8606Spatrick 
967ec727ea7Spatrick __DEVICE__
968a9ac8606Spatrick double norm4d(double __x, double __y, double __z, double __w) {
969ec727ea7Spatrick   return __ocml_len4_f64(__x, __y, __z, __w);
970ec727ea7Spatrick }
971a9ac8606Spatrick 
972ec727ea7Spatrick __DEVICE__
973a9ac8606Spatrick double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
974a9ac8606Spatrick 
975ec727ea7Spatrick __DEVICE__
976a9ac8606Spatrick double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
977a9ac8606Spatrick 
978ec727ea7Spatrick __DEVICE__
979a9ac8606Spatrick double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
980a9ac8606Spatrick 
981ec727ea7Spatrick __DEVICE__
982a9ac8606Spatrick double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
983a9ac8606Spatrick 
984ec727ea7Spatrick __DEVICE__
985a9ac8606Spatrick double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
986a9ac8606Spatrick 
987a9ac8606Spatrick __DEVICE__
988a9ac8606Spatrick double remainder(double __x, double __y) {
989ec727ea7Spatrick   return __ocml_remainder_f64(__x, __y);
990ec727ea7Spatrick }
991a9ac8606Spatrick 
992ec727ea7Spatrick __DEVICE__
993a9ac8606Spatrick double remquo(double __x, double __y, int *__quo) {
994ec727ea7Spatrick   int __tmp;
995a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
996a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
997a9ac8606Spatrick #endif
998ec727ea7Spatrick   double __r = __ocml_remquo_f64(
999ec727ea7Spatrick       __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
1000ec727ea7Spatrick   *__quo = __tmp;
1001ec727ea7Spatrick 
1002ec727ea7Spatrick   return __r;
1003ec727ea7Spatrick }
1004a9ac8606Spatrick 
1005ec727ea7Spatrick __DEVICE__
1006a9ac8606Spatrick double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
1007a9ac8606Spatrick 
1008ec727ea7Spatrick __DEVICE__
1009a9ac8606Spatrick double rint(double __x) { return __ocml_rint_f64(__x); }
1010a9ac8606Spatrick 
1011ec727ea7Spatrick __DEVICE__
1012a9ac8606Spatrick double rnorm(int __dim,
1013ec727ea7Spatrick              const double *__a) { // TODO: placeholder until OCML adds support.
1014ec727ea7Spatrick   double __r = 0;
1015ec727ea7Spatrick   while (__dim--) {
1016ec727ea7Spatrick     __r += __a[0] * __a[0];
1017ec727ea7Spatrick     ++__a;
1018ec727ea7Spatrick   }
1019ec727ea7Spatrick 
1020ec727ea7Spatrick   return __ocml_rsqrt_f64(__r);
1021ec727ea7Spatrick }
1022a9ac8606Spatrick 
1023ec727ea7Spatrick __DEVICE__
1024a9ac8606Spatrick double rnorm3d(double __x, double __y, double __z) {
1025ec727ea7Spatrick   return __ocml_rlen3_f64(__x, __y, __z);
1026ec727ea7Spatrick }
1027a9ac8606Spatrick 
1028ec727ea7Spatrick __DEVICE__
1029a9ac8606Spatrick double rnorm4d(double __x, double __y, double __z, double __w) {
1030ec727ea7Spatrick   return __ocml_rlen4_f64(__x, __y, __z, __w);
1031ec727ea7Spatrick }
1032a9ac8606Spatrick 
1033ec727ea7Spatrick __DEVICE__
1034a9ac8606Spatrick double round(double __x) { return __ocml_round_f64(__x); }
1035a9ac8606Spatrick 
1036ec727ea7Spatrick __DEVICE__
1037a9ac8606Spatrick double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
1038a9ac8606Spatrick 
1039ec727ea7Spatrick __DEVICE__
1040a9ac8606Spatrick double scalbln(double __x, long int __n) {
1041ec727ea7Spatrick   return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n)
1042ec727ea7Spatrick                          : __ocml_scalb_f64(__x, __n);
1043ec727ea7Spatrick }
1044ec727ea7Spatrick __DEVICE__
1045a9ac8606Spatrick double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); }
1046a9ac8606Spatrick 
1047ec727ea7Spatrick __DEVICE__
1048a9ac8606Spatrick __RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); }
1049a9ac8606Spatrick 
1050ec727ea7Spatrick __DEVICE__
1051a9ac8606Spatrick double sin(double __x) { return __ocml_sin_f64(__x); }
1052a9ac8606Spatrick 
1053ec727ea7Spatrick __DEVICE__
1054a9ac8606Spatrick void sincos(double __x, double *__sinptr, double *__cosptr) {
1055ec727ea7Spatrick   double __tmp;
1056a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
1057a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1058a9ac8606Spatrick #endif
1059ec727ea7Spatrick   *__sinptr = __ocml_sincos_f64(
1060ec727ea7Spatrick       __x, (__attribute__((address_space(5))) double *)&__tmp);
1061ec727ea7Spatrick   *__cosptr = __tmp;
1062ec727ea7Spatrick }
1063a9ac8606Spatrick 
1064ec727ea7Spatrick __DEVICE__
1065a9ac8606Spatrick void sincospi(double __x, double *__sinptr, double *__cosptr) {
1066ec727ea7Spatrick   double __tmp;
1067a9ac8606Spatrick #ifdef __OPENMP_AMDGCN__
1068a9ac8606Spatrick #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
1069a9ac8606Spatrick #endif
1070ec727ea7Spatrick   *__sinptr = __ocml_sincospi_f64(
1071ec727ea7Spatrick       __x, (__attribute__((address_space(5))) double *)&__tmp);
1072ec727ea7Spatrick   *__cosptr = __tmp;
1073ec727ea7Spatrick }
1074a9ac8606Spatrick 
1075ec727ea7Spatrick __DEVICE__
1076a9ac8606Spatrick double sinh(double __x) { return __ocml_sinh_f64(__x); }
1077a9ac8606Spatrick 
1078ec727ea7Spatrick __DEVICE__
1079a9ac8606Spatrick double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
1080a9ac8606Spatrick 
1081ec727ea7Spatrick __DEVICE__
1082a9ac8606Spatrick double sqrt(double __x) { return __ocml_sqrt_f64(__x); }
1083a9ac8606Spatrick 
1084ec727ea7Spatrick __DEVICE__
1085a9ac8606Spatrick double tan(double __x) { return __ocml_tan_f64(__x); }
1086a9ac8606Spatrick 
1087ec727ea7Spatrick __DEVICE__
1088a9ac8606Spatrick double tanh(double __x) { return __ocml_tanh_f64(__x); }
1089a9ac8606Spatrick 
1090ec727ea7Spatrick __DEVICE__
1091a9ac8606Spatrick double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
1092a9ac8606Spatrick 
1093ec727ea7Spatrick __DEVICE__
1094a9ac8606Spatrick double trunc(double __x) { return __ocml_trunc_f64(__x); }
1095a9ac8606Spatrick 
1096ec727ea7Spatrick __DEVICE__
1097a9ac8606Spatrick double y0(double __x) { return __ocml_y0_f64(__x); }
1098a9ac8606Spatrick 
1099ec727ea7Spatrick __DEVICE__
1100a9ac8606Spatrick double y1(double __x) { return __ocml_y1_f64(__x); }
1101a9ac8606Spatrick 
1102ec727ea7Spatrick __DEVICE__
1103a9ac8606Spatrick double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
1104ec727ea7Spatrick                                  // and the Miller & Brown algorithm
1105ec727ea7Spatrick   //       for linear recurrences to get O(log n) steps, but it's unclear if
1106ec727ea7Spatrick   //       it'd be beneficial in this case. Placeholder until OCML adds
1107ec727ea7Spatrick   //       support.
1108ec727ea7Spatrick   if (__n == 0)
1109a9ac8606Spatrick     return y0(__x);
1110ec727ea7Spatrick   if (__n == 1)
1111a9ac8606Spatrick     return y1(__x);
1112ec727ea7Spatrick 
1113a9ac8606Spatrick   double __x0 = y0(__x);
1114a9ac8606Spatrick   double __x1 = y1(__x);
1115ec727ea7Spatrick   for (int __i = 1; __i < __n; ++__i) {
1116ec727ea7Spatrick     double __x2 = (2 * __i) / __x * __x1 - __x0;
1117ec727ea7Spatrick     __x0 = __x1;
1118ec727ea7Spatrick     __x1 = __x2;
1119ec727ea7Spatrick   }
1120ec727ea7Spatrick 
1121ec727ea7Spatrick   return __x1;
1122ec727ea7Spatrick }
1123ec727ea7Spatrick 
1124ec727ea7Spatrick // BEGIN INTRINSICS
1125ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
1126ec727ea7Spatrick __DEVICE__
1127a9ac8606Spatrick double __dadd_rd(double __x, double __y) {
1128ec727ea7Spatrick   return __ocml_add_rtn_f64(__x, __y);
1129ec727ea7Spatrick }
1130ec727ea7Spatrick __DEVICE__
1131a9ac8606Spatrick double __dadd_rn(double __x, double __y) {
1132a9ac8606Spatrick   return __ocml_add_rte_f64(__x, __y);
1133a9ac8606Spatrick }
1134ec727ea7Spatrick __DEVICE__
1135a9ac8606Spatrick double __dadd_ru(double __x, double __y) {
1136ec727ea7Spatrick   return __ocml_add_rtp_f64(__x, __y);
1137ec727ea7Spatrick }
1138ec727ea7Spatrick __DEVICE__
1139a9ac8606Spatrick double __dadd_rz(double __x, double __y) {
1140ec727ea7Spatrick   return __ocml_add_rtz_f64(__x, __y);
1141ec727ea7Spatrick }
1142a9ac8606Spatrick #else
1143ec727ea7Spatrick __DEVICE__
1144a9ac8606Spatrick double __dadd_rn(double __x, double __y) { return __x + __y; }
1145ec727ea7Spatrick #endif
1146a9ac8606Spatrick 
1147ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
1148ec727ea7Spatrick __DEVICE__
1149a9ac8606Spatrick double __ddiv_rd(double __x, double __y) {
1150a9ac8606Spatrick   return __ocml_div_rtn_f64(__x, __y);
1151a9ac8606Spatrick }
1152a9ac8606Spatrick __DEVICE__
1153a9ac8606Spatrick double __ddiv_rn(double __x, double __y) {
1154a9ac8606Spatrick   return __ocml_div_rte_f64(__x, __y);
1155a9ac8606Spatrick }
1156a9ac8606Spatrick __DEVICE__
1157a9ac8606Spatrick double __ddiv_ru(double __x, double __y) {
1158ec727ea7Spatrick   return __ocml_div_rtp_f64(__x, __y);
1159ec727ea7Spatrick }
1160ec727ea7Spatrick __DEVICE__
1161a9ac8606Spatrick double __ddiv_rz(double __x, double __y) {
1162ec727ea7Spatrick   return __ocml_div_rtz_f64(__x, __y);
1163ec727ea7Spatrick }
1164a9ac8606Spatrick #else
1165ec727ea7Spatrick __DEVICE__
1166a9ac8606Spatrick double __ddiv_rn(double __x, double __y) { return __x / __y; }
1167ec727ea7Spatrick #endif
1168a9ac8606Spatrick 
1169ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
1170ec727ea7Spatrick __DEVICE__
1171a9ac8606Spatrick double __dmul_rd(double __x, double __y) {
1172a9ac8606Spatrick   return __ocml_mul_rtn_f64(__x, __y);
1173a9ac8606Spatrick }
1174a9ac8606Spatrick __DEVICE__
1175a9ac8606Spatrick double __dmul_rn(double __x, double __y) {
1176a9ac8606Spatrick   return __ocml_mul_rte_f64(__x, __y);
1177a9ac8606Spatrick }
1178a9ac8606Spatrick __DEVICE__
1179a9ac8606Spatrick double __dmul_ru(double __x, double __y) {
1180ec727ea7Spatrick   return __ocml_mul_rtp_f64(__x, __y);
1181ec727ea7Spatrick }
1182ec727ea7Spatrick __DEVICE__
1183a9ac8606Spatrick double __dmul_rz(double __x, double __y) {
1184ec727ea7Spatrick   return __ocml_mul_rtz_f64(__x, __y);
1185ec727ea7Spatrick }
1186a9ac8606Spatrick #else
1187ec727ea7Spatrick __DEVICE__
1188a9ac8606Spatrick double __dmul_rn(double __x, double __y) { return __x * __y; }
1189ec727ea7Spatrick #endif
1190a9ac8606Spatrick 
1191ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
1192ec727ea7Spatrick __DEVICE__
1193a9ac8606Spatrick double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
1194ec727ea7Spatrick __DEVICE__
1195a9ac8606Spatrick double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
1196ec727ea7Spatrick __DEVICE__
1197a9ac8606Spatrick double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
1198a9ac8606Spatrick __DEVICE__
1199a9ac8606Spatrick double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
1200a9ac8606Spatrick #else
1201a9ac8606Spatrick __DEVICE__
1202a9ac8606Spatrick double __drcp_rn(double __x) { return 1.0 / __x; }
1203ec727ea7Spatrick #endif
1204a9ac8606Spatrick 
1205ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
1206ec727ea7Spatrick __DEVICE__
1207a9ac8606Spatrick double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
1208ec727ea7Spatrick __DEVICE__
1209a9ac8606Spatrick double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
1210ec727ea7Spatrick __DEVICE__
1211a9ac8606Spatrick double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
1212a9ac8606Spatrick __DEVICE__
1213a9ac8606Spatrick double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
1214a9ac8606Spatrick #else
1215a9ac8606Spatrick __DEVICE__
1216a9ac8606Spatrick double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); }
1217a9ac8606Spatrick #endif
1218a9ac8606Spatrick 
1219a9ac8606Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
1220a9ac8606Spatrick __DEVICE__
1221a9ac8606Spatrick double __dsub_rd(double __x, double __y) {
1222ec727ea7Spatrick   return __ocml_sub_rtn_f64(__x, __y);
1223ec727ea7Spatrick }
1224ec727ea7Spatrick __DEVICE__
1225a9ac8606Spatrick double __dsub_rn(double __x, double __y) {
1226a9ac8606Spatrick   return __ocml_sub_rte_f64(__x, __y);
1227a9ac8606Spatrick }
1228ec727ea7Spatrick __DEVICE__
1229a9ac8606Spatrick double __dsub_ru(double __x, double __y) {
1230ec727ea7Spatrick   return __ocml_sub_rtp_f64(__x, __y);
1231ec727ea7Spatrick }
1232ec727ea7Spatrick __DEVICE__
1233a9ac8606Spatrick double __dsub_rz(double __x, double __y) {
1234ec727ea7Spatrick   return __ocml_sub_rtz_f64(__x, __y);
1235ec727ea7Spatrick }
1236a9ac8606Spatrick #else
1237ec727ea7Spatrick __DEVICE__
1238a9ac8606Spatrick double __dsub_rn(double __x, double __y) { return __x - __y; }
1239ec727ea7Spatrick #endif
1240a9ac8606Spatrick 
1241ec727ea7Spatrick #if defined OCML_BASIC_ROUNDED_OPERATIONS
1242ec727ea7Spatrick __DEVICE__
1243a9ac8606Spatrick double __fma_rd(double __x, double __y, double __z) {
1244a9ac8606Spatrick   return __ocml_fma_rtn_f64(__x, __y, __z);
1245a9ac8606Spatrick }
1246a9ac8606Spatrick __DEVICE__
1247a9ac8606Spatrick double __fma_rn(double __x, double __y, double __z) {
1248a9ac8606Spatrick   return __ocml_fma_rte_f64(__x, __y, __z);
1249a9ac8606Spatrick }
1250a9ac8606Spatrick __DEVICE__
1251a9ac8606Spatrick double __fma_ru(double __x, double __y, double __z) {
1252ec727ea7Spatrick   return __ocml_fma_rtp_f64(__x, __y, __z);
1253ec727ea7Spatrick }
1254ec727ea7Spatrick __DEVICE__
1255a9ac8606Spatrick double __fma_rz(double __x, double __y, double __z) {
1256ec727ea7Spatrick   return __ocml_fma_rtz_f64(__x, __y, __z);
1257ec727ea7Spatrick }
1258a9ac8606Spatrick #else
1259a9ac8606Spatrick __DEVICE__
1260a9ac8606Spatrick double __fma_rn(double __x, double __y, double __z) {
1261a9ac8606Spatrick   return __ocml_fma_f64(__x, __y, __z);
1262a9ac8606Spatrick }
1263ec727ea7Spatrick #endif
1264ec727ea7Spatrick // END INTRINSICS
1265ec727ea7Spatrick // END DOUBLE
1266ec727ea7Spatrick 
1267a9ac8606Spatrick // C only macros
1268a9ac8606Spatrick #if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1269a9ac8606Spatrick #define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1270a9ac8606Spatrick #define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1271a9ac8606Spatrick #define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1272a9ac8606Spatrick #define signbit(__x)                                                           \
1273a9ac8606Spatrick   _Generic((__x), float : __signbitf, double : __signbit)(__x)
1274a9ac8606Spatrick #endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1275ec727ea7Spatrick 
1276ec727ea7Spatrick #if defined(__cplusplus)
1277a9ac8606Spatrick template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
1278ec727ea7Spatrick   return (__arg1 < __arg2) ? __arg1 : __arg2;
1279ec727ea7Spatrick }
1280ec727ea7Spatrick 
1281a9ac8606Spatrick template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
1282ec727ea7Spatrick   return (__arg1 > __arg2) ? __arg1 : __arg2;
1283ec727ea7Spatrick }
1284ec727ea7Spatrick 
1285a9ac8606Spatrick __DEVICE__ int min(int __arg1, int __arg2) {
1286ec727ea7Spatrick   return (__arg1 < __arg2) ? __arg1 : __arg2;
1287ec727ea7Spatrick }
1288a9ac8606Spatrick __DEVICE__ int max(int __arg1, int __arg2) {
1289ec727ea7Spatrick   return (__arg1 > __arg2) ? __arg1 : __arg2;
1290ec727ea7Spatrick }
1291ec727ea7Spatrick 
1292ec727ea7Spatrick __DEVICE__
1293a9ac8606Spatrick float max(float __x, float __y) { return fmaxf(__x, __y); }
1294ec727ea7Spatrick 
1295ec727ea7Spatrick __DEVICE__
1296a9ac8606Spatrick double max(double __x, double __y) { return fmax(__x, __y); }
1297ec727ea7Spatrick 
1298ec727ea7Spatrick __DEVICE__
1299a9ac8606Spatrick float min(float __x, float __y) { return fminf(__x, __y); }
1300ec727ea7Spatrick 
1301ec727ea7Spatrick __DEVICE__
1302a9ac8606Spatrick double min(double __x, double __y) { return fmin(__x, __y); }
1303ec727ea7Spatrick 
1304a9ac8606Spatrick #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1305ec727ea7Spatrick __host__ inline static int min(int __arg1, int __arg2) {
1306ec727ea7Spatrick   return std::min(__arg1, __arg2);
1307ec727ea7Spatrick }
1308ec727ea7Spatrick 
1309ec727ea7Spatrick __host__ inline static int max(int __arg1, int __arg2) {
1310ec727ea7Spatrick   return std::max(__arg1, __arg2);
1311ec727ea7Spatrick }
1312a9ac8606Spatrick #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1313a9ac8606Spatrick #endif
1314ec727ea7Spatrick 
1315ec727ea7Spatrick #pragma pop_macro("__DEVICE__")
1316ec727ea7Spatrick #pragma pop_macro("__RETURN_TYPE")
1317ec727ea7Spatrick 
1318ec727ea7Spatrick #endif // __CLANG_HIP_MATH_H__
1319