10b57cec5SDimitry Andric /*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---===
20b57cec5SDimitry Andric  *
30b57cec5SDimitry Andric  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric  * See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric  *
70b57cec5SDimitry Andric  *===-----------------------------------------------------------------------===
80b57cec5SDimitry Andric  */
90b57cec5SDimitry Andric 
100b57cec5SDimitry Andric #ifndef __CLANG_CUDA_COMPLEX_BUILTINS
110b57cec5SDimitry Andric #define __CLANG_CUDA_COMPLEX_BUILTINS
120b57cec5SDimitry Andric 
130b57cec5SDimitry Andric // This header defines __muldc3, __mulsc3, __divdc3, and __divsc3.  These are
140b57cec5SDimitry Andric // libgcc functions that clang assumes are available when compiling c99 complex
150b57cec5SDimitry Andric // operations.  (These implementations come from libc++, and have been modified
165ffd83dbSDimitry Andric // to work with CUDA and OpenMP target offloading [in C and C++ mode].)
170b57cec5SDimitry Andric 
185ffd83dbSDimitry Andric #pragma push_macro("__DEVICE__")
19349cc55cSDimitry Andric #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
205ffd83dbSDimitry Andric #pragma omp declare target
215ffd83dbSDimitry Andric #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
225ffd83dbSDimitry Andric #else
235ffd83dbSDimitry Andric #define __DEVICE__ __device__ inline
245ffd83dbSDimitry Andric #endif
255ffd83dbSDimitry Andric 
265ffd83dbSDimitry Andric // To make the algorithms available for C and C++ in CUDA and OpenMP we select
275ffd83dbSDimitry Andric // different but equivalent function versions. TODO: For OpenMP we currently
285ffd83dbSDimitry Andric // select the native builtins as the overload support for templates is lacking.
29349cc55cSDimitry Andric #if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__)
305ffd83dbSDimitry Andric #define _ISNANd std::isnan
315ffd83dbSDimitry Andric #define _ISNANf std::isnan
325ffd83dbSDimitry Andric #define _ISINFd std::isinf
335ffd83dbSDimitry Andric #define _ISINFf std::isinf
345ffd83dbSDimitry Andric #define _ISFINITEd std::isfinite
355ffd83dbSDimitry Andric #define _ISFINITEf std::isfinite
365ffd83dbSDimitry Andric #define _COPYSIGNd std::copysign
375ffd83dbSDimitry Andric #define _COPYSIGNf std::copysign
385ffd83dbSDimitry Andric #define _SCALBNd std::scalbn
395ffd83dbSDimitry Andric #define _SCALBNf std::scalbn
405ffd83dbSDimitry Andric #define _ABSd std::abs
415ffd83dbSDimitry Andric #define _ABSf std::abs
425ffd83dbSDimitry Andric #define _LOGBd std::logb
435ffd83dbSDimitry Andric #define _LOGBf std::logb
44e8d8bef9SDimitry Andric // Rather than pulling in std::max from algorithm everytime, use available ::max.
45e8d8bef9SDimitry Andric #define _fmaxd max
46e8d8bef9SDimitry Andric #define _fmaxf max
47e8d8bef9SDimitry Andric #else
48e8d8bef9SDimitry Andric #ifdef __AMDGCN__
49e8d8bef9SDimitry Andric #define _ISNANd __ocml_isnan_f64
50e8d8bef9SDimitry Andric #define _ISNANf __ocml_isnan_f32
51e8d8bef9SDimitry Andric #define _ISINFd __ocml_isinf_f64
52e8d8bef9SDimitry Andric #define _ISINFf __ocml_isinf_f32
53e8d8bef9SDimitry Andric #define _ISFINITEd __ocml_isfinite_f64
54e8d8bef9SDimitry Andric #define _ISFINITEf __ocml_isfinite_f32
55e8d8bef9SDimitry Andric #define _COPYSIGNd __ocml_copysign_f64
56e8d8bef9SDimitry Andric #define _COPYSIGNf __ocml_copysign_f32
57e8d8bef9SDimitry Andric #define _SCALBNd __ocml_scalbn_f64
58e8d8bef9SDimitry Andric #define _SCALBNf __ocml_scalbn_f32
59e8d8bef9SDimitry Andric #define _ABSd __ocml_fabs_f64
60e8d8bef9SDimitry Andric #define _ABSf __ocml_fabs_f32
61e8d8bef9SDimitry Andric #define _LOGBd __ocml_logb_f64
62e8d8bef9SDimitry Andric #define _LOGBf __ocml_logb_f32
63e8d8bef9SDimitry Andric #define _fmaxd __ocml_fmax_f64
64e8d8bef9SDimitry Andric #define _fmaxf __ocml_fmax_f32
655ffd83dbSDimitry Andric #else
665ffd83dbSDimitry Andric #define _ISNANd __nv_isnand
675ffd83dbSDimitry Andric #define _ISNANf __nv_isnanf
685ffd83dbSDimitry Andric #define _ISINFd __nv_isinfd
695ffd83dbSDimitry Andric #define _ISINFf __nv_isinff
705ffd83dbSDimitry Andric #define _ISFINITEd __nv_isfinited
715ffd83dbSDimitry Andric #define _ISFINITEf __nv_finitef
725ffd83dbSDimitry Andric #define _COPYSIGNd __nv_copysign
735ffd83dbSDimitry Andric #define _COPYSIGNf __nv_copysignf
745ffd83dbSDimitry Andric #define _SCALBNd __nv_scalbn
755ffd83dbSDimitry Andric #define _SCALBNf __nv_scalbnf
765ffd83dbSDimitry Andric #define _ABSd __nv_fabs
775ffd83dbSDimitry Andric #define _ABSf __nv_fabsf
785ffd83dbSDimitry Andric #define _LOGBd __nv_logb
795ffd83dbSDimitry Andric #define _LOGBf __nv_logbf
80e8d8bef9SDimitry Andric #define _fmaxd __nv_fmax
81e8d8bef9SDimitry Andric #define _fmaxf __nv_fmaxf
82e8d8bef9SDimitry Andric #endif
835ffd83dbSDimitry Andric #endif
845ffd83dbSDimitry Andric 
855ffd83dbSDimitry Andric #if defined(__cplusplus)
865ffd83dbSDimitry Andric extern "C" {
875ffd83dbSDimitry Andric #endif
885ffd83dbSDimitry Andric 
__muldc3(double __a,double __b,double __c,double __d)895ffd83dbSDimitry Andric __DEVICE__ double _Complex __muldc3(double __a, double __b, double __c,
905ffd83dbSDimitry Andric                                     double __d) {
910b57cec5SDimitry Andric   double __ac = __a * __c;
920b57cec5SDimitry Andric   double __bd = __b * __d;
930b57cec5SDimitry Andric   double __ad = __a * __d;
940b57cec5SDimitry Andric   double __bc = __b * __c;
950b57cec5SDimitry Andric   double _Complex z;
960b57cec5SDimitry Andric   __real__(z) = __ac - __bd;
970b57cec5SDimitry Andric   __imag__(z) = __ad + __bc;
985ffd83dbSDimitry Andric   if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
990b57cec5SDimitry Andric     int __recalc = 0;
1005ffd83dbSDimitry Andric     if (_ISINFd(__a) || _ISINFd(__b)) {
1015ffd83dbSDimitry Andric       __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a);
1025ffd83dbSDimitry Andric       __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b);
1035ffd83dbSDimitry Andric       if (_ISNANd(__c))
1045ffd83dbSDimitry Andric         __c = _COPYSIGNd(0, __c);
1055ffd83dbSDimitry Andric       if (_ISNANd(__d))
1065ffd83dbSDimitry Andric         __d = _COPYSIGNd(0, __d);
1070b57cec5SDimitry Andric       __recalc = 1;
1080b57cec5SDimitry Andric     }
1095ffd83dbSDimitry Andric     if (_ISINFd(__c) || _ISINFd(__d)) {
1105ffd83dbSDimitry Andric       __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c);
1115ffd83dbSDimitry Andric       __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d);
1125ffd83dbSDimitry Andric       if (_ISNANd(__a))
1135ffd83dbSDimitry Andric         __a = _COPYSIGNd(0, __a);
1145ffd83dbSDimitry Andric       if (_ISNANd(__b))
1155ffd83dbSDimitry Andric         __b = _COPYSIGNd(0, __b);
1160b57cec5SDimitry Andric       __recalc = 1;
1170b57cec5SDimitry Andric     }
1185ffd83dbSDimitry Andric     if (!__recalc &&
1195ffd83dbSDimitry Andric         (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) {
1205ffd83dbSDimitry Andric       if (_ISNANd(__a))
1215ffd83dbSDimitry Andric         __a = _COPYSIGNd(0, __a);
1225ffd83dbSDimitry Andric       if (_ISNANd(__b))
1235ffd83dbSDimitry Andric         __b = _COPYSIGNd(0, __b);
1245ffd83dbSDimitry Andric       if (_ISNANd(__c))
1255ffd83dbSDimitry Andric         __c = _COPYSIGNd(0, __c);
1265ffd83dbSDimitry Andric       if (_ISNANd(__d))
1275ffd83dbSDimitry Andric         __d = _COPYSIGNd(0, __d);
1280b57cec5SDimitry Andric       __recalc = 1;
1290b57cec5SDimitry Andric     }
1300b57cec5SDimitry Andric     if (__recalc) {
1310b57cec5SDimitry Andric       // Can't use std::numeric_limits<double>::infinity() -- that doesn't have
1320b57cec5SDimitry Andric       // a device overload (and isn't constexpr before C++11, naturally).
1335ffd83dbSDimitry Andric       __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d);
1345ffd83dbSDimitry Andric       __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c);
1350b57cec5SDimitry Andric     }
1360b57cec5SDimitry Andric   }
1370b57cec5SDimitry Andric   return z;
1380b57cec5SDimitry Andric }
1390b57cec5SDimitry Andric 
__mulsc3(float __a,float __b,float __c,float __d)1405ffd83dbSDimitry Andric __DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
1410b57cec5SDimitry Andric   float __ac = __a * __c;
1420b57cec5SDimitry Andric   float __bd = __b * __d;
1430b57cec5SDimitry Andric   float __ad = __a * __d;
1440b57cec5SDimitry Andric   float __bc = __b * __c;
1450b57cec5SDimitry Andric   float _Complex z;
1460b57cec5SDimitry Andric   __real__(z) = __ac - __bd;
1470b57cec5SDimitry Andric   __imag__(z) = __ad + __bc;
1485ffd83dbSDimitry Andric   if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
1490b57cec5SDimitry Andric     int __recalc = 0;
1505ffd83dbSDimitry Andric     if (_ISINFf(__a) || _ISINFf(__b)) {
1515ffd83dbSDimitry Andric       __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
1525ffd83dbSDimitry Andric       __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
1535ffd83dbSDimitry Andric       if (_ISNANf(__c))
1545ffd83dbSDimitry Andric         __c = _COPYSIGNf(0, __c);
1555ffd83dbSDimitry Andric       if (_ISNANf(__d))
1565ffd83dbSDimitry Andric         __d = _COPYSIGNf(0, __d);
1570b57cec5SDimitry Andric       __recalc = 1;
1580b57cec5SDimitry Andric     }
1595ffd83dbSDimitry Andric     if (_ISINFf(__c) || _ISINFf(__d)) {
1605ffd83dbSDimitry Andric       __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
1615ffd83dbSDimitry Andric       __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
1625ffd83dbSDimitry Andric       if (_ISNANf(__a))
1635ffd83dbSDimitry Andric         __a = _COPYSIGNf(0, __a);
1645ffd83dbSDimitry Andric       if (_ISNANf(__b))
1655ffd83dbSDimitry Andric         __b = _COPYSIGNf(0, __b);
1660b57cec5SDimitry Andric       __recalc = 1;
1670b57cec5SDimitry Andric     }
1685ffd83dbSDimitry Andric     if (!__recalc &&
1695ffd83dbSDimitry Andric         (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) {
1705ffd83dbSDimitry Andric       if (_ISNANf(__a))
1715ffd83dbSDimitry Andric         __a = _COPYSIGNf(0, __a);
1725ffd83dbSDimitry Andric       if (_ISNANf(__b))
1735ffd83dbSDimitry Andric         __b = _COPYSIGNf(0, __b);
1745ffd83dbSDimitry Andric       if (_ISNANf(__c))
1755ffd83dbSDimitry Andric         __c = _COPYSIGNf(0, __c);
1765ffd83dbSDimitry Andric       if (_ISNANf(__d))
1775ffd83dbSDimitry Andric         __d = _COPYSIGNf(0, __d);
1780b57cec5SDimitry Andric       __recalc = 1;
1790b57cec5SDimitry Andric     }
1800b57cec5SDimitry Andric     if (__recalc) {
1810b57cec5SDimitry Andric       __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d);
1820b57cec5SDimitry Andric       __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c);
1830b57cec5SDimitry Andric     }
1840b57cec5SDimitry Andric   }
1850b57cec5SDimitry Andric   return z;
1860b57cec5SDimitry Andric }
1870b57cec5SDimitry Andric 
__divdc3(double __a,double __b,double __c,double __d)1885ffd83dbSDimitry Andric __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c,
1895ffd83dbSDimitry Andric                                     double __d) {
1900b57cec5SDimitry Andric   int __ilogbw = 0;
1910b57cec5SDimitry Andric   // Can't use std::max, because that's defined in <algorithm>, and we don't
1920b57cec5SDimitry Andric   // want to pull that in for every compile.  The CUDA headers define
1930b57cec5SDimitry Andric   // ::max(float, float) and ::max(double, double), which is sufficient for us.
194e8d8bef9SDimitry Andric   double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d)));
1955ffd83dbSDimitry Andric   if (_ISFINITEd(__logbw)) {
1960b57cec5SDimitry Andric     __ilogbw = (int)__logbw;
1975ffd83dbSDimitry Andric     __c = _SCALBNd(__c, -__ilogbw);
1985ffd83dbSDimitry Andric     __d = _SCALBNd(__d, -__ilogbw);
1990b57cec5SDimitry Andric   }
2000b57cec5SDimitry Andric   double __denom = __c * __c + __d * __d;
2010b57cec5SDimitry Andric   double _Complex z;
2025ffd83dbSDimitry Andric   __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw);
2035ffd83dbSDimitry Andric   __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw);
2045ffd83dbSDimitry Andric   if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) {
2055ffd83dbSDimitry Andric     if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) {
2065ffd83dbSDimitry Andric       __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a;
2075ffd83dbSDimitry Andric       __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b;
2085ffd83dbSDimitry Andric     } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) &&
2095ffd83dbSDimitry Andric                _ISFINITEd(__d)) {
2105ffd83dbSDimitry Andric       __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a);
2115ffd83dbSDimitry Andric       __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b);
2125ffd83dbSDimitry Andric       __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d);
2135ffd83dbSDimitry Andric       __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d);
2145ffd83dbSDimitry Andric     } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) &&
2155ffd83dbSDimitry Andric                _ISFINITEd(__b)) {
2165ffd83dbSDimitry Andric       __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c);
2175ffd83dbSDimitry Andric       __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d);
2180b57cec5SDimitry Andric       __real__(z) = 0.0 * (__a * __c + __b * __d);
2190b57cec5SDimitry Andric       __imag__(z) = 0.0 * (__b * __c - __a * __d);
2200b57cec5SDimitry Andric     }
2210b57cec5SDimitry Andric   }
2220b57cec5SDimitry Andric   return z;
2230b57cec5SDimitry Andric }
2240b57cec5SDimitry Andric 
__divsc3(float __a,float __b,float __c,float __d)2255ffd83dbSDimitry Andric __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
2260b57cec5SDimitry Andric   int __ilogbw = 0;
227e8d8bef9SDimitry Andric   float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d)));
2285ffd83dbSDimitry Andric   if (_ISFINITEf(__logbw)) {
2290b57cec5SDimitry Andric     __ilogbw = (int)__logbw;
2305ffd83dbSDimitry Andric     __c = _SCALBNf(__c, -__ilogbw);
2315ffd83dbSDimitry Andric     __d = _SCALBNf(__d, -__ilogbw);
2320b57cec5SDimitry Andric   }
2330b57cec5SDimitry Andric   float __denom = __c * __c + __d * __d;
2340b57cec5SDimitry Andric   float _Complex z;
2355ffd83dbSDimitry Andric   __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw);
2365ffd83dbSDimitry Andric   __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw);
2375ffd83dbSDimitry Andric   if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) {
2385ffd83dbSDimitry Andric     if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) {
2395ffd83dbSDimitry Andric       __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a;
2405ffd83dbSDimitry Andric       __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b;
2415ffd83dbSDimitry Andric     } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) &&
2425ffd83dbSDimitry Andric                _ISFINITEf(__d)) {
2435ffd83dbSDimitry Andric       __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a);
2445ffd83dbSDimitry Andric       __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b);
2450b57cec5SDimitry Andric       __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d);
2460b57cec5SDimitry Andric       __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d);
2475ffd83dbSDimitry Andric     } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) &&
2485ffd83dbSDimitry Andric                _ISFINITEf(__b)) {
2495ffd83dbSDimitry Andric       __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c);
2505ffd83dbSDimitry Andric       __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d);
2510b57cec5SDimitry Andric       __real__(z) = 0 * (__a * __c + __b * __d);
2520b57cec5SDimitry Andric       __imag__(z) = 0 * (__b * __c - __a * __d);
2530b57cec5SDimitry Andric     }
2540b57cec5SDimitry Andric   }
2550b57cec5SDimitry Andric   return z;
2560b57cec5SDimitry Andric }
2570b57cec5SDimitry Andric 
2585ffd83dbSDimitry Andric #if defined(__cplusplus)
2595ffd83dbSDimitry Andric } // extern "C"
2605ffd83dbSDimitry Andric #endif
2615ffd83dbSDimitry Andric 
2625ffd83dbSDimitry Andric #undef _ISNANd
2635ffd83dbSDimitry Andric #undef _ISNANf
2645ffd83dbSDimitry Andric #undef _ISINFd
2655ffd83dbSDimitry Andric #undef _ISINFf
2665ffd83dbSDimitry Andric #undef _COPYSIGNd
2675ffd83dbSDimitry Andric #undef _COPYSIGNf
2685ffd83dbSDimitry Andric #undef _ISFINITEd
2695ffd83dbSDimitry Andric #undef _ISFINITEf
2705ffd83dbSDimitry Andric #undef _SCALBNd
2715ffd83dbSDimitry Andric #undef _SCALBNf
2725ffd83dbSDimitry Andric #undef _ABSd
2735ffd83dbSDimitry Andric #undef _ABSf
2745ffd83dbSDimitry Andric #undef _LOGBd
2755ffd83dbSDimitry Andric #undef _LOGBf
276e8d8bef9SDimitry Andric #undef _fmaxd
277e8d8bef9SDimitry Andric #undef _fmaxf
2785ffd83dbSDimitry Andric 
279349cc55cSDimitry Andric #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__)
2805ffd83dbSDimitry Andric #pragma omp end declare target
2815ffd83dbSDimitry Andric #endif
2825ffd83dbSDimitry Andric 
2835ffd83dbSDimitry Andric #pragma pop_macro("__DEVICE__")
2845ffd83dbSDimitry Andric 
2850b57cec5SDimitry Andric #endif // __CLANG_CUDA_COMPLEX_BUILTINS
286