1 /*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __CLANG_CUDA_COMPLEX_BUILTINS 11 #define __CLANG_CUDA_COMPLEX_BUILTINS 12 13 // This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are 14 // libgcc functions that clang assumes are available when compiling c99 complex 15 // operations. (These implementations come from libc++, and have been modified 16 // to work with CUDA and OpenMP target offloading [in C and C++ mode].) 17 18 #pragma push_macro("__DEVICE__") 19 #ifdef _OPENMP 20 #pragma omp declare target 21 #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) 22 #else 23 #define __DEVICE__ __device__ inline 24 #endif 25 26 // To make the algorithms available for C and C++ in CUDA and OpenMP we select 27 // different but equivalent function versions. TODO: For OpenMP we currently 28 // select the native builtins as the overload support for templates is lacking. 29 #if !defined(_OPENMP) 30 #define _ISNANd std::isnan 31 #define _ISNANf std::isnan 32 #define _ISINFd std::isinf 33 #define _ISINFf std::isinf 34 #define _ISFINITEd std::isfinite 35 #define _ISFINITEf std::isfinite 36 #define _COPYSIGNd std::copysign 37 #define _COPYSIGNf std::copysign 38 #define _SCALBNd std::scalbn 39 #define _SCALBNf std::scalbn 40 #define _ABSd std::abs 41 #define _ABSf std::abs 42 #define _LOGBd std::logb 43 #define _LOGBf std::logb 44 #else 45 #define _ISNANd __nv_isnand 46 #define _ISNANf __nv_isnanf 47 #define _ISINFd __nv_isinfd 48 #define _ISINFf __nv_isinff 49 #define _ISFINITEd __nv_isfinited 50 #define _ISFINITEf __nv_finitef 51 #define _COPYSIGNd __nv_copysign 52 #define _COPYSIGNf __nv_copysignf 53 #define _SCALBNd __nv_scalbn 54 #define _SCALBNf __nv_scalbnf 55 #define _ABSd __nv_fabs 56 #define _ABSf __nv_fabsf 57 #define _LOGBd __nv_logb 58 #define _LOGBf __nv_logbf 59 #endif 60 61 #if defined(__cplusplus) 62 extern "C" { 63 #endif 64 65 __DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, 66 double __d) { 67 double __ac = __a * __c; 68 double __bd = __b * __d; 69 double __ad = __a * __d; 70 double __bc = __b * __c; 71 double _Complex z; 72 __real__(z) = __ac - __bd; 73 __imag__(z) = __ad + __bc; 74 if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { 75 int __recalc = 0; 76 if (_ISINFd(__a) || _ISINFd(__b)) { 77 __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a); 78 __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b); 79 if (_ISNANd(__c)) 80 __c = _COPYSIGNd(0, __c); 81 if (_ISNANd(__d)) 82 __d = _COPYSIGNd(0, __d); 83 __recalc = 1; 84 } 85 if (_ISINFd(__c) || _ISINFd(__d)) { 86 __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c); 87 __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d); 88 if (_ISNANd(__a)) 89 __a = _COPYSIGNd(0, __a); 90 if (_ISNANd(__b)) 91 __b = _COPYSIGNd(0, __b); 92 __recalc = 1; 93 } 94 if (!__recalc && 95 (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) { 96 if (_ISNANd(__a)) 97 __a = _COPYSIGNd(0, __a); 98 if (_ISNANd(__b)) 99 __b = _COPYSIGNd(0, __b); 100 if (_ISNANd(__c)) 101 __c = _COPYSIGNd(0, __c); 102 if (_ISNANd(__d)) 103 __d = _COPYSIGNd(0, __d); 104 __recalc = 1; 105 } 106 if (__recalc) { 107 // Can't use std::numeric_limits<double>::infinity() -- that doesn't have 108 // a device overload (and isn't constexpr before C++11, naturally). 109 __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); 110 __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); 111 } 112 } 113 return z; 114 } 115 116 __DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { 117 float __ac = __a * __c; 118 float __bd = __b * __d; 119 float __ad = __a * __d; 120 float __bc = __b * __c; 121 float _Complex z; 122 __real__(z) = __ac - __bd; 123 __imag__(z) = __ad + __bc; 124 if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { 125 int __recalc = 0; 126 if (_ISINFf(__a) || _ISINFf(__b)) { 127 __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); 128 __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); 129 if (_ISNANf(__c)) 130 __c = _COPYSIGNf(0, __c); 131 if (_ISNANf(__d)) 132 __d = _COPYSIGNf(0, __d); 133 __recalc = 1; 134 } 135 if (_ISINFf(__c) || _ISINFf(__d)) { 136 __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); 137 __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); 138 if (_ISNANf(__a)) 139 __a = _COPYSIGNf(0, __a); 140 if (_ISNANf(__b)) 141 __b = _COPYSIGNf(0, __b); 142 __recalc = 1; 143 } 144 if (!__recalc && 145 (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) { 146 if (_ISNANf(__a)) 147 __a = _COPYSIGNf(0, __a); 148 if (_ISNANf(__b)) 149 __b = _COPYSIGNf(0, __b); 150 if (_ISNANf(__c)) 151 __c = _COPYSIGNf(0, __c); 152 if (_ISNANf(__d)) 153 __d = _COPYSIGNf(0, __d); 154 __recalc = 1; 155 } 156 if (__recalc) { 157 __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); 158 __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); 159 } 160 } 161 return z; 162 } 163 164 __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, 165 double __d) { 166 int __ilogbw = 0; 167 // Can't use std::max, because that's defined in <algorithm>, and we don't 168 // want to pull that in for every compile. The CUDA headers define 169 // ::max(float, float) and ::max(double, double), which is sufficient for us. 170 double __logbw = _LOGBd(max(_ABSd(__c), _ABSd(__d))); 171 if (_ISFINITEd(__logbw)) { 172 __ilogbw = (int)__logbw; 173 __c = _SCALBNd(__c, -__ilogbw); 174 __d = _SCALBNd(__d, -__ilogbw); 175 } 176 double __denom = __c * __c + __d * __d; 177 double _Complex z; 178 __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw); 179 __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw); 180 if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { 181 if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) { 182 __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a; 183 __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b; 184 } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) && 185 _ISFINITEd(__d)) { 186 __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a); 187 __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b); 188 __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); 189 __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); 190 } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) && 191 _ISFINITEd(__b)) { 192 __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c); 193 __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d); 194 __real__(z) = 0.0 * (__a * __c + __b * __d); 195 __imag__(z) = 0.0 * (__b * __c - __a * __d); 196 } 197 } 198 return z; 199 } 200 201 __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { 202 int __ilogbw = 0; 203 float __logbw = _LOGBf(max(_ABSf(__c), _ABSf(__d))); 204 if (_ISFINITEf(__logbw)) { 205 __ilogbw = (int)__logbw; 206 __c = _SCALBNf(__c, -__ilogbw); 207 __d = _SCALBNf(__d, -__ilogbw); 208 } 209 float __denom = __c * __c + __d * __d; 210 float _Complex z; 211 __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw); 212 __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw); 213 if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { 214 if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) { 215 __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a; 216 __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b; 217 } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) && 218 _ISFINITEf(__d)) { 219 __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); 220 __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); 221 __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); 222 __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); 223 } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) && 224 _ISFINITEf(__b)) { 225 __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); 226 __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); 227 __real__(z) = 0 * (__a * __c + __b * __d); 228 __imag__(z) = 0 * (__b * __c - __a * __d); 229 } 230 } 231 return z; 232 } 233 234 #if defined(__cplusplus) 235 } // extern "C" 236 #endif 237 238 #undef _ISNANd 239 #undef _ISNANf 240 #undef _ISINFd 241 #undef _ISINFf 242 #undef _COPYSIGNd 243 #undef _COPYSIGNf 244 #undef _ISFINITEd 245 #undef _ISFINITEf 246 #undef _SCALBNd 247 #undef _SCALBNf 248 #undef _ABSd 249 #undef _ABSf 250 #undef _LOGBd 251 #undef _LOGBf 252 253 #ifdef _OPENMP 254 #pragma omp end declare target 255 #endif 256 257 #pragma pop_macro("__DEVICE__") 258 259 #endif // __CLANG_CUDA_COMPLEX_BUILTINS 260