1 /*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __CLANG_CUDA_COMPLEX_BUILTINS 11 #define __CLANG_CUDA_COMPLEX_BUILTINS 12 13 // This header defines __muldc3, __mulsc3, __divdc3, and __divsc3. These are 14 // libgcc functions that clang assumes are available when compiling c99 complex 15 // operations. (These implementations come from libc++, and have been modified 16 // to work with CUDA and OpenMP target offloading [in C and C++ mode].) 17 18 #pragma push_macro("__DEVICE__") 19 #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) 20 #pragma omp declare target 21 #define __DEVICE__ __attribute__((noinline, nothrow, cold, weak)) 22 #else 23 #define __DEVICE__ __device__ inline 24 #endif 25 26 // To make the algorithms available for C and C++ in CUDA and OpenMP we select 27 // different but equivalent function versions. TODO: For OpenMP we currently 28 // select the native builtins as the overload support for templates is lacking. 29 #if !defined(__OPENMP_NVPTX__) && !defined(__OPENMP_AMDGCN__) 30 #define _ISNANd std::isnan 31 #define _ISNANf std::isnan 32 #define _ISINFd std::isinf 33 #define _ISINFf std::isinf 34 #define _ISFINITEd std::isfinite 35 #define _ISFINITEf std::isfinite 36 #define _COPYSIGNd std::copysign 37 #define _COPYSIGNf std::copysign 38 #define _SCALBNd std::scalbn 39 #define _SCALBNf std::scalbn 40 #define _ABSd std::abs 41 #define _ABSf std::abs 42 #define _LOGBd std::logb 43 #define _LOGBf std::logb 44 // Rather than pulling in std::max from algorithm everytime, use available ::max. 45 #define _fmaxd max 46 #define _fmaxf max 47 #else 48 #ifdef __AMDGCN__ 49 #define _ISNANd __ocml_isnan_f64 50 #define _ISNANf __ocml_isnan_f32 51 #define _ISINFd __ocml_isinf_f64 52 #define _ISINFf __ocml_isinf_f32 53 #define _ISFINITEd __ocml_isfinite_f64 54 #define _ISFINITEf __ocml_isfinite_f32 55 #define _COPYSIGNd __ocml_copysign_f64 56 #define _COPYSIGNf __ocml_copysign_f32 57 #define _SCALBNd __ocml_scalbn_f64 58 #define _SCALBNf __ocml_scalbn_f32 59 #define _ABSd __ocml_fabs_f64 60 #define _ABSf __ocml_fabs_f32 61 #define _LOGBd __ocml_logb_f64 62 #define _LOGBf __ocml_logb_f32 63 #define _fmaxd __ocml_fmax_f64 64 #define _fmaxf __ocml_fmax_f32 65 #else 66 #define _ISNANd __nv_isnand 67 #define _ISNANf __nv_isnanf 68 #define _ISINFd __nv_isinfd 69 #define _ISINFf __nv_isinff 70 #define _ISFINITEd __nv_isfinited 71 #define _ISFINITEf __nv_finitef 72 #define _COPYSIGNd __nv_copysign 73 #define _COPYSIGNf __nv_copysignf 74 #define _SCALBNd __nv_scalbn 75 #define _SCALBNf __nv_scalbnf 76 #define _ABSd __nv_fabs 77 #define _ABSf __nv_fabsf 78 #define _LOGBd __nv_logb 79 #define _LOGBf __nv_logbf 80 #define _fmaxd __nv_fmax 81 #define _fmaxf __nv_fmaxf 82 #endif 83 #endif 84 85 #if defined(__cplusplus) 86 extern "C" { 87 #endif 88 89 __DEVICE__ double _Complex __muldc3(double __a, double __b, double __c, 90 double __d) { 91 double __ac = __a * __c; 92 double __bd = __b * __d; 93 double __ad = __a * __d; 94 double __bc = __b * __c; 95 double _Complex z; 96 __real__(z) = __ac - __bd; 97 __imag__(z) = __ad + __bc; 98 if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { 99 int __recalc = 0; 100 if (_ISINFd(__a) || _ISINFd(__b)) { 101 __a = _COPYSIGNd(_ISINFd(__a) ? 1 : 0, __a); 102 __b = _COPYSIGNd(_ISINFd(__b) ? 1 : 0, __b); 103 if (_ISNANd(__c)) 104 __c = _COPYSIGNd(0, __c); 105 if (_ISNANd(__d)) 106 __d = _COPYSIGNd(0, __d); 107 __recalc = 1; 108 } 109 if (_ISINFd(__c) || _ISINFd(__d)) { 110 __c = _COPYSIGNd(_ISINFd(__c) ? 1 : 0, __c); 111 __d = _COPYSIGNd(_ISINFd(__d) ? 1 : 0, __d); 112 if (_ISNANd(__a)) 113 __a = _COPYSIGNd(0, __a); 114 if (_ISNANd(__b)) 115 __b = _COPYSIGNd(0, __b); 116 __recalc = 1; 117 } 118 if (!__recalc && 119 (_ISINFd(__ac) || _ISINFd(__bd) || _ISINFd(__ad) || _ISINFd(__bc))) { 120 if (_ISNANd(__a)) 121 __a = _COPYSIGNd(0, __a); 122 if (_ISNANd(__b)) 123 __b = _COPYSIGNd(0, __b); 124 if (_ISNANd(__c)) 125 __c = _COPYSIGNd(0, __c); 126 if (_ISNANd(__d)) 127 __d = _COPYSIGNd(0, __d); 128 __recalc = 1; 129 } 130 if (__recalc) { 131 // Can't use std::numeric_limits<double>::infinity() -- that doesn't have 132 // a device overload (and isn't constexpr before C++11, naturally). 133 __real__(z) = __builtin_huge_val() * (__a * __c - __b * __d); 134 __imag__(z) = __builtin_huge_val() * (__a * __d + __b * __c); 135 } 136 } 137 return z; 138 } 139 140 __DEVICE__ float _Complex __mulsc3(float __a, float __b, float __c, float __d) { 141 float __ac = __a * __c; 142 float __bd = __b * __d; 143 float __ad = __a * __d; 144 float __bc = __b * __c; 145 float _Complex z; 146 __real__(z) = __ac - __bd; 147 __imag__(z) = __ad + __bc; 148 if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { 149 int __recalc = 0; 150 if (_ISINFf(__a) || _ISINFf(__b)) { 151 __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); 152 __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); 153 if (_ISNANf(__c)) 154 __c = _COPYSIGNf(0, __c); 155 if (_ISNANf(__d)) 156 __d = _COPYSIGNf(0, __d); 157 __recalc = 1; 158 } 159 if (_ISINFf(__c) || _ISINFf(__d)) { 160 __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); 161 __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); 162 if (_ISNANf(__a)) 163 __a = _COPYSIGNf(0, __a); 164 if (_ISNANf(__b)) 165 __b = _COPYSIGNf(0, __b); 166 __recalc = 1; 167 } 168 if (!__recalc && 169 (_ISINFf(__ac) || _ISINFf(__bd) || _ISINFf(__ad) || _ISINFf(__bc))) { 170 if (_ISNANf(__a)) 171 __a = _COPYSIGNf(0, __a); 172 if (_ISNANf(__b)) 173 __b = _COPYSIGNf(0, __b); 174 if (_ISNANf(__c)) 175 __c = _COPYSIGNf(0, __c); 176 if (_ISNANf(__d)) 177 __d = _COPYSIGNf(0, __d); 178 __recalc = 1; 179 } 180 if (__recalc) { 181 __real__(z) = __builtin_huge_valf() * (__a * __c - __b * __d); 182 __imag__(z) = __builtin_huge_valf() * (__a * __d + __b * __c); 183 } 184 } 185 return z; 186 } 187 188 __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, 189 double __d) { 190 int __ilogbw = 0; 191 // Can't use std::max, because that's defined in <algorithm>, and we don't 192 // want to pull that in for every compile. The CUDA headers define 193 // ::max(float, float) and ::max(double, double), which is sufficient for us. 194 double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d))); 195 if (_ISFINITEd(__logbw)) { 196 __ilogbw = (int)__logbw; 197 __c = _SCALBNd(__c, -__ilogbw); 198 __d = _SCALBNd(__d, -__ilogbw); 199 } 200 double __denom = __c * __c + __d * __d; 201 double _Complex z; 202 __real__(z) = _SCALBNd((__a * __c + __b * __d) / __denom, -__ilogbw); 203 __imag__(z) = _SCALBNd((__b * __c - __a * __d) / __denom, -__ilogbw); 204 if (_ISNANd(__real__(z)) && _ISNANd(__imag__(z))) { 205 if ((__denom == 0.0) && (!_ISNANd(__a) || !_ISNANd(__b))) { 206 __real__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __a; 207 __imag__(z) = _COPYSIGNd(__builtin_huge_val(), __c) * __b; 208 } else if ((_ISINFd(__a) || _ISINFd(__b)) && _ISFINITEd(__c) && 209 _ISFINITEd(__d)) { 210 __a = _COPYSIGNd(_ISINFd(__a) ? 1.0 : 0.0, __a); 211 __b = _COPYSIGNd(_ISINFd(__b) ? 1.0 : 0.0, __b); 212 __real__(z) = __builtin_huge_val() * (__a * __c + __b * __d); 213 __imag__(z) = __builtin_huge_val() * (__b * __c - __a * __d); 214 } else if (_ISINFd(__logbw) && __logbw > 0.0 && _ISFINITEd(__a) && 215 _ISFINITEd(__b)) { 216 __c = _COPYSIGNd(_ISINFd(__c) ? 1.0 : 0.0, __c); 217 __d = _COPYSIGNd(_ISINFd(__d) ? 1.0 : 0.0, __d); 218 __real__(z) = 0.0 * (__a * __c + __b * __d); 219 __imag__(z) = 0.0 * (__b * __c - __a * __d); 220 } 221 } 222 return z; 223 } 224 225 __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { 226 int __ilogbw = 0; 227 float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d))); 228 if (_ISFINITEf(__logbw)) { 229 __ilogbw = (int)__logbw; 230 __c = _SCALBNf(__c, -__ilogbw); 231 __d = _SCALBNf(__d, -__ilogbw); 232 } 233 float __denom = __c * __c + __d * __d; 234 float _Complex z; 235 __real__(z) = _SCALBNf((__a * __c + __b * __d) / __denom, -__ilogbw); 236 __imag__(z) = _SCALBNf((__b * __c - __a * __d) / __denom, -__ilogbw); 237 if (_ISNANf(__real__(z)) && _ISNANf(__imag__(z))) { 238 if ((__denom == 0) && (!_ISNANf(__a) || !_ISNANf(__b))) { 239 __real__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __a; 240 __imag__(z) = _COPYSIGNf(__builtin_huge_valf(), __c) * __b; 241 } else if ((_ISINFf(__a) || _ISINFf(__b)) && _ISFINITEf(__c) && 242 _ISFINITEf(__d)) { 243 __a = _COPYSIGNf(_ISINFf(__a) ? 1 : 0, __a); 244 __b = _COPYSIGNf(_ISINFf(__b) ? 1 : 0, __b); 245 __real__(z) = __builtin_huge_valf() * (__a * __c + __b * __d); 246 __imag__(z) = __builtin_huge_valf() * (__b * __c - __a * __d); 247 } else if (_ISINFf(__logbw) && __logbw > 0 && _ISFINITEf(__a) && 248 _ISFINITEf(__b)) { 249 __c = _COPYSIGNf(_ISINFf(__c) ? 1 : 0, __c); 250 __d = _COPYSIGNf(_ISINFf(__d) ? 1 : 0, __d); 251 __real__(z) = 0 * (__a * __c + __b * __d); 252 __imag__(z) = 0 * (__b * __c - __a * __d); 253 } 254 } 255 return z; 256 } 257 258 #if defined(__cplusplus) 259 } // extern "C" 260 #endif 261 262 #undef _ISNANd 263 #undef _ISNANf 264 #undef _ISINFd 265 #undef _ISINFf 266 #undef _COPYSIGNd 267 #undef _COPYSIGNf 268 #undef _ISFINITEd 269 #undef _ISFINITEf 270 #undef _SCALBNd 271 #undef _SCALBNf 272 #undef _ABSd 273 #undef _ABSf 274 #undef _LOGBd 275 #undef _LOGBf 276 #undef _fmaxd 277 #undef _fmaxf 278 279 #if defined(__OPENMP_NVPTX__) || defined(__OPENMP_AMDGCN__) 280 #pragma omp end declare target 281 #endif 282 283 #pragma pop_macro("__DEVICE__") 284 285 #endif // __CLANG_CUDA_COMPLEX_BUILTINS 286