1 2 /******************************************************************************* 3 MIT License 4 ----------- 5 6 Copyright (c) 2002-2019 Advanced Micro Devices, Inc. 7 8 Permission is hereby granted, free of charge, to any person obtaining a copy 9 of this Software and associated documentaon files (the "Software"), to deal 10 in the Software without restriction, including without limitation the rights 11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 copies of the Software, and to permit persons to whom the Software is 13 furnished to do so, subject to the following conditions: 14 15 The above copyright notice and this permission notice shall be included in 16 all copies or substantial portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 THE SOFTWARE. 25 *******************************************************************************/ 26 27 #include "libm.h" 28 #include "libm_util.h" 29 30 #define FAST_BUT_GREATER_THAN_ONE_ULP /* Helps speed by trading off a little 31 accuracy */ 32 #define USE_SCALEDOUBLE_1 33 #define USE_INFINITY_WITH_FLAGS 34 #define USE_HANDLE_ERROR 35 #include "libm_inlines.h" 36 #undef USE_SCALEDOUBLE_1 37 #undef USE_INFINITY_WITH_FLAGS 38 #undef USE_HANDLE_ERROR 39 40 #include "libm_errno.h" 41 42 #if (_MSC_VER >= 1920) // VS 2019+ / Compiler version 14.20 43 #pragma function(_hypot) 44 #endif 45 46 double FN_PROTOTYPE(_hypot)(double x, double y) 47 { 48 /* Returns sqrt(x*x + y*y) with no overflow or underflow unless 49 the result warrants it */ 50 51 const double large = 1.79769313486231570815e+308; /* 0x7fefffffffffffff */ 52 53 #ifdef FAST_BUT_GREATER_THAN_ONE_ULP 54 double r, retval; 55 unsigned long long xexp, yexp, ux, uy; 56 #else 57 double u, r, retval, hx, tx, x2, hy, ty, y2, hs, ts; 58 unsigned long long xexp, yexp, ux, uy, ut; 59 #endif 60 int dexp, expadjust; 61 62 GET_BITS_DP64(x, ux); 63 ux &= ~SIGNBIT_DP64; 64 GET_BITS_DP64(y, uy); 65 uy &= ~SIGNBIT_DP64; 66 xexp = (ux >> EXPSHIFTBITS_DP64); 67 yexp = (uy >> EXPSHIFTBITS_DP64); 68 69 if (xexp == BIASEDEMAX_DP64 + 1 || yexp == BIASEDEMAX_DP64 + 1) 70 { 71 /* One or both of the arguments are NaN or infinity. The 72 result will also be NaN or infinity. */ 73 retval = x*x + y*y; 74 if (((xexp == BIASEDEMAX_DP64 + 1) && !(ux & MANTBITS_DP64)) || 75 ((yexp == BIASEDEMAX_DP64 + 1) && !(uy & MANTBITS_DP64))) 76 /* x or y is infinity. ISO C99 defines that we must 77 return +infinity, even if the other argument is NaN. 78 Note that the computation of x*x + y*y above will already 79 have raised invalid if either x or y is a signalling NaN. */ 80 return infinity_with_flags(0); 81 else 82 /* One or both of x or y is NaN, and neither is infinity. 83 Raise invalid if it's a signalling NaN */ 84 return retval; 85 } 86 87 /* Set x = abs(x) and y = abs(y) */ 88 PUT_BITS_DP64(ux, x); 89 PUT_BITS_DP64(uy, y); 90 91 /* The difference in exponents between x and y */ 92 dexp = (int)(xexp - yexp); 93 expadjust = 0; 94 95 if (ux == 0) 96 /* x is zero */ 97 return y; 98 else if (uy == 0) 99 /* y is zero */ 100 return x; 101 else if (dexp > MANTLENGTH_DP64 + 1 || dexp < -MANTLENGTH_DP64 - 1) 102 /* One of x and y is insignificant compared to the other */ 103 return x + y; /* Raise inexact */ 104 else if (xexp > EXPBIAS_DP64 + 500 || yexp > EXPBIAS_DP64 + 500) 105 { 106 /* Danger of overflow; scale down by 2**600. */ 107 expadjust = 600; 108 ux -= 0x2580000000000000; 109 PUT_BITS_DP64(ux, x); 110 uy -= 0x2580000000000000; 111 PUT_BITS_DP64(uy, y); 112 } 113 else if (xexp < EXPBIAS_DP64 - 500 || yexp < EXPBIAS_DP64 - 500) 114 { 115 /* Danger of underflow; scale up by 2**600. */ 116 expadjust = -600; 117 if (xexp == 0) 118 { 119 /* x is denormal - handle by adding 601 to the exponent 120 and then subtracting a correction for the implicit bit */ 121 PUT_BITS_DP64(ux + 0x2590000000000000, x); 122 x -= 9.23297861778573578076e-128; /* 0x2590000000000000 */ 123 GET_BITS_DP64(x, ux); 124 } 125 else 126 { 127 /* x is normal - just increase the exponent by 600 */ 128 ux += 0x2580000000000000; 129 PUT_BITS_DP64(ux, x); 130 } 131 if (yexp == 0) 132 { 133 PUT_BITS_DP64(uy + 0x2590000000000000, y); 134 y -= 9.23297861778573578076e-128; /* 0x2590000000000000 */ 135 GET_BITS_DP64(y, uy); 136 } 137 else 138 { 139 uy += 0x2580000000000000; 140 PUT_BITS_DP64(uy, y); 141 } 142 } 143 144 145 #ifdef FAST_BUT_GREATER_THAN_ONE_ULP 146 /* Not awful, but results in accuracy loss larger than 1 ulp */ 147 r = x*x + y*y; 148 #else 149 /* Slower but more accurate */ 150 151 /* Sort so that x is greater than y */ 152 if (x < y) 153 { 154 u = y; 155 y = x; 156 x = u; 157 ut = ux; 158 ux = uy; 159 uy = ut; 160 } 161 162 /* Split x into hx and tx, head and tail */ 163 PUT_BITS_DP64(ux & 0xfffffffff8000000, hx); 164 tx = x - hx; 165 166 PUT_BITS_DP64(uy & 0xfffffffff8000000, hy); 167 ty = y - hy; 168 169 /* Compute r = x*x + y*y with extra precision */ 170 x2 = x*x; 171 y2 = y*y; 172 hs = x2 + y2; 173 174 if (dexp == 0) 175 /* We take most care when x and y have equal exponents, 176 i.e. are almost the same size */ 177 ts = (((x2 - hs) + y2) + 178 ((hx * hx - x2) + 2 * hx * tx) + tx * tx) + 179 ((hy * hy - y2) + 2 * hy * ty) + ty * ty; 180 else 181 ts = (((x2 - hs) + y2) + 182 ((hx * hx - x2) + 2 * hx * tx) + tx * tx); 183 184 r = hs + ts; 185 #endif 186 187 /* The sqrt can introduce another half ulp error. */ 188 /* VC++ intrinsic call */ 189 _mm_store_sd(&retval, _mm_sqrt_sd(_mm_setzero_pd(), _mm_load_sd(&r))); 190 191 /* If necessary scale the result back. This may lead to 192 overflow but if so that's the correct result. */ 193 retval = scaleDouble_1(retval, expadjust); 194 195 if (retval > large) 196 /* The result overflowed. Deal with errno. */ 197 return _handle_error("_hypot", OP_HYPOT, PINFBITPATT_DP64, _OVERFLOW, 198 AMD_F_OVERFLOW | AMD_F_INEXACT, ERANGE, x, y, 2); 199 200 return retval; 201 } 202