1 /* $OpenBSD: e_sqrtl.c,v 1.3 2016/09/12 19:47:02 guenther Exp $ */ 2 /*- 3 * Copyright (c) 2007 Steven G. Kargl 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice unmodified, this list of conditions, and the following 11 * disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/types.h> 29 #include <machine/ieee.h> /* for struct ieee_ext */ 30 #include <fenv.h> 31 #include <float.h> 32 #include <math.h> 33 34 #ifdef EXT_IMPLICIT_NBIT 35 #define LDBL_NBIT 0 36 #else /* EXT_IMPLICIT_NBIT */ 37 #define LDBL_NBIT 0x80000000 38 #endif /* EXT_IMPLICIT_NBIT */ 39 40 /* Return (x + ulp) for normal positive x. Assumes no overflow. */ 41 static inline long double 42 inc(long double x) 43 { 44 struct ieee_ext *p = (struct ieee_ext *)&x; 45 46 #ifdef EXT_FRACHMBITS 47 uint64_t frach; 48 49 frach = ((uint64_t)p->ext_frach << EXT_FRACHMBITS) | p->ext_frachm; 50 frach++; 51 p->ext_frach = frach >> EXT_FRACHMBITS; 52 p->ext_frachm = frach & 0x00000000ffffffff; 53 #else /* EXT_FRACHMBITS */ 54 uint32_t frach; 55 56 p->ext_frach++; 57 frach = p->ext_frach; 58 #endif /* EXT_FRACHMBITS */ 59 60 if (frach == 0) { 61 62 #ifdef EXT_FRACLMBITS 63 uint64_t fracl; 64 65 fracl = ((uint64_t)p->ext_fraclm << EXT_FRACLBITS) | 66 p->ext_fracl; 67 fracl++; 68 p->ext_fraclm = fracl >> EXT_FRACLBITS; 69 p->ext_fracl = fracl & 0x00000000ffffffff; 70 #else /* EXT_FRACLMBITS */ 71 uint32_t fracl; 72 73 p->ext_fracl++; 74 fracl = p->ext_fracl; 75 #endif /* EXT_FRACLMBITS */ 76 77 if (fracl == 0) { 78 p->ext_exp++; 79 p->ext_frach |= LDBL_NBIT; 80 } 81 } 82 83 return x; 84 } 85 86 /* Return (x - ulp) for normal positive x. Assumes no underflow. */ 87 static inline long double 88 dec(long double x) 89 { 90 struct ieee_ext *p = (struct ieee_ext *)&x; 91 92 #ifdef EXT_FRACLMBITS 93 uint64_t fracl; 94 95 fracl = ((uint64_t)p->ext_fraclm << EXT_FRACLBITS) | p->ext_fracl; 96 fracl--; 97 p->ext_fraclm = fracl >> EXT_FRACLBITS; 98 p->ext_fracl = fracl & 0x00000000ffffffff; 99 #else /* EXT_FRACLMBITS */ 100 uint32_t fracl; 101 102 p->ext_fracl--; 103 fracl = p->ext_fracl; 104 #endif /* EXT_FRACLMBITS */ 105 106 if (fracl == 0) { 107 108 #ifdef EXT_FRACHMBITS 109 uint64_t frach; 110 111 frach = ((uint64_t)p->ext_frach << EXT_FRACHMBITS) | 112 p->ext_frachm; 113 frach--; 114 p->ext_frach = frach >> EXT_FRACHMBITS; 115 p->ext_frachm = frach & 0x00000000ffffffff; 116 #else /* EXT_FRACHMBITS */ 117 uint32_t frach; 118 119 p->ext_frach--; 120 frach = p->ext_frach; 121 #endif /* EXT_FRACHMBITS */ 122 123 if (frach == LDBL_NBIT) { 124 p->ext_exp--; 125 p->ext_frach |= LDBL_NBIT; 126 } 127 } 128 129 return x; 130 } 131 132 /* 133 * This is slow, but simple and portable. You should use hardware sqrt 134 * if possible. 135 */ 136 137 long double 138 sqrtl(long double x) 139 { 140 union { 141 long double e; 142 struct ieee_ext bits; 143 } u; 144 int k, r; 145 long double lo, xn; 146 147 u.e = x; 148 149 /* If x = NaN, then sqrt(x) = NaN. */ 150 /* If x = Inf, then sqrt(x) = Inf. */ 151 /* If x = -Inf, then sqrt(x) = NaN. */ 152 if (u.bits.ext_exp == LDBL_MAX_EXP * 2 - 1) 153 return (x * x + x); 154 155 /* If x = +-0, then sqrt(x) = +-0. */ 156 if ((u.bits.ext_frach 157 #ifdef EXT_FRACHMBITS 158 | u.bits.ext_frachm 159 #endif /* EXT_FRACHMBITS */ 160 #ifdef EXT_FRACLMBITS 161 | u.bits.ext_fraclm 162 #endif /* EXT_FRACLMBITS */ 163 | u.bits.ext_fracl | u.bits.ext_exp) == 0) 164 return (x); 165 166 /* If x < 0, then raise invalid and return NaN */ 167 if (u.bits.ext_sign) 168 return ((x - x) / (x - x)); 169 170 if (u.bits.ext_exp == 0) { 171 /* Adjust subnormal numbers. */ 172 u.e *= 0x1.0p514; 173 k = -514; 174 } else { 175 k = 0; 176 } 177 /* 178 * u.e is a normal number, so break it into u.e = e*2^n where 179 * u.e = (2*e)*2^2k for odd n and u.e = (4*e)*2^2k for even n. 180 */ 181 if ((u.bits.ext_exp - 0x3ffe) & 1) { /* n is odd. */ 182 k += u.bits.ext_exp - 0x3fff; /* 2k = n - 1. */ 183 u.bits.ext_exp = 0x3fff; /* u.e in [1,2). */ 184 } else { 185 k += u.bits.ext_exp - 0x4000; /* 2k = n - 2. */ 186 u.bits.ext_exp = 0x4000; /* u.e in [2,4). */ 187 } 188 189 /* 190 * Newton's iteration. 191 * Split u.e into a high and low part to achieve additional precision. 192 */ 193 xn = sqrt(u.e); /* 53-bit estimate of sqrtl(x). */ 194 #if LDBL_MANT_DIG > 100 195 xn = (xn + (u.e / xn)) * 0.5; /* 106-bit estimate. */ 196 #endif 197 lo = u.e; 198 u.bits.ext_fracl = 0; /* Zero out lower bits. */ 199 #ifdef EXT_FRACLMBITS 200 u.bits.ext_fraclm = 0; 201 #endif /* EXT_FRACLMBITS */ 202 lo = (lo - u.e) / xn; /* Low bits divided by xn. */ 203 xn = xn + (u.e / xn); /* High portion of estimate. */ 204 u.e = xn + lo; /* Combine everything. */ 205 u.bits.ext_exp += (k >> 1) - 1; 206 207 feclearexcept(FE_INEXACT); 208 r = fegetround(); 209 fesetround(FE_TOWARDZERO); /* Set to round-toward-zero. */ 210 xn = x / u.e; /* Chopped quotient (inexact?). */ 211 212 if (!fetestexcept(FE_INEXACT)) { /* Quotient is exact. */ 213 if (xn == u.e) { 214 fesetround(r); 215 return (u.e); 216 } 217 /* Round correctly for inputs like x = y**2 - ulp. */ 218 xn = dec(xn); /* xn = xn - ulp. */ 219 } 220 221 if (r == FE_TONEAREST) { 222 xn = inc(xn); /* xn = xn + ulp. */ 223 } else if (r == FE_UPWARD) { 224 u.e = inc(u.e); /* u.e = u.e + ulp. */ 225 xn = inc(xn); /* xn = xn + ulp. */ 226 } 227 u.e = u.e + xn; /* Chopped sum. */ 228 fesetround(r); /* Restore env and raise inexact */ 229 u.bits.ext_exp--; 230 return (u.e); 231 } 232 DEF_STD(sqrtl); 233