1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #include <sys/isa_defs.h> 31 #include "libm_inlines.h" 32 33 #ifdef _LITTLE_ENDIAN 34 #define HI(x) *(1+(int*)x) 35 #define LO(x) *(unsigned*)x 36 #else 37 #define HI(x) *(int*)x 38 #define LO(x) *(1+(unsigned*)x) 39 #endif 40 41 #ifdef __RESTRICT 42 #define restrict _Restrict 43 #else 44 #define restrict 45 #endif 46 47 /* double hypot(double x, double y) 48 * 49 * Method : 50 * 1. Special cases: 51 * x or y is +Inf or -Inf => +Inf 52 * x or y is NaN => QNaN 53 * 2. Computes hypot(x,y): 54 * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm) 55 * Where: 56 * m = max(|x|,|y|) 57 * xnm = x * (1/m) 58 * ynm = y * (1/m) 59 * 60 * Compute xnm * xnm + ynm * ynm by simulating 61 * muti-precision arithmetic. 62 * 63 * Accuracy: 64 * Maximum error observed: less than 0.872 ulp after 16.777.216.000 65 * results. 66 */ 67 68 extern double sqrt(double); 69 extern double fabs(double); 70 71 static const unsigned long long LCONST[] = { 72 0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */ 73 0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */ 74 0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */ 75 }; 76 77 static void 78 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py, 79 int stridey, double * restrict pz, int stridez); 80 81 #pragma no_inline(__vhypot_n) 82 83 #define RETURN(ret) \ 84 { \ 85 *pz = (ret); \ 86 py += stridey; \ 87 pz += stridez; \ 88 if (n_n == 0) \ 89 { \ 90 hx0 = HI(px); \ 91 hy0 = HI(py); \ 92 spx = px; spy = py; spz = pz; \ 93 continue; \ 94 } \ 95 n--; \ 96 break; \ 97 } 98 99 void 100 __vhypot(int n, double * restrict px, int stridex, double * restrict py, 101 int stridey, double * restrict pz, int stridez) 102 { 103 int hx0, hx1, hy0, j0, diff; 104 double x_hi, x_lo, y_hi, y_lo; 105 double scl = 0; 106 double x, y, res; 107 double *spx, *spy, *spz; 108 int n_n; 109 double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ 110 double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */ 111 double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */ 112 113 while (n > 1) 114 { 115 n_n = 0; 116 spx = px; 117 spy = py; 118 spz = pz; 119 hx0 = HI(px); 120 hy0 = HI(py); 121 for (; n > 1 ; n--) 122 { 123 px += stridex; 124 hx0 &= 0x7fffffff; 125 hy0 &= 0x7fffffff; 126 127 if (hx0 >= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */ 128 { 129 diff = hy0 - hx0; 130 j0 = diff >> 31; 131 j0 = hy0 - (diff & j0); 132 j0 &= 0x7ff00000; 133 x = *(px - stridex); 134 y = *py; 135 x = fabs(x); 136 y = fabs(y); 137 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ 138 { 139 int lx = LO((px - stridex)); 140 int ly = LO(py); 141 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; 142 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; 143 else res = x + y; 144 RETURN (res) 145 } 146 else 147 { 148 j0 = diff >> 31; 149 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ 150 { 151 x *= D2ONM1022; 152 y *= D2ONM1022; 153 154 x_hi = (x + D2ON28) - D2ON28; 155 x_lo = x - x_hi; 156 y_hi = (y + D2ON28) - D2ON28; 157 y_lo = y - y_hi; 158 res = (x_hi * x_hi + y_hi * y_hi); 159 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 160 161 res = sqrt (res); 162 163 res = D2ONP1022 * res; 164 RETURN (res) 165 } 166 else RETURN (x + y) 167 } 168 } 169 if (hy0 >= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */ 170 { 171 diff = hy0 - hx0; 172 j0 = diff >> 31; 173 j0 = hy0 - (diff & j0); 174 j0 &= 0x7ff00000; 175 x = *(px - stridex); 176 y = *py; 177 x = fabs(x); 178 y = fabs(y); 179 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ 180 { 181 int lx = LO((px - stridex)); 182 int ly = LO(py); 183 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; 184 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; 185 else res = x + y; 186 RETURN (res) 187 } 188 else 189 { 190 j0 = diff >> 31; 191 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ 192 { 193 x *= D2ONM1022; 194 y *= D2ONM1022; 195 196 x_hi = (x + D2ON28) - D2ON28; 197 x_lo = x - x_hi; 198 y_hi = (y + D2ON28) - D2ON28; 199 y_lo = y - y_hi; 200 res = (x_hi * x_hi + y_hi * y_hi); 201 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 202 203 res = sqrt (res); 204 205 res = D2ONP1022 * res; 206 RETURN (res) 207 } 208 else RETURN (x + y) 209 } 210 } 211 212 hx1 = HI(px); 213 214 if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */ 215 { 216 x = *(px - stridex); 217 y = *py; 218 219 x *= D2ONP1022; 220 y *= D2ONP1022; 221 222 x_hi = (x + D2ON28) - D2ON28; 223 x_lo = x - x_hi; 224 y_hi = (y + D2ON28) - D2ON28; 225 y_lo = y - y_hi; 226 res = (x_hi * x_hi + y_hi * y_hi); 227 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 228 229 res = sqrt(res); 230 231 res = D2ONM1022 * res; 232 RETURN (res) 233 } 234 235 hx0 = hx1; 236 py += stridey; 237 pz += stridez; 238 n_n++; 239 hy0 = HI(py); 240 } 241 if (n_n > 0) 242 __vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez); 243 } 244 245 if (n > 0) 246 { 247 x = *px; 248 y = *py; 249 hx0 = HI(px); 250 hy0 = HI(py); 251 252 hx0 &= 0x7fffffff; 253 hy0 &= 0x7fffffff; 254 255 diff = hy0 - hx0; 256 j0 = diff >> 31; 257 j0 = hy0 - (diff & j0); 258 j0 &= 0x7ff00000; 259 260 if (j0 >= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */ 261 { 262 x = fabs(x); 263 y = fabs(y); 264 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */ 265 { 266 int lx = LO(px); 267 int ly = LO(py); 268 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x; 269 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y; 270 else res = x + y; 271 *pz = res; 272 return; 273 } 274 else 275 { 276 j0 = diff >> 31; 277 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */ 278 { 279 x *= D2ONM1022; 280 y *= D2ONM1022; 281 282 x_hi = (x + D2ON28) - D2ON28; 283 x_lo = x - x_hi; 284 y_hi = (y + D2ON28) - D2ON28; 285 y_lo = y - y_hi; 286 res = (x_hi * x_hi + y_hi * y_hi); 287 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 288 289 res = sqrt (res); 290 291 res = D2ONP1022 * res; 292 *pz = res; 293 return; 294 } 295 else 296 { 297 *pz = x + y; 298 return; 299 } 300 } 301 } 302 303 if (j0 < 0x00100000) /* X and Y are subnormal */ 304 { 305 x *= D2ONP1022; 306 y *= D2ONP1022; 307 308 x_hi = (x + D2ON28) - D2ON28; 309 x_lo = x - x_hi; 310 y_hi = (y + D2ON28) - D2ON28; 311 y_lo = y - y_hi; 312 res = (x_hi * x_hi + y_hi * y_hi); 313 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 314 315 res = sqrt(res); 316 317 res = D2ONM1022 * res; 318 *pz = res; 319 return; 320 } 321 322 HI(&scl) = (0x7fe00000 - j0); 323 324 x *= scl; 325 y *= scl; 326 327 x_hi = (x + D2ON28) - D2ON28; 328 y_hi = (y + D2ON28) - D2ON28; 329 x_lo = x - x_hi; 330 y_lo = y - y_hi; 331 332 res = (x_hi * x_hi + y_hi * y_hi); 333 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo); 334 335 res = sqrt(res); 336 337 HI(&scl) = j0; 338 339 res = scl * res; 340 *pz = res; 341 } 342 } 343 344 static void 345 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py, 346 int stridey, double * restrict pz, int stridez) 347 { 348 int hx0, hy0, j0, diff0; 349 double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0; 350 double x0, y0, res0; 351 double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */ 352 353 for(; n > 0 ; n--) 354 { 355 x0 = *px; 356 y0 = *py; 357 hx0 = HI(px); 358 hy0 = HI(py); 359 360 hx0 &= 0x7fffffff; 361 hy0 &= 0x7fffffff; 362 363 diff0 = hy0 - hx0; 364 j0 = diff0 >> 31; 365 j0 = hy0 - (diff0 & j0); 366 j0 &= 0x7ff00000; 367 368 px += stridex; 369 py += stridey; 370 371 HI(&scl0) = (0x7fe00000 - j0); 372 373 x0 *= scl0; 374 y0 *= scl0; 375 376 x_hi0 = (x0 + D2ON28) - D2ON28; 377 y_hi0 = (y0 + D2ON28) - D2ON28; 378 x_lo0 = x0 - x_hi0; 379 y_lo0 = y0 - y_hi0; 380 381 res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0); 382 res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0); 383 384 res0 = sqrt(res0); 385 386 HI(&scl0) = j0; 387 388 res0 = scl0 * res0; 389 *pz = res0; 390 391 pz += stridez; 392 } 393 } 394