1 /* Copyright (C) 2007-2018 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License as published by the Free 7 Software Foundation; either version 3, or (at your option) any later 8 version. 9 10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24 #define BID_128RES 25 #include "bid_internal.h" 26 #include "bid_sqrt_macros.h" 27 #ifdef UNCHANGED_BINARY_STATUS_FLAGS 28 #include <fenv.h> 29 30 #define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT 31 #endif 32 33 BID128_FUNCTION_ARG1 (bid128_sqrt, x) 34 35 UINT256 M256, C256, C4, C8; 36 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res; 37 UINT64 sign_x, Carry; 38 SINT64 D; 39 int_float fx, f64; 40 int exponent_x, bin_expon_cx; 41 int digits, scale, exponent_q; 42 #ifdef UNCHANGED_BINARY_STATUS_FLAGS 43 fexcept_t binaryflags = 0; 44 #endif 45 46 // unpack arguments, check for NaN or Infinity 47 if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { 48 res.w[1] = CX.w[1]; 49 res.w[0] = CX.w[0]; 50 // NaN ? 51 if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { 52 #ifdef SET_STATUS_FLAGS 53 if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN 54 __set_status_flags (pfpsf, INVALID_EXCEPTION); 55 #endif 56 res.w[1] = CX.w[1] & QUIET_MASK64; 57 BID_RETURN (res); 58 } 59 // x is Infinity? 60 if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { 61 res.w[1] = CX.w[1]; 62 if (sign_x) { 63 // -Inf, return NaN 64 res.w[1] = 0x7c00000000000000ull; 65 #ifdef SET_STATUS_FLAGS 66 __set_status_flags (pfpsf, INVALID_EXCEPTION); 67 #endif 68 } 69 BID_RETURN (res); 70 } 71 // x is 0 otherwise 72 73 res.w[1] = 74 sign_x | 75 ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) << 49); 76 res.w[0] = 0; 77 BID_RETURN (res); 78 } 79 if (sign_x) { 80 res.w[1] = 0x7c00000000000000ull; 81 res.w[0] = 0; 82 #ifdef SET_STATUS_FLAGS 83 __set_status_flags (pfpsf, INVALID_EXCEPTION); 84 #endif 85 BID_RETURN (res); 86 } 87 #ifdef UNCHANGED_BINARY_STATUS_FLAGS 88 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); 89 #endif 90 // 2^64 91 f64.i = 0x5f800000; 92 93 // fx ~ CX 94 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; 95 bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f; 96 digits = estimate_decimal_digits[bin_expon_cx]; 97 98 A10 = CX; 99 if (exponent_x & 1) { 100 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61); 101 A10.w[0] = CX.w[0] << 3; 102 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63); 103 CX2.w[0] = CX.w[0] << 1; 104 __add_128_128 (A10, A10, CX2); 105 } 106 107 CS.w[0] = short_sqrt128 (A10); 108 CS.w[1] = 0; 109 // check for exact result 110 if (CS.w[0] * CS.w[0] == A10.w[0]) { 111 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]); 112 if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0]) 113 { 114 get_BID128_very_fast (&res, 0, 115 (exponent_x + 116 DECIMAL_EXPONENT_BIAS_128) >> 1, CS); 117 #ifdef UNCHANGED_BINARY_STATUS_FLAGS 118 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 119 #endif 120 BID_RETURN (res); 121 } 122 } 123 // get number of digits in CX 124 D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1]; 125 if (D > 0 126 || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0])) 127 digits++; 128 129 // if exponent is odd, scale coefficient by 10 130 scale = 67 - digits; 131 exponent_q = exponent_x - scale; 132 scale += (exponent_q & 1); // exp. bias is even 133 134 if (scale > 38) { 135 T128 = power10_table_128[scale - 37]; 136 __mul_128x128_low (CX1, CX, T128); 137 138 TP128 = power10_table_128[37]; 139 __mul_128x128_to_256 (C256, CX1, TP128); 140 } else { 141 T128 = power10_table_128[scale]; 142 __mul_128x128_to_256 (C256, CX, T128); 143 } 144 145 146 // 4*C256 147 C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62); 148 C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62); 149 C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62); 150 C4.w[0] = C256.w[0] << 2; 151 152 long_sqrt128 (&CS, C256); 153 154 #ifndef IEEE_ROUND_NEAREST 155 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 156 if (!((rnd_mode) & 3)) { 157 #endif 158 #endif 159 // compare to midpoints 160 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); 161 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1; 162 // CSM^2 163 //__mul_128x128_to_256(M256, CSM, CSM); 164 __sqr128_to_256 (M256, CSM); 165 166 if (C4.w[3] > M256.w[3] 167 || (C4.w[3] == M256.w[3] 168 && (C4.w[2] > M256.w[2] 169 || (C4.w[2] == M256.w[2] 170 && (C4.w[1] > M256.w[1] 171 || (C4.w[1] == M256.w[1] 172 && C4.w[0] > M256.w[0])))))) { 173 // round up 174 CS.w[0]++; 175 if (!CS.w[0]) 176 CS.w[1]++; 177 } else { 178 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61); 179 C8.w[0] = CS.w[0] << 3; 180 // M256 - 8*CSM 181 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 182 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 183 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 184 M256.w[3] = M256.w[3] - Carry; 185 186 // if CSM' > C256, round up 187 if (M256.w[3] > C4.w[3] 188 || (M256.w[3] == C4.w[3] 189 && (M256.w[2] > C4.w[2] 190 || (M256.w[2] == C4.w[2] 191 && (M256.w[1] > C4.w[1] 192 || (M256.w[1] == C4.w[1] 193 && M256.w[0] > C4.w[0])))))) { 194 // round down 195 if (!CS.w[0]) 196 CS.w[1]--; 197 CS.w[0]--; 198 } 199 } 200 #ifndef IEEE_ROUND_NEAREST 201 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 202 } else { 203 __sqr128_to_256 (M256, CS); 204 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); 205 C8.w[0] = CS.w[0] << 1; 206 if (M256.w[3] > C256.w[3] 207 || (M256.w[3] == C256.w[3] 208 && (M256.w[2] > C256.w[2] 209 || (M256.w[2] == C256.w[2] 210 && (M256.w[1] > C256.w[1] 211 || (M256.w[1] == C256.w[1] 212 && M256.w[0] > C256.w[0])))))) { 213 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 214 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 215 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 216 M256.w[3] = M256.w[3] - Carry; 217 M256.w[0]++; 218 if (!M256.w[0]) { 219 M256.w[1]++; 220 if (!M256.w[1]) { 221 M256.w[2]++; 222 if (!M256.w[2]) 223 M256.w[3]++; 224 } 225 } 226 227 if (!CS.w[0]) 228 CS.w[1]--; 229 CS.w[0]--; 230 231 if (M256.w[3] > C256.w[3] 232 || (M256.w[3] == C256.w[3] 233 && (M256.w[2] > C256.w[2] 234 || (M256.w[2] == C256.w[2] 235 && (M256.w[1] > C256.w[1] 236 || (M256.w[1] == C256.w[1] 237 && M256.w[0] > C256.w[0])))))) { 238 239 if (!CS.w[0]) 240 CS.w[1]--; 241 CS.w[0]--; 242 } 243 } 244 245 else { 246 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 247 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 248 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 249 M256.w[3] = M256.w[3] + Carry; 250 M256.w[0]++; 251 if (!M256.w[0]) { 252 M256.w[1]++; 253 if (!M256.w[1]) { 254 M256.w[2]++; 255 if (!M256.w[2]) 256 M256.w[3]++; 257 } 258 } 259 if (M256.w[3] < C256.w[3] 260 || (M256.w[3] == C256.w[3] 261 && (M256.w[2] < C256.w[2] 262 || (M256.w[2] == C256.w[2] 263 && (M256.w[1] < C256.w[1] 264 || (M256.w[1] == C256.w[1] 265 && M256.w[0] <= C256.w[0])))))) { 266 267 CS.w[0]++; 268 if (!CS.w[0]) 269 CS.w[1]++; 270 } 271 } 272 // RU? 273 if ((rnd_mode) == ROUNDING_UP) { 274 CS.w[0]++; 275 if (!CS.w[0]) 276 CS.w[1]++; 277 } 278 279 } 280 #endif 281 #endif 282 283 #ifdef SET_STATUS_FLAGS 284 __set_status_flags (pfpsf, INEXACT_EXCEPTION); 285 #endif 286 get_BID128_fast (&res, 0, 287 (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, CS); 288 #ifdef UNCHANGED_BINARY_STATUS_FLAGS 289 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 290 #endif 291 BID_RETURN (res); 292 } 293 294 295 296 BID128_FUNCTION_ARGTYPE1 (bid128d_sqrt, UINT64, x) 297 298 UINT256 M256, C256, C4, C8; 299 UINT128 CX, CX1, CX2, A10, S2, T128, TP128, CS, CSM, res; 300 UINT64 sign_x, Carry; 301 SINT64 D; 302 int_float fx, f64; 303 int exponent_x, bin_expon_cx; 304 int digits, scale, exponent_q; 305 #ifdef UNCHANGED_BINARY_STATUS_FLAGS 306 fexcept_t binaryflags = 0; 307 #endif 308 309 // unpack arguments, check for NaN or Infinity 310 // unpack arguments, check for NaN or Infinity 311 CX.w[1] = 0; 312 if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], x)) { 313 res.w[1] = CX.w[0]; 314 res.w[0] = 0; 315 // NaN ? 316 if ((x & 0x7c00000000000000ull) == 0x7c00000000000000ull) { 317 #ifdef SET_STATUS_FLAGS 318 if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN 319 __set_status_flags (pfpsf, INVALID_EXCEPTION); 320 #endif 321 res.w[0] = (CX.w[0] & 0x0003ffffffffffffull); 322 __mul_64x64_to_128 (res, res.w[0], power10_table_128[18].w[0]); 323 res.w[1] |= ((CX.w[0]) & 0xfc00000000000000ull); 324 BID_RETURN (res); 325 } 326 // x is Infinity? 327 if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) { 328 if (sign_x) { 329 // -Inf, return NaN 330 res.w[1] = 0x7c00000000000000ull; 331 #ifdef SET_STATUS_FLAGS 332 __set_status_flags (pfpsf, INVALID_EXCEPTION); 333 #endif 334 } 335 BID_RETURN (res); 336 } 337 // x is 0 otherwise 338 339 exponent_x = 340 exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128; 341 res.w[1] = 342 sign_x | ((((UINT64) (exponent_x + DECIMAL_EXPONENT_BIAS_128)) >> 1) 343 << 49); 344 res.w[0] = 0; 345 BID_RETURN (res); 346 } 347 if (sign_x) { 348 res.w[1] = 0x7c00000000000000ull; 349 res.w[0] = 0; 350 #ifdef SET_STATUS_FLAGS 351 __set_status_flags (pfpsf, INVALID_EXCEPTION); 352 #endif 353 BID_RETURN (res); 354 } 355 #ifdef UNCHANGED_BINARY_STATUS_FLAGS 356 (void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); 357 #endif 358 exponent_x = 359 exponent_x - DECIMAL_EXPONENT_BIAS + DECIMAL_EXPONENT_BIAS_128; 360 361 // 2^64 362 f64.i = 0x5f800000; 363 364 // fx ~ CX 365 fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; 366 bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f; 367 digits = estimate_decimal_digits[bin_expon_cx]; 368 369 A10 = CX; 370 if (exponent_x & 1) { 371 A10.w[1] = (CX.w[1] << 3) | (CX.w[0] >> 61); 372 A10.w[0] = CX.w[0] << 3; 373 CX2.w[1] = (CX.w[1] << 1) | (CX.w[0] >> 63); 374 CX2.w[0] = CX.w[0] << 1; 375 __add_128_128 (A10, A10, CX2); 376 } 377 378 CS.w[0] = short_sqrt128 (A10); 379 CS.w[1] = 0; 380 // check for exact result 381 if (CS.w[0] * CS.w[0] == A10.w[0]) { 382 __mul_64x64_to_128_fast (S2, CS.w[0], CS.w[0]); 383 if (S2.w[1] == A10.w[1]) { 384 get_BID128_very_fast (&res, 0, 385 (exponent_x + DECIMAL_EXPONENT_BIAS_128) >> 1, 386 CS); 387 #ifdef UNCHANGED_BINARY_STATUS_FLAGS 388 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 389 #endif 390 BID_RETURN (res); 391 } 392 } 393 // get number of digits in CX 394 D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1]; 395 if (D > 0 396 || (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0])) 397 digits++; 398 399 // if exponent is odd, scale coefficient by 10 400 scale = 67 - digits; 401 exponent_q = exponent_x - scale; 402 scale += (exponent_q & 1); // exp. bias is even 403 404 if (scale > 38) { 405 T128 = power10_table_128[scale - 37]; 406 __mul_128x128_low (CX1, CX, T128); 407 408 TP128 = power10_table_128[37]; 409 __mul_128x128_to_256 (C256, CX1, TP128); 410 } else { 411 T128 = power10_table_128[scale]; 412 __mul_128x128_to_256 (C256, CX, T128); 413 } 414 415 416 // 4*C256 417 C4.w[3] = (C256.w[3] << 2) | (C256.w[2] >> 62); 418 C4.w[2] = (C256.w[2] << 2) | (C256.w[1] >> 62); 419 C4.w[1] = (C256.w[1] << 2) | (C256.w[0] >> 62); 420 C4.w[0] = C256.w[0] << 2; 421 422 long_sqrt128 (&CS, C256); 423 424 #ifndef IEEE_ROUND_NEAREST 425 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 426 if (!((rnd_mode) & 3)) { 427 #endif 428 #endif 429 // compare to midpoints 430 CSM.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); 431 CSM.w[0] = (CS.w[0] + CS.w[0]) | 1; 432 // CSM^2 433 //__mul_128x128_to_256(M256, CSM, CSM); 434 __sqr128_to_256 (M256, CSM); 435 436 if (C4.w[3] > M256.w[3] 437 || (C4.w[3] == M256.w[3] 438 && (C4.w[2] > M256.w[2] 439 || (C4.w[2] == M256.w[2] 440 && (C4.w[1] > M256.w[1] 441 || (C4.w[1] == M256.w[1] 442 && C4.w[0] > M256.w[0])))))) { 443 // round up 444 CS.w[0]++; 445 if (!CS.w[0]) 446 CS.w[1]++; 447 } else { 448 C8.w[1] = (CS.w[1] << 3) | (CS.w[0] >> 61); 449 C8.w[0] = CS.w[0] << 3; 450 // M256 - 8*CSM 451 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 452 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 453 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 454 M256.w[3] = M256.w[3] - Carry; 455 456 // if CSM' > C256, round up 457 if (M256.w[3] > C4.w[3] 458 || (M256.w[3] == C4.w[3] 459 && (M256.w[2] > C4.w[2] 460 || (M256.w[2] == C4.w[2] 461 && (M256.w[1] > C4.w[1] 462 || (M256.w[1] == C4.w[1] 463 && M256.w[0] > C4.w[0])))))) { 464 // round down 465 if (!CS.w[0]) 466 CS.w[1]--; 467 CS.w[0]--; 468 } 469 } 470 #ifndef IEEE_ROUND_NEAREST 471 #ifndef IEEE_ROUND_NEAREST_TIES_AWAY 472 } else { 473 __sqr128_to_256 (M256, CS); 474 C8.w[1] = (CS.w[1] << 1) | (CS.w[0] >> 63); 475 C8.w[0] = CS.w[0] << 1; 476 if (M256.w[3] > C256.w[3] 477 || (M256.w[3] == C256.w[3] 478 && (M256.w[2] > C256.w[2] 479 || (M256.w[2] == C256.w[2] 480 && (M256.w[1] > C256.w[1] 481 || (M256.w[1] == C256.w[1] 482 && M256.w[0] > C256.w[0])))))) { 483 __sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 484 __sub_borrow_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 485 __sub_borrow_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 486 M256.w[3] = M256.w[3] - Carry; 487 M256.w[0]++; 488 if (!M256.w[0]) { 489 M256.w[1]++; 490 if (!M256.w[1]) { 491 M256.w[2]++; 492 if (!M256.w[2]) 493 M256.w[3]++; 494 } 495 } 496 497 if (!CS.w[0]) 498 CS.w[1]--; 499 CS.w[0]--; 500 501 if (M256.w[3] > C256.w[3] 502 || (M256.w[3] == C256.w[3] 503 && (M256.w[2] > C256.w[2] 504 || (M256.w[2] == C256.w[2] 505 && (M256.w[1] > C256.w[1] 506 || (M256.w[1] == C256.w[1] 507 && M256.w[0] > C256.w[0])))))) { 508 509 if (!CS.w[0]) 510 CS.w[1]--; 511 CS.w[0]--; 512 } 513 } 514 515 else { 516 __add_carry_out (M256.w[0], Carry, M256.w[0], C8.w[0]); 517 __add_carry_in_out (M256.w[1], Carry, M256.w[1], C8.w[1], Carry); 518 __add_carry_in_out (M256.w[2], Carry, M256.w[2], 0, Carry); 519 M256.w[3] = M256.w[3] + Carry; 520 M256.w[0]++; 521 if (!M256.w[0]) { 522 M256.w[1]++; 523 if (!M256.w[1]) { 524 M256.w[2]++; 525 if (!M256.w[2]) 526 M256.w[3]++; 527 } 528 } 529 if (M256.w[3] < C256.w[3] 530 || (M256.w[3] == C256.w[3] 531 && (M256.w[2] < C256.w[2] 532 || (M256.w[2] == C256.w[2] 533 && (M256.w[1] < C256.w[1] 534 || (M256.w[1] == C256.w[1] 535 && M256.w[0] <= C256.w[0])))))) { 536 537 CS.w[0]++; 538 if (!CS.w[0]) 539 CS.w[1]++; 540 } 541 } 542 // RU? 543 if ((rnd_mode) == ROUNDING_UP) { 544 CS.w[0]++; 545 if (!CS.w[0]) 546 CS.w[1]++; 547 } 548 549 } 550 #endif 551 #endif 552 553 #ifdef SET_STATUS_FLAGS 554 __set_status_flags (pfpsf, INEXACT_EXCEPTION); 555 #endif 556 get_BID128_fast (&res, 0, (exponent_q + DECIMAL_EXPONENT_BIAS_128) >> 1, 557 CS); 558 #ifdef UNCHANGED_BINARY_STATUS_FLAGS 559 (void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); 560 #endif 561 BID_RETURN (res); 562 563 564 } 565