/* IEEE-754 double-precision functions for Xtensa
   Copyright (C) 2006-2019 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negdf2

        .align  4
        .global __negdf2
        .type   __negdf2, @function
__negdf2:
        leaf_entry sp, 16
        movi    a4, 0x80000000
        xor     xh, xh, a4
        leaf_return

#endif /* L_negdf2 */

#ifdef L_addsubdf3

        .literal_position
        /* Addition */
__adddf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Ladd_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   yh, a6, .Ladd_return_nan_or_inf
        /* If x is a NaN, return it.  Otherwise, return y.  */
        slli    a7, xh, 12
        or      a7, a7, xl
        bnez    a7, .Ladd_return_nan

.Ladd_ynan_or_inf:
        /* Return y.  */
        mov     xh, yh
        mov     xl, yl

.Ladd_return_nan_or_inf:
        slli    a7, xh, 12
        or      a7, a7, xl
        bnez    a7, .Ladd_return_nan
        leaf_return

.Ladd_return_nan:
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
        leaf_return

.Ladd_opposite_signs:
        /* Operand signs differ.  Do a subtraction.  */
        slli    a7, a6, 11
        xor     yh, yh, a7
        j       .Lsub_same_sign

        .align  4
        .global __adddf3
        .type   __adddf3, @function
__adddf3:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000

        /* Check if the two operands have the same sign.  */
        xor     a7, xh, yh
        bltz    a7, .Ladd_opposite_signs

.Ladd_same_sign:
        /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
        ball    xh, a6, .Ladd_xnan_or_inf
        ball    yh, a6, .Ladd_ynan_or_inf

        /* Compare the exponents.  The smaller operand will be shifted
           right by the exponent difference and added to the larger
           one.  */
        extui   a7, xh, 20, 12
        extui   a8, yh, 20, 12
        bltu    a7, a8, .Ladd_shiftx

.Ladd_shifty:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   yh, a6, .Ladd_yexpzero

        /* Replace yh sign/exponent with 0x001.  */
        or      yh, yh, a6
        slli    yh, yh, 11
        srli    yh, yh, 11

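        /* The OR/shift sequence above is equivalent to this C sketch
           (illustrative only; no such helper exists in this file):

             uint32_t hi_with_implicit_one (uint32_t hi)
             {
               return ((hi | 0x7ff00000) << 11) >> 11;
             }

           i.e., (hi & 0xfffff) | 0x100000: the 20 fraction bits of the
           high word with the implicit "1.0" made explicit at bit 20.  */
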
.Ladd_yexpdiff:
        /* Compute the exponent difference.  Optimize for difference < 32.  */
        sub     a10, a7, a8
        bgeui   a10, 32, .Ladd_bigshifty

        /* Shift yh/yl right by the exponent difference.  Any bits that are
           shifted out of yl are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, yl, a9
        src     yl, yh, yl
        srl     yh, yh

.Ladd_addy:
        /* Do the 64-bit addition.  */
        add     xl, xl, yl
        add     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, 1
1:
        /* Check if the add overflowed into the exponent.  */
        extui   a10, xh, 20, 12
        beq     a10, a7, .Ladd_round
        mov     a8, a7
        j       .Ladd_carry

.Ladd_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0", and increment the apparent exponent
           because subnormals behave as if they had the minimum (nonzero)
           exponent.  Test for the case when both exponents are zero.  */
        slli    yh, yh, 12
        srli    yh, yh, 12
        bnone   xh, a6, .Ladd_bothexpzero
        addi    a8, a8, 1
        j       .Ladd_yexpdiff

.Ladd_bothexpzero:
        /* Both exponents are zero.  Handle this as a special case.  There
           is no need to shift or round, and the normal code for handling
           a carry into the exponent field will not work because it
           assumes there is an implicit "1.0" that needs to be added.  */
        add     xl, xl, yl
        add     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, 1
1:      leaf_return

.Ladd_bigshifty:
        /* Exponent difference >= 64 -- just return the bigger value.  */
        bgeui   a10, 64, 1b

        /* Shift yh/yl right by the exponent difference.  Any bits that are
           shifted out are saved in a9 for rounding the result.  */
        ssr     a10
        sll     a11, yl         /* lost bits shifted out of yl */
        src     a9, yh, yl
        srl     yl, yh
        movi    yh, 0
        beqz    a11, .Ladd_addy
        or      a9, a9, a10     /* any positive, nonzero value will work */
        j       .Ladd_addy

.Ladd_xexpzero:
        /* Same as "yexpzero" except skip handling the case when both
           exponents are zero.  */
        slli    xh, xh, 12
        srli    xh, xh, 12
        addi    a7, a7, 1
        j       .Ladd_xexpdiff

.Ladd_shiftx:
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
           because the exponent difference is always nonzero in this version,
           the shift sequence can use SLL and skip loading a constant zero.  */
        bnone   xh, a6, .Ladd_xexpzero

        or      xh, xh, a6
        slli    xh, xh, 11
        srli    xh, xh, 11

.Ladd_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Ladd_bigshiftx

        ssr     a10
        sll     a9, xl
        src     xl, xh, xl
        srl     xh, xh

.Ladd_addx:
        add     xl, xl, yl
        add     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, 1
1:
        /* Check if the add overflowed into the exponent.  */
        extui   a10, xh, 20, 12
        bne     a10, a8, .Ladd_carry

.Ladd_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    xl, xl, 1
        beqz    xl, .Ladd_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

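        /* The rounding above is round-to-nearest-even.  A rough C sketch,
           with illustrative variable names only:

             if (guard & 0x80000000)          -- leftover fraction >= 1/2
               {
                 if (++xl == 0)
                   xh++;                      -- carry: xl is 0, already even
                 else if ((guard << 1) == 0)
                   xl &= ~1;                  -- exactly 1/2: round to even
               }  */
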
.Ladd_bigshiftx:
        /* Mostly the same thing as "bigshifty"....  */
        bgeui   a10, 64, .Ladd_returny

        ssr     a10
        sll     a11, xl
        src     a9, xh, xl
        srl     xl, xh
        movi    xh, 0
        beqz    a11, .Ladd_addx
        or      a9, a9, a10
        j       .Ladd_addx

.Ladd_returny:
        mov     xh, yh
        mov     xl, yl
        leaf_return

.Ladd_carry:
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized.  The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x100000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added).  The mantissa can then
           be shifted right by one bit.  The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions.  If x is the
           original exponent, the result is:
               shifted mantissa - (x << 19) + (1 << 19) + (x << 20)
           or:
               shifted mantissa + ((x + 1) << 19)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field.  */

        /* Shift xh/xl right by one bit.  Save the lsb of xl.  */
        mov     a10, xl
        ssai    1
        src     xl, xh, xl
        srl     xh, xh

        /* See explanation above.  The original exponent is in a8.  */
        addi    a8, a8, 1
        slli    a8, a8, 19
        add     xh, xh, a8

        /* Return an Infinity if the exponent overflowed.  */
        ball    xh, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
        bbci.l  a10, 0, 1f
        addi    xl, xl, 1
        beqz    xl, .Ladd_roundcarry
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_infinity:
        /* Clear the mantissa.  */
        movi    xl, 0
        srli    xh, xh, 20
        slli    xh, xh, 20

        /* The sign bit may have been lost in a carry-out.  Put it back.  */
        slli    a8, a8, 1
        or      xh, xh, a8
        leaf_return

.Ladd_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Ladd_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return


        /* Subtraction */
__subdf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Lsub_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   yh, a6, .Lsub_return_nan_or_inf

.Lsub_return_nan:
        /* Both x and y are either NaN or Inf, so the result is NaN.  */
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
        leaf_return

.Lsub_ynan_or_inf:
        /* Negate y and return it.  */
        slli    a7, a6, 11
        xor     xh, yh, a7
        mov     xl, yl

.Lsub_return_nan_or_inf:
        slli    a7, xh, 12
        or      a7, a7, xl
        bnez    a7, .Lsub_return_nan
        leaf_return

.Lsub_opposite_signs:
        /* Operand signs differ.  Do an addition.  */
        slli    a7, a6, 11
        xor     yh, yh, a7
        j       .Ladd_same_sign

        .align  4
        .global __subdf3
        .type   __subdf3, @function
__subdf3:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000

        /* Check if the two operands have the same sign.  */
        xor     a7, xh, yh
        bltz    a7, .Lsub_opposite_signs

.Lsub_same_sign:
        /* Check if either exponent == 0x7ff (i.e., NaN or Infinity).  */
        ball    xh, a6, .Lsub_xnan_or_inf
        ball    yh, a6, .Lsub_ynan_or_inf

        /* Compare the operands.  In contrast to addition, the entire
           value matters here.  */
        extui   a7, xh, 20, 11
        extui   a8, yh, 20, 11
        bltu    xh, yh, .Lsub_xsmaller
        beq     xh, yh, .Lsub_compare_low

.Lsub_ysmaller:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   yh, a6, .Lsub_yexpzero

        /* Replace yh sign/exponent with 0x001.  */
        or      yh, yh, a6
        slli    yh, yh, 11
        srli    yh, yh, 11

.Lsub_yexpdiff:
        /* Compute the exponent difference.  Optimize for difference < 32.  */
        sub     a10, a7, a8
        bgeui   a10, 32, .Lsub_bigshifty

        /* Shift yh/yl right by the exponent difference.  Any bits that are
           shifted out of yl are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, yl, a9
        src     yl, yh, yl
        srl     yh, yh

.Lsub_suby:
        /* Do the 64-bit subtraction.  */
        sub     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, -1
1:      sub     xl, xl, yl

        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from xh/xl.  */
        neg     a9, a9
        beqz    a9, 1f
        addi    a5, xh, -1
        moveqz  xh, a5, xl
        addi    xl, xl, -1
1:
        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, xh, 20, 11
        beq     a10, a7, .Lsub_round
        j       .Lsub_borrow

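        /* The guard-bit borrow above corresponds to this C sketch
           (names are illustrative only):

             if (guard != 0)
               {
                 guard = -guard;        -- borrow the guard bits from zero
                 if (xl == 0)
                   xh--;                -- propagate the borrow
                 xl--;
               }  */
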
.Lsub_compare_low:
        /* The high words are equal.  Compare the low words.  */
        bltu    xl, yl, .Lsub_xsmaller
        bltu    yl, xl, .Lsub_ysmaller
        /* The operands are equal.  Return 0.0.  */
        movi    xh, 0
        movi    xl, 0
1:      leaf_return

.Lsub_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent.  */
        slli    yh, yh, 12
        srli    yh, yh, 12
        bnone   xh, a6, .Lsub_yexpdiff
        addi    a8, a8, 1
        j       .Lsub_yexpdiff

.Lsub_bigshifty:
        /* Exponent difference >= 64 -- just return the bigger value.  */
        bgeui   a10, 64, 1b

        /* Shift yh/yl right by the exponent difference.  Any bits that are
           shifted out are saved in a9 for rounding the result.  */
        ssr     a10
        sll     a11, yl         /* lost bits shifted out of yl */
        src     a9, yh, yl
        srl     yl, yh
        movi    yh, 0
        beqz    a11, .Lsub_suby
        or      a9, a9, a10     /* any positive, nonzero value will work */
        j       .Lsub_suby

.Lsub_xsmaller:
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with y negated.  */
        bnone   xh, a6, .Lsub_xexpzero

        or      xh, xh, a6
        slli    xh, xh, 11
        srli    xh, xh, 11

.Lsub_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Lsub_bigshiftx

        ssr     a10
        movi    a9, 0
        src     a9, xl, a9
        src     xl, xh, xl
        srl     xh, xh

        /* Negate y.  */
        slli    a11, a6, 11
        xor     yh, yh, a11

.Lsub_subx:
        sub     xl, yl, xl
        sub     xh, yh, xh
        bgeu    yl, xl, 1f
        addi    xh, xh, -1
1:
        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from xh/xl.  */
        neg     a9, a9
        beqz    a9, 1f
        addi    a5, xh, -1
        moveqz  xh, a5, xl
        addi    xl, xl, -1
1:
        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, xh, 20, 11
        bne     a10, a8, .Lsub_borrow

.Lsub_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    xl, xl, 1
        beqz    xl, .Lsub_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Lsub_exactlyhalf
1:      leaf_return

.Lsub_xexpzero:
        /* Same as "yexpzero".  */
        slli    xh, xh, 12
        srli    xh, xh, 12
        bnone   yh, a6, .Lsub_xexpdiff
        addi    a7, a7, 1
        j       .Lsub_xexpdiff

.Lsub_bigshiftx:
        /* Mostly the same thing as "bigshifty", but with the sign bit of the
           shifted value set so that the subsequent subtraction flips the
           sign of y.  */
        bgeui   a10, 64, .Lsub_returny

        ssr     a10
        sll     a11, xl
        src     a9, xh, xl
        srl     xl, xh
        slli    xh, a6, 11      /* set sign bit of xh */
        beqz    a11, .Lsub_subx
        or      a9, a9, a10
        j       .Lsub_subx

.Lsub_returny:
        /* Negate and return y.  */
        slli    a7, a6, 11
        xor     xh, yh, a7
        mov     xl, yl
        leaf_return

.Lsub_borrow:
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized.  Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly.  If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value.  */

        slli    a8, xh, 12
        beqz    a8, .Lsub_xhzero
        do_nsau a6, a8, a7, a11
        srli    a8, a8, 12
        bge     a6, a10, .Lsub_subnormal
        addi    a6, a6, 1

.Lsub_shift_lt32:
        /* Shift the mantissa (a8/xl/a9) left by a6.  */
        ssl     a6
        src     a8, a8, xl
        src     xl, xl, a9
        sll     a9, a9

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6.  (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.)  */
        srli    xh, xh, 20
        sub     xh, xh, a6
        slli    xh, xh, 20
        add     xh, xh, a8
        j       .Lsub_round

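        /* The renormalization above, as a rough C sketch (do_nsau counts
           leading zeros, like __builtin_clz; names are illustrative):

             shift = __builtin_clz (frac_hi << 12) + 1;
             -- shift frac_hi/xl/guard left by "shift", then:
             se = xh >> 20;              -- sign and (borrowed) exponent
             se -= shift;                -- account for the shift
             xh = (se << 20) + frac_hi;  -- the explicit "1.0" re-increments
                                         -- the exponent field  */
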
.Lsub_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Lsub_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return

.Lsub_xhzero:
        /* When normalizing the result, all the mantissa bits in the high
           word are zero.  Shift by "20 + (leading zero count of xl) + 1".  */
        do_nsau a6, xl, a7, a11
        addi    a6, a6, 21
        blt     a10, a6, .Lsub_subnormal

.Lsub_normalize_shift:
        bltui   a6, 32, .Lsub_shift_lt32

        ssl     a6
        src     a8, xl, a9
        sll     xl, a9
        movi    a9, 0

        srli    xh, xh, 20
        sub     xh, xh, a6
        slli    xh, xh, 20
        add     xh, xh, a8
        j       .Lsub_round

.Lsub_subnormal:
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero.  Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1
           not 0 so the shift amount must be 1 smaller than normal.  */
        mov     a6, a10
        j       .Lsub_normalize_shift

#endif /* L_addsubdf3 */

#ifdef L_muldf3

        /* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

        .literal_position
__muldf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
        /* Clear the sign bit of x.  */
        slli    xh, xh, 1
        srli    xh, xh, 1

        /* If x is zero, return zero.  */
        or      a10, xh, xl
        beqz    a10, .Lmul_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        beqz    xh, .Lmul_xh_zero
        do_nsau a10, xh, a11, a12
        addi    a10, a10, -11
        ssl     a10
        src     xh, xh, xl
        sll     xl, xl
        movi    a8, 1
        sub     a8, a8, a10
        j       .Lmul_xnormalized
.Lmul_xh_zero:
        do_nsau a10, xl, a11, a12
        addi    a10, a10, -11
        movi    a8, -31
        sub     a8, a8, a10
        ssl     a10
        bltz    a10, .Lmul_xl_srl
        sll     xh, xl
        movi    xl, 0
        j       .Lmul_xnormalized
.Lmul_xl_srl:
        srl     xh, xl
        sll     xl, xl
        j       .Lmul_xnormalized

.Lmul_yexpzero:
        /* Clear the sign bit of y.  */
        slli    yh, yh, 1
        srli    yh, yh, 1

        /* If y is zero, return zero.  */
        or      a10, yh, yl
        beqz    a10, .Lmul_return_zero

        /* Normalize y.  Adjust the exponent in a9.  */
        beqz    yh, .Lmul_yh_zero
        do_nsau a10, yh, a11, a12
        addi    a10, a10, -11
        ssl     a10
        src     yh, yh, yl
        sll     yl, yl
        movi    a9, 1
        sub     a9, a9, a10
        j       .Lmul_ynormalized
.Lmul_yh_zero:
        do_nsau a10, yl, a11, a12
        addi    a10, a10, -11
        movi    a9, -31
        sub     a9, a9, a10
        ssl     a10
        bltz    a10, .Lmul_yl_srl
        sll     yh, yl
        movi    yl, 0
        j       .Lmul_ynormalized
.Lmul_yl_srl:
        srl     yh, yl
        sll     yl, yl
        j       .Lmul_ynormalized

.Lmul_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        j       .Lmul_done

.Lmul_xnan_or_inf:
        /* If y is zero, return NaN.  */
        bnez    yl, 1f
        slli    a8, yh, 1
        beqz    a8, .Lmul_return_nan
1:
        /* If y is NaN, return y.  */
        bnall   yh, a6, .Lmul_returnx
        slli    a8, yh, 12
        or      a8, a8, yl
        beqz    a8, .Lmul_returnx

.Lmul_returny:
        mov     xh, yh
        mov     xl, yl

.Lmul_returnx:
        slli    a8, xh, 12
        or      a8, a8, xl
        bnez    a8, .Lmul_return_nan
        /* Set the sign bit and return.  */
        extui   a7, a7, 31, 1
        slli    xh, xh, 1
        ssai    1
        src     xh, a7, xh
        j       .Lmul_done

.Lmul_ynan_or_inf:
        /* If x is zero, return NaN.  */
        bnez    xl, .Lmul_returny
        slli    a8, xh, 1
        bnez    a8, .Lmul_returny
        mov     xh, yh

.Lmul_return_nan:
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
        j       .Lmul_done

        .align  4
        .global __muldf3
        .type   __muldf3, @function
__muldf3:
#if __XTENSA_CALL0_ABI__
        leaf_entry sp, 32
        addi    sp, sp, -32
        s32i    a12, sp, 16
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#elif XCHAL_NO_MUL
        /* This is not really a leaf function; allocate enough stack space
           to allow CALL12s to a helper function.  */
        leaf_entry sp, 64
#else
        leaf_entry sp, 32
#endif
        movi    a6, 0x7ff00000

        /* Get the sign of the result.  */
        xor     a7, xh, yh

        /* Check for NaN and infinity.  */
        ball    xh, a6, .Lmul_xnan_or_inf
        ball    yh, a6, .Lmul_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, xh, 20, 11
        extui   a9, yh, 20, 11

        beqz    a8, .Lmul_xexpzero
.Lmul_xnormalized:
        beqz    a9, .Lmul_yexpzero
.Lmul_ynormalized:

        /* Add the exponents.  */
        add     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0x1fffff
        or      xh, xh, a6
        and     xh, xh, a10
        or      yh, yh, a6
        and     yh, yh, a10

        /* Multiply 64x64 to 128 bits.  The result ends up in xh/xl/a6.
           The least-significant word of the result is thrown away except
           that if it is nonzero, the lsb of a6 is set to 1.  */
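        /* Conceptually, in C (sketch only, assuming a host 128-bit type;
           the bottom word is reduced to a sticky bit for rounding):

             unsigned __int128 p = (unsigned __int128) x_mant * y_mant;
             xh = (uint32_t) (p >> 96);
             xl = (uint32_t) (p >> 64);
             a6 = (uint32_t) (p >> 32);
             if ((uint32_t) p != 0)
               a6 |= 1;  */
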
#if XCHAL_HAVE_MUL32_HIGH

        /* Compute a6 with any carry-outs in a10.  */
        movi    a10, 0
        mull    a6, xl, yh
        mull    a11, xh, yl
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a10, a10, 1
1:
        muluh   a11, xl, yl
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a10, a10, 1
1:
        /* If the low word of the result is nonzero, set the lsb of a6.  */
        mull    a11, xl, yl
        beqz    a11, 1f
        movi    a9, 1
        or      a6, a6, a9
1:
        /* Compute xl with any carry-outs in a9.  */
        movi    a9, 0
        mull    a11, xh, yh
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a9, a9, 1
1:
        muluh   a11, xh, yl
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a9, a9, 1
1:
        muluh   xl, xl, yh
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        /* Compute xh.  */
        muluh   xh, xh, yh
        add     xh, xh, a9

#else /* ! XCHAL_HAVE_MUL32_HIGH */

        /* Break the inputs into 16-bit chunks and compute 16 32-bit partial
           products.  These partial products are:

                0 xll * yll

                1 xll * ylh
                2 xlh * yll

                3 xll * yhl
                4 xlh * ylh
                5 xhl * yll

                6 xll * yhh
                7 xlh * yhl
                8 xhl * ylh
                9 xhh * yll

                10 xlh * yhh
                11 xhl * yhl
                12 xhh * ylh

                13 xhl * yhh
                14 xhh * yhl

                15 xhh * yhh

           where the input chunks are (hh, hl, lh, ll).  If using the Mul16
           or Mul32 multiplier options, these input chunks must be stored in
           separate registers.  For Mac16, the UMUL.AA.* opcodes can specify
           that the inputs come from either half of the registers, so there
           is no need to shift them out ahead of time.  If there is no
           multiply hardware, the 16-bit chunks can be extracted when setting
           up the arguments to the separate multiply function.  */

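        /* In C, the decomposition above is roughly (sketch only; chunk(v,n)
           extracts bits [16n+15:16n] of a 64-bit mantissa, and the real sum
           is 128 bits wide):

             unsigned __int128 p = 0;
             for (int i = 0; i < 4; i++)
               for (int j = 0; j < 4; j++)
                 p += (unsigned __int128) chunk (x, i) * chunk (y, j)
                      << (16 * (i + j));

           The groups in the list collect the partial products by equal
           i + j, i.e., by the position at which they are accumulated.  */
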
        /* Save a7 since it is needed to hold a temporary value.  */
        s32i    a7, sp, 4
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now.  (The function
           uses a custom ABI so nothing else needs to be saved.)  */
        s32i    a0, sp, 0
        s32i    a8, sp, 8
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define xlh a12
#define ylh a13
#define xhh a14
#define yhh a15

        /* Get the high halves of the inputs into registers.  */
        srli    xlh, xl, 16
        srli    ylh, yl, 16
        srli    xhh, xh, 16
        srli    yhh, yh, 16

#define xll xl
#define yll yl
#define xhl xh
#define yhl yh

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */
        extui   xl, xl, 0, 16
        extui   xh, xh, 0, 16
        extui   yl, yl, 0, 16
        extui   yh, yh, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a14, xreg); \
        set_arg_ ## yhalf (a15, yreg); \
        call12  .Lmul_mulsi3; \
        mov     dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

        /* Add pp1 and pp2 into a10 with carry-out in a9.  */
        do_mul(a10, xl, l, yl, h)       /* pp 1 */
        do_mul(a11, xl, h, yl, l)       /* pp 2 */
        movi    a9, 0
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a9, a9, 1
1:
        /* Initialize a6 with a9/a10 shifted into position.  Note that
           this value can be safely incremented without any carry-outs.  */
        ssai    16
        src     a6, a9, a10

        /* Compute the low word into a10.  */
        do_mul(a11, xl, l, yl, l)       /* pp 0 */
        sll     a10, a10
        add     a10, a10, a11
        bgeu    a10, a11, 1f
        addi    a6, a6, 1
1:
        /* Compute the contributions of pp0-5 to a6, with carry-outs in a9.
           This is good enough to determine the low half of a6, so that any
           nonzero bits from the low word of the result can be collapsed
           into a6, freeing up a register.  */
        movi    a9, 0
        do_mul(a11, xl, l, yh, l)       /* pp 3 */
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        do_mul(a11, xl, h, yl, h)       /* pp 4 */
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        do_mul(a11, xh, l, yl, l)       /* pp 5 */
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Collapse any nonzero bits from the low word into a6.  */
        beqz    a10, 1f
        movi    a11, 1
        or      a6, a6, a11
1:
        /* Add pp6-9 into a11 with carry-outs in a10.  */
        do_mul(a7, xl, l, yh, h)        /* pp 6 */
        do_mul(a11, xh, h, yl, l)       /* pp 9 */
        movi    a10, 0
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        do_mul(a7, xl, h, yh, l)        /* pp 7 */
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        do_mul(a7, xh, l, yl, h)        /* pp 8 */
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        /* Shift a10/a11 into position, and add low half of a11 to a6.  */
        src     a10, a10, a11
        add     a10, a10, a9
        sll     a11, a11
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a10, a10, 1
1:
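        /* Throughout this sequence, each carry-out is detected with the
           usual unsigned-overflow idiom; in C terms (sketch):

             sum += term;
             if (sum < term)    -- wrapped around, so there was a carry
               carry++;  */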
        /* Add pp10-12 into xl with carry-outs in a9.  */
        movi    a9, 0
        do_mul(xl, xl, h, yh, h)        /* pp 10 */
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        do_mul(a10, xh, l, yh, l)       /* pp 11 */
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        do_mul(a10, xh, h, yl, h)       /* pp 12 */
        add     xl, xl, a10
        bgeu    xl, a10, 1f
        addi    a9, a9, 1
1:
        /* Add pp13-14 into a11 with carry-outs in a10.  */
        do_mul(a11, xh, l, yh, h)       /* pp 13 */
        do_mul(a7, xh, h, yh, l)        /* pp 14 */
        movi    a10, 0
        add     a11, a11, a7
        bgeu    a11, a7, 1f
        addi    a10, a10, 1
1:
        /* Shift a10/a11 into position, and add low half of a11 to xl.  */
        src     a10, a10, a11
        add     a10, a10, a9
        sll     a11, a11
        add     xl, xl, a11
        bgeu    xl, a11, 1f
        addi    a10, a10, 1
1:
        /* Compute xh.  */
        do_mul(xh, xh, h, yh, h)        /* pp 15 */
        add     xh, xh, a10

        /* Restore values saved on the stack during the multiplication.  */
        l32i    a7, sp, 4
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        l32i    a0, sp, 0
        l32i    a8, sp, 8
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */

        /* Shift left by 12 bits, unless there was a carry-out from the
           multiply, in which case, shift by 11 bits and increment the
           exponent.  Note: It is convenient to use the constant 0x3ff
           instead of 0x400 when removing the extra exponent bias (so that
           it is easy to construct 0x7fe for the overflow check).  Reverse
           the logic here to decrement the exponent sum by one unless there
           was a carry-out.  */
        movi    a4, 11
        srli    a5, xh, 21 - 12
        bnez    a5, 1f
        addi    a4, a4, 1
        addi    a8, a8, -1
1:      ssl     a4
        src     xh, xh, xl
        src     xl, xl, a6
        sll     a6, a6

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result).  */
        movi    a4, 0x3ff
        sub     a8, a8, a4

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..7fd are OK here.  */
        slli    a4, a4, 1       /* 0x7fe */
        bgeu    a8, a4, .Lmul_overflow

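        /* Exponent bookkeeping above, as a C sketch:

             e -= 0x3ff;                -- remove the extra bias; e is now
                                        -- one less than the final exponent
             if ((uint32_t) e > 0x7fd)  -- the unsigned compare catches
               goto over_or_underflow;  -- negative e (underflow) too  */
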
.Lmul_round:
        /* Round.  */
        bgez    a6, .Lmul_rounded
        addi    xl, xl, 1
        beqz    xl, .Lmul_roundcarry
        slli    a6, a6, 1
        beqz    a6, .Lmul_exactlyhalf

.Lmul_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 20
        add     xh, xh, a8

.Lmul_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      xh, xh, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

.Lmul_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        j       .Lmul_rounded

.Lmul_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow is OK -- it will be added to the exponent.  */
        j       .Lmul_rounded

.Lmul_overflow:
        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity.  */
        addi    a8, a4, 1       /* 0x7ff */
        slli    xh, a8, 20
        movi    xl, 0
        j       .Lmul_addsign

.Lmul_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        mov     a9, a6
        ssr     a8
        bgeui   a8, 32, .Lmul_bigshift

        /* Shift xh/xl right.  Any bits that are shifted out of xl are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result.  */
        sll     a6, xl
        src     xl, xh, xl
        srl     xh, xh
        j       1f

.Lmul_bigshift:
        bgeui   a8, 64, .Lmul_flush_to_zero
        sll     a10, xl         /* lost bits shifted out of xl */
        src     a6, xh, xl
        srl     xl, xh
        movi    xh, 0
        or      a9, a9, a10

        /* Set the exponent to zero.  */
1:      movi    a8, 0

        /* Pack any nonzero bits shifted out into a6.  */
        beqz    a9, .Lmul_round
        movi    a9, 1
        or      a6, a6, a9
        j       .Lmul_round

.Lmul_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        j       .Lmul_done

#if XCHAL_NO_MUL

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  When using CALL0, this function
           uses a custom ABI: the inputs are passed in a13 and a14, the
           result is returned in a12, and a8 and a15 are clobbered.  */
        .align  4
.Lmul_mulsi3:
        leaf_entry sp, 16
        .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
        movi    \dst, 0
1:      add     \tmp1, \src2, \dst
        extui   \tmp2, \src1, 0, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx2 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 1, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx4 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 2, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx8 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 3, 1
        movnez  \dst, \tmp1, \tmp2

        srli    \src1, \src1, 4
        slli    \src2, \src2, 4
        bnez    \src1, 1b
        .endm
#if __XTENSA_CALL0_ABI__
        mul_mulsi3_body a12, a13, a14, a15, a8
#else
        /* The result will be written into a2, so save that argument in a4.  */
        mov     a4, a2
        mul_mulsi3_body a2, a4, a3, a5, a6
#endif
        leaf_return

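        /* mul_mulsi3_body is a four-bits-per-iteration shift-and-add
           multiply.  In C terms (sketch; the assembly uses conditional
           moves instead of branches):

             uint32_t r = 0;
             while (a)
               {
                 if (a & 1) r += b;
                 if (a & 2) r += b << 1;
                 if (a & 4) r += b << 2;
                 if (a & 8) r += b << 3;
                 a >>= 4;
                 b <<= 4;
               }  */
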
#endif /* XCHAL_NO_MUL */
#endif /* L_muldf3 */

#ifdef L_divdf3

        /* Division */

#if XCHAL_HAVE_DFP_DIV

        .text
        .align  4
        .global __divdf3
        .type   __divdf3, @function
__divdf3:
        leaf_entry sp, 16

        wfrd    f1, xh, xl
        wfrd    f2, yh, yl

        div0.d  f3, f2
        nexp01.d f4, f2
        const.d f0, 1
        maddn.d f0, f4, f3
        const.d f5, 0
        mov.d   f7, f2
        mkdadj.d f7, f1
        maddn.d f3, f0, f3
        maddn.d f5, f0, f0
        nexp01.d f1, f1
        div0.d  f2, f2
        maddn.d f3, f5, f3
        const.d f5, 1
        const.d f0, 0
        neg.d   f6, f1
        maddn.d f5, f4, f3
        maddn.d f0, f6, f2
        maddn.d f3, f5, f3
        maddn.d f6, f4, f0
        const.d f2, 1
        maddn.d f2, f4, f3
        maddn.d f0, f6, f3
        neg.d   f1, f1
        maddn.d f3, f2, f3
        maddn.d f1, f4, f0
        addexpm.d f0, f7
        addexp.d f3, f7
        divn.d  f0, f1, f3

        rfr     xl, f0
        rfrd    xh, f0

        leaf_return

#else

        .literal_position

__divdf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
        /* Clear the sign bit of y.  */
        slli    yh, yh, 1
        srli    yh, yh, 1

        /* Check for division by zero.  */
        or      a10, yh, yl
        beqz    a10, .Ldiv_yzero

        /* Normalize y.  Adjust the exponent in a9.  */
        beqz    yh, .Ldiv_yh_zero
        do_nsau a10, yh, a11, a9
        addi    a10, a10, -11
        ssl     a10
        src     yh, yh, yl
        sll     yl, yl
        movi    a9, 1
        sub     a9, a9, a10
        j       .Ldiv_ynormalized
.Ldiv_yh_zero:
        do_nsau a10, yl, a11, a9
        addi    a10, a10, -11
        movi    a9, -31
        sub     a9, a9, a10
        ssl     a10
        bltz    a10, .Ldiv_yl_srl
        sll     yh, yl
        movi    yl, 0
        j       .Ldiv_ynormalized
.Ldiv_yl_srl:
        srl     yh, yl
        sll     yl, yl
        j       .Ldiv_ynormalized

.Ldiv_yzero:
        /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
        slli    xh, xh, 1
        srli    xh, xh, 1
        or      xl, xl, xh
        srli    xh, a7, 31
        slli    xh, xh, 31
        or      xh, xh, a6
        bnez    xl, 1f
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
1:      movi    xl, 0
        leaf_return

.Ldiv_xexpzero:
        /* Clear the sign bit of x.  */
        slli    xh, xh, 1
        srli    xh, xh, 1

        /* If x is zero, return zero.  */
        or      a10, xh, xl
        beqz    a10, .Ldiv_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        beqz    xh, .Ldiv_xh_zero
        do_nsau a10, xh, a11, a8
        addi    a10, a10, -11
        ssl     a10
        src     xh, xh, xl
        sll     xl, xl
        movi    a8, 1
        sub     a8, a8, a10
        j       .Ldiv_xnormalized
.Ldiv_xh_zero:
        do_nsau a10, xl, a11, a8
        addi    a10, a10, -11
        movi    a8, -31
        sub     a8, a8, a10
        ssl     a10
        bltz    a10, .Ldiv_xl_srl
        sll     xh, xl
        movi    xl, 0
        j       .Ldiv_xnormalized
.Ldiv_xl_srl:
        srl     xh, xl
        sll     xl, xl
        j       .Ldiv_xnormalized

.Ldiv_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        leaf_return

.Ldiv_xnan_or_inf:
        /* Set the sign bit of the result.  */
        srli    a7, yh, 31
        slli    a7, a7, 31
        xor     xh, xh, a7
        /* If y is NaN or Inf, return NaN.  */
        ball    yh, a6, .Ldiv_return_nan
        slli    a8, xh, 12
        or      a8, a8, xl
        bnez    a8, .Ldiv_return_nan
        leaf_return

.Ldiv_ynan_or_inf:
        /* If y is Infinity, return zero.  */
        slli    a8, yh, 12
        or      a8, a8, yl
        beqz    a8, .Ldiv_return_zero
        /* y is NaN; return it.  */
        mov     xh, yh
        mov     xl, yl

.Ldiv_return_nan:
        movi    a4, 0x80000     /* make it a quiet NaN */
        or      xh, xh, a4
        leaf_return

.Ldiv_highequal1:
        bltu    xl, yl, 2f
        j       3f

        .align  4
        .global __divdf3
        .type   __divdf3, @function
__divdf3:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000

        /* Get the sign of the result.  */
        xor     a7, xh, yh

        /* Check for NaN and infinity.  */
        ball    xh, a6, .Ldiv_xnan_or_inf
        ball    yh, a6, .Ldiv_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, xh, 20, 11
        extui   a9, yh, 20, 11

        beqz    a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
        beqz    a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

        /* Subtract the exponents.  */
        sub     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0x1fffff
        or      xh, xh, a6
        and     xh, xh, a10
        or      yh, yh, a6
        and     yh, yh, a10

        /* Set SAR for left shift by one.  */
        ssai    (32 - 1)

        /* The first digit of the mantissa division must be a one.
           Shift x (and adjust the exponent) as needed to make this true.  */
        bltu    yh, xh, 3f
        beq     yh, xh, .Ldiv_highequal1
2:      src     xh, xh, xl
        sll     xl, xl
        addi    a8, a8, -1
3:
        /* Do the first subtraction and shift.  */
        sub     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, -1
1:      sub     xl, xl, yl
        src     xh, xh, xl
        sll     xl, xl

        /* Put the quotient into a10/a11.  */
        movi    a10, 0
        movi    a11, 1

        /* Divide one bit at a time for 52 bits.  */
        movi    a9, 52
#if XCHAL_HAVE_LOOPS
        loop    a9, .Ldiv_loopend
#endif
.Ldiv_loop:
        /* Shift the quotient << 1.  */
        src     a10, a10, a11
        sll     a11, a11

        /* Is this digit a 0 or 1?  */
        bltu    xh, yh, 3f
        beq     xh, yh, .Ldiv_highequal2

        /* Output a 1 and subtract.  */
2:      addi    a11, a11, 1
        sub     xh, xh, yh
        bgeu    xl, yl, 1f
        addi    xh, xh, -1
1:      sub     xl, xl, yl

        /* Shift the dividend << 1.  */
3:      src     xh, xh, xl
        sll     xl, xl

#if !XCHAL_HAVE_LOOPS
        addi    a9, a9, -1
        bnez    a9, .Ldiv_loop
#endif
.Ldiv_loopend:

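        /* The loop above is plain restoring long division, one quotient
           bit per iteration.  As a C sketch over 64-bit values:

             for (i = 0; i < 52; i++)
               {
                 q <<= 1;
                 if (rem >= divisor)
                   {
                     q |= 1;
                     rem -= divisor;
                   }
                 rem <<= 1;
               }  */
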
        /* Add the exponent bias (less one to account for the explicit "1.0"
           of the mantissa that will be added to the exponent in the final
           result).  */
        movi    a9, 0x3fe
        add     a8, a8, a9

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..7fd are OK here.  */
        addmi   a9, a9, 0x400   /* 0x7fe */
        bgeu    a8, a9, .Ldiv_overflow

.Ldiv_round:
        /* Round.  The remainder (<< 1) is in xh/xl.  */
        bltu    xh, yh, .Ldiv_rounded
        beq     xh, yh, .Ldiv_highequal3
.Ldiv_roundup:
        addi    a11, a11, 1
        beqz    a11, .Ldiv_roundcarry

.Ldiv_rounded:
        mov     xl, a11
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 20
        add     xh, a10, a8

.Ldiv_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      xh, xh, a7
        leaf_return

.Ldiv_highequal2:
        bgeu    xl, yl, 2b
        j       3b

.Ldiv_highequal3:
        bltu    xl, yl, .Ldiv_rounded
        bne     xl, yl, .Ldiv_roundup

        /* Remainder is exactly half the divisor.  Round even.  */
        addi    a11, a11, 1
        beqz    a11, .Ldiv_roundcarry
        srli    a11, a11, 1
        slli    a11, a11, 1
        j       .Ldiv_rounded

.Ldiv_overflow:
        bltz    a8, .Ldiv_underflow
        /* Return +/- Infinity.  */
        addi    a8, a9, 1       /* 0x7ff */
        slli    xh, a8, 20
        movi    xl, 0
        j       .Ldiv_addsign

.Ldiv_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        ssr     a8
        bgeui   a8, 32, .Ldiv_bigshift

        /* Shift a10/a11 right.  Any bits that are shifted out of a11 are
           saved in a6 for rounding the result.  */
        sll     a6, a11
        src     a11, a10, a11
        srl     a10, a10
        j       1f

.Ldiv_bigshift:
        bgeui   a8, 64, .Ldiv_flush_to_zero
        sll     a9, a11         /* lost bits shifted out of a11 */
        src     a6, a10, a11
        srl     a11, a10
        movi    a10, 0
        or      xl, xl, a9

        /* Set the exponent to zero.  */
1:      movi    a8, 0

        /* Pack any nonzero remainder (in xh/xl) into a6.  */
        or      xh, xh, xl
        beqz    xh, 1f
        movi    a9, 1
        or      a6, a6, a9

        /* Round a10/a11 based on the bits shifted out into a6.  */
1:      bgez    a6, .Ldiv_rounded
        addi    a11, a11, 1
        beqz    a11, .Ldiv_roundcarry
        slli    a6, a6, 1
        bnez    a6, .Ldiv_rounded
        srli    a11, a11, 1
        slli    a11, a11, 1
        j       .Ldiv_rounded

.Ldiv_roundcarry:
        /* a11 is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    a10, a10, 1
        /* Overflow to the exponent field is OK.  */
        j       .Ldiv_rounded

.Ldiv_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    xh, a7, 31
        slli    xh, xh, 31
        movi    xl, 0
        leaf_return

#endif /* XCHAL_HAVE_DFP_DIV */

#endif /* L_divdf3 */

#ifdef L_cmpdf2

        /* Equal and Not Equal */

        .align  4
        .global __eqdf2
        .global __nedf2
        .set    __nedf2, __eqdf2
        .type   __eqdf2, @function
__eqdf2:
        leaf_entry sp, 16
        bne     xl, yl, 2f
        bne     xh, yh, 4f

        /* The values are equal but NaN != NaN.  Check the exponent.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, 3f

        /* Equal.  */
        movi    a2, 0
        leaf_return

        /* Not equal.  */
2:      movi    a2, 1
        leaf_return

        /* Check if the mantissas are nonzero.  */
3:      slli    a7, xh, 12
        or      a7, a7, xl
        j       5f

        /* Check if x and y are zero with different signs.  */
4:      or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl      /* xl == yl here */

        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
           of x when exponent(x) = 0x7ff and x == y.  */
5:      movi    a2, 0
        movi    a3, 1
        movnez  a2, a3, a7
        leaf_return


        /* Greater Than */

        .align  4
        .global __gtdf2
        .type   __gtdf2, @function
__gtdf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Lle_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 0
        leaf_return


        /* Less Than or Equal */

        .align  4
        .global __ledf2
        .type   __ledf2, @function
__ledf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Lle_cmp
        movi    a2, 1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

.Lle_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, xh, yh
        bltz    a7, .Lle_diff_signs

        /* Check if x is negative.  */
        bltz    xh, .Lle_xneg

        /* Check if x <= y.  */
        bltu    xh, yh, 4f
        bne     xh, yh, 5f
        bltu    yl, xl, 5f
4:      movi    a2, 0
        leaf_return

.Lle_xneg:
        /* Check if y <= x.  */
        bltu    yh, xh, 4b
        bne     yh, xh, 5f
        bgeu    xl, yl, 4b
5:      movi    a2, 1
        leaf_return

.Lle_diff_signs:
        bltz    xh, 4b

        /* Check if both x and y are zero.  */
        or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl
        or      a7, a7, yl
        movi    a2, 1
        movi    a3, 0
        moveqz  a2, a3, a7
        leaf_return

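        /* These comparisons rely on the IEEE-754 layout: for two finite
           doubles of the same sign, comparing the raw 64-bit patterns as
           unsigned integers orders them the same way as their values,
           with the order reversed when both are negative.  C sketch:

             if (sign (x) == sign (y))
               less = sign (x) ? bits (y) < bits (x)
                               : bits (x) < bits (y);  */
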
        /* Greater Than or Equal */

        .align  4
        .global __gedf2
        .type   __gedf2, @function
__gedf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Llt_cmp
        movi    a2, -1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, -1
        leaf_return


        /* Less Than */

        .align  4
        .global __ltdf2
        .type   __ltdf2, @function
__ltdf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 2f
1:      bnall   yh, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, .Llt_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

.Llt_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, xh, yh
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative.  */
        bltz    xh, .Llt_xneg

        /* Check if x < y.  */
        bltu    xh, yh, 4f
        bne     xh, yh, 5f
        bgeu    xl, yl, 5f
4:      movi    a2, -1
        leaf_return

.Llt_xneg:
        /* Check if y < x.  */
        bltu    yh, xh, 4b
        bne     yh, xh, 5f
        bltu    yl, xl, 4b
5:      movi    a2, 0
        leaf_return

.Llt_diff_signs:
        bgez    xh, 5b

        /* Check if both x and y are zero.  */
        or      a7, xh, yh
        slli    a7, a7, 1
        or      a7, a7, xl
        or      a7, a7, yl
        movi    a2, 0
        movi    a3, -1
        movnez  a2, a3, a7
        leaf_return


        /* Unordered */

        .align  4
        .global __unorddf2
        .type   __unorddf2, @function
__unorddf2:
        leaf_entry sp, 16
        movi    a6, 0x7ff00000
        ball    xh, a6, 3f
1:      ball    yh, a6, 4f
2:      movi    a2, 0
        leaf_return

3:      slli    a7, xh, 12
        or      a7, a7, xl
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

4:      slli    a7, yh, 12
        or      a7, a7, yl
        beqz    a7, 2b
        movi    a2, 1
        leaf_return

#endif /* L_cmpdf2 */

#ifdef L_fixdfsi

        .align  4
        .global __fixdfsi
        .type   __fixdfsi, @function
__fixdfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixdfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 19, 10  /* 0x3fe */
        sub     a4, a4, a5
        bgei    a4, 32, .Lfixdfsi_maxint
        blti    a4, 1, .Lfixdfsi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     a5, a7, xl

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixdfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixdfsi_maxint

        /* Translate NaN to +maxint.  */
        movi    xh, 0

.Lfixdfsi_maxint:
        slli    a4, a6, 11      /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
        movgez  a4, a5, xh
        mov     a2, a4
        leaf_return

.Lfixdfsi_zero:
        movi    a2, 0
        leaf_return

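        /* The whole conversion, as a C sketch (truncation toward zero;
           names are illustrative):

             int e = (int) ((bits >> 52) & 0x7ff) - 0x3fe;
             if (e >= 32) goto maxint;          -- too large (or Inf/NaN)
             if (e < 1)   return 0;             -- magnitude below 1.0
             uint64_t f = (bits & 0xfffffffffffffull) | 1ull << 52;
             int32_t  r = (int32_t) (f >> (53 - e));
             return (int64_t) bits < 0 ? -r : r;  */
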
#endif /* L_fixdfsi */

#ifdef L_fixdfdi

        .align  4
        .global __fixdfdi
        .type   __fixdfdi, @function
__fixdfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixdfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 19, 10  /* 0x3fe */
        sub     a4, a4, a5
        bgei    a4, 64, .Lfixdfdi_maxint
        blti    a4, 1, .Lfixdfdi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     xh, a7, xl
        sll     xl, xl

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixdfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixdfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixdfdi_smallshift:
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixdfdi_shifted

.Lfixdfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixdfdi_maxint

        /* Translate NaN to +maxint.  */
        movi    xh, 0

.Lfixdfdi_maxint:
        slli    a7, a6, 11      /* 0x80000000 */
        bgez    xh, 1f
        mov     xh, a7
        movi    xl, 0
        leaf_return

1:      addi    xh, a7, -1      /* 0x7fffffff */
        movi    xl, -1
        leaf_return

.Lfixdfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

#endif /* L_fixdfdi */

#ifdef L_fixunsdfsi

        .align  4
        .global __fixunsdfsi
        .type   __fixunsdfsi, @function
__fixunsdfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixunsdfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 20, 10  /* 0x3ff */
        sub     a4, a4, a5
        bgei    a4, 32, .Lfixunsdfsi_maxint
        bltz    a4, .Lfixunsdfsi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     a5, a7, xl

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 32, .Lfixunsdfsi_bigexp
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixunsdfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixunsdfsi_maxint

        /* Translate NaN to 0xffffffff.  */
        movi    a2, -1
        leaf_return

.Lfixunsdfsi_maxint:
        slli    a4, a6, 11      /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */
        movgez  a4, a5, xh
        mov     a2, a4
        leaf_return

.Lfixunsdfsi_zero:
        movi    a2, 0
        leaf_return

.Lfixunsdfsi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    xh, 1f
        mov     a2, a5          /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative.  */
1:      slli    a2, a6, 11
        leaf_return

#endif /* L_fixunsdfsi */

#ifdef L_fixunsdfdi

        .align  4
        .global __fixunsdfdi
        .type   __fixunsdfdi, @function
__fixunsdfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7ff00000
        ball    xh, a6, .Lfixunsdfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64.  */
        extui   a4, xh, 20, 11
        extui   a5, a6, 20, 10  /* 0x3ff */
        sub     a4, a4, a5
        bgei    a4, 64, .Lfixunsdfdi_maxint
        bltz    a4, .Lfixunsdfdi_zero

        /* Add explicit "1.0" and shift << 11.  */
        or      a7, xh, a6
        ssai    (32 - 11)
        src     xh, a7, xl
        sll     xl, xl

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 64, .Lfixunsdfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunsdfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixunsdfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixunsdfdi_smallshift:
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixunsdfdi_shifted

.Lfixunsdfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, xh, 12
        or      a4, a4, xl
        beqz    a4, .Lfixunsdfdi_maxint

        /* Translate NaN to 0xffffffff....  */
1:      movi    xh, -1
        movi    xl, -1
        leaf_return

.Lfixunsdfdi_maxint:
        bgez    xh, 1b
2:      slli    xh, a6, 11      /* 0x80000000 */
        movi    xl, 0
        leaf_return

.Lfixunsdfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

.Lfixunsdfdi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a7, 2b
        leaf_return             /* no shift needed */

#endif /* L_fixunsdfdi */

#ifdef L_floatsidf

        .align  4
        .global __floatunsidf
        .type   __floatunsidf, @function
__floatunsidf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsidf_return_zero

        /* Set the sign to zero and jump to the floatsidf code.  */
        movi    a7, 0
        j       .Lfloatsidf_normalize

        .align  4
        .global __floatsidf
        .type   __floatsidf, @function
__floatsidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        beqz    a2, .Lfloatsidf_return_zero

        /* Save the sign.  */
        extui   a7, a2, 31, 1

        /* Get the absolute value.  */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsidf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position.  */
        srli    xh, a5, 11
        slli    xl, a5, (32 - 11)

        /* Set the exponent.  */
        movi    a5, 0x41d       /* 0x3fe + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 20
        add     xh, xh, a5

        /* Add the sign and return.  */
        slli    a7, a7, 31
        or      xh, xh, a7
        leaf_return

.Lfloatsidf_return_zero:
        movi    a3, 0
        leaf_return

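        /* Integer-to-double in C terms (sketch; a 32-bit integer always
           fits in the 52-bit fraction, so no rounding is needed):

             n  = clz (u);                          -- normalize: msb to bit 31
             m  = u << n;
             hi = (m >> 11) + ((0x41d - n) << 20);  -- 0x3fe + 31 - n, biased
             lo = m << 21;
             hi |= sign << 31;  */
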
#endif /* L_floatsidf */

#ifdef L_floatdidf

        .align  4
        .global __floatundidf
        .type   __floatundidf, @function
__floatundidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdidf code.  */
        movi    a7, 0
        j       .Lfloatdidf_normalize

        .align  4
        .global __floatdidf
        .type   __floatdidf, @function
__floatdidf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign.  */
        extui   a7, xh, 31, 1

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdidf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdidf_normalize
        addi    xh, xh, -1

.Lfloatdidf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdidf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdidf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */
        ssai    11
        sll     a6, xl
        src     xl, xh, xl
        srl     xh, xh

        /* Set the exponent.  */
        movi    a5, 0x43d       /* 0x3fe + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 20
        add     xh, xh, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      xh, xh, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, 2f
        addi    xl, xl, 1
        beqz    xl, .Lfloatdidf_roundcarry

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdidf_exactlyhalf
2:      leaf_return

.Lfloatdidf_bigshift:
        /* xh is zero.  Normalize with first 1 bit of xl in the msb of xh.  */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdidf_shifted

.Lfloatdidf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    xl, xl, 1
        slli    xl, xl, 1
        leaf_return

.Lfloatdidf_roundcarry:
        /* xl is always zero when the rounding increment overflows, so
           there's no need to round it to an even value.  */
        addi    xh, xh, 1
        /* Overflow to the exponent is OK.  */
        leaf_return

#endif /* L_floatdidf */

#ifdef L_truncdfsf2

        .align  4
        .global __truncdfsf2
        .type   __truncdfsf2, @function
__truncdfsf2:
        leaf_entry sp, 16

        /* Adjust the exponent bias.  */
        movi    a4, (0x3ff - 0x7f) << 20
        sub     a5, xh, a4

        /* Check for underflow.  */
        xor     a6, xh, a5
        bltz    a6, .Ltrunc_underflow
        extui   a6, a5, 20, 11
        beqz    a6, .Ltrunc_underflow

        /* Check for overflow.  */
        movi    a4, 255
        bge     a6, a4, .Ltrunc_overflow

        /* Shift a5/xl << 3 into a5/a4.  */
        ssai    (32 - 3)
        src     a5, a5, xl
        sll     a4, xl

.Ltrunc_addsign:
        /* Add the sign bit.  */
        extui   a6, xh, 31, 1
        slli    a6, a6, 31
        or      a2, a6, a5

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a4, 1f
        addi    a2, a2, 1
        /* Overflow to the exponent is OK.  The answer will be correct.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a4, a4, 1
        beqz    a4, .Ltrunc_exactlyhalf
1:      leaf_return

.Ltrunc_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Ltrunc_overflow:
        /* Check if exponent == 0x7ff.  */
        movi    a4, 0x7ff00000
        bnall   xh, a4, 1f

        /* Check if mantissa is nonzero.  */
        slli    a5, xh, 12
        or      a5, a5, xl
        beqz    a5, 1f

        /* Shift a4 to set a bit in the mantissa, making a quiet NaN.  */
        srli    a4, a4, 1

1:      slli    a4, a4, 4       /* 0xff000000 or 0xff800000 */
        /* Add the sign bit.  */
        extui   a6, xh, 31, 1
        ssai    1
        src     a2, a6, a4
        leaf_return

.Ltrunc_underflow:
        /* Find shift count for a subnormal.  Flush to zero if >= 32.  */
        extui   a6, xh, 20, 11
        movi    a5, 0x3ff - 0x7f
        sub     a6, a5, a6
        addi    a6, a6, 1
        bgeui   a6, 32, 1f

        /* Replace the exponent with an explicit "1.0".  */
        slli    a5, a5, 13      /* 0x700000 */
        or      a5, a5, xh
        slli    a5, a5, 11
        srli    a5, a5, 11

        /* Shift the mantissa left by 3 bits (into a5/a4).  */
        ssai    (32 - 3)
        src     a5, a5, xl
        sll     a4, xl

        /* Shift right by a6.  */
        ssr     a6
        sll     a7, a4
        src     a4, a5, a4
        srl     a5, a5
        beqz    a7, .Ltrunc_addsign
        or      a4, a4, a6      /* any positive, nonzero value will work */
        j       .Ltrunc_addsign

        /* Return +/- zero.  */
1:      extui   a2, xh, 31, 1
        slli    a2, a2, 31
        leaf_return

#endif /* L_truncdfsf2 */

#ifdef L_extendsfdf2

        .align  4
        .global __extendsfdf2
        .type   __extendsfdf2, @function
__extendsfdf2:
        leaf_entry sp, 16

        /* Save the sign bit and then shift it off.  */
        extui   a5, a2, 31, 1
        slli    a5, a5, 31
        slli    a4, a2, 1

        /* Extract and check the exponent.  */
        extui   a6, a2, 23, 8
        beqz    a6, .Lextend_expzero
        addi    a6, a6, 1
        beqi    a6, 256, .Lextend_nan_or_inf

        /* Shift >> 3 into a4/xl.  */
        srli    a4, a4, 4
        slli    xl, a2, (32 - 3)

        /* Adjust the exponent bias.  */
        movi    a6, (0x3ff - 0x7f) << 20
        add     a4, a4, a6

        /* Add the sign bit.  */
        or      xh, a4, a5
        leaf_return

.Lextend_nan_or_inf:
        movi    a4, 0x7ff00000

        /* Check for NaN.  */
        slli    a7, a2, 9
        beqz    a7, 1f

        slli    a6, a6, 11      /* 0x80000 */
        or      a4, a4, a6

        /* Add the sign and return.  */
1:      or      xh, a4, a5
        movi    xl, 0
        leaf_return

.Lextend_expzero:
        beqz    a4, 1b

        /* Normalize it to have 8 zero bits before the first 1 bit.  */
        do_nsau a7, a4, a2, a3
        addi    a7, a7, -8
        ssl     a7
        sll     a4, a4

        /* Shift >> 3 into a4/xl.  */
        slli    xl, a4, (32 - 3)
        srli    a4, a4, 3

        /* Set the exponent.  */
        movi    a6, 0x3fe - 0x7f
        sub     a6, a6, a7
        slli    a6, a6, 20
        add     a4, a4, a6

        /* Add the sign and return.  */
        or      xh, a4, a5
        leaf_return

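        /* For a normal float, the extension above amounts to, in C terms
           (sketch with illustrative names):

             hi  = ((fbits & 0x7fffffff) >> 3) + ((0x3ff - 0x7f) << 20);
             hi |= sign << 31;
             lo  = fbits << 29;

           i.e., rebias the 8-bit exponent to 11 bits and spread the
           23-bit fraction across the two result words.  */
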
#endif /* L_extendsfdf2 */


#if XCHAL_HAVE_DFP_SQRT
#ifdef L_sqrt

        .text
        .align  4
        .global __ieee754_sqrt
        .type   __ieee754_sqrt, @function
__ieee754_sqrt:
        leaf_entry sp, 16

        wfrd    f1, xh, xl

        sqrt0.d f2, f1
        const.d f4, 0
        maddn.d f4, f2, f2
        nexp01.d f3, f1
        const.d f0, 3
        addexp.d f3, f0
        maddn.d f0, f4, f3
        nexp01.d f4, f1
        maddn.d f2, f0, f2
        const.d f5, 0
        maddn.d f5, f2, f3
        const.d f0, 3
        maddn.d f0, f5, f2
        neg.d   f6, f4
        maddn.d f2, f0, f2
        const.d f0, 0
        const.d f5, 0
        const.d f7, 0
        maddn.d f0, f6, f2
        maddn.d f5, f2, f3
        const.d f3, 3
        maddn.d f7, f3, f2
        maddn.d f4, f0, f0
        maddn.d f3, f5, f2
        neg.d   f2, f7
        maddn.d f0, f4, f2
        maddn.d f7, f3, f7
        mksadj.d f2, f1
        nexp01.d f1, f1
        maddn.d f1, f0, f0
        neg.d   f3, f7
        addexpm.d f0, f2
        addexp.d f3, f2
        divn.d  f0, f1, f3

        rfr     xl, f0
        rfrd    xh, f0

        leaf_return

#endif /* L_sqrt */
#endif /* XCHAL_HAVE_DFP_SQRT */

#if XCHAL_HAVE_DFP_RECIP
#ifdef L_recipdf2
        /* Reciprocal */

        .align  4
        .global __recipdf2
        .type   __recipdf2, @function
__recipdf2:
        leaf_entry sp, 16

        wfrd    f1, xh, xl

        recip0.d f0, f1
        const.d f2, 2
        msub.d  f2, f1, f0
        mul.d   f3, f1, f0
        const.d f4, 2
        mul.d   f5, f0, f2
        msub.d  f4, f3, f2
        const.d f2, 1
        mul.d   f0, f5, f4
        msub.d  f2, f1, f0
        maddn.d f0, f0, f2

        rfr     xl, f0
        rfrd    xh, f0

        leaf_return

#endif /* L_recipdf2 */
#endif /* XCHAL_HAVE_DFP_RECIP */

#if XCHAL_HAVE_DFP_RSQRT
#ifdef L_rsqrtdf2
        /* Reciprocal square root */

        .align  4
        .global __rsqrtdf2
        .type   __rsqrtdf2, @function
__rsqrtdf2:
        leaf_entry sp, 16

        wfrd    f1, xh, xl

        rsqrt0.d f0, f1
        mul.d   f2, f1, f0
        const.d f3, 3
        mul.d   f4, f3, f0
        const.d f5, 1
        msub.d  f5, f2, f0
        maddn.d f0, f4, f5
        const.d f2, 1
        mul.d   f4, f1, f0
        mul.d   f5, f3, f0
        msub.d  f2, f4, f0
        maddn.d f0, f5, f2
        const.d f2, 1
        mul.d   f1, f1, f0
        mul.d   f3, f3, f0
        msub.d  f2, f1, f0
        maddn.d f0, f3, f2

        rfr     xl, f0
        rfrd    xh, f0

        leaf_return

#endif /* L_rsqrtdf2 */
#endif /* XCHAL_HAVE_DFP_RSQRT */