1/* IEEE-754 double-precision functions for Xtensa 2 Copyright (C) 2006 Free Software Foundation, Inc. 3 Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2, or (at your option) 10 any later version. 11 12 In addition to the permissions in the GNU General Public License, 13 the Free Software Foundation gives you unlimited permission to link 14 the compiled version of this file into combinations with other 15 programs, and to distribute those combinations without any 16 restriction coming from the use of this file. (The General Public 17 License restrictions do apply in other respects; for example, they 18 cover modification of the file, and distribution when not linked 19 into a combine executable.) 20 21 GCC is distributed in the hope that it will be useful, but WITHOUT 22 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 23 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 24 License for more details. 25 26 You should have received a copy of the GNU General Public License 27 along with GCC; see the file COPYING. If not, write to the Free 28 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 29 02110-1301, USA. */ 30 31#ifdef __XTENSA_EB__ 32#define xh a2 33#define xl a3 34#define yh a4 35#define yl a5 36#else 37#define xh a3 38#define xl a2 39#define yh a5 40#define yl a4 41#endif 42 43/* Warning! The branch displacements for some Xtensa branch instructions 44 are quite small, and this code has been carefully laid out to keep 45 branch targets in range. If you change anything, be sure to check that 46 the assembler is not relaxing anything to branch over a jump. */ 47 48#ifdef L_negdf2 49 50 .align 4 51 .global __negdf2 52 .type __negdf2, @function 53__negdf2: 54 leaf_entry sp, 16 55 movi a4, 0x80000000 56 xor xh, xh, a4 57 leaf_return 58 59#endif /* L_negdf2 */ 60 61#ifdef L_addsubdf3 62 63 /* Addition */ 64__adddf3_aux: 65 66 /* Handle NaNs and Infinities. (This code is placed before the 67 start of the function just to keep it in range of the limited 68 branch displacements.) */ 69 70.Ladd_xnan_or_inf: 71 /* If y is neither Infinity nor NaN, return x. */ 72 bnall yh, a6, 1f 73 /* If x is a NaN, return it. Otherwise, return y. */ 74 slli a7, xh, 12 75 or a7, a7, xl 76 beqz a7, .Ladd_ynan_or_inf 771: leaf_return 78 79.Ladd_ynan_or_inf: 80 /* Return y. */ 81 mov xh, yh 82 mov xl, yl 83 leaf_return 84 85.Ladd_opposite_signs: 86 /* Operand signs differ. Do a subtraction. */ 87 slli a7, a6, 11 88 xor yh, yh, a7 89 j .Lsub_same_sign 90 91 .align 4 92 .global __adddf3 93 .type __adddf3, @function 94__adddf3: 95 leaf_entry sp, 16 96 movi a6, 0x7ff00000 97 98 /* Check if the two operands have the same sign. */ 99 xor a7, xh, yh 100 bltz a7, .Ladd_opposite_signs 101 102.Ladd_same_sign: 103 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). */ 104 ball xh, a6, .Ladd_xnan_or_inf 105 ball yh, a6, .Ladd_ynan_or_inf 106 107 /* Compare the exponents. The smaller operand will be shifted 108 right by the exponent difference and added to the larger 109 one. */ 110 extui a7, xh, 20, 12 111 extui a8, yh, 20, 12 112 bltu a7, a8, .Ladd_shiftx 113 114.Ladd_shifty: 115 /* Check if the smaller (or equal) exponent is zero. */ 116 bnone yh, a6, .Ladd_yexpzero 117 118 /* Replace yh sign/exponent with 0x001. 
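      Illustrative only, not part of the original code: in C terms, with
      32-bit unsigned words as used throughout this file, the OR/SLLI/SRLI
      sequence below is equivalent to

         yh = (yh & 0x000fffff) | 0x00100000;

      i.e. the sign and exponent fields are dropped and the implicit "1.0"
      bit of the mantissa is made explicit before the alignment shift.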
*/ 119 or yh, yh, a6 120 slli yh, yh, 11 121 srli yh, yh, 11 122 123.Ladd_yexpdiff: 124 /* Compute the exponent difference. Optimize for difference < 32. */ 125 sub a10, a7, a8 126 bgeui a10, 32, .Ladd_bigshifty 127 128 /* Shift yh/yl right by the exponent difference. Any bits that are 129 shifted out of yl are saved in a9 for rounding the result. */ 130 ssr a10 131 movi a9, 0 132 src a9, yl, a9 133 src yl, yh, yl 134 srl yh, yh 135 136.Ladd_addy: 137 /* Do the 64-bit addition. */ 138 add xl, xl, yl 139 add xh, xh, yh 140 bgeu xl, yl, 1f 141 addi xh, xh, 1 1421: 143 /* Check if the add overflowed into the exponent. */ 144 extui a10, xh, 20, 12 145 beq a10, a7, .Ladd_round 146 mov a8, a7 147 j .Ladd_carry 148 149.Ladd_yexpzero: 150 /* y is a subnormal value. Replace its sign/exponent with zero, 151 i.e., no implicit "1.0", and increment the apparent exponent 152 because subnormals behave as if they had the minimum (nonzero) 153 exponent. Test for the case when both exponents are zero. */ 154 slli yh, yh, 12 155 srli yh, yh, 12 156 bnone xh, a6, .Ladd_bothexpzero 157 addi a8, a8, 1 158 j .Ladd_yexpdiff 159 160.Ladd_bothexpzero: 161 /* Both exponents are zero. Handle this as a special case. There 162 is no need to shift or round, and the normal code for handling 163 a carry into the exponent field will not work because it 164 assumes there is an implicit "1.0" that needs to be added. */ 165 add xl, xl, yl 166 add xh, xh, yh 167 bgeu xl, yl, 1f 168 addi xh, xh, 1 1691: leaf_return 170 171.Ladd_bigshifty: 172 /* Exponent difference > 64 -- just return the bigger value. */ 173 bgeui a10, 64, 1b 174 175 /* Shift yh/yl right by the exponent difference. Any bits that are 176 shifted out are saved in a9 for rounding the result. */ 177 ssr a10 178 sll a11, yl /* lost bits shifted out of yl */ 179 src a9, yh, yl 180 srl yl, yh 181 movi yh, 0 182 beqz a11, .Ladd_addy 183 or a9, a9, a10 /* any positive, nonzero value will work */ 184 j .Ladd_addy 185 186.Ladd_xexpzero: 187 /* Same as "yexpzero" except skip handling the case when both 188 exponents are zero. */ 189 slli xh, xh, 12 190 srli xh, xh, 12 191 addi a7, a7, 1 192 j .Ladd_xexpdiff 193 194.Ladd_shiftx: 195 /* Same thing as the "shifty" code, but with x and y swapped. Also, 196 because the exponent difference is always nonzero in this version, 197 the shift sequence can use SLL and skip loading a constant zero. */ 198 bnone xh, a6, .Ladd_xexpzero 199 200 or xh, xh, a6 201 slli xh, xh, 11 202 srli xh, xh, 11 203 204.Ladd_xexpdiff: 205 sub a10, a8, a7 206 bgeui a10, 32, .Ladd_bigshiftx 207 208 ssr a10 209 sll a9, xl 210 src xl, xh, xl 211 srl xh, xh 212 213.Ladd_addx: 214 add xl, xl, yl 215 add xh, xh, yh 216 bgeu xl, yl, 1f 217 addi xh, xh, 1 2181: 219 /* Check if the add overflowed into the exponent. */ 220 extui a10, xh, 20, 12 221 bne a10, a8, .Ladd_carry 222 223.Ladd_round: 224 /* Round up if the leftover fraction is >= 1/2. */ 225 bgez a9, 1f 226 addi xl, xl, 1 227 beqz xl, .Ladd_roundcarry 228 229 /* Check if the leftover fraction is exactly 1/2. */ 230 slli a9, a9, 1 231 beqz a9, .Ladd_exactlyhalf 2321: leaf_return 233 234.Ladd_bigshiftx: 235 /* Mostly the same thing as "bigshifty".... 
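      A rough C sketch of the shared "big shift" handling, assuming a
      64-bit mantissa m = xh:xl and an exponent difference n in [32,64)
      (illustrative, not part of the original code):

         uint64_t m = ((uint64_t)xh << 32) | xl;
         uint32_t guard = (uint32_t)(m >> (n - 32));  // kept for rounding
         xl = (uint32_t)(m >> n);
         xh = 0;
         if (m & ((((uint64_t)1) << (n - 32)) - 1))
            guard |= 1;                               // sticky bit

      The OR of a nonzero value into a9 below plays the role of the
      sticky bit.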
*/ 236 bgeui a10, 64, .Ladd_returny 237 238 ssr a10 239 sll a11, xl 240 src a9, xh, xl 241 srl xl, xh 242 movi xh, 0 243 beqz a11, .Ladd_addx 244 or a9, a9, a10 245 j .Ladd_addx 246 247.Ladd_returny: 248 mov xh, yh 249 mov xl, yl 250 leaf_return 251 252.Ladd_carry: 253 /* The addition has overflowed into the exponent field, so the 254 value needs to be renormalized. The mantissa of the result 255 can be recovered by subtracting the original exponent and 256 adding 0x100000 (which is the explicit "1.0" for the 257 mantissa of the non-shifted operand -- the "1.0" for the 258 shifted operand was already added). The mantissa can then 259 be shifted right by one bit. The explicit "1.0" of the 260 shifted mantissa then needs to be replaced by the exponent, 261 incremented by one to account for the normalizing shift. 262 It is faster to combine these operations: do the shift first 263 and combine the additions and subtractions. If x is the 264 original exponent, the result is: 265 shifted mantissa - (x << 19) + (1 << 19) + (x << 20) 266 or: 267 shifted mantissa + ((x + 1) << 19) 268 Note that the exponent is incremented here by leaving the 269 explicit "1.0" of the mantissa in the exponent field. */ 270 271 /* Shift xh/xl right by one bit. Save the lsb of xl. */ 272 mov a10, xl 273 ssai 1 274 src xl, xh, xl 275 srl xh, xh 276 277 /* See explanation above. The original exponent is in a8. */ 278 addi a8, a8, 1 279 slli a8, a8, 19 280 add xh, xh, a8 281 282 /* Return an Infinity if the exponent overflowed. */ 283 ball xh, a6, .Ladd_infinity 284 285 /* Same thing as the "round" code except the msb of the leftover 286 fraction is bit 0 of a10, with the rest of the fraction in a9. */ 287 bbci.l a10, 0, 1f 288 addi xl, xl, 1 289 beqz xl, .Ladd_roundcarry 290 beqz a9, .Ladd_exactlyhalf 2911: leaf_return 292 293.Ladd_infinity: 294 /* Clear the mantissa. */ 295 movi xl, 0 296 srli xh, xh, 20 297 slli xh, xh, 20 298 299 /* The sign bit may have been lost in a carry-out. Put it back. */ 300 slli a8, a8, 1 301 or xh, xh, a8 302 leaf_return 303 304.Ladd_exactlyhalf: 305 /* Round down to the nearest even value. */ 306 srli xl, xl, 1 307 slli xl, xl, 1 308 leaf_return 309 310.Ladd_roundcarry: 311 /* xl is always zero when the rounding increment overflows, so 312 there's no need to round it to an even value. */ 313 addi xh, xh, 1 314 /* Overflow to the exponent is OK. */ 315 leaf_return 316 317 318 /* Subtraction */ 319__subdf3_aux: 320 321 /* Handle NaNs and Infinities. (This code is placed before the 322 start of the function just to keep it in range of the limited 323 branch displacements.) */ 324 325.Lsub_xnan_or_inf: 326 /* If y is neither Infinity nor NaN, return x. */ 327 bnall yh, a6, 1f 328 /* Both x and y are either NaN or Inf, so the result is NaN. */ 329 movi a4, 0x80000 /* make it a quiet NaN */ 330 or xh, xh, a4 3311: leaf_return 332 333.Lsub_ynan_or_inf: 334 /* Negate y and return it. */ 335 slli a7, a6, 11 336 xor xh, yh, a7 337 mov xl, yl 338 leaf_return 339 340.Lsub_opposite_signs: 341 /* Operand signs differ. Do an addition. */ 342 slli a7, a6, 11 343 xor yh, yh, a7 344 j .Ladd_same_sign 345 346 .align 4 347 .global __subdf3 348 .type __subdf3, @function 349__subdf3: 350 leaf_entry sp, 16 351 movi a6, 0x7ff00000 352 353 /* Check if the two operands have the same sign. */ 354 xor a7, xh, yh 355 bltz a7, .Lsub_opposite_signs 356 357.Lsub_same_sign: 358 /* Check if either exponent == 0x7ff (i.e., NaN or Infinity). 
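      In C terms (illustrative only), the BALL test below with
      a6 == 0x7ff00000 is

         if ((xh & 0x7ff00000) == 0x7ff00000)   // exponent field all ones
            goto Lsub_xnan_or_inf;

      since BALL branches when every bit of the mask is set in the operand.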
*/ 359 ball xh, a6, .Lsub_xnan_or_inf 360 ball yh, a6, .Lsub_ynan_or_inf 361 362 /* Compare the operands. In contrast to addition, the entire 363 value matters here. */ 364 extui a7, xh, 20, 11 365 extui a8, yh, 20, 11 366 bltu xh, yh, .Lsub_xsmaller 367 beq xh, yh, .Lsub_compare_low 368 369.Lsub_ysmaller: 370 /* Check if the smaller (or equal) exponent is zero. */ 371 bnone yh, a6, .Lsub_yexpzero 372 373 /* Replace yh sign/exponent with 0x001. */ 374 or yh, yh, a6 375 slli yh, yh, 11 376 srli yh, yh, 11 377 378.Lsub_yexpdiff: 379 /* Compute the exponent difference. Optimize for difference < 32. */ 380 sub a10, a7, a8 381 bgeui a10, 32, .Lsub_bigshifty 382 383 /* Shift yh/yl right by the exponent difference. Any bits that are 384 shifted out of yl are saved in a9 for rounding the result. */ 385 ssr a10 386 movi a9, 0 387 src a9, yl, a9 388 src yl, yh, yl 389 srl yh, yh 390 391.Lsub_suby: 392 /* Do the 64-bit subtraction. */ 393 sub xh, xh, yh 394 bgeu xl, yl, 1f 395 addi xh, xh, -1 3961: sub xl, xl, yl 397 398 /* Subtract the leftover bits in a9 from zero and propagate any 399 borrow from xh/xl. */ 400 neg a9, a9 401 beqz a9, 1f 402 addi a5, xh, -1 403 moveqz xh, a5, xl 404 addi xl, xl, -1 4051: 406 /* Check if the subtract underflowed into the exponent. */ 407 extui a10, xh, 20, 11 408 beq a10, a7, .Lsub_round 409 j .Lsub_borrow 410 411.Lsub_compare_low: 412 /* The high words are equal. Compare the low words. */ 413 bltu xl, yl, .Lsub_xsmaller 414 bltu yl, xl, .Lsub_ysmaller 415 /* The operands are equal. Return 0.0. */ 416 movi xh, 0 417 movi xl, 0 4181: leaf_return 419 420.Lsub_yexpzero: 421 /* y is a subnormal value. Replace its sign/exponent with zero, 422 i.e., no implicit "1.0". Unless x is also a subnormal, increment 423 y's apparent exponent because subnormals behave as if they had 424 the minimum (nonzero) exponent. */ 425 slli yh, yh, 12 426 srli yh, yh, 12 427 bnone xh, a6, .Lsub_yexpdiff 428 addi a8, a8, 1 429 j .Lsub_yexpdiff 430 431.Lsub_bigshifty: 432 /* Exponent difference > 64 -- just return the bigger value. */ 433 bgeui a10, 64, 1b 434 435 /* Shift yh/yl right by the exponent difference. Any bits that are 436 shifted out are saved in a9 for rounding the result. */ 437 ssr a10 438 sll a11, yl /* lost bits shifted out of yl */ 439 src a9, yh, yl 440 srl yl, yh 441 movi yh, 0 442 beqz a11, .Lsub_suby 443 or a9, a9, a10 /* any positive, nonzero value will work */ 444 j .Lsub_suby 445 446.Lsub_xsmaller: 447 /* Same thing as the "ysmaller" code, but with x and y swapped and 448 with y negated. */ 449 bnone xh, a6, .Lsub_xexpzero 450 451 or xh, xh, a6 452 slli xh, xh, 11 453 srli xh, xh, 11 454 455.Lsub_xexpdiff: 456 sub a10, a8, a7 457 bgeui a10, 32, .Lsub_bigshiftx 458 459 ssr a10 460 movi a9, 0 461 src a9, xl, a9 462 src xl, xh, xl 463 srl xh, xh 464 465 /* Negate y. */ 466 slli a11, a6, 11 467 xor yh, yh, a11 468 469.Lsub_subx: 470 sub xl, yl, xl 471 sub xh, yh, xh 472 bgeu yl, xl, 1f 473 addi xh, xh, -1 4741: 475 /* Subtract the leftover bits in a9 from zero and propagate any 476 borrow from xh/xl. */ 477 neg a9, a9 478 beqz a9, 1f 479 addi a5, xh, -1 480 moveqz xh, a5, xl 481 addi xl, xl, -1 4821: 483 /* Check if the subtract underflowed into the exponent. */ 484 extui a10, xh, 20, 11 485 bne a10, a8, .Lsub_borrow 486 487.Lsub_round: 488 /* Round up if the leftover fraction is >= 1/2. */ 489 bgez a9, 1f 490 addi xl, xl, 1 491 beqz xl, .Lsub_roundcarry 492 493 /* Check if the leftover fraction is exactly 1/2. 
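      A compact C view of this rounding step (illustrative, with "guard"
      standing for the leftover bits in a9, MSB = the half bit):

         if (guard & 0x80000000) {             // fraction >= 1/2: round up
            if (++xl == 0) xh++;               // propagate the carry
            if ((guard << 1) == 0) xl &= ~1u;  // exact tie: round to even
         }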
*/ 494 slli a9, a9, 1 495 beqz a9, .Lsub_exactlyhalf 4961: leaf_return 497 498.Lsub_xexpzero: 499 /* Same as "yexpzero". */ 500 slli xh, xh, 12 501 srli xh, xh, 12 502 bnone yh, a6, .Lsub_xexpdiff 503 addi a7, a7, 1 504 j .Lsub_xexpdiff 505 506.Lsub_bigshiftx: 507 /* Mostly the same thing as "bigshifty", but with the sign bit of the 508 shifted value set so that the subsequent subtraction flips the 509 sign of y. */ 510 bgeui a10, 64, .Lsub_returny 511 512 ssr a10 513 sll a11, xl 514 src a9, xh, xl 515 srl xl, xh 516 slli xh, a6, 11 /* set sign bit of xh */ 517 beqz a11, .Lsub_subx 518 or a9, a9, a10 519 j .Lsub_subx 520 521.Lsub_returny: 522 /* Negate and return y. */ 523 slli a7, a6, 11 524 xor xh, yh, a7 525 mov xl, yl 526 leaf_return 527 528.Lsub_borrow: 529 /* The subtraction has underflowed into the exponent field, so the 530 value needs to be renormalized. Shift the mantissa left as 531 needed to remove any leading zeros and adjust the exponent 532 accordingly. If the exponent is not large enough to remove 533 all the leading zeros, the result will be a subnormal value. */ 534 535 slli a8, xh, 12 536 beqz a8, .Lsub_xhzero 537 do_nsau a6, a8, a7, a11 538 srli a8, a8, 12 539 bge a6, a10, .Lsub_subnormal 540 addi a6, a6, 1 541 542.Lsub_shift_lt32: 543 /* Shift the mantissa (a8/xl/a9) left by a6. */ 544 ssl a6 545 src a8, a8, xl 546 src xl, xl, a9 547 sll a9, a9 548 549 /* Combine the shifted mantissa with the sign and exponent, 550 decrementing the exponent by a6. (The exponent has already 551 been decremented by one due to the borrow from the subtraction, 552 but adding the mantissa will increment the exponent by one.) */ 553 srli xh, xh, 20 554 sub xh, xh, a6 555 slli xh, xh, 20 556 add xh, xh, a8 557 j .Lsub_round 558 559.Lsub_exactlyhalf: 560 /* Round down to the nearest even value. */ 561 srli xl, xl, 1 562 slli xl, xl, 1 563 leaf_return 564 565.Lsub_roundcarry: 566 /* xl is always zero when the rounding increment overflows, so 567 there's no need to round it to an even value. */ 568 addi xh, xh, 1 569 /* Overflow to the exponent is OK. */ 570 leaf_return 571 572.Lsub_xhzero: 573 /* When normalizing the result, all the mantissa bits in the high 574 word are zero. Shift by "20 + (leading zero count of xl) + 1". */ 575 do_nsau a6, xl, a7, a11 576 addi a6, a6, 21 577 blt a10, a6, .Lsub_subnormal 578 579.Lsub_normalize_shift: 580 bltui a6, 32, .Lsub_shift_lt32 581 582 ssl a6 583 src a8, xl, a9 584 sll xl, a9 585 movi a9, 0 586 587 srli xh, xh, 20 588 sub xh, xh, a6 589 slli xh, xh, 20 590 add xh, xh, a8 591 j .Lsub_round 592 593.Lsub_subnormal: 594 /* The exponent is too small to shift away all the leading zeros. 595 Set a6 to the current exponent (which has already been 596 decremented by the borrow) so that the exponent of the result 597 will be zero. Do not add 1 to a6 in this case, because: (1) 598 adding the mantissa will not increment the exponent, so there is 599 no need to subtract anything extra from the exponent to 600 compensate, and (2) the effective exponent of a subnormal is 1 601 not 0 so the shift amount must be 1 smaller than normal. */ 602 mov a6, a10 603 j .Lsub_normalize_shift 604 605#endif /* L_addsubdf3 */ 606 607#ifdef L_muldf3 608 609 /* Multiplication */ 610__muldf3_aux: 611 612 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). 613 (This code is placed before the start of the function just to 614 keep it in range of the limited branch displacements.) */ 615 616.Lmul_xexpzero: 617 /* Clear the sign bit of x. 
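      What follows normalizes a subnormal operand.  As a C sketch
      (illustrative; m is the 64-bit mantissa xh:xl, clz64 is an assumed
      count-leading-zeros helper):

         int shift = clz64(m) - 11;  // put the leading 1 at bit 52
         m <<= shift;
         exp = 1 - shift;            // subnormals have effective exponent 1

      The NSAU-based paths below do the same thing for the cases where the
      leading 1 is in xh or only in xl.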
*/ 618 slli xh, xh, 1 619 srli xh, xh, 1 620 621 /* If x is zero, return zero. */ 622 or a10, xh, xl 623 beqz a10, .Lmul_return_zero 624 625 /* Normalize x. Adjust the exponent in a8. */ 626 beqz xh, .Lmul_xh_zero 627 do_nsau a10, xh, a11, a12 628 addi a10, a10, -11 629 ssl a10 630 src xh, xh, xl 631 sll xl, xl 632 movi a8, 1 633 sub a8, a8, a10 634 j .Lmul_xnormalized 635.Lmul_xh_zero: 636 do_nsau a10, xl, a11, a12 637 addi a10, a10, -11 638 movi a8, -31 639 sub a8, a8, a10 640 ssl a10 641 bltz a10, .Lmul_xl_srl 642 sll xh, xl 643 movi xl, 0 644 j .Lmul_xnormalized 645.Lmul_xl_srl: 646 srl xh, xl 647 sll xl, xl 648 j .Lmul_xnormalized 649 650.Lmul_yexpzero: 651 /* Clear the sign bit of y. */ 652 slli yh, yh, 1 653 srli yh, yh, 1 654 655 /* If y is zero, return zero. */ 656 or a10, yh, yl 657 beqz a10, .Lmul_return_zero 658 659 /* Normalize y. Adjust the exponent in a9. */ 660 beqz yh, .Lmul_yh_zero 661 do_nsau a10, yh, a11, a12 662 addi a10, a10, -11 663 ssl a10 664 src yh, yh, yl 665 sll yl, yl 666 movi a9, 1 667 sub a9, a9, a10 668 j .Lmul_ynormalized 669.Lmul_yh_zero: 670 do_nsau a10, yl, a11, a12 671 addi a10, a10, -11 672 movi a9, -31 673 sub a9, a9, a10 674 ssl a10 675 bltz a10, .Lmul_yl_srl 676 sll yh, yl 677 movi yl, 0 678 j .Lmul_ynormalized 679.Lmul_yl_srl: 680 srl yh, yl 681 sll yl, yl 682 j .Lmul_ynormalized 683 684.Lmul_return_zero: 685 /* Return zero with the appropriate sign bit. */ 686 srli xh, a7, 31 687 slli xh, xh, 31 688 movi xl, 0 689 j .Lmul_done 690 691.Lmul_xnan_or_inf: 692 /* If y is zero, return NaN. */ 693 bnez yl, 1f 694 slli a8, yh, 1 695 bnez a8, 1f 696 movi a4, 0x80000 /* make it a quiet NaN */ 697 or xh, xh, a4 698 j .Lmul_done 6991: 700 /* If y is NaN, return y. */ 701 bnall yh, a6, .Lmul_returnx 702 slli a8, yh, 12 703 or a8, a8, yl 704 beqz a8, .Lmul_returnx 705 706.Lmul_returny: 707 mov xh, yh 708 mov xl, yl 709 710.Lmul_returnx: 711 /* Set the sign bit and return. */ 712 extui a7, a7, 31, 1 713 slli xh, xh, 1 714 ssai 1 715 src xh, a7, xh 716 j .Lmul_done 717 718.Lmul_ynan_or_inf: 719 /* If x is zero, return NaN. */ 720 bnez xl, .Lmul_returny 721 slli a8, xh, 1 722 bnez a8, .Lmul_returny 723 movi a7, 0x80000 /* make it a quiet NaN */ 724 or xh, yh, a7 725 j .Lmul_done 726 727 .align 4 728 .global __muldf3 729 .type __muldf3, @function 730__muldf3: 731 leaf_entry sp, 32 732#if __XTENSA_CALL0_ABI__ 733 addi sp, sp, -32 734 s32i a12, sp, 16 735 s32i a13, sp, 20 736 s32i a14, sp, 24 737 s32i a15, sp, 28 738#endif 739 movi a6, 0x7ff00000 740 741 /* Get the sign of the result. */ 742 xor a7, xh, yh 743 744 /* Check for NaN and infinity. */ 745 ball xh, a6, .Lmul_xnan_or_inf 746 ball yh, a6, .Lmul_ynan_or_inf 747 748 /* Extract the exponents. */ 749 extui a8, xh, 20, 11 750 extui a9, yh, 20, 11 751 752 beqz a8, .Lmul_xexpzero 753.Lmul_xnormalized: 754 beqz a9, .Lmul_yexpzero 755.Lmul_ynormalized: 756 757 /* Add the exponents. */ 758 add a8, a8, a9 759 760 /* Replace sign/exponent fields with explicit "1.0". */ 761 movi a10, 0x1fffff 762 or xh, xh, a6 763 and xh, xh, a10 764 or yh, yh, a6 765 and yh, yh, a10 766 767 /* Multiply 64x64 to 128 bits. The result ends up in xh/xl/a6. 768 The least-significant word of the result is thrown away except 769 that if it is nonzero, the lsb of a6 is set to 1. */ 770#if XCHAL_HAVE_MUL32_HIGH 771 772 /* Compute a6 with any carry-outs in a10. 
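      Conceptually (an illustrative C sketch, not the code itself), this
      whole block computes

         unsigned __int128 p = (unsigned __int128)x * y;  // 64-bit mantissas
         xh = (uint32_t)(p >> 96);
         xl = (uint32_t)(p >> 64);
         a6 = (uint32_t)(p >> 32);
         if ((uint32_t)p != 0)
            a6 |= 1;         // fold the discarded low word into a sticky bit

      built up from 32x32 multiplies because that is all the ISA provides.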
*/ 773 movi a10, 0 774 mull a6, xl, yh 775 mull a11, xh, yl 776 add a6, a6, a11 777 bgeu a6, a11, 1f 778 addi a10, a10, 1 7791: 780 muluh a11, xl, yl 781 add a6, a6, a11 782 bgeu a6, a11, 1f 783 addi a10, a10, 1 7841: 785 /* If the low word of the result is nonzero, set the lsb of a6. */ 786 mull a11, xl, yl 787 beqz a11, 1f 788 movi a9, 1 789 or a6, a6, a9 7901: 791 /* Compute xl with any carry-outs in a9. */ 792 movi a9, 0 793 mull a11, xh, yh 794 add a10, a10, a11 795 bgeu a10, a11, 1f 796 addi a9, a9, 1 7971: 798 muluh a11, xh, yl 799 add a10, a10, a11 800 bgeu a10, a11, 1f 801 addi a9, a9, 1 8021: 803 muluh xl, xl, yh 804 add xl, xl, a10 805 bgeu xl, a10, 1f 806 addi a9, a9, 1 8071: 808 /* Compute xh. */ 809 muluh xh, xh, yh 810 add xh, xh, a9 811 812#else 813 814 /* Break the inputs into 16-bit chunks and compute 16 32-bit partial 815 products. These partial products are: 816 817 0 xll * yll 818 819 1 xll * ylh 820 2 xlh * yll 821 822 3 xll * yhl 823 4 xlh * ylh 824 5 xhl * yll 825 826 6 xll * yhh 827 7 xlh * yhl 828 8 xhl * ylh 829 9 xhh * yll 830 831 10 xlh * yhh 832 11 xhl * yhl 833 12 xhh * ylh 834 835 13 xhl * yhh 836 14 xhh * yhl 837 838 15 xhh * yhh 839 840 where the input chunks are (hh, hl, lh, ll). If using the Mul16 841 or Mul32 multiplier options, these input chunks must be stored in 842 separate registers. For Mac16, the UMUL.AA.* opcodes can specify 843 that the inputs come from either half of the registers, so there 844 is no need to shift them out ahead of time. If there is no 845 multiply hardware, the 16-bit chunks can be extracted when setting 846 up the arguments to the separate multiply function. */ 847 848 /* Save a7 since it is needed to hold a temporary value. */ 849 s32i a7, sp, 4 850#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 851 /* Calling a separate multiply function will clobber a0 and requires 852 use of a8 as a temporary, so save those values now. (The function 853 uses a custom ABI so nothing else needs to be saved.) */ 854 s32i a0, sp, 0 855 s32i a8, sp, 8 856#endif 857 858#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 859 860#define xlh a12 861#define ylh a13 862#define xhh a14 863#define yhh a15 864 865 /* Get the high halves of the inputs into registers. */ 866 srli xlh, xl, 16 867 srli ylh, yl, 16 868 srli xhh, xh, 16 869 srli yhh, yh, 16 870 871#define xll xl 872#define yll yl 873#define xhl xh 874#define yhl yh 875 876#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16 877 /* Clear the high halves of the inputs. This does not matter 878 for MUL16 because the high bits are ignored. */ 879 extui xl, xl, 0, 16 880 extui xh, xh, 0, 16 881 extui yl, yl, 0, 16 882 extui yh, yh, 0, 16 883#endif 884#endif /* MUL16 || MUL32 */ 885 886 887#if XCHAL_HAVE_MUL16 888 889#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ 890 mul16u dst, xreg ## xhalf, yreg ## yhalf 891 892#elif XCHAL_HAVE_MUL32 893 894#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ 895 mull dst, xreg ## xhalf, yreg ## yhalf 896 897#elif XCHAL_HAVE_MAC16 898 899/* The preprocessor insists on inserting a space when concatenating after 900 a period in the definition of do_mul below. These macros are a workaround 901 using underscores instead of periods when doing the concatenation. 
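      For example (illustrative, not from the original sources), a direct
      definition such as

         #define do_mul(dst, xreg, xhalf, yreg, yhalf) \
            umul.aa. ## xhalf ## yhalf  xreg, yreg

      would expand with a stray space ("umul.aa. lh" rather than
      "umul.aa.lh"), which the assembler rejects, so the opcodes are
      spelled with underscores and mapped back by the defines below.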
*/ 902#define umul_aa_ll umul.aa.ll 903#define umul_aa_lh umul.aa.lh 904#define umul_aa_hl umul.aa.hl 905#define umul_aa_hh umul.aa.hh 906 907#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ 908 umul_aa_ ## xhalf ## yhalf xreg, yreg; \ 909 rsr dst, ACCLO 910 911#else /* no multiply hardware */ 912 913#define set_arg_l(dst, src) \ 914 extui dst, src, 0, 16 915#define set_arg_h(dst, src) \ 916 srli dst, src, 16 917 918#define do_mul(dst, xreg, xhalf, yreg, yhalf) \ 919 set_arg_ ## xhalf (a13, xreg); \ 920 set_arg_ ## yhalf (a14, yreg); \ 921 call0 .Lmul_mulsi3; \ 922 mov dst, a12 923#endif 924 925 /* Add pp1 and pp2 into a10 with carry-out in a9. */ 926 do_mul(a10, xl, l, yl, h) /* pp 1 */ 927 do_mul(a11, xl, h, yl, l) /* pp 2 */ 928 movi a9, 0 929 add a10, a10, a11 930 bgeu a10, a11, 1f 931 addi a9, a9, 1 9321: 933 /* Initialize a6 with a9/a10 shifted into position. Note that 934 this value can be safely incremented without any carry-outs. */ 935 ssai 16 936 src a6, a9, a10 937 938 /* Compute the low word into a10. */ 939 do_mul(a11, xl, l, yl, l) /* pp 0 */ 940 sll a10, a10 941 add a10, a10, a11 942 bgeu a10, a11, 1f 943 addi a6, a6, 1 9441: 945 /* Compute the contributions of pp0-5 to a6, with carry-outs in a9. 946 This is good enough to determine the low half of a6, so that any 947 nonzero bits from the low word of the result can be collapsed 948 into a6, freeing up a register. */ 949 movi a9, 0 950 do_mul(a11, xl, l, yh, l) /* pp 3 */ 951 add a6, a6, a11 952 bgeu a6, a11, 1f 953 addi a9, a9, 1 9541: 955 do_mul(a11, xl, h, yl, h) /* pp 4 */ 956 add a6, a6, a11 957 bgeu a6, a11, 1f 958 addi a9, a9, 1 9591: 960 do_mul(a11, xh, l, yl, l) /* pp 5 */ 961 add a6, a6, a11 962 bgeu a6, a11, 1f 963 addi a9, a9, 1 9641: 965 /* Collapse any nonzero bits from the low word into a6. */ 966 beqz a10, 1f 967 movi a11, 1 968 or a6, a6, a11 9691: 970 /* Add pp6-9 into a11 with carry-outs in a10. */ 971 do_mul(a7, xl, l, yh, h) /* pp 6 */ 972 do_mul(a11, xh, h, yl, l) /* pp 9 */ 973 movi a10, 0 974 add a11, a11, a7 975 bgeu a11, a7, 1f 976 addi a10, a10, 1 9771: 978 do_mul(a7, xl, h, yh, l) /* pp 7 */ 979 add a11, a11, a7 980 bgeu a11, a7, 1f 981 addi a10, a10, 1 9821: 983 do_mul(a7, xh, l, yl, h) /* pp 8 */ 984 add a11, a11, a7 985 bgeu a11, a7, 1f 986 addi a10, a10, 1 9871: 988 /* Shift a10/a11 into position, and add low half of a11 to a6. */ 989 src a10, a10, a11 990 add a10, a10, a9 991 sll a11, a11 992 add a6, a6, a11 993 bgeu a6, a11, 1f 994 addi a10, a10, 1 9951: 996 /* Add pp10-12 into xl with carry-outs in a9. */ 997 movi a9, 0 998 do_mul(xl, xl, h, yh, h) /* pp 10 */ 999 add xl, xl, a10 1000 bgeu xl, a10, 1f 1001 addi a9, a9, 1 10021: 1003 do_mul(a10, xh, l, yh, l) /* pp 11 */ 1004 add xl, xl, a10 1005 bgeu xl, a10, 1f 1006 addi a9, a9, 1 10071: 1008 do_mul(a10, xh, h, yl, h) /* pp 12 */ 1009 add xl, xl, a10 1010 bgeu xl, a10, 1f 1011 addi a9, a9, 1 10121: 1013 /* Add pp13-14 into a11 with carry-outs in a10. */ 1014 do_mul(a11, xh, l, yh, h) /* pp 13 */ 1015 do_mul(a7, xh, h, yh, l) /* pp 14 */ 1016 movi a10, 0 1017 add a11, a11, a7 1018 bgeu a11, a7, 1f 1019 addi a10, a10, 1 10201: 1021 /* Shift a10/a11 into position, and add low half of a11 to a6. */ 1022 src a10, a10, a11 1023 add a10, a10, a9 1024 sll a11, a11 1025 add xl, xl, a11 1026 bgeu xl, a11, 1f 1027 addi a10, a10, 1 10281: 1029 /* Compute xh. */ 1030 do_mul(xh, xh, h, yh, h) /* pp 15 */ 1031 add xh, xh, a10 1032 1033 /* Restore values saved on the stack during the multiplication. 
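      After these reloads, the 128-bit product sits in xh/xl/a6 and is
      renormalized.  As an illustrative C sketch of the decision made just
      below (names are descriptive, not the actual registers):

         int carried = (xh >> 9) != 0;   // product reached [2,4)
         int shift   = carried ? 11 : 12;
         if (!carried)
            exp_sum -= 1;                // compensate for the extra shift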
*/ 1034 l32i a7, sp, 4 1035#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 1036 l32i a0, sp, 0 1037 l32i a8, sp, 8 1038#endif 1039#endif 1040 1041 /* Shift left by 12 bits, unless there was a carry-out from the 1042 multiply, in which case, shift by 11 bits and increment the 1043 exponent. Note: It is convenient to use the constant 0x3ff 1044 instead of 0x400 when removing the extra exponent bias (so that 1045 it is easy to construct 0x7fe for the overflow check). Reverse 1046 the logic here to decrement the exponent sum by one unless there 1047 was a carry-out. */ 1048 movi a4, 11 1049 srli a5, xh, 21 - 12 1050 bnez a5, 1f 1051 addi a4, a4, 1 1052 addi a8, a8, -1 10531: ssl a4 1054 src xh, xh, xl 1055 src xl, xl, a6 1056 sll a6, a6 1057 1058 /* Subtract the extra bias from the exponent sum (plus one to account 1059 for the explicit "1.0" of the mantissa that will be added to the 1060 exponent in the final result). */ 1061 movi a4, 0x3ff 1062 sub a8, a8, a4 1063 1064 /* Check for over/underflow. The value in a8 is one less than the 1065 final exponent, so values in the range 0..7fd are OK here. */ 1066 slli a4, a4, 1 /* 0x7fe */ 1067 bgeu a8, a4, .Lmul_overflow 1068 1069.Lmul_round: 1070 /* Round. */ 1071 bgez a6, .Lmul_rounded 1072 addi xl, xl, 1 1073 beqz xl, .Lmul_roundcarry 1074 slli a6, a6, 1 1075 beqz a6, .Lmul_exactlyhalf 1076 1077.Lmul_rounded: 1078 /* Add the exponent to the mantissa. */ 1079 slli a8, a8, 20 1080 add xh, xh, a8 1081 1082.Lmul_addsign: 1083 /* Add the sign bit. */ 1084 srli a7, a7, 31 1085 slli a7, a7, 31 1086 or xh, xh, a7 1087 1088.Lmul_done: 1089#if __XTENSA_CALL0_ABI__ 1090 l32i a12, sp, 16 1091 l32i a13, sp, 20 1092 l32i a14, sp, 24 1093 l32i a15, sp, 28 1094 addi sp, sp, 32 1095#endif 1096 leaf_return 1097 1098.Lmul_exactlyhalf: 1099 /* Round down to the nearest even value. */ 1100 srli xl, xl, 1 1101 slli xl, xl, 1 1102 j .Lmul_rounded 1103 1104.Lmul_roundcarry: 1105 /* xl is always zero when the rounding increment overflows, so 1106 there's no need to round it to an even value. */ 1107 addi xh, xh, 1 1108 /* Overflow is OK -- it will be added to the exponent. */ 1109 j .Lmul_rounded 1110 1111.Lmul_overflow: 1112 bltz a8, .Lmul_underflow 1113 /* Return +/- Infinity. */ 1114 addi a8, a4, 1 /* 0x7ff */ 1115 slli xh, a8, 20 1116 movi xl, 0 1117 j .Lmul_addsign 1118 1119.Lmul_underflow: 1120 /* Create a subnormal value, where the exponent field contains zero, 1121 but the effective exponent is 1. The value of a8 is one less than 1122 the actual exponent, so just negate it to get the shift amount. */ 1123 neg a8, a8 1124 mov a9, a6 1125 ssr a8 1126 bgeui a8, 32, .Lmul_bigshift 1127 1128 /* Shift xh/xl right. Any bits that are shifted out of xl are saved 1129 in a6 (combined with the shifted-out bits currently in a6) for 1130 rounding the result. */ 1131 sll a6, xl 1132 src xl, xh, xl 1133 srl xh, xh 1134 j 1f 1135 1136.Lmul_bigshift: 1137 bgeui a8, 64, .Lmul_flush_to_zero 1138 sll a10, xl /* lost bits shifted out of xl */ 1139 src a6, xh, xl 1140 srl xl, xh 1141 movi xh, 0 1142 or a9, a9, a10 1143 1144 /* Set the exponent to zero. */ 11451: movi a8, 0 1146 1147 /* Pack any nonzero bits shifted out into a6. */ 1148 beqz a9, .Lmul_round 1149 movi a9, 1 1150 or a6, a6, a9 1151 j .Lmul_round 1152 1153.Lmul_flush_to_zero: 1154 /* Return zero with the appropriate sign bit. 
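      Equivalently, in C (illustrative): xh = sign & 0x80000000; xl = 0;
      where "sign" is the xh ^ yh value saved in a7 at function entry.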
*/ 1155 srli xh, a7, 31 1156 slli xh, xh, 31 1157 movi xl, 0 1158 j .Lmul_done 1159 1160#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16 1161 1162 /* For Xtensa processors with no multiply hardware, this simplified 1163 version of _mulsi3 is used for multiplying 16-bit chunks of 1164 the floating-point mantissas. It uses a custom ABI: the inputs 1165 are passed in a13 and a14, the result is returned in a12, and 1166 a8 and a15 are clobbered. */ 1167 .align 4 1168.Lmul_mulsi3: 1169 movi a12, 0 1170.Lmul_mult_loop: 1171 add a15, a14, a12 1172 extui a8, a13, 0, 1 1173 movnez a12, a15, a8 1174 1175 do_addx2 a15, a14, a12, a15 1176 extui a8, a13, 1, 1 1177 movnez a12, a15, a8 1178 1179 do_addx4 a15, a14, a12, a15 1180 extui a8, a13, 2, 1 1181 movnez a12, a15, a8 1182 1183 do_addx8 a15, a14, a12, a15 1184 extui a8, a13, 3, 1 1185 movnez a12, a15, a8 1186 1187 srli a13, a13, 4 1188 slli a14, a14, 4 1189 bnez a13, .Lmul_mult_loop 1190 ret 1191#endif /* !MUL16 && !MUL32 && !MAC16 */ 1192#endif /* L_muldf3 */ 1193 1194#ifdef L_divdf3 1195 1196 /* Division */ 1197__divdf3_aux: 1198 1199 /* Handle unusual cases (zeros, subnormals, NaNs and Infinities). 1200 (This code is placed before the start of the function just to 1201 keep it in range of the limited branch displacements.) */ 1202 1203.Ldiv_yexpzero: 1204 /* Clear the sign bit of y. */ 1205 slli yh, yh, 1 1206 srli yh, yh, 1 1207 1208 /* Check for division by zero. */ 1209 or a10, yh, yl 1210 beqz a10, .Ldiv_yzero 1211 1212 /* Normalize y. Adjust the exponent in a9. */ 1213 beqz yh, .Ldiv_yh_zero 1214 do_nsau a10, yh, a11, a9 1215 addi a10, a10, -11 1216 ssl a10 1217 src yh, yh, yl 1218 sll yl, yl 1219 movi a9, 1 1220 sub a9, a9, a10 1221 j .Ldiv_ynormalized 1222.Ldiv_yh_zero: 1223 do_nsau a10, yl, a11, a9 1224 addi a10, a10, -11 1225 movi a9, -31 1226 sub a9, a9, a10 1227 ssl a10 1228 bltz a10, .Ldiv_yl_srl 1229 sll yh, yl 1230 movi yl, 0 1231 j .Ldiv_ynormalized 1232.Ldiv_yl_srl: 1233 srl yh, yl 1234 sll yl, yl 1235 j .Ldiv_ynormalized 1236 1237.Ldiv_yzero: 1238 /* y is zero. Return NaN if x is also zero; otherwise, infinity. */ 1239 slli xh, xh, 1 1240 srli xh, xh, 1 1241 or xl, xl, xh 1242 srli xh, a7, 31 1243 slli xh, xh, 31 1244 or xh, xh, a6 1245 bnez xl, 1f 1246 movi a4, 0x80000 /* make it a quiet NaN */ 1247 or xh, xh, a4 12481: movi xl, 0 1249 leaf_return 1250 1251.Ldiv_xexpzero: 1252 /* Clear the sign bit of x. */ 1253 slli xh, xh, 1 1254 srli xh, xh, 1 1255 1256 /* If x is zero, return zero. */ 1257 or a10, xh, xl 1258 beqz a10, .Ldiv_return_zero 1259 1260 /* Normalize x. Adjust the exponent in a8. */ 1261 beqz xh, .Ldiv_xh_zero 1262 do_nsau a10, xh, a11, a8 1263 addi a10, a10, -11 1264 ssl a10 1265 src xh, xh, xl 1266 sll xl, xl 1267 movi a8, 1 1268 sub a8, a8, a10 1269 j .Ldiv_xnormalized 1270.Ldiv_xh_zero: 1271 do_nsau a10, xl, a11, a8 1272 addi a10, a10, -11 1273 movi a8, -31 1274 sub a8, a8, a10 1275 ssl a10 1276 bltz a10, .Ldiv_xl_srl 1277 sll xh, xl 1278 movi xl, 0 1279 j .Ldiv_xnormalized 1280.Ldiv_xl_srl: 1281 srl xh, xl 1282 sll xl, xl 1283 j .Ldiv_xnormalized 1284 1285.Ldiv_return_zero: 1286 /* Return zero with the appropriate sign bit. */ 1287 srli xh, a7, 31 1288 slli xh, xh, 31 1289 movi xl, 0 1290 leaf_return 1291 1292.Ldiv_xnan_or_inf: 1293 /* Set the sign bit of the result. */ 1294 srli a7, yh, 31 1295 slli a7, a7, 31 1296 xor xh, xh, a7 1297 /* If y is NaN or Inf, return NaN. 
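      In C terms (illustrative): if ((yh & 0x7ff00000) == 0x7ff00000), both
      operands are NaN or Inf, so the quotient is a NaN; OR-ing 0x00080000
      into xh below sets the quiet bit of the NaN without disturbing the
      sign already merged in above.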
*/ 1298 bnall yh, a6, 1f 1299 movi a4, 0x80000 /* make it a quiet NaN */ 1300 or xh, xh, a4 13011: leaf_return 1302 1303.Ldiv_ynan_or_inf: 1304 /* If y is Infinity, return zero. */ 1305 slli a8, yh, 12 1306 or a8, a8, yl 1307 beqz a8, .Ldiv_return_zero 1308 /* y is NaN; return it. */ 1309 mov xh, yh 1310 mov xl, yl 1311 leaf_return 1312 1313.Ldiv_highequal1: 1314 bltu xl, yl, 2f 1315 j 3f 1316 1317 .align 4 1318 .global __divdf3 1319 .type __divdf3, @function 1320__divdf3: 1321 leaf_entry sp, 16 1322 movi a6, 0x7ff00000 1323 1324 /* Get the sign of the result. */ 1325 xor a7, xh, yh 1326 1327 /* Check for NaN and infinity. */ 1328 ball xh, a6, .Ldiv_xnan_or_inf 1329 ball yh, a6, .Ldiv_ynan_or_inf 1330 1331 /* Extract the exponents. */ 1332 extui a8, xh, 20, 11 1333 extui a9, yh, 20, 11 1334 1335 beqz a9, .Ldiv_yexpzero 1336.Ldiv_ynormalized: 1337 beqz a8, .Ldiv_xexpzero 1338.Ldiv_xnormalized: 1339 1340 /* Subtract the exponents. */ 1341 sub a8, a8, a9 1342 1343 /* Replace sign/exponent fields with explicit "1.0". */ 1344 movi a10, 0x1fffff 1345 or xh, xh, a6 1346 and xh, xh, a10 1347 or yh, yh, a6 1348 and yh, yh, a10 1349 1350 /* Set SAR for left shift by one. */ 1351 ssai (32 - 1) 1352 1353 /* The first digit of the mantissa division must be a one. 1354 Shift x (and adjust the exponent) as needed to make this true. */ 1355 bltu yh, xh, 3f 1356 beq yh, xh, .Ldiv_highequal1 13572: src xh, xh, xl 1358 sll xl, xl 1359 addi a8, a8, -1 13603: 1361 /* Do the first subtraction and shift. */ 1362 sub xh, xh, yh 1363 bgeu xl, yl, 1f 1364 addi xh, xh, -1 13651: sub xl, xl, yl 1366 src xh, xh, xl 1367 sll xl, xl 1368 1369 /* Put the quotient into a10/a11. */ 1370 movi a10, 0 1371 movi a11, 1 1372 1373 /* Divide one bit at a time for 52 bits. */ 1374 movi a9, 52 1375#if XCHAL_HAVE_LOOPS 1376 loop a9, .Ldiv_loopend 1377#endif 1378.Ldiv_loop: 1379 /* Shift the quotient << 1. */ 1380 src a10, a10, a11 1381 sll a11, a11 1382 1383 /* Is this digit a 0 or 1? */ 1384 bltu xh, yh, 3f 1385 beq xh, yh, .Ldiv_highequal2 1386 1387 /* Output a 1 and subtract. */ 13882: addi a11, a11, 1 1389 sub xh, xh, yh 1390 bgeu xl, yl, 1f 1391 addi xh, xh, -1 13921: sub xl, xl, yl 1393 1394 /* Shift the dividend << 1. */ 13953: src xh, xh, xl 1396 sll xl, xl 1397 1398#if !XCHAL_HAVE_LOOPS 1399 addi a9, a9, -1 1400 bnez a9, .Ldiv_loop 1401#endif 1402.Ldiv_loopend: 1403 1404 /* Add the exponent bias (less one to account for the explicit "1.0" 1405 of the mantissa that will be added to the exponent in the final 1406 result). */ 1407 movi a9, 0x3fe 1408 add a8, a8, a9 1409 1410 /* Check for over/underflow. The value in a8 is one less than the 1411 final exponent, so values in the range 0..7fd are OK here. */ 1412 addmi a9, a9, 0x400 /* 0x7fe */ 1413 bgeu a8, a9, .Ldiv_overflow 1414 1415.Ldiv_round: 1416 /* Round. The remainder (<< 1) is in xh/xl. */ 1417 bltu xh, yh, .Ldiv_rounded 1418 beq xh, yh, .Ldiv_highequal3 1419.Ldiv_roundup: 1420 addi a11, a11, 1 1421 beqz a11, .Ldiv_roundcarry 1422 1423.Ldiv_rounded: 1424 mov xl, a11 1425 /* Add the exponent to the mantissa. */ 1426 slli a8, a8, 20 1427 add xh, a10, a8 1428 1429.Ldiv_addsign: 1430 /* Add the sign bit. */ 1431 srli a7, a7, 31 1432 slli a7, a7, 31 1433 or xh, xh, a7 1434 leaf_return 1435 1436.Ldiv_highequal2: 1437 bgeu xl, yl, 2b 1438 j 3b 1439 1440.Ldiv_highequal3: 1441 bltu xl, yl, .Ldiv_rounded 1442 bne xl, yl, .Ldiv_roundup 1443 1444 /* Remainder is exactly half the divisor. Round even. 
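      As a C sketch of this tie case (illustrative; the quotient is the
      64-bit value a10:a11):

         if (++q_lo == 0)
            q_hi++;          // increment carried out of the low word
         else
            q_lo &= ~1u;     // otherwise clear the lsb: ties go to even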
*/ 1445 addi a11, a11, 1 1446 beqz a11, .Ldiv_roundcarry 1447 srli a11, a11, 1 1448 slli a11, a11, 1 1449 j .Ldiv_rounded 1450 1451.Ldiv_overflow: 1452 bltz a8, .Ldiv_underflow 1453 /* Return +/- Infinity. */ 1454 addi a8, a9, 1 /* 0x7ff */ 1455 slli xh, a8, 20 1456 movi xl, 0 1457 j .Ldiv_addsign 1458 1459.Ldiv_underflow: 1460 /* Create a subnormal value, where the exponent field contains zero, 1461 but the effective exponent is 1. The value of a8 is one less than 1462 the actual exponent, so just negate it to get the shift amount. */ 1463 neg a8, a8 1464 ssr a8 1465 bgeui a8, 32, .Ldiv_bigshift 1466 1467 /* Shift a10/a11 right. Any bits that are shifted out of a11 are 1468 saved in a6 for rounding the result. */ 1469 sll a6, a11 1470 src a11, a10, a11 1471 srl a10, a10 1472 j 1f 1473 1474.Ldiv_bigshift: 1475 bgeui a8, 64, .Ldiv_flush_to_zero 1476 sll a9, a11 /* lost bits shifted out of a11 */ 1477 src a6, a10, a11 1478 srl a11, a10 1479 movi a10, 0 1480 or xl, xl, a9 1481 1482 /* Set the exponent to zero. */ 14831: movi a8, 0 1484 1485 /* Pack any nonzero remainder (in xh/xl) into a6. */ 1486 or xh, xh, xl 1487 beqz xh, 1f 1488 movi a9, 1 1489 or a6, a6, a9 1490 1491 /* Round a10/a11 based on the bits shifted out into a6. */ 14921: bgez a6, .Ldiv_rounded 1493 addi a11, a11, 1 1494 beqz a11, .Ldiv_roundcarry 1495 slli a6, a6, 1 1496 bnez a6, .Ldiv_rounded 1497 srli a11, a11, 1 1498 slli a11, a11, 1 1499 j .Ldiv_rounded 1500 1501.Ldiv_roundcarry: 1502 /* a11 is always zero when the rounding increment overflows, so 1503 there's no need to round it to an even value. */ 1504 addi a10, a10, 1 1505 /* Overflow to the exponent field is OK. */ 1506 j .Ldiv_rounded 1507 1508.Ldiv_flush_to_zero: 1509 /* Return zero with the appropriate sign bit. */ 1510 srli xh, a7, 31 1511 slli xh, xh, 31 1512 movi xl, 0 1513 leaf_return 1514 1515#endif /* L_divdf3 */ 1516 1517#ifdef L_cmpdf2 1518 1519 /* Equal and Not Equal */ 1520 1521 .align 4 1522 .global __eqdf2 1523 .global __nedf2 1524 .set __nedf2, __eqdf2 1525 .type __eqdf2, @function 1526__eqdf2: 1527 leaf_entry sp, 16 1528 bne xl, yl, 2f 1529 bne xh, yh, 4f 1530 1531 /* The values are equal but NaN != NaN. Check the exponent. */ 1532 movi a6, 0x7ff00000 1533 ball xh, a6, 3f 1534 1535 /* Equal. */ 1536 movi a2, 0 1537 leaf_return 1538 1539 /* Not equal. */ 15402: movi a2, 1 1541 leaf_return 1542 1543 /* Check if the mantissas are nonzero. */ 15443: slli a7, xh, 12 1545 or a7, a7, xl 1546 j 5f 1547 1548 /* Check if x and y are zero with different signs. */ 15494: or a7, xh, yh 1550 slli a7, a7, 1 1551 or a7, a7, xl /* xl == yl here */ 1552 1553 /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa 1554 or x when exponent(x) = 0x7ff and x == y. */ 15555: movi a2, 0 1556 movi a3, 1 1557 movnez a2, a3, a7 1558 leaf_return 1559 1560 1561 /* Greater Than */ 1562 1563 .align 4 1564 .global __gtdf2 1565 .type __gtdf2, @function 1566__gtdf2: 1567 leaf_entry sp, 16 1568 movi a6, 0x7ff00000 1569 ball xh, a6, 2f 15701: bnall yh, a6, .Lle_cmp 1571 1572 /* Check if y is a NaN. */ 1573 slli a7, yh, 12 1574 or a7, a7, yl 1575 beqz a7, .Lle_cmp 1576 movi a2, 0 1577 leaf_return 1578 1579 /* Check if x is a NaN. */ 15802: slli a7, xh, 12 1581 or a7, a7, xl 1582 beqz a7, 1b 1583 movi a2, 0 1584 leaf_return 1585 1586 1587 /* Less Than or Equal */ 1588 1589 .align 4 1590 .global __ledf2 1591 .type __ledf2, @function 1592__ledf2: 1593 leaf_entry sp, 16 1594 movi a6, 0x7ff00000 1595 ball xh, a6, 2f 15961: bnall yh, a6, .Lle_cmp 1597 1598 /* Check if y is a NaN. 
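      In C terms (illustrative), y is a NaN when its exponent field is all
      ones and its mantissa is nonzero:

         ((yh & 0x7ff00000) == 0x7ff00000) && (((yh << 12) | yl) != 0)

      The exponent part was already established by the BNALL test above, so
      only the mantissa test remains here; for an unordered comparison
      __ledf2 returns 1, which callers treat as "not less than or equal".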
*/ 1599 slli a7, yh, 12 1600 or a7, a7, yl 1601 beqz a7, .Lle_cmp 1602 movi a2, 1 1603 leaf_return 1604 1605 /* Check if x is a NaN. */ 16062: slli a7, xh, 12 1607 or a7, a7, xl 1608 beqz a7, 1b 1609 movi a2, 1 1610 leaf_return 1611 1612.Lle_cmp: 1613 /* Check if x and y have different signs. */ 1614 xor a7, xh, yh 1615 bltz a7, .Lle_diff_signs 1616 1617 /* Check if x is negative. */ 1618 bltz xh, .Lle_xneg 1619 1620 /* Check if x <= y. */ 1621 bltu xh, yh, 4f 1622 bne xh, yh, 5f 1623 bltu yl, xl, 5f 16244: movi a2, 0 1625 leaf_return 1626 1627.Lle_xneg: 1628 /* Check if y <= x. */ 1629 bltu yh, xh, 4b 1630 bne yh, xh, 5f 1631 bgeu xl, yl, 4b 16325: movi a2, 1 1633 leaf_return 1634 1635.Lle_diff_signs: 1636 bltz xh, 4b 1637 1638 /* Check if both x and y are zero. */ 1639 or a7, xh, yh 1640 slli a7, a7, 1 1641 or a7, a7, xl 1642 or a7, a7, yl 1643 movi a2, 1 1644 movi a3, 0 1645 moveqz a2, a3, a7 1646 leaf_return 1647 1648 1649 /* Greater Than or Equal */ 1650 1651 .align 4 1652 .global __gedf2 1653 .type __gedf2, @function 1654__gedf2: 1655 leaf_entry sp, 16 1656 movi a6, 0x7ff00000 1657 ball xh, a6, 2f 16581: bnall yh, a6, .Llt_cmp 1659 1660 /* Check if y is a NaN. */ 1661 slli a7, yh, 12 1662 or a7, a7, yl 1663 beqz a7, .Llt_cmp 1664 movi a2, -1 1665 leaf_return 1666 1667 /* Check if x is a NaN. */ 16682: slli a7, xh, 12 1669 or a7, a7, xl 1670 beqz a7, 1b 1671 movi a2, -1 1672 leaf_return 1673 1674 1675 /* Less Than */ 1676 1677 .align 4 1678 .global __ltdf2 1679 .type __ltdf2, @function 1680__ltdf2: 1681 leaf_entry sp, 16 1682 movi a6, 0x7ff00000 1683 ball xh, a6, 2f 16841: bnall yh, a6, .Llt_cmp 1685 1686 /* Check if y is a NaN. */ 1687 slli a7, yh, 12 1688 or a7, a7, yl 1689 beqz a7, .Llt_cmp 1690 movi a2, 0 1691 leaf_return 1692 1693 /* Check if x is a NaN. */ 16942: slli a7, xh, 12 1695 or a7, a7, xl 1696 beqz a7, 1b 1697 movi a2, 0 1698 leaf_return 1699 1700.Llt_cmp: 1701 /* Check if x and y have different signs. */ 1702 xor a7, xh, yh 1703 bltz a7, .Llt_diff_signs 1704 1705 /* Check if x is negative. */ 1706 bltz xh, .Llt_xneg 1707 1708 /* Check if x < y. */ 1709 bltu xh, yh, 4f 1710 bne xh, yh, 5f 1711 bgeu xl, yl, 5f 17124: movi a2, -1 1713 leaf_return 1714 1715.Llt_xneg: 1716 /* Check if y < x. */ 1717 bltu yh, xh, 4b 1718 bne yh, xh, 5f 1719 bltu yl, xl, 4b 17205: movi a2, 0 1721 leaf_return 1722 1723.Llt_diff_signs: 1724 bgez xh, 5b 1725 1726 /* Check if both x and y are nonzero. */ 1727 or a7, xh, yh 1728 slli a7, a7, 1 1729 or a7, a7, xl 1730 or a7, a7, yl 1731 movi a2, 0 1732 movi a3, -1 1733 movnez a2, a3, a7 1734 leaf_return 1735 1736 1737 /* Unordered */ 1738 1739 .align 4 1740 .global __unorddf2 1741 .type __unorddf2, @function 1742__unorddf2: 1743 leaf_entry sp, 16 1744 movi a6, 0x7ff00000 1745 ball xh, a6, 3f 17461: ball yh, a6, 4f 17472: movi a2, 0 1748 leaf_return 1749 17503: slli a7, xh, 12 1751 or a7, a7, xl 1752 beqz a7, 1b 1753 movi a2, 1 1754 leaf_return 1755 17564: slli a7, yh, 12 1757 or a7, a7, yl 1758 beqz a7, 2b 1759 movi a2, 1 1760 leaf_return 1761 1762#endif /* L_cmpdf2 */ 1763 1764#ifdef L_fixdfsi 1765 1766 .align 4 1767 .global __fixdfsi 1768 .type __fixdfsi, @function 1769__fixdfsi: 1770 leaf_entry sp, 16 1771 1772 /* Check for NaN and Infinity. */ 1773 movi a6, 0x7ff00000 1774 ball xh, a6, .Lfixdfsi_nan_or_inf 1775 1776 /* Extract the exponent and check if 0 < (exp - 0x3fe) < 32. 
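      A rough C sketch of the conversion below (illustrative only):

         int e = ((xh >> 20) & 0x7ff) - 0x3fe;           // exponent + 1
         if (e >= 32) goto maxint;                       // saturate
         if (e < 1) return 0;                            // |x| < 1
         uint32_t m = 0x80000000u | (xh << 11) | (xl >> 21);
         int32_t r = (int32_t)(m >> (32 - e));
         return (xh & 0x80000000) ? -r : r;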
*/ 1777 extui a4, xh, 20, 11 1778 extui a5, a6, 19, 10 /* 0x3fe */ 1779 sub a4, a4, a5 1780 bgei a4, 32, .Lfixdfsi_maxint 1781 blti a4, 1, .Lfixdfsi_zero 1782 1783 /* Add explicit "1.0" and shift << 11. */ 1784 or a7, xh, a6 1785 ssai (32 - 11) 1786 src a5, a7, xl 1787 1788 /* Shift back to the right, based on the exponent. */ 1789 ssl a4 /* shift by 32 - a4 */ 1790 srl a5, a5 1791 1792 /* Negate the result if sign != 0. */ 1793 neg a2, a5 1794 movgez a2, a5, a7 1795 leaf_return 1796 1797.Lfixdfsi_nan_or_inf: 1798 /* Handle Infinity and NaN. */ 1799 slli a4, xh, 12 1800 or a4, a4, xl 1801 beqz a4, .Lfixdfsi_maxint 1802 1803 /* Translate NaN to +maxint. */ 1804 movi xh, 0 1805 1806.Lfixdfsi_maxint: 1807 slli a4, a6, 11 /* 0x80000000 */ 1808 addi a5, a4, -1 /* 0x7fffffff */ 1809 movgez a4, a5, xh 1810 mov a2, a4 1811 leaf_return 1812 1813.Lfixdfsi_zero: 1814 movi a2, 0 1815 leaf_return 1816 1817#endif /* L_fixdfsi */ 1818 1819#ifdef L_fixdfdi 1820 1821 .align 4 1822 .global __fixdfdi 1823 .type __fixdfdi, @function 1824__fixdfdi: 1825 leaf_entry sp, 16 1826 1827 /* Check for NaN and Infinity. */ 1828 movi a6, 0x7ff00000 1829 ball xh, a6, .Lfixdfdi_nan_or_inf 1830 1831 /* Extract the exponent and check if 0 < (exp - 0x3fe) < 64. */ 1832 extui a4, xh, 20, 11 1833 extui a5, a6, 19, 10 /* 0x3fe */ 1834 sub a4, a4, a5 1835 bgei a4, 64, .Lfixdfdi_maxint 1836 blti a4, 1, .Lfixdfdi_zero 1837 1838 /* Add explicit "1.0" and shift << 11. */ 1839 or a7, xh, a6 1840 ssai (32 - 11) 1841 src xh, a7, xl 1842 sll xl, xl 1843 1844 /* Shift back to the right, based on the exponent. */ 1845 ssl a4 /* shift by 64 - a4 */ 1846 bgei a4, 32, .Lfixdfdi_smallshift 1847 srl xl, xh 1848 movi xh, 0 1849 1850.Lfixdfdi_shifted: 1851 /* Negate the result if sign != 0. */ 1852 bgez a7, 1f 1853 neg xl, xl 1854 neg xh, xh 1855 beqz xl, 1f 1856 addi xh, xh, -1 18571: leaf_return 1858 1859.Lfixdfdi_smallshift: 1860 src xl, xh, xl 1861 srl xh, xh 1862 j .Lfixdfdi_shifted 1863 1864.Lfixdfdi_nan_or_inf: 1865 /* Handle Infinity and NaN. */ 1866 slli a4, xh, 12 1867 or a4, a4, xl 1868 beqz a4, .Lfixdfdi_maxint 1869 1870 /* Translate NaN to +maxint. */ 1871 movi xh, 0 1872 1873.Lfixdfdi_maxint: 1874 slli a7, a6, 11 /* 0x80000000 */ 1875 bgez xh, 1f 1876 mov xh, a7 1877 movi xl, 0 1878 leaf_return 1879 18801: addi xh, a7, -1 /* 0x7fffffff */ 1881 movi xl, -1 1882 leaf_return 1883 1884.Lfixdfdi_zero: 1885 movi xh, 0 1886 movi xl, 0 1887 leaf_return 1888 1889#endif /* L_fixdfdi */ 1890 1891#ifdef L_fixunsdfsi 1892 1893 .align 4 1894 .global __fixunsdfsi 1895 .type __fixunsdfsi, @function 1896__fixunsdfsi: 1897 leaf_entry sp, 16 1898 1899 /* Check for NaN and Infinity. */ 1900 movi a6, 0x7ff00000 1901 ball xh, a6, .Lfixunsdfsi_nan_or_inf 1902 1903 /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 32. */ 1904 extui a4, xh, 20, 11 1905 extui a5, a6, 20, 10 /* 0x3ff */ 1906 sub a4, a4, a5 1907 bgei a4, 32, .Lfixunsdfsi_maxint 1908 bltz a4, .Lfixunsdfsi_zero 1909 1910 /* Add explicit "1.0" and shift << 11. */ 1911 or a7, xh, a6 1912 ssai (32 - 11) 1913 src a5, a7, xl 1914 1915 /* Shift back to the right, based on the exponent. */ 1916 addi a4, a4, 1 1917 beqi a4, 32, .Lfixunsdfsi_bigexp 1918 ssl a4 /* shift by 32 - a4 */ 1919 srl a5, a5 1920 1921 /* Negate the result if sign != 0. */ 1922 neg a2, a5 1923 movgez a2, a5, a7 1924 leaf_return 1925 1926.Lfixunsdfsi_nan_or_inf: 1927 /* Handle Infinity and NaN. */ 1928 slli a4, xh, 12 1929 or a4, a4, xl 1930 beqz a4, .Lfixunsdfsi_maxint 1931 1932 /* Translate NaN to 0xffffffff. 
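      For reference: after xh is cleared here, the MOVGEZ in the maxint
      code below yields 0xffffffff, so NaN and +Infinity (and any positive
      overflow) all saturate to 0xffffffff, while -Infinity and negative
      overflow keep the 0x80000000 result.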
*/ 1933 movi a2, -1 1934 leaf_return 1935 1936.Lfixunsdfsi_maxint: 1937 slli a4, a6, 11 /* 0x80000000 */ 1938 movi a5, -1 /* 0xffffffff */ 1939 movgez a4, a5, xh 1940 mov a2, a4 1941 leaf_return 1942 1943.Lfixunsdfsi_zero: 1944 movi a2, 0 1945 leaf_return 1946 1947.Lfixunsdfsi_bigexp: 1948 /* Handle unsigned maximum exponent case. */ 1949 bltz xh, 1f 1950 mov a2, a5 /* no shift needed */ 1951 leaf_return 1952 1953 /* Return 0x80000000 if negative. */ 19541: slli a2, a6, 11 1955 leaf_return 1956 1957#endif /* L_fixunsdfsi */ 1958 1959#ifdef L_fixunsdfdi 1960 1961 .align 4 1962 .global __fixunsdfdi 1963 .type __fixunsdfdi, @function 1964__fixunsdfdi: 1965 leaf_entry sp, 16 1966 1967 /* Check for NaN and Infinity. */ 1968 movi a6, 0x7ff00000 1969 ball xh, a6, .Lfixunsdfdi_nan_or_inf 1970 1971 /* Extract the exponent and check if 0 <= (exp - 0x3ff) < 64. */ 1972 extui a4, xh, 20, 11 1973 extui a5, a6, 20, 10 /* 0x3ff */ 1974 sub a4, a4, a5 1975 bgei a4, 64, .Lfixunsdfdi_maxint 1976 bltz a4, .Lfixunsdfdi_zero 1977 1978 /* Add explicit "1.0" and shift << 11. */ 1979 or a7, xh, a6 1980 ssai (32 - 11) 1981 src xh, a7, xl 1982 sll xl, xl 1983 1984 /* Shift back to the right, based on the exponent. */ 1985 addi a4, a4, 1 1986 beqi a4, 64, .Lfixunsdfdi_bigexp 1987 ssl a4 /* shift by 64 - a4 */ 1988 bgei a4, 32, .Lfixunsdfdi_smallshift 1989 srl xl, xh 1990 movi xh, 0 1991 1992.Lfixunsdfdi_shifted: 1993 /* Negate the result if sign != 0. */ 1994 bgez a7, 1f 1995 neg xl, xl 1996 neg xh, xh 1997 beqz xl, 1f 1998 addi xh, xh, -1 19991: leaf_return 2000 2001.Lfixunsdfdi_smallshift: 2002 src xl, xh, xl 2003 srl xh, xh 2004 j .Lfixunsdfdi_shifted 2005 2006.Lfixunsdfdi_nan_or_inf: 2007 /* Handle Infinity and NaN. */ 2008 slli a4, xh, 12 2009 or a4, a4, xl 2010 beqz a4, .Lfixunsdfdi_maxint 2011 2012 /* Translate NaN to 0xffffffff.... */ 20131: movi xh, -1 2014 movi xl, -1 2015 leaf_return 2016 2017.Lfixunsdfdi_maxint: 2018 bgez xh, 1b 20192: slli xh, a6, 11 /* 0x80000000 */ 2020 movi xl, 0 2021 leaf_return 2022 2023.Lfixunsdfdi_zero: 2024 movi xh, 0 2025 movi xl, 0 2026 leaf_return 2027 2028.Lfixunsdfdi_bigexp: 2029 /* Handle unsigned maximum exponent case. */ 2030 bltz a7, 2b 2031 leaf_return /* no shift needed */ 2032 2033#endif /* L_fixunsdfdi */ 2034 2035#ifdef L_floatsidf 2036 2037 .align 4 2038 .global __floatunsidf 2039 .type __floatunsidf, @function 2040__floatunsidf: 2041 leaf_entry sp, 16 2042 beqz a2, .Lfloatsidf_return_zero 2043 2044 /* Set the sign to zero and jump to the floatsidf code. */ 2045 movi a7, 0 2046 j .Lfloatsidf_normalize 2047 2048 .align 4 2049 .global __floatsidf 2050 .type __floatsidf, @function 2051__floatsidf: 2052 leaf_entry sp, 16 2053 2054 /* Check for zero. */ 2055 beqz a2, .Lfloatsidf_return_zero 2056 2057 /* Save the sign. */ 2058 extui a7, a2, 31, 1 2059 2060 /* Get the absolute value. */ 2061#if XCHAL_HAVE_ABS 2062 abs a2, a2 2063#else 2064 neg a4, a2 2065 movltz a2, a4, a2 2066#endif 2067 2068.Lfloatsidf_normalize: 2069 /* Normalize with the first 1 bit in the msb. */ 2070 do_nsau a4, a2, a5, a6 2071 ssl a4 2072 sll a5, a2 2073 2074 /* Shift the mantissa into position. */ 2075 srli xh, a5, 11 2076 slli xl, a5, (32 - 11) 2077 2078 /* Set the exponent. */ 2079 movi a5, 0x41d /* 0x3fe + 31 */ 2080 sub a5, a5, a4 2081 slli a5, a5, 20 2082 add xh, xh, a5 2083 2084 /* Add the sign and return. 
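      Worked example (for illustration): converting the integer 3, NSAU
      returns 30, so the normalized value is 0xc0000000; the mantissa words
      become xh = 0x00180000 and xl = 0, the exponent constant contributes
      0x41d - 30 = 0x3ff, and adding it to xh (the explicit "1.0" carries
      into the exponent field) gives xh = 0x40080000, xl = 0, i.e. 3.0,
      before the (zero) sign bit is OR-ed in here.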
*/ 2085 slli a7, a7, 31 2086 or xh, xh, a7 2087 leaf_return 2088 2089.Lfloatsidf_return_zero: 2090 movi a3, 0 2091 leaf_return 2092 2093#endif /* L_floatsidf */ 2094 2095#ifdef L_floatdidf 2096 2097 .align 4 2098 .global __floatundidf 2099 .type __floatundidf, @function 2100__floatundidf: 2101 leaf_entry sp, 16 2102 2103 /* Check for zero. */ 2104 or a4, xh, xl 2105 beqz a4, 2f 2106 2107 /* Set the sign to zero and jump to the floatdidf code. */ 2108 movi a7, 0 2109 j .Lfloatdidf_normalize 2110 2111 .align 4 2112 .global __floatdidf 2113 .type __floatdidf, @function 2114__floatdidf: 2115 leaf_entry sp, 16 2116 2117 /* Check for zero. */ 2118 or a4, xh, xl 2119 beqz a4, 2f 2120 2121 /* Save the sign. */ 2122 extui a7, xh, 31, 1 2123 2124 /* Get the absolute value. */ 2125 bgez xh, .Lfloatdidf_normalize 2126 neg xl, xl 2127 neg xh, xh 2128 beqz xl, .Lfloatdidf_normalize 2129 addi xh, xh, -1 2130 2131.Lfloatdidf_normalize: 2132 /* Normalize with the first 1 bit in the msb of xh. */ 2133 beqz xh, .Lfloatdidf_bigshift 2134 do_nsau a4, xh, a5, a6 2135 ssl a4 2136 src xh, xh, xl 2137 sll xl, xl 2138 2139.Lfloatdidf_shifted: 2140 /* Shift the mantissa into position, with rounding bits in a6. */ 2141 ssai 11 2142 sll a6, xl 2143 src xl, xh, xl 2144 srl xh, xh 2145 2146 /* Set the exponent. */ 2147 movi a5, 0x43d /* 0x3fe + 63 */ 2148 sub a5, a5, a4 2149 slli a5, a5, 20 2150 add xh, xh, a5 2151 2152 /* Add the sign. */ 2153 slli a7, a7, 31 2154 or xh, xh, a7 2155 2156 /* Round up if the leftover fraction is >= 1/2. */ 2157 bgez a6, 2f 2158 addi xl, xl, 1 2159 beqz xl, .Lfloatdidf_roundcarry 2160 2161 /* Check if the leftover fraction is exactly 1/2. */ 2162 slli a6, a6, 1 2163 beqz a6, .Lfloatdidf_exactlyhalf 21642: leaf_return 2165 2166.Lfloatdidf_bigshift: 2167 /* xh is zero. Normalize with first 1 bit of xl in the msb of xh. */ 2168 do_nsau a4, xl, a5, a6 2169 ssl a4 2170 sll xh, xl 2171 movi xl, 0 2172 addi a4, a4, 32 2173 j .Lfloatdidf_shifted 2174 2175.Lfloatdidf_exactlyhalf: 2176 /* Round down to the nearest even value. */ 2177 srli xl, xl, 1 2178 slli xl, xl, 1 2179 leaf_return 2180 2181.Lfloatdidf_roundcarry: 2182 /* xl is always zero when the rounding increment overflows, so 2183 there's no need to round it to an even value. */ 2184 addi xh, xh, 1 2185 /* Overflow to the exponent is OK. */ 2186 leaf_return 2187 2188#endif /* L_floatdidf */ 2189 2190#ifdef L_truncdfsf2 2191 2192 .align 4 2193 .global __truncdfsf2 2194 .type __truncdfsf2, @function 2195__truncdfsf2: 2196 leaf_entry sp, 16 2197 2198 /* Adjust the exponent bias. */ 2199 movi a4, (0x3ff - 0x7f) << 20 2200 sub a5, xh, a4 2201 2202 /* Check for underflow. */ 2203 xor a6, xh, a5 2204 bltz a6, .Ltrunc_underflow 2205 extui a6, a5, 20, 11 2206 beqz a6, .Ltrunc_underflow 2207 2208 /* Check for overflow. */ 2209 movi a4, 255 2210 bge a6, a4, .Ltrunc_overflow 2211 2212 /* Shift a5/xl << 3 into a5/a4. */ 2213 ssai (32 - 3) 2214 src a5, a5, xl 2215 sll a4, xl 2216 2217.Ltrunc_addsign: 2218 /* Add the sign bit. */ 2219 extui a6, xh, 31, 1 2220 slli a6, a6, 31 2221 or a2, a6, a5 2222 2223 /* Round up if the leftover fraction is >= 1/2. */ 2224 bgez a4, 1f 2225 addi a2, a2, 1 2226 /* Overflow to the exponent is OK. The answer will be correct. */ 2227 2228 /* Check if the leftover fraction is exactly 1/2. */ 2229 slli a4, a4, 1 2230 beqz a4, .Ltrunc_exactlyhalf 22311: leaf_return 2232 2233.Ltrunc_exactlyhalf: 2234 /* Round down to the nearest even value. 
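      Equivalently, in C (illustrative): a2 &= ~1u; -- clearing the low bit
      after the increment means halfway cases round to the even neighbour.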
*/ 2235 srli a2, a2, 1 2236 slli a2, a2, 1 2237 leaf_return 2238 2239.Ltrunc_overflow: 2240 /* Check if exponent == 0x7ff. */ 2241 movi a4, 0x7ff00000 2242 bnall xh, a4, 1f 2243 2244 /* Check if mantissa is nonzero. */ 2245 slli a5, xh, 12 2246 or a5, a5, xl 2247 beqz a5, 1f 2248 2249 /* Shift a4 to set a bit in the mantissa, making a quiet NaN. */ 2250 srli a4, a4, 1 2251 22521: slli a4, a4, 4 /* 0xff000000 or 0xff800000 */ 2253 /* Add the sign bit. */ 2254 extui a6, xh, 31, 1 2255 ssai 1 2256 src a2, a6, a4 2257 leaf_return 2258 2259.Ltrunc_underflow: 2260 /* Find shift count for a subnormal. Flush to zero if >= 32. */ 2261 extui a6, xh, 20, 11 2262 movi a5, 0x3ff - 0x7f 2263 sub a6, a5, a6 2264 addi a6, a6, 1 2265 bgeui a6, 32, 1f 2266 2267 /* Replace the exponent with an explicit "1.0". */ 2268 slli a5, a5, 13 /* 0x700000 */ 2269 or a5, a5, xh 2270 slli a5, a5, 11 2271 srli a5, a5, 11 2272 2273 /* Shift the mantissa left by 3 bits (into a5/a4). */ 2274 ssai (32 - 3) 2275 src a5, a5, xl 2276 sll a4, xl 2277 2278 /* Shift right by a6. */ 2279 ssr a6 2280 sll a7, a4 2281 src a4, a5, a4 2282 srl a5, a5 2283 beqz a7, .Ltrunc_addsign 2284 or a4, a4, a6 /* any positive, nonzero value will work */ 2285 j .Ltrunc_addsign 2286 2287 /* Return +/- zero. */ 22881: extui a2, xh, 31, 1 2289 slli a2, a2, 31 2290 leaf_return 2291 2292#endif /* L_truncdfsf2 */ 2293 2294#ifdef L_extendsfdf2 2295 2296 .align 4 2297 .global __extendsfdf2 2298 .type __extendsfdf2, @function 2299__extendsfdf2: 2300 leaf_entry sp, 16 2301 2302 /* Save the sign bit and then shift it off. */ 2303 extui a5, a2, 31, 1 2304 slli a5, a5, 31 2305 slli a4, a2, 1 2306 2307 /* Extract and check the exponent. */ 2308 extui a6, a2, 23, 8 2309 beqz a6, .Lextend_expzero 2310 addi a6, a6, 1 2311 beqi a6, 256, .Lextend_nan_or_inf 2312 2313 /* Shift >> 3 into a4/xl. */ 2314 srli a4, a4, 4 2315 slli xl, a2, (32 - 3) 2316 2317 /* Adjust the exponent bias. */ 2318 movi a6, (0x3ff - 0x7f) << 20 2319 add a4, a4, a6 2320 2321 /* Add the sign bit. */ 2322 or xh, a4, a5 2323 leaf_return 2324 2325.Lextend_nan_or_inf: 2326 movi a4, 0x7ff00000 2327 2328 /* Check for NaN. */ 2329 slli a7, a2, 9 2330 beqz a7, 1f 2331 2332 slli a6, a6, 11 /* 0x80000 */ 2333 or a4, a4, a6 2334 2335 /* Add the sign and return. */ 23361: or xh, a4, a5 2337 movi xl, 0 2338 leaf_return 2339 2340.Lextend_expzero: 2341 beqz a4, 1b 2342 2343 /* Normalize it to have 8 zero bits before the first 1 bit. */ 2344 do_nsau a7, a4, a2, a3 2345 addi a7, a7, -8 2346 ssl a7 2347 sll a4, a4 2348 2349 /* Shift >> 3 into a4/xl. */ 2350 slli xl, a4, (32 - 3) 2351 srli a4, a4, 3 2352 2353 /* Set the exponent. */ 2354 movi a6, 0x3fe - 0x7f 2355 sub a6, a6, a7 2356 slli a6, a6, 20 2357 add a4, a4, a6 2358 2359 /* Add the sign and return. */ 2360 or xh, a4, a5 2361 leaf_return 2362 2363#endif /* L_extendsfdf2 */ 2364 2365 2366