/* IEEE-754 single-precision functions for Xtensa
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#ifdef __XTENSA_EB__
#define xh a2
#define xl a3
#define yh a4
#define yl a5
#else
#define xh a3
#define xl a2
#define yh a5
#define yl a4
#endif

/* Warning!  The branch displacements for some Xtensa branch instructions
   are quite small, and this code has been carefully laid out to keep
   branch targets in range.  If you change anything, be sure to check that
   the assembler is not relaxing anything to branch over a jump.  */

#ifdef L_negsf2

        .align  4
        .global __negsf2
        .type   __negsf2, @function
__negsf2:
        leaf_entry sp, 16
        movi    a4, 0x80000000
        xor     a2, a2, a4
        leaf_return

#endif /* L_negsf2 */

#ifdef L_addsubsf3

        /* Addition */
__addsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Ladd_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* If x is a NaN, return it.  Otherwise, return y.  */
        slli    a7, a2, 9
        beqz    a7, .Ladd_ynan_or_inf
1:      leaf_return

.Ladd_ynan_or_inf:
        /* Return y.  */
        mov     a2, a3
        leaf_return

.Ladd_opposite_signs:
        /* Operand signs differ.  Do a subtraction.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Lsub_same_sign

        .align  4
        .global __addsf3
        .type   __addsf3, @function
__addsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Ladd_opposite_signs

.Ladd_same_sign:
        /* Check if either exponent == 0xff (i.e., NaN or Infinity).  */
        ball    a2, a6, .Ladd_xnan_or_inf
        ball    a3, a6, .Ladd_ynan_or_inf

        /* Compare the exponents.  The smaller operand will be shifted
           right by the exponent difference and added to the larger
           one.  */
        extui   a7, a2, 23, 9
        extui   a8, a3, 23, 9
        bltu    a7, a8, .Ladd_shiftx

.Ladd_shifty:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Ladd_yexpzero

        /* Replace the sign/exponent of y with 0x001, i.e., keep just the
           implicit "1.0" bit.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Ladd_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference >= 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3
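
/* For reference, the alignment step above behaves like the following C
   sketch (illustrative only, not part of this file; "align_mantissa" is a
   made-up name).  The word of bits shifted out of the smaller operand is
   kept for rounding, with the guard bit in its msb, exactly like register
   a9 here.

       #include <stdint.h>

       typedef struct { uint32_t frac; uint32_t sticky; } aligned_t;

       static aligned_t align_mantissa (uint32_t frac, uint32_t expdiff)
       {
         aligned_t r = { 0, 0 };
         if (expdiff < 32)
           {
             r.frac = frac >> expdiff;            // like SRL
             r.sticky = expdiff                   // like SRC with zero
                        ? frac << (32 - expdiff)  // bits shifted out
                        : 0;
           }
         return r;
       }
*/
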
        /* Do the addition.  */
        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        beq     a10, a7, .Ladd_round
        mov     a8, a7
        j       .Ladd_carry

.Ladd_yexpzero:
        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0", and increment the apparent exponent
           because subnormals behave as if they had the minimum (nonzero)
           exponent.  Test for the case when both exponents are zero.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Ladd_bothexpzero
        addi    a8, a8, 1
        j       .Ladd_yexpdiff

.Ladd_bothexpzero:
        /* Both exponents are zero.  Handle this as a special case.  There
           is no need to shift or round, and the normal code for handling
           a carry into the exponent field will not work because it
           assumes there is an implicit "1.0" that needs to be added.  */
        add     a2, a2, a3
1:      leaf_return

.Ladd_xexpzero:
        /* Same as "yexpzero" except skip handling the case when both
           exponents are zero.  */
        slli    a2, a2, 9
        srli    a2, a2, 9
        addi    a7, a7, 1
        j       .Ladd_xexpdiff

.Ladd_shiftx:
        /* Same thing as the "shifty" code, but with x and y swapped.  Also,
           because the exponent difference is always nonzero in this version,
           the shift sequence can use SLL and skip loading a constant zero.  */
        bnone   a2, a6, .Ladd_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Ladd_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Ladd_returny

        ssr     a10
        sll     a9, a2
        srl     a2, a2

        add     a2, a2, a3

        /* Check if the add overflowed into the exponent.  */
        extui   a10, a2, 23, 9
        bne     a10, a8, .Ladd_carry

.Ladd_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_returny:
        mov     a2, a3
        leaf_return

.Ladd_carry:
        /* The addition has overflowed into the exponent field, so the
           value needs to be renormalized.  The mantissa of the result
           can be recovered by subtracting the original exponent and
           adding 0x800000 (which is the explicit "1.0" for the
           mantissa of the non-shifted operand -- the "1.0" for the
           shifted operand was already added).  The mantissa can then
           be shifted right by one bit.  The explicit "1.0" of the
           shifted mantissa then needs to be replaced by the exponent,
           incremented by one to account for the normalizing shift.
           It is faster to combine these operations: do the shift first
           and combine the additions and subtractions.  If x is the
           original exponent, the result is:
               shifted mantissa - (x << 22) + (1 << 22) + (x << 23)
           or:
               shifted mantissa + ((x + 1) << 22)
           Note that the exponent is incremented here by leaving the
           explicit "1.0" of the mantissa in the exponent field.  */
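
/* The algebra above can be checked with a small C sketch (illustrative
   only; "renorm_after_carry" is a made-up name).  It assumes the inputs
   come from the add path, i.e., the exponent field of "sum" already
   holds x + 1 because of the carry.  The lsb dropped by the shift is
   saved separately (register a10 here) as the new guard bit.

       #include <stdint.h>
       #include <assert.h>

       static uint32_t renorm_after_carry (uint32_t sum, uint32_t x)
       {
         // Step by step: recover the mantissa sum, shift it right one
         // bit, and pack it back with the incremented exponent.
         uint32_t mant = (sum - (x << 23) + 0x800000u) >> 1;
         uint32_t slow = mant + (x << 23);
         // Combined form used by the code below: shift first, then add.
         uint32_t fast = (sum >> 1) + ((x + 1) << 22);
         assert (slow == fast);
         return fast;
       }
*/
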
        /* Shift x right by one bit.  Save the lsb.  */
        mov     a10, a2
        srli    a2, a2, 1

        /* See explanation above.  The original exponent is in a8.  */
        addi    a8, a8, 1
        slli    a8, a8, 22
        add     a2, a2, a8

        /* Return an Infinity if the exponent overflowed.  */
        ball    a2, a6, .Ladd_infinity

        /* Same thing as the "round" code except the msb of the leftover
           fraction is bit 0 of a10, with the rest of the fraction in a9.  */
        bbci.l  a10, 0, 1f
        addi    a2, a2, 1
        beqz    a9, .Ladd_exactlyhalf
1:      leaf_return

.Ladd_infinity:
        /* Clear the mantissa.  */
        srli    a2, a2, 23
        slli    a2, a2, 23

        /* The sign bit may have been lost in a carry-out.  Put it back.  */
        slli    a8, a8, 1
        or      a2, a2, a8
        leaf_return

.Ladd_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return


        /* Subtraction */
__subsf3_aux:

        /* Handle NaNs and Infinities.  (This code is placed before the
           start of the function just to keep it in range of the limited
           branch displacements.)  */

.Lsub_xnan_or_inf:
        /* If y is neither Infinity nor NaN, return x.  */
        bnall   a3, a6, 1f
        /* Both x and y are either NaN or Inf, so the result is NaN.  */
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Lsub_ynan_or_inf:
        /* Negate y and return it.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
        leaf_return

.Lsub_opposite_signs:
        /* Operand signs differ.  Do an addition.  */
        slli    a7, a6, 8
        xor     a3, a3, a7
        j       .Ladd_same_sign

        .align  4
        .global __subsf3
        .type   __subsf3, @function
__subsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Check if the two operands have the same sign.  */
        xor     a7, a2, a3
        bltz    a7, .Lsub_opposite_signs

.Lsub_same_sign:
        /* Check if either exponent == 0xff (i.e., NaN or Infinity).  */
        ball    a2, a6, .Lsub_xnan_or_inf
        ball    a3, a6, .Lsub_ynan_or_inf

        /* Compare the operands.  In contrast to addition, the entire
           value matters here.  */
        extui   a7, a2, 23, 8
        extui   a8, a3, 23, 8
        bltu    a2, a3, .Lsub_xsmaller

.Lsub_ysmaller:
        /* Check if the smaller (or equal) exponent is zero.  */
        bnone   a3, a6, .Lsub_yexpzero

        /* Replace the sign/exponent of y with 0x001, i.e., keep just the
           implicit "1.0" bit.  */
        or      a3, a3, a6
        slli    a3, a3, 8
        srli    a3, a3, 8

.Lsub_yexpdiff:
        /* Compute the exponent difference.  */
        sub     a10, a7, a8

        /* Exponent difference >= 32 -- just return the bigger value.  */
        bgeui   a10, 32, 1f

        /* Shift y right by the exponent difference.  Any bits that are
           shifted out of y are saved in a9 for rounding the result.  */
        ssr     a10
        movi    a9, 0
        src     a9, a3, a9
        srl     a3, a3

        sub     a2, a2, a3

        /* Subtract the leftover bits in a9 from zero and propagate any
           borrow from a2.  */
        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        beq     a10, a7, .Lsub_round
        j       .Lsub_borrow
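
/* The neg/movnez pair above implements a 64-bit subtract on a 32-bit
   ALU: the guard word is subtracted from zero, and a borrow is taken out
   of the high word whenever the guard word is nonzero.  A C sketch
   (illustrative only; "sub_with_guard" is a made-up name; *hi already
   holds the high-word difference x - y):

       #include <stdint.h>

       static void sub_with_guard (uint32_t *hi, uint32_t *guard)
       {
         // Computing (hi:0) - (0:guard) in two 32-bit steps.
         if (*guard != 0)
           *hi -= 1;           // movnez: borrow iff guard != 0
         *guard = -*guard;     // neg: low word of 0 - guard
       }
*/
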
.Lsub_yexpzero:
        /* Return zero if the inputs are equal.  (For the non-subnormal
           case, subtracting the "1.0" will cause a borrow from the exponent
           and this case can be detected when handling the borrow.)  */
        beq     a2, a3, .Lsub_return_zero

        /* y is a subnormal value.  Replace its sign/exponent with zero,
           i.e., no implicit "1.0".  Unless x is also a subnormal, increment
           y's apparent exponent because subnormals behave as if they had
           the minimum (nonzero) exponent.  */
        slli    a3, a3, 9
        srli    a3, a3, 9
        bnone   a2, a6, .Lsub_yexpdiff
        addi    a8, a8, 1
        j       .Lsub_yexpdiff

.Lsub_returny:
        /* Negate and return y.  */
        slli    a7, a6, 8
        xor     a2, a3, a7
1:      leaf_return

.Lsub_xsmaller:
        /* Same thing as the "ysmaller" code, but with x and y swapped and
           with y negated.  */
        bnone   a2, a6, .Lsub_xexpzero

        or      a2, a2, a6
        slli    a2, a2, 8
        srli    a2, a2, 8

.Lsub_xexpdiff:
        sub     a10, a8, a7
        bgeui   a10, 32, .Lsub_returny

        ssr     a10
        movi    a9, 0
        src     a9, a2, a9
        srl     a2, a2

        /* Negate y.  */
        slli    a11, a6, 8
        xor     a3, a3, a11

        sub     a2, a3, a2

        neg     a9, a9
        addi    a10, a2, -1
        movnez  a2, a10, a9

        /* Check if the subtract underflowed into the exponent.  */
        extui   a10, a2, 23, 8
        bne     a10, a8, .Lsub_borrow

.Lsub_round:
        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a9, 1f
        addi    a2, a2, 1

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a9, a9, 1
        beqz    a9, .Lsub_exactlyhalf
1:      leaf_return

.Lsub_xexpzero:
        /* Same as "yexpzero".  */
        beq     a2, a3, .Lsub_return_zero
        slli    a2, a2, 9
        srli    a2, a2, 9
        bnone   a3, a6, .Lsub_xexpdiff
        addi    a7, a7, 1
        j       .Lsub_xexpdiff

.Lsub_return_zero:
        movi    a2, 0
        leaf_return

.Lsub_borrow:
        /* The subtraction has underflowed into the exponent field, so the
           value needs to be renormalized.  Shift the mantissa left as
           needed to remove any leading zeros and adjust the exponent
           accordingly.  If the exponent is not large enough to remove
           all the leading zeros, the result will be a subnormal value.  */

        slli    a8, a2, 9
        beqz    a8, .Lsub_xzero
        do_nsau a6, a8, a7, a11
        srli    a8, a8, 9
        bge     a6, a10, .Lsub_subnormal
        addi    a6, a6, 1

.Lsub_normalize_shift:
        /* Shift the mantissa (a8/a9) left by a6.  */
        ssl     a6
        src     a8, a8, a9
        sll     a9, a9

        /* Combine the shifted mantissa with the sign and exponent,
           decrementing the exponent by a6.  (The exponent has already
           been decremented by one due to the borrow from the subtraction,
           but adding the mantissa will increment the exponent by one.)  */
        srli    a2, a2, 23
        sub     a2, a2, a6
        slli    a2, a2, 23
        add     a2, a2, a8
        j       .Lsub_round

.Lsub_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

.Lsub_xzero:
        /* If there was a borrow from the exponent, and the mantissa and
           guard digits are all zero, then the inputs were equal and the
           result should be zero.  */
        beqz    a9, .Lsub_return_zero

        /* Only the guard digit is nonzero.  Shift by min(24, a10).  */
        addi    a11, a10, -24
        movi    a6, 24
        movltz  a6, a10, a11
        j       .Lsub_normalize_shift

.Lsub_subnormal:
        /* The exponent is too small to shift away all the leading zeros.
           Set a6 to the current exponent (which has already been
           decremented by the borrow) so that the exponent of the result
           will be zero.  Do not add 1 to a6 in this case, because: (1)
           adding the mantissa will not increment the exponent, so there is
           no need to subtract anything extra from the exponent to
           compensate, and (2) the effective exponent of a subnormal is 1
           not 0 so the shift amount must be 1 smaller than normal.  */
        mov     a6, a10
        j       .Lsub_normalize_shift
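
/* The renormalization rule above -- shift out leading zeros, decrement
   the exponent, and stop early when the result must be subnormal -- can
   be summarized by this simplified C sketch (illustrative only; names
   are made up; the real code uses NSAU and funnel shifts rather than a
   loop, and rounding of the guard bits is omitted here).  "mant" is the
   fraction and guard word as a 56-bit fixed-point value whose implicit
   "1.0" position is bit 55; "exp" is the biased exponent after the
   borrow.

       #include <stdint.h>

       static uint32_t norm_after_borrow (uint32_t sign, int exp,
                                          uint64_t mant)
       {
         if (mant == 0)
           return sign;                    // x == y: return zero
         while (exp > 0 && (mant & (1ULL << 55)) == 0)
           {
             mant <<= 1;                   // shift out a leading zero
             exp -= 1;                     // ...and drop the exponent
           }
         // exp == 0 here means the result is subnormal.
         uint32_t frac = (uint32_t)(mant >> 32) & 0x7fffff;
         return sign | ((uint32_t)exp << 23) | frac;
       }
*/
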
#endif /* L_addsubsf3 */

#ifdef L_mulsf3

        /* Multiplication */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

__mulsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Lmul_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Lmul_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Lmul_xnormalized

.Lmul_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* If y is zero, return zero.  */
        beqz    a3, .Lmul_return_zero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a11, a12
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Lmul_ynormalized

.Lmul_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

.Lmul_xnan_or_inf:
        /* If y is zero, return NaN.  */
        slli    a8, a3, 1
        bnez    a8, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
        j       .Lmul_done
1:
        /* If y is NaN, return y.  */
        bnall   a3, a6, .Lmul_returnx
        slli    a8, a3, 9
        beqz    a8, .Lmul_returnx

.Lmul_returny:
        mov     a2, a3

.Lmul_returnx:
        /* Set the sign bit and return.  */
        extui   a7, a7, 31, 1
        slli    a2, a2, 1
        ssai    1
        src     a2, a7, a2
        j       .Lmul_done

.Lmul_ynan_or_inf:
        /* If x is zero, return NaN.  */
        slli    a8, a2, 1
        bnez    a8, .Lmul_returny
        movi    a7, 0x400000    /* make it a quiet NaN */
        or      a2, a3, a7
        j       .Lmul_done

        .align  4
        .global __mulsf3
        .type   __mulsf3, @function
__mulsf3:
#if __XTENSA_CALL0_ABI__
        leaf_entry sp, 32
        addi    sp, sp, -32
        s32i    a12, sp, 16
        s32i    a13, sp, 20
        s32i    a14, sp, 24
        s32i    a15, sp, 28
#elif XCHAL_NO_MUL
        /* This is not really a leaf function; allocate enough stack space
           to allow CALL12s to a helper function.  */
        leaf_entry sp, 64
#else
        leaf_entry sp, 32
#endif
        movi    a6, 0x7f800000

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Lmul_xnan_or_inf
        ball    a3, a6, .Lmul_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a8, .Lmul_xexpzero
.Lmul_xnormalized:
        beqz    a9, .Lmul_yexpzero
.Lmul_ynormalized:

        /* Add the exponents.  */
        add     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* Multiply 32x32 to 64 bits.  The result ends up in a2/a6.  */

#if XCHAL_HAVE_MUL32_HIGH

        mull    a6, a2, a3
        muluh   a2, a2, a3

#else

        /* Break the inputs into 16-bit chunks and compute 4 32-bit partial
           products.  These partial products are:

                0 xl * yl

                1 xl * yh
                2 xh * yl

                3 xh * yh

           If using the Mul16 or Mul32 multiplier options, these input
           chunks must be stored in separate registers.  For Mac16, the
           UMUL.AA.* opcodes can specify that the inputs come from either
           half of the registers, so there is no need to shift them out
           ahead of time.  If there is no multiply hardware, the 16-bit
           chunks can be extracted when setting up the arguments to the
           separate multiply function.  */
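
/* The partial-product scheme just described corresponds to this C sketch
   (illustrative only; "umul32x32" is a made-up name).  With xh/xl and
   yh/yl the 16-bit halves of the inputs, the 64-bit product is
   (xh*yh << 32) + ((xl*yh + xh*yl) << 16) + xl*yl, with explicit carry
   tracking because only 32-bit additions are available:

       #include <stdint.h>

       static void umul32x32 (uint32_t x, uint32_t y,
                              uint32_t *hi, uint32_t *lo)
       {
         uint32_t xl = x & 0xffff, xh = x >> 16;
         uint32_t yl = y & 0xffff, yh = y >> 16;
         uint32_t pp0 = xl * yl;                  // pp 0
         uint32_t pp1 = xl * yh;                  // pp 1
         uint32_t pp2 = xh * yl;                  // pp 2
         uint32_t pp3 = xh * yh;                  // pp 3
         uint32_t mid = pp1 + pp2;
         uint32_t carry = (mid < pp1) ? 1 : 0;    // 33rd bit of pp1+pp2
         *lo = pp0 + (mid << 16);
         *hi = pp3 + (mid >> 16) + (carry << 16)
               + ((*lo < pp0) ? 1 : 0);           // carry out of low word
       }
*/
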
#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Calling a separate multiply function will clobber a0 and requires
           use of a8 as a temporary, so save those values now.  (The function
           uses a custom ABI so nothing else needs to be saved.)  */
        s32i    a0, sp, 0
        s32i    a8, sp, 4
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

        /* Get the high halves of the inputs into registers.  */
        srli    a2h, a2, 16
        srli    a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
        /* Clear the high halves of the inputs.  This does not matter
           for MUL16 because the high bits are ignored.  */
        extui   a2, a2, 0, 16
        extui   a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mul16u  dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        mull    dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        umul_aa_ ## xhalf ## yhalf      xreg, yreg; \
        rsr     dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
        extui   dst, src, 0, 16
#define set_arg_h(dst, src) \
        srli    dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a13, xreg); \
        set_arg_ ## yhalf (a14, yreg); \
        call0   .Lmul_mulsi3; \
        mov     dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
        set_arg_ ## xhalf (a14, xreg); \
        set_arg_ ## yhalf (a15, yreg); \
        call12  .Lmul_mulsi3; \
        mov     dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

        /* Add pp1 and pp2 into a6 with carry-out in a9.  */
        do_mul(a6, a2, l, a3, h)        /* pp 1 */
        do_mul(a11, a2, h, a3, l)       /* pp 2 */
        movi    a9, 0
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Shift the high half of a9/a6 into position in a9.  Note that
           this value can be safely incremented without any carry-outs.  */
        ssai    16
        src     a9, a9, a6

        /* Compute the low word into a6.  */
        do_mul(a11, a2, l, a3, l)       /* pp 0 */
        sll     a6, a6
        add     a6, a6, a11
        bgeu    a6, a11, 1f
        addi    a9, a9, 1
1:
        /* Compute the high word into a2.  */
        do_mul(a2, a2, h, a3, h)        /* pp 3 */
        add     a2, a2, a9

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
        /* Restore values saved on the stack during the multiplication.  */
        l32i    a0, sp, 0
        l32i    a8, sp, 4
#endif
#endif /* ! XCHAL_HAVE_MUL32_HIGH */
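
/* The "bgeu ...; addi a9, a9, 1" pairs above detect unsigned overflow
   without a carry flag: after r = a + b, the sum wrapped around exactly
   when r < b (equivalently, r < a).  A C sketch (illustrative only;
   "add_carry_out" is a made-up name):

       #include <stdint.h>

       static uint32_t add_carry_out (uint32_t a, uint32_t b,
                                      uint32_t *carry)
       {
         uint32_t r = a + b;    // wraps mod 2^32
         if (r < b)             // wrapped, so there was a carry out
           *carry += 1;
         return r;
       }
*/
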
        /* Shift left by 9 bits, unless there was a carry-out from the
           multiply, in which case, shift by 8 bits and increment the
           exponent.  */
        movi    a4, 9
        srli    a5, a2, 24 - 9
        beqz    a5, 1f
        addi    a4, a4, -1
        addi    a8, a8, 1
1:      ssl     a4
        src     a2, a2, a6
        sll     a6, a6

        /* Subtract the extra bias from the exponent sum (plus one to account
           for the explicit "1.0" of the mantissa that will be added to the
           exponent in the final result).  */
        movi    a4, 0x80
        sub     a8, a8, a4

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Lmul_overflow

.Lmul_round:
        /* Round.  */
        bgez    a6, .Lmul_rounded
        addi    a2, a2, 1
        slli    a6, a6, 1
        beqz    a6, .Lmul_exactlyhalf

.Lmul_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a2, a8

.Lmul_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7

.Lmul_done:
#if __XTENSA_CALL0_ABI__
        l32i    a12, sp, 16
        l32i    a13, sp, 20
        l32i    a14, sp, 24
        l32i    a15, sp, 28
        addi    sp, sp, 32
#endif
        leaf_return

.Lmul_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        j       .Lmul_rounded

.Lmul_overflow:
        bltz    a8, .Lmul_underflow
        /* Return +/- Infinity.  */
        movi    a8, 0xff
        slli    a2, a8, 23
        j       .Lmul_addsign

.Lmul_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        mov     a9, a6
        ssr     a8
        bgeui   a8, 32, .Lmul_flush_to_zero

        /* Shift a2 right.  Any bits that are shifted out of a2 are saved
           in a6 (combined with the shifted-out bits currently in a6) for
           rounding the result.  */
        sll     a6, a2
        srl     a2, a2

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero bits shifted out into a6.  */
        beqz    a9, .Lmul_round
        movi    a9, 1
        or      a6, a6, a9
        j       .Lmul_round

.Lmul_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        j       .Lmul_done

#if XCHAL_NO_MUL

        /* For Xtensa processors with no multiply hardware, this simplified
           version of _mulsi3 is used for multiplying 16-bit chunks of
           the floating-point mantissas.  When using CALL0, this function
           uses a custom ABI: the inputs are passed in a13 and a14, the
           result is returned in a12, and a8 and a15 are clobbered.  */
        .align  4
.Lmul_mulsi3:
        leaf_entry sp, 16
        .macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
        movi    \dst, 0
1:      add     \tmp1, \src2, \dst
        extui   \tmp2, \src1, 0, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx2 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 1, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx4 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 2, 1
        movnez  \dst, \tmp1, \tmp2

        do_addx8 \tmp1, \src2, \dst, \tmp1
        extui   \tmp2, \src1, 3, 1
        movnez  \dst, \tmp1, \tmp2

        srli    \src1, \src1, 4
        slli    \src2, \src2, 4
        bnez    \src1, 1b
        .endm
#if __XTENSA_CALL0_ABI__
        mul_mulsi3_body a12, a13, a14, a15, a8
#else
        /* The result will be written into a2, so save that argument in a4.  */
        mov     a4, a2
        mul_mulsi3_body a2, a4, a3, a5, a6
#endif
        leaf_return
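
/* mul_mulsi3_body is a shift-and-add multiply that consumes four bits of
   the multiplier per iteration, using ADDX2/ADDX4/ADDX8 and conditional
   moves instead of branches.  A C sketch of the same loop (illustrative
   only; "mulsi3_sketch" is a made-up name):

       #include <stdint.h>

       static uint32_t mulsi3_sketch (uint32_t a, uint32_t b)
       {
         uint32_t dst = 0;
         do
           {
             if (a & 1) dst += b;         // add   (multiplier bit 0)
             if (a & 2) dst += b << 1;    // addx2 (multiplier bit 1)
             if (a & 4) dst += b << 2;    // addx4 (multiplier bit 2)
             if (a & 8) dst += b << 3;    // addx8 (multiplier bit 3)
             a >>= 4;                     // consume 4 multiplier bits
             b <<= 4;                     // scale the multiplicand
           }
         while (a != 0);
         return dst;
       }
*/
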
#endif /* XCHAL_NO_MUL */
#endif /* L_mulsf3 */

#ifdef L_divsf3

        /* Division */
__divsf3_aux:

        /* Handle unusual cases (zeros, subnormals, NaNs and Infinities).
           (This code is placed before the start of the function just to
           keep it in range of the limited branch displacements.)  */

.Ldiv_yexpzero:
        /* Clear the sign bit of y.  */
        slli    a3, a3, 1
        srli    a3, a3, 1

        /* Check for division by zero.  */
        beqz    a3, .Ldiv_yzero

        /* Normalize y.  Adjust the exponent in a9.  */
        do_nsau a10, a3, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a3, a3
        movi    a9, 1
        sub     a9, a9, a10
        j       .Ldiv_ynormalized

.Ldiv_yzero:
        /* y is zero.  Return NaN if x is also zero; otherwise, infinity.  */
        slli    a4, a2, 1
        srli    a4, a4, 1
        srli    a2, a7, 31
        slli    a2, a2, 31
        or      a2, a2, a6
        bnez    a4, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_xexpzero:
        /* Clear the sign bit of x.  */
        slli    a2, a2, 1
        srli    a2, a2, 1

        /* If x is zero, return zero.  */
        beqz    a2, .Ldiv_return_zero

        /* Normalize x.  Adjust the exponent in a8.  */
        do_nsau a10, a2, a4, a5
        addi    a10, a10, -8
        ssl     a10
        sll     a2, a2
        movi    a8, 1
        sub     a8, a8, a10
        j       .Ldiv_xnormalized

.Ldiv_return_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

.Ldiv_xnan_or_inf:
        /* Set the sign bit of the result.  */
        srli    a7, a3, 31
        slli    a7, a7, 31
        xor     a2, a2, a7
        /* If y is NaN or Inf, return NaN.  */
        bnall   a3, a6, 1f
        movi    a4, 0x400000    /* make it a quiet NaN */
        or      a2, a2, a4
1:      leaf_return

.Ldiv_ynan_or_inf:
        /* If y is Infinity, return zero.  */
        slli    a8, a3, 9
        beqz    a8, .Ldiv_return_zero
        /* y is NaN; return it.  */
        mov     a2, a3
        leaf_return

        .align  4
        .global __divsf3
        .type   __divsf3, @function
__divsf3:
        leaf_entry sp, 16
        movi    a6, 0x7f800000

        /* Get the sign of the result.  */
        xor     a7, a2, a3

        /* Check for NaN and infinity.  */
        ball    a2, a6, .Ldiv_xnan_or_inf
        ball    a3, a6, .Ldiv_ynan_or_inf

        /* Extract the exponents.  */
        extui   a8, a2, 23, 8
        extui   a9, a3, 23, 8

        beqz    a9, .Ldiv_yexpzero
.Ldiv_ynormalized:
        beqz    a8, .Ldiv_xexpzero
.Ldiv_xnormalized:

        /* Subtract the exponents.  */
        sub     a8, a8, a9

        /* Replace sign/exponent fields with explicit "1.0".  */
        movi    a10, 0xffffff
        or      a2, a2, a6
        and     a2, a2, a10
        or      a3, a3, a6
        and     a3, a3, a10

        /* The first digit of the mantissa division must be a one.
           Shift x (and adjust the exponent) as needed to make this true.  */
        bltu    a3, a2, 1f
        slli    a2, a2, 1
        addi    a8, a8, -1
1:
        /* Do the first subtraction and shift.  */
        sub     a2, a2, a3
        slli    a2, a2, 1

        /* Put the quotient into a10.  */
        movi    a10, 1

        /* Divide one bit at a time for 23 bits.  */
        movi    a9, 23
#if XCHAL_HAVE_LOOPS
        loop    a9, .Ldiv_loopend
#endif
.Ldiv_loop:
        /* Shift the quotient << 1.  */
        slli    a10, a10, 1

        /* Is this digit a 0 or 1?  */
        bltu    a2, a3, 1f

        /* Output a 1 and subtract.  */
        addi    a10, a10, 1
        sub     a2, a2, a3

        /* Shift the dividend << 1.  */
1:      slli    a2, a2, 1

#if !XCHAL_HAVE_LOOPS
        addi    a9, a9, -1
        bnez    a9, .Ldiv_loop
#endif
.Ldiv_loopend:
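
/* The loop above is a classic bit-at-a-time restoring division of the
   mantissas.  A C sketch (illustrative only; "div_mantissa" is a made-up
   name; x and y are 24-bit mantissas with the implicit "1.0" attached):

       #include <stdint.h>

       static uint32_t div_mantissa (uint32_t x, uint32_t y,
                                     int *exp, uint32_t *rem2)
       {
         if (x <= y)
           {
             x <<= 1;       // make the first quotient digit a 1
             *exp -= 1;
           }
         uint32_t q = 1;    // the first digit is always a 1
         uint32_t r = (x - y) << 1;
         for (int i = 0; i < 23; i++)
           {
             q <<= 1;
             if (r >= y)
               {
                 q += 1;    // output a 1 digit
                 r -= y;
               }
             r <<= 1;
           }
         *rem2 = r;         // remainder << 1; compared against y to round
         return q;          // 24-bit quotient
       }
*/
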
        /* Add the exponent bias (less one to account for the explicit "1.0"
           of the mantissa that will be added to the exponent in the final
           result).  */
        addi    a8, a8, 0x7e

        /* Check for over/underflow.  The value in a8 is one less than the
           final exponent, so values in the range 0..fd are OK here.  */
        movi    a4, 0xfe
        bgeu    a8, a4, .Ldiv_overflow

.Ldiv_round:
        /* Round.  The remainder (<< 1) is in a2.  */
        bltu    a2, a3, .Ldiv_rounded
        addi    a10, a10, 1
        beq     a2, a3, .Ldiv_exactlyhalf

.Ldiv_rounded:
        /* Add the exponent to the mantissa.  */
        slli    a8, a8, 23
        add     a2, a10, a8

.Ldiv_addsign:
        /* Add the sign bit.  */
        srli    a7, a7, 31
        slli    a7, a7, 31
        or      a2, a2, a7
        leaf_return

.Ldiv_overflow:
        bltz    a8, .Ldiv_underflow
        /* Return +/- Infinity.  */
        addi    a8, a4, 1       /* 0xff */
        slli    a2, a8, 23
        j       .Ldiv_addsign

.Ldiv_exactlyhalf:
        /* Remainder is exactly half the divisor.  Round even.  */
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_underflow:
        /* Create a subnormal value, where the exponent field contains zero,
           but the effective exponent is 1.  The value of a8 is one less than
           the actual exponent, so just negate it to get the shift amount.  */
        neg     a8, a8
        ssr     a8
        bgeui   a8, 32, .Ldiv_flush_to_zero

        /* Shift a10 right.  Any bits that are shifted out of a10 are
           saved in a6 for rounding the result.  */
        sll     a6, a10
        srl     a10, a10

        /* Set the exponent to zero.  */
        movi    a8, 0

        /* Pack any nonzero remainder (in a2) into a6.  */
        beqz    a2, 1f
        movi    a9, 1
        or      a6, a6, a9

        /* Round a10 based on the bits shifted out into a6.  */
1:      bgez    a6, .Ldiv_rounded
        addi    a10, a10, 1
        slli    a6, a6, 1
        bnez    a6, .Ldiv_rounded
        srli    a10, a10, 1
        slli    a10, a10, 1
        j       .Ldiv_rounded

.Ldiv_flush_to_zero:
        /* Return zero with the appropriate sign bit.  */
        srli    a2, a7, 31
        slli    a2, a2, 31
        leaf_return

#endif /* L_divsf3 */

#ifdef L_cmpsf2

        /* Equal and Not Equal */

        .align  4
        .global __eqsf2
        .global __nesf2
        .set    __nesf2, __eqsf2
        .type   __eqsf2, @function
__eqsf2:
        leaf_entry sp, 16
        bne     a2, a3, 4f

        /* The values are equal but NaN != NaN.  Check the exponent.  */
        movi    a6, 0x7f800000
        ball    a2, a6, 3f

        /* Equal.  */
        movi    a2, 0
        leaf_return

        /* Not equal.  */
2:      movi    a2, 1
        leaf_return

        /* Check if the mantissas are nonzero.  */
3:      slli    a7, a2, 9
        j       5f

        /* Check if x and y are zero with different signs.  */
4:      or      a7, a2, a3
        slli    a7, a7, 1

        /* Equal if a7 == 0, where a7 is either abs(x | y) or the mantissa
           of x when exponent(x) = 0xff and x == y.  */
5:      movi    a2, 0
        movi    a3, 1
        movnez  a2, a3, a7
        leaf_return
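
/* __eqsf2's rules in C (illustrative only; "float_bits_eq" is a made-up
   name): two floats compare equal when their bit patterns match and they
   are not NaN, or when both are zeros of either sign.  The return value
   follows the libgcc convention of 0 for "equal" and nonzero otherwise.

       #include <stdint.h>

       static int float_bits_eq (uint32_t x, uint32_t y)
       {
         uint32_t exp_mask = 0x7f800000;
         if (x != y)
           return ((x | y) << 1) != 0;    // equal only if +0 and -0
         if ((x & exp_mask) == exp_mask && (x << 9) != 0)
           return 1;                      // NaN != NaN
         return 0;
       }
*/
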

        /* Greater Than */

        .align  4
        .global __gtsf2
        .type   __gtsf2, @function
__gtsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return


        /* Less Than or Equal */

        .align  4
        .global __lesf2
        .type   __lesf2, @function
__lesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Lle_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Lle_cmp
        movi    a2, 1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

.Lle_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, a2, a3
        bltz    a7, .Lle_diff_signs

        /* Check if x is negative.  */
        bltz    a2, .Lle_xneg

        /* Check if x <= y.  */
        bltu    a3, a2, 5f
4:      movi    a2, 0
        leaf_return

.Lle_xneg:
        /* Check if y <= x.  */
        bgeu    a2, a3, 4b
5:      movi    a2, 1
        leaf_return

.Lle_diff_signs:
        bltz    a2, 4b

        /* Check if both x and y are zero.  */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 1
        movi    a3, 0
        moveqz  a2, a3, a7
        leaf_return


        /* Greater Than or Equal */

        .align  4
        .global __gesf2
        .type   __gesf2, @function
__gesf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, -1
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, -1
        leaf_return


        /* Less Than */

        .align  4
        .global __ltsf2
        .type   __ltsf2, @function
__ltsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 2f
1:      bnall   a3, a6, .Llt_cmp

        /* Check if y is a NaN.  */
        slli    a7, a3, 9
        beqz    a7, .Llt_cmp
        movi    a2, 0
        leaf_return

        /* Check if x is a NaN.  */
2:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 0
        leaf_return

.Llt_cmp:
        /* Check if x and y have different signs.  */
        xor     a7, a2, a3
        bltz    a7, .Llt_diff_signs

        /* Check if x is negative.  */
        bltz    a2, .Llt_xneg

        /* Check if x < y.  */
        bgeu    a2, a3, 5f
4:      movi    a2, -1
        leaf_return

.Llt_xneg:
        /* Check if y < x.  */
        bltu    a3, a2, 4b
5:      movi    a2, 0
        leaf_return

.Llt_diff_signs:
        bgez    a2, 5b

        /* Check if both x and y are zero.  */
        or      a7, a2, a3
        slli    a7, a7, 1
        movi    a2, 0
        movi    a3, -1
        movnez  a2, a3, a7
        leaf_return


        /* Unordered */

        .align  4
        .global __unordsf2
        .type   __unordsf2, @function
__unordsf2:
        leaf_entry sp, 16
        movi    a6, 0x7f800000
        ball    a2, a6, 3f
1:      ball    a3, a6, 4f
2:      movi    a2, 0
        leaf_return

3:      slli    a7, a2, 9
        beqz    a7, 1b
        movi    a2, 1
        leaf_return

4:      slli    a7, a3, 9
        beqz    a7, 2b
        movi    a2, 1
        leaf_return

#endif /* L_cmpsf2 */
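
/* The ordered comparisons above exploit the sign-magnitude encoding of
   IEEE-754: for two nonnegative floats, bit-pattern comparison as
   unsigned integers matches numeric order; for two negative floats the
   order is reversed; mixed signs are decided by the signs alone, except
   that +0 == -0.  A C sketch of __ltsf2's core (illustrative only;
   "lt_cmp" is a made-up name; NaNs are assumed already filtered out):

       #include <stdint.h>

       static int lt_cmp (uint32_t x, uint32_t y)  // -1 if x < y, else 0
       {
         if ((int32_t)(x ^ y) < 0)                 // different signs
           {
             if (((x | y) << 1) == 0)
               return 0;                           // +0 == -0
             return (int32_t)x < 0 ? -1 : 0;       // negative is smaller
           }
         if ((int32_t)x < 0)                       // both negative:
           return (y < x) ? -1 : 0;                //   reversed order
         return (x < y) ? -1 : 0;                  // both nonnegative
       }
*/
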

#ifdef L_fixsfsi

        .align  4
        .global __fixsfsi
        .type   __fixsfsi, @function
__fixsfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfsi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 32.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 32, .Lfixsfsi_maxint
        blti    a4, 1, .Lfixsfsi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixsfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfsi_maxint

        /* Translate NaN to +maxint.  */
        movi    a2, 0

.Lfixsfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        addi    a5, a4, -1      /* 0x7fffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixsfsi_zero:
        movi    a2, 0
        leaf_return

#endif /* L_fixsfsi */

#ifdef L_fixsfdi

        .align  4
        .global __fixsfdi
        .type   __fixsfdi, @function
__fixsfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixsfdi_nan_or_inf

        /* Extract the exponent and check if 0 < (exp - 0x7e) < 64.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7e
        bgei    a4, 64, .Lfixsfdi_maxint
        blti    a4, 1, .Lfixsfdi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent.  */
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixsfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixsfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixsfdi_smallshift:
        movi    xl, 0
        sll     xl, xh
        srl     xh, xh
        j       .Lfixsfdi_shifted

.Lfixsfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixsfdi_maxint

        /* Translate NaN to +maxint.  */
        movi    a2, 0

.Lfixsfdi_maxint:
        slli    a7, a6, 8       /* 0x80000000 */
        bgez    a2, 1f
        mov     xh, a7
        movi    xl, 0
        leaf_return

1:      addi    xh, a7, -1      /* 0x7fffffff */
        movi    xl, -1
        leaf_return

.Lfixsfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

#endif /* L_fixsfdi */

#ifdef L_fixunssfsi

        .align  4
        .global __fixunssfsi
        .type   __fixunssfsi, @function
__fixunssfsi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfsi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 32.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 32, .Lfixunssfsi_maxint
        bltz    a4, .Lfixunssfsi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    a5, a7, 8

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 32, .Lfixunssfsi_bigexp
        ssl     a4              /* shift by 32 - a4 */
        srl     a5, a5

        /* Negate the result if sign != 0.  */
        neg     a2, a5
        movgez  a2, a5, a7
        leaf_return

.Lfixunssfsi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfsi_maxint

        /* Translate NaN to 0xffffffff.  */
        movi    a2, -1
        leaf_return

.Lfixunssfsi_maxint:
        slli    a4, a6, 8       /* 0x80000000 */
        movi    a5, -1          /* 0xffffffff */
        movgez  a4, a5, a2
        mov     a2, a4
        leaf_return

.Lfixunssfsi_zero:
        movi    a2, 0
        leaf_return

.Lfixunssfsi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a2, 1f
        mov     a2, a5          /* no shift needed */
        leaf_return

        /* Return 0x80000000 if negative.  */
1:      slli    a2, a6, 8
        leaf_return

#endif /* L_fixunssfsi */
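
/* The conversions above all follow the same pattern: extract the
   exponent, reattach the implicit "1.0", shift the mantissa into place,
   and saturate on overflow.  A C sketch of __fixsfsi (illustrative only;
   "fixsfsi_sketch" is a made-up name):

       #include <stdint.h>

       static int32_t fixsfsi_sketch (uint32_t bits)
       {
         uint32_t exp = (bits >> 23) & 0xff;
         uint32_t mant = (bits & 0x7fffff) | 0x800000;
         int32_t shift = (int32_t)exp - 0x7e;       // bits left of point
         if (exp == 0xff && (bits << 9) != 0)
           return INT32_MAX;                        // NaN -> +maxint
         if (shift >= 32)                           // too big: saturate
           return (int32_t)bits < 0 ? INT32_MIN : INT32_MAX;
         if (shift < 1)
           return 0;                                // |value| < 1
         uint32_t u = (mant << 8) >> (32 - shift);  // truncate
         return (int32_t)bits < 0 ? -(int32_t)u : (int32_t)u;
       }
*/
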

#ifdef L_fixunssfdi

        .align  4
        .global __fixunssfdi
        .type   __fixunssfdi, @function
__fixunssfdi:
        leaf_entry sp, 16

        /* Check for NaN and Infinity.  */
        movi    a6, 0x7f800000
        ball    a2, a6, .Lfixunssfdi_nan_or_inf

        /* Extract the exponent and check if 0 <= (exp - 0x7f) < 64.  */
        extui   a4, a2, 23, 8
        addi    a4, a4, -0x7f
        bgei    a4, 64, .Lfixunssfdi_maxint
        bltz    a4, .Lfixunssfdi_zero

        /* Add explicit "1.0" and shift << 8.  */
        or      a7, a2, a6
        slli    xh, a7, 8

        /* Shift back to the right, based on the exponent.  */
        addi    a4, a4, 1
        beqi    a4, 64, .Lfixunssfdi_bigexp
        ssl     a4              /* shift by 64 - a4 */
        bgei    a4, 32, .Lfixunssfdi_smallshift
        srl     xl, xh
        movi    xh, 0

.Lfixunssfdi_shifted:
        /* Negate the result if sign != 0.  */
        bgez    a7, 1f
        neg     xl, xl
        neg     xh, xh
        beqz    xl, 1f
        addi    xh, xh, -1
1:      leaf_return

.Lfixunssfdi_smallshift:
        movi    xl, 0
        src     xl, xh, xl
        srl     xh, xh
        j       .Lfixunssfdi_shifted

.Lfixunssfdi_nan_or_inf:
        /* Handle Infinity and NaN.  */
        slli    a4, a2, 9
        beqz    a4, .Lfixunssfdi_maxint

        /* Translate NaN to 0xffffffff....  */
1:      movi    xh, -1
        movi    xl, -1
        leaf_return

.Lfixunssfdi_maxint:
        bgez    a2, 1b
2:      slli    xh, a6, 8       /* 0x80000000 */
        movi    xl, 0
        leaf_return

.Lfixunssfdi_zero:
        movi    xh, 0
        movi    xl, 0
        leaf_return

.Lfixunssfdi_bigexp:
        /* Handle unsigned maximum exponent case.  */
        bltz    a7, 2b
        movi    xl, 0
        leaf_return             /* no shift needed */

#endif /* L_fixunssfdi */

#ifdef L_floatsisf

        .align  4
        .global __floatunsisf
        .type   __floatunsisf, @function
__floatunsisf:
        leaf_entry sp, 16
        beqz    a2, .Lfloatsisf_return

        /* Set the sign to zero and jump to the floatsisf code.  */
        movi    a7, 0
        j       .Lfloatsisf_normalize

        .align  4
        .global __floatsisf
        .type   __floatsisf, @function
__floatsisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        beqz    a2, .Lfloatsisf_return

        /* Save the sign.  */
        extui   a7, a2, 31, 1

        /* Get the absolute value.  */
#if XCHAL_HAVE_ABS
        abs     a2, a2
#else
        neg     a4, a2
        movltz  a2, a4, a2
#endif

.Lfloatsisf_normalize:
        /* Normalize with the first 1 bit in the msb.  */
        do_nsau a4, a2, a5, a6
        ssl     a4
        sll     a5, a2

        /* Shift the mantissa into position, with rounding bits in a6.  */
        srli    a2, a5, 8
        slli    a6, a5, (32 - 8)

        /* Set the exponent.  */
        movi    a5, 0x9d        /* 0x7e + 31 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, a2, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, .Lfloatsisf_return
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatsisf_exactlyhalf

.Lfloatsisf_return:
        leaf_return

.Lfloatsisf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

#endif /* L_floatsisf */
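
/* __floatsisf normalizes the absolute value with a count-leading-zeros,
   splits it into a 24-bit mantissa plus leftover fraction bits, and
   applies round-to-nearest-even.  A C sketch (illustrative only;
   "floatsisf_sketch" is a made-up name; __builtin_clz stands in for the
   NSAU instruction):

       #include <stdint.h>

       static uint32_t floatsisf_sketch (int32_t v)
       {
         if (v == 0)
           return 0;
         uint32_t sign = v < 0 ? 0x80000000u : 0;
         uint32_t mag = v < 0 ? 0u - (uint32_t)v : (uint32_t)v;
         int nz = __builtin_clz (mag);          // like NSAU
         uint32_t norm = mag << nz;             // leading 1 in the msb
         uint32_t frac = norm << 24;            // rounding bits
         uint32_t r = sign + ((uint32_t)(0x9d - nz) << 23) + (norm >> 8);
         if (frac & 0x80000000u)                // leftover >= 1/2
           {
             r += 1;                            // round up
             if ((frac << 1) == 0)              // exactly 1/2:
               r &= ~1u;                        //   round to even
           }
         return r;
       }
*/
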

#ifdef L_floatdisf

        .align  4
        .global __floatundisf
        .type   __floatundisf, @function
__floatundisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Set the sign to zero and jump to the floatdisf code.  */
        movi    a7, 0
        j       .Lfloatdisf_normalize

        .align  4
        .global __floatdisf
        .type   __floatdisf, @function
__floatdisf:
        leaf_entry sp, 16

        /* Check for zero.  */
        or      a4, xh, xl
        beqz    a4, 2f

        /* Save the sign.  */
        extui   a7, xh, 31, 1

        /* Get the absolute value.  */
        bgez    xh, .Lfloatdisf_normalize
        neg     xl, xl
        neg     xh, xh
        beqz    xl, .Lfloatdisf_normalize
        addi    xh, xh, -1

.Lfloatdisf_normalize:
        /* Normalize with the first 1 bit in the msb of xh.  */
        beqz    xh, .Lfloatdisf_bigshift
        do_nsau a4, xh, a5, a6
        ssl     a4
        src     xh, xh, xl
        sll     xl, xl

.Lfloatdisf_shifted:
        /* Shift the mantissa into position, with rounding bits in a6.  */
        ssai    8
        sll     a5, xl
        src     a6, xh, xl
        srl     xh, xh
        beqz    a5, 1f
        movi    a5, 1
        or      a6, a6, a5
1:
        /* Set the exponent.  */
        movi    a5, 0xbd        /* 0x7e + 63 */
        sub     a5, a5, a4
        slli    a5, a5, 23
        add     a2, xh, a5

        /* Add the sign.  */
        slli    a7, a7, 31
        or      a2, a2, a7

        /* Round up if the leftover fraction is >= 1/2.  */
        bgez    a6, 2f
        addi    a2, a2, 1       /* Overflow to the exponent is OK.  */

        /* Check if the leftover fraction is exactly 1/2.  */
        slli    a6, a6, 1
        beqz    a6, .Lfloatdisf_exactlyhalf
2:      leaf_return

.Lfloatdisf_bigshift:
        /* xh is zero.  Normalize with the first 1 bit of xl in the msb
           of xh.  */
        do_nsau a4, xl, a5, a6
        ssl     a4
        sll     xh, xl
        movi    xl, 0
        addi    a4, a4, 32
        j       .Lfloatdisf_shifted

.Lfloatdisf_exactlyhalf:
        /* Round down to the nearest even value.  */
        srli    a2, a2, 1
        slli    a2, a2, 1
        leaf_return

#endif /* L_floatdisf */
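
/* Because a 64-bit integer has more precision than a float mantissa,
   __floatdisf folds every bit below its 32-bit rounding word into a
   single "sticky" bit before rounding (the "beqz a5 ... or a6" sequence
   above).  A C sketch of the same idea for the unsigned case only
   (illustrative only; "floatundisf_sketch" is a made-up name):

       #include <stdint.h>

       static uint32_t floatundisf_sketch (uint64_t v)
       {
         if (v == 0)
           return 0;
         int nz = __builtin_clzll (v);              // like the NSAU steps
         uint64_t norm = v << nz;                   // leading 1 at bit 63
         uint32_t mant = (uint32_t)(norm >> 40);    // 24-bit mantissa
         uint64_t rest = norm & 0xffffffffffULL;    // 40 leftover bits
         uint32_t round = (uint32_t)(rest >> 8);    // top 32 of them
         if ((rest & 0xff) != 0)
           round |= 1;                              // sticky bit
         uint32_t r = ((uint32_t)(0xbd - nz) << 23) + mant;
         if (round & 0x80000000u)                   // leftover >= 1/2
           {
             r += 1;                                // round up
             if ((round << 1) == 0)                 // exactly 1/2:
               r &= ~1u;                            //   round to even
           }
         return r;
       }
*/
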