/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001-2021 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include "xtensa-config.h"

/* Define macros for the ABS and ADDX* instructions to handle cases
   where they are not included in the Xtensa processor configuration.  */

	.macro	do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
	abs	\dst, \src
#else
	neg	\tmp, \src
	movgez	\tmp, \src, \src
	mov	\dst, \tmp
#endif
	.endm

	.macro	do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx2	\dst, \as, \at
#else
	slli	\tmp, \as, 1
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx4	\dst, \as, \at
#else
	slli	\tmp, \as, 2
	add	\dst, \tmp, \at
#endif
	.endm

	.macro	do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
	addx8	\dst, \as, \at
#else
	slli	\tmp, \as, 3
	add	\dst, \tmp, \at
#endif
	.endm

/* Define macros for leaf function entry and return, supporting either the
   standard register windowed ABI or the non-windowed call0 ABI.  These
   macros do not allocate any extra stack space, so they only work for
   leaf functions that do not need to spill anything to the stack.  */

	.macro	leaf_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	entry	\reg, \size
#else
	/* do nothing */
#endif
	.endm

	.macro	leaf_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
	retw
#else
	ret
#endif
	.endm


#ifdef L_mulsi3
	.align	4
	.global	__mulsi3
	.type	__mulsi3, @function
__mulsi3:
	leaf_entry sp, 16

#if XCHAL_HAVE_MUL32
	mull	a2, a2, a3

#elif XCHAL_HAVE_MUL16
	or	a4, a2, a3
	srai	a4, a4, 16
	bnez	a4, .LMUL16
	mul16u	a2, a2, a3
	leaf_return
.LMUL16:
	srai	a4, a2, 16
	srai	a5, a3, 16
	mul16u	a7, a4, a3
	mul16u	a6, a5, a2
	mul16u	a4, a2, a3
	add	a7, a7, a6
	slli	a7, a7, 16
	add	a2, a7, a4

#elif XCHAL_HAVE_MAC16
	mul.aa.hl a2, a3
	mula.aa.lh a2, a3
	rsr	a5, ACCLO
	umul.aa.ll a2, a3
	rsr	a4, ACCLO
	slli	a5, a5, 16
	add	a2, a4, a5

#else /* !MUL32 && !MUL16 && !MAC16 */

	/* Multiply one bit at a time, but unroll the loop 4x to better
	   exploit the addx instructions and avoid overhead.
	   Peel the first iteration to save a cycle on init.  */

	/* Avoid negative numbers.  */
	xor	a5, a2, a3	/* Top bit is 1 if one input is negative.  */
	do_abs	a3, a3, a6
	do_abs	a2, a2, a6

	/* Swap so the second argument is smaller.  */
	sub	a7, a2, a3
	mov	a4, a3
	movgez	a4, a2, a7	/* a4 = max (a2, a3) */
	movltz	a3, a2, a7	/* a3 = min (a2, a3) */

	movi	a2, 0
	extui	a6, a3, 0, 1
	movnez	a2, a4, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop
	neg	a3, a2
	movltz	a2, a3, a5
	leaf_return

	.align	4
.Lmult_main_loop:
	srli	a3, a3, 4
	slli	a4, a4, 4

	add	a7, a4, a2
	extui	a6, a3, 0, 1
	movnez	a2, a7, a6

	do_addx2 a7, a4, a2, a7
	extui	a6, a3, 1, 1
	movnez	a2, a7, a6

	do_addx4 a7, a4, a2, a7
	extui	a6, a3, 2, 1
	movnez	a2, a7, a6

	do_addx8 a7, a4, a2, a7
	extui	a6, a3, 3, 1
	movnez	a2, a7, a6

	bgeui	a3, 16, .Lmult_main_loop

	neg	a3, a2
	movltz	a2, a3, a5

#endif /* !MUL32 && !MUL16 && !MAC16 */

	leaf_return
	.size	__mulsi3, . - __mulsi3

#endif /* L_mulsi3 */
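
/* Reference model (not part of the build) of the shift-and-add loop used
   by __mulsi3 when no multiply hardware is configured.  The assembly also
   peels the first iteration and fixes up the sign at the end; the helper
   below is an illustrative sketch only and is guarded so it is never
   assembled.  */
#if 0
static unsigned int
mulsi3_model (unsigned int max, unsigned int min)
{
  /* Inputs are the absolute values, with max >= min.  */
  unsigned int result = 0;
  while (min != 0)
    {
      /* Consume four bits of min per iteration (the 4x unroll),
	 using shifted adds in place of addx2/addx4/addx8.  */
      if (min & 1) result += max;
      if (min & 2) result += max << 1;	/* do_addx2 */
      if (min & 4) result += max << 2;	/* do_addx4 */
      if (min & 8) result += max << 3;	/* do_addx8 */
      min >>= 4;
      max <<= 4;
    }
  return result;
}
#endif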


#ifdef L_umulsidi3

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 1
#endif

	.align	4
	.global	__umulsidi3
	.type	__umulsidi3, @function
__umulsidi3:
#if __XTENSA_CALL0_ABI__
	leaf_entry sp, 32
	addi	sp, sp, -32
	s32i	a12, sp, 16
	s32i	a13, sp, 20
	s32i	a14, sp, 24
	s32i	a15, sp, 28
#elif XCHAL_NO_MUL
	/* This is not really a leaf function; allocate enough stack space
	   to allow CALL12s to a helper function.  */
	leaf_entry sp, 48
#else
	leaf_entry sp, 16
#endif

#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */

	/* This code is taken from the mulsf3 routine in ieee754-sf.S.
	   See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH
	mull	a6, a2, a3
	muluh	wh, a2, a3
	mov	wl, a6

#else /* ! MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* a0 and a8 will be clobbered by calling the multiply function
	   but a8 is not used here and need not be saved.  */
	s32i	a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

	/* Get the high halves of the inputs into registers.  */
	srli	a2h, a2, 16
	srli	a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
	/* Clear the high halves of the inputs.  This does not matter
	   for MUL16 because the high bits are ignored.  */
	extui	a2, a2, 0, 16
	extui	a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */


#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mul16u	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	mull	dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a
   workaround using underscores instead of periods when doing the
   concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	umul_aa_ ## xhalf ## yhalf	xreg, yreg; \
	rsr	dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
	extui	dst, src, 0, 16
#define set_arg_h(dst, src) \
	srli	dst, src, 16

#if __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a13, xreg); \
	set_arg_ ## yhalf (a14, yreg); \
	call0	.Lmul_mulsi3; \
	mov	dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
	set_arg_ ## xhalf (a14, xreg); \
	set_arg_ ## yhalf (a15, yreg); \
	call12	.Lmul_mulsi3; \
	mov	dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

	/* Add pp1 and pp2 into a6 with carry-out in a9.  */
	do_mul(a6, a2, l, a3, h)	/* pp 1 */
	do_mul(a11, a2, h, a3, l)	/* pp 2 */
	movi	a9, 0
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Shift the high half of a9/a6 into position in a9.  Note that
	   this value can be safely incremented without any carry-outs.  */
	ssai	16
	src	a9, a9, a6

	/* Compute the low word into a6.  */
	do_mul(a11, a2, l, a3, l)	/* pp 0 */
	sll	a6, a6
	add	a6, a6, a11
	bgeu	a6, a11, 1f
	addi	a9, a9, 1
1:
	/* Compute the high word into wh.  */
	do_mul(wh, a2, h, a3, h)	/* pp 3 */
	add	wh, wh, a9
	mov	wl, a6

#endif /* !MUL32_HIGH */

#if __XTENSA_CALL0_ABI__ && XCHAL_NO_MUL
	/* Restore the original return address.  */
	l32i	a0, sp, 0
#endif
#if __XTENSA_CALL0_ABI__
	l32i	a12, sp, 16
	l32i	a13, sp, 20
	l32i	a14, sp, 24
	l32i	a15, sp, 28
	addi	sp, sp, 32
#endif
	leaf_return

#if XCHAL_NO_MUL

	/* For Xtensa processors with no multiply hardware, this simplified
	   version of _mulsi3 is used for multiplying 16-bit chunks of
	   the floating-point mantissas.  When using CALL0, this function
	   uses a custom ABI: the inputs are passed in a13 and a14, the
	   result is returned in a12, and a8 and a15 are clobbered.  */
	.align	4
.Lmul_mulsi3:
	leaf_entry sp, 16
	.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
	movi	\dst, 0
1:	add	\tmp1, \src2, \dst
	extui	\tmp2, \src1, 0, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx2 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 1, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx4 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 2, 1
	movnez	\dst, \tmp1, \tmp2

	do_addx8 \tmp1, \src2, \dst, \tmp1
	extui	\tmp2, \src1, 3, 1
	movnez	\dst, \tmp1, \tmp2

	srli	\src1, \src1, 4
	slli	\src2, \src2, 4
	bnez	\src1, 1b
	.endm
#if __XTENSA_CALL0_ABI__
	mul_mulsi3_body a12, a13, a14, a15, a8
#else
	/* The result will be written into a2, so save that argument in a4.  */
	mov	a4, a2
	mul_mulsi3_body a2, a4, a3, a5, a6
#endif
	leaf_return
#endif /* XCHAL_NO_MUL */

	.size	__umulsidi3, . - __umulsidi3

#endif /* L_umulsidi3 */
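
/* Reference model (never assembled) of the partial-product scheme used
   above when there is no MUL32_HIGH: four 16x16 multiplies, with the
   middle products summed first and their carry-out tracked exactly as
   the bgeu/addi pairs do in the assembly.  Illustrative sketch only.  */
#if 0
static unsigned long long
umulsidi3_model (unsigned int a, unsigned int b)
{
  unsigned int al = a & 0xffff, ah = a >> 16;
  unsigned int bl = b & 0xffff, bh = b >> 16;
  unsigned int pp0 = al * bl, pp1 = al * bh;
  unsigned int pp2 = ah * bl, pp3 = ah * bh;

  unsigned int mid = pp1 + pp2;			 /* a6 after the first add */
  unsigned int carry = (mid < pp2);		 /* carry-out into a9 */
  unsigned int acc = (carry << 16) | (mid >> 16); /* src a9, a9, a6 */
  unsigned int lo = (mid << 16) + pp0;		 /* sll a6; add a6, a6, a11 */
  if (lo < pp0)					 /* carry into the high word */
    acc++;
  return ((unsigned long long) (pp3 + acc) << 32) | lo;
}
#endif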


/* Define a macro for the NSAU (unsigned normalize shift amount)
   instruction, which computes the number of leading zero bits,
   to handle cases where it is not included in the Xtensa processor
   configuration.  */

	.macro	do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
	nsau	\cnt, \val
#else
	mov	\a, \val
	movi	\cnt, 0
	extui	\tmp, \a, 16, 16
	bnez	\tmp, 0f
	movi	\cnt, 16
	slli	\a, \a, 16
0:
	extui	\tmp, \a, 24, 8
	bnez	\tmp, 1f
	addi	\cnt, \cnt, 8
	slli	\a, \a, 8
1:
	movi	\tmp, __nsau_data
	extui	\a, \a, 24, 8
	add	\tmp, \tmp, \a
	l8ui	\tmp, \tmp, 0
	add	\cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
	.endm

#ifdef L_clz
	.section .rodata
	.align	4
	.global	__nsau_data
	.type	__nsau_data, @object
__nsau_data:
#if !XCHAL_HAVE_NSA
	.byte	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
	.byte	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	.byte	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
	.size	__nsau_data, . - __nsau_data
	.hidden	__nsau_data
#endif /* L_clz */
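
/* Reference model (never assembled) of the do_nsau fallback: narrow the
   search to the top nonzero byte in two steps, then finish with the
   256-entry __nsau_data table, which holds the leading-zero count of a
   byte (8 for zero, so nsau (0) comes out as 16 + 8 + 8 = 32).  */
#if 0
extern const unsigned char __nsau_data[256];

static unsigned int
nsau_model (unsigned int v)
{
  unsigned int cnt = 0;
  if ((v >> 16) == 0) { cnt = 16; v <<= 16; }	/* top half empty */
  if ((v >> 24) == 0) { cnt += 8; v <<= 8; }	/* top byte empty */
  return cnt + __nsau_data[v >> 24];
}
#endif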


#ifdef L_clzsi2
	.align	4
	.global	__clzsi2
	.type	__clzsi2, @function
__clzsi2:
	leaf_entry sp, 16
	do_nsau	a2, a2, a3, a4
	leaf_return
	.size	__clzsi2, . - __clzsi2

#endif /* L_clzsi2 */


#ifdef L_ctzsi2
	.align	4
	.global	__ctzsi2
	.type	__ctzsi2, @function
__ctzsi2:
	leaf_entry sp, 16
	neg	a3, a2
	and	a3, a3, a2
	do_nsau	a2, a3, a4, a5
	neg	a2, a2
	addi	a2, a2, 31
	leaf_return
	.size	__ctzsi2, . - __ctzsi2

#endif /* L_ctzsi2 */


#ifdef L_ffssi2
	.align	4
	.global	__ffssi2
	.type	__ffssi2, @function
__ffssi2:
	leaf_entry sp, 16
	neg	a3, a2
	and	a3, a3, a2
	do_nsau	a2, a3, a4, a5
	neg	a2, a2
	addi	a2, a2, 32
	leaf_return
	.size	__ffssi2, . - __ffssi2

#endif /* L_ffssi2 */
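
/* The two routines above rest on one identity: x & -x isolates the
   lowest set bit, and nsau of that single bit gives its distance from
   bit 31.  A model (never assembled), with nsau () standing in for the
   do_nsau macro above:  */
#if 0
static int ctz_model (unsigned int x) { return 31 - nsau (x & -x); }
static int ffs_model (unsigned int x) { return 32 - nsau (x & -x); }
/* nsau (0) is 32, so ffs_model (0) correctly yields 0.  */
#endif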


#ifdef L_udivsi3
	.align	4
	.global	__udivsi3
	.type	__udivsi3, @function
__udivsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	quou	a2, a2, a3
#else
	bltui	a3, 2, .Lle_one	/* check if the divisor <= 1 */

	mov	a6, a2		/* keep dividend in a6 */
	do_nsau	a5, a6, a2, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a2, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment quotient if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	beqz	a3, .Lerror	/* if divisor == 1, return the dividend */
	leaf_return

.Lspecial:
	/* return dividend >= divisor */
	bltu	a6, a3, .Lreturn0
	movi	a2, 1
	leaf_return

.Lerror:
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__udivsi3, . - __udivsi3

#endif /* L_udivsi3 */


#ifdef L_divsi3
	.align	4
	.global	__divsi3
	.type	__divsi3, @function
__divsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	quos	a2, a2, a3
#else
	xor	a7, a2, a3	/* sign = dividend ^ divisor */
	do_abs	a6, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a6, a2, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a2, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */
	movi	a2, 0		/* quotient = 0 */

	/* test-subtract-and-shift loop; one quotient bit on each iteration */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a6, a3, .Lzerobit
	sub	a6, a6, a3
	addi	a2, a2, 1
.Lzerobit:
	slli	a2, a2, 1
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

	bltu	a6, a3, .Lreturn
	addi	a2, a2, 1	/* increment if udividend >= udivisor */
.Lreturn:
	neg	a5, a2
	movltz	a2, a5, a7	/* return (sign < 0) ? -quotient : quotient */
	leaf_return

.Lle_one:
	beqz	a3, .Lerror
	neg	a2, a6		/* if udivisor == 1, then return... */
	movgez	a2, a6, a7	/* (sign < 0) ? -udividend : udividend */
	leaf_return

.Lspecial:
	bltu	a6, a3, .Lreturn0 /* if dividend < divisor, return 0 */
	movi	a2, 1
	movi	a4, -1
	movltz	a2, a4, a7	/* else return (sign < 0) ? -1 : 1 */
	leaf_return

.Lerror:
	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__divsi3, . - __divsi3

#endif /* L_divsi3 */
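
/* Reference model (never assembled) of the shift-subtract loop shared by
   the division routines above.  It covers only the main path: divisor >= 2
   and the dividend has more significant bits than the divisor; the other
   cases are handled before the loop in the assembly.  nsau () stands in
   for the do_nsau macro.  */
#if 0
static unsigned int
udivsi3_model (unsigned int n, unsigned int d)
{
  unsigned int count = nsau (d) - nsau (n);	/* align d under n's top bit */
  unsigned int q = 0;
  d <<= count;
  while (count-- != 0)
    {
      if (n >= d)		/* one quotient bit per iteration */
	{
	  n -= d;
	  q += 1;
	}
      q <<= 1;
      d >>= 1;
    }
  if (n >= d)			/* final quotient bit after the loop */
    q += 1;
  return q;
}
#endif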


#ifdef L_umodsi3
	.align	4
	.global	__umodsi3
	.type	__umodsi3, @function
__umodsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	remu	a2, a2, a3
#else
	bltui	a3, 2, .Lle_one	/* check if the divisor is <= 1 */

	do_nsau	a5, a2, a6, a7	/* dividend_shift = nsau (dividend) */
	do_nsau	a4, a3, a6, a7	/* divisor_shift = nsau (divisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = divisor_shift - dividend_shift */
	ssl	a4
	sll	a3, a3		/* divisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract once more if dividend >= divisor */
.Lreturn:
	leaf_return

.Lle_one:
	bnez	a3, .Lreturn0

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__umodsi3, . - __umodsi3

#endif /* L_umodsi3 */


#ifdef L_modsi3
	.align	4
	.global	__modsi3
	.type	__modsi3, @function
__modsi3:
	leaf_entry sp, 16
#if XCHAL_HAVE_DIV32
	rems	a2, a2, a3
#else
	mov	a7, a2		/* save original (signed) dividend */
	do_abs	a2, a2, a4	/* udividend = abs (dividend) */
	do_abs	a3, a3, a4	/* udivisor = abs (divisor) */
	bltui	a3, 2, .Lle_one	/* check if udivisor <= 1 */
	do_nsau	a5, a2, a6, a8	/* udividend_shift = nsau (udividend) */
	do_nsau	a4, a3, a6, a8	/* udivisor_shift = nsau (udivisor) */
	bgeu	a5, a4, .Lspecial

	sub	a4, a4, a5	/* count = udivisor_shift - udividend_shift */
	ssl	a4
	sll	a3, a3		/* udivisor <<= count */

	/* test-subtract-and-shift loop */
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
	bltu	a2, a3, .Lzerobit
	sub	a2, a2, a3
.Lzerobit:
	srli	a3, a3, 1
#if !XCHAL_HAVE_LOOPS
	addi	a4, a4, -1
	bnez	a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

.Lspecial:
	bltu	a2, a3, .Lreturn
	sub	a2, a2, a3	/* subtract again if udividend >= udivisor */
.Lreturn:
	bgez	a7, .Lpositive
	neg	a2, a2		/* if (dividend < 0), return -udividend */
.Lpositive:
	leaf_return

.Lle_one:
	bnez	a3, .Lreturn0

	/* Divide by zero: Use an illegal instruction to force an exception.
	   The subsequent "DIV0" string can be recognized by the exception
	   handler to identify the real cause of the exception.  */
	ill
	.ascii	"DIV0"

.Lreturn0:
	movi	a2, 0
#endif /* XCHAL_HAVE_DIV32 */
	leaf_return
	.size	__modsi3, . - __modsi3

#endif /* L_modsi3 */
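
/* The modulo routines above run the same shift-subtract loop as the
   division routines but keep the running dividend (the remainder)
   instead of accumulating quotient bits.  For __modsi3 the sign fix-up
   matches C's % operator, sketched here (never assembled) with
   umodsi3_model standing in for the unsigned loop:  */
#if 0
static int
modsi3_model (int n, int d)
{
  unsigned int r = umodsi3_model (n < 0 ? - (unsigned int) n : n,
				  d < 0 ? - (unsigned int) d : d);
  return n < 0 ? - (int) r : (int) r;	/* remainder takes the dividend's sign */
}
#endif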


#ifdef __XTENSA_EB__
#define uh a2
#define ul a3
#else
#define uh a3
#define ul a2
#endif /* __XTENSA_EB__ */


#ifdef L_ashldi3
	.align	4
	.global	__ashldi3
	.type	__ashldi3, @function
__ashldi3:
	leaf_entry sp, 16
	ssl	a4
	bgei	a4, 32, .Llow_only
	src	uh, uh, ul
	sll	ul, ul
	leaf_return

.Llow_only:
	sll	uh, ul
	movi	ul, 0
	leaf_return
	.size	__ashldi3, . - __ashldi3

#endif /* L_ashldi3 */


#ifdef L_ashrdi3
	.align	4
	.global	__ashrdi3
	.type	__ashrdi3, @function
__ashrdi3:
	leaf_entry sp, 16
	ssr	a4
	bgei	a4, 32, .Lhigh_only
	src	ul, uh, ul
	sra	uh, uh
	leaf_return

.Lhigh_only:
	sra	ul, uh
	srai	uh, uh, 31
	leaf_return
	.size	__ashrdi3, . - __ashrdi3

#endif /* L_ashrdi3 */


#ifdef L_lshrdi3
	.align	4
	.global	__lshrdi3
	.type	__lshrdi3, @function
__lshrdi3:
	leaf_entry sp, 16
	ssr	a4
	bgei	a4, 32, .Lhigh_only1
	src	ul, uh, ul
	srl	uh, uh
	leaf_return

.Lhigh_only1:
	srl	ul, uh
	movi	uh, 0
	leaf_return
	.size	__lshrdi3, . - __lshrdi3

#endif /* L_lshrdi3 */


#ifdef L_bswapsi2
	.align	4
	.global	__bswapsi2
	.type	__bswapsi2, @function
__bswapsi2:
	leaf_entry sp, 16
	ssai	8
	srli	a3, a2, 16
	src	a3, a3, a2
	src	a3, a3, a3
	src	a2, a2, a3
	leaf_return
	.size	__bswapsi2, . - __bswapsi2

#endif /* L_bswapsi2 */


#ifdef L_bswapdi2
	.align	4
	.global	__bswapdi2
	.type	__bswapdi2, @function
__bswapdi2:
	leaf_entry sp, 16
	ssai	8
	srli	a4, a2, 16
	src	a4, a4, a2
	src	a4, a4, a4
	src	a4, a2, a4
	srli	a2, a3, 16
	src	a2, a2, a3
	src	a2, a2, a2
	src	a2, a3, a2
	mov	a3, a4
	leaf_return
	.size	__bswapdi2, . - __bswapdi2

#endif /* L_bswapdi2 */


#include "ieee754-df.S"
#include "ieee754-sf.S"
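
/* Reference model (never assembled) of the SRC-based byte swap used in
   __bswapsi2 above.  With SAR set to 8 by "ssai 8", SRC extracts 32 bits
   from the concatenation of its two operands shifted right by 8, i.e.
   src (hi, lo) == (hi << 24) | (lo >> 8).  */
#if 0
static unsigned int
src8 (unsigned int hi, unsigned int lo)
{
  return (hi << 24) | (lo >> 8);
}

static unsigned int
bswapsi2_model (unsigned int x)		/* x = b3:b2:b1:b0 */
{
  unsigned int t = src8 (x >> 16, x);	/* t = b2:b3:b2:b1 */
  t = src8 (t, t);			/* rotate right by 8: b1:b2:b3:b2 */
  return src8 (x, t);			/* b0:b1:b2:b3 */
}
#endif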