/* Assembly functions for the Xtensa version of libgcc1.
   Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
   Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combined
executable.)

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "xtensa-config.h"

# Note: These functions use a minimum stack frame size of 32.  This is
# necessary for Xtensa configurations that only support a fixed register
# window size of 8, where even leaf functions (such as these) need to
# allocate space for a 4-word "extra save area".

# Define macros for the ABS and ADDX* instructions to handle cases
# where they are not included in the Xtensa processor configuration.

        .macro  do_abs dst, src, tmp
#if XCHAL_HAVE_ABS
        abs     \dst, \src
#else
        neg     \tmp, \src
        movgez  \tmp, \src, \src
        mov     \dst, \tmp
#endif
        .endm

        .macro  do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
        addx2   \dst, \as, \at
#else
        slli    \tmp, \as, 1
        add     \dst, \tmp, \at
#endif
        .endm

        .macro  do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
        addx4   \dst, \as, \at
#else
        slli    \tmp, \as, 2
        add     \dst, \tmp, \at
#endif
        .endm

        .macro  do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
        addx8   \dst, \as, \at
#else
        slli    \tmp, \as, 3
        add     \dst, \tmp, \at
#endif
        .endm
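
# As a quick reference, here is a C model of what the macros above
# compute (an illustrative sketch only, not part of the build; the
# model_* names are invented here, and the temporary register is
# omitted since it does not affect the result):
#
#   #include <stdint.h>
#
#   /* do_abs: like the neg/movgez fallback, INT32_MIN wraps to itself */
#   int32_t model_abs (int32_t src)
#   {
#     return src >= 0 ? src : (int32_t) (0u - (uint32_t) src);
#   }
#
#   uint32_t model_addx2 (uint32_t as, uint32_t at) { return (as << 1) + at; }
#   uint32_t model_addx4 (uint32_t as, uint32_t at) { return (as << 2) + at; }
#   uint32_t model_addx8 (uint32_t as, uint32_t at) { return (as << 3) + at; }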

# Define macros for function entry and return, supporting either the
# standard register windowed ABI or the non-windowed call0 ABI.  These
# macros do not allocate any extra stack space, so they only work for
# leaf functions that do not need to spill anything to the stack.

        .macro  abi_entry reg, size
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
        entry   \reg, \size
#else
        /* do nothing */
#endif
        .endm

        .macro  abi_return
#if XCHAL_HAVE_WINDOWED && !__XTENSA_CALL0_ABI__
        retw
#else
        ret
#endif
        .endm


#ifdef L_mulsi3
        .align  4
        .global __mulsi3
        .type   __mulsi3,@function
__mulsi3:
        abi_entry sp, 32

#if XCHAL_HAVE_MUL16
        or      a4, a2, a3
        srai    a4, a4, 16
        bnez    a4, .LMUL16
        mul16u  a2, a2, a3
        abi_return
.LMUL16:
        srai    a4, a2, 16
        srai    a5, a3, 16
        mul16u  a7, a4, a3
        mul16u  a6, a5, a2
        mul16u  a4, a2, a3
        add     a7, a7, a6
        slli    a7, a7, 16
        add     a2, a7, a4

#elif XCHAL_HAVE_MAC16
        mul.aa.hl a2, a3
        mula.aa.lh a2, a3
        rsr     a5, 16          # ACCLO
        umul.aa.ll a2, a3
        rsr     a4, 16          # ACCLO
        slli    a5, a5, 16
        add     a2, a4, a5

#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

        # Multiply one bit at a time, but unroll the loop 4x to better
        # exploit the addx instructions and avoid overhead.
        # Peel the first iteration to save a cycle on init.

        # Avoid negative numbers.
        xor     a5, a2, a3      # top bit is 1 iff exactly one input is negative
        do_abs  a3, a3, a6
        do_abs  a2, a2, a6

        # Swap so the second argument is smaller.
        sub     a7, a2, a3
        mov     a4, a3
        movgez  a4, a2, a7      # a4 = max(a2, a3)
        movltz  a3, a2, a7      # a3 = min(a2, a3)

        movi    a2, 0
        extui   a6, a3, 0, 1
        movnez  a2, a4, a6

        do_addx2 a7, a4, a2, a7
        extui   a6, a3, 1, 1
        movnez  a2, a7, a6

        do_addx4 a7, a4, a2, a7
        extui   a6, a3, 2, 1
        movnez  a2, a7, a6

        do_addx8 a7, a4, a2, a7
        extui   a6, a3, 3, 1
        movnez  a2, a7, a6

        bgeui   a3, 16, .Lmult_main_loop
        neg     a3, a2
        movltz  a2, a3, a5
        abi_return

        .align  4
.Lmult_main_loop:
        srli    a3, a3, 4
        slli    a4, a4, 4

        add     a7, a4, a2
        extui   a6, a3, 0, 1
        movnez  a2, a7, a6

        do_addx2 a7, a4, a2, a7
        extui   a6, a3, 1, 1
        movnez  a2, a7, a6

        do_addx4 a7, a4, a2, a7
        extui   a6, a3, 2, 1
        movnez  a2, a7, a6

        do_addx8 a7, a4, a2, a7
        extui   a6, a3, 3, 1
        movnez  a2, a7, a6

        bgeui   a3, 16, .Lmult_main_loop

        neg     a3, a2
        movltz  a2, a3, a5

#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */

        abi_return
        .size   __mulsi3,.-__mulsi3

#endif /* L_mulsi3 */
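
# For reference, the no-multiplier path of __mulsi3 above is equivalent
# to the following C sketch (an illustration only; `mulsi3_model' is a
# hypothetical name, and the loop mirrors the 4x-unrolled assembly by
# consuming four bits of the smaller operand per pass):
#
#   #include <stdint.h>
#
#   int32_t mulsi3_model (int32_t a, int32_t b)
#   {
#     int neg = (a ^ b) < 0;                      /* sign of the result */
#     uint32_t x = a < 0 ? 0u - (uint32_t) a : (uint32_t) a;
#     uint32_t y = b < 0 ? 0u - (uint32_t) b : (uint32_t) b;
#     if (x < y) { uint32_t t = x; x = y; y = t; }  /* y = min(|a|,|b|) */
#     uint32_t p = 0;
#     while (y != 0)                              /* 4 bits per pass */
#       {
#         if (y & 1) p += x;
#         if (y & 2) p += x << 1;                 /* addx2 */
#         if (y & 4) p += x << 2;                 /* addx4 */
#         if (y & 8) p += x << 3;                 /* addx8 */
#         x <<= 4;
#         y >>= 4;
#       }
#     return (int32_t) (neg ? 0u - p : p);
#   }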

# Define a macro for the NSAU (unsigned normalize shift amount)
# instruction, which computes the number of leading zero bits,
# to handle cases where it is not included in the Xtensa processor
# configuration.

        .macro  do_nsau cnt, val, tmp, a
#if XCHAL_HAVE_NSA
        nsau    \cnt, \val
#else
        mov     \a, \val
        movi    \cnt, 0
        extui   \tmp, \a, 16, 16
        bnez    \tmp, 0f
        movi    \cnt, 16
        slli    \a, \a, 16
0:
        extui   \tmp, \a, 24, 8
        bnez    \tmp, 1f
        addi    \cnt, \cnt, 8
        slli    \a, \a, 8
1:
        movi    \tmp, __nsau_data
        extui   \a, \a, 24, 8
        add     \tmp, \tmp, \a
        l8ui    \tmp, \tmp, 0
        add     \cnt, \cnt, \tmp
#endif /* !XCHAL_HAVE_NSA */
        .endm

#ifdef L_nsau
        .section .rodata
        .align  4
        .global __nsau_data
        .type   __nsau_data,@object
__nsau_data:
#if !XCHAL_HAVE_NSA
        .byte   8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4
        .byte   3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3
        .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
        .byte   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        .byte   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* !XCHAL_HAVE_NSA */
        .size   __nsau_data,.-__nsau_data
        .hidden __nsau_data
#endif /* L_nsau */


#ifdef L_udivsi3
        .align  4
        .global __udivsi3
        .type   __udivsi3,@function
__udivsi3:
        abi_entry sp, 32
        bltui   a3, 2, .Lle_one # check if the divisor is <= 1

        mov     a6, a2          # keep dividend in a6
        do_nsau a5, a6, a2, a7  # dividend_shift = nsau(dividend)
        do_nsau a4, a3, a2, a7  # divisor_shift = nsau(divisor)
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = divisor_shift - dividend_shift
        ssl     a4
        sll     a3, a3          # divisor <<= count
        movi    a2, 0           # quotient = 0

        # test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a6, a3, .Lzerobit
        sub     a6, a6, a3
        addi    a2, a2, 1
.Lzerobit:
        slli    a2, a2, 1
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a6, a3, .Lreturn
        addi    a2, a2, 1       # increment quotient if dividend >= divisor
.Lreturn:
        abi_return

.Lspecial:
        # the quotient can only be 0 or 1: return (dividend >= divisor)
        movi    a2, 0
        bltu    a6, a3, .Lreturn2
        movi    a2, 1
.Lreturn2:
        abi_return

.Lle_one:
        beqz    a3, .Lerror     # if divisor == 0, handle the error...
        abi_return              # ...else divisor == 1: return the dividend (in a2)
.Lerror:
        movi    a2, 0           # just return 0; could throw an exception
        abi_return
        .size   __udivsi3,.-__udivsi3

#endif /* L_udivsi3 */
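
# A C sketch of __udivsi3 above, for reference only (`udivsi3_model'
# and `nsau_model' are hypothetical names; nsau_model mirrors NSAU and
# the table-driven do_nsau emulation, returning 32 for a zero argument):
#
#   #include <stdint.h>
#
#   static int nsau_model (uint32_t x)    /* count of leading zero bits */
#   {
#     int n = 0;
#     if (!(x >> 16)) { n += 16; x <<= 16; }
#     if (!(x >> 24)) { n += 8;  x <<= 8; }
#     while (n < 32 && !(x >> 31)) { n += 1; x <<= 1; }
#     return n;
#   }
#
#   uint32_t udivsi3_model (uint32_t n, uint32_t d)
#   {
#     if (d <= 1)
#       return d ? n : 0;                 /* divide by 0 just yields 0 */
#     int count = nsau_model (d) - nsau_model (n);
#     if (count <= 0)
#       return n >= d;                    /* quotient can only be 0 or 1 */
#     d <<= count;                        /* align divisor under dividend */
#     uint32_t q = 0;
#     while (count-- > 0)                 /* one quotient bit per pass */
#       {
#         if (n >= d) { n -= d; q += 1; }
#         q <<= 1;
#         d >>= 1;
#       }
#     if (n >= d) q += 1;                 /* final quotient bit */
#     return q;
#   }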

#ifdef L_divsi3
        .align  4
        .global __divsi3
        .type   __divsi3,@function
__divsi3:
        abi_entry sp, 32
        xor     a7, a2, a3      # sign = dividend ^ divisor
        do_abs  a6, a2, a4      # udividend = abs(dividend)
        do_abs  a3, a3, a4      # udivisor = abs(divisor)
        bltui   a3, 2, .Lle_one # check if udivisor <= 1
        do_nsau a5, a6, a2, a8  # udividend_shift = nsau(udividend)
        do_nsau a4, a3, a2, a8  # udivisor_shift = nsau(udivisor)
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
        ssl     a4
        sll     a3, a3          # udivisor <<= count
        movi    a2, 0           # quotient = 0

        # test-subtract-and-shift loop; one quotient bit on each iteration
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a6, a3, .Lzerobit
        sub     a6, a6, a3
        addi    a2, a2, 1
.Lzerobit:
        slli    a2, a2, 1
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a6, a3, .Lreturn
        addi    a2, a2, 1       # increment quotient if udividend >= udivisor
.Lreturn:
        neg     a5, a2
        movltz  a2, a5, a7      # return (sign < 0) ? -quotient : quotient
        abi_return

.Lspecial:
        movi    a2, 0
        bltu    a6, a3, .Lreturn2 # if udividend < udivisor, return 0
        movi    a2, 1
        movi    a4, -1
        movltz  a2, a4, a7      # else return (sign < 0) ? -1 : 1
.Lreturn2:
        abi_return

.Lle_one:
        beqz    a3, .Lerror
        neg     a2, a6          # if udivisor == 1, then return...
        movgez  a2, a6, a7      # (sign < 0) ? -udividend : udividend
        abi_return
.Lerror:
        movi    a2, 0           # just return 0; could throw an exception
        abi_return
        .size   __divsi3,.-__divsi3

#endif /* L_divsi3 */


#ifdef L_umodsi3
        .align  4
        .global __umodsi3
        .type   __umodsi3,@function
__umodsi3:
        abi_entry sp, 32
        bltui   a3, 2, .Lle_one # check if the divisor is <= 1

        do_nsau a5, a2, a6, a7  # dividend_shift = nsau(dividend)
        do_nsau a4, a3, a6, a7  # divisor_shift = nsau(divisor)
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = divisor_shift - dividend_shift
        ssl     a4
        sll     a3, a3          # divisor <<= count

        # test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a2, a3, .Lzerobit
        sub     a2, a2, a3
.Lzerobit:
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a2, a3, .Lreturn
        sub     a2, a2, a3      # subtract once more if dividend >= divisor
.Lreturn:
        abi_return

.Lspecial:
        bltu    a2, a3, .Lreturn2
        sub     a2, a2, a3      # subtract once if dividend >= divisor
.Lreturn2:
        abi_return

.Lle_one:
        # the divisor is either 0 or 1, so just return 0.
        # someday we may want to throw an exception if the divisor is 0.
        movi    a2, 0
        abi_return
        .size   __umodsi3,.-__umodsi3

#endif /* L_umodsi3 */
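
# Both signed routines (__divsi3 above and __modsi3 below) reduce to
# the unsigned shift-subtract algorithm and then patch up the sign,
# matching C's truncating division: the quotient is negative iff the
# operand signs differ, and the remainder takes the sign of the
# dividend.  As a C sketch (illustration only; `udivsi3_model' and
# `umodsi3_model' are hypothetical unsigned helpers like the one
# sketched after __udivsi3):
#
#   int32_t divsi3_model (int32_t a, int32_t b)
#   {
#     uint32_t ua = a < 0 ? 0u - (uint32_t) a : (uint32_t) a;
#     uint32_t ub = b < 0 ? 0u - (uint32_t) b : (uint32_t) b;
#     uint32_t q = udivsi3_model (ua, ub);
#     return (int32_t) ((a ^ b) < 0 ? 0u - q : q);
#   }
#
#   int32_t modsi3_model (int32_t a, int32_t b)
#   {
#     uint32_t ua = a < 0 ? 0u - (uint32_t) a : (uint32_t) a;
#     uint32_t ub = b < 0 ? 0u - (uint32_t) b : (uint32_t) b;
#     uint32_t r = umodsi3_model (ua, ub);
#     return (int32_t) (a < 0 ? 0u - r : r);
#   }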

#ifdef L_modsi3
        .align  4
        .global __modsi3
        .type   __modsi3,@function
__modsi3:
        abi_entry sp, 32
        mov     a7, a2          # save original (signed) dividend
        do_abs  a2, a2, a4      # udividend = abs(dividend)
        do_abs  a3, a3, a4      # udivisor = abs(divisor)
        bltui   a3, 2, .Lle_one # check if udivisor <= 1
        do_nsau a5, a2, a6, a8  # udividend_shift = nsau(udividend)
        do_nsau a4, a3, a6, a8  # udivisor_shift = nsau(udivisor)
        bgeu    a5, a4, .Lspecial

        sub     a4, a4, a5      # count = udivisor_shift - udividend_shift
        ssl     a4
        sll     a3, a3          # udivisor <<= count

        # test-subtract-and-shift loop
#if XCHAL_HAVE_LOOPS
        loopnez a4, .Lloopend
#endif /* XCHAL_HAVE_LOOPS */
.Lloop:
        bltu    a2, a3, .Lzerobit
        sub     a2, a2, a3
.Lzerobit:
        srli    a3, a3, 1
#if !XCHAL_HAVE_LOOPS
        addi    a4, a4, -1
        bnez    a4, .Lloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lloopend:

        bltu    a2, a3, .Lreturn
        sub     a2, a2, a3      # subtract once more if udividend >= udivisor
.Lreturn:
        bgez    a7, .Lpositive
        neg     a2, a2          # if (dividend < 0), negate the remainder
.Lpositive:
        abi_return

.Lspecial:
        bltu    a2, a3, .Lreturn2
        sub     a2, a2, a3      # subtract once if udividend >= udivisor
.Lreturn2:
        bgez    a7, .Lpositive2
        neg     a2, a2          # if (dividend < 0), negate the remainder
.Lpositive2:
        abi_return

.Lle_one:
        # udivisor is either 0 or 1, so just return 0.
        # someday we may want to throw an exception if udivisor is 0.
        movi    a2, 0
        abi_return
        .size   __modsi3,.-__modsi3

#endif /* L_modsi3 */