@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */
/* ------------------------------------------------------------------------ */

/* This file is run through the C preprocessor before it is assembled:
   the #define / #ifdef lines are cpp directives, the .macro / .req lines
   are assembler directives.  Assembler line comments start with '@'.  */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  The two levels make sure the arguments are
   macro-expanded before they are pasted together.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

/* __PLT__ is appended to calls of __div0, TYPE and SIZE emit the symbol
   type and size directives.  All three expand to nothing for non-ELF
   targets.  */

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#endif

/* Function end macros.  Variants for 26 bit APCS and interworking.

   RET       unconditional return to the address in lr.
   RETc(x)   the same, executed only under condition code x.
   RETCOND   suffix for a load-multiple that returns by loading pc
	     ("^" for 26 bit APCS, empty otherwise).

   ARM_LDIV0 / THUMB_LDIV0 hold the code executed for a division by zero:
   save lr, call __div0, put 0 in r0 and return to the original caller.
   They are expanded only by FUNC_END, which provides the Ldiv0 label in
   front of them.  */

#ifdef __APCS_26__
# define RET		movs	pc, lr
# define RETc(x)	mov##x##s	pc, lr
# define RETCOND	^
.macro ARM_LDIV0
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	ldmia	sp!, {pc}^
.endm
#else
# ifdef __THUMB_INTERWORK__
#  define RET		bx	lr
#  define RETc(x)	bx##x	lr
.macro THUMB_LDIV0
	push	{ lr }
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
	pop	{ r1 }
	bx	r1
.endm
.macro ARM_LDIV0
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	ldr	lr, [sp], #4
	bx	lr
.endm
# else
#  define RET		mov	pc, lr
#  define RETc(x)	mov##x	pc, lr
.macro THUMB_LDIV0
	push	{ lr }
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
	pop	{ pc }
.endm
.macro ARM_LDIV0
	str	lr, [sp, #-4]!
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	ldmia	sp!, {pc}
.endm
# endif
# define RETCOND
#endif

/* FUNC_END name
   Close the division function __<name>: define the local label Ldiv0
   that the function branches to when the divisor is zero, emit the
   matching division-by-zero tail and then the size directive.  Ldiv0 is
   defined here and only here; the *_LDIV0 macros above carry no label of
   their own.  */

.macro FUNC_END name
Ldiv0:
#ifdef __thumb__
	THUMB_LDIV0
#else
	ARM_LDIV0
#endif
	SIZE (__\name)
.endm

/* THUMB_FUNC_START name
   Open a global Thumb function called exactly <name> (no "__" is added).
   Unlike FUNC_START this does not select a section or alignment.  */

.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE (\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#else
#define THUMB_FUNC
#define THUMB_CODE
#endif

/* FUNC_START name
   Open the global function __<name> in the text section.  When the file
   is compiled for Thumb the function is marked and assembled as Thumb
   code, otherwise THUMB_CODE and THUMB_FUNC expand to nothing.  */

.macro FUNC_START name
	.text
	.globl	SYM (__\name)
	TYPE (__\name)
	.align	0
	THUMB_CODE
	THUMB_FUNC
SYM (__\name):
.endm

/* Register aliases.
*/

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15

/* Note that overdone and result are the same register (r2): the modulo
   variants of the bodies below use it as OVERDONE, the division variants
   as RESULT.  */

/* ------------------------------------------------------------------------ */
/* Bodies of the division and modulo routines.  */
/* ------------------------------------------------------------------------ */

/* ARM_DIV_MOD_BODY modulo
   Unsigned shift-and-subtract core, ARM instruction set.

   On entry (set up by the functions further down):
     dividend, divisor   unsigned operands, divisor not zero
     curbit              1
     result              0 (division variant only)
   modulo = 0:  the quotient is accumulated in result.
   modulo = 1:  the remainder is left in dividend; overdone and ip are
		used as scratch.
   The macro ends by defining Lgot_result, so execution falls through to
   whatever follows the expansion, and callers may branch to Lgot_result
   to skip the body.  The labels are plain (not per-expansion), so the
   macro can be expanded only once per assembly.  */

.macro ARM_DIV_MOD_BODY modulo
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, #0x10000000
	cmplo	divisor, dividend
	movlo	divisor, divisor, lsl #4
	movlo	curbit, curbit, lsl #4
	blo	Loop1

Lbignum:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, #0x80000000
	cmplo	divisor, dividend
	movlo	divisor, divisor, lsl #1
	movlo	curbit, curbit, lsl #1
	blo	Lbignum

Loop3:
	@ Test for possible subtractions.  On the final pass, this may
	@ subtract too much from the dividend ...

	.if \modulo
	@ ... so keep track of which subtractions are done in OVERDONE.
	@ We can fix them up afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	subhs	dividend, dividend, divisor
	cmp	dividend, divisor, lsr #1
	subhs	dividend, dividend, divisor, lsr #1
	orrhs	overdone, overdone, curbit, ror #1
	cmp	dividend, divisor, lsr #2
	subhs	dividend, dividend, divisor, lsr #2
	orrhs	overdone, overdone, curbit, ror #2
	cmp	dividend, divisor, lsr #3
	subhs	dividend, dividend, divisor, lsr #3
	orrhs	overdone, overdone, curbit, ror #3
	mov	ip, curbit
	.else
	@ ... so keep track of which subtractions are done in RESULT.
	@ The result will be ok, since the "bit" will have been
	@ shifted out at the bottom.
	cmp	dividend, divisor
	subhs	dividend, dividend, divisor
	orrhs	result, result, curbit
	cmp	dividend, divisor, lsr #1
	subhs	dividend, dividend, divisor, lsr #1
	orrhs	result, result, curbit, lsr #1
	cmp	dividend, divisor, lsr #2
	subhs	dividend, dividend, divisor, lsr #2
	orrhs	result, result, curbit, lsr #2
	cmp	dividend, divisor, lsr #3
	subhs	dividend, dividend, divisor, lsr #3
	orrhs	result, result, curbit, lsr #3
	.endif

	cmp	dividend, #0			@ Early termination?
	movnes	curbit, curbit, lsr #4		@ No, any more bits to do?
	movne	divisor, divisor, lsr #4
	bne	Loop3

	.if \modulo
Lfixup_dividend:
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of OVERDONE.  Exactly which were not needed
	@ are governed by the position of the bit, stored in IP.
	ands	overdone, overdone, #0xe0000000
	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	tstne	ip, #0x7
	beq	Lgot_result
	tst	overdone, ip, ror #3
	addne	dividend, dividend, divisor, lsr #3
	tst	overdone, ip, ror #2
	addne	dividend, dividend, divisor, lsr #2
	tst	overdone, ip, ror #1
	addne	dividend, dividend, divisor, lsr #1
	.endif

Lgot_result:
.endm
/* ------------------------------------------------------------------------ */

/* THUMB_DIV_MOD_BODY modulo
   Thumb counterpart of ARM_DIV_MOD_BODY with the same entry conditions
   and results.  In addition it uses the register work as scratch; the
   Thumb functions below push work before the expansion and pop it
   afterwards.  The modulo variant also uses ip to hold a copy of curbit.
   Ends by defining Lgot_result; expand only once per assembly.  */

.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
Loop1:
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	Lbignum
	cmp	divisor, dividend
	bhs	Lbignum
	lsl	divisor, #4
	lsl	curbit, #4
	b	Loop1
Lbignum:
	@ Set work to 0x80000000
	lsl	work, #3
Loop2:
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	Loop3
	cmp	divisor, dividend
	bhs	Loop3
	lsl	divisor, #1
	lsl	curbit, #1
	b	Loop2
Loop3:
	@ Test for possible subtractions ...
	.if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done, we can fix them up
	@ afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	Lover1
	sub	dividend, dividend, divisor
Lover1:
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	Lover2
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Lover2:
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	Lover3
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Lover3:
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	Lover4
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
Lover4:
	mov	ip, curbit
	.else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	Lover1
	sub	dividend, dividend, divisor
	orr	result, result, curbit
Lover1:
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	Lover2
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
Lover2:
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	Lover3
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
Lover3:
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	Lover4
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
Lover4:
	.endif

	cmp	dividend, #0			@ Early termination?
	beq	Lover5
	lsr	curbit, #4			@ No, any more bits to do?
	beq	Lover5
	lsr	divisor, #4
	b	Loop3
Lover5:
	.if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	Lgot_result

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	Lgot_result

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	Lover6
	lsr	work, divisor, #3
	add	dividend, work
Lover6:
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	Lover7
	lsr	work, divisor, #2
	add	dividend, work
Lover7:
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	Lgot_result
	lsr	work, divisor, #1
	add	dividend, work
	.endif
Lgot_result:
.endm
/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

/* __udivsi3: unsigned division.
   In:   r0 = dividend, r1 = divisor.
   Out:  r0 = quotient.  A zero divisor branches to Ldiv0.
   Uses r2 and r3; the Thumb version also uses r4 and preserves it on
   the stack.  */

	FUNC_START udivsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	Lgot_result		/* Quotient is 0; skip the body.  */

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#else /* ARM version.  */

	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	mov	result, #0
	cmp	dividend, divisor
	blo	Lgot_result		/* Quotient is 0; skip the body.  */

	ARM_DIV_MOD_BODY 0

	mov	r0, result
	RET

#endif /* ARM version */

	FUNC_END udivsi3

#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

/* __umodsi3: unsigned remainder.
   In:   r0 = dividend, r1 = divisor.
   Out:  r0 = remainder.  A zero divisor branches to Ldiv0.
   Uses r2, r3 and ip; the Thumb version also uses r4 and preserves it on
   the stack.  */

	FUNC_START umodsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	Ldiv0
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	Lover10
	RET				/* dividend < divisor: it is the remainder.  */

Lover10:
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else /* ARM version.  */

	cmp	divisor, #0
	beq	Ldiv0
	cmp	divisor, #1
	cmpne	dividend, divisor
	moveq	dividend, #0
	/* Only the dividend < divisor case returns here.  When the divisor
	   is 1 or equal to the dividend, the dividend has just been set to
	   zero and execution continues into the body with that value.  */
	RETc(lo)
	mov	curbit, #1

	ARM_DIV_MOD_BODY 1

	RET

#endif /* ARM version.
*/

	FUNC_END umodsi3

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

/* __divsi3: signed division.
   In:   r0 = dividend, r1 = divisor.
   Out:  r0 = quotient.  A zero divisor branches to Ldiv0.
   The exclusive-or of the two operands is kept in ip; both operands are
   then made non-negative for the unsigned body, and the quotient is
   negated at the end when the saved value is negative.
   Uses r2, r3 and ip; the Thumb version also uses r4 and preserves it on
   the stack.  */

	FUNC_START divsi3

#ifdef __thumb__
	cmp	divisor, #0
	beq	Ldiv0

	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	Lover10
	neg	divisor, divisor	@ Loops below use unsigned.
Lover10:
	cmp	dividend, #0
	bpl	Lover11
	neg	dividend, dividend
Lover11:
	cmp	dividend, divisor
	blo	Lgot_result

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	Lover12
	neg	r0, r0
Lover12:
	pop	{ work }
	RET

#else /* ARM version.  */

	eor	ip, dividend, divisor		@ Save the sign of the result.
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
	beq	Ldiv0
	cmp	dividend, #0
	rsbmi	dividend, dividend, #0
	cmp	dividend, divisor
	blo	Lgot_result

	ARM_DIV_MOD_BODY 0

	mov	r0, result
	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	FUNC_END divsi3

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

/* __modsi3: signed remainder.
   In:   r0 = dividend, r1 = divisor.
   Out:  r0 = remainder, negated when the original dividend was negative.
	 A zero divisor branches to Ldiv0.
   The original dividend is kept on the stack while the unsigned body
   runs, because the body uses ip (and, in Thumb, work) itself.  */

	FUNC_START modsi3

#ifdef __thumb__

	mov	curbit, #1
	cmp	divisor, #0
	beq	Ldiv0
	bpl	Lover10
	neg	divisor, divisor	@ Loops below use unsigned.
Lover10:
	push	{ work }
	@ Need to save the sign of the dividend, unfortunately, we need
	@ work later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	Lover11
	neg	dividend, dividend
Lover11:
	cmp	dividend, divisor
	blo	Lgot_result

	THUMB_DIV_MOD_BODY 1

	pop	{ work }		/* Original (signed) dividend.  */
	cmp	work, #0
	bpl	Lover12
	neg	dividend, dividend
Lover12:
	pop	{ work }		/* Value work had on entry.  */
	RET

#else /* ARM version.  */

	cmp	divisor, #0
	rsbmi	divisor, divisor, #0		@ Loops below use unsigned.
	beq	Ldiv0
	@ Need to save the sign of the dividend, unfortunately, we need
	@ ip later on; this is faster than pushing lr and using that.
	str	dividend, [sp, #-4]!
	cmp	dividend, #0			@ Test dividend against zero
	rsbmi	dividend, dividend, #0		@ If negative make positive
	cmp	dividend, divisor		@ else if zero return zero
	blo	Lgot_result			@ if smaller return dividend
	mov	curbit, #1

	ARM_DIV_MOD_BODY 1

	ldr	ip, [sp], #4
	cmp	ip, #0
	rsbmi	dividend, dividend, #0
	RET

#endif /* ARM version */

	FUNC_END modsi3

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

/* __div0: division-by-zero hook called from the Ldiv0 tail of the
   division functions.  This variant does nothing and returns.  */

	FUNC_START div0

	RET

	SIZE (__div0)

#endif /* L_dvmd_tls */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.
@ Used in place of L_dvmd_tls

/* Constants taken from <asm/unistd.h> and <asm/signal.h> */
#define SIGFPE	8
#define __NR_SYSCALL_BASE	0x900000
#define __NR_getpid	(__NR_SYSCALL_BASE+ 20)
#define __NR_kill	(__NR_SYSCALL_BASE+ 37)

/* __div0 (GNU/Linux variant): send SIGFPE to the current process.
   It issues the getpid system call and, unless that failed, the kill
   system call with r0 = the value getpid returned and r1 = SIGFPE.
   r1 and lr are saved on entry and restored on every exit path; r0 is
   not preserved.

   When built for interworking, both exits return with BX so that the
   instruction-set state of the caller is restored.  Loading pc with a
   load-multiple does not switch state on ARMv4T, so it is used only in
   the non-interworking build.  */

	FUNC_START div0

	stmfd	sp!, {r1, lr}
	swi	__NR_getpid
	cmn	r0, #1000		/* hs: r0 is in the range -1000 .. -1, an error code.  */
#ifdef __THUMB_INTERWORK__
	ldmhsfd	sp!, {r1, lr}		@ not much we can do
	bxhs	lr			/* The load does not alter the flags.  */
#else
	ldmhsfd	sp!, {r1, pc}RETCOND	@ not much we can do
#endif
	mov	r1, #SIGFPE
	swi	__NR_kill
#ifdef __THUMB_INTERWORK__
	ldmfd	sp!, {r1, lr}
	bx	lr
#else
	ldmfd	sp!, {r1, pc}RETCOND
#endif

	SIZE (__div0)

#endif /* L_dvmd_lnx */
/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined L_call_via_rX && (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__)

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.
*/

	.text
	.align	0
	.force_thumb

/* call_via register
   Emit the global Thumb function _call_via_<register>, which consists of
   a BX through that register followed by a NOP.  */

.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE (_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */
/* ------------------------------------------------------------------------ */
/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined L_interwork_call_via_rX && (defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__)

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.
*/

	.text
	.align	0

/* _arm_return: return point handed (in lr) to ARM-state callees by the
   stubs below.  It pops the return address the stub pushed and
   branches to it with BX.  Uses r12.  */

	.code	32
	.globl	_arm_return
_arm_return:
	ldmia	r13!, {r12}
	bx	r12
	.code	16

/* interwork register
   Emit the global Thumb function _interwork_call_via_<register>.

   The Thumb entry executes "bx pc" to reach the ARM code at
   .Lchange_<register>.  That code tests bit 0 of the target address:
   when it is clear, the return address in lr is pushed and lr is
   redirected to _arm_return; when it is set, lr is left alone.  The
   target is then entered with BX.  */

.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl	.Lchange_\register
.Lchange_\register:
	tst	\register, #1
	stmeqdb	r13!, {lr}
	adreq	lr, _arm_return
	bx	\register

	SIZE (_interwork_call_via_\register)
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  */
	/* lr holds the target address here, so it is copied to ip before it
	   may be overwritten with the address of _arm_return; the MOV does
	   not alter the flags set by the TST.  */
	.code	16

	THUMB_FUNC_START _interwork_call_via_lr

	bx	pc
	nop

	.code	32
	.globl	.Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE (_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */