@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright (C) 1995-2018 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif  /* __ELF__ and __linux__ */

#ifdef __ARM_EABI__
/* Some attributes that are common to all routines in this file.  */
	/* Tag_ABI_align_needed: This code does not require 8-byte
	   alignment from the caller.  */
	/* .eabi_attribute 24, 0  -- default setting.  */
	/* Tag_ABI_align_preserved: This code preserves 8-byte
	   alignment in any callee.  */
	.eabi_attribute 25, 1
#endif /* __ARM_EABI__ */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#elif defined __vxworks && !defined __PIC__
#define __PLT__ /* Not supported by the kernel loader.  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
/* Local (non-exported) labels: "." prefix keeps them out of the ELF
   symbol table.  */
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for interworking.  */

/* Collapse the per-architecture predefines into a single numeric
   __ARM_ARCH__ level that the code below can compare against.  */
#if defined(__ARM_ARCH_2__)
# define __ARM_ARCH__ 2
#endif

#if defined(__ARM_ARCH_3__)
# define __ARM_ARCH__ 3
#endif

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
	|| defined(__ARM_ARCH_6M__)
# define __ARM_ARCH__ 6
#endif

#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
	|| defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
	|| defined(__ARM_ARCH_7EM__)
# define __ARM_ARCH__ 7
#endif

#if defined(__ARM_ARCH_8A__) || defined(__ARM_ARCH_8M_BASE__) \
	|| defined(__ARM_ARCH_8M_MAIN__) || defined(__ARM_ARCH_8R__)
# define __ARM_ARCH__ 8
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* There are times when we might prefer Thumb1 code even if ARM code is
   permitted, for example, the code might be smaller, or there might be
   interworking problems with switching to ARM state if interworking is
   disabled.  */
#if (defined(__thumb__)			\
     && !defined(__thumb2__)		\
     && (!defined(__THUMB_INTERWORK__)	\
	 || defined (__OPTIMIZE_SIZE__)	\
	 || !__ARM_ARCH_ISA_ARM))
# define __prefer_thumb__
#endif

#if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1
#define NOT_ISA_TARGET_32BIT 1
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

/* Pre-v4t: bx does not exist, return with a plain mov to pc.  */
# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif

/* Hand-rolled DWARF call-frame information, emitted directly into
   .debug_frame (rather than via .cfi_* directives).  cfi_pop marks
   register \reg as restored and resets the CFA offset; cfi_push marks
   \reg as saved at \offset.  \advance is the byte distance from the
   start of the function to the instruction being annotated.  */
.macro cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
/* Emit the CIE shared by the FDEs, plus the FDE header covering
   \start_label .. \end_label.  */
.macro cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
	.4byte	0xffffffff	@ CIE Identifier Tag
	.byte	0x1	@ CIE Version
	.ascii	"\0"	@ CIE Augmentation
	.uleb128 0x1	@ CIE Code Alignment Factor
	.sleb128 -4	@ CIE Data Alignment Factor
	.byte	0xe	@ CIE RA Column
	.byte	0xc	@ DW_CFA_def_cfa
	.uleb128 0xd
	.uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm
.macro cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm

/* Don't pass dirn, it's there just to get token pasting right.
*/

/* Return from a routine, restoring \regs plus the saved return address
   from the stack.  Uses BX when armv4t interworking is in effect.  */
.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, lr}
# else
	ldm\cond\dirn	sp!, {\regs, lr}
# endif
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	/* Caller is responsible for providing IT instruction.  */
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, pc}
# else
	ldm\cond\dirn	sp!, {\regs, pc}
# endif
	.endif
#endif
.endm

/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.  */
#if defined(__thumb2__)
.macro do_it cond, suffix=""
	it\suffix	\cond
.endm
.macro shift1 op, arg0, arg1, arg2
	\op	\arg0, \arg1, \arg2
.endm
#define do_push	push
#define do_pop	pop
#define COND(op1, op2, cond) op1 ## op2 ## cond
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\shiftop \tmp, \src2, \shiftreg
	\name \dest, \src1, \tmp
.endm
#else
.macro do_it cond, suffix=""
.endm
.macro shift1 op, arg0, arg1, arg2
	mov	\arg0, \arg1, \op \arg2
.endm
#define do_push	stmfd sp!,
#define do_pop	ldmfd sp!,
#define COND(op1, op2, cond) op1 ## cond ## op2
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\name \dest, \src1, \src2, \shiftop \shiftreg
.endm
#endif

/* Division-by-zero trailer, ARM state.  The EABI variant saturates the
   result in r0 (per the run-time ABI) and tail-calls __aeabi_idiv0; the
   non-EABI variant calls __div0 and returns 0.  */
#ifdef __ARM_EABI__
.macro ARM_LDIV0 name signed
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM (__aeabi_idiv0) __PLT__
.endm
#else
.macro ARM_LDIV0 name signed
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm
#endif


/* Division-by-zero trailer, Thumb state.  Same contract as ARM_LDIV0;
   the armv4t sub-case must switch to ARM state via "bx pc" because the
   saturating conditional moves are unavailable in Thumb-1.  */
#ifdef __ARM_EABI__
.macro THUMB_LDIV0 name signed
#ifdef NOT_ISA_TARGET_32BIT

	push	{r0, lr}
	mov	r0, #0
	bl	SYM(__aeabi_idiv0)
	@ We know we are not on armv4t, so pop pc is safe.
	pop	{r1, pc}

#elif defined(__thumb2__)
	.syntax unified
	.ifc \signed, unsigned
	cbz	r0, 1f
	mov	r0, #0xffffffff
1:
	.else
	cmp	r0, #0
	do_it	gt
	movgt	r0, #0x7fffffff
	do_it	lt
	movlt	r0, #0x80000000
	.endif
	b.w	SYM(__aeabi_idiv0) __PLT__
#else
	.align	2
	bx	pc
	nop
	.arm
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM(__aeabi_idiv0) __PLT__
	.thumb
#endif
.endm
#else
.macro THUMB_LDIV0 name signed
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm
#endif

.macro FUNC_END name
	SIZE (__\name)
.endm

/* Close a division routine: emit the shared Ldiv0 division-by-zero
   handler (reached by branches from the routine body) plus its CFI.  */
.macro DIV_FUNC_END name signed
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name \signed
#else
	ARM_LDIV0 \name \signed
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm

.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX .syntax divided
# else
#define THUMB_SYNTAX
# endif
#else
#define THUMB_FUNC
#define THUMB_CODE
#define THUMB_SYNTAX
#endif

/* Open the definition of __\name; sp_section=function_section places it
   in its own .text.__\name subsection.  */
.macro FUNC_START name sp_section=
  .ifc \sp_section, function_section
	.section	.text.__\name,"ax",%progbits
  .else
	.text
  .endif
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
	THUMB_SYNTAX
SYM (__\name):
.endm

.macro ARM_SYM_START name
	TYPE (\name)
	.align 0
SYM (\name):
.endm

.macro SYM_END name
	SIZE (\name)
.endm

/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro ARM_FUNC_START name sp_section=
	FUNC_START \name \sp_section
	.syntax unified
.endm
#define EQUIV .thumb_set
.macro	ARM_CALL name
	bl	__\name
.endm

#elif defined(__INTERWORKING_STUBS__)

.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro	ARM_CALL name
	bl	_L__\name
.endm

#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

#ifdef NOT_ISA_TARGET_32BIT
#define EQUIV .thumb_set
#else
.macro	ARM_FUNC_START name sp_section=
  .ifc \sp_section, function_section
	.section	.text.__\name,"ax",%progbits
  .else
	.text
  .endif
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro	ARM_CALL name
	bl	__\name
.endm
#endif

#endif

.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

#ifndef NOT_ISA_TARGET_32BIT
.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm
#endif

/* Endian-dependent halves of 64-bit argument pairs.  */
#ifdef __ARMEB__
#define xxh r0
#define xxl r1
#define yyh r2
#define yyl r3
#else
#define xxh r1
#define xxl r0
#define yyh r3
#define yyl r2
#endif

#ifdef __ARM_EABI__
.macro	WEAK name
	.weak SYM (__\name)
.endm
#endif

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/* ------------------------------------------------------------------------ */
/*	       Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
/* Unsigned 32-bit division core.  Inputs: \dividend, \divisor (both
   non-zero, \dividend > \divisor on entry in the callers' fast path).
   Outputs: \result = quotient, \dividend = remainder.  \curbit is
   scratch.  */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

#if defined (__thumb2__)
	/* Use clz to find how far the operands' top bits are apart, then
	   jump into the unrolled 32-step table at the right entry (each
	   step below is 16 bytes, hence the lsl #4).  */
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsb	\curbit, \curbit, #31
	adr	\result, 1f
	add	\curbit, \result, \curbit, lsl #4
	mov	\result, #0
	mov	pc, \curbit
.p2align 3
1:
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp.w	\dividend, \divisor, lsl #shift
	nop.n
	adc.w	\result, \result, \result	@ shift quotient, insert carry
	it	cs
	subcs.w	\dividend, \dividend, \divisor, lsl #shift
	.endr
#else
	/* ARM state: same unrolled table, 3 instructions (12 bytes) per
	   step, entered via an add to pc.  */
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr
#endif

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* __ARM_ARCH__ < 5 */

	@ Division loop: four trial subtractions per iteration, one per
	@ divisor/curbit shift position.
1:	cmp	\dividend, \divisor
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	do_it	ne, t
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
/* Set \order to the bit index of the highest set bit of \divisor.
   Used by the callers when the divisor is a power of two, so that the
   division reduces to a right shift by \order.  The non-clz fallback
   destroys \divisor.  */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
/* ------------------------------------------------------------------------ */
/* Unsigned 32-bit modulo core.  On exit \dividend holds the remainder;
   \order and \spare are scratch; \divisor may be destroyed.  */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	/* Jump into the unrolled 32-step table (2 insns = 8 bytes per
	   step, hence the lsl #3).  */
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed substractions to keep only the reminder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor, lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor, lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor, lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/substractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor, \divisor, lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
/* Thumb-1 shift-and-subtract divide/modulo body.  Operates on the
   register aliases (dividend, divisor, curbit, result/overdone, work).
   \modulo = 0 builds the quotient in "result"; \modulo = 1 records
   speculative subtractions in "overdone" so the remainder left in
   "dividend" can be fixed up afterwards.  */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done, we can fix them up
	@ afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	@ Undo each recorded over-subtraction by adding back the
	@ corresponding shifted divisor.
	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm

/* If performance is preferred, the following functions are provided.  */
#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)

/* Branch to div(n), and jump to label if curbit is lo than divisior.  */
.macro BranchToDiv n, label
	lsr	curbit, dividend, \n
	cmp	curbit, divisor
	blo	\label
.endm

/* Body of div(n).  Shift the divisor in n bits and compare the divisor
   and dividend.  Update the dividend as the substruction result.
   The carry flag from the cmp is folded into "result" by the adc and
   is also tested by the bcs loop branches in the callers.  */
.macro DoDiv n
	lsr	curbit, dividend, \n
	cmp	curbit, divisor
	bcc	1f
	lsl	curbit, divisor, \n
	sub	dividend, dividend, curbit

1:	adc	result, result
.endm

/* The body of division with positive divisor.  Unless the divisor is very
   big, shift it up in multiples of four bits, since this is the amount of
   unwinding in the main division loop.  Continue shifting until the divisor
   is larger than the dividend.  */
.macro THUMB1_Div_Positive
	mov	result, #0
	BranchToDiv #1, LSYM(Lthumb1_div1)
	BranchToDiv #4, LSYM(Lthumb1_div4)
	BranchToDiv #8, LSYM(Lthumb1_div8)
	BranchToDiv #12, LSYM(Lthumb1_div12)
	BranchToDiv #16, LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_large_positive):
	mov	result, #0xff
	lsl	divisor, divisor, #8
	rev	result, result
	lsr	curbit, dividend, #16
	cmp	curbit, divisor
	blo	1f
	asr	result, #8
	lsl	divisor, divisor, #8
	beq	LSYM(Ldivbyzero_waypoint)

1:	lsr	curbit, dividend, #12
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div12)
	b	LSYM(Lthumb1_div16)
LSYM(Lthumb1_div_loop):
	lsr	divisor, divisor, #8
LSYM(Lthumb1_div16):
	@ NOTE(review): `Dodiv' relies on gas matching macro names
	@ case-insensitively (it invokes the DoDiv macro above) — confirm
	@ against the assembler documentation.
	Dodiv	#15
	Dodiv	#14
	Dodiv	#13
	Dodiv	#12
LSYM(Lthumb1_div12):
	Dodiv	#11
	Dodiv	#10
	Dodiv	#9
	Dodiv	#8
	bcs	LSYM(Lthumb1_div_loop)
LSYM(Lthumb1_div8):
	Dodiv	#7
	Dodiv	#6
	Dodiv	#5
LSYM(Lthumb1_div5):
	Dodiv	#4
LSYM(Lthumb1_div4):
	Dodiv	#3
LSYM(Lthumb1_div3):
	Dodiv	#2
LSYM(Lthumb1_div2):
	Dodiv	#1
LSYM(Lthumb1_div1):
	sub	divisor, dividend, divisor
	bcs	1f
	cpy	divisor, dividend

1:	adc	result, result
	cpy	dividend, result
	RET

LSYM(Ldivbyzero_waypoint):
	b	LSYM(Ldiv0)
.endm

/* The body of division with negative divisor.  Similar with
   THUMB1_Div_Positive except that the shift steps are in multiples
   of six bits.  */
.macro THUMB1_Div_Negative
	lsr	result, divisor, #31
	beq	1f
	neg	divisor, divisor

1:	asr	curbit, dividend, #32	@ shift by 32 puts the sign bit in carry
	bcc	2f
	neg	dividend, dividend

2:	eor	curbit, result		@ sign of the quotient, saved in ip
	mov	result, #0
	cpy	ip, curbit
	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_large):
	mov	result, #0xfc
	lsl	divisor, divisor, #6
	rev	result, result
	lsr	curbit, dividend, #8
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)

	lsl	divisor, divisor, #6
	asr	result, result, #6
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)

	lsl	divisor, divisor, #6
	asr	result, result, #6
	cmp	curbit, divisor
	blo	LSYM(Lthumb1_div_negative8)

	lsl	divisor, divisor, #6
	beq	LSYM(Ldivbyzero_negative)
	asr	result, result, #6
	b	LSYM(Lthumb1_div_negative8)
LSYM(Lthumb1_div_negative_loop):
	lsr	divisor, divisor, #6
LSYM(Lthumb1_div_negative8):
	DoDiv	#7
	DoDiv	#6
	DoDiv	#5
	DoDiv	#4
LSYM(Lthumb1_div_negative4):
	DoDiv	#3
	DoDiv	#2
	bcs	LSYM(Lthumb1_div_negative_loop)
	DoDiv	#1
	sub	divisor, dividend, divisor
	bcs	1f
	cpy	divisor, dividend

1:	cpy	curbit, ip
	adc	result, result
	asr	curbit, curbit, #1
	cpy	dividend, result
	bcc	2f
	neg	dividend, dividend	@ apply the saved quotient sign
	cmp	curbit, #0

2:	bpl	3f
	neg	divisor, divisor	@ apply the remainder sign

3:	RET

LSYM(Ldivbyzero_negative):
	cpy	curbit, ip
	asr	curbit, curbit, #1
	bcc	LSYM(Ldiv0)
	neg	dividend, dividend
.endm
#endif /* ARM Thumb version.
*/ 1095 1096/* ------------------------------------------------------------------------ */ 1097/* Start of the Real Functions */ 1098/* ------------------------------------------------------------------------ */ 1099#ifdef L_udivsi3 1100 1101#if defined(__prefer_thumb__) 1102 1103 FUNC_START udivsi3 1104 FUNC_ALIAS aeabi_uidiv udivsi3 1105#if defined(__OPTIMIZE_SIZE__) 1106 1107 cmp divisor, #0 1108 beq LSYM(Ldiv0) 1109LSYM(udivsi3_skip_div0_test): 1110 mov curbit, #1 1111 mov result, #0 1112 1113 push { work } 1114 cmp dividend, divisor 1115 blo LSYM(Lgot_result) 1116 1117 THUMB_DIV_MOD_BODY 0 1118 1119 mov r0, result 1120 pop { work } 1121 RET 1122 1123/* Implementation of aeabi_uidiv for ARMv6m. This version is only 1124 used in ARMv6-M when we need an efficient implementation. */ 1125#else 1126LSYM(udivsi3_skip_div0_test): 1127 THUMB1_Div_Positive 1128 1129#endif /* __OPTIMIZE_SIZE__ */ 1130 1131#elif defined(__ARM_ARCH_EXT_IDIV__) 1132 1133 ARM_FUNC_START udivsi3 1134 ARM_FUNC_ALIAS aeabi_uidiv udivsi3 1135 1136 cmp r1, #0 1137 beq LSYM(Ldiv0) 1138 1139 udiv r0, r0, r1 1140 RET 1141 1142#else /* ARM version/Thumb-2. */ 1143 1144 ARM_FUNC_START udivsi3 1145 ARM_FUNC_ALIAS aeabi_uidiv udivsi3 1146 1147 /* Note: if called via udivsi3_skip_div0_test, this will unnecessarily 1148 check for division-by-zero a second time. 
*/ 1149LSYM(udivsi3_skip_div0_test): 1150 subs r2, r1, #1 1151 do_it eq 1152 RETc(eq) 1153 bcc LSYM(Ldiv0) 1154 cmp r0, r1 1155 bls 11f 1156 tst r1, r2 1157 beq 12f 1158 1159 ARM_DIV_BODY r0, r1, r2, r3 1160 1161 mov r0, r2 1162 RET 1163 116411: do_it eq, e 1165 moveq r0, #1 1166 movne r0, #0 1167 RET 1168 116912: ARM_DIV2_ORDER r1, r2 1170 1171 mov r0, r0, lsr r2 1172 RET 1173 1174#endif /* ARM version */ 1175 1176 DIV_FUNC_END udivsi3 unsigned 1177 1178#if defined(__prefer_thumb__) 1179FUNC_START aeabi_uidivmod 1180 cmp r1, #0 1181 beq LSYM(Ldiv0) 1182# if defined(__OPTIMIZE_SIZE__) 1183 push {r0, r1, lr} 1184 bl LSYM(udivsi3_skip_div0_test) 1185 POP {r1, r2, r3} 1186 mul r2, r0 1187 sub r1, r1, r2 1188 bx r3 1189# else 1190 /* Both the quotient and remainder are calculated simultaneously 1191 in THUMB1_Div_Positive. There is no need to calculate the 1192 remainder again here. */ 1193 b LSYM(udivsi3_skip_div0_test) 1194 RET 1195# endif /* __OPTIMIZE_SIZE__ */ 1196 1197#elif defined(__ARM_ARCH_EXT_IDIV__) 1198ARM_FUNC_START aeabi_uidivmod 1199 cmp r1, #0 1200 beq LSYM(Ldiv0) 1201 mov r2, r0 1202 udiv r0, r0, r1 1203 mls r1, r0, r1, r2 1204 RET 1205#else 1206ARM_FUNC_START aeabi_uidivmod 1207 cmp r1, #0 1208 beq LSYM(Ldiv0) 1209 stmfd sp!, { r0, r1, lr } 1210 bl LSYM(udivsi3_skip_div0_test) 1211 ldmfd sp!, { r1, r2, lr } 1212 mul r3, r2, r0 1213 sub r1, r1, r3 1214 RET 1215#endif 1216 FUNC_END aeabi_uidivmod 1217 1218#endif /* L_udivsi3 */ 1219/* ------------------------------------------------------------------------ */ 1220#ifdef L_umodsi3 1221 1222#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1 1223 1224 ARM_FUNC_START umodsi3 1225 1226 cmp r1, #0 1227 beq LSYM(Ldiv0) 1228 udiv r2, r0, r1 1229 mls r0, r1, r2, r0 1230 RET 1231 1232#elif defined(__thumb__) 1233 1234 FUNC_START umodsi3 1235 1236 cmp divisor, #0 1237 beq LSYM(Ldiv0) 1238 mov curbit, #1 1239 cmp dividend, divisor 1240 bhs LSYM(Lover10) 1241 RET 1242 1243LSYM(Lover10): 1244 push { work 
} 1245 1246 THUMB_DIV_MOD_BODY 1 1247 1248 pop { work } 1249 RET 1250 1251#else /* ARM version. */ 1252 1253 FUNC_START umodsi3 1254 1255 subs r2, r1, #1 @ compare divisor with 1 1256 bcc LSYM(Ldiv0) 1257 cmpne r0, r1 @ compare dividend with divisor 1258 moveq r0, #0 1259 tsthi r1, r2 @ see if divisor is power of 2 1260 andeq r0, r0, r2 1261 RETc(ls) 1262 1263 ARM_MOD_BODY r0, r1, r2, r3 1264 1265 RET 1266 1267#endif /* ARM version. */ 1268 1269 DIV_FUNC_END umodsi3 unsigned 1270 1271#endif /* L_umodsi3 */ 1272/* ------------------------------------------------------------------------ */ 1273#ifdef L_divsi3 1274 1275#if defined(__prefer_thumb__) 1276 1277 FUNC_START divsi3 1278 FUNC_ALIAS aeabi_idiv divsi3 1279#if defined(__OPTIMIZE_SIZE__) 1280 1281 cmp divisor, #0 1282 beq LSYM(Ldiv0) 1283LSYM(divsi3_skip_div0_test): 1284 push { work } 1285 mov work, dividend 1286 eor work, divisor @ Save the sign of the result. 1287 mov ip, work 1288 mov curbit, #1 1289 mov result, #0 1290 cmp divisor, #0 1291 bpl LSYM(Lover10) 1292 neg divisor, divisor @ Loops below use unsigned. 1293LSYM(Lover10): 1294 cmp dividend, #0 1295 bpl LSYM(Lover11) 1296 neg dividend, dividend 1297LSYM(Lover11): 1298 cmp dividend, divisor 1299 blo LSYM(Lgot_result) 1300 1301 THUMB_DIV_MOD_BODY 0 1302 1303 mov r0, result 1304 mov work, ip 1305 cmp work, #0 1306 bpl LSYM(Lover12) 1307 neg r0, r0 1308LSYM(Lover12): 1309 pop { work } 1310 RET 1311 1312/* Implementation of aeabi_idiv for ARMv6m. This version is only 1313 used in ARMv6-M when we need an efficient implementation. 
 */
#else
LSYM(divsi3_skip_div0_test):
	@ Dispatch on the signs of the operands: if either is negative
	@ (sign bit of dividend|divisor set) take the slow path.
	cpy	curbit, dividend
	orr	curbit, divisor
	bmi	LSYM(Lthumb1_div_negative)

LSYM(Lthumb1_div_positive):
	THUMB1_Div_Positive

LSYM(Lthumb1_div_negative):
	THUMB1_Div_Negative

#endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)

@ Hardware integer divide available.
	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	sdiv	r0, r0, r1
	RET

#else /* ARM/Thumb-2 version. */

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	eor	ip, r0, r1		@ save the sign of the result.
	do_it	mi
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	subs	r2, r1, #1		@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	do_it	mi
	rsbmi	r3, r0, #0		@ positive dividend value
	cmp	r3, r1
	bls	11f			@ |dividend| <= divisor: quotient is -1, 0 or 1
	tst	r1, r2			@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2	@ general case; quotient in r0

	cmp	ip, #0
	do_it	mi
	rsbmi	r0, r0, #0		@ fix up the sign of the quotient
	RET

	@ Divisor was 1 or -1: quotient is +/- dividend.
10:	teq	ip, r0			@ same sign ?
	do_it	mi
	rsbmi	r0, r0, #0
	RET

	@ |dividend| <= divisor: result is 0, or +/-1 when equal.
11:	do_it	lo
	movlo	r0, #0
	do_it	eq,t
	moveq	r0, ip, asr #31		@ 0 or -1 from the result sign...
	orreq	r0, r0, #1		@ ...giving +1 or -1
	RET

	@ Power-of-2 divisor: shift, then fix up the sign.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	do_it	mi
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3 signed

@ __aeabi_idivmod: signed divide with remainder.
@ In:  r0 = numerator, r1 = denominator.
@ Out: r0 = quotient,  r1 = remainder.
#if defined(__prefer_thumb__)
FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
# if defined(__OPTIMIZE_SIZE__)
	push	{r0, r1, lr}
	bl	LSYM(divsi3_skip_div0_test)
	POP	{r1, r2, r3}		@ r1 = numerator, r2 = denominator, r3 = return addr
	mul	r2, r0			@ r2 = quotient * denominator
	sub	r1, r1, r2		@ remainder = numerator - quotient * denominator
	bx	r3
# else
	/* Both the quotient and remainder are calculated simultaneously
	   in THUMB1_Div_Positive and THUMB1_Div_Negative.  There is no
	   need to calculate the remainder again here.
 */
	b	LSYM(divsi3_skip_div0_test)
	RET
# endif /* __OPTIMIZE_SIZE__ */

#elif defined(__ARM_ARCH_EXT_IDIV__)
@ Hardware integer divide available: sdiv + mls does the whole job.
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	sdiv	r0, r0, r1
	mls	r1, r0, r1, r2		@ r1 = r2 - (r0 * r1) = remainder
	RET
#else
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(divsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }	@ r1 = numerator, r2 = denominator
	mul	r3, r2, r0
	sub	r1, r1, r3		@ remainder
	RET
#endif
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

@ __modsi3: signed modulus, r0 = r0 % r1 (sign follows the dividend).
#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1

	ARM_FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	sdiv	r2, r0, r1
	mls	r0, r1, r2, r0		@ r0 = r0 - r1 * (r0 / r1)
	RET

#elif defined(__thumb__)

	@ dividend/divisor/curbit/work are register aliases defined
	@ earlier in this file (not visible in this chunk).
	FUNC_START modsi3

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ Need to save the sign of the dividend, unfortunately, we need
	@ work later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1		@ mode 1: compute the modulus

	pop	{ work }		@ the saved original dividend (its sign)
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend	@ dividend was negative: negate remainder
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version. */

	FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0		@ loops below use unsigned.
	movs	ip, r0			@ preserve sign of dividend
	rsbmi	r0, r0, #0		@ if negative make positive
	subs	r2, r1, #1		@ compare divisor with 1
	cmpne	r0, r1			@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2			@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f			@ easy cases done: just fix up the sign

	ARM_MOD_BODY r0, r1, r2, r3	@ general case; remainder left in r0

10:	cmp	ip, #0
	rsbmi	r0, r0, #0		@ dividend was negative: negate remainder
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3 signed

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

@ Default division-by-zero handlers: simply return to the caller
@ (the quotient/remainder values are then unspecified).
#ifdef __ARM_EABI__
	WEAK aeabi_idiv0
	WEAK aeabi_ldiv0
	FUNC_START aeabi_idiv0
	FUNC_START aeabi_ldiv0
	RET
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	FUNC_START div0
	RET
	FUNC_END div0
#endif

#endif /* L_divmodsi_tools */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE	8

#ifdef __ARM_EABI__
	cfi_start	__aeabi_ldiv0, LSYM(Lend_aeabi_ldiv0)
	WEAK aeabi_idiv0
	WEAK aeabi_ldiv0
	ARM_FUNC_START aeabi_idiv0
	ARM_FUNC_START aeabi_ldiv0
	do_push	{r1, lr}
98:	cfi_push 98b - __aeabi_ldiv0, 0xe, -0x4, 0x8
#else
	cfi_start	__div0, LSYM(Lend_div0)
	ARM_FUNC_START div0
	do_push	{r1, lr}
98:	cfi_push 98b - __div0, 0xe, -0x4, 0x8
#endif

	@ Deliver SIGFPE to the process via raise(3), then return.
	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1 unwind=98b

#ifdef __ARM_EABI__
	cfi_end	LSYM(Lend_aeabi_ldiv0)
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	cfi_end	LSYM(Lend_div0)
	FUNC_END div0
#endif

#endif /* L_dvmd_lnx */
#ifdef L_clear_cache
#if defined __ARM_EABI__ && defined __linux__
@ EABI GNU/Linux call to cacheflush syscall.
@ __clear_cache(start, end): r0/r1 are passed straight through to the
@ kernel; r7 is loaded with 0x0f0002, the ARM-private cacheflush
@ syscall number on Linux; r2 = 0 is the flags argument.
	ARM_FUNC_START clear_cache
	do_push	{r7}
#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
	movw	r7, #2
	movt	r7, #0xf		@ r7 = 0x000f0002
#else
	mov	r7, #0xf0000
	add	r7, r7, #2		@ r7 = 0x000f0002
#endif
	mov	r2, #0
	swi	0
	do_pop	{r7}
	RET
	FUNC_END clear_cache
#else
#error "This is only for ARM EABI GNU/Linux"
#endif
#endif /* L_clear_cache */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */

#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
#ifndef __symbian__

#ifdef L_lshrdi3

@ __lshrdi3 / __aeabi_llsr: 64-bit logical shift right.
@ In: ah:al = value (endian-mapped above), r2 = shift count (0..63).
	FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
	lsr	al, r2			@ low word shifted by count (0 if count >= 32)
	mov	r3, ah
	lsr	ah, r2
	mov	ip, r3
	sub	r2, #32
	lsr	r3, r2			@ high bits that move into al when count >= 32
	orr	al, r3
	neg	r2, r2			@ r2 = 32 - count
	mov	r3, ip
	lsl	r3, r2			@ high bits that move into al when count < 32
	orr	al, r3
	RET
#else
	subs	r3, r2, #32		@ r3 = count - 32; N set when count < 32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3		@ count >= 32: al comes entirely from ah
	orrmi	al, al, ah, lsl ip	@ count <  32: merge the bits crossing words
	mov	ah, ah, lsr r2
	RET
#endif
	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif

#ifdef L_ashrdi3

@ __ashrdi3 / __aeabi_lasr: 64-bit arithmetic shift right.
	FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	asr	ah, r2
	sub	r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	bmi	1f
	mov	ip, r3
	asr	r3, r2			@ count >= 32: sign-extended high bits into al
	orr	al, r3
	mov	r3, ip
1:
	neg	r2, r2			@ r2 = 32 - count
	lsl	r3, r2			@ count < 32: high bits crossing into al
	orr	al, r3
	RET
#else
	subs	r3, r2, #32		@ r3 = count - 32; N set when count < 32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3		@ count >= 32: al from sign-extended ah
	orrmi	al, al, ah, lsl ip	@ count <  32: merge bits crossing words
	mov	ah, ah, asr r2
	RET
#endif

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif

#ifdef L_ashldi3

@ __ashldi3 / __aeabi_llsl: 64-bit shift left (mirror image of lshrdi3).
	FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
	lsl	ah, r2
	mov	r3, al
	lsl	al, r2
	mov	ip, r3
	sub	r2, #32
	lsl	r3, r2			@ count >= 32: low bits that land in ah
	orr	ah, r3
	neg	r2, r2			@ r2 = 32 - count
	mov	r3, ip
	lsr	r3, r2			@ count <  32: low bits crossing into ah
	orr	ah, r3
	RET
#else
	subs	r3, r2, #32		@ r3 = count - 32; N set when count < 32
	rsb	ip, r2, #32
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3		@ count >= 32: ah comes entirely from al
	orrmi	ah, ah, al, lsr ip	@ count <  32: merge bits crossing words
	mov	al, al, lsl r2
	RET
#endif
	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif

#endif /* __symbian__ */

/* True when the CLZ instruction is available.  */
#if (__ARM_ARCH_ISA_THUMB == 2 \
     || (__ARM_ARCH_ISA_ARM \
	 && (__ARM_ARCH__ > 5 \
	     || (__ARM_ARCH__ == 5 && __ARM_ARCH_ISA_THUMB))))
#define HAVE_ARM_CLZ 1
#endif

#ifdef L_clzsi2
#ifdef NOT_ISA_TARGET_32BIT
@ __clzsi2 (Thumb-1): count leading zeros of r0 by a manual binary
@ search on 16/8/4-bit halves, then a table lookup of the top nibble.
@ r1 accumulates 28 minus the number of bits shifted out.
FUNC_START clzsi2
	mov	r1, #28
	mov	r3, #1
	lsl	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsr	r0, r0, #16
	sub	r1, r1, #16
2:	lsr	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsr	r0, r0, #8
	sub	r1, r1, #8
2:	lsr	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsr	r0, r0, #4
	sub	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]		@ clz of the remaining 4-bit value
	add	r0, r0, r1
	bx	lr
.align 2
1:
.byte	4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
	FUNC_END clzsi2
#else
ARM_FUNC_START clzsi2
# if defined(HAVE_ARM_CLZ)
	clz	r0, r0
	RET
# else
	@ Same binary-search scheme as the Thumb-1 version above.
	mov	r1, #28
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]		@ clz of the remaining 4-bit value
	add	r0, r0, r1
	RET
.align 2
1:
.byte	4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
# endif /* !HAVE_ARM_CLZ */
	FUNC_END clzsi2
#endif
#endif /* L_clzsi2 */

#ifdef L_clzdi2
@ __clzdi2: count leading zeros of a 64-bit value.  xxh/xxl are
@ register aliases for the high/low words (endian-dependent, defined
@ earlier in this file).
#if !defined(HAVE_ARM_CLZ)

# ifdef NOT_ISA_TARGET_32BIT
FUNC_START clzdi2
	push	{r4, lr}
# else
ARM_FUNC_START clzdi2
	do_push	{r4, lr}
# endif
	cmp	xxh, #0
	bne	1f
	@ High word is zero: answer is 32 + clz(low word).
# ifdef __ARMEB__
	mov	r0, xxl
	bl	__clzsi2
	add	r0, r0, #32
	b	2f
1:
	bl	__clzsi2
# else
	bl	__clzsi2
	add	r0, r0, #32
	b	2f
1:
	mov	r0, xxh
	bl	__clzsi2
# endif
2:
# ifdef NOT_ISA_TARGET_32BIT
	pop	{r4, pc}
# else
	RETLDM	r4
# endif
	FUNC_END clzdi2

#else /* HAVE_ARM_CLZ */

ARM_FUNC_START clzdi2
	cmp	xxh, #0
	do_it	eq, et
	clzeq	r0, xxl
	clzne	r0, xxh
	addeq	r0, r0, #32		@ high word zero: 32 + clz(low)
	RET
	FUNC_END clzdi2

#endif
#endif /* L_clzdi2 */

#ifdef L_ctzsi2
#ifdef NOT_ISA_TARGET_32BIT
@ __ctzsi2 (Thumb-1): count trailing zeros of r0.  First isolate the
@ lowest set bit (x & -x), then locate it with the same binary search
@ as clzsi2 but with a trailing-zero lookup table.
FUNC_START ctzsi2
	neg	r1, r0
	and	r0, r0, r1		@ r0 = lowest set bit of the input
	mov	r1, #28
	mov	r3, #1
	lsl	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsr	r0, r0, #16
	sub	r1, r1, #16
2:	lsr	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsr	r0, r0, #8
	sub	r1, r1, #8
2:	lsr	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsr	r0, r0, #4
	sub	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]
	sub	r0, r0, r1
	bx	lr
.align 2
1:
.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
	FUNC_END ctzsi2
#else
ARM_FUNC_START ctzsi2
	rsb	r1, r0, #0
	and	r0, r0, r1		@ r0 = lowest set bit of the input
# if defined(HAVE_ARM_CLZ)
	clz	r0, r0
	rsb	r0, r0, #31		@ ctz(x) = 31 - clz(x & -x)
	RET
# else
	mov	r1, #28
	@ Binary search for the position of the isolated bit (see clzsi2).
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]
	sub	r0, r0, r1
	RET
.align 2
1:
.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
# endif /* !HAVE_ARM_CLZ */
	FUNC_END ctzsi2
#endif
#endif /* L_clzsi2 */

/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
    || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
    || __ARM_ARCH__ >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.
 */

	.text
	.align 0
	.force_thumb

@ Emit one _call_via_<reg> veneer: bx to the target held in <reg>;
@ the bx honours the target address's bottom bit for mode switching.
.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

/* Don't bother with the old interworking routines for Thumb-2.  */
/* ??? Maybe only omit these on "m" variants.  */
#if !defined(__thumb2__) && __ARM_ARCH_ISA_ARM

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.
   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */

	.text
	.align 0

	.code	32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

	@ Return trampolines: recover the real return address saved at
	@ [r7, #-4] / [r11, #-4] and bx back (switching mode if needed).
	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]
	bx	lr

@ Emit an _interwork_<frame>_call_via_<reg> veneer that parks the
@ return address at [<frame>, #-4] when the target is ARM code.
@ NOTE(review): the 'return' macro argument is never referenced in
@ the body.
.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc			@ drop to ARM mode at the next word
	nop

	.code	32
	tst	\register, #1		@ Thumb target (bit 0 set)?
	streq	lr, [\frame, #-4]	@ ARM target: stash real return address
	adreq	lr, _arm_return_\frame	@ ...and return via the trampoline
	bx	\register

	SIZE	(\name)
.endm

@ Emit the plain _interwork_call_via_<reg> veneer (return address is
@ pushed on the stack) plus the r7- and r11-framed variants.
.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-8]!		@ ARM target: save return address on stack
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...
 */
	.code	16

	@ lr itself holds the target, so it must be copied to ip before
	@ lr can be overwritten with the trampoline return address.
	THUMB_FUNC_START _interwork_call_via_lr

	bx	pc
	nop

	.code	32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr, pc}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
#endif /* !__thumb2__ */

/* Functions to support compact pic switch tables in thumb1 state.
   All these routines take an index into the table in r0.  The
   table is at LR & ~1 (but this must be rounded up in the case
   of 32-bit entries).  They are only permitted to clobber r12
   and r14 and r0 must be preserved on exit.  */
#ifdef L_thumb1_case_sqi

@ Signed byte table entries: r1 = table base (lr with Thumb bit
@ cleared), fetch entry, double it and add to lr.
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_sqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1		@ clear the Thumb bit of lr
	ldrsb	r1, [r1, r0]
	lsls	r1, r1, #1		@ entries are instruction offsets / 2
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_sqi)
#endif

#ifdef L_thumb1_case_uqi

@ As above but with unsigned byte table entries.
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrb	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uqi)
#endif

#ifdef L_thumb1_case_shi

@ Signed halfword table entries; the index is scaled by 2.
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_shi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1		@ scale index for halfword entries
	lsls	r1, r1, #1
	ldrsh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_shi)
#endif

#ifdef L_thumb1_case_uhi

@ As above but with unsigned halfword table entries.
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uhi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uhi)
#endif

#ifdef L_thumb1_case_si

@ Word-sized table entries: table is word-aligned (lr rounded up),
@ entries are byte offsets added to the table base.
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_si
	push	{r0, r1}
	mov	r1, lr
	adds.n	r1, r1, #2	/* Align to word.  */
	lsrs	r1, r1, #2
	lsls	r0, r0, #2	/* Scale the index for word entries.  */
	lsls	r1, r1, #2
	ldr	r0, [r1, r0]
	adds	r0, r0, r1	/* Entry is relative to the table base.  */
	mov	lr, r0
	pop	{r0, r1}
	mov	pc, lr		/* We know we were called from thumb code.  */
	SIZE (__gnu_thumb1_case_si)
#endif

#endif /* Arch supports thumb.  */

/* Wrappers around the CFI directives, shared by the included files.  */
.macro CFI_START_FUNCTION
	.cfi_startproc
	.cfi_remember_state
.endm

.macro CFI_END_FUNCTION
	.cfi_restore_state
	.cfi_endproc
.endm

#ifndef __symbian__
/* The condition here must match the one in gcc/config/arm/elf.h.  */
#ifndef NOT_ISA_TARGET_32BIT
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#else /* NOT_ISA_TARGET_32BIT */
#include "bpabi-v6m.S"
#endif /* NOT_ISA_TARGET_32BIT */
#endif /* !__symbian__ */