1@ libgcc routines for ARM cpu. 2@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk) 3 4/* Copyright (C) 1995-2016 Free Software Foundation, Inc. 5 6This file is free software; you can redistribute it and/or modify it 7under the terms of the GNU General Public License as published by the 8Free Software Foundation; either version 3, or (at your option) any 9later version. 10 11This file is distributed in the hope that it will be useful, but 12WITHOUT ANY WARRANTY; without even the implied warranty of 13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14General Public License for more details. 15 16Under Section 7 of GPL version 3, you are granted additional 17permissions described in the GCC Runtime Library Exception, version 183.1, as published by the Free Software Foundation. 19 20You should have received a copy of the GNU General Public License and 21a copy of the GCC Runtime Library Exception along with this program; 22see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23<http://www.gnu.org/licenses/>. */ 24 25/* An executable stack is *not* required for these functions. */ 26#if defined(__ELF__) && defined(__linux__) 27.section .note.GNU-stack,"",%progbits 28.previous 29#endif /* __ELF__ and __linux__ */ 30 31#ifdef __ARM_EABI__ 32/* Some attributes that are common to all routines in this file. */ 33 /* Tag_ABI_align_needed: This code does not require 8-byte 34 alignment from the caller. */ 35 /* .eabi_attribute 24, 0 -- default setting. */ 36 /* Tag_ABI_align_preserved: This code preserves 8-byte 37 alignment in any callee. */ 38 .eabi_attribute 25, 1 39#endif /* __ARM_EABI__ */ 40/* ------------------------------------------------------------------------ */ 41 42/* We need to know what prefix to add to function names. */ 43 44#ifndef __USER_LABEL_PREFIX__ 45#error __USER_LABEL_PREFIX__ not defined 46#endif 47 48/* ANSI concatenation macros. 
 */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#elif defined __vxworks && !defined __PIC__
#define __PLT__ /* Not supported by the kernel loader. */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for interworking.  */

/* Collapse the per-variant __ARM_ARCH_* predefines into a single numeric
   __ARM_ARCH__ value used for feature tests throughout this file.  */

#if defined(__ARM_ARCH_2__)
# define __ARM_ARCH__ 2
#endif

#if defined(__ARM_ARCH_3__)
# define __ARM_ARCH__ 3
#endif

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
	|| defined(__ARM_ARCH_6M__)
# define __ARM_ARCH__ 6
#endif

#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
	|| defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
	|| defined(__ARM_ARCH_7EM__)
# define __ARM_ARCH__ 7
#endif

#if defined(__ARM_ARCH_8A__)
# define __ARM_ARCH__ 8
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* There are times when we might prefer Thumb1 code even if ARM code is
   permitted, for example, the code might be smaller, or there might be
   interworking problems with switching to ARM state if interworking is
   disabled.  */
#if (defined(__thumb__)			\
     && !defined(__thumb2__)		\
     && (!defined(__THUMB_INTERWORK__)	\
	 || defined (__OPTIMIZE_SIZE__)	\
	 || defined(__ARM_ARCH_6M__)))
# define __prefer_thumb__
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif

/* Emit a hand-rolled .debug_frame record: DW_CFA_restore for \reg at
   byte \advance into the function, then reset the CFA offset.  */
.macro	cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm

/* Record that \reg was saved at byte offset \offset (negative, converted
   to the CIE's -4 data alignment factor) and set the new CFA offset.  */
.macro	cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm

/* Open a .debug_frame CIE plus an FDE covering
   \start_label..\end_label.  Closed by cfi_end.  */
.macro cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
	.4byte	0xffffffff	@ CIE Identifier Tag
	.byte	0x1	@ CIE Version
	.ascii	"\0"	@ CIE Augmentation
	.uleb128 0x1	@ CIE Code Alignment Factor
	.sleb128 -4	@ CIE Data Alignment Factor
	.byte	0xe	@ CIE RA Column
	.byte	0xc	@ DW_CFA_def_cfa
	.uleb128 0xd
	.uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm

/* Terminate the FDE opened by cfi_start and define \end_label.  */
.macro cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm

/* Pop \regs plus the return address and return, using bx when
   interworking is required.  \unwind names the cfi_push label so the
   matching restore record can be emitted.
   Don't pass dirn, it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, lr}
# else
	ldm\cond\dirn	sp!, {\regs, lr}
# endif
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	/* Caller is responsible for providing IT instruction.  */
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, pc}
# else
	ldm\cond\dirn	sp!, {\regs, pc}
# endif
	.endif
#endif
.endm

/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.
 */
#if defined(__thumb2__)
/* Emit an IT instruction for the following conditional instruction(s);
   \suffix selects it/itt/ite forms.  No-op in ARM state (below).  */
.macro do_it cond, suffix=""
	it\suffix	\cond
.endm
.macro shift1 op, arg0, arg1, arg2
	\op	\arg0, \arg1, \arg2
.endm
#define do_push	push
#define do_pop	pop
#define COND(op1, op2, cond) op1 ## op2 ## cond
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\shiftop \tmp, \src2, \shiftreg
	\name \dest, \src1, \tmp
.endm
#else
.macro do_it cond, suffix=""
.endm
.macro shift1 op, arg0, arg1, arg2
	mov	\arg0, \arg1, \op \arg2
.endm
#define do_push	stmfd sp!,
#define do_pop	ldmfd sp!,
/* Note the operand order difference: ARM divided syntax places the
   condition before the flag/width suffix, unified places it after.  */
#define COND(op1, op2, cond) op1 ## cond ## op2
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\name \dest, \src1, \src2, \shiftop \shiftreg
.endm
#endif

/* ARM-state division-by-zero stub: saturate r0 per the AEABI
   (0xffffffff for unsigned, INT_MAX/INT_MIN by sign for signed), then
   tail-call __aeabi_idiv0 with the original lr so the fault points at
   the caller.  */
#ifdef __ARM_EABI__
.macro ARM_LDIV0 name signed
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM (__aeabi_idiv0) __PLT__
.endm
#else
.macro ARM_LDIV0 name signed
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm
#endif


/* Thumb-state counterpart of ARM_LDIV0.  The v6-M variant builds the
   saturated value with move/shift only, then reaches __aeabi_idiv0 via
   a PC-relative address pushed over the stacked return slot.  */
#ifdef __ARM_EABI__
.macro THUMB_LDIV0 name signed
#if defined(__ARM_ARCH_6M__)
	.ifc \signed, unsigned
	cmp	r0, #0
	beq	1f
	mov	r0, #0
	mvn	r0, r0		@ 0xffffffff
1:
	.else
	cmp	r0, #0
	beq	2f
	blt	3f
	mov	r0, #0
	mvn	r0, r0
	lsr	r0, r0, #1	@ 0x7fffffff
	b	2f
3:	mov	r0, #0x80
	lsl	r0, r0, #24	@ 0x80000000
2:
	.endif
	push	{r0, r1, r2}
	ldr	r0, 4f
	adr	r1, 4f
	add	r0, r1
	str	r0, [sp, #8]
	@ We know we are not on armv4t, so pop pc is safe.
	pop	{r0, r1, pc}
	.align	2
4:
	.word	__aeabi_idiv0 - 4b
#elif defined(__thumb2__)
	.syntax unified
	.ifc \signed, unsigned
	cbz	r0, 1f
	mov	r0, #0xffffffff
1:
	.else
	cmp	r0, #0
	do_it	gt
	movgt	r0, #0x7fffffff
	do_it	lt
	movlt	r0, #0x80000000
	.endif
	b.w	SYM(__aeabi_idiv0) __PLT__
#else
	@ Drop to ARM state via bx pc so conditional moves are available.
	.align	2
	bx	pc
	nop
	.arm
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM(__aeabi_idiv0) __PLT__
	.thumb
#endif
.endm
#else
.macro THUMB_LDIV0 name signed
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm
#endif

/* Close a function opened with FUNC_START: emit its ELF size.  */
.macro FUNC_END name
	SIZE (__\name)
.endm

/* Close a division routine: emit the shared Ldiv0 handler (branched to
   on a zero divisor) wrapped in its own debug_frame FDE.  */
.macro DIV_FUNC_END name signed
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name \signed
#else
	ARM_LDIV0 \name \signed
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm

/* Declare a global Thumb entry point without the __ prefix.  */
.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.
 */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX .syntax divided
# else
#define THUMB_SYNTAX
# endif
#else
#define THUMB_FUNC
#define THUMB_CODE
#define THUMB_SYNTAX
#endif

/* Open the global function __\name, optionally in its own
   .text.__\name section (sp_section=function_section) for --gc-sections.  */
.macro FUNC_START name sp_section=
  .ifc \sp_section, function_section
	.section	.text.__\name,"ax",%progbits
  .else
	.text
  .endif
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
	THUMB_SYNTAX
SYM (__\name):
.endm

/* Declare a local (unprefixed) symbol at the current position.  */
.macro ARM_SYM_START name
	TYPE (\name)
	.align 0
SYM (\name):
.endm

.macro SYM_END name
	SIZE (\name)
.endm

/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro ARM_FUNC_START name sp_section=
	FUNC_START \name \sp_section
	.syntax unified
.endm
#define EQUIV .thumb_set
.macro  ARM_CALL name
	bl	__\name
.endm

#elif defined(__INTERWORKING_STUBS__)

/* Thumb entry point that immediately switches to ARM state.  */
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.
 */
.macro  ARM_CALL name
	bl	_L__\name
.endm

#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

#ifdef __ARM_ARCH_6M__
#define EQUIV .thumb_set
#else
.macro	ARM_FUNC_START name sp_section=
  .ifc \sp_section, function_section
	.section	.text.__\name,"ax",%progbits
  .else
	.text
  .endif
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
	bl	__\name
.endm
#endif

#endif

/* Make __\new an alias for __\old, using a Thumb-aware set when
   assembling Thumb code.  */
.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

#ifndef __ARM_ARCH_6M__
.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm
#endif

/* Endianness-dependent halves of a 64-bit value in r0/r1 and r2/r3.  */
#ifdef __ARMEB__
#define xxh r0
#define xxl r1
#define yyh r2
#define yyl r3
#else
#define xxh r1
#define xxl r0
#define yyh r3
#define yyl r2
#endif

#ifdef __ARM_EABI__
.macro	WEAK name
	.weak SYM (__\name)
.endm
#endif

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2	@ shares r2 with result; only one is live
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
/* Unsigned divide \dividend by \divisor leaving the quotient in \result
   (\dividend ends up holding the remainder).  All operands are
   clobbered.  */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

#if defined (__thumb2__)
	@ Use clz to compute the quotient bit count, then jump into the
	@ middle of a fully unrolled 32-step shift/subtract sequence
	@ (16 bytes per step, hence lsl #4).
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsb	\curbit, \curbit, #31
	adr	\result, 1f
	add	\curbit, \result, \curbit, lsl #4
	mov	\result, #0
	mov	pc, \curbit
.p2align 3
1:
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp.w	\dividend, \divisor, lsl #shift
	nop.n
	adc.w	\result, \result, \result
	it	cs
	subcs.w	\dividend, \dividend, \divisor, lsl #shift
	.endr
#else
	@ Same idea in ARM state: each unrolled step is 3 words, so the
	@ entry offset is curbit*3*4 (curbit*3 via add, then lsl #2).
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr
#endif

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4-bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* __ARM_ARCH__ < 5 */

	@ Division loop: try four quotient bits per iteration, walking
	@ divisor and curbit down one nibble each pass.
1:	cmp	\dividend, \divisor
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	do_it	ne, t
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
/* Compute \order = log2(\divisor) for an exact power of two, so the
   division can be done with a single right shift.  */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ Binary search on the set bit, 16/8/4 bits at a time, finishing
	@ with a two-bit fixup.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
/* ------------------------------------------------------------------------ */
/* Reduce \dividend to \dividend % \divisor (both unsigned).  \order and
   \spare are scratch.  */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	@ Jump into an unrolled compare/subtract ladder; each step is
	@ 2 words, hence the lsl #3 entry offset.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed substractions to keep only the reminder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/substractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
/* Thumb-1 shift-and-subtract divide.  With \modulo = 0 the quotient is
   accumulated in "result"; with \modulo = 1 the dividend is reduced to
   the remainder and over-subtractions are tracked in "overdone" for the
   fixup after LSYM(Lover5).  Uses the register aliases declared above.  */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done, we can fix them up
	@ afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	@ Record this subtraction as curbit rotated right by 1.
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	@ Thumb-1 lsr sets the flags, so each shift doubles as the
	@ zero test for loop termination.
	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which were not needed
	@ are governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early, because dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	@ Undo each over-subtraction whose marker bit is set.
	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm
/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

#if defined(__prefer_thumb__)

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(udivsi3_skip_div0_test):
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	udiv	r0, r0, r1
	RET

#else /* ARM version/Thumb-2.  */

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	/* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
	   check for division-by-zero a second time.
 */
LSYM(udivsi3_skip_div0_test):
	@ Fast paths: divisor of 1 returns the dividend unchanged; the
	@ borrow from subs flags a zero divisor.
	subs	r2, r1, #1
	do_it	eq
	RETc(eq)
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f
	tst	r1, r2				@ power-of-two divisor?
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

	@ dividend <= divisor: quotient is 1 (equal) or 0 (less).
11:	do_it	eq, e
	moveq	r0, #1
	movne	r0, #0
	RET

	@ Power-of-two divisor: divide with a single shift.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3 unsigned

/* __aeabi_uidivmod: quotient in r0, remainder in r1.  Implemented on
   top of udivsi3 (remainder = dividend - quotient * divisor) except
   when hardware divide is available.  */
#if defined(__prefer_thumb__)
FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	push	{r0, r1, lr}
	bl	LSYM(udivsi3_skip_div0_test)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	udiv	r0, r0, r1
	mls	r1, r0, r1, r2
	RET
#else
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(udivsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_uidivmod

#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

#ifdef __ARM_ARCH_EXT_IDIV__

	ARM_FUNC_START umodsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	udiv	r2, r0, r1
	mls	r0, r1, r2, r0
	RET

#elif defined(__thumb__)

	FUNC_START umodsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET			@ dividend < divisor: it is the remainder

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version.
 */

	FUNC_START umodsi3

	@ Combined special-case check: ls afterwards covers divisor 0/1,
	@ dividend <= divisor and power-of-two divisors in one exit.
	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif	/* ARM version.  */

	DIV_FUNC_END umodsi3 unsigned

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

#if defined(__prefer_thumb__)

	FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	@ Negate the unsigned quotient if the operand signs differed.
	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	r0, r0
LSYM(Lover12):
	pop	{ work }
	RET

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	sdiv	r0, r0, r1
	RET

#else /* ARM/Thumb-2 version.  */

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	eor	ip, r0, r1			@ save the sign of the result.
	do_it	mi
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	do_it	mi
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	@ Apply the sign saved in ip to the unsigned quotient.
	cmp	ip, #0
	do_it	mi
	rsbmi	r0, r0, #0
	RET

	@ Division by 1 or -1: pass the dividend through, negated if the
	@ signs differ.
10:	teq	ip, r0				@ same sign ?
	do_it	mi
	rsbmi	r0, r0, #0
	RET

	@ |dividend| <= |divisor|: quotient is 0, +1 or -1.
11:	do_it	lo
	movlo	r0, #0
	do_it	eq,t
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	RET

	@ Power-of-two divisor: shift, then fix the sign.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	do_it	mi
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3 signed

/* __aeabi_idivmod: signed quotient in r0, remainder in r1, built on
   divsi3 unless hardware divide is available.  */
#if defined(__prefer_thumb__)
FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	push	{r0, r1, lr}
	bl	LSYM(divsi3_skip_div0_test)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov	r2, r0
	sdiv	r0, r0, r1
	mls	r1, r0, r1, r2
	RET
#else
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(divsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

#if defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	sdiv	r2, r0, r1
	mls	r0, r1, r2, r0
	RET

#elif defined(__thumb__)

	FUNC_START modsi3

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor		@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ Need to save the sign of the dividend, unfortunately, we need
	@ work later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	@ First pop retrieves the saved dividend sign; the remainder
	@ takes the dividend's sign per C semantics.
	pop	{ work }
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

	@ Restore the dividend's sign on the remainder.
10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3 signed

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

/* Trivial divide-by-zero handlers that simply return (the saturated
   value set up by ARM_LDIV0/THUMB_LDIV0 becomes the result).  Weak so
   applications may override them.  */
#ifdef __ARM_EABI__
	WEAK aeabi_idiv0
	WEAK aeabi_ldiv0
	FUNC_START aeabi_idiv0
	FUNC_START aeabi_ldiv0
	RET
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	FUNC_START div0
	RET
	FUNC_END div0
#endif

#endif /* L_divmodsi_tools */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.
 */
#define SIGFPE	8

/* Division-by-zero handler for GNU/Linux: raise SIGFPE via the C
   library, then return to the caller as if the divide had returned.
   The EABI entry points are weak so applications can override them.  */
#ifdef __ARM_EABI__
	cfi_start	__aeabi_ldiv0, LSYM(Lend_aeabi_ldiv0)
	WEAK aeabi_idiv0
	WEAK aeabi_ldiv0
	ARM_FUNC_START aeabi_idiv0
	ARM_FUNC_START aeabi_ldiv0
	do_push	{r1, lr}
98:	cfi_push 98b - __aeabi_ldiv0, 0xe, -0x4, 0x8
#else
	cfi_start	__div0, LSYM(Lend_div0)
	ARM_FUNC_START div0
	do_push	{r1, lr}
98:	cfi_push 98b - __div0, 0xe, -0x4, 0x8
#endif

	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1 unwind=98b		@ restore r1/lr and return

#ifdef __ARM_EABI__
	cfi_end	LSYM(Lend_aeabi_ldiv0)
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	cfi_end	LSYM(Lend_div0)
	FUNC_END div0
#endif

#endif /* L_dvmd_lnx */
#ifdef L_clear_cache
#if defined __ARM_EABI__ && defined __linux__
@ EABI GNU/Linux call to cacheflush syscall.
@ The caller's r0/r1 arguments are passed through to the kernel
@ unchanged; r2 (flags) is cleared.  r7 carries the syscall number
@ 0xf0002 (Linux __ARM_NR_cacheflush) and is preserved for the caller.
	ARM_FUNC_START clear_cache
	do_push	{r7}
#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
	movw	r7, #2			@ build 0xf0002 with movw/movt
	movt	r7, #0xf
#else
	mov	r7, #0xf0000		@ build 0xf0002 without movw/movt
	add	r7, r7, #2
#endif
	mov	r2, #0
	swi	0
	do_pop	{r7}
	RET
	FUNC_END clear_cache
#else
#error "This is only for ARM EABI GNU/Linux"
#endif
#endif /* L_clear_cache */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */

/* al/ah name the low/high words of the 64-bit operand; the register
   assignment depends on endianness.  */
#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.
 */
#ifndef __symbian__

#ifdef L_lshrdi3

/* __lshrdi3 / __aeabi_llsr: 64-bit logical shift right.
   In:  (ah:al) = value, r2 = shift count.  Out: (ah:al) = value >> r2.  */
	FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
	lsr	al, r2			@ shift low word (bits from ah still missing)
	mov	r3, ah
	lsr	ah, r2
	mov	ip, r3			@ keep the original high word
	sub	r2, #32
	lsr	r3, r2			@ ah's contribution when count >= 32
	orr	al, r3
	neg	r2, r2			@ r2 = 32 - count
	mov	r3, ip
	lsl	r3, r2			@ ah's contribution when count < 32
	orr	al, r3
	RET
#else
	subs	r3, r2, #32		@ r3 = count - 32 (pl when count >= 32)
	rsb	ip, r2, #32		@ ip = 32 - count
	movmi	al, al, lsr r2		@ count < 32
	movpl	al, ah, lsr r3		@ count >= 32: al comes entirely from ah
	orrmi	al, al, ah, lsl ip	@ merge ah's low bits into al
	mov	ah, ah, lsr r2		@ zero-fill the high word
	RET
#endif
	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif

#ifdef L_ashrdi3

/* __ashrdi3 / __aeabi_lasr: 64-bit arithmetic shift right.
   In:  (ah:al) = value, r2 = shift count.  Out: (ah:al) = value >> r2,
   sign-filled.  */
	FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	asr	ah, r2
	sub	r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	bmi	1f
	mov	ip, r3
	asr	r3, r2			@ ah's contribution when count >= 32
	orr	al, r3
	mov	r3, ip
1:
	neg	r2, r2			@ r2 = 32 - count
	lsl	r3, r2			@ ah's contribution when count < 32
	orr	al, r3
	RET
#else
	subs	r3, r2, #32		@ r3 = count - 32 (pl when count >= 32)
	rsb	ip, r2, #32		@ ip = 32 - count
	movmi	al, al, lsr r2		@ count < 32
	movpl	al, ah, asr r3		@ count >= 32: al comes entirely from ah
	orrmi	al, al, ah, lsl ip	@ merge ah's low bits into al
	mov	ah, ah, asr r2		@ sign-fill the high word
	RET
#endif

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif

#ifdef L_ashldi3

/* __ashldi3 / __aeabi_llsl: 64-bit shift left.
   In:  (ah:al) = value, r2 = shift count.  Out: (ah:al) = value << r2.  */
	FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
	lsl	ah, r2			@ shift high word (bits from al still missing)
	mov	r3, al
	lsl	al, r2
	mov	ip, r3			@ keep the original low word
	sub	r2, #32
	lsl	r3, r2			@ al's contribution when count >= 32
	orr	ah, r3
	neg	r2, r2			@ r2 = 32 - count
	mov	r3, ip
	lsr	r3, r2			@ al's contribution when count < 32
	orr	ah, r3
	RET
#else
	subs	r3, r2, #32		@ r3 = count - 32 (pl when count >= 32)
	rsb	ip, r2, #32		@ ip = 32 - count
	movmi	ah, ah, lsl r2		@ count < 32
	movpl	ah, al, lsl r3		@ count >= 32: ah comes entirely from al
	orrmi	ah, ah, al, lsr ip	@ merge al's high bits into ah
	mov	al, al, lsl r2		@ zero-fill the low word
	RET
#endif
	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif

#endif /* __symbian__ */

#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| \
	defined(__ARM_ARCH_5TEJ__)
/* These architectures provide the CLZ instruction.  */
#define HAVE_ARM_CLZ 1
#endif

#ifdef L_clzsi2
#if defined(__ARM_ARCH_6M__)
/* __clzsi2, Thumb-1 (v6-M) version: count leading zeros of r0,
   result in r0 (32 for input 0).  Binary search over 16/8/4-bit
   halves, then a 16-entry table for the final nibble.  Clobbers r1-r3.  */
FUNC_START clzsi2
	mov	r1, #28			@ r1 = 28 - (number of bits shifted out)
	mov	r3, #1
	lsl	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsr	r0, r0, #16
	sub	r1, r1, #16
2:	lsr	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsr	r0, r0, #8
	sub	r1, r1, #8
2:	lsr	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsr	r0, r0, #4
	sub	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]		@ clz of the remaining 4-bit value
	add	r0, r0, r1
	bx	lr
.align 2
1:
.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
	FUNC_END clzsi2
#else
/* __clzsi2, ARM/Thumb-2 version: one CLZ instruction when available,
   otherwise the same binary search + table as above.  */
ARM_FUNC_START clzsi2
# if defined(HAVE_ARM_CLZ)
	clz	r0, r0
	RET
# else
	mov	r1, #28
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]
	add	r0, r0, r1
	RET
.align 2
1:
.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
# endif /* !HAVE_ARM_CLZ */
	FUNC_END clzsi2
#endif
#endif /* L_clzsi2 */

#ifdef L_clzdi2
#if !defined(HAVE_ARM_CLZ)

/* __clzdi2 without hardware CLZ: dispatch on the high word and let
   __clzsi2 do the counting.  xxh/xxl name the high/low word registers
   (endian-dependent aliases defined earlier in this file).  */
# if defined(__ARM_ARCH_6M__)
FUNC_START clzdi2
	push	{r4, lr}
# else
ARM_FUNC_START clzdi2
	do_push	{r4, lr}
# endif
	cmp	xxh, #0
	bne	1f
# ifdef __ARMEB__
	mov	r0, xxl			@ high word is zero: count the low word
	bl	__clzsi2
	add	r0, r0, #32		@ ... plus the 32 zero bits of the high word
	b	2f
1:
	bl	__clzsi2		@ high word already in r0 (big-endian)
# else
	bl	__clzsi2		@ high word is zero: low word already in r0
	add	r0, r0, #32		@ ... plus the 32 zero bits of the high word
	b	2f
1:
	mov	r0, xxh			@ count the non-zero high word
	bl	__clzsi2
# endif
2:
# if defined(__ARM_ARCH_6M__)
	pop	{r4, pc}
# else
	RETLDM	r4
# endif
	FUNC_END clzdi2

#else /* HAVE_ARM_CLZ */

ARM_FUNC_START clzdi2
	cmp	xxh, #0
	do_it	eq, et
	clzeq	r0, xxl			@ high word zero: result is 32 + clz(low)
	clzne	r0, xxh
	addeq	r0, r0, #32
	RET
	FUNC_END clzdi2

#endif
#endif /* L_clzdi2 */

#ifdef L_ctzsi2
#if defined(__ARM_ARCH_6M__)
/* __ctzsi2, Thumb-1 (v6-M) version: count trailing zeros of r0,
   result in r0.  Isolates the lowest set bit with r0 & -r0, then
   locates it by the same binary search + table as __clzsi2.
   (For input 0 this code yields -1; ctz(0) is undefined.)  */
FUNC_START ctzsi2
	neg	r1, r0
	and	r0, r0, r1		@ r0 = lowest set bit of the input
	mov	r1, #28
	mov	r3, #1
	lsl	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsr	r0, r0, #16
	sub	r1, r1, #16
2:	lsr	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsr	r0, r0, #8
	sub	r1, r1, #8
2:	lsr	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsr	r0, r0, #4
	sub	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]
	sub	r0, r0, r1		@ convert to the bit's index from bit 0
	bx	lr
.align 2
1:
.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
	FUNC_END ctzsi2
#else
/* __ctzsi2, ARM/Thumb-2 version: isolate the lowest set bit, then
   use CLZ (31 - clz(bit)) or the search + table fallback.  */
ARM_FUNC_START ctzsi2
	rsb	r1, r0, #0
	and	r0, r0, r1		@ r0 = lowest set bit of the input
# if defined(HAVE_ARM_CLZ)
	clz	r0, r0
	rsb	r0, r0, #31		@ ctz = 31 - clz(lowest set bit)
	RET
# else
	mov	r1, #28
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]
	sub	r0, r0, r1
	RET
.align 2
1:
.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
# endif /* !HAVE_ARM_CLZ */
	FUNC_END ctzsi2
#endif
#endif /* L_ctzsi2 */

/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).
 */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
	|| defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
	|| __ARM_ARCH__ >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */

	.text
	.align 0
	.force_thumb

/* Emit a tiny Thumb trampoline _call_via_<reg> that simply BXes to the
   address held in <reg>; the BX switches mode based on that address's
   bottom bit.  */
.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

/* Don't bother with the old interworking routines for Thumb-2.  */
/* ??? Maybe only omit these on "m" variants.  */
#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */

	.text
	.align 0

	.code   32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	@ Pop the real return address (pushed by _interwork_call_via_rN)
	@ and BX back to the caller's instruction set.
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]		@ return address stashed below r7
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]		@ return address stashed below r11
	bx	lr

/* Emit _interwork_<frame>_call_via_<register>: save lr at [<frame>, #-4]
   and route the return through _arm_return_<frame> when the target is ARM
   code (bottom address bit clear).  The trailing \return argument is
   unused.  */
.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc			@ switch to ARM mode (pc here is 4 ahead)
	nop

	.code	32
	tst	\register, #1		@ Thumb target?  Then BX does it all.
	streq	lr, [\frame, #-4]	@ ARM target: stash the real return address
	adreq	lr, _arm_return_\frame	@ ... and have the callee return via it
	bx	\register

	SIZE	(\name)
.endm

/* Emit _interwork_call_via_<register> (stack-based return) plus the two
   frame-based variants for r7 and r11.  */
.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc			@ switch to ARM mode
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1		@ Thumb target?  Then BX does it all.
	streq	lr, [sp, #-8]!		@ ARM target: push the real return address
	adreq	lr, _arm_return		@ ... and have the callee return via it
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  */
	.code 16

	THUMB_FUNC_START _interwork_call_via_lr

	bx	pc			@ switch to ARM mode
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1			@ Thumb target?  Then BX does it all.
	stmeqdb	r13!, {lr, pc}		@ ARM target: push return address (lr is the target here)
	mov	ip, lr			@ free lr so it can hold the return route
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
#endif /* !__thumb2__ */

/* Functions to support compact pic switch tables in thumb1 state.
   All these routines take an index into the table in r0.  The
   table is at LR & ~1 (but this must be rounded up in the case
   of 32-bit entries).  They are only permitted to clobber r12
   and r14 and r0 must be preserved on exit.
 */
#ifdef L_thumb1_case_sqi

/* __gnu_thumb1_case_sqi: switch-table dispatch, signed-byte entries.
   r0 = index; the table of half-word offsets follows the BL at lr & ~1.
   Advances lr past the selected entry; r0 and r1 are preserved.  */
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_sqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1		@ clear the Thumb bit: r1 = table base
	ldrsb	r1, [r1, r0]		@ fetch signed entry, in units of 2 bytes
	lsls	r1, r1, #1		@ convert to a byte offset
	add	lr, lr, r1		@ redirect the return into the case body
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_sqi)
#endif

#ifdef L_thumb1_case_uqi

/* __gnu_thumb1_case_uqi: as above, but unsigned-byte table entries.  */
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1		@ clear the Thumb bit: r1 = table base
	ldrb	r1, [r1, r0]		@ fetch unsigned entry, in units of 2 bytes
	lsls	r1, r1, #1		@ convert to a byte offset
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uqi)
#endif

#ifdef L_thumb1_case_shi

/* __gnu_thumb1_case_shi: as above, with signed half-word entries;
   the index is scaled by 2 to address them.  */
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_shi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #2		@ scale index to half-word entries
	lsls	r1, r1, #1		@ clear the Thumb bit: r1 = table base
	ldrsh	r1, [r1, r0]		@ fetch signed entry, in units of 2 bytes
	lsls	r1, r1, #1		@ convert to a byte offset
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_shi)
#endif

#ifdef L_thumb1_case_uhi

/* __gnu_thumb1_case_uhi: as above, with unsigned half-word entries.  */
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uhi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #2		@ scale index to half-word entries
	lsls	r1, r1, #1		@ clear the Thumb bit: r1 = table base
	ldrh	r1, [r1, r0]		@ fetch unsigned entry, in units of 2 bytes
	lsls	r1, r1, #1		@ convert to a byte offset
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uhi)
#endif

#ifdef L_thumb1_case_si

/* __gnu_thumb1_case_si: 32-bit table entries.  The table is word
   aligned, entries are byte offsets relative to the table base, and
   the dispatch jumps directly (mov pc) rather than adjusting lr.  */
	.text
	.align 0
	.force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_si
	push	{r0, r1}
	mov	r1, lr
	adds.n	r1, r1, #2	/* Align to word.  */
	lsrs	r1, r1, #2
	lsls	r0, r0, #2		@ scale index to word entries
	lsls	r1, r1, #2		@ r1 = word-aligned table base
	ldr	r0, [r1, r0]		@ fetch the entry (offset from table base)
	adds	r0, r0, r1		@ compute the target address
	mov	lr, r0
	pop	{r0, r1}
	mov	pc, lr		/* We know we were called from thumb code.  */
	SIZE (__gnu_thumb1_case_si)
#endif

#endif /* Arch supports thumb.
 */

/* Open a DWARF CFI scope for a function and snapshot the initial
   register state so CFI_END_FUNCTION can restore it.  */
.macro CFI_START_FUNCTION
	.cfi_startproc
	.cfi_remember_state
.endm

/* Close the CFI scope opened by CFI_START_FUNCTION.  */
.macro CFI_END_FUNCTION
	.cfi_restore_state
	.cfi_endproc
.endm

/* Pull in the soft-float and BPABI helper bodies; v6-M gets its own
   Thumb-1 compatible BPABI implementation.  */
#ifndef __symbian__
#ifndef __ARM_ARCH_6M__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#else /* __ARM_ARCH_6M__ */
#include "bpabi-v6m.S"
#endif /* __ARM_ARCH_6M__ */
#endif /* !__symbian__ */