@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for interworking.  */

@ This selects the minimum architecture level required.
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# undef __ARM_ARCH__
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 6
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __THUMB__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif

@ Emit a DW_CFA_restore record for \reg plus a new CFA offset into the
@ hand-written .debug_frame FDE (no-op on non-ELF targets).
.macro	cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm

@ Emit a DW_CFA_offset record saying \reg was saved at \offset from the
@ CFA, plus the new CFA offset (no-op on non-ELF targets).
.macro	cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm

@ Open a .debug_frame CIE + FDE covering [\start_label, \end_label).
.macro cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
        .4byte	0xffffffff	@ CIE Identifier Tag
        .byte	0x1	@ CIE Version
        .ascii	"\0"	@ CIE Augmentation
        .uleb128 0x1	@ CIE Code Alignment Factor
        .sleb128 -4	@ CIE Data Alignment Factor
        .byte	0xe	@ CIE RA Column
        .byte	0xc	@ DW_CFA_def_cfa
        .uleb128 0xd
        .uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm

@ Close the FDE opened by cfi_start and define its end label.
.macro cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm

/* Don't pass dirn, it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
	ldm\cond\dirn	sp!, {\regs, lr}
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
	ldm\cond\dirn	sp!, {\regs, pc}
	.endif
#endif
.endm


@ ARM-mode division-by-zero trampoline: call __div0, return 0.
.macro	ARM_LDIV0 name
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm


@ Thumb-mode division-by-zero trampoline: call __div0, return 0.
.macro	THUMB_LDIV0 name
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm

.macro	FUNC_END name
	SIZE (__\name)
.endm

@ Shared epilogue for the division routines: emits the Ldiv0 handler
@ (reached on a zero divisor) and the unwind info for the function.
.macro	DIV_FUNC_END name
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name
#else
	ARM_LDIV0 \name
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm

.macro	THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.
*/

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#else
#define THUMB_FUNC
#define THUMB_CODE
#endif

@ Standard function prologue: export __\name with the right mode markers.
.macro	FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
SYM (__\name):
.endm

/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__INTERWORKING_STUBS__)
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro  ARM_CALL name
	bl	_L__\name
.endm
#else
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
	bl	__\name
.endm
#endif

@ Make __\new an alias for __\old (mode-aware in Thumb builds).
.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

@ Alias for an ARM-mode function, including its _L__ interworking hook.
.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/* ------------------------------------------------------------------------ */
/* Bodies of the division and modulo routines.
*/ 341*404b540aSrobert/* ------------------------------------------------------------------------ */ 342*404b540aSrobert.macro ARM_DIV_BODY dividend, divisor, result, curbit 343*404b540aSrobert 344*404b540aSrobert#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__) 345*404b540aSrobert 346*404b540aSrobert clz \curbit, \dividend 347*404b540aSrobert clz \result, \divisor 348*404b540aSrobert sub \curbit, \result, \curbit 349*404b540aSrobert rsbs \curbit, \curbit, #31 350*404b540aSrobert addne \curbit, \curbit, \curbit, lsl #1 351*404b540aSrobert mov \result, #0 352*404b540aSrobert addne pc, pc, \curbit, lsl #2 353*404b540aSrobert nop 354*404b540aSrobert .set shift, 32 355*404b540aSrobert .rept 32 356*404b540aSrobert .set shift, shift - 1 357*404b540aSrobert cmp \dividend, \divisor, lsl #shift 358*404b540aSrobert adc \result, \result, \result 359*404b540aSrobert subcs \dividend, \dividend, \divisor, lsl #shift 360*404b540aSrobert .endr 361*404b540aSrobert 362*404b540aSrobert#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ 363*404b540aSrobert#if __ARM_ARCH__ >= 5 364*404b540aSrobert 365*404b540aSrobert clz \curbit, \divisor 366*404b540aSrobert clz \result, \dividend 367*404b540aSrobert sub \result, \curbit, \result 368*404b540aSrobert mov \curbit, #1 369*404b540aSrobert mov \divisor, \divisor, lsl \result 370*404b540aSrobert mov \curbit, \curbit, lsl \result 371*404b540aSrobert mov \result, #0 372*404b540aSrobert 373*404b540aSrobert#else /* __ARM_ARCH__ < 5 */ 374*404b540aSrobert 375*404b540aSrobert @ Initially shift the divisor left 3 bits if possible, 376*404b540aSrobert @ set curbit accordingly. This allows for curbit to be located 377*404b540aSrobert @ at the left end of each 4 bit nibbles in the division loop 378*404b540aSrobert @ to save one loop in most cases. 
379*404b540aSrobert tst \divisor, #0xe0000000 380*404b540aSrobert moveq \divisor, \divisor, lsl #3 381*404b540aSrobert moveq \curbit, #8 382*404b540aSrobert movne \curbit, #1 383*404b540aSrobert 384*404b540aSrobert @ Unless the divisor is very big, shift it up in multiples of 385*404b540aSrobert @ four bits, since this is the amount of unwinding in the main 386*404b540aSrobert @ division loop. Continue shifting until the divisor is 387*404b540aSrobert @ larger than the dividend. 388*404b540aSrobert1: cmp \divisor, #0x10000000 389*404b540aSrobert cmplo \divisor, \dividend 390*404b540aSrobert movlo \divisor, \divisor, lsl #4 391*404b540aSrobert movlo \curbit, \curbit, lsl #4 392*404b540aSrobert blo 1b 393*404b540aSrobert 394*404b540aSrobert @ For very big divisors, we must shift it a bit at a time, or 395*404b540aSrobert @ we will be in danger of overflowing. 396*404b540aSrobert1: cmp \divisor, #0x80000000 397*404b540aSrobert cmplo \divisor, \dividend 398*404b540aSrobert movlo \divisor, \divisor, lsl #1 399*404b540aSrobert movlo \curbit, \curbit, lsl #1 400*404b540aSrobert blo 1b 401*404b540aSrobert 402*404b540aSrobert mov \result, #0 403*404b540aSrobert 404*404b540aSrobert#endif /* __ARM_ARCH__ < 5 */ 405*404b540aSrobert 406*404b540aSrobert @ Division loop 407*404b540aSrobert1: cmp \dividend, \divisor 408*404b540aSrobert subhs \dividend, \dividend, \divisor 409*404b540aSrobert orrhs \result, \result, \curbit 410*404b540aSrobert cmp \dividend, \divisor, lsr #1 411*404b540aSrobert subhs \dividend, \dividend, \divisor, lsr #1 412*404b540aSrobert orrhs \result, \result, \curbit, lsr #1 413*404b540aSrobert cmp \dividend, \divisor, lsr #2 414*404b540aSrobert subhs \dividend, \dividend, \divisor, lsr #2 415*404b540aSrobert orrhs \result, \result, \curbit, lsr #2 416*404b540aSrobert cmp \dividend, \divisor, lsr #3 417*404b540aSrobert subhs \dividend, \dividend, \divisor, lsr #3 418*404b540aSrobert orrhs \result, \result, \curbit, lsr #3 419*404b540aSrobert cmp \dividend, #0 
@ Early termination? 420*404b540aSrobert movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? 421*404b540aSrobert movne \divisor, \divisor, lsr #4 422*404b540aSrobert bne 1b 423*404b540aSrobert 424*404b540aSrobert#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ 425*404b540aSrobert 426*404b540aSrobert.endm 427*404b540aSrobert/* ------------------------------------------------------------------------ */ 428*404b540aSrobert.macro ARM_DIV2_ORDER divisor, order 429*404b540aSrobert 430*404b540aSrobert#if __ARM_ARCH__ >= 5 431*404b540aSrobert 432*404b540aSrobert clz \order, \divisor 433*404b540aSrobert rsb \order, \order, #31 434*404b540aSrobert 435*404b540aSrobert#else 436*404b540aSrobert 437*404b540aSrobert cmp \divisor, #(1 << 16) 438*404b540aSrobert movhs \divisor, \divisor, lsr #16 439*404b540aSrobert movhs \order, #16 440*404b540aSrobert movlo \order, #0 441*404b540aSrobert 442*404b540aSrobert cmp \divisor, #(1 << 8) 443*404b540aSrobert movhs \divisor, \divisor, lsr #8 444*404b540aSrobert addhs \order, \order, #8 445*404b540aSrobert 446*404b540aSrobert cmp \divisor, #(1 << 4) 447*404b540aSrobert movhs \divisor, \divisor, lsr #4 448*404b540aSrobert addhs \order, \order, #4 449*404b540aSrobert 450*404b540aSrobert cmp \divisor, #(1 << 2) 451*404b540aSrobert addhi \order, \order, #3 452*404b540aSrobert addls \order, \order, \divisor, lsr #1 453*404b540aSrobert 454*404b540aSrobert#endif 455*404b540aSrobert 456*404b540aSrobert.endm 457*404b540aSrobert/* ------------------------------------------------------------------------ */ 458*404b540aSrobert.macro ARM_MOD_BODY dividend, divisor, order, spare 459*404b540aSrobert 460*404b540aSrobert#if __ARM_ARCH__ >= 5 && ! 
defined (__OPTIMIZE_SIZE__) 461*404b540aSrobert 462*404b540aSrobert clz \order, \divisor 463*404b540aSrobert clz \spare, \dividend 464*404b540aSrobert sub \order, \order, \spare 465*404b540aSrobert rsbs \order, \order, #31 466*404b540aSrobert addne pc, pc, \order, lsl #3 467*404b540aSrobert nop 468*404b540aSrobert .set shift, 32 469*404b540aSrobert .rept 32 470*404b540aSrobert .set shift, shift - 1 471*404b540aSrobert cmp \dividend, \divisor, lsl #shift 472*404b540aSrobert subcs \dividend, \dividend, \divisor, lsl #shift 473*404b540aSrobert .endr 474*404b540aSrobert 475*404b540aSrobert#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ 476*404b540aSrobert#if __ARM_ARCH__ >= 5 477*404b540aSrobert 478*404b540aSrobert clz \order, \divisor 479*404b540aSrobert clz \spare, \dividend 480*404b540aSrobert sub \order, \order, \spare 481*404b540aSrobert mov \divisor, \divisor, lsl \order 482*404b540aSrobert 483*404b540aSrobert#else /* __ARM_ARCH__ < 5 */ 484*404b540aSrobert 485*404b540aSrobert mov \order, #0 486*404b540aSrobert 487*404b540aSrobert @ Unless the divisor is very big, shift it up in multiples of 488*404b540aSrobert @ four bits, since this is the amount of unwinding in the main 489*404b540aSrobert @ division loop. Continue shifting until the divisor is 490*404b540aSrobert @ larger than the dividend. 491*404b540aSrobert1: cmp \divisor, #0x10000000 492*404b540aSrobert cmplo \divisor, \dividend 493*404b540aSrobert movlo \divisor, \divisor, lsl #4 494*404b540aSrobert addlo \order, \order, #4 495*404b540aSrobert blo 1b 496*404b540aSrobert 497*404b540aSrobert @ For very big divisors, we must shift it a bit at a time, or 498*404b540aSrobert @ we will be in danger of overflowing. 
499*404b540aSrobert1: cmp \divisor, #0x80000000 500*404b540aSrobert cmplo \divisor, \dividend 501*404b540aSrobert movlo \divisor, \divisor, lsl #1 502*404b540aSrobert addlo \order, \order, #1 503*404b540aSrobert blo 1b 504*404b540aSrobert 505*404b540aSrobert#endif /* __ARM_ARCH__ < 5 */ 506*404b540aSrobert 507*404b540aSrobert @ Perform all needed substractions to keep only the reminder. 508*404b540aSrobert @ Do comparisons in batch of 4 first. 509*404b540aSrobert subs \order, \order, #3 @ yes, 3 is intended here 510*404b540aSrobert blt 2f 511*404b540aSrobert 512*404b540aSrobert1: cmp \dividend, \divisor 513*404b540aSrobert subhs \dividend, \dividend, \divisor 514*404b540aSrobert cmp \dividend, \divisor, lsr #1 515*404b540aSrobert subhs \dividend, \dividend, \divisor, lsr #1 516*404b540aSrobert cmp \dividend, \divisor, lsr #2 517*404b540aSrobert subhs \dividend, \dividend, \divisor, lsr #2 518*404b540aSrobert cmp \dividend, \divisor, lsr #3 519*404b540aSrobert subhs \dividend, \dividend, \divisor, lsr #3 520*404b540aSrobert cmp \dividend, #1 521*404b540aSrobert mov \divisor, \divisor, lsr #4 522*404b540aSrobert subges \order, \order, #4 523*404b540aSrobert bge 1b 524*404b540aSrobert 525*404b540aSrobert tst \order, #3 526*404b540aSrobert teqne \dividend, #0 527*404b540aSrobert beq 5f 528*404b540aSrobert 529*404b540aSrobert @ Either 1, 2 or 3 comparison/substractions are left. 
530*404b540aSrobert2: cmn \order, #2 531*404b540aSrobert blt 4f 532*404b540aSrobert beq 3f 533*404b540aSrobert cmp \dividend, \divisor 534*404b540aSrobert subhs \dividend, \dividend, \divisor 535*404b540aSrobert mov \divisor, \divisor, lsr #1 536*404b540aSrobert3: cmp \dividend, \divisor 537*404b540aSrobert subhs \dividend, \dividend, \divisor 538*404b540aSrobert mov \divisor, \divisor, lsr #1 539*404b540aSrobert4: cmp \dividend, \divisor 540*404b540aSrobert subhs \dividend, \dividend, \divisor 541*404b540aSrobert5: 542*404b540aSrobert 543*404b540aSrobert#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */ 544*404b540aSrobert 545*404b540aSrobert.endm 546*404b540aSrobert/* ------------------------------------------------------------------------ */ 547*404b540aSrobert.macro THUMB_DIV_MOD_BODY modulo 548*404b540aSrobert @ Load the constant 0x10000000 into our work register. 549*404b540aSrobert mov work, #1 550*404b540aSrobert lsl work, #28 551*404b540aSrobertLSYM(Loop1): 552*404b540aSrobert @ Unless the divisor is very big, shift it up in multiples of 553*404b540aSrobert @ four bits, since this is the amount of unwinding in the main 554*404b540aSrobert @ division loop. Continue shifting until the divisor is 555*404b540aSrobert @ larger than the dividend. 556*404b540aSrobert cmp divisor, work 557*404b540aSrobert bhs LSYM(Lbignum) 558*404b540aSrobert cmp divisor, dividend 559*404b540aSrobert bhs LSYM(Lbignum) 560*404b540aSrobert lsl divisor, #4 561*404b540aSrobert lsl curbit, #4 562*404b540aSrobert b LSYM(Loop1) 563*404b540aSrobertLSYM(Lbignum): 564*404b540aSrobert @ Set work to 0x80000000 565*404b540aSrobert lsl work, #3 566*404b540aSrobertLSYM(Loop2): 567*404b540aSrobert @ For very big divisors, we must shift it a bit at a time, or 568*404b540aSrobert @ we will be in danger of overflowing. 
569*404b540aSrobert cmp divisor, work 570*404b540aSrobert bhs LSYM(Loop3) 571*404b540aSrobert cmp divisor, dividend 572*404b540aSrobert bhs LSYM(Loop3) 573*404b540aSrobert lsl divisor, #1 574*404b540aSrobert lsl curbit, #1 575*404b540aSrobert b LSYM(Loop2) 576*404b540aSrobertLSYM(Loop3): 577*404b540aSrobert @ Test for possible subtractions ... 578*404b540aSrobert .if \modulo 579*404b540aSrobert @ ... On the final pass, this may subtract too much from the dividend, 580*404b540aSrobert @ so keep track of which subtractions are done, we can fix them up 581*404b540aSrobert @ afterwards. 582*404b540aSrobert mov overdone, #0 583*404b540aSrobert cmp dividend, divisor 584*404b540aSrobert blo LSYM(Lover1) 585*404b540aSrobert sub dividend, dividend, divisor 586*404b540aSrobertLSYM(Lover1): 587*404b540aSrobert lsr work, divisor, #1 588*404b540aSrobert cmp dividend, work 589*404b540aSrobert blo LSYM(Lover2) 590*404b540aSrobert sub dividend, dividend, work 591*404b540aSrobert mov ip, curbit 592*404b540aSrobert mov work, #1 593*404b540aSrobert ror curbit, work 594*404b540aSrobert orr overdone, curbit 595*404b540aSrobert mov curbit, ip 596*404b540aSrobertLSYM(Lover2): 597*404b540aSrobert lsr work, divisor, #2 598*404b540aSrobert cmp dividend, work 599*404b540aSrobert blo LSYM(Lover3) 600*404b540aSrobert sub dividend, dividend, work 601*404b540aSrobert mov ip, curbit 602*404b540aSrobert mov work, #2 603*404b540aSrobert ror curbit, work 604*404b540aSrobert orr overdone, curbit 605*404b540aSrobert mov curbit, ip 606*404b540aSrobertLSYM(Lover3): 607*404b540aSrobert lsr work, divisor, #3 608*404b540aSrobert cmp dividend, work 609*404b540aSrobert blo LSYM(Lover4) 610*404b540aSrobert sub dividend, dividend, work 611*404b540aSrobert mov ip, curbit 612*404b540aSrobert mov work, #3 613*404b540aSrobert ror curbit, work 614*404b540aSrobert orr overdone, curbit 615*404b540aSrobert mov curbit, ip 616*404b540aSrobertLSYM(Lover4): 617*404b540aSrobert mov ip, curbit 618*404b540aSrobert .else 
619*404b540aSrobert @ ... and note which bits are done in the result. On the final pass, 620*404b540aSrobert @ this may subtract too much from the dividend, but the result will be ok, 621*404b540aSrobert @ since the "bit" will have been shifted out at the bottom. 622*404b540aSrobert cmp dividend, divisor 623*404b540aSrobert blo LSYM(Lover1) 624*404b540aSrobert sub dividend, dividend, divisor 625*404b540aSrobert orr result, result, curbit 626*404b540aSrobertLSYM(Lover1): 627*404b540aSrobert lsr work, divisor, #1 628*404b540aSrobert cmp dividend, work 629*404b540aSrobert blo LSYM(Lover2) 630*404b540aSrobert sub dividend, dividend, work 631*404b540aSrobert lsr work, curbit, #1 632*404b540aSrobert orr result, work 633*404b540aSrobertLSYM(Lover2): 634*404b540aSrobert lsr work, divisor, #2 635*404b540aSrobert cmp dividend, work 636*404b540aSrobert blo LSYM(Lover3) 637*404b540aSrobert sub dividend, dividend, work 638*404b540aSrobert lsr work, curbit, #2 639*404b540aSrobert orr result, work 640*404b540aSrobertLSYM(Lover3): 641*404b540aSrobert lsr work, divisor, #3 642*404b540aSrobert cmp dividend, work 643*404b540aSrobert blo LSYM(Lover4) 644*404b540aSrobert sub dividend, dividend, work 645*404b540aSrobert lsr work, curbit, #3 646*404b540aSrobert orr result, work 647*404b540aSrobertLSYM(Lover4): 648*404b540aSrobert .endif 649*404b540aSrobert 650*404b540aSrobert cmp dividend, #0 @ Early termination? 651*404b540aSrobert beq LSYM(Lover5) 652*404b540aSrobert lsr curbit, #4 @ No, any more bits to do? 653*404b540aSrobert beq LSYM(Lover5) 654*404b540aSrobert lsr divisor, #4 655*404b540aSrobert b LSYM(Loop3) 656*404b540aSrobertLSYM(Lover5): 657*404b540aSrobert .if \modulo 658*404b540aSrobert @ Any subtractions that we should not have done will be recorded in 659*404b540aSrobert @ the top three bits of "overdone". Exactly which were not needed 660*404b540aSrobert @ are governed by the position of the bit, stored in ip. 
661*404b540aSrobert mov work, #0xe 662*404b540aSrobert lsl work, #28 663*404b540aSrobert and overdone, work 664*404b540aSrobert beq LSYM(Lgot_result) 665*404b540aSrobert 666*404b540aSrobert @ If we terminated early, because dividend became zero, then the 667*404b540aSrobert @ bit in ip will not be in the bottom nibble, and we should not 668*404b540aSrobert @ perform the additions below. We must test for this though 669*404b540aSrobert @ (rather relying upon the TSTs to prevent the additions) since 670*404b540aSrobert @ the bit in ip could be in the top two bits which might then match 671*404b540aSrobert @ with one of the smaller RORs. 672*404b540aSrobert mov curbit, ip 673*404b540aSrobert mov work, #0x7 674*404b540aSrobert tst curbit, work 675*404b540aSrobert beq LSYM(Lgot_result) 676*404b540aSrobert 677*404b540aSrobert mov curbit, ip 678*404b540aSrobert mov work, #3 679*404b540aSrobert ror curbit, work 680*404b540aSrobert tst overdone, curbit 681*404b540aSrobert beq LSYM(Lover6) 682*404b540aSrobert lsr work, divisor, #3 683*404b540aSrobert add dividend, work 684*404b540aSrobertLSYM(Lover6): 685*404b540aSrobert mov curbit, ip 686*404b540aSrobert mov work, #2 687*404b540aSrobert ror curbit, work 688*404b540aSrobert tst overdone, curbit 689*404b540aSrobert beq LSYM(Lover7) 690*404b540aSrobert lsr work, divisor, #2 691*404b540aSrobert add dividend, work 692*404b540aSrobertLSYM(Lover7): 693*404b540aSrobert mov curbit, ip 694*404b540aSrobert mov work, #1 695*404b540aSrobert ror curbit, work 696*404b540aSrobert tst overdone, curbit 697*404b540aSrobert beq LSYM(Lgot_result) 698*404b540aSrobert lsr work, divisor, #1 699*404b540aSrobert add dividend, work 700*404b540aSrobert .endif 701*404b540aSrobertLSYM(Lgot_result): 702*404b540aSrobert.endm 703*404b540aSrobert/* ------------------------------------------------------------------------ */ 704*404b540aSrobert/* Start of the Real Functions */ 705*404b540aSrobert/* 
------------------------------------------------------------------------ */
#ifdef L_udivsi3

/* __udivsi3: unsigned 32-bit division, r0 = r0 / r1.
   The Thumb path refers to registers through aliases (dividend, divisor,
   result, curbit, work, ip) -- presumably #defined/.req'd earlier in this
   file; not visible in this chunk -- TODO confirm against the full file.
   Division by zero branches to Ldiv0.  */

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)	@ dividend < divisor: quotient is 0.

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#else /* ARM version. */

	subs	r2, r1, #0x1		@ r2 = divisor - 1; sets flags.
	RETc(eq)			@ divisor == 1: quotient == dividend.
	bcc	LSYM(Ldiv0)		@ borrow => divisor was 0.
	cmp	r0, r1
	bls	11f			@ dividend <= divisor: quotient is 0 or 1.
	tst	r1, r2
	beq	12f			@ (d & (d-1)) == 0: divisor is a power of 2.

	ARM_DIV_BODY r0, r1, r2, r3

	@ ARM_DIV_BODY apparently leaves the quotient in r2 (macro is
	@ defined earlier in this file; not visible here -- TODO confirm).
	mov	r0, r2
	RET

	@ dividend <= divisor: flags still from the cmp above.
11:	moveq	r0, #1			@ equal: quotient is 1.
	movne	r0, #0			@ dividend < divisor: quotient is 0.
	RET

	@ Power-of-2 divisor: ARM_DIV2_ORDER presumably computes
	@ log2(divisor) into r2 (macro defined earlier; not visible here).
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3

/* __aeabi_uidivmod: unsigned divide-and-modulus (ARM EABI helper).
   On return r0 = quotient and r1 = remainder, the remainder being
   computed as dividend - quotient * divisor after calling __udivsi3.  */

FUNC_START aeabi_uidivmod
#ifdef __thumb__
	push	{r0, r1, lr}
	bl	SYM(__udivsi3)
	POP	{r1, r2, r3}		@ r1 = dividend, r2 = divisor, r3 = lr.
	mul	r2, r0			@ r2 = quotient * divisor.
	sub	r1, r1, r2		@ r1 = remainder.
	bx	r3
#else
	stmfd	sp!, { r0, r1, lr }
	bl	SYM(__udivsi3)
	ldmfd	sp!, { r1, r2, lr }	@ r1 = dividend, r2 = divisor.
	mul	r3, r2, r0		@ r3 = quotient * divisor.
	sub	r1, r1, r3		@ r1 = remainder.
	RET
#endif
	FUNC_END aeabi_uidivmod

#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

/* __umodsi3: unsigned 32-bit modulus, r0 = r0 % r1.
   Division by zero branches to Ldiv0.  */

	FUNC_START umodsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET				@ dividend < divisor: it is the remainder.

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version. */

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)			@ borrow => divisor was 0.
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0				@ divisor==1 or equal: remainder 0.
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ power of 2: mask gives remainder.
	RETc(ls)				@ return if a special case applied.

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif /* ARM version. */

	DIV_FUNC_END umodsi3

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

/* __divsi3: signed 32-bit division, r0 = r0 / r1.
   The sign of the result (dividend XOR divisor) is kept in ip while the
   unsigned core loops run on absolute values.  */

	FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3

#ifdef __thumb__
	cmp	divisor, #0
	beq	LSYM(Ldiv0)

	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	r0, r0			@ Signs differed: negate the quotient.
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version. */

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0			@ apply the saved sign.
	RET

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0			@ dividing by -1: negate.
	RET

11:	movlo	r0, #0				@ |dividend| < divisor: 0.
	moveq	r0, ip, asr #31			@ equal magnitude: +/-1 from sign.
	orreq	r0, r0, #1
	RET

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2			@ power of 2: shift |dividend|.
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3

/* __aeabi_idivmod: signed divide-and-modulus (ARM EABI helper).
   On return r0 = quotient and r1 = remainder (dividend - q*divisor).  */

FUNC_START aeabi_idivmod
#ifdef __thumb__
	push	{r0, r1, lr}
	bl	SYM(__divsi3)
	POP	{r1, r2, r3}		@ r1 = dividend, r2 = divisor, r3 = lr.
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#else
	stmfd	sp!, { r0, r1, lr }
	bl	SYM(__divsi3)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

/* __modsi3: signed 32-bit modulus, r0 = r0 % r1.
   The result takes the sign of the dividend, which is saved (in ip for
   ARM, on the stack for Thumb) while the unsigned core runs.  */

	FUNC_START modsi3

#ifdef __thumb__

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor		@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ Need to save the sign of the dividend, unfortunately, we need
	@ work later on.  Must do this after saving the original value of
	@ the work register, because we will pop this value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }		@ Recover the original (signed) dividend.
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend	@ Negative dividend: negate remainder.
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version. */

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f				@ special case handled: fix sign.

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0			@ result takes the dividend's sign.
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

/* Default division-by-zero handler: simply return (result undefined).
   __aeabi_idiv0/__aeabi_ldiv0 are aliases required by the ARM EABI.  */

	FUNC_START div0
	FUNC_ALIAS aeabi_idiv0 div0
	FUNC_ALIAS aeabi_ldiv0 div0

	RET

	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
	FUNC_END div0

#endif /* L_divmodsi_tools */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.
 */
#define SIGFPE	8

	.code	32
/* GNU/Linux __div0: deliver SIGFPE to the process via raise(3),
   preserving r1 around the call.  */
	FUNC_START div0

	stmfd	sp!, {r1, lr}
	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1

	FUNC_END div0

#endif /* L_dvmd_lnx */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */

/* al/ah name the low and high words of the 64-bit value according to
   the target's endianness (AAPCS passes a double-word in r0/r1).  */
#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.
 */
#ifndef __symbian__

#ifdef L_lshrdi3

/* __lshrdi3: 64-bit logical shift right of ah:al by r2 (0..63).
   The low word is built from two partial shifts; out-of-range partial
   shift counts yield zero, per the (Reg & 255) note above.  */

	FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	lsr	ah, r2
	mov	ip, r3
	sub	r2, #32			@ r2 = shift - 32.
	lsr	r3, r2			@ high bits when shift >= 32.
	orr	al, r3
	neg	r2, r2			@ r2 = 32 - shift.
	mov	r3, ip
	lsl	r3, r2			@ high bits when shift < 32.
	orr	al, r3
	RET
#else
	subs	r3, r2, #32		@ r3 = shift - 32 (mi => shift < 32).
	rsb	ip, r2, #32		@ ip = 32 - shift.
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3		@ shift >= 32: low word from high.
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, lsr r2
	RET
#endif
	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif

#ifdef L_ashrdi3

/* __ashrdi3: 64-bit arithmetic shift right of ah:al by r2 (0..63).  */

	FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	asr	ah, r2
	sub	r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	bmi	1f
	mov	ip, r3
	asr	r3, r2			@ shift >= 32: sign-extended high bits.
	orr	al, r3
	mov	r3, ip
1:
	neg	r2, r2
	lsl	r3, r2			@ shift < 32: high bits into low word.
	orr	al, r3
	RET
#else
	subs	r3, r2, #32		@ r3 = shift - 32 (mi => shift < 32).
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, asr r2
	RET
#endif

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif

#ifdef L_ashldi3

/* __ashldi3: 64-bit shift left of ah:al by r2 (0..63) -- the mirror
   image of __lshrdi3.  */

	FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
	lsl	ah, r2
	mov	r3, al
	lsl	al, r2
	mov	ip, r3
	sub	r2, #32			@ r2 = shift - 32.
	lsl	r3, r2			@ low bits when shift >= 32.
	orr	ah, r3
	neg	r2, r2			@ r2 = 32 - shift.
	mov	r3, ip
	lsr	r3, r2			@ low bits when shift < 32.
	orr	ah, r3
	RET
#else
	subs	r3, r2, #32		@ r3 = shift - 32 (mi => shift < 32).
	rsb	ip, r2, #32
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3		@ shift >= 32: high word from low.
	orrmi	ah, ah, al, lsr ip
	mov	al, al, lsl r2
	RET
#endif
	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif

#endif /* __symbian__ */

/*
------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
      || __ARM_ARCH__ >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */

	.text
	.align 0
	.force_thumb

/* Emit _call_via_<register>: a two-byte Thumb stub that simply BXes to
   the target register; the trailing nop keeps the stub word-aligned.  */
.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.
   Upon return we recover the real
   return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */

	.text
	.align 0

	.code	32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

/* Return trampolines for the r7/r11 frame variants: the real return
   address was stored at [frame, #-4] by interwork_with_frame below.  */
	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]
	bx	lr

/* Emit _interwork_<frame>_call_via_<register>: Thumb entry, switch to
   ARM via bx pc, then if the target is ARM code (bit 0 clear) park the
   return address at [frame, #-4] and point lr at _arm_return_<frame>.
   NOTE(review): the fourth parameter `return` is never referenced in the
   body and both call sites below pass only three arguments -- apparently
   vestigial; confirm before relying on it.  */
.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc
	nop

	.code	32
	tst	\register, #1
	streq	lr, [\frame, #-4]
	adreq	lr, _arm_return_\frame
	bx	\register

	SIZE	(\name)
.endm

/* Emit _interwork_call_via_<register> (stack-based return-address
   variant) plus the r7 and r11 frame variants for the same register.  */
.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-8]!		@ ARM target: spill lr, return via
	adreq	lr, _arm_return		@ _arm_return which pops it.
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  lr itself
	   is clobbered when the return path is set up, so the target address
	   is first copied into ip and the call made through that.  */
	.code 16

	THUMB_FUNC_START _interwork_call_via_lr

	bx	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr, pc}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
#endif /* Arch supports thumb.  */

#ifndef __symbian__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#endif /* __symbian__ */