@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif
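
/* For illustration (assuming __USER_LABEL_PREFIX__ is empty, as it
   usually is on ELF targets): SYM (__udivsi3) expands to __udivsi3,
   and LSYM (Ldiv0) expands to .Ldiv0, which the GNU assembler treats
   as a local label on ELF.  On non-ELF targets LSYM (Ldiv0) is just
   the ordinary symbol Ldiv0.  */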

/* Function end macros.  Variants for interworking.  */

@ This selects the minimum architecture level required.
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# undef __ARM_ARCH__
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 6
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#    define __INTERWORKING__
#   endif /* __thumb__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif
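
/* For example, on __ARM_ARCH__ >= 5 (or armv4t) "RETc(eq)" expands to
   "bxeq lr", while on older architecture variants it becomes
   "moveq pc, lr".  */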

.macro	cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro	cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
        .4byte	0xffffffff	@ CIE Identifier Tag
        .byte	0x1	@ CIE Version
        .ascii	"\0"	@ CIE Augmentation
        .uleb128 0x1	@ CIE Code Alignment Factor
        .sleb128 -4	@ CIE Data Alignment Factor
        .byte	0xe	@ CIE RA Column
        .byte	0xc	@ DW_CFA_def_cfa
        .uleb128 0xd
        .uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm
.macro cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm

/* Don't pass dirn, it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
	ldm\cond\dirn	sp!, {\regs, lr}
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
	ldm\cond\dirn	sp!, {\regs, pc}
	.endif
#endif
.endm
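
/* For instance (a sketch of the non-interworking case): "RETLDM r4"
   expands to "ldmia sp!, {r4, pc}", while a plain "RETLDM" with no
   register list becomes "ldr pc, [sp], #8".  With __INTERWORKING__
   defined the same invocations restore lr instead and finish with
   "bx lr".  */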


.macro ARM_LDIV0 name
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm


.macro THUMB_LDIV0 name
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm

.macro FUNC_END name
	SIZE (__\name)
.endm

.macro DIV_FUNC_END name
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name
#else
	ARM_LDIV0 \name
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm

.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#else
#define THUMB_FUNC
#define THUMB_CODE
#endif

.macro FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
SYM (__\name):
.endm

/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__INTERWORKING_STUBS__)
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro  ARM_CALL name
	bl	_L__\name
.endm
#else
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
	bl	__\name
.endm
#endif

.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm
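
/* For example, the "FUNC_ALIAS aeabi_uidiv udivsi3" used below makes
   __aeabi_uidiv another name for __udivsi3 (via .set, or .thumb_set
   when assembling Thumb code).  ARM_FUNC_ALIAS additionally aliases
   the _L__ entry point when interworking stubs are in use.  */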

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible, and set
	@ curbit accordingly.  This allows curbit to be located at the
	@ left end of each 4-bit nibble in the division loop, saving one
	@ loop iteration in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* __ARM_ARCH__ < 5 */

	@ Division loop
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
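
/* Notes on the fast path above (__ARM_ARCH__ >= 5, not optimizing for
   size): curbit ends up holding 31 minus the difference in leading
   zeros between divisor and dividend, is tripled, and the computed
   "addne pc" then skips that many three-instruction steps of the fully
   unrolled 32-step loop.  E.g. (illustration) dividing 100 by 3:
   clz(3) - clz(100) = 5, so 31 - 5 = 26 steps are skipped and only the
   last six (shift 5 down to 0) execute, leaving the quotient 33 in the
   result register and the remainder 1 in the dividend register.  */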
/* ------------------------------------------------------------------------ */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
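
/* ARM_DIV2_ORDER computes the bit position of a power-of-two divisor,
   either with clz or with a branch-free binary search.  For example, a
   divisor of 8 yields an order of 3, and the callers below then simply
   return "dividend lsr #3".  */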
/* ------------------------------------------------------------------------ */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparisons/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
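
/* A brief note on the slow path of ARM_MOD_BODY: after aligning the
   divisor with the dividend, "order" counts how many bit positions
   were shifted, so the batch-of-four loop above consumes four bit
   positions per iteration and the tail handles the remaining one to
   three comparisons.  On exit only the remainder is left in the
   dividend register.  */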
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done so that we can fix them
	@ up afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which ones were not
	@ needed is governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early because the dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm
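
/* In outline, THUMB_DIV_MOD_BODY is the same shift-and-subtract scheme
   as the ARM versions above: the divisor is aligned with the dividend,
   then up to four trial subtractions per iteration peel off quotient
   bits (division) or reduce the dividend (modulo).  The "overdone"
   bookkeeping in the modulo variant adds back the sub-steps of the
   final iteration that turned out to correspond to quotient bits below
   bit 0.  */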
/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#else /* ARM version.  */

	subs	r2, r1, #1
	RETc(eq)
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f
	tst	r1, r2
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

11:	moveq	r0, #1
	movne	r0, #0
	RET

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3

FUNC_START aeabi_uidivmod
#ifdef __thumb__
	push	{r0, r1, lr}
	bl	SYM(__udivsi3)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#else
	stmfd	sp!, { r0, r1, lr }
	bl	SYM(__udivsi3)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_uidivmod
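
/* __aeabi_uidivmod returns the quotient in r0 and the remainder in r1.
   Roughly, as a sketch in C terms:
       q = __udivsi3 (a, b);    -- quotient ends up in r0
       r = a - q * b;           -- remainder computed here, left in r1  */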

#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

	FUNC_START umodsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version.  */

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif /* ARM version.  */

	DIV_FUNC_END umodsi3

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

	FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3

#ifdef __thumb__
	cmp	divisor, #0
	beq	LSYM(Ldiv0)

	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	r0, r0
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	RET

11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	RET

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3

FUNC_START aeabi_idivmod
#ifdef __thumb__
	push	{r0, r1, lr}
	bl	SYM(__divsi3)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#else
	stmfd	sp!, { r0, r1, lr }
	bl	SYM(__divsi3)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_idivmod
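
/* Like __aeabi_uidivmod above, __aeabi_idivmod returns the (signed)
   quotient in r0 and the remainder in r1.  */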

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

	FUNC_START modsi3

#ifdef __thumb__

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor		@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ Need to save the sign of the dividend; unfortunately, we need the
	@ work register later on.  Must do this after saving the original
	@ value of the work register, because we will pop that value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

	FUNC_START div0
	FUNC_ALIAS aeabi_idiv0 div0
	FUNC_ALIAS aeabi_ldiv0 div0

	RET

	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
	FUNC_END div0

#endif /* L_dvmd_tls */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE	8

	.code	32
	FUNC_START div0

	stmfd	sp!, {r1, lr}
	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1

	FUNC_END div0

#endif /* L_dvmd_lnx */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */
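
/* For example (a sketch of the ARM __lshrdi3 path below): for a shift
   count n < 32 the low word becomes (al >> n) | (ah << (32 - n)), and
   for n >= 32 it becomes ah >> (n - 32); the high word is ah >> n,
   which the masking described above turns into zero once n reaches
   32.  */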

#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
#ifndef __symbian__

#ifdef L_lshrdi3

	FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	lsr	ah, r2
	mov	ip, r3
	sub	r2, #32
	lsr	r3, r2
	orr	al, r3
	neg	r2, r2
	mov	r3, ip
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, lsr r2
	RET
#endif
	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif

#ifdef L_ashrdi3

	FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	asr	ah, r2
	sub	r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	bmi	1f
	mov	ip, r3
	asr	r3, r2
	orr	al, r3
	mov	r3, ip
1:
	neg	r2, r2
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, asr r2
	RET
#endif

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif

#ifdef L_ashldi3

	FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
	lsl	ah, r2
	mov	r3, al
	lsl	al, r2
	mov	ip, r3
	sub	r2, #32
	lsl	r3, r2
	orr	ah, r3
	neg	r2, r2
	mov	r3, ip
	lsr	r3, r2
	orr	ah, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3
	orrmi	ah, ah, al, lsr ip
	mov	al, al, lsl r2
	RET
#endif
	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif

#endif /* __symbian__ */

/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
      || __ARM_ARCH__ >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
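
/* For example (illustration only), a Thumb caller might do:

	ldr	r4, =some_function
	bl	_call_via_r4

   and the stub below simply executes "bx r4", switching to ARM or
   Thumb mode according to bit 0 of the target address.  */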

	.text
	.align 0
        .force_thumb

.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */

	.text
	.align 0

	.code   32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]
	bx	lr

.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc
	nop

	.code	32
	tst	\register, #1
	streq	lr, [\frame, #-4]
	adreq	lr, _arm_return_\frame
	bx	\register

	SIZE	(\name)
.endm

.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-8]!
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  */
	.code 16

	THUMB_FUNC_START _interwork_call_via_lr

	bx 	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr, pc}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
#endif /* Arch supports thumb.  */

#ifndef __symbian__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#endif /* __symbian__ */