@ libgcc routines for ARM cpu.
@ Division routines, written by Richard Earnshaw, (rearnsha@armltd.co.uk)

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif
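
/* For illustration (assuming __USER_LABEL_PREFIX__ is empty, as it
   usually is on ELF targets): SYM (__udivsi3) expands to __udivsi3,
   and LSYM (Ldiv0) expands to .Ldiv0, which the GNU assembler treats
   as a local label on ELF.  On non-ELF targets LSYM (Ldiv0) is just
   the ordinary symbol Ldiv0.  */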

/* Function end macros.  Variants for interworking.  */

@ This selects the minimum architecture level required.
#define __ARM_ARCH__ 3

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# undef __ARM_ARCH__
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__)
# undef __ARM_ARCH__
# define __ARM_ARCH__ 6
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#    define __INTERWORKING__
#   endif /* __thumb__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif
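
/* For example, on __ARM_ARCH__ >= 5 (or armv4t) "RETc(eq)" expands to
   "bxeq lr", while on older architecture variants it becomes
   "moveq pc, lr".  */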

.macro	cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro	cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
        .4byte	0xffffffff	@ CIE Identifier Tag
        .byte	0x1	@ CIE Version
        .ascii	"\0"	@ CIE Augmentation
        .uleb128 0x1	@ CIE Code Alignment Factor
        .sleb128 -4	@ CIE Data Alignment Factor
        .byte	0xe	@ CIE RA Column
        .byte	0xc	@ DW_CFA_def_cfa
        .uleb128 0xd
        .uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm
.macro cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm

/* Don't pass dirn, it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
	ldm\cond\dirn	sp!, {\regs, lr}
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
	ldm\cond\dirn	sp!, {\regs, pc}
	.endif
#endif
.endm
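
/* For instance (a sketch of the non-interworking case): "RETLDM r4"
   expands to "ldmia sp!, {r4, pc}", while a plain "RETLDM" with no
   register list becomes "ldr pc, [sp], #8".  With __INTERWORKING__
   defined the same invocations restore lr instead and finish with
   "bx lr".  */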


.macro ARM_LDIV0 name
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm


.macro THUMB_LDIV0 name
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm

.macro FUNC_END name
	SIZE (__\name)
.endm

.macro DIV_FUNC_END name
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name
#else
	ARM_LDIV0 \name
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm

.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
#else
#define THUMB_FUNC
#define THUMB_CODE
#endif

.macro FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
SYM (__\name):
.endm

/* Special function that will always be coded in ARM assembly, even if
   in Thumb-only compilation.  */

#if defined(__INTERWORKING_STUBS__)
.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local arm routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro  ARM_CALL name
	bl	_L__\name
.endm
#else
.macro	ARM_FUNC_START name
	.text
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
	bl	__\name
.endm
#endif

.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm
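
/* For example, the "FUNC_ALIAS aeabi_uidiv udivsi3" used below makes
   __aeabi_uidiv another name for __udivsi3 (via .set, or .thumb_set
   when assembling Thumb code).  ARM_FUNC_ALIAS additionally aliases
   the _L__ entry point when interworking stubs are in use.  */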

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXXX is this safe ?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible, and set
	@ curbit accordingly.  This allows curbit to be located at the
	@ left end of each 4-bit nibble in the division loop, saving one
	@ loop iteration in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* __ARM_ARCH__ < 5 */

	@ Division loop
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
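
/* Notes on the fast path above (__ARM_ARCH__ >= 5, not optimizing for
   size): curbit ends up holding 31 minus the difference in leading
   zeros between divisor and dividend, is tripled, and the computed
   "addne pc" then skips that many three-instruction steps of the fully
   unrolled 32-step loop.  E.g. (illustration) dividing 100 by 3:
   clz(3) - clz(100) = 5, so 31 - 5 = 26 steps are skipped and only the
   last six (shift 5 down to 0) execute, leaving the quotient 33 in the
   result register and the remainder 1 in the dividend register.  */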
/* ------------------------------------------------------------------------ */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
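
/* ARM_DIV2_ORDER computes the bit position of a power-of-two divisor,
   either with clz or with a branch-free binary search.  For example, a
   divisor of 8 yields an order of 3, and the callers below then simply
   return "dividend lsr #3".  */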
/* ------------------------------------------------------------------------ */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparisons/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
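
/* A brief note on the slow path of ARM_MOD_BODY: after aligning the
   divisor with the dividend, "order" counts how many bit positions
   were shifted, so the batch-of-four loop above consumes four bit
   positions per iteration and the tail handles the remaining one to
   three comparisons.  On exit only the remainder is left in the
   dividend register.  */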
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done so that we can fix them
	@ up afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which ones were not
	@ needed is governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early because the dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits which might then match
	@ with one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm
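
/* In outline, THUMB_DIV_MOD_BODY is the same shift-and-subtract scheme
   as the ARM versions above: the divisor is aligned with the dividend,
   then up to four trial subtractions per iteration peel off quotient
   bits (division) or reduce the dividend (modulo).  The "overdone"
   bookkeeping in the modulo variant adds back the sub-steps of the
   final iteration that turned out to correspond to quotient bits below
   bit 0.  */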
/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#else /* ARM version.  */

	subs	r2, r1, #1
	RETc(eq)
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f
	tst	r1, r2
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

11:	moveq	r0, #1
	movne	r0, #0
	RET

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3

FUNC_START aeabi_uidivmod
#ifdef __thumb__
	push	{r0, r1, lr}
	bl	SYM(__udivsi3)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#else
	stmfd	sp!, { r0, r1, lr }
	bl	SYM(__udivsi3)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_uidivmod
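
/* __aeabi_uidivmod returns the quotient in r0 and the remainder in r1.
   Roughly, as a sketch in C terms:
       q = __udivsi3 (a, b);    -- quotient ends up in r0
       r = a - q * b;           -- remainder computed here, left in r1  */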

#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

	FUNC_START umodsi3

#ifdef __thumb__

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version.  */

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif /* ARM version.  */

	DIV_FUNC_END umodsi3

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

	FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3

#ifdef __thumb__
	cmp	divisor, #0
	beq	LSYM(Ldiv0)

	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	r0, r0
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	RET

11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	RET

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3

FUNC_START aeabi_idivmod
#ifdef __thumb__
	push	{r0, r1, lr}
	bl	SYM(__divsi3)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#else
	stmfd	sp!, { r0, r1, lr }
	bl	SYM(__divsi3)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_idivmod
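
/* Like __aeabi_uidivmod above, __aeabi_idivmod returns the (signed)
   quotient in r0 and the remainder in r1.  */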

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

	FUNC_START modsi3

#ifdef __thumb__

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor		@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ Need to save the sign of the dividend; unfortunately, we need the
	@ work register later on.  Must do this after saving the original
	@ value of the work register, because we will pop that value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

	FUNC_START div0
	FUNC_ALIAS aeabi_idiv0 div0
	FUNC_ALIAS aeabi_ldiv0 div0

	RET

	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
	FUNC_END div0

#endif /* L_dvmd_tls */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE	8

	.code	32
	FUNC_START div0

	stmfd	sp!, {r1, lr}
	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1

	FUNC_END div0

#endif /* L_dvmd_lnx */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg value in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */
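
/* For example (a sketch of the ARM __lshrdi3 path below): for a shift
   count n < 32 the low word becomes (al >> n) | (ah << (32 - n)), and
   for n >= 32 it becomes ah >> (n - 32); the high word is ah >> n,
   which the masking described above turns into zero once n reaches
   32.  */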

#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
#ifndef __symbian__

#ifdef L_lshrdi3

	FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	lsr	ah, r2
	mov	ip, r3
	sub	r2, #32
	lsr	r3, r2
	orr	al, r3
	neg	r2, r2
	mov	r3, ip
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, lsr r2
	RET
#endif
	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif

#ifdef L_ashrdi3

	FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	asr	ah, r2
	sub	r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	bmi	1f
	mov	ip, r3
	asr	r3, r2
	orr	al, r3
	mov	r3, ip
1:
	neg	r2, r2
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, asr r2
	RET
#endif

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif

#ifdef L_ashldi3

	FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
	lsl	ah, r2
	mov	r3, al
	lsl	al, r2
	mov	ip, r3
	sub	r2, #32
	lsl	r3, r2
	orr	ah, r3
	neg	r2, r2
	mov	r3, ip
	lsr	r3, r2
	orr	ah, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3
	orrmi	ah, ah, al, lsr ip
	mov	al, al, lsl r2
	RET
#endif
	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif

#endif /* __symbian__ */

/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
      || __ARM_ARCH__ >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of function to be called is loaded into a register and then
   one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
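
/* For example (illustration only), a Thumb caller might do:

	ldr	r4, =some_function
	bl	_call_via_r4

   and the stub below simply executes "bx r4", switching to ARM or
   Thumb mode according to bit 0 of the target address.  */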

	.text
	.align 0
        .force_thumb

.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of function to be called is loaded into a register and then one of these
   labels is called via a BL instruction.  This puts the return address
   into the link register with the bottom bit set, and the code here
   switches to the correct mode before executing the function.  Unfortunately
   the target code cannot be relied upon to return via a BX instruction, so
   instead we have to store the return address on the stack and allow the
   called function to return here instead.  Upon return we recover the real
   return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */

	.text
	.align 0

	.code   32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]
	bx	lr

.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc
	nop

	.code	32
	tst	\register, #1
	streq	lr, [\frame, #-4]
	adreq	lr, _arm_return_\frame
	bx	\register

	SIZE	(\name)
.endm

.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-8]!
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  */
	.code 16

	THUMB_FUNC_START _interwork_call_via_lr

	bx 	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr, pc}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
#endif /* Arch supports thumb.  */

#ifndef __symbian__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#endif /* __symbian__ */