@ libgcc routines for ARM CPUs.
@ Division routines, written by Richard Earnshaw (rearnsha@armltd.co.uk)
3
4/* Copyright (C) 1995-2019 Free Software Foundation, Inc.
5
6This file is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 3, or (at your option) any
9later version.
10
11This file is distributed in the hope that it will be useful, but
12WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14General Public License for more details.
15
16Under Section 7 of GPL version 3, you are granted additional
17permissions described in the GCC Runtime Library Exception, version
183.1, as published by the Free Software Foundation.
19
20You should have received a copy of the GNU General Public License and
21a copy of the GCC Runtime Library Exception along with this program;
22see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23<http://www.gnu.org/licenses/>.  */
24
25/* An executable stack is *not* required for these functions.  */
26#if defined(__ELF__) && defined(__linux__)
27.section .note.GNU-stack,"",%progbits
28.previous
29#endif  /* __ELF__ and __linux__ */
30
31#ifdef __ARM_EABI__
32/* Some attributes that are common to all routines in this file.  */
33	/* Tag_ABI_align_needed: This code does not require 8-byte
34	   alignment from the caller.  */
35	/* .eabi_attribute 24, 0  -- default setting.  */
36	/* Tag_ABI_align_preserved: This code preserves 8-byte
37	   alignment in any callee.  */
38	.eabi_attribute 25, 1
39#endif /* __ARM_EABI__ */
40/* ------------------------------------------------------------------------ */
41
42/* We need to know what prefix to add to function names.  */
43
44#ifndef __USER_LABEL_PREFIX__
45#error  __USER_LABEL_PREFIX__ not defined
46#endif
47
48/* ANSI concatenation macros.  */
49
50#define CONCAT1(a, b) CONCAT2(a, b)
51#define CONCAT2(a, b) a ## b
52
53/* Use the right prefix for global labels.  */
54
55#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
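/* For example, with __USER_LABEL_PREFIX__ defined as "_", SYM (__udivsi3)
   expands to ___udivsi3; with an empty prefix it is simply __udivsi3.  */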
56
57#ifdef __ELF__
58#ifdef __thumb__
59#define __PLT__  /* Not supported in Thumb assembler (for now).  */
60#elif defined __vxworks && !defined __PIC__
61#define __PLT__ /* Not supported by the kernel loader.  */
62#else
63#define __PLT__ (PLT)
64#endif
65#define TYPE(x) .type SYM(x),function
66#define SIZE(x) .size SYM(x), . - SYM(x)
67#define LSYM(x) .x
68#else
69#define __PLT__
70#define TYPE(x)
71#define SIZE(x)
72#define LSYM(x) x
73#endif
74
75/* Function end macros.  Variants for interworking.  */
76
77/* There are times when we might prefer Thumb1 code even if ARM code is
78   permitted, for example, the code might be smaller, or there might be
79   interworking problems with switching to ARM state if interworking is
80   disabled.  */
81#if (defined(__thumb__)			\
82     && !defined(__thumb2__)		\
83     && (!defined(__THUMB_INTERWORK__)	\
84	 || defined (__OPTIMIZE_SIZE__)	\
85	 || !__ARM_ARCH_ISA_ARM))
86# define __prefer_thumb__
87#endif
88
89#if !__ARM_ARCH_ISA_ARM && __ARM_ARCH_ISA_THUMB == 1
90#define NOT_ISA_TARGET_32BIT 1
91#endif
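/* NOT_ISA_TARGET_32BIT means the target only executes Thumb-1 code
   (e.g. ARMv6-M).  */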
92
93/* How to return from a function call depends on the architecture variant.  */
94
95#if (__ARM_ARCH > 4) || defined(__ARM_ARCH_4T__)
96
97# define RET		bx	lr
98# define RETc(x)	bx##x	lr
99
100/* Special precautions for interworking on armv4t.  */
101# if (__ARM_ARCH == 4)
102
103/* Always use bx, not ldr pc.  */
104#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
105#    define __INTERWORKING__
#  endif /* __thumb__ || __THUMB_INTERWORK__ */
107
108/* Include thumb stub before arm mode code.  */
109#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
110#   define __INTERWORKING_STUBS__
111#  endif /* __thumb__ && !__THUMB_INTERWORK__ */
112
113#endif /* __ARM_ARCH == 4 */
114
115#else
116
117# define RET		mov	pc, lr
118# define RETc(x)	mov##x	pc, lr
119
120#endif
121
122.macro	cfi_pop		advance, reg, cfa_offset
123#ifdef __ELF__
124	.pushsection	.debug_frame
125	.byte	0x4		/* DW_CFA_advance_loc4 */
126	.4byte	\advance
127	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
128	.byte	0xe		/* DW_CFA_def_cfa_offset */
129	.uleb128 \cfa_offset
130	.popsection
131#endif
132.endm
133.macro	cfi_push	advance, reg, offset, cfa_offset
134#ifdef __ELF__
135	.pushsection	.debug_frame
136	.byte	0x4		/* DW_CFA_advance_loc4 */
137	.4byte	\advance
138	.byte	(0x80 | \reg)	/* DW_CFA_offset */
139	.uleb128 (\offset / -4)
140	.byte	0xe		/* DW_CFA_def_cfa_offset */
141	.uleb128 \cfa_offset
142	.popsection
143#endif
144.endm
145.macro cfi_start	start_label, end_label
146#ifdef __ELF__
147	.pushsection	.debug_frame
148LSYM(Lstart_frame):
149	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
150LSYM(Lstart_cie):
151        .4byte	0xffffffff	@ CIE Identifier Tag
152        .byte	0x1	@ CIE Version
153        .ascii	"\0"	@ CIE Augmentation
154        .uleb128 0x1	@ CIE Code Alignment Factor
155        .sleb128 -4	@ CIE Data Alignment Factor
156        .byte	0xe	@ CIE RA Column
157        .byte	0xc	@ DW_CFA_def_cfa
158        .uleb128 0xd
159        .uleb128 0x0
160
161	.align 2
162LSYM(Lend_cie):
163	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
164LSYM(Lstart_fde):
165	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
166	.4byte	\start_label	@ FDE initial location
167	.4byte	\end_label-\start_label	@ FDE address range
168	.popsection
169#endif
170.endm
171.macro cfi_end	end_label
172#ifdef __ELF__
173	.pushsection	.debug_frame
174	.align	2
175LSYM(Lend_fde):
176	.popsection
177\end_label:
178#endif
179.endm
180
181/* Don't pass dirn, it's there just to get token pasting right.  */
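/* For example, "RETLDM r4" restores r4 (plus lr or pc) from the stack and
   returns, using bx when interworking is in effect.  */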
182
183.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
184#if defined (__INTERWORKING__)
185	.ifc "\regs",""
186	ldr\cond	lr, [sp], #8
187	.else
188# if defined(__thumb2__)
189	pop\cond	{\regs, lr}
190# else
191	ldm\cond\dirn	sp!, {\regs, lr}
192# endif
193	.endif
194	.ifnc "\unwind", ""
195	/* Mark LR as restored.  */
19697:	cfi_pop 97b - \unwind, 0xe, 0x0
197	.endif
198	bx\cond	lr
199#else
200	/* Caller is responsible for providing IT instruction.  */
201	.ifc "\regs",""
202	ldr\cond	pc, [sp], #8
203	.else
204# if defined(__thumb2__)
205	pop\cond	{\regs, pc}
206# else
207	ldm\cond\dirn	sp!, {\regs, pc}
208# endif
209	.endif
210#endif
211.endm
212
/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However, this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.  */
216#if defined(__thumb2__)
217.macro do_it cond, suffix=""
218	it\suffix	\cond
219.endm
220.macro shift1 op, arg0, arg1, arg2
221	\op	\arg0, \arg1, \arg2
222.endm
223#define do_push	push
224#define do_pop	pop
225#define COND(op1, op2, cond) op1 ## op2 ## cond
226/* Perform an arithmetic operation with a variable shift operand.  This
227   requires two instructions and a scratch register on Thumb-2.  */
228.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
229	\shiftop \tmp, \src2, \shiftreg
230	\name \dest, \src1, \tmp
231.endm
232#else
233.macro do_it cond, suffix=""
234.endm
235.macro shift1 op, arg0, arg1, arg2
236	mov	\arg0, \arg1, \op \arg2
237.endm
238#define do_push	stmfd sp!,
239#define do_pop	ldmfd sp!,
240#define COND(op1, op2, cond) op1 ## cond ## op2
241.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
242	\name \dest, \src1, \src2, \shiftop \shiftreg
243.endm
244#endif
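/* For example, "shiftop orr, r0, r0, r1, lsl, r2, r3" expands to
   "lsl r3, r1, r2" followed by "orr r0, r0, r3" for Thumb-2, but to the
   single instruction "orr r0, r0, r1, lsl r2" for ARM.  */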
245
246#ifdef __ARM_EABI__
247.macro ARM_LDIV0 name signed
248	cmp	r0, #0
249	.ifc	\signed, unsigned
250	movne	r0, #0xffffffff
251	.else
252	movgt	r0, #0x7fffffff
253	movlt	r0, #0x80000000
254	.endif
255	b	SYM (__aeabi_idiv0) __PLT__
256.endm
257#else
258.macro ARM_LDIV0 name signed
259	str	lr, [sp, #-8]!
26098:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
261	bl	SYM (__div0) __PLT__
262	mov	r0, #0			@ About as wrong as it could be.
263	RETLDM	unwind=98b
264.endm
265#endif
266
267
268#ifdef __ARM_EABI__
269.macro THUMB_LDIV0 name signed
270#ifdef NOT_ISA_TARGET_32BIT
271
272	push	{r0, lr}
273	mov	r0, #0
274	bl	SYM(__aeabi_idiv0)
275	@ We know we are not on armv4t, so pop pc is safe.
276	pop	{r1, pc}
277
278#elif defined(__thumb2__)
279	.syntax unified
280	.ifc \signed, unsigned
281	cbz	r0, 1f
282	mov	r0, #0xffffffff
2831:
284	.else
285	cmp	r0, #0
286	do_it	gt
287	movgt	r0, #0x7fffffff
288	do_it	lt
289	movlt	r0, #0x80000000
290	.endif
291	b.w	SYM(__aeabi_idiv0) __PLT__
292#else
293	.align	2
294	bx	pc
295	nop
296	.arm
297	cmp	r0, #0
298	.ifc	\signed, unsigned
299	movne	r0, #0xffffffff
300	.else
301	movgt	r0, #0x7fffffff
302	movlt	r0, #0x80000000
303	.endif
304	b	SYM(__aeabi_idiv0) __PLT__
305	.thumb
306#endif
307.endm
308#else
309.macro THUMB_LDIV0 name signed
310	push	{ r1, lr }
31198:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
312	bl	SYM (__div0)
313	mov	r0, #0			@ About as wrong as it could be.
314#if defined (__INTERWORKING__)
315	pop	{ r1, r2 }
316	bx	r2
317#else
318	pop	{ r1, pc }
319#endif
320.endm
321#endif
322
323.macro FUNC_END name
324	SIZE (__\name)
325.endm
326
327.macro DIV_FUNC_END name signed
328	cfi_start	__\name, LSYM(Lend_div0)
329LSYM(Ldiv0):
330#ifdef __thumb__
331	THUMB_LDIV0 \name \signed
332#else
333	ARM_LDIV0 \name \signed
334#endif
335	cfi_end	LSYM(Lend_div0)
336	FUNC_END \name
337.endm
338
339.macro THUMB_FUNC_START name
340	.globl	SYM (\name)
341	TYPE	(\name)
342	.thumb_func
343SYM (\name):
344.endm
345
346/* Function start macros.  Variants for ARM and Thumb.  */
347
348#ifdef __thumb__
349#define THUMB_FUNC .thumb_func
350#define THUMB_CODE .force_thumb
351# if defined(__thumb2__)
352#define THUMB_SYNTAX .syntax divided
353# else
354#define THUMB_SYNTAX
355# endif
356#else
357#define THUMB_FUNC
358#define THUMB_CODE
359#define THUMB_SYNTAX
360#endif
361
362.macro FUNC_START name
363	.text
364	.globl SYM (__\name)
365	TYPE (__\name)
366	.align 0
367	THUMB_CODE
368	THUMB_FUNC
369	THUMB_SYNTAX
370SYM (__\name):
371.endm
372
373.macro ARM_SYM_START name
374       TYPE (\name)
375       .align 0
376SYM (\name):
377.endm
378
379.macro SYM_END name
380       SIZE (\name)
381.endm
382
/* Special functions that will always be coded in ARM assembly, even in
   a Thumb-only compilation.  */
385
386#if defined(__thumb2__)
387
388/* For Thumb-2 we build everything in thumb mode.  */
389.macro ARM_FUNC_START name
390       FUNC_START \name
391       .syntax unified
392.endm
393#define EQUIV .thumb_set
394.macro  ARM_CALL name
395	bl	__\name
396.endm
397
398#elif defined(__INTERWORKING_STUBS__)
399
400.macro	ARM_FUNC_START name
401	FUNC_START \name
402	bx	pc
403	nop
404	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used to call
   directly from other local ARM routines.  */
407_L__\name:
408.endm
409#define EQUIV .thumb_set
410/* Branch directly to a function declared with ARM_FUNC_START.
411   Must be called in arm mode.  */
412.macro  ARM_CALL name
413	bl	_L__\name
414.endm
415
416#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */
417
418#ifdef NOT_ISA_TARGET_32BIT
419#define EQUIV .thumb_set
420#else
421.macro	ARM_FUNC_START name
422	.text
423	.globl SYM (__\name)
424	TYPE (__\name)
425	.align 0
426	.arm
427SYM (__\name):
428.endm
429#define EQUIV .set
430.macro  ARM_CALL name
431	bl	__\name
432.endm
433#endif
434
435#endif
436
437.macro	FUNC_ALIAS new old
438	.globl	SYM (__\new)
439#if defined (__thumb__)
440	.thumb_set	SYM (__\new), SYM (__\old)
441#else
442	.set	SYM (__\new), SYM (__\old)
443#endif
444.endm
445
446#ifndef NOT_ISA_TARGET_32BIT
447.macro	ARM_FUNC_ALIAS new old
448	.globl	SYM (__\new)
449	EQUIV	SYM (__\new), SYM (__\old)
450#if defined(__INTERWORKING_STUBS__)
451	.set	SYM (_L__\new), SYM (_L__\old)
452#endif
453.endm
454#endif
455
456#ifdef __ARMEB__
457#define xxh r0
458#define xxl r1
459#define yyh r2
460#define yyl r3
461#else
462#define xxh r1
463#define xxl r0
464#define yyh r3
465#define yyl r2
466#endif
467
468#ifdef __ARM_EABI__
469.macro	WEAK name
470	.weak SYM (__\name)
471.endm
472#endif
473
474#ifdef __thumb__
475/* Register aliases.  */
476
477work		.req	r4	@ XXXX is this safe ?
478dividend	.req	r0
479divisor		.req	r1
480overdone	.req	r2
481result		.req	r2
482curbit		.req	r3
483#endif
484#if 0
485ip		.req	r12
486sp		.req	r13
487lr		.req	r14
488pc		.req	r15
489#endif
490
491/* ------------------------------------------------------------------------ */
492/*		Bodies of the division and modulo routines.		    */
493/* ------------------------------------------------------------------------ */
494.macro ARM_DIV_BODY dividend, divisor, result, curbit
495
496#if defined (__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__)
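	@ With CLZ available, align the divisor's most significant bit with
	@ the dividend's by computing clz(divisor) - clz(dividend), then
	@ branch into the unrolled compare/subtract sequence below, skipping
	@ the steps whose shift count is too large.  The branch offset is the
	@ number of skipped steps scaled by the byte size of one unrolled step.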
497
498#if defined (__thumb2__)
499	clz	\curbit, \dividend
500	clz	\result, \divisor
501	sub	\curbit, \result, \curbit
502	rsb	\curbit, \curbit, #31
503	adr	\result, 1f
504	add	\curbit, \result, \curbit, lsl #4
505	mov	\result, #0
506	mov	pc, \curbit
507.p2align 3
5081:
509	.set	shift, 32
510	.rept	32
511	.set	shift, shift - 1
512	cmp.w	\dividend, \divisor, lsl #shift
513	nop.n
514	adc.w	\result, \result, \result
515	it	cs
516	subcs.w	\dividend, \dividend, \divisor, lsl #shift
517	.endr
518#else
519	clz	\curbit, \dividend
520	clz	\result, \divisor
521	sub	\curbit, \result, \curbit
522	rsbs	\curbit, \curbit, #31
523	addne	\curbit, \curbit, \curbit, lsl #1
524	mov	\result, #0
525	addne	pc, pc, \curbit, lsl #2
526	nop
527	.set	shift, 32
528	.rept	32
529	.set	shift, shift - 1
530	cmp	\dividend, \divisor, lsl #shift
531	adc	\result, \result, \result
532	subcs	\dividend, \dividend, \divisor, lsl #shift
533	.endr
534#endif
535
536#else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
537#if defined (__ARM_FEATURE_CLZ)
538
539	clz	\curbit, \divisor
540	clz	\result, \dividend
541	sub	\result, \curbit, \result
542	mov	\curbit, #1
543	mov	\divisor, \divisor, lsl \result
544	mov	\curbit, \curbit, lsl \result
545	mov	\result, #0
546
547#else /* !defined (__ARM_FEATURE_CLZ) */
548
	@ Initially shift the divisor left by 3 bits if possible, and set
	@ curbit accordingly.  This allows curbit to be located at the left
	@ end of each 4-bit nibble in the division loop, saving one loop
	@ iteration in most cases.
553	tst	\divisor, #0xe0000000
554	moveq	\divisor, \divisor, lsl #3
555	moveq	\curbit, #8
556	movne	\curbit, #1
557
558	@ Unless the divisor is very big, shift it up in multiples of
559	@ four bits, since this is the amount of unwinding in the main
560	@ division loop.  Continue shifting until the divisor is
561	@ larger than the dividend.
5621:	cmp	\divisor, #0x10000000
563	cmplo	\divisor, \dividend
564	movlo	\divisor, \divisor, lsl #4
565	movlo	\curbit, \curbit, lsl #4
566	blo	1b
567
	@ For a very big divisor, we must shift it one bit at a time, or
	@ we will be in danger of overflowing.
5701:	cmp	\divisor, #0x80000000
571	cmplo	\divisor, \dividend
572	movlo	\divisor, \divisor, lsl #1
573	movlo	\curbit, \curbit, lsl #1
574	blo	1b
575
576	mov	\result, #0
577
578#endif /* !defined (__ARM_FEATURE_CLZ) */
579
580	@ Division loop
5811:	cmp	\dividend, \divisor
582	do_it	hs, t
583	subhs	\dividend, \dividend, \divisor
584	orrhs	\result,   \result,   \curbit
585	cmp	\dividend, \divisor,  lsr #1
586	do_it	hs, t
587	subhs	\dividend, \dividend, \divisor, lsr #1
588	orrhs	\result,   \result,   \curbit,  lsr #1
589	cmp	\dividend, \divisor,  lsr #2
590	do_it	hs, t
591	subhs	\dividend, \dividend, \divisor, lsr #2
592	orrhs	\result,   \result,   \curbit,  lsr #2
593	cmp	\dividend, \divisor,  lsr #3
594	do_it	hs, t
595	subhs	\dividend, \dividend, \divisor, lsr #3
596	orrhs	\result,   \result,   \curbit,  lsr #3
597	cmp	\dividend, #0			@ Early termination?
598	do_it	ne, t
599	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
600	movne	\divisor,  \divisor, lsr #4
601	bne	1b
602
603#endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
604
605.endm
606/* ------------------------------------------------------------------------ */
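/* Compute the order (index of the most significant set bit) of the divisor.
   The callers only use this when the divisor is a power of two, so the
   result is log2 of the divisor.  */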
607.macro ARM_DIV2_ORDER divisor, order
608
609#if defined (__ARM_FEATURE_CLZ)
610
611	clz	\order, \divisor
612	rsb	\order, \order, #31
613
614#else
615
616	cmp	\divisor, #(1 << 16)
617	movhs	\divisor, \divisor, lsr #16
618	movhs	\order, #16
619	movlo	\order, #0
620
621	cmp	\divisor, #(1 << 8)
622	movhs	\divisor, \divisor, lsr #8
623	addhs	\order, \order, #8
624
625	cmp	\divisor, #(1 << 4)
626	movhs	\divisor, \divisor, lsr #4
627	addhs	\order, \order, #4
628
629	cmp	\divisor, #(1 << 2)
630	addhi	\order, \order, #3
631	addls	\order, \order, \divisor, lsr #1
632
633#endif
634
635.endm
636/* ------------------------------------------------------------------------ */
637.macro ARM_MOD_BODY dividend, divisor, order, spare
638
639#if defined(__ARM_FEATURE_CLZ) && ! defined (__OPTIMIZE_SIZE__)
640
641	clz	\order, \divisor
642	clz	\spare, \dividend
643	sub	\order, \order, \spare
644	rsbs	\order, \order, #31
645	addne	pc, pc, \order, lsl #3
646	nop
647	.set	shift, 32
648	.rept	32
649	.set	shift, shift - 1
650	cmp	\dividend, \divisor, lsl #shift
651	subcs	\dividend, \dividend, \divisor, lsl #shift
652	.endr
653
654#else /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
655#if defined (__ARM_FEATURE_CLZ)
656
657	clz	\order, \divisor
658	clz	\spare, \dividend
659	sub	\order, \order, \spare
660	mov	\divisor, \divisor, lsl \order
661
662#else /* !defined (__ARM_FEATURE_CLZ) */
663
664	mov	\order, #0
665
666	@ Unless the divisor is very big, shift it up in multiples of
667	@ four bits, since this is the amount of unwinding in the main
668	@ division loop.  Continue shifting until the divisor is
669	@ larger than the dividend.
6701:	cmp	\divisor, #0x10000000
671	cmplo	\divisor, \dividend
672	movlo	\divisor, \divisor, lsl #4
673	addlo	\order, \order, #4
674	blo	1b
675
	@ For a very big divisor, we must shift it one bit at a time, or
	@ we will be in danger of overflowing.
6781:	cmp	\divisor, #0x80000000
679	cmplo	\divisor, \dividend
680	movlo	\divisor, \divisor, lsl #1
681	addlo	\order, \order, #1
682	blo	1b
683
684#endif /* !defined (__ARM_FEATURE_CLZ) */
685
	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
688	subs	\order, \order, #3		@ yes, 3 is intended here
689	blt	2f
690
6911:	cmp	\dividend, \divisor
692	subhs	\dividend, \dividend, \divisor
693	cmp	\dividend, \divisor,  lsr #1
694	subhs	\dividend, \dividend, \divisor, lsr #1
695	cmp	\dividend, \divisor,  lsr #2
696	subhs	\dividend, \dividend, \divisor, lsr #2
697	cmp	\dividend, \divisor,  lsr #3
698	subhs	\dividend, \dividend, \divisor, lsr #3
699	cmp	\dividend, #1
700	mov	\divisor, \divisor, lsr #4
701	subges	\order, \order, #4
702	bge	1b
703
704	tst	\order, #3
705	teqne	\dividend, #0
706	beq	5f
707
	@ Either 1, 2 or 3 comparisons/subtractions are left.
7092:	cmn	\order, #2
710	blt	4f
711	beq	3f
712	cmp	\dividend, \divisor
713	subhs	\dividend, \dividend, \divisor
714	mov	\divisor,  \divisor,  lsr #1
7153:	cmp	\dividend, \divisor
716	subhs	\dividend, \dividend, \divisor
717	mov	\divisor,  \divisor,  lsr #1
7184:	cmp	\dividend, \divisor
719	subhs	\dividend, \dividend, \divisor
7205:
721
722#endif /* !defined (__ARM_FEATURE_CLZ) || defined (__OPTIMIZE_SIZE__) */
723
724.endm
725/* ------------------------------------------------------------------------ */
726.macro THUMB_DIV_MOD_BODY modulo
727	@ Load the constant 0x10000000 into our work register.
728	mov	work, #1
729	lsl	work, #28
730LSYM(Loop1):
731	@ Unless the divisor is very big, shift it up in multiples of
732	@ four bits, since this is the amount of unwinding in the main
733	@ division loop.  Continue shifting until the divisor is
734	@ larger than the dividend.
735	cmp	divisor, work
736	bhs	LSYM(Lbignum)
737	cmp	divisor, dividend
738	bhs	LSYM(Lbignum)
739	lsl	divisor, #4
740	lsl	curbit,  #4
741	b	LSYM(Loop1)
742LSYM(Lbignum):
743	@ Set work to 0x80000000
744	lsl	work, #3
745LSYM(Loop2):
	@ For a very big divisor, we must shift it one bit at a time, or
	@ we will be in danger of overflowing.
748	cmp	divisor, work
749	bhs	LSYM(Loop3)
750	cmp	divisor, dividend
751	bhs	LSYM(Loop3)
752	lsl	divisor, #1
753	lsl	curbit,  #1
754	b	LSYM(Loop2)
755LSYM(Loop3):
756	@ Test for possible subtractions ...
757  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done so that we can fix them
	@ up afterwards.
761	mov	overdone, #0
762	cmp	dividend, divisor
763	blo	LSYM(Lover1)
764	sub	dividend, dividend, divisor
765LSYM(Lover1):
766	lsr	work, divisor, #1
767	cmp	dividend, work
768	blo	LSYM(Lover2)
769	sub	dividend, dividend, work
770	mov	ip, curbit
771	mov	work, #1
772	ror	curbit, work
773	orr	overdone, curbit
774	mov	curbit, ip
775LSYM(Lover2):
776	lsr	work, divisor, #2
777	cmp	dividend, work
778	blo	LSYM(Lover3)
779	sub	dividend, dividend, work
780	mov	ip, curbit
781	mov	work, #2
782	ror	curbit, work
783	orr	overdone, curbit
784	mov	curbit, ip
785LSYM(Lover3):
786	lsr	work, divisor, #3
787	cmp	dividend, work
788	blo	LSYM(Lover4)
789	sub	dividend, dividend, work
790	mov	ip, curbit
791	mov	work, #3
792	ror	curbit, work
793	orr	overdone, curbit
794	mov	curbit, ip
795LSYM(Lover4):
796	mov	ip, curbit
797  .else
798	@ ... and note which bits are done in the result.  On the final pass,
799	@ this may subtract too much from the dividend, but the result will be ok,
800	@ since the "bit" will have been shifted out at the bottom.
801	cmp	dividend, divisor
802	blo	LSYM(Lover1)
803	sub	dividend, dividend, divisor
804	orr	result, result, curbit
805LSYM(Lover1):
806	lsr	work, divisor, #1
807	cmp	dividend, work
808	blo	LSYM(Lover2)
809	sub	dividend, dividend, work
810	lsr	work, curbit, #1
811	orr	result, work
812LSYM(Lover2):
813	lsr	work, divisor, #2
814	cmp	dividend, work
815	blo	LSYM(Lover3)
816	sub	dividend, dividend, work
817	lsr	work, curbit, #2
818	orr	result, work
819LSYM(Lover3):
820	lsr	work, divisor, #3
821	cmp	dividend, work
822	blo	LSYM(Lover4)
823	sub	dividend, dividend, work
824	lsr	work, curbit, #3
825	orr	result, work
826LSYM(Lover4):
827  .endif
828
829	cmp	dividend, #0			@ Early termination?
830	beq	LSYM(Lover5)
831	lsr	curbit,  #4			@ No, any more bits to do?
832	beq	LSYM(Lover5)
833	lsr	divisor, #4
834	b	LSYM(Loop3)
835LSYM(Lover5):
836  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which ones were not
	@ needed is determined by the position of the bit, stored in ip.
840	mov	work, #0xe
841	lsl	work, #28
842	and	overdone, work
843	beq	LSYM(Lgot_result)
844
	@ If we terminated early because the dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this explicitly
	@ (rather than relying upon the TSTs to prevent the additions), since
	@ the bit in ip could be in the top two bits, which might then match
	@ one of the smaller RORs.
851	mov	curbit, ip
852	mov	work, #0x7
853	tst	curbit, work
854	beq	LSYM(Lgot_result)
855
856	mov	curbit, ip
857	mov	work, #3
858	ror	curbit, work
859	tst	overdone, curbit
860	beq	LSYM(Lover6)
861	lsr	work, divisor, #3
862	add	dividend, work
863LSYM(Lover6):
864	mov	curbit, ip
865	mov	work, #2
866	ror	curbit, work
867	tst	overdone, curbit
868	beq	LSYM(Lover7)
869	lsr	work, divisor, #2
870	add	dividend, work
871LSYM(Lover7):
872	mov	curbit, ip
873	mov	work, #1
874	ror	curbit, work
875	tst	overdone, curbit
876	beq	LSYM(Lgot_result)
877	lsr	work, divisor, #1
878	add	dividend, work
879  .endif
880LSYM(Lgot_result):
881.endm
882
883/* If performance is preferred, the following functions are provided.  */
884#if defined(__prefer_thumb__) && !defined(__OPTIMIZE_SIZE__)
885
/* Branch to div(n): jump to label if curbit (the dividend shifted right
   by n) is lower than the divisor.  */
887.macro BranchToDiv n, label
888	lsr	curbit, dividend, \n
889	cmp	curbit, divisor
890	blo	\label
891.endm
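/* For example, "BranchToDiv #8, LSYM(Lthumb1_div8)" branches when
   (dividend >> 8) is already below the divisor, i.e. when at most eight
   quotient bits remain to be computed.  */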
892
/* Body of div(n).  Shift the divisor left by n bits and compare it with
   the dividend.  Update the dividend with the subtraction result.  */
895.macro DoDiv n
896	lsr	curbit, dividend, \n
897	cmp	curbit, divisor
898	bcc	1f
899	lsl	curbit, divisor, \n
900	sub	dividend, dividend, curbit
901
9021:	adc	result, result
903.endm
904
/* The body of division with a positive divisor.  Unless the divisor is
   very big, shift it up in multiples of four bits, since this is the
   amount of unwinding in the main division loop.  Continue shifting until
   the divisor is larger than the dividend.  */
909.macro THUMB1_Div_Positive
910	mov	result, #0
911	BranchToDiv #1, LSYM(Lthumb1_div1)
912	BranchToDiv #4, LSYM(Lthumb1_div4)
913	BranchToDiv #8, LSYM(Lthumb1_div8)
914	BranchToDiv #12, LSYM(Lthumb1_div12)
915	BranchToDiv #16, LSYM(Lthumb1_div16)
916LSYM(Lthumb1_div_large_positive):
917	mov	result, #0xff
918	lsl	divisor, divisor, #8
919	rev	result, result
920	lsr	curbit, dividend, #16
921	cmp	curbit, divisor
922	blo	1f
923	asr	result, #8
924	lsl	divisor, divisor, #8
925	beq	LSYM(Ldivbyzero_waypoint)
926
9271:	lsr	curbit, dividend, #12
928	cmp	curbit, divisor
929	blo	LSYM(Lthumb1_div12)
930	b	LSYM(Lthumb1_div16)
931LSYM(Lthumb1_div_loop):
932	lsr	divisor, divisor, #8
933LSYM(Lthumb1_div16):
	DoDiv	#15
	DoDiv	#14
	DoDiv	#13
	DoDiv	#12
LSYM(Lthumb1_div12):
	DoDiv	#11
	DoDiv	#10
	DoDiv	#9
	DoDiv	#8
	bcs	LSYM(Lthumb1_div_loop)
LSYM(Lthumb1_div8):
	DoDiv	#7
	DoDiv	#6
	DoDiv	#5
LSYM(Lthumb1_div5):
	DoDiv	#4
LSYM(Lthumb1_div4):
	DoDiv	#3
LSYM(Lthumb1_div3):
	DoDiv	#2
LSYM(Lthumb1_div2):
	DoDiv	#1
956LSYM(Lthumb1_div1):
957	sub	divisor, dividend, divisor
958	bcs	1f
959	cpy	divisor, dividend
960
9611:	adc	result, result
962	cpy	dividend, result
963	RET
964
965LSYM(Ldivbyzero_waypoint):
966	b	LSYM(Ldiv0)
967.endm
968
/* The body of division with a negative divisor.  Similar to
   THUMB1_Div_Positive, except that the shift steps are in multiples
   of six bits.  */
972.macro THUMB1_Div_Negative
973	lsr	result, divisor, #31
974	beq	1f
975	neg	divisor, divisor
976
9771:	asr	curbit, dividend, #32
978	bcc	2f
979	neg	dividend, dividend
980
9812:	eor	curbit, result
982	mov	result, #0
983	cpy	ip, curbit
984	BranchToDiv #4, LSYM(Lthumb1_div_negative4)
985	BranchToDiv #8, LSYM(Lthumb1_div_negative8)
986LSYM(Lthumb1_div_large):
987	mov	result, #0xfc
988	lsl	divisor, divisor, #6
989	rev	result, result
990	lsr	curbit, dividend, #8
991	cmp	curbit, divisor
992	blo	LSYM(Lthumb1_div_negative8)
993
994	lsl	divisor, divisor, #6
995	asr	result, result, #6
996	cmp	curbit, divisor
997	blo	LSYM(Lthumb1_div_negative8)
998
999	lsl	divisor, divisor, #6
1000	asr	result, result, #6
1001	cmp	curbit, divisor
1002	blo	LSYM(Lthumb1_div_negative8)
1003
1004	lsl	divisor, divisor, #6
1005	beq	LSYM(Ldivbyzero_negative)
1006	asr	result, result, #6
1007	b	LSYM(Lthumb1_div_negative8)
1008LSYM(Lthumb1_div_negative_loop):
1009	lsr	divisor, divisor, #6
1010LSYM(Lthumb1_div_negative8):
1011	DoDiv	#7
1012	DoDiv	#6
1013	DoDiv	#5
1014	DoDiv	#4
1015LSYM(Lthumb1_div_negative4):
1016	DoDiv	#3
1017	DoDiv	#2
1018	bcs	LSYM(Lthumb1_div_negative_loop)
1019	DoDiv	#1
1020	sub	divisor, dividend, divisor
1021	bcs	1f
1022	cpy	divisor, dividend
1023
10241:	cpy	curbit, ip
1025	adc	result, result
1026	asr	curbit, curbit, #1
1027	cpy	dividend, result
1028	bcc	2f
1029	neg	dividend, dividend
1030	cmp	curbit, #0
1031
10322:	bpl	3f
1033	neg	divisor, divisor
1034
10353:	RET
1036
1037LSYM(Ldivbyzero_negative):
1038	cpy	curbit, ip
1039	asr	curbit, curbit, #1
1040	bcc	LSYM(Ldiv0)
1041	neg	dividend, dividend
1042.endm
1043#endif /* ARM Thumb version.  */
1044
1045/* ------------------------------------------------------------------------ */
1046/*		Start of the Real Functions				    */
1047/* ------------------------------------------------------------------------ */
1048#ifdef L_udivsi3
1049
1050#if defined(__prefer_thumb__)
1051
1052	FUNC_START udivsi3
1053	FUNC_ALIAS aeabi_uidiv udivsi3
1054#if defined(__OPTIMIZE_SIZE__)
1055
1056	cmp	divisor, #0
1057	beq	LSYM(Ldiv0)
1058LSYM(udivsi3_skip_div0_test):
1059	mov	curbit, #1
1060	mov	result, #0
1061
1062	push	{ work }
1063	cmp	dividend, divisor
1064	blo	LSYM(Lgot_result)
1065
1066	THUMB_DIV_MOD_BODY 0
1067
1068	mov	r0, result
1069	pop	{ work }
1070	RET
1071
/* Implementation of aeabi_uidiv for ARMv6-M.  This version is only
   used on ARMv6-M when we need an efficient implementation.  */
1074#else
1075LSYM(udivsi3_skip_div0_test):
1076	THUMB1_Div_Positive
1077
1078#endif /* __OPTIMIZE_SIZE__ */
1079
1080#elif defined(__ARM_ARCH_EXT_IDIV__)
1081
1082	ARM_FUNC_START udivsi3
1083	ARM_FUNC_ALIAS aeabi_uidiv udivsi3
1084
1085	cmp	r1, #0
1086	beq	LSYM(Ldiv0)
1087
1088	udiv	r0, r0, r1
1089	RET
1090
1091#else /* ARM version/Thumb-2.  */
1092
1093	ARM_FUNC_START udivsi3
1094	ARM_FUNC_ALIAS aeabi_uidiv udivsi3
1095
1096	/* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
1097	   check for division-by-zero a second time.  */
1098LSYM(udivsi3_skip_div0_test):
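	/* subs sets Z when the divisor is 1 (the quotient is then just the
	   dividend) and clears C when it is 0; r2 = divisor - 1 also serves
	   below as the mask for the power-of-two divisor test.  */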
1099	subs	r2, r1, #1
1100	do_it	eq
1101	RETc(eq)
1102	bcc	LSYM(Ldiv0)
1103	cmp	r0, r1
1104	bls	11f
1105	tst	r1, r2
1106	beq	12f
1107
1108	ARM_DIV_BODY r0, r1, r2, r3
1109
1110	mov	r0, r2
1111	RET
1112
111311:	do_it	eq, e
1114	moveq	r0, #1
1115	movne	r0, #0
1116	RET
1117
111812:	ARM_DIV2_ORDER r1, r2
1119
1120	mov	r0, r0, lsr r2
1121	RET
1122
1123#endif /* ARM version */
1124
1125	DIV_FUNC_END udivsi3 unsigned
1126
1127#if defined(__prefer_thumb__)
1128FUNC_START aeabi_uidivmod
1129	cmp	r1, #0
1130	beq	LSYM(Ldiv0)
1131# if defined(__OPTIMIZE_SIZE__)
1132	push	{r0, r1, lr}
1133	bl	LSYM(udivsi3_skip_div0_test)
1134	POP	{r1, r2, r3}
1135	mul	r2, r0
1136	sub	r1, r1, r2
1137	bx	r3
1138# else
1139	/* Both the quotient and remainder are calculated simultaneously
1140	   in THUMB1_Div_Positive.  There is no need to calculate the
1141	   remainder again here.  */
1142	b	LSYM(udivsi3_skip_div0_test)
1143	RET
1144# endif /* __OPTIMIZE_SIZE__ */
1145
1146#elif defined(__ARM_ARCH_EXT_IDIV__)
1147ARM_FUNC_START aeabi_uidivmod
1148	cmp	r1, #0
1149	beq	LSYM(Ldiv0)
1150	mov     r2, r0
1151	udiv	r0, r0, r1
1152	mls     r1, r0, r1, r2
1153	RET
1154#else
1155ARM_FUNC_START aeabi_uidivmod
1156	cmp	r1, #0
1157	beq	LSYM(Ldiv0)
1158	stmfd	sp!, { r0, r1, lr }
1159	bl	LSYM(udivsi3_skip_div0_test)
1160	ldmfd	sp!, { r1, r2, lr }
1161	mul	r3, r2, r0
1162	sub	r1, r1, r3
1163	RET
1164#endif
1165	FUNC_END aeabi_uidivmod
1166
1167#endif /* L_udivsi3 */
1168/* ------------------------------------------------------------------------ */
1169#ifdef L_umodsi3
1170
1171#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1
1172
1173	ARM_FUNC_START umodsi3
1174
1175	cmp	r1, #0
1176	beq	LSYM(Ldiv0)
1177	udiv	r2, r0, r1
1178	mls     r0, r1, r2, r0
1179	RET
1180
1181#elif defined(__thumb__)
1182
1183	FUNC_START umodsi3
1184
1185	cmp	divisor, #0
1186	beq	LSYM(Ldiv0)
1187	mov	curbit, #1
1188	cmp	dividend, divisor
1189	bhs	LSYM(Lover10)
1190	RET
1191
1192LSYM(Lover10):
1193	push	{ work }
1194
1195	THUMB_DIV_MOD_BODY 1
1196
1197	pop	{ work }
1198	RET
1199
1200#else  /* ARM version.  */
1201
1202	FUNC_START umodsi3
1203
1204	subs	r2, r1, #1			@ compare divisor with 1
1205	bcc	LSYM(Ldiv0)
1206	cmpne	r0, r1				@ compare dividend with divisor
1207	moveq   r0, #0
1208	tsthi	r1, r2				@ see if divisor is power of 2
1209	andeq	r0, r0, r2
1210	RETc(ls)
1211
1212	ARM_MOD_BODY r0, r1, r2, r3
1213
1214	RET
1215
1216#endif /* ARM version.  */
1217
1218	DIV_FUNC_END umodsi3 unsigned
1219
1220#endif /* L_umodsi3 */
1221/* ------------------------------------------------------------------------ */
1222#ifdef L_divsi3
1223
1224#if defined(__prefer_thumb__)
1225
1226	FUNC_START divsi3
1227	FUNC_ALIAS aeabi_idiv divsi3
1228#if defined(__OPTIMIZE_SIZE__)
1229
1230	cmp	divisor, #0
1231	beq	LSYM(Ldiv0)
1232LSYM(divsi3_skip_div0_test):
1233	push	{ work }
1234	mov	work, dividend
1235	eor	work, divisor		@ Save the sign of the result.
1236	mov	ip, work
1237	mov	curbit, #1
1238	mov	result, #0
1239	cmp	divisor, #0
1240	bpl	LSYM(Lover10)
1241	neg	divisor, divisor	@ Loops below use unsigned.
1242LSYM(Lover10):
1243	cmp	dividend, #0
1244	bpl	LSYM(Lover11)
1245	neg	dividend, dividend
1246LSYM(Lover11):
1247	cmp	dividend, divisor
1248	blo	LSYM(Lgot_result)
1249
1250	THUMB_DIV_MOD_BODY 0
1251
1252	mov	r0, result
1253	mov	work, ip
1254	cmp	work, #0
1255	bpl	LSYM(Lover12)
1256	neg	r0, r0
1257LSYM(Lover12):
1258	pop	{ work }
1259	RET
1260
/* Implementation of aeabi_idiv for ARMv6-M.  This version is only
   used on ARMv6-M when we need an efficient implementation.  */
1263#else
1264LSYM(divsi3_skip_div0_test):
1265	cpy	curbit, dividend
1266	orr	curbit, divisor
1267	bmi	LSYM(Lthumb1_div_negative)
1268
1269LSYM(Lthumb1_div_positive):
1270	THUMB1_Div_Positive
1271
1272LSYM(Lthumb1_div_negative):
1273	THUMB1_Div_Negative
1274
1275#endif /* __OPTIMIZE_SIZE__ */
1276
1277#elif defined(__ARM_ARCH_EXT_IDIV__)
1278
1279	ARM_FUNC_START divsi3
1280	ARM_FUNC_ALIAS aeabi_idiv divsi3
1281
1282	cmp 	r1, #0
1283	beq	LSYM(Ldiv0)
1284	sdiv	r0, r0, r1
1285	RET
1286
1287#else /* ARM/Thumb-2 version.  */
1288
1289	ARM_FUNC_START divsi3
1290	ARM_FUNC_ALIAS aeabi_idiv divsi3
1291
1292	cmp	r1, #0
1293	beq	LSYM(Ldiv0)
1294LSYM(divsi3_skip_div0_test):
1295	eor	ip, r0, r1			@ save the sign of the result.
1296	do_it	mi
1297	rsbmi	r1, r1, #0			@ loops below use unsigned.
1298	subs	r2, r1, #1			@ division by 1 or -1 ?
1299	beq	10f
1300	movs	r3, r0
1301	do_it	mi
1302	rsbmi	r3, r0, #0			@ positive dividend value
1303	cmp	r3, r1
1304	bls	11f
1305	tst	r1, r2				@ divisor is power of 2 ?
1306	beq	12f
1307
1308	ARM_DIV_BODY r3, r1, r0, r2
1309
1310	cmp	ip, #0
1311	do_it	mi
1312	rsbmi	r0, r0, #0
1313	RET
1314
131510:	teq	ip, r0				@ same sign ?
1316	do_it	mi
1317	rsbmi	r0, r0, #0
1318	RET
1319
132011:	do_it	lo
1321	movlo	r0, #0
1322	do_it	eq,t
1323	moveq	r0, ip, asr #31
1324	orreq	r0, r0, #1
1325	RET
1326
132712:	ARM_DIV2_ORDER r1, r2
1328
1329	cmp	ip, #0
1330	mov	r0, r3, lsr r2
1331	do_it	mi
1332	rsbmi	r0, r0, #0
1333	RET
1334
1335#endif /* ARM version */
1336
1337	DIV_FUNC_END divsi3 signed
1338
1339#if defined(__prefer_thumb__)
1340FUNC_START aeabi_idivmod
1341	cmp	r1, #0
1342	beq	LSYM(Ldiv0)
1343# if defined(__OPTIMIZE_SIZE__)
1344	push	{r0, r1, lr}
1345	bl	LSYM(divsi3_skip_div0_test)
1346	POP	{r1, r2, r3}
1347	mul	r2, r0
1348	sub	r1, r1, r2
1349	bx	r3
1350# else
1351	/* Both the quotient and remainder are calculated simultaneously
1352	   in THUMB1_Div_Positive and THUMB1_Div_Negative.  There is no
1353	   need to calculate the remainder again here.  */
1354	b	LSYM(divsi3_skip_div0_test)
1355	RET
1356# endif /* __OPTIMIZE_SIZE__ */
1357
1358#elif defined(__ARM_ARCH_EXT_IDIV__)
1359ARM_FUNC_START aeabi_idivmod
1360	cmp 	r1, #0
1361	beq	LSYM(Ldiv0)
1362	mov     r2, r0
1363	sdiv	r0, r0, r1
1364	mls     r1, r0, r1, r2
1365	RET
1366#else
1367ARM_FUNC_START aeabi_idivmod
1368	cmp	r1, #0
1369	beq	LSYM(Ldiv0)
1370	stmfd	sp!, { r0, r1, lr }
1371	bl	LSYM(divsi3_skip_div0_test)
1372	ldmfd	sp!, { r1, r2, lr }
1373	mul	r3, r2, r0
1374	sub	r1, r1, r3
1375	RET
1376#endif
1377	FUNC_END aeabi_idivmod
1378
1379#endif /* L_divsi3 */
1380/* ------------------------------------------------------------------------ */
1381#ifdef L_modsi3
1382
1383#if defined(__ARM_ARCH_EXT_IDIV__) && __ARM_ARCH_ISA_THUMB != 1
1384
1385	ARM_FUNC_START modsi3
1386
1387	cmp	r1, #0
1388	beq	LSYM(Ldiv0)
1389
1390	sdiv	r2, r0, r1
1391	mls     r0, r1, r2, r0
1392	RET
1393
1394#elif defined(__thumb__)
1395
1396	FUNC_START modsi3
1397
1398	mov	curbit, #1
1399	cmp	divisor, #0
1400	beq	LSYM(Ldiv0)
1401	bpl	LSYM(Lover10)
1402	neg	divisor, divisor		@ Loops below use unsigned.
1403LSYM(Lover10):
1404	push	{ work }
	@ Need to save the sign of the dividend; unfortunately, we need the
	@ work register later on.  Must do this after saving the original
	@ value of the work register, because we will pop this value off
	@ first.
1408	push	{ dividend }
1409	cmp	dividend, #0
1410	bpl	LSYM(Lover11)
1411	neg	dividend, dividend
1412LSYM(Lover11):
1413	cmp	dividend, divisor
1414	blo	LSYM(Lgot_result)
1415
1416	THUMB_DIV_MOD_BODY 1
1417
1418	pop	{ work }
1419	cmp	work, #0
1420	bpl	LSYM(Lover12)
1421	neg	dividend, dividend
1422LSYM(Lover12):
1423	pop	{ work }
1424	RET
1425
1426#else /* ARM version.  */
1427
1428	FUNC_START modsi3
1429
1430	cmp	r1, #0
1431	beq	LSYM(Ldiv0)
1432	rsbmi	r1, r1, #0			@ loops below use unsigned.
1433	movs	ip, r0				@ preserve sign of dividend
1434	rsbmi	r0, r0, #0			@ if negative make positive
1435	subs	r2, r1, #1			@ compare divisor with 1
1436	cmpne	r0, r1				@ compare dividend with divisor
1437	moveq	r0, #0
1438	tsthi	r1, r2				@ see if divisor is power of 2
1439	andeq	r0, r0, r2
1440	bls	10f
1441
1442	ARM_MOD_BODY r0, r1, r2, r3
1443
144410:	cmp	ip, #0
1445	rsbmi	r0, r0, #0
1446	RET
1447
1448#endif /* ARM version */
1449
1450	DIV_FUNC_END modsi3 signed
1451
1452#endif /* L_modsi3 */
1453/* ------------------------------------------------------------------------ */
1454#ifdef L_dvmd_tls
1455
1456#ifdef __ARM_EABI__
1457	WEAK aeabi_idiv0
1458	WEAK aeabi_ldiv0
1459	FUNC_START aeabi_idiv0
1460	FUNC_START aeabi_ldiv0
1461	RET
1462	FUNC_END aeabi_ldiv0
1463	FUNC_END aeabi_idiv0
1464#else
1465	FUNC_START div0
1466	RET
1467	FUNC_END div0
1468#endif
1469
#endif /* L_dvmd_tls */
1471/* ------------------------------------------------------------------------ */
1472#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls.
1474
1475/* Constant taken from <asm/signal.h>.  */
1476#define SIGFPE	8
1477
1478#ifdef __ARM_EABI__
1479	cfi_start	__aeabi_ldiv0, LSYM(Lend_aeabi_ldiv0)
1480	WEAK aeabi_idiv0
1481	WEAK aeabi_ldiv0
1482	ARM_FUNC_START aeabi_idiv0
1483	ARM_FUNC_START aeabi_ldiv0
1484	do_push	{r1, lr}
148598:	cfi_push 98b - __aeabi_ldiv0, 0xe, -0x4, 0x8
1486#else
1487	cfi_start	__div0, LSYM(Lend_div0)
1488	ARM_FUNC_START div0
1489	do_push	{r1, lr}
149098:	cfi_push 98b - __div0, 0xe, -0x4, 0x8
1491#endif
1492
1493	mov	r0, #SIGFPE
1494	bl	SYM(raise) __PLT__
1495	RETLDM	r1 unwind=98b
1496
1497#ifdef __ARM_EABI__
1498	cfi_end	LSYM(Lend_aeabi_ldiv0)
1499	FUNC_END aeabi_ldiv0
1500	FUNC_END aeabi_idiv0
1501#else
1502	cfi_end	LSYM(Lend_div0)
1503	FUNC_END div0
1504#endif
1505
1506#endif /* L_dvmd_lnx */
1507#ifdef L_clear_cache
1508#if defined __ARM_EABI__ && defined __linux__
1509@ EABI GNU/Linux call to cacheflush syscall.
1510	ARM_FUNC_START clear_cache
1511	do_push	{r7}
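	@ r7 = 0xf0002, the __ARM_NR_cacheflush private Linux syscall number.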
1512#if __ARM_ARCH >= 7 || defined(__ARM_ARCH_6T2__)
1513	movw	r7, #2
1514	movt	r7, #0xf
1515#else
1516	mov	r7, #0xf0000
1517	add	r7, r7, #2
1518#endif
1519	mov	r2, #0
1520	swi	0
1521	do_pop	{r7}
1522	RET
1523	FUNC_END clear_cache
1524#else
1525#error "This is only for ARM EABI GNU/Linux"
1526#endif
1527#endif /* L_clear_cache */
1528
1529#ifdef L_speculation_barrier
1530	FUNC_START speculation_barrier
1531#if __ARM_ARCH >= 7
1532	isb
1533	dsb sy
1534#elif defined __ARM_EABI__ && defined __linux__
1535	/* We don't have a speculation barrier directly for this
1536	   platform/architecture variant.  But we can use a kernel
1537	   clear_cache service routine which will emit such instructions
1538	   if run on a later version of the architecture.  We don't
1539	   really want to flush the cache, but we must give it a valid
1540	   address, so just clear pc..pc+1.  */
1541#if defined __thumb__ && !defined __thumb2__
1542	push	{r7}
1543	mov	r7, #0xf
1544	lsl	r7, #16
1545	add	r7, #2
1546	adr	r0, . + 4
1547	add	r1, r0, #1
1548	mov	r2, #0
1549	svc	0
1550	pop	{r7}
1551#else
1552	do_push	{r7}
1553#ifdef __ARM_ARCH_6T2__
1554	movw	r7, #2
1555	movt	r7, #0xf
1556#else
1557	mov	r7, #0xf0000
1558	add	r7, r7, #2
1559#endif
1560	add	r0, pc, #0	/* ADR.  */
1561	add	r1, r0, #1
1562	mov	r2, #0
1563	svc	0
1564	do_pop	{r7}
1565#endif /* Thumb1 only */
1566#else
1567#warning "No speculation barrier defined for this platform"
1568#endif
1569	RET
1570	FUNC_END speculation_barrier
1571#endif
1572/* ------------------------------------------------------------------------ */
1573/* Dword shift operations.  */
1574/* All the following Dword shift variants rely on the fact that
1575	shft xxx, Reg
1576   is in fact done as
1577	shft xxx, (Reg & 255)
   so for Reg values in (32...63) and (-1...-31) we will get zero (in the
1579   case of logical shifts) or the sign (for asr).  */
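/* For example, in the ARM version of lshrdi3 below, a shift count of 40
   gives r3 = 40 - 32 = 8, so the "pl" path sets al = ah >> 8, while
   "mov ah, ah, lsr r2" with r2 = 40 (>= 32) yields ah = 0, as required.  */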
1580
1581#ifdef __ARMEB__
1582#define al	r1
1583#define ah	r0
1584#else
1585#define al	r0
1586#define ah	r1
1587#endif
1588
1589/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
1590#ifndef __symbian__
1591
1592#ifdef L_lshrdi3
1593
1594	FUNC_START lshrdi3
1595	FUNC_ALIAS aeabi_llsr lshrdi3
1596
1597#ifdef __thumb__
1598	lsr	al, r2
1599	mov	r3, ah
1600	lsr	ah, r2
1601	mov	ip, r3
1602	sub	r2, #32
1603	lsr	r3, r2
1604	orr	al, r3
1605	neg	r2, r2
1606	mov	r3, ip
1607	lsl	r3, r2
1608	orr	al, r3
1609	RET
1610#else
1611	subs	r3, r2, #32
1612	rsb	ip, r2, #32
1613	movmi	al, al, lsr r2
1614	movpl	al, ah, lsr r3
1615	orrmi	al, al, ah, lsl ip
1616	mov	ah, ah, lsr r2
1617	RET
1618#endif
1619	FUNC_END aeabi_llsr
1620	FUNC_END lshrdi3
1621
1622#endif
1623
1624#ifdef L_ashrdi3
1625
1626	FUNC_START ashrdi3
1627	FUNC_ALIAS aeabi_lasr ashrdi3
1628
1629#ifdef __thumb__
1630	lsr	al, r2
1631	mov	r3, ah
1632	asr	ah, r2
1633	sub	r2, #32
1634	@ If r2 is negative at this point the following step would OR
1635	@ the sign bit into all of AL.  That's not what we want...
1636	bmi	1f
1637	mov	ip, r3
1638	asr	r3, r2
1639	orr	al, r3
1640	mov	r3, ip
16411:
1642	neg	r2, r2
1643	lsl	r3, r2
1644	orr	al, r3
1645	RET
1646#else
1647	subs	r3, r2, #32
1648	rsb	ip, r2, #32
1649	movmi	al, al, lsr r2
1650	movpl	al, ah, asr r3
1651	orrmi	al, al, ah, lsl ip
1652	mov	ah, ah, asr r2
1653	RET
1654#endif
1655
1656	FUNC_END aeabi_lasr
1657	FUNC_END ashrdi3
1658
1659#endif
1660
1661#ifdef L_ashldi3
1662
1663	FUNC_START ashldi3
1664	FUNC_ALIAS aeabi_llsl ashldi3
1665
1666#ifdef __thumb__
1667	lsl	ah, r2
1668	mov	r3, al
1669	lsl	al, r2
1670	mov	ip, r3
1671	sub	r2, #32
1672	lsl	r3, r2
1673	orr	ah, r3
1674	neg	r2, r2
1675	mov	r3, ip
1676	lsr	r3, r2
1677	orr	ah, r3
1678	RET
1679#else
1680	subs	r3, r2, #32
1681	rsb	ip, r2, #32
1682	movmi	ah, ah, lsl r2
1683	movpl	ah, al, lsl r3
1684	orrmi	ah, ah, al, lsr ip
1685	mov	al, al, lsl r2
1686	RET
1687#endif
1688	FUNC_END aeabi_llsl
1689	FUNC_END ashldi3
1690
1691#endif
1692
1693#endif /* __symbian__ */
1694
1695#ifdef L_clzsi2
1696#ifdef NOT_ISA_TARGET_32BIT
1697FUNC_START clzsi2
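	/* Binary search: each cmp/shift/sub step halves the remaining width
	   and accumulates the bias in r1; the byte table at 1f then gives
	   the leading-zero count of the final 4-bit value.  */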
1698	mov	r1, #28
1699	mov	r3, #1
1700	lsl	r3, r3, #16
1701	cmp	r0, r3 /* 0x10000 */
1702	bcc	2f
1703	lsr	r0, r0, #16
1704	sub	r1, r1, #16
17052:	lsr	r3, r3, #8
1706	cmp	r0, r3 /* #0x100 */
1707	bcc	2f
1708	lsr	r0, r0, #8
1709	sub	r1, r1, #8
17102:	lsr	r3, r3, #4
1711	cmp	r0, r3 /* #0x10 */
1712	bcc	2f
1713	lsr	r0, r0, #4
1714	sub	r1, r1, #4
17152:	adr	r2, 1f
1716	ldrb	r0, [r2, r0]
1717	add	r0, r0, r1
1718	bx lr
1719.align 2
17201:
1721.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1722	FUNC_END clzsi2
1723#else
1724ARM_FUNC_START clzsi2
1725# if defined (__ARM_FEATURE_CLZ)
1726	clz	r0, r0
1727	RET
1728# else
1729	mov	r1, #28
1730	cmp	r0, #0x10000
1731	do_it	cs, t
1732	movcs	r0, r0, lsr #16
1733	subcs	r1, r1, #16
1734	cmp	r0, #0x100
1735	do_it	cs, t
1736	movcs	r0, r0, lsr #8
1737	subcs	r1, r1, #8
1738	cmp	r0, #0x10
1739	do_it	cs, t
1740	movcs	r0, r0, lsr #4
1741	subcs	r1, r1, #4
1742	adr	r2, 1f
1743	ldrb	r0, [r2, r0]
1744	add	r0, r0, r1
1745	RET
1746.align 2
17471:
1748.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
1749# endif /* !defined (__ARM_FEATURE_CLZ) */
1750	FUNC_END clzsi2
1751#endif
1752#endif /* L_clzsi2 */
1753
1754#ifdef L_clzdi2
1755#if !defined (__ARM_FEATURE_CLZ)
1756
1757# ifdef NOT_ISA_TARGET_32BIT
1758FUNC_START clzdi2
1759	push	{r4, lr}
1760# else
1761ARM_FUNC_START clzdi2
1762	do_push	{r4, lr}
1763# endif
1764	cmp	xxh, #0
1765	bne	1f
1766# ifdef __ARMEB__
1767	mov	r0, xxl
1768	bl	__clzsi2
1769	add	r0, r0, #32
1770	b 2f
17711:
1772	bl	__clzsi2
1773# else
1774	bl	__clzsi2
1775	add	r0, r0, #32
1776	b 2f
17771:
1778	mov	r0, xxh
1779	bl	__clzsi2
1780# endif
17812:
1782# ifdef NOT_ISA_TARGET_32BIT
1783	pop	{r4, pc}
1784# else
1785	RETLDM	r4
1786# endif
1787	FUNC_END clzdi2
1788
1789#else /* defined (__ARM_FEATURE_CLZ) */
1790
1791ARM_FUNC_START clzdi2
1792	cmp	xxh, #0
1793	do_it	eq, et
1794	clzeq	r0, xxl
1795	clzne	r0, xxh
1796	addeq	r0, r0, #32
1797	RET
1798	FUNC_END clzdi2
1799
1800#endif
1801#endif /* L_clzdi2 */
1802
1803#ifdef L_ctzsi2
1804#ifdef NOT_ISA_TARGET_32BIT
1805FUNC_START ctzsi2
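	/* r0 & -r0 isolates the lowest set bit; the binary search below then
	   finds its position, which is the count of trailing zeros.  */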
1806	neg	r1, r0
1807	and	r0, r0, r1
1808	mov	r1, #28
1809	mov	r3, #1
1810	lsl	r3, r3, #16
1811	cmp	r0, r3 /* 0x10000 */
1812	bcc	2f
1813	lsr	r0, r0, #16
1814	sub	r1, r1, #16
18152:	lsr	r3, r3, #8
1816	cmp	r0, r3 /* #0x100 */
1817	bcc	2f
1818	lsr	r0, r0, #8
1819	sub	r1, r1, #8
18202:	lsr	r3, r3, #4
1821	cmp	r0, r3 /* #0x10 */
1822	bcc	2f
1823	lsr	r0, r0, #4
1824	sub	r1, r1, #4
18252:	adr	r2, 1f
1826	ldrb	r0, [r2, r0]
1827	sub	r0, r0, r1
1828	bx lr
1829.align 2
18301:
1831.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1832	FUNC_END ctzsi2
1833#else
1834ARM_FUNC_START ctzsi2
1835	rsb	r1, r0, #0
1836	and	r0, r0, r1
1837# if defined (__ARM_FEATURE_CLZ)
1838	clz	r0, r0
1839	rsb	r0, r0, #31
1840	RET
1841# else
1842	mov	r1, #28
1843	cmp	r0, #0x10000
1844	do_it	cs, t
1845	movcs	r0, r0, lsr #16
1846	subcs	r1, r1, #16
1847	cmp	r0, #0x100
1848	do_it	cs, t
1849	movcs	r0, r0, lsr #8
1850	subcs	r1, r1, #8
1851	cmp	r0, #0x10
1852	do_it	cs, t
1853	movcs	r0, r0, lsr #4
1854	subcs	r1, r1, #4
1855	adr	r2, 1f
1856	ldrb	r0, [r2, r0]
1857	sub	r0, r0, r1
1858	RET
1859.align 2
18601:
1861.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
1862# endif /* !defined (__ARM_FEATURE_CLZ) */
1863	FUNC_END ctzsi2
1864#endif
#endif /* L_ctzsi2 */
1866
1867/* ------------------------------------------------------------------------ */
1868/* These next two sections are here despite the fact that they contain Thumb
1869   assembler because their presence allows interworked code to be linked even
1870   when the GCC library is this one.  */
1871
1872/* Do not build the interworking functions when the target architecture does
1873   not support Thumb instructions.  (This can be a multilib option).  */
1874#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
1875      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
1876      || __ARM_ARCH >= 6
1877
1878#if defined L_call_via_rX
1879
/* These labels & instructions are used by the Arm/Thumb interworking code.
   The address of the function to be called is loaded into a register and
   then one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
1885
1886	.text
1887	.align 0
1888        .force_thumb
1889
1890.macro call_via register
1891	THUMB_FUNC_START _call_via_\register
1892
1893	bx	\register
1894	nop
1895
1896	SIZE	(_call_via_\register)
1897.endm
1898
1899	call_via r0
1900	call_via r1
1901	call_via r2
1902	call_via r3
1903	call_via r4
1904	call_via r5
1905	call_via r6
1906	call_via r7
1907	call_via r8
1908	call_via r9
1909	call_via sl
1910	call_via fp
1911	call_via ip
1912	call_via sp
1913	call_via lr
1914
1915#endif /* L_call_via_rX */
1916
1917/* Don't bother with the old interworking routines for Thumb-2.  */
1918/* ??? Maybe only omit these on "m" variants.  */
1919#if !defined(__thumb2__) && __ARM_ARCH_ISA_ARM
1920
1921#if defined L_interwork_call_via_rX
1922
/* These labels & instructions are used by the Arm/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of the function to be called is loaded into a register and then one of
   these labels is called via a BL instruction.  This puts the return
   address into the link register with the bottom bit set, and the code
   here switches to the correct mode before executing the function.
   Unfortunately the target code cannot be relied upon to return via a BX
   instruction, so instead we have to store the return address on the
   stack and allow the called function to return here instead.  Upon
   return we recover the real return address and use a BX to get back to
   Thumb mode.
1933
1934   There are three variations of this code.  The first,
1935   _interwork_call_via_rN(), will push the return address onto the
1936   stack and pop it in _arm_return().  It should only be used if all
1937   arguments are passed in registers.
1938
1939   The second, _interwork_r7_call_via_rN(), instead stores the return
1940   address at [r7, #-4].  It is the caller's responsibility to ensure
1941   that this address is valid and contains no useful data.
1942
1943   The third, _interwork_r11_call_via_rN(), works in the same way but
1944   uses r11 instead of r7.  It is useful if the caller does not really
1945   need a frame pointer.  */
1946
1947	.text
1948	.align 0
1949
1950	.code   32
1951	.globl _arm_return
1952LSYM(Lstart_arm_return):
1953	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
1954	cfi_push	0, 0xe, -0x8, 0x8
1955	nop	@ This nop is for the benefit of debuggers, so that
1956		@ backtraces will use the correct unwind information.
1957_arm_return:
1958	RETLDM	unwind=LSYM(Lstart_arm_return)
1959	cfi_end	LSYM(Lend_arm_return)
1960
1961	.globl _arm_return_r7
1962_arm_return_r7:
1963	ldr	lr, [r7, #-4]
1964	bx	lr
1965
1966	.globl _arm_return_r11
1967_arm_return_r11:
1968	ldr	lr, [r11, #-4]
1969	bx	lr
1970
1971.macro interwork_with_frame frame, register, name, return
1972	.code	16
1973
1974	THUMB_FUNC_START \name
1975
1976	bx	pc
1977	nop
1978
1979	.code	32
1980	tst	\register, #1
1981	streq	lr, [\frame, #-4]
1982	adreq	lr, _arm_return_\frame
1983	bx	\register
1984
1985	SIZE	(\name)
1986.endm
1987
1988.macro interwork register
1989	.code	16
1990
1991	THUMB_FUNC_START _interwork_call_via_\register
1992
1993	bx	pc
1994	nop
1995
1996	.code	32
1997	.globl LSYM(Lchange_\register)
1998LSYM(Lchange_\register):
1999	tst	\register, #1
2000	streq	lr, [sp, #-8]!
2001	adreq	lr, _arm_return
2002	bx	\register
2003
2004	SIZE	(_interwork_call_via_\register)
2005
2006	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
2007	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
2008.endm
2009
2010	interwork r0
2011	interwork r1
2012	interwork r2
2013	interwork r3
2014	interwork r4
2015	interwork r5
2016	interwork r6
2017	interwork r7
2018	interwork r8
2019	interwork r9
2020	interwork sl
2021	interwork fp
2022	interwork ip
2023	interwork sp
2024
2025	/* The LR case has to be handled a little differently...  */
2026	.code 16
2027
2028	THUMB_FUNC_START _interwork_call_via_lr
2029
2030	bx 	pc
2031	nop
2032
2033	.code 32
2034	.globl .Lchange_lr
2035.Lchange_lr:
2036	tst	lr, #1
2037	stmeqdb	r13!, {lr, pc}
2038	mov	ip, lr
2039	adreq	lr, _arm_return
2040	bx	ip
2041
2042	SIZE	(_interwork_call_via_lr)
2043
2044#endif /* L_interwork_call_via_rX */
2045#endif /* !__thumb2__ */
2046
/* Functions to support compact PIC switch tables in Thumb-1 state.
   All these routines take an index into the table in r0.  The
   table is at LR & ~1 (but this must be rounded up in the case
   of 32-bit entries).  They are only permitted to clobber r12
   and r14, and r0 must be preserved on exit.  */
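/* For example, the table used by __gnu_thumb1_case_sqi is a sequence of
   signed bytes placed immediately after the BL that calls it; each entry
   is the half-word offset from the start of the table to the
   corresponding case label.  */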
2052#ifdef L_thumb1_case_sqi
2053
2054	.text
2055	.align 0
2056        .force_thumb
2057	.syntax unified
2058	THUMB_FUNC_START __gnu_thumb1_case_sqi
2059	push	{r1}
2060	mov	r1, lr
2061	lsrs	r1, r1, #1
2062	lsls	r1, r1, #1
2063	ldrsb	r1, [r1, r0]
2064	lsls	r1, r1, #1
2065	add	lr, lr, r1
2066	pop	{r1}
2067	bx	lr
2068	SIZE (__gnu_thumb1_case_sqi)
2069#endif
2070
2071#ifdef L_thumb1_case_uqi
2072
2073	.text
2074	.align 0
2075        .force_thumb
2076	.syntax unified
2077	THUMB_FUNC_START __gnu_thumb1_case_uqi
2078	push	{r1}
2079	mov	r1, lr
2080	lsrs	r1, r1, #1
2081	lsls	r1, r1, #1
2082	ldrb	r1, [r1, r0]
2083	lsls	r1, r1, #1
2084	add	lr, lr, r1
2085	pop	{r1}
2086	bx	lr
2087	SIZE (__gnu_thumb1_case_uqi)
2088#endif
2089
2090#ifdef L_thumb1_case_shi
2091
2092	.text
2093	.align 0
2094        .force_thumb
2095	.syntax unified
2096	THUMB_FUNC_START __gnu_thumb1_case_shi
2097	push	{r0, r1}
2098	mov	r1, lr
2099	lsrs	r1, r1, #1
2100	lsls	r0, r0, #1
2101	lsls	r1, r1, #1
2102	ldrsh	r1, [r1, r0]
2103	lsls	r1, r1, #1
2104	add	lr, lr, r1
2105	pop	{r0, r1}
2106	bx	lr
2107	SIZE (__gnu_thumb1_case_shi)
2108#endif
2109
2110#ifdef L_thumb1_case_uhi
2111
2112	.text
2113	.align 0
2114        .force_thumb
2115	.syntax unified
2116	THUMB_FUNC_START __gnu_thumb1_case_uhi
2117	push	{r0, r1}
2118	mov	r1, lr
2119	lsrs	r1, r1, #1
2120	lsls	r0, r0, #1
2121	lsls	r1, r1, #1
2122	ldrh	r1, [r1, r0]
2123	lsls	r1, r1, #1
2124	add	lr, lr, r1
2125	pop	{r0, r1}
2126	bx	lr
2127	SIZE (__gnu_thumb1_case_uhi)
2128#endif
2129
2130#ifdef L_thumb1_case_si
2131
2132	.text
2133	.align 0
2134        .force_thumb
2135	.syntax unified
2136	THUMB_FUNC_START __gnu_thumb1_case_si
2137	push	{r0, r1}
2138	mov	r1, lr
2139	adds.n	r1, r1, #2	/* Align to word.  */
2140	lsrs	r1, r1, #2
2141	lsls	r0, r0, #2
2142	lsls	r1, r1, #2
2143	ldr	r0, [r1, r0]
2144	adds	r0, r0, r1
2145	mov	lr, r0
2146	pop	{r0, r1}
2147	mov	pc, lr		/* We know we were called from thumb code.  */
2148	SIZE (__gnu_thumb1_case_si)
2149#endif
2150
2151#endif /* Arch supports thumb.  */
2152
2153.macro CFI_START_FUNCTION
2154	.cfi_startproc
2155	.cfi_remember_state
2156.endm
2157
2158.macro CFI_END_FUNCTION
2159	.cfi_restore_state
2160	.cfi_endproc
2161.endm
2162
2163#ifndef __symbian__
2164/* The condition here must match the one in gcc/config/arm/elf.h and
2165   libgcc/config/arm/t-elf.  */
2166#ifndef NOT_ISA_TARGET_32BIT
2167#include "ieee754-df.S"
2168#include "ieee754-sf.S"
2169#include "bpabi.S"
2170#else /* NOT_ISA_TARGET_32BIT */
2171#include "bpabi-v6m.S"
2172#endif /* NOT_ISA_TARGET_32BIT */
2173#endif /* !__symbian__ */
2174