@ libgcc routines for the ARM CPU.
@ Division routines, written by Richard Earnshaw (rearnsha@armltd.co.uk)

/* Copyright (C) 1995-2016 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/* An executable stack is *not* required for these functions.  */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
.previous
#endif  /* __ELF__ and __linux__ */

#ifdef __ARM_EABI__
/* Some attributes that are common to all routines in this file.  */
	/* Tag_ABI_align_needed: This code does not require 8-byte
	   alignment from the caller.  */
	/* .eabi_attribute 24, 0  -- default setting.  */
	/* Tag_ABI_align_preserved: This code preserves 8-byte
	   alignment in any callee.  */
	.eabi_attribute 25, 1
#endif /* __ARM_EABI__ */
/* ------------------------------------------------------------------------ */

/* We need to know what prefix to add to function names.  */

#ifndef __USER_LABEL_PREFIX__
#error  __USER_LABEL_PREFIX__ not defined
#endif

/* ANSI concatenation macros.  */

#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b

/* Use the right prefix for global labels.  */

#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
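
/* CONCAT1 exists so that __USER_LABEL_PREFIX__ is itself macro-expanded
   before the paste in CONCAT2.  For example, with an empty prefix
   SYM (__udivsi3) is just __udivsi3, while a prefix of _ would give
   ___udivsi3.  */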

#ifdef __ELF__
#ifdef __thumb__
#define __PLT__  /* Not supported in Thumb assembler (for now).  */
#elif defined __vxworks && !defined __PIC__
#define __PLT__ /* Not supported by the kernel loader.  */
#else
#define __PLT__ (PLT)
#endif
#define TYPE(x) .type SYM(x),function
#define SIZE(x) .size SYM(x), . - SYM(x)
#define LSYM(x) .x
#else
#define __PLT__
#define TYPE(x)
#define SIZE(x)
#define LSYM(x) x
#endif

/* Function end macros.  Variants for interworking.  */

#if defined(__ARM_ARCH_2__)
# define __ARM_ARCH__ 2
#endif

#if defined(__ARM_ARCH_3__)
# define __ARM_ARCH__ 3
#endif

#if defined(__ARM_ARCH_3M__) || defined(__ARM_ARCH_4__) \
	|| defined(__ARM_ARCH_4T__)
/* We use __ARM_ARCH__ set to 4 here, but in reality it's any processor with
   long multiply instructions.  That includes v3M.  */
# define __ARM_ARCH__ 4
#endif

#if defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
	|| defined(__ARM_ARCH_5TEJ__)
# define __ARM_ARCH__ 5
#endif

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
	|| defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
	|| defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
	|| defined(__ARM_ARCH_6M__)
# define __ARM_ARCH__ 6
#endif

#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
	|| defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
	|| defined(__ARM_ARCH_7EM__)
# define __ARM_ARCH__ 7
#endif

#if defined(__ARM_ARCH_8A__)
# define __ARM_ARCH__ 8
#endif

#ifndef __ARM_ARCH__
#error Unable to determine architecture.
#endif

/* There are times when we might prefer Thumb1 code even if ARM code is
   permitted: for example, the code might be smaller, or switching to ARM
   state might cause interworking problems if interworking is disabled.  */
#if (defined(__thumb__)			\
     && !defined(__thumb2__)		\
     && (!defined(__THUMB_INTERWORK__)	\
	 || defined (__OPTIMIZE_SIZE__)	\
	 || defined(__ARM_ARCH_6M__)))
# define __prefer_thumb__
#endif

/* How to return from a function call depends on the architecture variant.  */

#if (__ARM_ARCH__ > 4) || defined(__ARM_ARCH_4T__)

# define RET		bx	lr
# define RETc(x)	bx##x	lr

/* Special precautions for interworking on armv4t.  */
# if (__ARM_ARCH__ == 4)

/* Always use bx, not ldr pc.  */
#  if (defined(__thumb__) || defined(__THUMB_INTERWORK__))
#   define __INTERWORKING__
#  endif /* __thumb__ || __THUMB_INTERWORK__ */

/* Include thumb stub before arm mode code.  */
#  if defined(__thumb__) && !defined(__THUMB_INTERWORK__)
#   define __INTERWORKING_STUBS__
#  endif /* __thumb__ && !__THUMB_INTERWORK__ */

#endif /* __ARM_ARCH__ == 4 */

#else

# define RET		mov	pc, lr
# define RETc(x)	mov##x	pc, lr

#endif
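
/* The cfi_* macros below hand-assemble DWARF call-frame information in
   .debug_frame for these hand-written routines: cfi_start and cfi_end
   emit a function's CIE/FDE framing, while cfi_push and cfi_pop record
   a register save or restore at a given offset into the function.  */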

.macro	cfi_pop		advance, reg, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0xc0 | \reg)	/* DW_CFA_restore */
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro	cfi_push	advance, reg, offset, cfa_offset
#ifdef __ELF__
	.pushsection	.debug_frame
	.byte	0x4		/* DW_CFA_advance_loc4 */
	.4byte	\advance
	.byte	(0x80 | \reg)	/* DW_CFA_offset */
	.uleb128 (\offset / -4)
	.byte	0xe		/* DW_CFA_def_cfa_offset */
	.uleb128 \cfa_offset
	.popsection
#endif
.endm
.macro cfi_start	start_label, end_label
#ifdef __ELF__
	.pushsection	.debug_frame
LSYM(Lstart_frame):
	.4byte	LSYM(Lend_cie) - LSYM(Lstart_cie) @ Length of CIE
LSYM(Lstart_cie):
        .4byte	0xffffffff	@ CIE Identifier Tag
        .byte	0x1	@ CIE Version
        .ascii	"\0"	@ CIE Augmentation
        .uleb128 0x1	@ CIE Code Alignment Factor
        .sleb128 -4	@ CIE Data Alignment Factor
        .byte	0xe	@ CIE RA Column
        .byte	0xc	@ DW_CFA_def_cfa
        .uleb128 0xd
        .uleb128 0x0

	.align 2
LSYM(Lend_cie):
	.4byte	LSYM(Lend_fde)-LSYM(Lstart_fde)	@ FDE Length
LSYM(Lstart_fde):
	.4byte	LSYM(Lstart_frame)	@ FDE CIE offset
	.4byte	\start_label	@ FDE initial location
	.4byte	\end_label-\start_label	@ FDE address range
	.popsection
#endif
.endm
.macro cfi_end	end_label
#ifdef __ELF__
	.pushsection	.debug_frame
	.align	2
LSYM(Lend_fde):
	.popsection
\end_label:
#endif
.endm

/* Don't pass dirn; it's there just to get token pasting right.  */

.macro	RETLDM	regs=, cond=, unwind=, dirn=ia
#if defined (__INTERWORKING__)
	.ifc "\regs",""
	ldr\cond	lr, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, lr}
# else
	ldm\cond\dirn	sp!, {\regs, lr}
# endif
	.endif
	.ifnc "\unwind", ""
	/* Mark LR as restored.  */
97:	cfi_pop 97b - \unwind, 0xe, 0x0
	.endif
	bx\cond	lr
#else
	/* Caller is responsible for providing IT instruction.  */
	.ifc "\regs",""
	ldr\cond	pc, [sp], #8
	.else
# if defined(__thumb2__)
	pop\cond	{\regs, pc}
# else
	ldm\cond\dirn	sp!, {\regs, pc}
# endif
	.endif
#endif
.endm

/* The Unified assembly syntax allows the same code to be assembled for both
   ARM and Thumb-2.  However this is only supported by recent gas, so define
   a set of macros to allow ARM code on older assemblers.  */
#if defined(__thumb2__)
.macro do_it cond, suffix=""
	it\suffix	\cond
.endm
.macro shift1 op, arg0, arg1, arg2
	\op	\arg0, \arg1, \arg2
.endm
#define do_push	push
#define do_pop	pop
#define COND(op1, op2, cond) op1 ## op2 ## cond
/* Perform an arithmetic operation with a variable shift operand.  This
   requires two instructions and a scratch register on Thumb-2.  */
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\shiftop \tmp, \src2, \shiftreg
	\name \dest, \src1, \tmp
.endm
#else
.macro do_it cond, suffix=""
.endm
.macro shift1 op, arg0, arg1, arg2
	mov	\arg0, \arg1, \op \arg2
.endm
#define do_push	stmfd sp!,
#define do_pop	ldmfd sp!,
#define COND(op1, op2, cond) op1 ## cond ## op2
.macro shiftop name, dest, src1, src2, shiftop, shiftreg, tmp
	\name \dest, \src1, \src2, \shiftop \shiftreg
.endm
#endif
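
/* For example,
	shift1	lsl, r0, r1, r2
   assembles to "lsl r0, r1, r2" on Thumb-2 but to the divided-syntax
   "mov r0, r1, lsl r2" on ARM, while
	shiftop orr, r0, r0, r1, lsl, r2, ip
   becomes "lsl ip, r1, r2" followed by "orr r0, r0, ip" on Thumb-2,
   where a flexible operand cannot take a register-specified shift, and
   the single "orr r0, r0, r1, lsl r2" on ARM.  */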

#ifdef __ARM_EABI__
.macro ARM_LDIV0 name signed
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM (__aeabi_idiv0) __PLT__
.endm
#else
.macro ARM_LDIV0 name signed
	str	lr, [sp, #-8]!
98:	cfi_push 98b - __\name, 0xe, -0x8, 0x8
	bl	SYM (__div0) __PLT__
	mov	r0, #0			@ About as wrong as it could be.
	RETLDM	unwind=98b
.endm
#endif


#ifdef __ARM_EABI__
.macro THUMB_LDIV0 name signed
#if defined(__ARM_ARCH_6M__)
	.ifc \signed, unsigned
	cmp	r0, #0
	beq	1f
	mov	r0, #0
	mvn	r0, r0		@ 0xffffffff
1:
	.else
	cmp	r0, #0
	beq	2f
	blt	3f
	mov	r0, #0
	mvn	r0, r0
	lsr	r0, r0, #1	@ 0x7fffffff
	b	2f
3:	mov	r0, #0x80
	lsl	r0, r0, #24	@ 0x80000000
2:
	.endif
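	@ Tail-call __aeabi_idiv0 while preserving r0 and r1: push three
	@ words, overwrite the saved r2 slot with the absolute address of
	@ __aeabi_idiv0 (rebuilt from the PC-relative offset stored at 4f),
	@ then pop {r0, r1, pc} to branch there.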
	push	{r0, r1, r2}
	ldr	r0, 4f
	adr	r1, 4f
	add	r0, r1
	str	r0, [sp, #8]
	@ We know we are not on armv4t, so pop pc is safe.
	pop	{r0, r1, pc}
	.align	2
4:
	.word	__aeabi_idiv0 - 4b
#elif defined(__thumb2__)
	.syntax unified
	.ifc \signed, unsigned
	cbz	r0, 1f
	mov	r0, #0xffffffff
1:
	.else
	cmp	r0, #0
	do_it	gt
	movgt	r0, #0x7fffffff
	do_it	lt
	movlt	r0, #0x80000000
	.endif
	b.w	SYM(__aeabi_idiv0) __PLT__
#else
	.align	2
	bx	pc
	nop
	.arm
	cmp	r0, #0
	.ifc	\signed, unsigned
	movne	r0, #0xffffffff
	.else
	movgt	r0, #0x7fffffff
	movlt	r0, #0x80000000
	.endif
	b	SYM(__aeabi_idiv0) __PLT__
	.thumb
#endif
.endm
#else
.macro THUMB_LDIV0 name signed
	push	{ r1, lr }
98:	cfi_push 98b - __\name, 0xe, -0x4, 0x8
	bl	SYM (__div0)
	mov	r0, #0			@ About as wrong as it could be.
#if defined (__INTERWORKING__)
	pop	{ r1, r2 }
	bx	r2
#else
	pop	{ r1, pc }
#endif
.endm
#endif

.macro FUNC_END name
	SIZE (__\name)
.endm

.macro DIV_FUNC_END name signed
	cfi_start	__\name, LSYM(Lend_div0)
LSYM(Ldiv0):
#ifdef __thumb__
	THUMB_LDIV0 \name \signed
#else
	ARM_LDIV0 \name \signed
#endif
	cfi_end	LSYM(Lend_div0)
	FUNC_END \name
.endm

.macro THUMB_FUNC_START name
	.globl	SYM (\name)
	TYPE	(\name)
	.thumb_func
SYM (\name):
.endm

/* Function start macros.  Variants for ARM and Thumb.  */

#ifdef __thumb__
#define THUMB_FUNC .thumb_func
#define THUMB_CODE .force_thumb
# if defined(__thumb2__)
#define THUMB_SYNTAX .syntax divided
# else
#define THUMB_SYNTAX
# endif
#else
#define THUMB_FUNC
#define THUMB_CODE
#define THUMB_SYNTAX
#endif

.macro FUNC_START name sp_section=
  .ifc \sp_section, function_section
	.section	.text.__\name,"ax",%progbits
  .else
	.text
  .endif
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	THUMB_CODE
	THUMB_FUNC
	THUMB_SYNTAX
SYM (__\name):
.endm

.macro ARM_SYM_START name
       TYPE (\name)
       .align 0
SYM (\name):
.endm

.macro SYM_END name
       SIZE (\name)
.endm

/* Special functions that will always be coded in ARM assembly, even in
   Thumb-only compilations.  */

#if defined(__thumb2__)

/* For Thumb-2 we build everything in thumb mode.  */
.macro ARM_FUNC_START name sp_section=
       FUNC_START \name \sp_section
       .syntax unified
.endm
#define EQUIV .thumb_set
.macro  ARM_CALL name
	bl	__\name
.endm

#elif defined(__INTERWORKING_STUBS__)

.macro	ARM_FUNC_START name
	FUNC_START \name
	bx	pc
	nop
	.arm
/* A hook to tell gdb that we've switched to ARM mode.  Also used for
   calling directly from other local ARM routines.  */
_L__\name:
.endm
#define EQUIV .thumb_set
/* Branch directly to a function declared with ARM_FUNC_START.
   Must be called in arm mode.  */
.macro  ARM_CALL name
	bl	_L__\name
.endm

#else /* !(__INTERWORKING_STUBS__ || __thumb2__) */

#ifdef __ARM_ARCH_6M__
#define EQUIV .thumb_set
#else
.macro	ARM_FUNC_START name sp_section=
  .ifc \sp_section, function_section
	.section	.text.__\name,"ax",%progbits
  .else
	.text
  .endif
	.globl SYM (__\name)
	TYPE (__\name)
	.align 0
	.arm
SYM (__\name):
.endm
#define EQUIV .set
.macro  ARM_CALL name
	bl	__\name
.endm
#endif

#endif

.macro	FUNC_ALIAS new old
	.globl	SYM (__\new)
#if defined (__thumb__)
	.thumb_set	SYM (__\new), SYM (__\old)
#else
	.set	SYM (__\new), SYM (__\old)
#endif
.endm

#ifndef __ARM_ARCH_6M__
.macro	ARM_FUNC_ALIAS new old
	.globl	SYM (__\new)
	EQUIV	SYM (__\new), SYM (__\old)
#if defined(__INTERWORKING_STUBS__)
	.set	SYM (_L__\new), SYM (_L__\old)
#endif
.endm
#endif

#ifdef __ARMEB__
#define xxh r0
#define xxl r1
#define yyh r2
#define yyl r3
#else
#define xxh r1
#define xxl r0
#define yyh r3
#define yyl r2
#endif
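
/* Register names for the halves of a 64-bit argument pair: xxh/xxl are
   the high and low words of the first argument (in r0-r1) and yyh/yyl
   those of the second (in r2-r3), with the word order depending on
   endianness.  */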

#ifdef __ARM_EABI__
.macro	WEAK name
	.weak SYM (__\name)
.endm
#endif

#ifdef __thumb__
/* Register aliases.  */

work		.req	r4	@ XXX: is this safe?
dividend	.req	r0
divisor		.req	r1
overdone	.req	r2
result		.req	r2
curbit		.req	r3
#endif
#if 0
ip		.req	r12
sp		.req	r13
lr		.req	r14
pc		.req	r15
#endif

/* ------------------------------------------------------------------------ */
/*		Bodies of the division and modulo routines.		    */
/* ------------------------------------------------------------------------ */
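/* The macros below implement classic binary restoring division: the
   divisor is shifted left to line up with the most significant bit of
   the dividend, then on each step it is compared against the running
   remainder, subtracted whenever it fits (setting the corresponding
   quotient bit), and shifted back down, four quotient bits per unrolled
   iteration.  For example, 13 / 3 aligns 3 up to 12, subtracts it
   (quotient bit of weight 4, remainder 1), and the compares against 6
   and 3 then fail, giving quotient 4, remainder 1.  */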
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

#if defined (__thumb2__)
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsb	\curbit, \curbit, #31
	adr	\result, 1f
	add	\curbit, \result, \curbit, lsl #4
	mov	\result, #0
	mov	pc, \curbit
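	@ Each unrolled step below assembles to exactly 16 bytes (nop.n
	@ pads the three 32-bit instructions plus the IT), so the entry
	@ for a given shift is reached by scaling \curbit by 16 with the
	@ lsl #4 above.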
.p2align 3
1:
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp.w	\dividend, \divisor, lsl #shift
	nop.n
	adc.w	\result, \result, \result
	it	cs
	subcs.w	\dividend, \dividend, \divisor, lsl #shift
	.endr
#else
	clz	\curbit, \dividend
	clz	\result, \divisor
	sub	\curbit, \result, \curbit
	rsbs	\curbit, \curbit, #31
	addne	\curbit, \curbit, \curbit, lsl #1
	mov	\result, #0
	addne	pc, pc, \curbit, lsl #2
	nop
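	@ Each unrolled ARM step below is three instructions (12 bytes),
	@ hence the scaling of \curbit by 3 and then by 4 above.  The nop
	@ pads the table: pc reads as the address of the add plus 8, which
	@ is exactly the first entry.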
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	adc	\result, \result, \result
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr
#endif

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else /* __ARM_ARCH__ < 5 */

	@ Initially shift the divisor left 3 bits if possible, and set
	@ curbit accordingly.  This places curbit at the left end of each
	@ 4-bit nibble in the division loop, saving one loop iteration in
	@ most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif /* __ARM_ARCH__ < 5 */

	@ Division loop
1:	cmp	\dividend, \divisor
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	do_it	hs, t
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	do_it	ne, t
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
.macro ARM_DIV2_ORDER divisor, order

#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm
/* ------------------------------------------------------------------------ */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __ARM_ARCH__ >= 5 && ! defined (__OPTIMIZE_SIZE__)

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	rsbs	\order, \order, #31
	addne	pc, pc, \order, lsl #3
	nop
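	@ Each unrolled step below is two instructions (8 bytes), hence
	@ the lsl #3 scaling of \order above; as in ARM_DIV_BODY, the nop
	@ pads the table to account for pc reading as the address of the
	@ add plus 8.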
	.set	shift, 32
	.rept	32
	.set	shift, shift - 1
	cmp	\dividend, \divisor, lsl #shift
	subcs	\dividend, \dividend, \divisor, lsl #shift
	.endr

#else /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */
#if __ARM_ARCH__ >= 5

	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else /* __ARM_ARCH__ < 5 */

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif /* __ARM_ARCH__ < 5 */

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batches of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparisons/subtractions are left.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:

#endif /* __ARM_ARCH__ < 5 || defined (__OPTIMIZE_SIZE__) */

.endm
/* ------------------------------------------------------------------------ */
.macro THUMB_DIV_MOD_BODY modulo
	@ Load the constant 0x10000000 into our work register.
	mov	work, #1
	lsl	work, #28
LSYM(Loop1):
	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	cmp	divisor, work
	bhs	LSYM(Lbignum)
	cmp	divisor, dividend
	bhs	LSYM(Lbignum)
	lsl	divisor, #4
	lsl	curbit,  #4
	b	LSYM(Loop1)
LSYM(Lbignum):
	@ Set work to 0x80000000
	lsl	work, #3
LSYM(Loop2):
	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
	cmp	divisor, work
	bhs	LSYM(Loop3)
	cmp	divisor, dividend
	bhs	LSYM(Loop3)
	lsl	divisor, #1
	lsl	curbit,  #1
	b	LSYM(Loop2)
LSYM(Loop3):
	@ Test for possible subtractions ...
  .if \modulo
	@ ... On the final pass, this may subtract too much from the dividend,
	@ so keep track of which subtractions are done so that we can fix
	@ them up afterwards.
	mov	overdone, #0
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #1
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #2
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	mov	ip, curbit
	mov	work, #3
	ror	curbit, work
	orr	overdone, curbit
	mov	curbit, ip
LSYM(Lover4):
	mov	ip, curbit
  .else
	@ ... and note which bits are done in the result.  On the final pass,
	@ this may subtract too much from the dividend, but the result will be ok,
	@ since the "bit" will have been shifted out at the bottom.
	cmp	dividend, divisor
	blo	LSYM(Lover1)
	sub	dividend, dividend, divisor
	orr	result, result, curbit
LSYM(Lover1):
	lsr	work, divisor, #1
	cmp	dividend, work
	blo	LSYM(Lover2)
	sub	dividend, dividend, work
	lsr	work, curbit, #1
	orr	result, work
LSYM(Lover2):
	lsr	work, divisor, #2
	cmp	dividend, work
	blo	LSYM(Lover3)
	sub	dividend, dividend, work
	lsr	work, curbit, #2
	orr	result, work
LSYM(Lover3):
	lsr	work, divisor, #3
	cmp	dividend, work
	blo	LSYM(Lover4)
	sub	dividend, dividend, work
	lsr	work, curbit, #3
	orr	result, work
LSYM(Lover4):
  .endif

	cmp	dividend, #0			@ Early termination?
	beq	LSYM(Lover5)
	lsr	curbit,  #4			@ No, any more bits to do?
	beq	LSYM(Lover5)
	lsr	divisor, #4
	b	LSYM(Loop3)
LSYM(Lover5):
  .if \modulo
	@ Any subtractions that we should not have done will be recorded in
	@ the top three bits of "overdone".  Exactly which ones were not
	@ needed is governed by the position of the bit, stored in ip.
	mov	work, #0xe
	lsl	work, #28
	and	overdone, work
	beq	LSYM(Lgot_result)

	@ If we terminated early because the dividend became zero, then the
	@ bit in ip will not be in the bottom nibble, and we should not
	@ perform the additions below.  We must test for this though
	@ (rather than relying upon the TSTs to prevent the additions) since
	@ the bit in ip could be in the top two bits, which might then match
	@ one of the smaller RORs.
	mov	curbit, ip
	mov	work, #0x7
	tst	curbit, work
	beq	LSYM(Lgot_result)

	mov	curbit, ip
	mov	work, #3
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover6)
	lsr	work, divisor, #3
	add	dividend, work
LSYM(Lover6):
	mov	curbit, ip
	mov	work, #2
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lover7)
	lsr	work, divisor, #2
	add	dividend, work
LSYM(Lover7):
	mov	curbit, ip
	mov	work, #1
	ror	curbit, work
	tst	overdone, curbit
	beq	LSYM(Lgot_result)
	lsr	work, divisor, #1
	add	dividend, work
  .endif
LSYM(Lgot_result):
.endm
/* ------------------------------------------------------------------------ */
/*		Start of the Real Functions				    */
/* ------------------------------------------------------------------------ */
#ifdef L_udivsi3

#if defined(__prefer_thumb__)

	FUNC_START udivsi3
	FUNC_ALIAS aeabi_uidiv udivsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(udivsi3_skip_div0_test):
	mov	curbit, #1
	mov	result, #0

	push	{ work }
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	pop	{ work }
	RET

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	udiv	r0, r0, r1
	RET

#else /* ARM version/Thumb-2.  */

	ARM_FUNC_START udivsi3
	ARM_FUNC_ALIAS aeabi_uidiv udivsi3

	/* Note: if called via udivsi3_skip_div0_test, this will unnecessarily
	   check for division-by-zero a second time.  */
LSYM(udivsi3_skip_div0_test):
	subs	r2, r1, #1
	do_it	eq
	RETc(eq)
	bcc	LSYM(Ldiv0)
	cmp	r0, r1
	bls	11f
	tst	r1, r2
	beq	12f

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	RET

11:	do_it	eq, e
	moveq	r0, #1
	movne	r0, #0
	RET

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	RET

#endif /* ARM version */

	DIV_FUNC_END udivsi3 unsigned

#if defined(__prefer_thumb__)
FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	push	{r0, r1, lr}
	bl	LSYM(udivsi3_skip_div0_test)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov     r2, r0
	udiv	r0, r0, r1
	mls     r1, r0, r1, r2
	RET
#else
ARM_FUNC_START aeabi_uidivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(udivsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_uidivmod

#endif /* L_udivsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_umodsi3

#ifdef __ARM_ARCH_EXT_IDIV__

	ARM_FUNC_START umodsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	udiv	r2, r0, r1
	mls     r0, r1, r2, r0
	RET

#elif defined(__thumb__)

	FUNC_START umodsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	mov	curbit, #1
	cmp	dividend, divisor
	bhs	LSYM(Lover10)
	RET

LSYM(Lover10):
	push	{ work }

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	RET

#else  /* ARM version.  */

	FUNC_START umodsi3

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	LSYM(Ldiv0)
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	RETc(ls)

	ARM_MOD_BODY r0, r1, r2, r3

	RET

#endif /* ARM version.  */

	DIV_FUNC_END umodsi3 unsigned

#endif /* L_umodsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_divsi3

#if defined(__prefer_thumb__)

	FUNC_START divsi3
	FUNC_ALIAS aeabi_idiv divsi3

	cmp	divisor, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	push	{ work }
	mov	work, dividend
	eor	work, divisor		@ Save the sign of the result.
	mov	ip, work
	mov	curbit, #1
	mov	result, #0
	cmp	divisor, #0
	bpl	LSYM(Lover10)
	neg	divisor, divisor	@ Loops below use unsigned.
LSYM(Lover10):
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 0

	mov	r0, result
	mov	work, ip
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	r0, r0
LSYM(Lover12):
	pop	{ work }
	RET

#elif defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	sdiv	r0, r0, r1
	RET

#else /* ARM/Thumb-2 version.  */

	ARM_FUNC_START divsi3
	ARM_FUNC_ALIAS aeabi_idiv divsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
LSYM(divsi3_skip_div0_test):
	eor	ip, r0, r1			@ save the sign of the result.
	do_it	mi
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	do_it	mi
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	do_it	mi
	rsbmi	r0, r0, #0
	RET

10:	teq	ip, r0				@ same sign ?
	do_it	mi
	rsbmi	r0, r0, #0
	RET

11:	do_it	lo
	movlo	r0, #0
	do_it	eq,t
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	RET

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	do_it	mi
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END divsi3 signed

#if defined(__prefer_thumb__)
FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	push	{r0, r1, lr}
	bl	LSYM(divsi3_skip_div0_test)
	POP	{r1, r2, r3}
	mul	r2, r0
	sub	r1, r1, r2
	bx	r3
#elif defined(__ARM_ARCH_EXT_IDIV__)
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	mov     r2, r0
	sdiv	r0, r0, r1
	mls     r1, r0, r1, r2
	RET
#else
ARM_FUNC_START aeabi_idivmod
	cmp	r1, #0
	beq	LSYM(Ldiv0)
	stmfd	sp!, { r0, r1, lr }
	bl	LSYM(divsi3_skip_div0_test)
	ldmfd	sp!, { r1, r2, lr }
	mul	r3, r2, r0
	sub	r1, r1, r3
	RET
#endif
	FUNC_END aeabi_idivmod

#endif /* L_divsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_modsi3

#if defined(__ARM_ARCH_EXT_IDIV__)

	ARM_FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)

	sdiv	r2, r0, r1
	mls     r0, r1, r2, r0
	RET

#elif defined(__thumb__)

	FUNC_START modsi3

	mov	curbit, #1
	cmp	divisor, #0
	beq	LSYM(Ldiv0)
	bpl	LSYM(Lover10)
	neg	divisor, divisor		@ Loops below use unsigned.
LSYM(Lover10):
	push	{ work }
	@ We need to save the sign of the dividend; unfortunately, we also
	@ need the work register later on.  Must do this after saving the
	@ original value of the work register, because we will pop that
	@ value off first.
	push	{ dividend }
	cmp	dividend, #0
	bpl	LSYM(Lover11)
	neg	dividend, dividend
LSYM(Lover11):
	cmp	dividend, divisor
	blo	LSYM(Lgot_result)

	THUMB_DIV_MOD_BODY 1

	pop	{ work }
	cmp	work, #0
	bpl	LSYM(Lover12)
	neg	dividend, dividend
LSYM(Lover12):
	pop	{ work }
	RET

#else /* ARM version.  */

	FUNC_START modsi3

	cmp	r1, #0
	beq	LSYM(Ldiv0)
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	RET

#endif /* ARM version */

	DIV_FUNC_END modsi3 signed

#endif /* L_modsi3 */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_tls

#ifdef __ARM_EABI__
	WEAK aeabi_idiv0
	WEAK aeabi_ldiv0
	FUNC_START aeabi_idiv0
	FUNC_START aeabi_ldiv0
	RET
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	FUNC_START div0
	RET
	FUNC_END div0
#endif

#endif /* L_dvmd_tls */
/* ------------------------------------------------------------------------ */
#ifdef L_dvmd_lnx
@ GNU/Linux division-by-zero handler.  Used in place of L_dvmd_tls.

/* Constant taken from <asm/signal.h>.  */
#define SIGFPE	8

#ifdef __ARM_EABI__
	cfi_start	__aeabi_ldiv0, LSYM(Lend_aeabi_ldiv0)
	WEAK aeabi_idiv0
	WEAK aeabi_ldiv0
	ARM_FUNC_START aeabi_idiv0
	ARM_FUNC_START aeabi_ldiv0
	do_push	{r1, lr}
98:	cfi_push 98b - __aeabi_ldiv0, 0xe, -0x4, 0x8
#else
	cfi_start	__div0, LSYM(Lend_div0)
	ARM_FUNC_START div0
	do_push	{r1, lr}
98:	cfi_push 98b - __div0, 0xe, -0x4, 0x8
#endif

	mov	r0, #SIGFPE
	bl	SYM(raise) __PLT__
	RETLDM	r1 unwind=98b

#ifdef __ARM_EABI__
	cfi_end	LSYM(Lend_aeabi_ldiv0)
	FUNC_END aeabi_ldiv0
	FUNC_END aeabi_idiv0
#else
	cfi_end	LSYM(Lend_div0)
	FUNC_END div0
#endif

#endif /* L_dvmd_lnx */
#ifdef L_clear_cache
#if defined __ARM_EABI__ && defined __linux__
@ EABI GNU/Linux call to cacheflush syscall.
	ARM_FUNC_START clear_cache
	do_push	{r7}
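	@ r7 holds the syscall number: 0xf0002 is the ARM-private
	@ cacheflush syscall (__ARM_NR_cacheflush); r0 and r1 carry the
	@ start and end addresses, and r2 must be zero.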
#if __ARM_ARCH__ >= 7 || defined(__ARM_ARCH_6T2__)
	movw	r7, #2
	movt	r7, #0xf
#else
	mov	r7, #0xf0000
	add	r7, r7, #2
#endif
	mov	r2, #0
	swi	0
	do_pop	{r7}
	RET
	FUNC_END clear_cache
#else
#error "This is only for ARM EABI GNU/Linux"
#endif
#endif /* L_clear_cache */
/* ------------------------------------------------------------------------ */
/* Dword shift operations.  */
/* All the following Dword shift variants rely on the fact that
	shft xxx, Reg
   is in fact done as
	shft xxx, (Reg & 255)
   so for Reg values in (32...63) and (-1...-31) we will get zero (in the
   case of logical shifts) or the sign (for asr).  */
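
/* In C terms, for a shift count N with 0 < N < 32, each routine below
   computes (shown for the logical right shift):
	al = (al >> N) | (ah << (32 - N));  ah = ah >> N;
   and for 32 <= N < 64:
	al = ah >> (N - 32);  ah = 0;
   with conditional execution (ARM) or the shift-by-(Reg & 255)
   behaviour described above (Thumb-1) selecting between the cases.  */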

#ifdef __ARMEB__
#define al	r1
#define ah	r0
#else
#define al	r0
#define ah	r1
#endif

/* Prevent __aeabi double-word shifts from being produced on SymbianOS.  */
#ifndef __symbian__

#ifdef L_lshrdi3

	FUNC_START lshrdi3
	FUNC_ALIAS aeabi_llsr lshrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	lsr	ah, r2
	mov	ip, r3
	sub	r2, #32
	lsr	r3, r2
	orr	al, r3
	neg	r2, r2
	mov	r3, ip
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, lsr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, lsr r2
	RET
#endif
	FUNC_END aeabi_llsr
	FUNC_END lshrdi3

#endif

#ifdef L_ashrdi3

	FUNC_START ashrdi3
	FUNC_ALIAS aeabi_lasr ashrdi3

#ifdef __thumb__
	lsr	al, r2
	mov	r3, ah
	asr	ah, r2
	sub	r2, #32
	@ If r2 is negative at this point the following step would OR
	@ the sign bit into all of AL.  That's not what we want...
	bmi	1f
	mov	ip, r3
	asr	r3, r2
	orr	al, r3
	mov	r3, ip
1:
	neg	r2, r2
	lsl	r3, r2
	orr	al, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	al, al, lsr r2
	movpl	al, ah, asr r3
	orrmi	al, al, ah, lsl ip
	mov	ah, ah, asr r2
	RET
#endif

	FUNC_END aeabi_lasr
	FUNC_END ashrdi3

#endif

#ifdef L_ashldi3

	FUNC_START ashldi3
	FUNC_ALIAS aeabi_llsl ashldi3

#ifdef __thumb__
	lsl	ah, r2
	mov	r3, al
	lsl	al, r2
	mov	ip, r3
	sub	r2, #32
	lsl	r3, r2
	orr	ah, r3
	neg	r2, r2
	mov	r3, ip
	lsr	r3, r2
	orr	ah, r3
	RET
#else
	subs	r3, r2, #32
	rsb	ip, r2, #32
	movmi	ah, ah, lsl r2
	movpl	ah, al, lsl r3
	orrmi	ah, ah, al, lsr ip
	mov	al, al, lsl r2
	RET
#endif
	FUNC_END aeabi_llsl
	FUNC_END ashldi3

#endif

#endif /* __symbian__ */

#if ((__ARM_ARCH__ > 5) && !defined(__ARM_ARCH_6M__)) \
    || defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
    || defined(__ARM_ARCH_5TEJ__)
#define HAVE_ARM_CLZ 1
#endif

#ifdef L_clzsi2
#if defined(__ARM_ARCH_6M__)
FUNC_START clzsi2
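	@ Binary search: shift the argument down by 16, 8 and then 4 bits
	@ whenever it is at least 0x10000, 0x100 and 0x10 respectively,
	@ reducing the bias in r1 from 28 as we go; the remaining nibble
	@ is resolved by the 16-entry lookup table at 1f.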
	mov	r1, #28
	mov	r3, #1
	lsl	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsr	r0, r0, #16
	sub	r1, r1, #16
2:	lsr	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsr	r0, r0, #8
	sub	r1, r1, #8
2:	lsr	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsr	r0, r0, #4
	sub	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]
	add	r0, r0, r1
	bx lr
.align 2
1:
.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
	FUNC_END clzsi2
#else
ARM_FUNC_START clzsi2
# if defined(HAVE_ARM_CLZ)
	clz	r0, r0
	RET
# else
	mov	r1, #28
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]
	add	r0, r0, r1
	RET
.align 2
1:
.byte 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0
# endif /* !HAVE_ARM_CLZ */
	FUNC_END clzsi2
#endif
#endif /* L_clzsi2 */

#ifdef L_clzdi2
#if !defined(HAVE_ARM_CLZ)

# if defined(__ARM_ARCH_6M__)
FUNC_START clzdi2
	push	{r4, lr}
# else
ARM_FUNC_START clzdi2
	do_push	{r4, lr}
# endif
	cmp	xxh, #0
	bne	1f
# ifdef __ARMEB__
	mov	r0, xxl
	bl	__clzsi2
	add	r0, r0, #32
	b 2f
1:
	bl	__clzsi2
# else
	bl	__clzsi2
	add	r0, r0, #32
	b 2f
1:
	mov	r0, xxh
	bl	__clzsi2
# endif
2:
# if defined(__ARM_ARCH_6M__)
	pop	{r4, pc}
# else
	RETLDM	r4
# endif
	FUNC_END clzdi2

#else /* HAVE_ARM_CLZ */

ARM_FUNC_START clzdi2
	cmp	xxh, #0
	do_it	eq, et
	clzeq	r0, xxl
	clzne	r0, xxh
	addeq	r0, r0, #32
	RET
	FUNC_END clzdi2

#endif
#endif /* L_clzdi2 */

#ifdef L_ctzsi2
#if defined(__ARM_ARCH_6M__)
FUNC_START ctzsi2
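	@ r0 & -r0 isolates the lowest set bit; its position is then found
	@ with the same binary search used by clzsi2, with the table biased
	@ so that the result is the bit index rather than a leading-zero
	@ count.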
	neg	r1, r0
	and	r0, r0, r1
	mov	r1, #28
	mov	r3, #1
	lsl	r3, r3, #16
	cmp	r0, r3 /* 0x10000 */
	bcc	2f
	lsr	r0, r0, #16
	sub	r1, r1, #16
2:	lsr	r3, r3, #8
	cmp	r0, r3 /* #0x100 */
	bcc	2f
	lsr	r0, r0, #8
	sub	r1, r1, #8
2:	lsr	r3, r3, #4
	cmp	r0, r3 /* #0x10 */
	bcc	2f
	lsr	r0, r0, #4
	sub	r1, r1, #4
2:	adr	r2, 1f
	ldrb	r0, [r2, r0]
	sub	r0, r0, r1
	bx lr
.align 2
1:
.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
	FUNC_END ctzsi2
#else
ARM_FUNC_START ctzsi2
	rsb	r1, r0, #0
	and	r0, r0, r1
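	@ ctz(x) = 31 - clz(x & -x).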
# if defined(HAVE_ARM_CLZ)
	clz	r0, r0
	rsb	r0, r0, #31
	RET
# else
	mov	r1, #28
	cmp	r0, #0x10000
	do_it	cs, t
	movcs	r0, r0, lsr #16
	subcs	r1, r1, #16
	cmp	r0, #0x100
	do_it	cs, t
	movcs	r0, r0, lsr #8
	subcs	r1, r1, #8
	cmp	r0, #0x10
	do_it	cs, t
	movcs	r0, r0, lsr #4
	subcs	r1, r1, #4
	adr	r2, 1f
	ldrb	r0, [r2, r0]
	sub	r0, r0, r1
	RET
.align 2
1:
.byte	27, 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31, 31, 31, 31
# endif /* !HAVE_ARM_CLZ */
	FUNC_END ctzsi2
#endif
#endif /* L_ctzsi2 */

/* ------------------------------------------------------------------------ */
/* These next two sections are here despite the fact that they contain Thumb
   assembler because their presence allows interworked code to be linked even
   when the GCC library is this one.  */

/* Do not build the interworking functions when the target architecture does
   not support Thumb instructions.  (This can be a multilib option).  */
#if defined __ARM_ARCH_4T__ || defined __ARM_ARCH_5T__\
      || defined __ARM_ARCH_5TE__ || defined __ARM_ARCH_5TEJ__ \
      || __ARM_ARCH__ >= 6

#if defined L_call_via_rX

/* These labels & instructions are used by the ARM/Thumb interworking code.
   The address of the function to be called is loaded into a register and
   then one of these labels is called via a BL instruction.  This puts the
   return address into the link register with the bottom bit set, and the
   code here switches to the correct mode before executing the function.  */
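
/* For example (a sketch of the caller side, not code in this file), an
   interworking indirect call through r3 might be compiled as:

	ldr	r3, =target
	bl	_call_via_r3

   so that the bx in the stub switches state according to bit 0 of r3.  */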

	.text
	.align 0
        .force_thumb

.macro call_via register
	THUMB_FUNC_START _call_via_\register

	bx	\register
	nop

	SIZE	(_call_via_\register)
.endm

	call_via r0
	call_via r1
	call_via r2
	call_via r3
	call_via r4
	call_via r5
	call_via r6
	call_via r7
	call_via r8
	call_via r9
	call_via sl
	call_via fp
	call_via ip
	call_via sp
	call_via lr

#endif /* L_call_via_rX */

/* Don't bother with the old interworking routines for Thumb-2.  */
/* ??? Maybe only omit these on "m" variants.  */
#if !defined(__thumb2__) && !defined(__ARM_ARCH_6M__)

#if defined L_interwork_call_via_rX

/* These labels & instructions are used by the ARM/Thumb interworking code,
   when the target address is in an unknown instruction set.  The address
   of the function to be called is loaded into a register and then one of
   these labels is called via a BL instruction.  This puts the return
   address into the link register with the bottom bit set, and the code
   here switches to the correct mode before executing the function.
   Unfortunately the target code cannot be relied upon to return via a BX
   instruction, so instead we store the return address on the stack and
   allow the called function to return here.  Upon return we recover the
   real return address and use a BX to get back to Thumb mode.

   There are three variations of this code.  The first,
   _interwork_call_via_rN(), will push the return address onto the
   stack and pop it in _arm_return().  It should only be used if all
   arguments are passed in registers.

   The second, _interwork_r7_call_via_rN(), instead stores the return
   address at [r7, #-4].  It is the caller's responsibility to ensure
   that this address is valid and contains no useful data.

   The third, _interwork_r11_call_via_rN(), works in the same way but
   uses r11 instead of r7.  It is useful if the caller does not really
   need a frame pointer.  */

	.text
	.align 0

	.code   32
	.globl _arm_return
LSYM(Lstart_arm_return):
	cfi_start	LSYM(Lstart_arm_return) LSYM(Lend_arm_return)
	cfi_push	0, 0xe, -0x8, 0x8
	nop	@ This nop is for the benefit of debuggers, so that
		@ backtraces will use the correct unwind information.
_arm_return:
	RETLDM	unwind=LSYM(Lstart_arm_return)
	cfi_end	LSYM(Lend_arm_return)

	.globl _arm_return_r7
_arm_return_r7:
	ldr	lr, [r7, #-4]
	bx	lr

	.globl _arm_return_r11
_arm_return_r11:
	ldr	lr, [r11, #-4]
	bx	lr

.macro interwork_with_frame frame, register, name, return
	.code	16

	THUMB_FUNC_START \name

	bx	pc
	nop

	.code	32
	tst	\register, #1
	streq	lr, [\frame, #-4]
	adreq	lr, _arm_return_\frame
	bx	\register

	SIZE	(\name)
.endm

.macro interwork register
	.code	16

	THUMB_FUNC_START _interwork_call_via_\register

	bx	pc
	nop

	.code	32
	.globl LSYM(Lchange_\register)
LSYM(Lchange_\register):
	tst	\register, #1
	streq	lr, [sp, #-8]!
	adreq	lr, _arm_return
	bx	\register

	SIZE	(_interwork_call_via_\register)

	interwork_with_frame r7,\register,_interwork_r7_call_via_\register
	interwork_with_frame r11,\register,_interwork_r11_call_via_\register
.endm

	interwork r0
	interwork r1
	interwork r2
	interwork r3
	interwork r4
	interwork r5
	interwork r6
	interwork r7
	interwork r8
	interwork r9
	interwork sl
	interwork fp
	interwork ip
	interwork sp

	/* The LR case has to be handled a little differently...  */
	.code 16

	THUMB_FUNC_START _interwork_call_via_lr

	bx 	pc
	nop

	.code 32
	.globl .Lchange_lr
.Lchange_lr:
	tst	lr, #1
	stmeqdb	r13!, {lr, pc}
	mov	ip, lr
	adreq	lr, _arm_return
	bx	ip

	SIZE	(_interwork_call_via_lr)

#endif /* L_interwork_call_via_rX */
#endif /* !__thumb2__ */

/* Functions to support compact PIC switch tables in Thumb-1 state.
   All these routines take an index into the table in r0.  The table
   is at LR & ~1 (but this must be rounded up in the case of 32-bit
   entries).  They are only permitted to clobber r12 and r14, and r0
   must be preserved on exit.  */
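
/* As a sketch (the dispatch sequence is emitted by the compiler, not by
   this file), a byte-table switch might look like:

	bl	__gnu_thumb1_case_sqi
0:	.byte	(Lcase0 - 0b) / 2, (Lcase1 - 0b) / 2

   where Lcase0/Lcase1 are the (hypothetical) case labels: the table
   starts at the return address, and each entry holds half the offset
   from the table base to the corresponding label.  */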
#ifdef L_thumb1_case_sqi

	.text
	.align 0
        .force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_sqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrsb	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_sqi)
#endif

#ifdef L_thumb1_case_uqi

	.text
	.align 0
        .force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uqi
	push	{r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r1, r1, #1
	ldrb	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uqi)
#endif

#ifdef L_thumb1_case_shi

	.text
	.align 0
        .force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_shi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrsh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_shi)
#endif

#ifdef L_thumb1_case_uhi

	.text
	.align 0
        .force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_uhi
	push	{r0, r1}
	mov	r1, lr
	lsrs	r1, r1, #1
	lsls	r0, r0, #1
	lsls	r1, r1, #1
	ldrh	r1, [r1, r0]
	lsls	r1, r1, #1
	add	lr, lr, r1
	pop	{r0, r1}
	bx	lr
	SIZE (__gnu_thumb1_case_uhi)
#endif

#ifdef L_thumb1_case_si

	.text
	.align 0
        .force_thumb
	.syntax unified
	THUMB_FUNC_START __gnu_thumb1_case_si
	push	{r0, r1}
	mov	r1, lr
	adds.n	r1, r1, #2	/* Align to word.  */
	lsrs	r1, r1, #2
	lsls	r0, r0, #2
	lsls	r1, r1, #2
	ldr	r0, [r1, r0]
	adds	r0, r0, r1
	mov	lr, r0
	pop	{r0, r1}
	mov	pc, lr		/* We know we were called from thumb code.  */
	SIZE (__gnu_thumb1_case_si)
#endif

#endif /* Arch supports thumb.  */

.macro CFI_START_FUNCTION
	.cfi_startproc
	.cfi_remember_state
.endm

.macro CFI_END_FUNCTION
	.cfi_restore_state
	.cfi_endproc
.endm

#ifndef __symbian__
#ifndef __ARM_ARCH_6M__
#include "ieee754-df.S"
#include "ieee754-sf.S"
#include "bpabi.S"
#else /* __ARM_ARCH_6M__ */
#include "bpabi-v6m.S"
#endif /* __ARM_ARCH_6M__ */
#endif /* !__symbian__ */