10b57cec5SDimitry Andric/*
20b57cec5SDimitry Andric * This m4 code has been taken from The SPARC Architecture Manual Version 8.
30b57cec5SDimitry Andric */
40b57cec5SDimitry Andric/*
50b57cec5SDimitry Andric * Division/Remainder
60b57cec5SDimitry Andric *
70b57cec5SDimitry Andric * Input is:
80b57cec5SDimitry Andric *   dividend -- the thing being divided
90b57cec5SDimitry Andric *   divisor -- how many ways to divide it
100b57cec5SDimitry Andric * Important parameters:
110b57cec5SDimitry Andric *   N -- how many bits per iteration we try to get
120b57cec5SDimitry Andric *        as our current guess:
130b57cec5SDimitry Andric *   WORDSIZE -- how many bits altogether we're talking about:
140b57cec5SDimitry Andric *               obviously:
150b57cec5SDimitry Andric * A derived constant:
160b57cec5SDimitry Andric *   TOPBITS -- how many bits are in the top "decade" of a number:
170b57cec5SDimitry Andric *
180b57cec5SDimitry Andric * Important variables are:
190b57cec5SDimitry Andric *   Q -- the partial quotient under development -- initially 0
200b57cec5SDimitry Andric *   R -- the remainder so far -- initially == the dividend
210b57cec5SDimitry Andric *   ITER -- number of iterations of the main division loop which will
220b57cec5SDimitry Andric *           be required. Equal to CEIL( lg2(quotient)/4 )
230b57cec5SDimitry Andric *           Note that this is log_base_(2ˆ4) of the quotient.
240b57cec5SDimitry Andric *   V -- the current comparand -- initially divisor*2ˆ(ITER*4-1)
250b57cec5SDimitry Andric * Cost:
260b57cec5SDimitry Andric *   current estimate for non-large dividend is
270b57cec5SDimitry Andric *        CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C
280b57cec5SDimitry Andric *   a large dividend is one greater than 2ˆ(31-4 ) and takes a
290b57cec5SDimitry Andric *   different path, as the upper bits of the quotient must be developed
300b57cec5SDimitry Andric *   one bit at a time.
310b57cec5SDimitry Andric *   This uses the m4 and cpp macro preprocessors.
320b57cec5SDimitry Andric */
330b57cec5SDimitry Andric/*
340b57cec5SDimitry Andric * This is the recursive definition of how we develop quotient digits.
350b57cec5SDimitry Andric * It takes three important parameters:
360b57cec5SDimitry Andric *   $1 -- the current depth, 1<=$1<=4
370b57cec5SDimitry Andric *   $2 -- the current accumulation of quotient bits
380b57cec5SDimitry Andric *   4 -- max depth
390b57cec5SDimitry Andric * We add a new bit to $2 and either recurse or insert the bits in the quotient.
400b57cec5SDimitry Andric * Dynamic input:
410b57cec5SDimitry Andric *   %o3 -- current remainder
420b57cec5SDimitry Andric *   %o2 -- current quotient
430b57cec5SDimitry Andric *   %o5 -- current comparand
440b57cec5SDimitry Andric *   cc -- set on current value of %o3
450b57cec5SDimitry Andric * Dynamic output:
460b57cec5SDimitry Andric *   %o3', %o2', %o5', cc'
470b57cec5SDimitry Andric */
480b57cec5SDimitry Andric#include "../assembly.h"
490b57cec5SDimitry Andric.text
500b57cec5SDimitry Andric	.align 32
510b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__udivsi3)
520b57cec5SDimitry Andric	b	divide
530b57cec5SDimitry Andric	mov	0,%g3			! result always nonnegative
540b57cec5SDimitry Andric.text
550b57cec5SDimitry Andric	.align 32
560b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__divsi3)
570b57cec5SDimitry Andric	orcc	%o1,%o0,%g0	! are either %o0 or %o1 negative
580b57cec5SDimitry Andric	bge	divide			! if not, skip this junk
590b57cec5SDimitry Andric	xor	%o1,%o0,%g3	! record sign of result in sign of %g3
600b57cec5SDimitry Andric	tst	%o1
610b57cec5SDimitry Andric	bge	2f
620b57cec5SDimitry Andric	tst	%o0
630b57cec5SDimitry Andric	! %o1 < 0
640b57cec5SDimitry Andric	bge	divide
650b57cec5SDimitry Andric	neg	%o1
660b57cec5SDimitry Andric	2:
670b57cec5SDimitry Andric	! %o0 < 0
680b57cec5SDimitry Andric	neg	%o0
690b57cec5SDimitry Andric	! FALL THROUGH
700b57cec5SDimitry Andricdivide:
710b57cec5SDimitry Andric	! Compute size of quotient, scale comparand.
720b57cec5SDimitry Andric	orcc	%o1,%g0,%o5		! movcc %o1,%o5
730b57cec5SDimitry Andric	te	2			! if %o1 = 0
740b57cec5SDimitry Andric	mov	%o0,%o3
750b57cec5SDimitry Andric	mov	0,%o2
760b57cec5SDimitry Andric	sethi	%hi(1<<(32-4 -1)),%g1
770b57cec5SDimitry Andric	cmp	%o3,%g1
780b57cec5SDimitry Andric	blu	not_really_big
790b57cec5SDimitry Andric	mov	0,%o4
800b57cec5SDimitry Andric	!
810b57cec5SDimitry Andric	! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here,
820b57cec5SDimitry Andric	! as our usual 4-at-a-shot divide step will cause overflow and havoc.
830b57cec5SDimitry Andric	! The total number of bits in the result here is 4*%o4+%g2, where
840b57cec5SDimitry Andric	! %g2 <= 4.
850b57cec5SDimitry Andric	! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into
860b57cec5SDimitry Andric! the top decade: so don't even bother to compare to %o3.
870b57cec5SDimitry Andric1:
880b57cec5SDimitry Andric	cmp	%o5,%g1
890b57cec5SDimitry Andric	bgeu	3f
900b57cec5SDimitry Andric	mov	1,%g2
910b57cec5SDimitry Andric	sll	%o5,4,%o5
920b57cec5SDimitry Andric	b	1b
930b57cec5SDimitry Andric	inc	%o4
940b57cec5SDimitry Andric! Now compute %g2
950b57cec5SDimitry Andric2:	addcc	%o5,%o5,%o5
960b57cec5SDimitry Andric	bcc	not_too_big
970b57cec5SDimitry Andric	add	%g2,1,%g2
980b57cec5SDimitry Andric		! We're here if the %o1 overflowed when Shifting.
990b57cec5SDimitry Andric		! This means that %o3 has the high-order bit set.
1000b57cec5SDimitry Andric		! Restore %o5 and subtract from %o3.
1010b57cec5SDimitry Andric		sll	%g1,4 ,%g1	! high order bit
1020b57cec5SDimitry Andric		srl	%o5,1,%o5		! rest of %o5
1030b57cec5SDimitry Andric		add	%o5,%g1,%o5
1040b57cec5SDimitry Andric		b	do_single_div
1050b57cec5SDimitry Andric		dec	%g2
1060b57cec5SDimitry Andricnot_too_big:
1070b57cec5SDimitry Andric3:	cmp	%o5,%o3
1080b57cec5SDimitry Andric	blu	2b
1090b57cec5SDimitry Andric	nop
1100b57cec5SDimitry Andric	be	do_single_div
1110b57cec5SDimitry Andric	nop
1120b57cec5SDimitry Andric! %o5 > %o3: went too far: back up 1 step
1130b57cec5SDimitry Andric!     srl %o5,1,%o5
1140b57cec5SDimitry Andric!      dec %g2
1150b57cec5SDimitry Andric! do single-bit divide steps
1160b57cec5SDimitry Andric!
1170b57cec5SDimitry Andric! We have to be careful here. We know that %o3 >= %o5, so we can do the
1180b57cec5SDimitry Andric! first divide step without thinking. BUT, the others are conditional,
1190b57cec5SDimitry Andric! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
1200b57cec5SDimitry Andric! order bit set in the first step, just falling into the regular
1210b57cec5SDimitry Andric! division loop will mess up the first time around.
1220b57cec5SDimitry Andric! So we unroll slightly...
1230b57cec5SDimitry Andricdo_single_div:
1240b57cec5SDimitry Andric	deccc	%g2
1250b57cec5SDimitry Andric	bl	end_regular_divide
1260b57cec5SDimitry Andric	nop
1270b57cec5SDimitry Andric	sub	%o3,%o5,%o3
1280b57cec5SDimitry Andric	mov	1,%o2
1290b57cec5SDimitry Andric	b,a	end_single_divloop
1300b57cec5SDimitry Andric	! EMPTY
1310b57cec5SDimitry Andricsingle_divloop:
1320b57cec5SDimitry Andric	sll	%o2,1,%o2
1330b57cec5SDimitry Andric	bl	1f
1340b57cec5SDimitry Andric	srl	%o5,1,%o5
1350b57cec5SDimitry Andric	! %o3 >= 0
1360b57cec5SDimitry Andric		sub	%o3,%o5,%o3
1370b57cec5SDimitry Andric		b	2f
1380b57cec5SDimitry Andric		inc	%o2
1390b57cec5SDimitry Andric	1:	! %o3 < 0
1400b57cec5SDimitry Andric		add	%o3,%o5,%o3
1410b57cec5SDimitry Andric		dec	%o2
1420b57cec5SDimitry Andric	2:
1430b57cec5SDimitry Andric	end_single_divloop:
1440b57cec5SDimitry Andric		deccc	%g2
1450b57cec5SDimitry Andric		bge	single_divloop
1460b57cec5SDimitry Andric		tst	%o3
1470b57cec5SDimitry Andric		b,a	end_regular_divide
1480b57cec5SDimitry Andric		! EMPTY
1490b57cec5SDimitry Andricnot_really_big:
1500b57cec5SDimitry Andric1:
1510b57cec5SDimitry Andric	sll	%o5,4,%o5
1520b57cec5SDimitry Andric	cmp	%o5,%o3
1530b57cec5SDimitry Andric	bleu	1b
1540b57cec5SDimitry Andric	inccc	%o4
1550b57cec5SDimitry Andric	be	got_result
1560b57cec5SDimitry Andric	dec	%o4
1570b57cec5SDimitry Andricdo_regular_divide:
1580b57cec5SDimitry Andric	! Do the main division iteration
1590b57cec5SDimitry Andric	tst	%o3
1600b57cec5SDimitry Andric	! Fall through into divide loop
1610b57cec5SDimitry Andricdivloop:
1620b57cec5SDimitry Andric	sll	%o2,4,%o2
1630b57cec5SDimitry Andric		!depth 1, accumulated bits 0
1640b57cec5SDimitry Andric	bl	L.1.16
1650b57cec5SDimitry Andric	srl	%o5,1,%o5
1660b57cec5SDimitry Andric	! remainder is nonnegative
1670b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
1680b57cec5SDimitry Andric	 	!depth 2, accumulated bits 1
1690b57cec5SDimitry Andric	bl	L.2.17
1700b57cec5SDimitry Andric	srl	%o5,1,%o5
1710b57cec5SDimitry Andric	! remainder is nonnegative
1720b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
1730b57cec5SDimitry Andric	 	!depth 3, accumulated bits 3
1740b57cec5SDimitry Andric	bl	L.3.19
1750b57cec5SDimitry Andric	srl	%o5,1,%o5
1760b57cec5SDimitry Andric	! remainder is nonnegative
1770b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
1780b57cec5SDimitry Andric	 	!depth 4, accumulated bits 7
1790b57cec5SDimitry Andric	bl	L.4.23
1800b57cec5SDimitry Andric	srl	%o5,1,%o5
1810b57cec5SDimitry Andric	! remainder is nonnegative
1820b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
1830b57cec5SDimitry Andric		b	9f
1840b57cec5SDimitry Andric		add	%o2, (7*2+1), %o2
1850b57cec5SDimitry AndricL.4.23:
1860b57cec5SDimitry Andric	! remainder is negative
1870b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
1880b57cec5SDimitry Andric		b	9f
1890b57cec5SDimitry Andric		add	%o2, (7*2-1), %o2
1900b57cec5SDimitry AndricL.3.19:
1910b57cec5SDimitry Andric	! remainder is negative
1920b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
1930b57cec5SDimitry Andric	 	!depth 4, accumulated bits 5
1940b57cec5SDimitry Andric	bl	L.4.21
1950b57cec5SDimitry Andric	srl	%o5,1,%o5
1960b57cec5SDimitry Andric	! remainder is nonnegative
1970b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
1980b57cec5SDimitry Andric		b	9f
1990b57cec5SDimitry Andric		add	%o2, (5*2+1), %o2
2000b57cec5SDimitry AndricL.4.21:
2010b57cec5SDimitry Andric	! remainder is negative
2020b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2030b57cec5SDimitry Andric		b	9f
2040b57cec5SDimitry Andric		add	%o2, (5*2-1), %o2
2050b57cec5SDimitry AndricL.2.17:
2060b57cec5SDimitry Andric	! remainder is negative
2070b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2080b57cec5SDimitry Andric	 	!depth 3, accumulated bits 1
2090b57cec5SDimitry Andric	bl	L.3.17
2100b57cec5SDimitry Andric	srl	%o5,1,%o5
2110b57cec5SDimitry Andric	! remainder is nonnegative
2120b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
2130b57cec5SDimitry Andric	 	!depth 4, accumulated bits 3
2140b57cec5SDimitry Andric	bl	L.4.19
2150b57cec5SDimitry Andric	srl	%o5,1,%o5
2160b57cec5SDimitry Andric	! remainder is nonnegative
2170b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
2180b57cec5SDimitry Andric		b	9f
2190b57cec5SDimitry Andric		add	%o2, (3*2+1), %o2
2200b57cec5SDimitry AndricL.4.19:
2210b57cec5SDimitry Andric	! remainder is negative
2220b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2230b57cec5SDimitry Andric		b	9f
2240b57cec5SDimitry Andric		add	%o2, (3*2-1), %o2
2250b57cec5SDimitry AndricL.3.17:
2260b57cec5SDimitry Andric	! remainder is negative
2270b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2280b57cec5SDimitry Andric	 	!depth 4, accumulated bits 1
2290b57cec5SDimitry Andric	bl	L.4.17
2300b57cec5SDimitry Andric	srl	%o5,1,%o5
2310b57cec5SDimitry Andric	! remainder is nonnegative
2320b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
2330b57cec5SDimitry Andric		b	9f
2340b57cec5SDimitry Andric		add	%o2, (1*2+1), %o2
2350b57cec5SDimitry AndricL.4.17:
2360b57cec5SDimitry Andric	! remainder is negative
2370b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2380b57cec5SDimitry Andric		b	9f
2390b57cec5SDimitry Andric		add	%o2, (1*2-1), %o2
2400b57cec5SDimitry AndricL.1.16:
2410b57cec5SDimitry Andric	! remainder is negative
2420b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2430b57cec5SDimitry Andric	 	!depth 2, accumulated bits -1
2440b57cec5SDimitry Andric	bl	L.2.15
2450b57cec5SDimitry Andric	srl	%o5,1,%o5
2460b57cec5SDimitry Andric	! remainder is nonnegative
2470b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
2480b57cec5SDimitry Andric	 	!depth 3, accumulated bits -1
2490b57cec5SDimitry Andric	bl	L.3.15
2500b57cec5SDimitry Andric	srl	%o5,1,%o5
2510b57cec5SDimitry Andric	! remainder is nonnegative
2520b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
2530b57cec5SDimitry Andric	 	!depth 4, accumulated bits -1
2540b57cec5SDimitry Andric	bl	L.4.15
2550b57cec5SDimitry Andric	srl	%o5,1,%o5
2560b57cec5SDimitry Andric	! remainder is nonnegative
2570b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
2580b57cec5SDimitry Andric		b	9f
2590b57cec5SDimitry Andric		add	%o2, (-1*2+1), %o2
2600b57cec5SDimitry AndricL.4.15:
2610b57cec5SDimitry Andric	! remainder is negative
2620b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2630b57cec5SDimitry Andric		b	9f
2640b57cec5SDimitry Andric		add	%o2, (-1*2-1), %o2
2650b57cec5SDimitry AndricL.3.15:
2660b57cec5SDimitry Andric	! remainder is negative
2670b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2680b57cec5SDimitry Andric	 	!depth 4, accumulated bits -3
2690b57cec5SDimitry Andric	bl	L.4.13
2700b57cec5SDimitry Andric	srl	%o5,1,%o5
2710b57cec5SDimitry Andric	! remainder is nonnegative
2720b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
2730b57cec5SDimitry Andric		b	9f
2740b57cec5SDimitry Andric		add	%o2, (-3*2+1), %o2
2750b57cec5SDimitry AndricL.4.13:
2760b57cec5SDimitry Andric	! remainder is negative
2770b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2780b57cec5SDimitry Andric		b	9f
2790b57cec5SDimitry Andric		add	%o2, (-3*2-1), %o2
2800b57cec5SDimitry AndricL.2.15:
2810b57cec5SDimitry Andric	! remainder is negative
2820b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2830b57cec5SDimitry Andric	 	!depth 3, accumulated bits -3
2840b57cec5SDimitry Andric	bl	L.3.13
2850b57cec5SDimitry Andric	srl	%o5,1,%o5
2860b57cec5SDimitry Andric	! remainder is nonnegative
2870b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
2880b57cec5SDimitry Andric	 	!depth 4, accumulated bits -5
2890b57cec5SDimitry Andric	bl	L.4.11
2900b57cec5SDimitry Andric	srl	%o5,1,%o5
2910b57cec5SDimitry Andric	! remainder is nonnegative
2920b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
2930b57cec5SDimitry Andric		b	9f
2940b57cec5SDimitry Andric		add	%o2, (-5*2+1), %o2
2950b57cec5SDimitry AndricL.4.11:
2960b57cec5SDimitry Andric	! remainder is negative
2970b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
2980b57cec5SDimitry Andric		b	9f
2990b57cec5SDimitry Andric		add	%o2, (-5*2-1), %o2
3000b57cec5SDimitry AndricL.3.13:
3010b57cec5SDimitry Andric	! remainder is negative
3020b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
3030b57cec5SDimitry Andric	 	!depth 4, accumulated bits -7
3040b57cec5SDimitry Andric	bl	L.4.9
3050b57cec5SDimitry Andric	srl	%o5,1,%o5
3060b57cec5SDimitry Andric	! remainder is nonnegative
3070b57cec5SDimitry Andric	subcc	%o3,%o5,%o3
3080b57cec5SDimitry Andric		b	9f
3090b57cec5SDimitry Andric		add	%o2, (-7*2+1), %o2
3100b57cec5SDimitry AndricL.4.9:
3110b57cec5SDimitry Andric	! remainder is negative
3120b57cec5SDimitry Andric	addcc	%o3,%o5,%o3
3130b57cec5SDimitry Andric		b	9f
3140b57cec5SDimitry Andric		add	%o2, (-7*2-1), %o2
3150b57cec5SDimitry Andric	9:
3160b57cec5SDimitry Andricend_regular_divide:
3170b57cec5SDimitry Andric	deccc	%o4
3180b57cec5SDimitry Andric	bge	divloop
3190b57cec5SDimitry Andric	tst	%o3
3200b57cec5SDimitry Andric	bl,a	got_result
3210b57cec5SDimitry Andric	! non-restoring fixup if remainder < 0, otherwise annulled
3220b57cec5SDimitry Andric	dec	%o2
3230b57cec5SDimitry Andricgot_result:
3240b57cec5SDimitry Andric	tst	%g3
3250b57cec5SDimitry Andric	bl,a	1f
3260b57cec5SDimitry Andric	! negate for answer < 0, otherwise annulled
3270b57cec5SDimitry Andric	neg	%o2,%o2			! %o2 <- -%o2
3280b57cec5SDimitry Andric1:
3290b57cec5SDimitry Andric	retl				! leaf-routine return
3300b57cec5SDimitry Andric	mov	%o2,%o0			! quotient <- %o2
331