10b57cec5SDimitry Andric/* 20b57cec5SDimitry Andric * This m4 code has been taken from The SPARC Architecture Manual Version 8. 30b57cec5SDimitry Andric */ 40b57cec5SDimitry Andric/* 50b57cec5SDimitry Andric * Division/Remainder 60b57cec5SDimitry Andric * 70b57cec5SDimitry Andric * Input is: 80b57cec5SDimitry Andric * dividend -- the thing being divided 90b57cec5SDimitry Andric * divisor -- how many ways to divide it 100b57cec5SDimitry Andric * Important parameters: 110b57cec5SDimitry Andric * N -- how many bits per iteration we try to get 120b57cec5SDimitry Andric * as our current guess: 130b57cec5SDimitry Andric * WORDSIZE -- how many bits altogether we're talking about: 140b57cec5SDimitry Andric * obviously: 150b57cec5SDimitry Andric * A derived constant: 160b57cec5SDimitry Andric * TOPBITS -- how many bits are in the top "decade" of a number: 170b57cec5SDimitry Andric * 180b57cec5SDimitry Andric * Important variables are: 190b57cec5SDimitry Andric * Q -- the partial quotient under development -- initially 0 200b57cec5SDimitry Andric * R -- the remainder so far -- initially == the dividend 210b57cec5SDimitry Andric * ITER -- number of iterations of the main division loop which will 220b57cec5SDimitry Andric * be required. Equal to CEIL( lg2(quotient)/4 ) 230b57cec5SDimitry Andric * Note that this is log_base_(2ˆ4) of the quotient. 240b57cec5SDimitry Andric * V -- the current comparand -- initially divisor*2ˆ(ITER*4-1) 250b57cec5SDimitry Andric * Cost: 260b57cec5SDimitry Andric * current estimate for non-large dividend is 270b57cec5SDimitry Andric * CEIL( lg2(quotient) / 4 ) x ( 10 + 74/2 ) + C 280b57cec5SDimitry Andric * a large dividend is one greater than 2ˆ(31-4 ) and takes a 290b57cec5SDimitry Andric * different path, as the upper bits of the quotient must be developed 300b57cec5SDimitry Andric * one bit at a time. 310b57cec5SDimitry Andric * This uses the m4 and cpp macro preprocessors. 320b57cec5SDimitry Andric */ 330b57cec5SDimitry Andric/* 340b57cec5SDimitry Andric * This is the recursive definition of how we develop quotient digits. 350b57cec5SDimitry Andric * It takes three important parameters: 360b57cec5SDimitry Andric * $1 -- the current depth, 1<=$1<=4 370b57cec5SDimitry Andric * $2 -- the current accumulation of quotient bits 380b57cec5SDimitry Andric * 4 -- max depth 390b57cec5SDimitry Andric * We add a new bit to $2 and either recurse or insert the bits in the quotient. 400b57cec5SDimitry Andric * Dynamic input: 410b57cec5SDimitry Andric * %o3 -- current remainder 420b57cec5SDimitry Andric * %o2 -- current quotient 430b57cec5SDimitry Andric * %o5 -- current comparand 440b57cec5SDimitry Andric * cc -- set on current value of %o3 450b57cec5SDimitry Andric * Dynamic output: 460b57cec5SDimitry Andric * %o3', %o2', %o5', cc' 470b57cec5SDimitry Andric */ 480b57cec5SDimitry Andric#include "../assembly.h" 490b57cec5SDimitry Andric.text 500b57cec5SDimitry Andric .align 32 510b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__udivsi3) 520b57cec5SDimitry Andric b divide 530b57cec5SDimitry Andric mov 0,%g3 ! result always nonnegative 540b57cec5SDimitry Andric.text 550b57cec5SDimitry Andric .align 32 560b57cec5SDimitry AndricDEFINE_COMPILERRT_FUNCTION(__divsi3) 570b57cec5SDimitry Andric orcc %o1,%o0,%g0 ! are either %o0 or %o1 negative 580b57cec5SDimitry Andric bge divide ! if not, skip this junk 590b57cec5SDimitry Andric xor %o1,%o0,%g3 ! record sign of result in sign of %g3 600b57cec5SDimitry Andric tst %o1 610b57cec5SDimitry Andric bge 2f 620b57cec5SDimitry Andric tst %o0 630b57cec5SDimitry Andric ! %o1 < 0 640b57cec5SDimitry Andric bge divide 650b57cec5SDimitry Andric neg %o1 660b57cec5SDimitry Andric 2: 670b57cec5SDimitry Andric ! %o0 < 0 680b57cec5SDimitry Andric neg %o0 690b57cec5SDimitry Andric ! FALL THROUGH 700b57cec5SDimitry Andricdivide: 710b57cec5SDimitry Andric ! Compute size of quotient, scale comparand. 720b57cec5SDimitry Andric orcc %o1,%g0,%o5 ! movcc %o1,%o5 730b57cec5SDimitry Andric te 2 ! if %o1 = 0 740b57cec5SDimitry Andric mov %o0,%o3 750b57cec5SDimitry Andric mov 0,%o2 760b57cec5SDimitry Andric sethi %hi(1<<(32-4 -1)),%g1 770b57cec5SDimitry Andric cmp %o3,%g1 780b57cec5SDimitry Andric blu not_really_big 790b57cec5SDimitry Andric mov 0,%o4 800b57cec5SDimitry Andric ! 810b57cec5SDimitry Andric ! Here, the %o0 is >= 2ˆ(31-4) or so. We must be careful here, 820b57cec5SDimitry Andric ! as our usual 4-at-a-shot divide step will cause overflow and havoc. 830b57cec5SDimitry Andric ! The total number of bits in the result here is 4*%o4+%g2, where 840b57cec5SDimitry Andric ! %g2 <= 4. 850b57cec5SDimitry Andric ! Compute %o4 in an unorthodox manner: know we need to Shift %o5 into 860b57cec5SDimitry Andric! the top decade: so don't even bother to compare to %o3. 870b57cec5SDimitry Andric1: 880b57cec5SDimitry Andric cmp %o5,%g1 890b57cec5SDimitry Andric bgeu 3f 900b57cec5SDimitry Andric mov 1,%g2 910b57cec5SDimitry Andric sll %o5,4,%o5 920b57cec5SDimitry Andric b 1b 930b57cec5SDimitry Andric inc %o4 940b57cec5SDimitry Andric! Now compute %g2 950b57cec5SDimitry Andric2: addcc %o5,%o5,%o5 960b57cec5SDimitry Andric bcc not_too_big 970b57cec5SDimitry Andric add %g2,1,%g2 980b57cec5SDimitry Andric ! We're here if the %o1 overflowed when Shifting. 990b57cec5SDimitry Andric ! This means that %o3 has the high-order bit set. 1000b57cec5SDimitry Andric ! Restore %o5 and subtract from %o3. 1010b57cec5SDimitry Andric sll %g1,4 ,%g1 ! high order bit 1020b57cec5SDimitry Andric srl %o5,1,%o5 ! rest of %o5 1030b57cec5SDimitry Andric add %o5,%g1,%o5 1040b57cec5SDimitry Andric b do_single_div 1050b57cec5SDimitry Andric dec %g2 1060b57cec5SDimitry Andricnot_too_big: 1070b57cec5SDimitry Andric3: cmp %o5,%o3 1080b57cec5SDimitry Andric blu 2b 1090b57cec5SDimitry Andric nop 1100b57cec5SDimitry Andric be do_single_div 1110b57cec5SDimitry Andric nop 1120b57cec5SDimitry Andric! %o5 > %o3: went too far: back up 1 step 1130b57cec5SDimitry Andric! srl %o5,1,%o5 1140b57cec5SDimitry Andric! dec %g2 1150b57cec5SDimitry Andric! do single-bit divide steps 1160b57cec5SDimitry Andric! 1170b57cec5SDimitry Andric! We have to be careful here. We know that %o3 >= %o5, so we can do the 1180b57cec5SDimitry Andric! first divide step without thinking. BUT, the others are conditional, 1190b57cec5SDimitry Andric! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- 1200b57cec5SDimitry Andric! order bit set in the first step, just falling into the regular 1210b57cec5SDimitry Andric! division loop will mess up the first time around. 1220b57cec5SDimitry Andric! So we unroll slightly... 1230b57cec5SDimitry Andricdo_single_div: 1240b57cec5SDimitry Andric deccc %g2 1250b57cec5SDimitry Andric bl end_regular_divide 1260b57cec5SDimitry Andric nop 1270b57cec5SDimitry Andric sub %o3,%o5,%o3 1280b57cec5SDimitry Andric mov 1,%o2 1290b57cec5SDimitry Andric b,a end_single_divloop 1300b57cec5SDimitry Andric ! EMPTY 1310b57cec5SDimitry Andricsingle_divloop: 1320b57cec5SDimitry Andric sll %o2,1,%o2 1330b57cec5SDimitry Andric bl 1f 1340b57cec5SDimitry Andric srl %o5,1,%o5 1350b57cec5SDimitry Andric ! %o3 >= 0 1360b57cec5SDimitry Andric sub %o3,%o5,%o3 1370b57cec5SDimitry Andric b 2f 1380b57cec5SDimitry Andric inc %o2 1390b57cec5SDimitry Andric 1: ! %o3 < 0 1400b57cec5SDimitry Andric add %o3,%o5,%o3 1410b57cec5SDimitry Andric dec %o2 1420b57cec5SDimitry Andric 2: 1430b57cec5SDimitry Andric end_single_divloop: 1440b57cec5SDimitry Andric deccc %g2 1450b57cec5SDimitry Andric bge single_divloop 1460b57cec5SDimitry Andric tst %o3 1470b57cec5SDimitry Andric b,a end_regular_divide 1480b57cec5SDimitry Andric ! EMPTY 1490b57cec5SDimitry Andricnot_really_big: 1500b57cec5SDimitry Andric1: 1510b57cec5SDimitry Andric sll %o5,4,%o5 1520b57cec5SDimitry Andric cmp %o5,%o3 1530b57cec5SDimitry Andric bleu 1b 1540b57cec5SDimitry Andric inccc %o4 1550b57cec5SDimitry Andric be got_result 1560b57cec5SDimitry Andric dec %o4 1570b57cec5SDimitry Andricdo_regular_divide: 1580b57cec5SDimitry Andric ! Do the main division iteration 1590b57cec5SDimitry Andric tst %o3 1600b57cec5SDimitry Andric ! Fall through into divide loop 1610b57cec5SDimitry Andricdivloop: 1620b57cec5SDimitry Andric sll %o2,4,%o2 1630b57cec5SDimitry Andric !depth 1, accumulated bits 0 1640b57cec5SDimitry Andric bl L.1.16 1650b57cec5SDimitry Andric srl %o5,1,%o5 1660b57cec5SDimitry Andric ! remainder is nonnegative 1670b57cec5SDimitry Andric subcc %o3,%o5,%o3 1680b57cec5SDimitry Andric !depth 2, accumulated bits 1 1690b57cec5SDimitry Andric bl L.2.17 1700b57cec5SDimitry Andric srl %o5,1,%o5 1710b57cec5SDimitry Andric ! remainder is nonnegative 1720b57cec5SDimitry Andric subcc %o3,%o5,%o3 1730b57cec5SDimitry Andric !depth 3, accumulated bits 3 1740b57cec5SDimitry Andric bl L.3.19 1750b57cec5SDimitry Andric srl %o5,1,%o5 1760b57cec5SDimitry Andric ! remainder is nonnegative 1770b57cec5SDimitry Andric subcc %o3,%o5,%o3 1780b57cec5SDimitry Andric !depth 4, accumulated bits 7 1790b57cec5SDimitry Andric bl L.4.23 1800b57cec5SDimitry Andric srl %o5,1,%o5 1810b57cec5SDimitry Andric ! remainder is nonnegative 1820b57cec5SDimitry Andric subcc %o3,%o5,%o3 1830b57cec5SDimitry Andric b 9f 1840b57cec5SDimitry Andric add %o2, (7*2+1), %o2 1850b57cec5SDimitry AndricL.4.23: 1860b57cec5SDimitry Andric ! remainder is negative 1870b57cec5SDimitry Andric addcc %o3,%o5,%o3 1880b57cec5SDimitry Andric b 9f 1890b57cec5SDimitry Andric add %o2, (7*2-1), %o2 1900b57cec5SDimitry AndricL.3.19: 1910b57cec5SDimitry Andric ! remainder is negative 1920b57cec5SDimitry Andric addcc %o3,%o5,%o3 1930b57cec5SDimitry Andric !depth 4, accumulated bits 5 1940b57cec5SDimitry Andric bl L.4.21 1950b57cec5SDimitry Andric srl %o5,1,%o5 1960b57cec5SDimitry Andric ! remainder is nonnegative 1970b57cec5SDimitry Andric subcc %o3,%o5,%o3 1980b57cec5SDimitry Andric b 9f 1990b57cec5SDimitry Andric add %o2, (5*2+1), %o2 2000b57cec5SDimitry AndricL.4.21: 2010b57cec5SDimitry Andric ! remainder is negative 2020b57cec5SDimitry Andric addcc %o3,%o5,%o3 2030b57cec5SDimitry Andric b 9f 2040b57cec5SDimitry Andric add %o2, (5*2-1), %o2 2050b57cec5SDimitry AndricL.2.17: 2060b57cec5SDimitry Andric ! remainder is negative 2070b57cec5SDimitry Andric addcc %o3,%o5,%o3 2080b57cec5SDimitry Andric !depth 3, accumulated bits 1 2090b57cec5SDimitry Andric bl L.3.17 2100b57cec5SDimitry Andric srl %o5,1,%o5 2110b57cec5SDimitry Andric ! remainder is nonnegative 2120b57cec5SDimitry Andric subcc %o3,%o5,%o3 2130b57cec5SDimitry Andric !depth 4, accumulated bits 3 2140b57cec5SDimitry Andric bl L.4.19 2150b57cec5SDimitry Andric srl %o5,1,%o5 2160b57cec5SDimitry Andric ! remainder is nonnegative 2170b57cec5SDimitry Andric subcc %o3,%o5,%o3 2180b57cec5SDimitry Andric b 9f 2190b57cec5SDimitry Andric add %o2, (3*2+1), %o2 2200b57cec5SDimitry AndricL.4.19: 2210b57cec5SDimitry Andric ! remainder is negative 2220b57cec5SDimitry Andric addcc %o3,%o5,%o3 2230b57cec5SDimitry Andric b 9f 2240b57cec5SDimitry Andric add %o2, (3*2-1), %o2 2250b57cec5SDimitry AndricL.3.17: 2260b57cec5SDimitry Andric ! remainder is negative 2270b57cec5SDimitry Andric addcc %o3,%o5,%o3 2280b57cec5SDimitry Andric !depth 4, accumulated bits 1 2290b57cec5SDimitry Andric bl L.4.17 2300b57cec5SDimitry Andric srl %o5,1,%o5 2310b57cec5SDimitry Andric ! remainder is nonnegative 2320b57cec5SDimitry Andric subcc %o3,%o5,%o3 2330b57cec5SDimitry Andric b 9f 2340b57cec5SDimitry Andric add %o2, (1*2+1), %o2 2350b57cec5SDimitry AndricL.4.17: 2360b57cec5SDimitry Andric ! remainder is negative 2370b57cec5SDimitry Andric addcc %o3,%o5,%o3 2380b57cec5SDimitry Andric b 9f 2390b57cec5SDimitry Andric add %o2, (1*2-1), %o2 2400b57cec5SDimitry AndricL.1.16: 2410b57cec5SDimitry Andric ! remainder is negative 2420b57cec5SDimitry Andric addcc %o3,%o5,%o3 2430b57cec5SDimitry Andric !depth 2, accumulated bits -1 2440b57cec5SDimitry Andric bl L.2.15 2450b57cec5SDimitry Andric srl %o5,1,%o5 2460b57cec5SDimitry Andric ! remainder is nonnegative 2470b57cec5SDimitry Andric subcc %o3,%o5,%o3 2480b57cec5SDimitry Andric !depth 3, accumulated bits -1 2490b57cec5SDimitry Andric bl L.3.15 2500b57cec5SDimitry Andric srl %o5,1,%o5 2510b57cec5SDimitry Andric ! remainder is nonnegative 2520b57cec5SDimitry Andric subcc %o3,%o5,%o3 2530b57cec5SDimitry Andric !depth 4, accumulated bits -1 2540b57cec5SDimitry Andric bl L.4.15 2550b57cec5SDimitry Andric srl %o5,1,%o5 2560b57cec5SDimitry Andric ! remainder is nonnegative 2570b57cec5SDimitry Andric subcc %o3,%o5,%o3 2580b57cec5SDimitry Andric b 9f 2590b57cec5SDimitry Andric add %o2, (-1*2+1), %o2 2600b57cec5SDimitry AndricL.4.15: 2610b57cec5SDimitry Andric ! remainder is negative 2620b57cec5SDimitry Andric addcc %o3,%o5,%o3 2630b57cec5SDimitry Andric b 9f 2640b57cec5SDimitry Andric add %o2, (-1*2-1), %o2 2650b57cec5SDimitry AndricL.3.15: 2660b57cec5SDimitry Andric ! remainder is negative 2670b57cec5SDimitry Andric addcc %o3,%o5,%o3 2680b57cec5SDimitry Andric !depth 4, accumulated bits -3 2690b57cec5SDimitry Andric bl L.4.13 2700b57cec5SDimitry Andric srl %o5,1,%o5 2710b57cec5SDimitry Andric ! remainder is nonnegative 2720b57cec5SDimitry Andric subcc %o3,%o5,%o3 2730b57cec5SDimitry Andric b 9f 2740b57cec5SDimitry Andric add %o2, (-3*2+1), %o2 2750b57cec5SDimitry AndricL.4.13: 2760b57cec5SDimitry Andric ! remainder is negative 2770b57cec5SDimitry Andric addcc %o3,%o5,%o3 2780b57cec5SDimitry Andric b 9f 2790b57cec5SDimitry Andric add %o2, (-3*2-1), %o2 2800b57cec5SDimitry AndricL.2.15: 2810b57cec5SDimitry Andric ! remainder is negative 2820b57cec5SDimitry Andric addcc %o3,%o5,%o3 2830b57cec5SDimitry Andric !depth 3, accumulated bits -3 2840b57cec5SDimitry Andric bl L.3.13 2850b57cec5SDimitry Andric srl %o5,1,%o5 2860b57cec5SDimitry Andric ! remainder is nonnegative 2870b57cec5SDimitry Andric subcc %o3,%o5,%o3 2880b57cec5SDimitry Andric !depth 4, accumulated bits -5 2890b57cec5SDimitry Andric bl L.4.11 2900b57cec5SDimitry Andric srl %o5,1,%o5 2910b57cec5SDimitry Andric ! remainder is nonnegative 2920b57cec5SDimitry Andric subcc %o3,%o5,%o3 2930b57cec5SDimitry Andric b 9f 2940b57cec5SDimitry Andric add %o2, (-5*2+1), %o2 2950b57cec5SDimitry AndricL.4.11: 2960b57cec5SDimitry Andric ! remainder is negative 2970b57cec5SDimitry Andric addcc %o3,%o5,%o3 2980b57cec5SDimitry Andric b 9f 2990b57cec5SDimitry Andric add %o2, (-5*2-1), %o2 3000b57cec5SDimitry AndricL.3.13: 3010b57cec5SDimitry Andric ! remainder is negative 3020b57cec5SDimitry Andric addcc %o3,%o5,%o3 3030b57cec5SDimitry Andric !depth 4, accumulated bits -7 3040b57cec5SDimitry Andric bl L.4.9 3050b57cec5SDimitry Andric srl %o5,1,%o5 3060b57cec5SDimitry Andric ! remainder is nonnegative 3070b57cec5SDimitry Andric subcc %o3,%o5,%o3 3080b57cec5SDimitry Andric b 9f 3090b57cec5SDimitry Andric add %o2, (-7*2+1), %o2 3100b57cec5SDimitry AndricL.4.9: 3110b57cec5SDimitry Andric ! remainder is negative 3120b57cec5SDimitry Andric addcc %o3,%o5,%o3 3130b57cec5SDimitry Andric b 9f 3140b57cec5SDimitry Andric add %o2, (-7*2-1), %o2 3150b57cec5SDimitry Andric 9: 3160b57cec5SDimitry Andricend_regular_divide: 3170b57cec5SDimitry Andric deccc %o4 3180b57cec5SDimitry Andric bge divloop 3190b57cec5SDimitry Andric tst %o3 3200b57cec5SDimitry Andric bl,a got_result 3210b57cec5SDimitry Andric ! non-restoring fixup if remainder < 0, otherwise annulled 3220b57cec5SDimitry Andric dec %o2 3230b57cec5SDimitry Andricgot_result: 3240b57cec5SDimitry Andric tst %g3 3250b57cec5SDimitry Andric bl,a 1f 3260b57cec5SDimitry Andric ! negate for answer < 0, otherwise annulled 3270b57cec5SDimitry Andric neg %o2,%o2 ! %o2 <- -%o2 3280b57cec5SDimitry Andric1: 3290b57cec5SDimitry Andric retl ! leaf-routine return 3300b57cec5SDimitry Andric mov %o2,%o0 ! quotient <- %o2 331