1/* 2 * Copyright (c) 1992, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This software was developed by the Computer Systems Engineering group 6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 7 * contributed to Berkeley. 8 * 9 * %sccs.include.redist.c% 10 * 11 * from: $Header: divrem.m4,v 1.4 92/06/25 13:23:57 torek Exp $ 12 */ 13 14/* 15 * Division and remainder, from Appendix E of the Sparc Version 8 16 * Architecture Manual, with fixes from Gordon Irlam. 17 */ 18 19#if defined(LIBC_SCCS) && !defined(lint) 20 .asciz "@(#)divrem.m4 8.1 (Berkeley) 06/04/93" 21#endif /* LIBC_SCCS and not lint */ 22 23/* 24 * Input: dividend and divisor in %o0 and %o1 respectively. 25 * 26 * m4 parameters: 27 * NAME name of function to generate 28 * OP OP=div => %o0 / %o1; OP=rem => %o0 % %o1 29 * S S=true => signed; S=false => unsigned 30 * 31 * Algorithm parameters: 32 * N how many bits per iteration we try to get (4) 33 * WORDSIZE total number of bits (32) 34 * 35 * Derived constants: 36 * TWOSUPN 2^N, for label generation (m4 exponentiation currently broken) 37 * TOPBITS number of bits in the top `decade' of a number 38 * 39 * Important variables: 40 * Q the partial quotient under development (initially 0) 41 * R the remainder so far, initially the dividend 42 * ITER number of main division loop iterations required; 43 * equal to ceil(log2(quotient) / N). Note that this 44 * is the log base (2^N) of the quotient. 45 * V the current comparand, initially divisor*2^(ITER*N-1) 46 * 47 * Cost: 48 * Current estimate for non-large dividend is 49 * ceil(log2(quotient) / N) * (10 + 7N/2) + C 50 * A large dividend is one greater than 2^(31-TOPBITS) and takes a 51 * different path, as the upper bits of the quotient must be developed 52 * one bit at a time. 53 */ 54 55define(N, `4') 56define(TWOSUPN, `16') 57define(WORDSIZE, `32') 58define(TOPBITS, eval(WORDSIZE - N*((WORDSIZE-1)/N))) 59 60define(dividend, `%o0') 61define(divisor, `%o1') 62define(Q, `%o2') 63define(R, `%o3') 64define(ITER, `%o4') 65define(V, `%o5') 66 67/* m4 reminder: ifelse(a,b,c,d) => if a is b, then c, else d */ 68define(T, `%g1') 69define(SC, `%g7') 70ifelse(S, `true', `define(SIGN, `%g6')') 71 72/* 73 * This is the recursive definition for developing quotient digits. 74 * 75 * Parameters: 76 * $1 the current depth, 1 <= $1 <= N 77 * $2 the current accumulation of quotient bits 78 * N max depth 79 * 80 * We add a new bit to $2 and either recurse or insert the bits in 81 * the quotient. R, Q, and V are inputs and outputs as defined above; 82 * the condition codes are expected to reflect the input R, and are 83 * modified to reflect the output R. 84 */ 85define(DEVELOP_QUOTIENT_BITS, 86` ! depth $1, accumulated bits $2 87 bl L.$1.eval(TWOSUPN+$2) 88 srl V,1,V 89 ! remainder is positive 90 subcc R,V,R 91 ifelse($1, N, 92 ` b 9f 93 add Q, ($2*2+1), Q 94 ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2+1)')') 95L.$1.eval(TWOSUPN+$2): 96 ! remainder is negative 97 addcc R,V,R 98 ifelse($1, N, 99 ` b 9f 100 add Q, ($2*2-1), Q 101 ', ` DEVELOP_QUOTIENT_BITS(incr($1), `eval(2*$2-1)')') 102 ifelse($1, 1, `9:')') 103 104#include "DEFS.h" 105#include <machine/trap.h> 106 107FUNC(NAME) 108ifelse(S, `true', 109` ! compute sign of result; if neither is negative, no problem 110 orcc divisor, dividend, %g0 ! either negative? 111 bge 2f ! no, go do the divide 112 xor divisor, dividend, SIGN ! compute sign in any case 113 tst divisor 114 bge 1f 115 tst dividend 116 ! divisor is definitely negative; dividend might also be negative 117 bge 2f ! if dividend not negative... 118 neg divisor ! in any case, make divisor nonneg 1191: ! dividend is negative, divisor is nonnegative 120 neg dividend ! make dividend nonnegative 1212: 122') 123 ! Ready to divide. Compute size of quotient; scale comparand. 124 orcc divisor, %g0, V 125 bnz 1f 126 mov dividend, R 127 128 ! Divide by zero trap. If it returns, return 0 (about as 129 ! wrong as possible, but that is what SunOS does...). 130 t ST_DIV0 131 retl 132 clr %o0 133 1341: 135 cmp R, V ! if divisor exceeds dividend, done 136 blu Lgot_result ! (and algorithm fails otherwise) 137 clr Q 138 sethi %hi(1 << (WORDSIZE - TOPBITS - 1)), T 139 cmp R, T 140 blu Lnot_really_big 141 clr ITER 142 143 ! `Here the dividend is >= 2^(31-N) or so. We must be careful here, 144 ! as our usual N-at-a-shot divide step will cause overflow and havoc. 145 ! The number of bits in the result here is N*ITER+SC, where SC <= N. 146 ! Compute ITER in an unorthodox manner: know we need to shift V into 147 ! the top decade: so do not even bother to compare to R.' 148 1: 149 cmp V, T 150 bgeu 3f 151 mov 1, SC 152 sll V, N, V 153 b 1b 154 inc ITER 155 156 ! Now compute SC. 157 2: addcc V, V, V 158 bcc Lnot_too_big 159 inc SC 160 161 ! We get here if the divisor overflowed while shifting. 162 ! This means that R has the high-order bit set. 163 ! Restore V and subtract from R. 164 sll T, TOPBITS, T ! high order bit 165 srl V, 1, V ! rest of V 166 add V, T, V 167 b Ldo_single_div 168 dec SC 169 170 Lnot_too_big: 171 3: cmp V, R 172 blu 2b 173 nop 174 be Ldo_single_div 175 nop 176 /* NB: these are commented out in the V8-Sparc manual as well */ 177 /* (I do not understand this) */ 178 ! V > R: went too far: back up 1 step 179 ! srl V, 1, V 180 ! dec SC 181 ! do single-bit divide steps 182 ! 183 ! We have to be careful here. We know that R >= V, so we can do the 184 ! first divide step without thinking. BUT, the others are conditional, 185 ! and are only done if R >= 0. Because both R and V may have the high- 186 ! order bit set in the first step, just falling into the regular 187 ! division loop will mess up the first time around. 188 ! So we unroll slightly... 189 Ldo_single_div: 190 deccc SC 191 bl Lend_regular_divide 192 nop 193 sub R, V, R 194 mov 1, Q 195 b Lend_single_divloop 196 nop 197 Lsingle_divloop: 198 sll Q, 1, Q 199 bl 1f 200 srl V, 1, V 201 ! R >= 0 202 sub R, V, R 203 b 2f 204 inc Q 205 1: ! R < 0 206 add R, V, R 207 dec Q 208 2: 209 Lend_single_divloop: 210 deccc SC 211 bge Lsingle_divloop 212 tst R 213 b,a Lend_regular_divide 214 215Lnot_really_big: 2161: 217 sll V, N, V 218 cmp V, R 219 bleu 1b 220 inccc ITER 221 be Lgot_result 222 dec ITER 223 224 tst R ! set up for initial iteration 225Ldivloop: 226 sll Q, N, Q 227 DEVELOP_QUOTIENT_BITS(1, 0) 228Lend_regular_divide: 229 deccc ITER 230 bge Ldivloop 231 tst R 232 bl,a Lgot_result 233 ! non-restoring fixup here (one instruction only!) 234ifelse(OP, `div', 235` dec Q 236', ` add R, divisor, R 237') 238 239Lgot_result: 240ifelse(S, `true', 241` ! check to see if answer should be < 0 242 tst SIGN 243 bl,a 1f 244 ifelse(OP, `div', `neg Q', `neg R') 2451:') 246 retl 247 ifelse(OP, `div', `mov Q, %o0', `mov R, %o0') 248