1 /* 2 * Copyright (c) 1992 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This software was developed by the Computer Systems Engineering group 6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 7 * contributed to Berkeley. 8 * 9 * All advertising materials mentioning features or use of this software 10 * must display the following acknowledgement: 11 * This product includes software developed by the University of 12 * California, Lawrence Berkeley Laboratories. 13 * 14 * %sccs.include.redist.c% 15 * 16 * @(#)fpu_div.c 7.3 (Berkeley) 10/11/92 17 * 18 * from: $Header: fpu_div.c,v 1.2 92/06/17 05:41:30 torek Exp $ 19 */ 20 21 /* 22 * Perform an FPU divide (return x / y). 23 */ 24 25 #include <sys/types.h> 26 27 #include <machine/reg.h> 28 29 #include <sparc/fpu/fpu_arith.h> 30 #include <sparc/fpu/fpu_emu.h> 31 32 /* 33 * Division of normal numbers is done as follows: 34 * 35 * x and y are floating point numbers, i.e., in the form 1.bbbb * 2^e. 36 * If X and Y are the mantissas (1.bbbb's), the quotient is then: 37 * 38 * q = (X / Y) * 2^((x exponent) - (y exponent)) 39 * 40 * Since X and Y are both in [1.0,2.0), the quotient's mantissa (X / Y) 41 * will be in [0.5,2.0). Moreover, it will be less than 1.0 if and only 42 * if X < Y. In that case, it will have to be shifted left one bit to 43 * become a normal number, and the exponent decremented. Thus, the 44 * desired exponent is: 45 * 46 * left_shift = x->fp_mant < y->fp_mant; 47 * result_exp = x->fp_exp - y->fp_exp - left_shift; 48 * 49 * The quotient mantissa X/Y can then be computed one bit at a time 50 * using the following algorithm: 51 * 52 * Q = 0; -- Initial quotient. 53 * R = X; -- Initial remainder, 54 * if (left_shift) -- but fixed up in advance. 55 * R *= 2; 56 * for (bit = FP_NMANT; --bit >= 0; R *= 2) { 57 * if (R >= Y) { 58 * Q |= 1 << bit; 59 * R -= Y; 60 * } 61 * } 62 * 63 * The subtraction R -= Y always removes the uppermost bit from R (and 64 * can sometimes remove additional lower-order 1 bits); this proof is 65 * left to the reader. 66 * 67 * This loop correctly calculates the guard and round bits since they are 68 * included in the expanded internal representation. The sticky bit 69 * is to be set if and only if any other bits beyond guard and round 70 * would be set. From the above it is obvious that this is true if and 71 * only if the remainder R is nonzero when the loop terminates. 72 * 73 * Examining the loop above, we can see that the quotient Q is built 74 * one bit at a time ``from the top down''. This means that we can 75 * dispense with the multi-word arithmetic and just build it one word 76 * at a time, writing each result word when it is done. 77 * 78 * Furthermore, since X and Y are both in [1.0,2.0), we know that, 79 * initially, R >= Y. (Recall that, if X < Y, R is set to X * 2 and 80 * is therefore at in [2.0,4.0).) Thus Q is sure to have bit FP_NMANT-1 81 * set, and R can be set initially to either X - Y (when X >= Y) or 82 * 2X - Y (when X < Y). In addition, comparing R and Y is difficult, 83 * so we will simply calculate R - Y and see if that underflows. 84 * This leads to the following revised version of the algorithm: 85 * 86 * R = X; 87 * bit = FP_1; 88 * D = R - Y; 89 * if (D >= 0) { 90 * result_exp = x->fp_exp - y->fp_exp; 91 * R = D; 92 * q = bit; 93 * bit >>= 1; 94 * } else { 95 * result_exp = x->fp_exp - y->fp_exp - 1; 96 * q = 0; 97 * } 98 * R <<= 1; 99 * do { 100 * D = R - Y; 101 * if (D >= 0) { 102 * q |= bit; 103 * R = D; 104 * } 105 * R <<= 1; 106 * } while ((bit >>= 1) != 0); 107 * Q[0] = q; 108 * for (i = 1; i < 4; i++) { 109 * q = 0, bit = 1 << 31; 110 * do { 111 * D = R - Y; 112 * if (D >= 0) { 113 * q |= bit; 114 * R = D; 115 * } 116 * R <<= 1; 117 * } while ((bit >>= 1) != 0); 118 * Q[i] = q; 119 * } 120 * 121 * This can be refined just a bit further by moving the `R <<= 1' 122 * calculations to the front of the do-loops and eliding the first one. 123 * The process can be terminated immediately whenever R becomes 0, but 124 * this is relatively rare, and we do not bother. 125 */ 126 127 struct fpn * 128 fpu_div(fe) 129 register struct fpemu *fe; 130 { 131 register struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2; 132 register u_int q, bit; 133 register u_int r0, r1, r2, r3, d0, d1, d2, d3, y0, y1, y2, y3; 134 FPU_DECL_CARRY 135 136 /* 137 * Since divide is not commutative, we cannot just use ORDER. 138 * Check either operand for NaN first; if there is at least one, 139 * order the signalling one (if only one) onto the right, then 140 * return it. Otherwise we have the following cases: 141 * 142 * Inf / Inf = NaN, plus NV exception 143 * Inf / num = Inf [i.e., return x] 144 * Inf / 0 = Inf [i.e., return x] 145 * 0 / Inf = 0 [i.e., return x] 146 * 0 / num = 0 [i.e., return x] 147 * 0 / 0 = NaN, plus NV exception 148 * num / Inf = 0 149 * num / num = num (do the divide) 150 * num / 0 = Inf, plus DZ exception 151 */ 152 if (ISNAN(x) || ISNAN(y)) { 153 ORDER(x, y); 154 return (y); 155 } 156 if (ISINF(x) || ISZERO(x)) { 157 if (x->fp_class == y->fp_class) 158 return (fpu_newnan(fe)); 159 return (x); 160 } 161 162 /* all results at this point use XOR of operand signs */ 163 x->fp_sign ^= y->fp_sign; 164 if (ISINF(y)) { 165 x->fp_class = FPC_ZERO; 166 return (x); 167 } 168 if (ISZERO(y)) { 169 fe->fe_cx = FSR_DZ; 170 x->fp_class = FPC_INF; 171 return (x); 172 } 173 174 /* 175 * Macros for the divide. See comments at top for algorithm. 176 * Note that we expand R, D, and Y here. 177 */ 178 179 #define SUBTRACT /* D = R - Y */ \ 180 FPU_SUBS(d3, r3, y3); FPU_SUBCS(d2, r2, y2); \ 181 FPU_SUBCS(d1, r1, y1); FPU_SUBC(d0, r0, y0) 182 183 #define NONNEGATIVE /* D >= 0 */ \ 184 ((int)d0 >= 0) 185 186 #ifdef FPU_SHL1_BY_ADD 187 #define SHL1 /* R <<= 1 */ \ 188 FPU_ADDS(r3, r3, r3); FPU_ADDCS(r2, r2, r2); \ 189 FPU_ADDCS(r1, r1, r1); FPU_ADDC(r0, r0, r0) 190 #else 191 #define SHL1 \ 192 r0 = (r0 << 1) | (r1 >> 31), r1 = (r1 << 1) | (r2 >> 31), \ 193 r2 = (r2 << 1) | (r3 >> 31), r3 <<= 1 194 #endif 195 196 #define LOOP /* do ... while (bit >>= 1) */ \ 197 do { \ 198 SHL1; \ 199 SUBTRACT; \ 200 if (NONNEGATIVE) { \ 201 q |= bit; \ 202 r0 = d0, r1 = d1, r2 = d2, r3 = d3; \ 203 } \ 204 } while ((bit >>= 1) != 0) 205 206 #define WORD(r, i) /* calculate r->fp_mant[i] */ \ 207 q = 0; \ 208 bit = 1 << 31; \ 209 LOOP; \ 210 (x)->fp_mant[i] = q 211 212 /* Setup. Note that we put our result in x. */ 213 r0 = x->fp_mant[0]; 214 r1 = x->fp_mant[1]; 215 r2 = x->fp_mant[2]; 216 r3 = x->fp_mant[3]; 217 y0 = y->fp_mant[0]; 218 y1 = y->fp_mant[1]; 219 y2 = y->fp_mant[2]; 220 y3 = y->fp_mant[3]; 221 222 bit = FP_1; 223 SUBTRACT; 224 if (NONNEGATIVE) { 225 x->fp_exp -= y->fp_exp; 226 r0 = d0, r1 = d1, r2 = d2, r3 = d3; 227 q = bit; 228 bit >>= 1; 229 } else { 230 x->fp_exp -= y->fp_exp + 1; 231 q = 0; 232 } 233 LOOP; 234 x->fp_mant[0] = q; 235 WORD(x, 1); 236 WORD(x, 2); 237 WORD(x, 3); 238 x->fp_sticky = r0 | r1 | r2 | r3; 239 240 return (x); 241 } 242