1 /*- 2 * Copyright (c) 1992, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This software was developed by the Computer Systems Engineering group 6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 7 * contributed to Berkeley. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), 36 * section 4.3.1, pp. 257--259. 37 */ 38 39 #include "quad.h" 40 41 #define B ((int)1 << HALF_BITS) /* digit base */ 42 43 /* Combine two `digits' to make a single two-digit number. */ 44 #define COMBINE(a, b) (((u_int)(a) << HALF_BITS) | (b)) 45 46 /* select a type for digits in base B: use unsigned short if they fit */ 47 #if UINT_MAX == 0xffffffffU && USHRT_MAX >= 0xffff 48 typedef unsigned short digit; 49 #else 50 typedef u_int digit; 51 #endif 52 53 static void shl(digit *p, int len, int sh); 54 55 /* 56 * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. 57 * 58 * We do this in base 2-sup-HALF_BITS, so that all intermediate products 59 * fit within u_int. As a consequence, the maximum length dividend and 60 * divisor are 4 `digits' in this base (they are shorter if they have 61 * leading zeros). 62 */ 63 u_quad_t 64 __qdivrem(u_quad_t uq, u_quad_t vq, u_quad_t *arq) 65 { 66 union uu tmp; 67 digit *u, *v, *q; 68 digit v1, v2; 69 u_int qhat, rhat, t; 70 int m, n, d, j, i; 71 digit uspace[5], vspace[5], qspace[5]; 72 73 /* 74 * Take care of special cases: divide by zero, and u < v. 75 */ 76 if (vq == 0) { 77 /* divide by zero. */ 78 static volatile const unsigned int zero = 0; 79 80 tmp.ul[H] = tmp.ul[L] = 1 / zero; 81 if (arq) 82 *arq = uq; 83 return (tmp.q); 84 } 85 if (uq < vq) { 86 if (arq) 87 *arq = uq; 88 return (0); 89 } 90 u = &uspace[0]; 91 v = &vspace[0]; 92 q = &qspace[0]; 93 94 /* 95 * Break dividend and divisor into digits in base B, then 96 * count leading zeros to determine m and n. When done, we 97 * will have: 98 * u = (u[1]u[2]...u[m+n]) sub B 99 * v = (v[1]v[2]...v[n]) sub B 100 * v[1] != 0 101 * 1 < n <= 4 (if n = 1, we use a different division algorithm) 102 * m >= 0 (otherwise u < v, which we already checked) 103 * m + n = 4 104 * and thus 105 * m = 4 - n <= 2 106 */ 107 tmp.uq = uq; 108 u[0] = 0; 109 u[1] = (digit)HHALF(tmp.ul[H]); 110 u[2] = (digit)LHALF(tmp.ul[H]); 111 u[3] = (digit)HHALF(tmp.ul[L]); 112 u[4] = (digit)LHALF(tmp.ul[L]); 113 tmp.uq = vq; 114 v[1] = (digit)HHALF(tmp.ul[H]); 115 v[2] = (digit)LHALF(tmp.ul[H]); 116 v[3] = (digit)HHALF(tmp.ul[L]); 117 v[4] = (digit)LHALF(tmp.ul[L]); 118 for (n = 4; v[1] == 0; v++) { 119 if (--n == 1) { 120 u_int rbj; /* r*B+u[j] (not root boy jim) */ 121 digit q1, q2, q3, q4; 122 123 /* 124 * Change of plan, per exercise 16. 125 * r = 0; 126 * for j = 1..4: 127 * q[j] = floor((r*B + u[j]) / v), 128 * r = (r*B + u[j]) % v; 129 * We unroll this completely here. 130 */ 131 t = v[2]; /* nonzero, by definition */ 132 q1 = (digit)(u[1] / t); 133 rbj = COMBINE(u[1] % t, u[2]); 134 q2 = (digit)(rbj / t); 135 rbj = COMBINE(rbj % t, u[3]); 136 q3 = (digit)(rbj / t); 137 rbj = COMBINE(rbj % t, u[4]); 138 q4 = (digit)(rbj / t); 139 if (arq) 140 *arq = rbj % t; 141 tmp.ul[H] = COMBINE(q1, q2); 142 tmp.ul[L] = COMBINE(q3, q4); 143 return (tmp.q); 144 } 145 } 146 147 /* 148 * By adjusting q once we determine m, we can guarantee that 149 * there is a complete four-digit quotient at &qspace[1] when 150 * we finally stop. 151 */ 152 for (m = 4 - n; u[1] == 0; u++) 153 m--; 154 for (i = 4 - m; --i >= 0;) 155 q[i] = 0; 156 q += 4 - m; 157 158 /* 159 * Here we run Program D, translated from MIX to C and acquiring 160 * a few minor changes. 161 * 162 * D1: choose multiplier 1 << d to ensure v[1] >= B/2. 163 */ 164 d = 0; 165 for (t = v[1]; t < B / 2; t <<= 1) 166 d++; 167 if (d > 0) { 168 shl(&u[0], m + n, d); /* u <<= d */ 169 shl(&v[1], n - 1, d); /* v <<= d */ 170 } 171 /* 172 * D2: j = 0. 173 */ 174 j = 0; 175 v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ 176 v2 = v[2]; /* for D3 */ 177 do { 178 digit uj0, uj1, uj2; 179 180 /* 181 * D3: Calculate qhat (\^q, in TeX notation). 182 * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and 183 * let rhat = (u[j]*B + u[j+1]) mod v[1]. 184 * While rhat < B and v[2]*qhat > rhat*B+u[j+2], 185 * decrement qhat and increase rhat correspondingly. 186 * Note that if rhat >= B, v[2]*qhat < rhat*B. 187 */ 188 uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ 189 uj1 = u[j + 1]; /* for D3 only */ 190 uj2 = u[j + 2]; /* for D3 only */ 191 if (uj0 == v1) { 192 qhat = B; 193 rhat = uj1; 194 goto qhat_too_big; 195 } else { 196 u_int nn = COMBINE(uj0, uj1); 197 qhat = nn / v1; 198 rhat = nn % v1; 199 } 200 while (v2 * qhat > COMBINE(rhat, uj2)) { 201 qhat_too_big: 202 qhat--; 203 if ((rhat += v1) >= B) 204 break; 205 } 206 /* 207 * D4: Multiply and subtract. 208 * The variable `t' holds any borrows across the loop. 209 * We split this up so that we do not require v[0] = 0, 210 * and to eliminate a final special case. 211 */ 212 for (t = 0, i = n; i > 0; i--) { 213 t = u[i + j] - v[i] * qhat - t; 214 u[i + j] = (digit)LHALF(t); 215 t = (B - HHALF(t)) & (B - 1); 216 } 217 t = u[j] - t; 218 u[j] = (digit)LHALF(t); 219 /* 220 * D5: test remainder. 221 * There is a borrow if and only if HHALF(t) is nonzero; 222 * in that (rare) case, qhat was too large (by exactly 1). 223 * Fix it by adding v[1..n] to u[j..j+n]. 224 */ 225 if (HHALF(t)) { 226 qhat--; 227 for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ 228 t += u[i + j] + v[i]; 229 u[i + j] = (digit)LHALF(t); 230 t = HHALF(t); 231 } 232 u[j] = (digit)LHALF(u[j] + t); 233 } 234 q[j] = (digit)qhat; 235 } while (++j <= m); /* D7: loop on j. */ 236 237 /* 238 * If caller wants the remainder, we have to calculate it as 239 * u[m..m+n] >> d (this is at most n digits and thus fits in 240 * u[m+1..m+n], but we may need more source digits). 241 */ 242 if (arq) { 243 if (d) { 244 for (i = m + n; i > m; --i) 245 u[i] = (digit)(((u_int)u[i] >> d) | 246 LHALF((u_int)u[i - 1] << (HALF_BITS - d))); 247 u[i] = 0; 248 } 249 tmp.ul[H] = COMBINE(uspace[1], uspace[2]); 250 tmp.ul[L] = COMBINE(uspace[3], uspace[4]); 251 *arq = tmp.q; 252 } 253 254 tmp.ul[H] = COMBINE(qspace[1], qspace[2]); 255 tmp.ul[L] = COMBINE(qspace[3], qspace[4]); 256 return (tmp.q); 257 } 258 259 /* 260 * Shift p[0]..p[len] left `sh' bits, ignoring any bits that 261 * `fall out' the left (there never will be any such anyway). 262 * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. 263 */ 264 static void 265 shl(digit *p, int len, int sh) 266 { 267 int i; 268 269 for (i = 0; i < len; i++) 270 p[i] = (digit)(LHALF((u_int)p[i] << sh) | 271 ((u_int)p[i + 1] >> (HALF_BITS - sh))); 272 p[i] = (digit)(LHALF((u_int)p[i] << sh)); 273 } 274