1 /* $Id: qdivrem.c,v 1.3 2009/03/15 00:20:42 gmcgarry Exp $ */ 2 /* $NetBSD: qdivrem.c,v 1.1 2005/12/20 19:28:51 christos Exp $ */ 3 4 /*- 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This software was developed by the Computer Systems Engineering group 9 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 10 * contributed to Berkeley. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed), 39 * section 4.3.1, pp. 257--259. 40 */ 41 42 #include "quad.h" 43 44 #define B ((int)1 << HALF_BITS) /* digit base */ 45 46 /* Combine two `digits' to make a single two-digit number. */ 47 #define COMBINE(a, b) (((unsigned int)(a) << HALF_BITS) | (b)) 48 49 /* select a type for digits in base B: use unsigned short if they fit */ 50 #if UINT_MAX == 0xffffffffU && USHRT_MAX >= 0xffff 51 typedef unsigned short digit; 52 #else 53 typedef unsigned int digit; 54 #endif 55 56 static void shl(digit *p, int len, int sh); 57 58 /* 59 * __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v. 60 * 61 * We do this in base 2-sup-HALF_BITS, so that all intermediate products 62 * fit within unsigned int. As a consequence, the maximum length dividend and 63 * divisor are 4 `digits' in this base (they are shorter if they have 64 * leading zeros). 65 */ 66 u_quad_t 67 __qdivrem(u_quad_t uq, u_quad_t vq, u_quad_t *arq) 68 { 69 union uu tmp; 70 digit *u, *v, *q; 71 digit v1, v2; 72 unsigned int qhat, rhat, t; 73 int m, n, d, j, i; 74 digit uspace[5], vspace[5], qspace[5]; 75 76 /* 77 * Take care of special cases: divide by zero, and u < v. 78 */ 79 if (vq == 0) { 80 /* divide by zero. */ 81 static volatile const unsigned int zero = 0; 82 83 tmp.ul[H] = tmp.ul[L] = 1 / zero; 84 if (arq) 85 *arq = uq; 86 return (tmp.q); 87 } 88 if (uq < vq) { 89 if (arq) 90 *arq = uq; 91 return (0); 92 } 93 u = &uspace[0]; 94 v = &vspace[0]; 95 q = &qspace[0]; 96 97 /* 98 * Break dividend and divisor into digits in base B, then 99 * count leading zeros to determine m and n. When done, we 100 * will have: 101 * u = (u[1]u[2]...u[m+n]) sub B 102 * v = (v[1]v[2]...v[n]) sub B 103 * v[1] != 0 104 * 1 < n <= 4 (if n = 1, we use a different division algorithm) 105 * m >= 0 (otherwise u < v, which we already checked) 106 * m + n = 4 107 * and thus 108 * m = 4 - n <= 2 109 */ 110 tmp.uq = uq; 111 u[0] = 0; 112 u[1] = (digit)HHALF(tmp.ul[H]); 113 u[2] = (digit)LHALF(tmp.ul[H]); 114 u[3] = (digit)HHALF(tmp.ul[L]); 115 u[4] = (digit)LHALF(tmp.ul[L]); 116 tmp.uq = vq; 117 v[1] = (digit)HHALF(tmp.ul[H]); 118 v[2] = (digit)LHALF(tmp.ul[H]); 119 v[3] = (digit)HHALF(tmp.ul[L]); 120 v[4] = (digit)LHALF(tmp.ul[L]); 121 for (n = 4; v[1] == 0; v++) { 122 if (--n == 1) { 123 unsigned int rbj; /* r*B+u[j] (not root boy jim) */ 124 digit q1, q2, q3, q4; 125 126 /* 127 * Change of plan, per exercise 16. 128 * r = 0; 129 * for j = 1..4: 130 * q[j] = floor((r*B + u[j]) / v), 131 * r = (r*B + u[j]) % v; 132 * We unroll this completely here. 133 */ 134 t = v[2]; /* nonzero, by definition */ 135 q1 = (digit)(u[1] / t); 136 rbj = COMBINE(u[1] % t, u[2]); 137 q2 = (digit)(rbj / t); 138 rbj = COMBINE(rbj % t, u[3]); 139 q3 = (digit)(rbj / t); 140 rbj = COMBINE(rbj % t, u[4]); 141 q4 = (digit)(rbj / t); 142 if (arq) 143 *arq = rbj % t; 144 tmp.ul[H] = COMBINE(q1, q2); 145 tmp.ul[L] = COMBINE(q3, q4); 146 return (tmp.q); 147 } 148 } 149 150 /* 151 * By adjusting q once we determine m, we can guarantee that 152 * there is a complete four-digit quotient at &qspace[1] when 153 * we finally stop. 154 */ 155 for (m = 4 - n; u[1] == 0; u++) 156 m--; 157 for (i = 4 - m; --i >= 0;) 158 q[i] = 0; 159 q += 4 - m; 160 161 /* 162 * Here we run Program D, translated from MIX to C and acquiring 163 * a few minor changes. 164 * 165 * D1: choose multiplier 1 << d to ensure v[1] >= B/2. 166 */ 167 d = 0; 168 for (t = v[1]; t < B / 2; t <<= 1) 169 d++; 170 if (d > 0) { 171 shl(&u[0], m + n, d); /* u <<= d */ 172 shl(&v[1], n - 1, d); /* v <<= d */ 173 } 174 /* 175 * D2: j = 0. 176 */ 177 j = 0; 178 v1 = v[1]; /* for D3 -- note that v[1..n] are constant */ 179 v2 = v[2]; /* for D3 */ 180 do { 181 digit uj0, uj1, uj2; 182 183 /* 184 * D3: Calculate qhat (\^q, in TeX notation). 185 * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and 186 * let rhat = (u[j]*B + u[j+1]) mod v[1]. 187 * While rhat < B and v[2]*qhat > rhat*B+u[j+2], 188 * decrement qhat and increase rhat correspondingly. 189 * Note that if rhat >= B, v[2]*qhat < rhat*B. 190 */ 191 uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */ 192 uj1 = u[j + 1]; /* for D3 only */ 193 uj2 = u[j + 2]; /* for D3 only */ 194 if (uj0 == v1) { 195 qhat = B; 196 rhat = uj1; 197 goto qhat_too_big; 198 } else { 199 unsigned int nn = COMBINE(uj0, uj1); 200 qhat = nn / v1; 201 rhat = nn % v1; 202 } 203 while (v2 * qhat > COMBINE(rhat, uj2)) { 204 qhat_too_big: 205 qhat--; 206 if ((rhat += v1) >= B) 207 break; 208 } 209 /* 210 * D4: Multiply and subtract. 211 * The variable `t' holds any borrows across the loop. 212 * We split this up so that we do not require v[0] = 0, 213 * and to eliminate a final special case. 214 */ 215 for (t = 0, i = n; i > 0; i--) { 216 t = u[i + j] - v[i] * qhat - t; 217 u[i + j] = (digit)LHALF(t); 218 t = (B - HHALF(t)) & (B - 1); 219 } 220 t = u[j] - t; 221 u[j] = (digit)LHALF(t); 222 /* 223 * D5: test remainder. 224 * There is a borrow if and only if HHALF(t) is nonzero; 225 * in that (rare) case, qhat was too large (by exactly 1). 226 * Fix it by adding v[1..n] to u[j..j+n]. 227 */ 228 if (HHALF(t)) { 229 qhat--; 230 for (t = 0, i = n; i > 0; i--) { /* D6: add back. */ 231 t += u[i + j] + v[i]; 232 u[i + j] = (digit)LHALF(t); 233 t = HHALF(t); 234 } 235 u[j] = (digit)LHALF(u[j] + t); 236 } 237 q[j] = (digit)qhat; 238 } while (++j <= m); /* D7: loop on j. */ 239 240 /* 241 * If caller wants the remainder, we have to calculate it as 242 * u[m..m+n] >> d (this is at most n digits and thus fits in 243 * u[m+1..m+n], but we may need more source digits). 244 */ 245 if (arq) { 246 if (d) { 247 for (i = m + n; i > m; --i) 248 u[i] = (digit)(((unsigned int)u[i] >> d) | 249 LHALF((unsigned int)u[i - 1] << (HALF_BITS - d))); 250 u[i] = 0; 251 } 252 tmp.ul[H] = COMBINE(uspace[1], uspace[2]); 253 tmp.ul[L] = COMBINE(uspace[3], uspace[4]); 254 *arq = tmp.q; 255 } 256 257 tmp.ul[H] = COMBINE(qspace[1], qspace[2]); 258 tmp.ul[L] = COMBINE(qspace[3], qspace[4]); 259 return (tmp.q); 260 } 261 262 /* 263 * Shift p[0]..p[len] left `sh' bits, ignoring any bits that 264 * `fall out' the left (there never will be any such anyway). 265 * We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS. 266 */ 267 static void 268 shl(digit *p, int len, int sh) 269 { 270 int i; 271 272 for (i = 0; i < len; i++) 273 p[i] = (digit)(LHALF((unsigned int)p[i] << sh) | 274 ((unsigned int)p[i + 1] >> (HALF_BITS - sh))); 275 p[i] = (digit)(LHALF((unsigned int)p[i] << sh)); 276 } 277