1 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and 2 write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If 3 qxn is non-zero, generate that many fraction limbs and append them after the 4 other quotient limbs, and update the remainder accordningly. The input 5 operands are unaffected. 6 7 Preconditions: 8 1. The most significant limb of of the divisor must be non-zero. 9 2. No argument overlap is permitted. (??? relax this ???) 10 3. nn >= dn, even if qxn is non-zero. (??? relax this ???) 11 12 The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time 13 complexity of multiplication. 14 15 Copyright 1997, 2000, 2001, 2002, 2005 Free Software Foundation, Inc. 16 17 This file is part of the GNU MP Library. 18 19 The GNU MP Library is free software; you can redistribute it and/or modify 20 it under the terms of the GNU Lesser General Public License as published by 21 the Free Software Foundation; either version 3 of the License, or (at your 22 option) any later version. 23 24 The GNU MP Library is distributed in the hope that it will be useful, but 25 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 26 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 27 License for more details. 28 29 You should have received a copy of the GNU Lesser General Public License 30 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 31 32 #include "gmp.h" 33 #include "gmp-impl.h" 34 #include "longlong.h" 35 36 37 void 38 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, 39 mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) 40 { 41 /* FIXME: 42 1. qxn 43 2. pass allocated storage in additional parameter? 44 */ 45 ASSERT_ALWAYS (qxn == 0); 46 47 ASSERT (qxn >= 0); 48 ASSERT (nn >= 0); 49 ASSERT (dn >= 0); 50 ASSERT (dn == 0 || dp[dn - 1] != 0); 51 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn)); 52 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn)); 53 54 switch (dn) 55 { 56 case 0: 57 DIVIDE_BY_ZERO; 58 59 case 1: 60 { 61 rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]); 62 return; 63 } 64 65 case 2: 66 { 67 mp_ptr n2p, d2p; 68 mp_limb_t qhl, cy; 69 TMP_DECL; 70 TMP_MARK; 71 if ((dp[1] & GMP_NUMB_HIGHBIT) == 0) 72 { 73 int cnt; 74 mp_limb_t dtmp[2]; 75 count_leading_zeros (cnt, dp[1]); 76 cnt -= GMP_NAIL_BITS; 77 d2p = dtmp; 78 d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt)); 79 d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK; 80 n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); 81 cy = mpn_lshift (n2p, np, nn, cnt); 82 n2p[nn] = cy; 83 qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p); 84 if (cy == 0) 85 qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */ 86 rp[0] = (n2p[0] >> cnt) 87 | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK); 88 rp[1] = (n2p[1] >> cnt); 89 } 90 else 91 { 92 d2p = (mp_ptr) dp; 93 n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB); 94 MPN_COPY (n2p, np, nn); 95 qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p); 96 qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */ 97 rp[0] = n2p[0]; 98 rp[1] = n2p[1]; 99 } 100 TMP_FREE; 101 return; 102 } 103 104 default: 105 { 106 int adjust; 107 TMP_DECL; 108 TMP_MARK; 109 adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */ 110 if (nn + adjust >= 2 * dn) 111 { 112 mp_ptr n2p, d2p, q2p; 113 mp_limb_t cy; 114 int cnt; 115 116 qp[nn - dn] = 0; /* zero high quotient limb */ 117 if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */ 118 { 119 count_leading_zeros (cnt, dp[dn - 1]); 120 cnt -= GMP_NAIL_BITS; 121 d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); 122 mpn_lshift (d2p, dp, dn, cnt); 123 n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); 124 cy = mpn_lshift (n2p, np, nn, cnt); 125 n2p[nn] = cy; 126 nn += adjust; 127 } 128 else 129 { 130 cnt = 0; 131 d2p = (mp_ptr) dp; 132 n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB); 133 MPN_COPY (n2p, np, nn); 134 n2p[nn] = 0; 135 nn += adjust; 136 } 137 138 if (dn < DIV_DC_THRESHOLD) 139 mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn); 140 else 141 { 142 /* Divide 2*dn / dn limbs as long as the limbs in np last. */ 143 q2p = qp + nn - dn; 144 n2p += nn - dn; 145 do 146 { 147 q2p -= dn; n2p -= dn; 148 mpn_dc_divrem_n (q2p, n2p, d2p, dn); 149 nn -= dn; 150 } 151 while (nn >= 2 * dn); 152 153 if (nn != dn) 154 { 155 mp_limb_t ql; 156 n2p -= nn - dn; 157 158 /* We have now dn < nn - dn < 2dn. Make a recursive call, 159 since falling out to the code below isn't pretty. 160 Unfortunately, mpn_tdiv_qr returns nn-dn+1 quotient 161 limbs, which would overwrite one already generated 162 quotient limbs. Preserve it with an ugly hack. */ 163 /* FIXME: This suggests that we should have an 164 mpn_tdiv_qr_internal that instead returns the most 165 significant quotient limb and move the meat of this 166 function there. */ 167 /* FIXME: Perhaps call mpn_sb_divrem_mn here for certain 168 operand ranges, to decrease overhead for small 169 operands? */ 170 ql = qp[nn - dn]; /* preserve quotient limb... */ 171 mpn_tdiv_qr (qp, n2p, 0L, n2p, nn, d2p, dn); 172 qp[nn - dn] = ql; /* ...restore it again */ 173 } 174 } 175 176 177 if (cnt != 0) 178 mpn_rshift (rp, n2p, dn, cnt); 179 else 180 MPN_COPY (rp, n2p, dn); 181 TMP_FREE; 182 return; 183 } 184 185 /* When we come here, the numerator/partial remainder is less 186 than twice the size of the denominator. */ 187 188 { 189 /* Problem: 190 191 Divide a numerator N with nn limbs by a denominator D with dn 192 limbs forming a quotient of qn=nn-dn+1 limbs. When qn is small 193 compared to dn, conventional division algorithms perform poorly. 194 We want an algorithm that has an expected running time that is 195 dependent only on qn. 196 197 Algorithm (very informally stated): 198 199 1) Divide the 2 x qn most significant limbs from the numerator 200 by the qn most significant limbs from the denominator. Call 201 the result qest. This is either the correct quotient, but 202 might be 1 or 2 too large. Compute the remainder from the 203 division. (This step is implemented by a mpn_divrem call.) 204 205 2) Is the most significant limb from the remainder < p, where p 206 is the product of the most significant limb from the quotient 207 and the next(d)? (Next(d) denotes the next ignored limb from 208 the denominator.) If it is, decrement qest, and adjust the 209 remainder accordingly. 210 211 3) Is the remainder >= qest? If it is, qest is the desired 212 quotient. The algorithm terminates. 213 214 4) Subtract qest x next(d) from the remainder. If there is 215 borrow out, decrement qest, and adjust the remainder 216 accordingly. 217 218 5) Skip one word from the denominator (i.e., let next(d) denote 219 the next less significant limb. */ 220 221 mp_size_t qn; 222 mp_ptr n2p, d2p; 223 mp_ptr tp; 224 mp_limb_t cy; 225 mp_size_t in, rn; 226 mp_limb_t quotient_too_large; 227 unsigned int cnt; 228 229 qn = nn - dn; 230 qp[qn] = 0; /* zero high quotient limb */ 231 qn += adjust; /* qn cannot become bigger */ 232 233 if (qn == 0) 234 { 235 MPN_COPY (rp, np, dn); 236 TMP_FREE; 237 return; 238 } 239 240 in = dn - qn; /* (at least partially) ignored # of limbs in ops */ 241 /* Normalize denominator by shifting it to the left such that its 242 most significant bit is set. Then shift the numerator the same 243 amount, to mathematically preserve quotient. */ 244 if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) 245 { 246 count_leading_zeros (cnt, dp[dn - 1]); 247 cnt -= GMP_NAIL_BITS; 248 249 d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB); 250 mpn_lshift (d2p, dp + in, qn, cnt); 251 d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt); 252 253 n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB); 254 cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt); 255 if (adjust) 256 { 257 n2p[2 * qn] = cy; 258 n2p++; 259 } 260 else 261 { 262 n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt); 263 } 264 } 265 else 266 { 267 cnt = 0; 268 d2p = (mp_ptr) dp + in; 269 270 n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB); 271 MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn); 272 if (adjust) 273 { 274 n2p[2 * qn] = 0; 275 n2p++; 276 } 277 } 278 279 /* Get an approximate quotient using the extracted operands. */ 280 if (qn == 1) 281 { 282 mp_limb_t q0, r0; 283 mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0; 284 /* Due to a gcc 2.7.2.3 reload pass bug, we have to use some 285 temps here. This doesn't hurt code quality on any machines 286 so we do it unconditionally. */ 287 gcc272bug_n1 = n2p[1]; 288 gcc272bug_n0 = n2p[0]; 289 gcc272bug_d0 = d2p[0]; 290 udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0 << GMP_NAIL_BITS, 291 gcc272bug_d0 << GMP_NAIL_BITS); 292 r0 >>= GMP_NAIL_BITS; 293 n2p[0] = r0; 294 qp[0] = q0; 295 } 296 else if (qn == 2) 297 mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); 298 else if (qn < DIV_DC_THRESHOLD) 299 mpn_sb_divrem_mn (qp, n2p, 2 * qn, d2p, qn); 300 else 301 mpn_dc_divrem_n (qp, n2p, d2p, qn); 302 303 rn = qn; 304 /* Multiply the first ignored divisor limb by the most significant 305 quotient limb. If that product is > the partial remainder's 306 most significant limb, we know the quotient is too large. This 307 test quickly catches most cases where the quotient is too large; 308 it catches all cases where the quotient is 2 too large. */ 309 { 310 mp_limb_t dl, x; 311 mp_limb_t h, dummy; 312 313 if (in - 2 < 0) 314 dl = 0; 315 else 316 dl = dp[in - 2]; 317 318 #if GMP_NAIL_BITS == 0 319 x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % BITS_PER_MP_LIMB)); 320 #else 321 x = (dp[in - 1] << cnt) & GMP_NUMB_MASK; 322 if (cnt != 0) 323 x |= dl >> (GMP_NUMB_BITS - cnt); 324 #endif 325 umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS); 326 327 if (n2p[qn - 1] < h) 328 { 329 mp_limb_t cy; 330 331 mpn_decr_u (qp, (mp_limb_t) 1); 332 cy = mpn_add_n (n2p, n2p, d2p, qn); 333 if (cy) 334 { 335 /* The partial remainder is safely large. */ 336 n2p[qn] = cy; 337 ++rn; 338 } 339 } 340 } 341 342 quotient_too_large = 0; 343 if (cnt != 0) 344 { 345 mp_limb_t cy1, cy2; 346 347 /* Append partially used numerator limb to partial remainder. */ 348 cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt); 349 n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt); 350 351 /* Update partial remainder with partially used divisor limb. */ 352 cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt)); 353 if (qn != rn) 354 { 355 ASSERT_ALWAYS (n2p[qn] >= cy2); 356 n2p[qn] -= cy2; 357 } 358 else 359 { 360 n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */ 361 362 quotient_too_large = (cy1 < cy2); 363 ++rn; 364 } 365 --in; 366 } 367 /* True: partial remainder now is neutral, i.e., it is not shifted up. */ 368 369 tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB); 370 371 if (in < qn) 372 { 373 if (in == 0) 374 { 375 MPN_COPY (rp, n2p, rn); 376 ASSERT_ALWAYS (rn == dn); 377 goto foo; 378 } 379 mpn_mul (tp, qp, qn, dp, in); 380 } 381 else 382 mpn_mul (tp, dp, in, qp, qn); 383 384 cy = mpn_sub (n2p, n2p, rn, tp + in, qn); 385 MPN_COPY (rp + in, n2p, dn - in); 386 quotient_too_large |= cy; 387 cy = mpn_sub_n (rp, np, tp, in); 388 cy = mpn_sub_1 (rp + in, rp + in, rn, cy); 389 quotient_too_large |= cy; 390 foo: 391 if (quotient_too_large) 392 { 393 mpn_decr_u (qp, (mp_limb_t) 1); 394 mpn_add_n (rp, rp, dp, dn); 395 } 396 } 397 TMP_FREE; 398 return; 399 } 400 } 401 } 402