1 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and 2 write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If 3 qxn is non-zero, generate that many fraction limbs and append them after the 4 other quotient limbs, and update the remainder accordingly. The input 5 operands are unaffected. 6 7 Preconditions: 8 1. The most significant limb of of the divisor must be non-zero. 9 2. nn >= dn, even if qxn is non-zero. (??? relax this ???) 10 11 The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time 12 complexity of multiplication. 13 14 Copyright 1997, 2000, 2001, 2002, 2005, 2009 Free Software Foundation, Inc. 15 16 This file is part of the GNU MP Library. 17 18 The GNU MP Library is free software; you can redistribute it and/or modify 19 it under the terms of the GNU Lesser General Public License as published by 20 the Free Software Foundation; either version 3 of the License, or (at your 21 option) any later version. 22 23 The GNU MP Library is distributed in the hope that it will be useful, but 24 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 25 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 26 License for more details. 27 28 You should have received a copy of the GNU Lesser General Public License 29 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 30 31 #include "gmp.h" 32 #include "gmp-impl.h" 33 #include "longlong.h" 34 35 36 void 37 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn, 38 mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn) 39 { 40 ASSERT_ALWAYS (qxn == 0); 41 42 ASSERT (nn >= 0); 43 ASSERT (dn >= 0); 44 ASSERT (dn == 0 || dp[dn - 1] != 0); 45 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn)); 46 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn)); 47 48 switch (dn) 49 { 50 case 0: 51 DIVIDE_BY_ZERO; 52 53 case 1: 54 { 55 rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]); 56 return; 57 } 58 59 case 2: 60 { 61 mp_ptr n2p, d2p; 62 mp_limb_t qhl, cy; 63 TMP_DECL; 64 TMP_MARK; 65 if ((dp[1] & GMP_NUMB_HIGHBIT) == 0) 66 { 67 int cnt; 68 mp_limb_t dtmp[2]; 69 count_leading_zeros (cnt, dp[1]); 70 cnt -= GMP_NAIL_BITS; 71 d2p = dtmp; 72 d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt)); 73 d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK; 74 n2p = TMP_ALLOC_LIMBS (nn + 1); 75 cy = mpn_lshift (n2p, np, nn, cnt); 76 n2p[nn] = cy; 77 qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p); 78 if (cy == 0) 79 qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */ 80 rp[0] = (n2p[0] >> cnt) 81 | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK); 82 rp[1] = (n2p[1] >> cnt); 83 } 84 else 85 { 86 d2p = (mp_ptr) dp; 87 n2p = TMP_ALLOC_LIMBS (nn); 88 MPN_COPY (n2p, np, nn); 89 qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p); 90 qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */ 91 rp[0] = n2p[0]; 92 rp[1] = n2p[1]; 93 } 94 TMP_FREE; 95 return; 96 } 97 98 default: 99 { 100 int adjust; 101 gmp_pi1_t dinv; 102 TMP_DECL; 103 TMP_MARK; 104 adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */ 105 if (nn + adjust >= 2 * dn) 106 { 107 mp_ptr n2p, d2p; 108 mp_limb_t cy; 109 int cnt; 110 111 qp[nn - dn] = 0; /* zero high quotient limb */ 112 if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */ 113 { 114 count_leading_zeros (cnt, dp[dn - 1]); 115 cnt -= GMP_NAIL_BITS; 116 d2p = TMP_ALLOC_LIMBS (dn); 117 mpn_lshift (d2p, dp, dn, cnt); 118 n2p = TMP_ALLOC_LIMBS (nn + 1); 119 cy = mpn_lshift (n2p, np, nn, cnt); 120 n2p[nn] = cy; 121 nn += adjust; 122 } 123 else 124 { 125 cnt = 0; 126 d2p = (mp_ptr) dp; 127 n2p = TMP_ALLOC_LIMBS (nn + 1); 128 MPN_COPY (n2p, np, nn); 129 n2p[nn] = 0; 130 nn += adjust; 131 } 132 133 invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]); 134 if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD)) 135 mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32); 136 else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) || /* fast condition */ 137 BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */ 138 (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */ 139 + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn) /* ...condition */ 140 mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv); 141 else 142 { 143 mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0); 144 mp_ptr scratch = TMP_ALLOC_LIMBS (itch); 145 mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch); 146 n2p = rp; 147 } 148 149 if (cnt != 0) 150 mpn_rshift (rp, n2p, dn, cnt); 151 else 152 MPN_COPY (rp, n2p, dn); 153 TMP_FREE; 154 return; 155 } 156 157 /* When we come here, the numerator/partial remainder is less 158 than twice the size of the denominator. */ 159 160 { 161 /* Problem: 162 163 Divide a numerator N with nn limbs by a denominator D with dn 164 limbs forming a quotient of qn=nn-dn+1 limbs. When qn is small 165 compared to dn, conventional division algorithms perform poorly. 166 We want an algorithm that has an expected running time that is 167 dependent only on qn. 168 169 Algorithm (very informally stated): 170 171 1) Divide the 2 x qn most significant limbs from the numerator 172 by the qn most significant limbs from the denominator. Call 173 the result qest. This is either the correct quotient, but 174 might be 1 or 2 too large. Compute the remainder from the 175 division. (This step is implemented by a mpn_divrem call.) 176 177 2) Is the most significant limb from the remainder < p, where p 178 is the product of the most significant limb from the quotient 179 and the next(d)? (Next(d) denotes the next ignored limb from 180 the denominator.) If it is, decrement qest, and adjust the 181 remainder accordingly. 182 183 3) Is the remainder >= qest? If it is, qest is the desired 184 quotient. The algorithm terminates. 185 186 4) Subtract qest x next(d) from the remainder. If there is 187 borrow out, decrement qest, and adjust the remainder 188 accordingly. 189 190 5) Skip one word from the denominator (i.e., let next(d) denote 191 the next less significant limb. */ 192 193 mp_size_t qn; 194 mp_ptr n2p, d2p; 195 mp_ptr tp; 196 mp_limb_t cy; 197 mp_size_t in, rn; 198 mp_limb_t quotient_too_large; 199 unsigned int cnt; 200 201 qn = nn - dn; 202 qp[qn] = 0; /* zero high quotient limb */ 203 qn += adjust; /* qn cannot become bigger */ 204 205 if (qn == 0) 206 { 207 MPN_COPY (rp, np, dn); 208 TMP_FREE; 209 return; 210 } 211 212 in = dn - qn; /* (at least partially) ignored # of limbs in ops */ 213 /* Normalize denominator by shifting it to the left such that its 214 most significant bit is set. Then shift the numerator the same 215 amount, to mathematically preserve quotient. */ 216 if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) 217 { 218 count_leading_zeros (cnt, dp[dn - 1]); 219 cnt -= GMP_NAIL_BITS; 220 221 d2p = TMP_ALLOC_LIMBS (qn); 222 mpn_lshift (d2p, dp + in, qn, cnt); 223 d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt); 224 225 n2p = TMP_ALLOC_LIMBS (2 * qn + 1); 226 cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt); 227 if (adjust) 228 { 229 n2p[2 * qn] = cy; 230 n2p++; 231 } 232 else 233 { 234 n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt); 235 } 236 } 237 else 238 { 239 cnt = 0; 240 d2p = (mp_ptr) dp + in; 241 242 n2p = TMP_ALLOC_LIMBS (2 * qn + 1); 243 MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn); 244 if (adjust) 245 { 246 n2p[2 * qn] = 0; 247 n2p++; 248 } 249 } 250 251 /* Get an approximate quotient using the extracted operands. */ 252 if (qn == 1) 253 { 254 mp_limb_t q0, r0; 255 udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS); 256 n2p[0] = r0 >> GMP_NAIL_BITS; 257 qp[0] = q0; 258 } 259 else if (qn == 2) 260 mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */ 261 else 262 { 263 invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]); 264 if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD)) 265 mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32); 266 else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD)) 267 mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv); 268 else 269 { 270 mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0); 271 mp_ptr scratch = TMP_ALLOC_LIMBS (itch); 272 mp_ptr r2p = rp; 273 if (np == r2p) /* If N and R share space, put ... */ 274 r2p += nn - qn; /* intermediate remainder at N's upper end. */ 275 mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch); 276 MPN_COPY (n2p, r2p, qn); 277 } 278 } 279 280 rn = qn; 281 /* Multiply the first ignored divisor limb by the most significant 282 quotient limb. If that product is > the partial remainder's 283 most significant limb, we know the quotient is too large. This 284 test quickly catches most cases where the quotient is too large; 285 it catches all cases where the quotient is 2 too large. */ 286 { 287 mp_limb_t dl, x; 288 mp_limb_t h, dummy; 289 290 if (in - 2 < 0) 291 dl = 0; 292 else 293 dl = dp[in - 2]; 294 295 #if GMP_NAIL_BITS == 0 296 x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS)); 297 #else 298 x = (dp[in - 1] << cnt) & GMP_NUMB_MASK; 299 if (cnt != 0) 300 x |= dl >> (GMP_NUMB_BITS - cnt); 301 #endif 302 umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS); 303 304 if (n2p[qn - 1] < h) 305 { 306 mp_limb_t cy; 307 308 mpn_decr_u (qp, (mp_limb_t) 1); 309 cy = mpn_add_n (n2p, n2p, d2p, qn); 310 if (cy) 311 { 312 /* The partial remainder is safely large. */ 313 n2p[qn] = cy; 314 ++rn; 315 } 316 } 317 } 318 319 quotient_too_large = 0; 320 if (cnt != 0) 321 { 322 mp_limb_t cy1, cy2; 323 324 /* Append partially used numerator limb to partial remainder. */ 325 cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt); 326 n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt); 327 328 /* Update partial remainder with partially used divisor limb. */ 329 cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt)); 330 if (qn != rn) 331 { 332 ASSERT_ALWAYS (n2p[qn] >= cy2); 333 n2p[qn] -= cy2; 334 } 335 else 336 { 337 n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */ 338 339 quotient_too_large = (cy1 < cy2); 340 ++rn; 341 } 342 --in; 343 } 344 /* True: partial remainder now is neutral, i.e., it is not shifted up. */ 345 346 tp = TMP_ALLOC_LIMBS (dn); 347 348 if (in < qn) 349 { 350 if (in == 0) 351 { 352 MPN_COPY (rp, n2p, rn); 353 ASSERT_ALWAYS (rn == dn); 354 goto foo; 355 } 356 mpn_mul (tp, qp, qn, dp, in); 357 } 358 else 359 mpn_mul (tp, dp, in, qp, qn); 360 361 cy = mpn_sub (n2p, n2p, rn, tp + in, qn); 362 MPN_COPY (rp + in, n2p, dn - in); 363 quotient_too_large |= cy; 364 cy = mpn_sub_n (rp, np, tp, in); 365 cy = mpn_sub_1 (rp + in, rp + in, rn, cy); 366 quotient_too_large |= cy; 367 foo: 368 if (quotient_too_large) 369 { 370 mpn_decr_u (qp, (mp_limb_t) 1); 371 mpn_add_n (rp, rp, dp, dn); 372 } 373 } 374 TMP_FREE; 375 return; 376 } 377 } 378 } 379