1 /* mpn_div_q -- division for arbitrary size operands. 2 3 Contributed to the GNU project by Torbjorn Granlund. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE. 8 9 Copyright 2009, 2010 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of the GNU Lesser General Public License as published by 15 the Free Software Foundation; either version 3 of the License, or (at your 16 option) any later version. 17 18 The GNU MP Library is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 20 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 21 License for more details. 22 23 You should have received a copy of the GNU Lesser General Public License 24 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 25 26 #include "gmp.h" 27 #include "gmp-impl.h" 28 #include "longlong.h" 29 30 31 /* Compute Q = N/D with truncation. 32 N = {np,nn} 33 D = {dp,dn} 34 Q = {qp,nn-dn+1} 35 T = {scratch,nn+1} is scratch space 36 N and D are both untouched by the computation. 37 N and T may overlap; pass the same space if N is irrelevant after the call, 38 but note that tp needs an extra limb. 39 40 Operand requirements: 41 N >= D > 0 42 dp[dn-1] != 0 43 No overlap between the N, D, and Q areas. 44 45 This division function does not clobber its input operands, since it is 46 intended to support average-O(qn) division, and for that to be effective, it 47 cannot put requirements on callers to copy a O(nn) operand. 48 49 If a caller does not care about the value of {np,nn+1} after calling this 50 function, it should pass np also for the scratch argument. 
This function
   will then save some time and space by avoiding allocation and copying.
   (FIXME: Is this a good design?  We only really save any copying for
   already-normalised divisors, which should be rare.  It also prevents us
   from reasonably asking for all scratch space we need.)

   We write nn-dn+1 limbs for the quotient, but return void.  Why not return
   the most significant quotient limb?  Look at the 4 main code blocks below
   (consisting of an outer if-else where each arm contains an if-else).  It
   is tricky for the first code block, since the mpn_*_div_q calls will
   typically generate all nn-dn+1 limbs and return 0 or 1.  I don't see how
   to fix that unless we generate the most significant quotient limb here,
   before calling mpn_*_div_q, or put the quotient in a temporary area.
   Since this is a critical division case (the SB sub-case in particular)
   copying is not a good idea.

   It might make sense to split the if-else parts of the (qn + FUDGE
   >= dn) blocks into separate functions, since we could promise quite
   different things to callers in these two cases.  The 'then' case
   benefits from np=scratch, and it could perhaps even tolerate qp=np,
   saving some headache for many callers.

   FIXME: Scratch allocation leaves a lot to be desired.  E.g., for the MU
   size operands, we do not reuse the huge scratch for adjustments.  This
   can be a serious waste of memory for the largest operands.
*/

/* FUDGE determines when to try getting an approximate quotient from the
   upper parts of the dividend and divisor, then adjust.  N.B. FUDGE must
   be >= 2 for the code to be correct.
*/ 80 #define FUDGE 5 /* FIXME: tune this */ 81 82 #define DC_DIV_Q_THRESHOLD DC_DIVAPPR_Q_THRESHOLD 83 #define MU_DIV_Q_THRESHOLD MU_DIVAPPR_Q_THRESHOLD 84 #define MUPI_DIV_Q_THRESHOLD MUPI_DIVAPPR_Q_THRESHOLD 85 #ifndef MUPI_DIVAPPR_Q_THRESHOLD 86 #define MUPI_DIVAPPR_Q_THRESHOLD MUPI_DIV_QR_THRESHOLD 87 #endif 88 89 void 90 mpn_div_q (mp_ptr qp, 91 mp_srcptr np, mp_size_t nn, 92 mp_srcptr dp, mp_size_t dn, mp_ptr scratch) 93 { 94 mp_ptr new_dp, new_np, tp, rp; 95 mp_limb_t cy, dh, qh; 96 mp_size_t new_nn, qn; 97 gmp_pi1_t dinv; 98 int cnt; 99 TMP_DECL; 100 TMP_MARK; 101 102 ASSERT (nn >= dn); 103 ASSERT (dn > 0); 104 ASSERT (dp[dn - 1] != 0); 105 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, np, nn)); 106 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1, dp, dn)); 107 ASSERT (MPN_SAME_OR_SEPARATE_P (np, scratch, nn)); 108 109 ASSERT_ALWAYS (FUDGE >= 2); 110 111 if (dn == 1) 112 { 113 mpn_divrem_1 (qp, 0L, np, nn, dp[dn - 1]); 114 return; 115 } 116 117 qn = nn - dn + 1; /* Quotient size, high limb might be zero */ 118 119 if (qn + FUDGE >= dn) 120 { 121 /* |________________________| 122 |_______| */ 123 new_np = scratch; 124 125 dh = dp[dn - 1]; 126 if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0)) 127 { 128 count_leading_zeros (cnt, dh); 129 130 cy = mpn_lshift (new_np, np, nn, cnt); 131 new_np[nn] = cy; 132 new_nn = nn + (cy != 0); 133 134 new_dp = TMP_ALLOC_LIMBS (dn); 135 mpn_lshift (new_dp, dp, dn, cnt); 136 137 if (dn == 2) 138 { 139 qh = mpn_divrem_2 (qp, 0L, new_np, new_nn, new_dp); 140 } 141 else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) || 142 BELOW_THRESHOLD (new_nn - dn, DC_DIV_Q_THRESHOLD)) 143 { 144 invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]); 145 qh = mpn_sbpi1_div_q (qp, new_np, new_nn, new_dp, dn, dinv.inv32); 146 } 147 else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */ 148 BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */ 149 (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... 
*/ 150 + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */ 151 { 152 invert_pi1 (dinv, new_dp[dn - 1], new_dp[dn - 2]); 153 qh = mpn_dcpi1_div_q (qp, new_np, new_nn, new_dp, dn, &dinv); 154 } 155 else 156 { 157 mp_size_t itch = mpn_mu_div_q_itch (new_nn, dn, 0); 158 mp_ptr scratch = TMP_ALLOC_LIMBS (itch); 159 qh = mpn_mu_div_q (qp, new_np, new_nn, new_dp, dn, scratch); 160 } 161 if (cy == 0) 162 qp[qn - 1] = qh; 163 else if (UNLIKELY (qh != 0)) 164 { 165 /* This happens only when the quotient is close to B^n and 166 mpn_*_divappr_q returned B^n. */ 167 mp_size_t i, n; 168 n = new_nn - dn; 169 for (i = 0; i < n; i++) 170 qp[i] = GMP_NUMB_MAX; 171 qh = 0; /* currently ignored */ 172 } 173 } 174 else /* divisor is already normalised */ 175 { 176 if (new_np != np) 177 MPN_COPY (new_np, np, nn); 178 179 if (dn == 2) 180 { 181 qh = mpn_divrem_2 (qp, 0L, new_np, nn, dp); 182 } 183 else if (BELOW_THRESHOLD (dn, DC_DIV_Q_THRESHOLD) || 184 BELOW_THRESHOLD (nn - dn, DC_DIV_Q_THRESHOLD)) 185 { 186 invert_pi1 (dinv, dh, dp[dn - 2]); 187 qh = mpn_sbpi1_div_q (qp, new_np, nn, dp, dn, dinv.inv32); 188 } 189 else if (BELOW_THRESHOLD (dn, MUPI_DIV_Q_THRESHOLD) || /* fast condition */ 190 BELOW_THRESHOLD (nn, 2 * MU_DIV_Q_THRESHOLD) || /* fast condition */ 191 (double) (2 * (MU_DIV_Q_THRESHOLD - MUPI_DIV_Q_THRESHOLD)) * dn /* slow... 
*/ 192 + (double) MUPI_DIV_Q_THRESHOLD * nn > (double) dn * nn) /* ...condition */ 193 { 194 invert_pi1 (dinv, dh, dp[dn - 2]); 195 qh = mpn_dcpi1_div_q (qp, new_np, nn, dp, dn, &dinv); 196 } 197 else 198 { 199 mp_size_t itch = mpn_mu_div_q_itch (nn, dn, 0); 200 mp_ptr scratch = TMP_ALLOC_LIMBS (itch); 201 qh = mpn_mu_div_q (qp, np, nn, dp, dn, scratch); 202 } 203 qp[nn - dn] = qh; 204 } 205 } 206 else 207 { 208 /* |________________________| 209 |_________________| */ 210 tp = TMP_ALLOC_LIMBS (qn + 1); 211 212 new_np = scratch; 213 new_nn = 2 * qn + 1; 214 if (new_np == np) 215 /* We need {np,nn} to remain untouched until the final adjustment, so 216 we need to allocate separate space for new_np. */ 217 new_np = TMP_ALLOC_LIMBS (new_nn + 1); 218 219 220 dh = dp[dn - 1]; 221 if (LIKELY ((dh & GMP_NUMB_HIGHBIT) == 0)) 222 { 223 count_leading_zeros (cnt, dh); 224 225 cy = mpn_lshift (new_np, np + nn - new_nn, new_nn, cnt); 226 new_np[new_nn] = cy; 227 228 new_nn += (cy != 0); 229 230 new_dp = TMP_ALLOC_LIMBS (qn + 1); 231 mpn_lshift (new_dp, dp + dn - (qn + 1), qn + 1, cnt); 232 new_dp[0] |= dp[dn - (qn + 1) - 1] >> (GMP_NUMB_BITS - cnt); 233 234 if (qn + 1 == 2) 235 { 236 qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp); 237 } 238 else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1)) 239 { 240 invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]); 241 qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32); 242 } 243 else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1)) 244 { 245 invert_pi1 (dinv, new_dp[qn], new_dp[qn - 1]); 246 qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv); 247 } 248 else 249 { 250 mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0); 251 mp_ptr scratch = TMP_ALLOC_LIMBS (itch); 252 qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch); 253 } 254 if (cy == 0) 255 tp[qn] = qh; 256 else if (UNLIKELY (qh != 0)) 257 { 258 /* This happens only when the quotient is close to B^n and 259 
mpn_*_divappr_q returned B^n. */ 260 mp_size_t i, n; 261 n = new_nn - (qn + 1); 262 for (i = 0; i < n; i++) 263 tp[i] = GMP_NUMB_MAX; 264 qh = 0; /* currently ignored */ 265 } 266 } 267 else /* divisor is already normalised */ 268 { 269 MPN_COPY (new_np, np + nn - new_nn, new_nn); /* pointless of MU will be used */ 270 271 new_dp = (mp_ptr) dp + dn - (qn + 1); 272 273 if (qn == 2 - 1) 274 { 275 qh = mpn_divrem_2 (tp, 0L, new_np, new_nn, new_dp); 276 } 277 else if (BELOW_THRESHOLD (qn, DC_DIVAPPR_Q_THRESHOLD - 1)) 278 { 279 invert_pi1 (dinv, dh, new_dp[qn - 1]); 280 qh = mpn_sbpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, dinv.inv32); 281 } 282 else if (BELOW_THRESHOLD (qn, MU_DIVAPPR_Q_THRESHOLD - 1)) 283 { 284 invert_pi1 (dinv, dh, new_dp[qn - 1]); 285 qh = mpn_dcpi1_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, &dinv); 286 } 287 else 288 { 289 mp_size_t itch = mpn_mu_divappr_q_itch (new_nn, qn + 1, 0); 290 mp_ptr scratch = TMP_ALLOC_LIMBS (itch); 291 qh = mpn_mu_divappr_q (tp, new_np, new_nn, new_dp, qn + 1, scratch); 292 } 293 tp[qn] = qh; 294 } 295 296 MPN_COPY (qp, tp + 1, qn); 297 if (tp[0] <= 4) 298 { 299 mp_size_t rn; 300 301 rp = TMP_ALLOC_LIMBS (dn + qn); 302 mpn_mul (rp, dp, dn, tp + 1, qn); 303 rn = dn + qn; 304 rn -= rp[rn - 1] == 0; 305 306 if (rn > nn || mpn_cmp (np, rp, nn) < 0) 307 mpn_decr_u (qp, 1); 308 } 309 } 310 311 TMP_FREE; 312 } 313