1 /* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2 write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
3 qxn is non-zero, generate that many fraction limbs and append them after the
4 other quotient limbs, and update the remainder accordingly. The input
5 operands are unaffected.
6
7 Preconditions:
8 1. The most significant limb of the divisor must be non-zero.
9 2. nn >= dn, even if qxn is non-zero. (??? relax this ???)
10
11 The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
12 complexity of multiplication.
13
14 Copyright 1997, 2000-2002, 2005, 2009, 2015 Free Software Foundation, Inc.
15
16 This file is part of the GNU MP Library.
17
18 The GNU MP Library is free software; you can redistribute it and/or modify
19 it under the terms of either:
20
21 * the GNU Lesser General Public License as published by the Free
22 Software Foundation; either version 3 of the License, or (at your
23 option) any later version.
24
25 or
26
27 * the GNU General Public License as published by the Free Software
28 Foundation; either version 2 of the License, or (at your option) any
29 later version.
30
31 or both in parallel, as here.
32
33 The GNU MP Library is distributed in the hope that it will be useful, but
34 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
35 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
36 for more details.
37
38 You should have received copies of the GNU General Public License and the
39 GNU Lesser General Public License along with the GNU MP Library. If not,
40 see https://www.gnu.org/licenses/. */
41
42 #include "gmp-impl.h"
43 #include "longlong.h"
44
45
46 void
mpn_tdiv_qr(mp_ptr qp,mp_ptr rp,mp_size_t qxn,mp_srcptr np,mp_size_t nn,mp_srcptr dp,mp_size_t dn)47 mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
48 mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
49 {
50 ASSERT_ALWAYS (qxn == 0);
51
52 ASSERT (nn >= 0);
53 ASSERT (dn >= 0);
54 ASSERT (dn == 0 || dp[dn - 1] != 0);
55 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, np, nn));
56 ASSERT (! MPN_OVERLAP_P (qp, nn - dn + 1 + qxn, dp, dn));
57
58 switch (dn)
59 {
60 case 0:
61 DIVIDE_BY_ZERO;
62
63 case 1:
64 {
65 rp[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
66 return;
67 }
68
69 case 2:
70 {
71 mp_ptr n2p;
72 mp_limb_t qhl, cy;
73 TMP_DECL;
74 TMP_MARK;
75 if ((dp[1] & GMP_NUMB_HIGHBIT) == 0)
76 {
77 int cnt;
78 mp_limb_t d2p[2];
79 count_leading_zeros (cnt, dp[1]);
80 cnt -= GMP_NAIL_BITS;
81 d2p[1] = (dp[1] << cnt) | (dp[0] >> (GMP_NUMB_BITS - cnt));
82 d2p[0] = (dp[0] << cnt) & GMP_NUMB_MASK;
83 n2p = TMP_ALLOC_LIMBS (nn + 1);
84 cy = mpn_lshift (n2p, np, nn, cnt);
85 n2p[nn] = cy;
86 qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
87 if (cy == 0)
88 qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
89 rp[0] = (n2p[0] >> cnt)
90 | ((n2p[1] << (GMP_NUMB_BITS - cnt)) & GMP_NUMB_MASK);
91 rp[1] = (n2p[1] >> cnt);
92 }
93 else
94 {
95 n2p = TMP_ALLOC_LIMBS (nn);
96 MPN_COPY (n2p, np, nn);
97 qhl = mpn_divrem_2 (qp, 0L, n2p, nn, dp);
98 qp[nn - 2] = qhl; /* always store nn-2+1 quotient limbs */
99 rp[0] = n2p[0];
100 rp[1] = n2p[1];
101 }
102 TMP_FREE;
103 return;
104 }
105
106 default:
107 {
108 int adjust;
109 gmp_pi1_t dinv;
110 TMP_DECL;
111 TMP_MARK;
112 adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
113 if (nn + adjust >= 2 * dn)
114 {
115 mp_ptr n2p, d2p;
116 mp_limb_t cy;
117 int cnt;
118
119 qp[nn - dn] = 0; /* zero high quotient limb */
120 if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0) /* normalize divisor */
121 {
122 count_leading_zeros (cnt, dp[dn - 1]);
123 cnt -= GMP_NAIL_BITS;
124 d2p = TMP_ALLOC_LIMBS (dn);
125 mpn_lshift (d2p, dp, dn, cnt);
126 n2p = TMP_ALLOC_LIMBS (nn + 1);
127 cy = mpn_lshift (n2p, np, nn, cnt);
128 n2p[nn] = cy;
129 nn += adjust;
130 }
131 else
132 {
133 cnt = 0;
134 d2p = (mp_ptr) dp;
135 n2p = TMP_ALLOC_LIMBS (nn + 1);
136 MPN_COPY (n2p, np, nn);
137 n2p[nn] = 0;
138 nn += adjust;
139 }
140
141 invert_pi1 (dinv, d2p[dn - 1], d2p[dn - 2]);
142 if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD))
143 mpn_sbpi1_div_qr (qp, n2p, nn, d2p, dn, dinv.inv32);
144 else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) || /* fast condition */
145 BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
146 (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
147 + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn) /* ...condition */
148 mpn_dcpi1_div_qr (qp, n2p, nn, d2p, dn, &dinv);
149 else
150 {
151 mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
152 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
153 mpn_mu_div_qr (qp, rp, n2p, nn, d2p, dn, scratch);
154 n2p = rp;
155 }
156
157 if (cnt != 0)
158 mpn_rshift (rp, n2p, dn, cnt);
159 else
160 MPN_COPY (rp, n2p, dn);
161 TMP_FREE;
162 return;
163 }
164
165 /* When we come here, the numerator/partial remainder is less
166 than twice the size of the denominator. */
167
168 {
169 /* Problem:
170
171 Divide a numerator N with nn limbs by a denominator D with dn
172 limbs forming a quotient of qn=nn-dn+1 limbs. When qn is small
173 compared to dn, conventional division algorithms perform poorly.
174 We want an algorithm that has an expected running time that is
175 dependent only on qn.
176
177 Algorithm (very informally stated):
178
179 1) Divide the 2 x qn most significant limbs from the numerator
180 by the qn most significant limbs from the denominator. Call
181 the result qest. This is either the correct quotient, but
182 might be 1 or 2 too large. Compute the remainder from the
183 division. (This step is implemented by an mpn_divrem call.)
184
185 2) Is the most significant limb from the remainder < p, where p
186 is the product of the most significant limb from the quotient
187 and the next(d)? (Next(d) denotes the next ignored limb from
188 the denominator.) If it is, decrement qest, and adjust the
189 remainder accordingly.
190
191 3) Is the remainder >= qest? If it is, qest is the desired
192 quotient. The algorithm terminates.
193
194 4) Subtract qest x next(d) from the remainder. If there is
195 borrow out, decrement qest, and adjust the remainder
196 accordingly.
197
198 5) Skip one word from the denominator (i.e., let next(d) denote
199 the next less significant limb. */
200
201 mp_size_t qn;
202 mp_ptr n2p, d2p;
203 mp_ptr tp;
204 mp_limb_t cy;
205 mp_size_t in, rn;
206 mp_limb_t quotient_too_large;
207 unsigned int cnt;
208
209 qn = nn - dn;
210 qp[qn] = 0; /* zero high quotient limb */
211 qn += adjust; /* qn cannot become bigger */
212
213 if (qn == 0)
214 {
215 MPN_COPY (rp, np, dn);
216 TMP_FREE;
217 return;
218 }
219
220 in = dn - qn; /* (at least partially) ignored # of limbs in ops */
221 /* Normalize denominator by shifting it to the left such that its
222 most significant bit is set. Then shift the numerator the same
223 amount, to mathematically preserve quotient. */
224 if ((dp[dn - 1] & GMP_NUMB_HIGHBIT) == 0)
225 {
226 count_leading_zeros (cnt, dp[dn - 1]);
227 cnt -= GMP_NAIL_BITS;
228
229 d2p = TMP_ALLOC_LIMBS (qn);
230 mpn_lshift (d2p, dp + in, qn, cnt);
231 d2p[0] |= dp[in - 1] >> (GMP_NUMB_BITS - cnt);
232
233 n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
234 cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
235 if (adjust)
236 {
237 n2p[2 * qn] = cy;
238 n2p++;
239 }
240 else
241 {
242 n2p[0] |= np[nn - 2 * qn - 1] >> (GMP_NUMB_BITS - cnt);
243 }
244 }
245 else
246 {
247 cnt = 0;
248 d2p = (mp_ptr) dp + in;
249
250 n2p = TMP_ALLOC_LIMBS (2 * qn + 1);
251 MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
252 if (adjust)
253 {
254 n2p[2 * qn] = 0;
255 n2p++;
256 }
257 }
258
259 /* Get an approximate quotient using the extracted operands. */
260 if (qn == 1)
261 {
262 mp_limb_t q0, r0;
263 udiv_qrnnd (q0, r0, n2p[1], n2p[0] << GMP_NAIL_BITS, d2p[0] << GMP_NAIL_BITS);
264 n2p[0] = r0 >> GMP_NAIL_BITS;
265 qp[0] = q0;
266 }
267 else if (qn == 2)
268 mpn_divrem_2 (qp, 0L, n2p, 4L, d2p); /* FIXME: obsolete function */
269 else
270 {
271 invert_pi1 (dinv, d2p[qn - 1], d2p[qn - 2]);
272 if (BELOW_THRESHOLD (qn, DC_DIV_QR_THRESHOLD))
273 mpn_sbpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, dinv.inv32);
274 else if (BELOW_THRESHOLD (qn, MU_DIV_QR_THRESHOLD))
275 mpn_dcpi1_div_qr (qp, n2p, 2 * qn, d2p, qn, &dinv);
276 else
277 {
278 mp_size_t itch = mpn_mu_div_qr_itch (2 * qn, qn, 0);
279 mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
280 mp_ptr r2p = rp;
281 if (np == r2p) /* If N and R share space, put ... */
282 r2p += nn - qn; /* intermediate remainder at N's upper end. */
283 mpn_mu_div_qr (qp, r2p, n2p, 2 * qn, d2p, qn, scratch);
284 MPN_COPY (n2p, r2p, qn);
285 }
286 }
287
288 rn = qn;
289 /* Multiply the first ignored divisor limb by the most significant
290 quotient limb. If that product is > the partial remainder's
291 most significant limb, we know the quotient is too large. This
292 test quickly catches most cases where the quotient is too large;
293 it catches all cases where the quotient is 2 too large. */
294 {
295 mp_limb_t dl, x;
296 mp_limb_t h, dummy;
297
298 if (in - 2 < 0)
299 dl = 0;
300 else
301 dl = dp[in - 2];
302
303 #if GMP_NAIL_BITS == 0
304 x = (dp[in - 1] << cnt) | ((dl >> 1) >> ((~cnt) % GMP_LIMB_BITS));
305 #else
306 x = (dp[in - 1] << cnt) & GMP_NUMB_MASK;
307 if (cnt != 0)
308 x |= dl >> (GMP_NUMB_BITS - cnt);
309 #endif
310 umul_ppmm (h, dummy, x, qp[qn - 1] << GMP_NAIL_BITS);
311
312 if (n2p[qn - 1] < h)
313 {
314 mp_limb_t cy;
315
316 mpn_decr_u (qp, (mp_limb_t) 1);
317 cy = mpn_add_n (n2p, n2p, d2p, qn);
318 if (cy)
319 {
320 /* The partial remainder is safely large. */
321 n2p[qn] = cy;
322 ++rn;
323 }
324 }
325 }
326
327 quotient_too_large = 0;
328 if (cnt != 0)
329 {
330 mp_limb_t cy1, cy2;
331
332 /* Append partially used numerator limb to partial remainder. */
333 cy1 = mpn_lshift (n2p, n2p, rn, GMP_NUMB_BITS - cnt);
334 n2p[0] |= np[in - 1] & (GMP_NUMB_MASK >> cnt);
335
336 /* Update partial remainder with partially used divisor limb. */
337 cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (GMP_NUMB_MASK >> cnt));
338 if (qn != rn)
339 {
340 ASSERT_ALWAYS (n2p[qn] >= cy2);
341 n2p[qn] -= cy2;
342 }
343 else
344 {
345 n2p[qn] = cy1 - cy2; /* & GMP_NUMB_MASK; */
346
347 quotient_too_large = (cy1 < cy2);
348 ++rn;
349 }
350 --in;
351 }
352 /* True: partial remainder now is neutral, i.e., it is not shifted up. */
353
354 tp = TMP_ALLOC_LIMBS (dn);
355
356 if (in < qn)
357 {
358 if (in == 0)
359 {
360 MPN_COPY (rp, n2p, rn);
361 ASSERT_ALWAYS (rn == dn);
362 goto foo;
363 }
364 mpn_mul (tp, qp, qn, dp, in);
365 }
366 else
367 mpn_mul (tp, dp, in, qp, qn);
368
369 cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
370 MPN_COPY (rp + in, n2p, dn - in);
371 quotient_too_large |= cy;
372 cy = mpn_sub_n (rp, np, tp, in);
373 cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
374 quotient_too_large |= cy;
375 foo:
376 if (quotient_too_large)
377 {
378 mpn_decr_u (qp, (mp_limb_t) 1);
379 mpn_add_n (rp, rp, dp, dn);
380 }
381 }
382 TMP_FREE;
383 return;
384 }
385 }
386 }
387