1 /* Mulders' MulHigh function (short product)
2
3 Copyright 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 Contributed by the AriC and Caramel projects, INRIA.
5
6 This file is part of the GNU MPFR Library.
7
8 The GNU MPFR Library is free software; you can redistribute it and/or modify
9 it under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 The GNU MPFR Library is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16 License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with the GNU MPFR Library; see the file COPYING.LESSER. If not, see
20 http://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
21 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23 /* References:
24 [1] Short Division of Long Integers, David Harvey and Paul Zimmermann,
25 Proceedings of the 20th Symposium on Computer Arithmetic (ARITH-20),
26 July 25-27, 2011, pages 7-14.
27 */
28
29 #define MPFR_NEED_LONGLONG_H
30 #include "mpfr-impl.h"
31
32 #ifndef MUL_FFT_THRESHOLD
33 #define MUL_FFT_THRESHOLD 8448
34 #endif
35
36 /* Don't use MPFR_MULHIGH_SIZE since it is handled by tuneup */
37 #ifdef MPFR_MULHIGH_TAB_SIZE
38 static short mulhigh_ktab[MPFR_MULHIGH_TAB_SIZE];
39 #else
40 static short mulhigh_ktab[] = {MPFR_MULHIGH_TAB};
41 #define MPFR_MULHIGH_TAB_SIZE \
42 ((mp_size_t) (sizeof(mulhigh_ktab) / sizeof(mulhigh_ktab[0])))
43 #endif
44
45 /* Put in rp[n..2n-1] an approximation of the n high limbs
46 of {up, n} * {vp, n}. The error is less than n ulps of rp[n] (and the
47 approximation is always less or equal to the truncated full product).
48 Assume 2n limbs are allocated at rp.
49
50 Implements Algorithm ShortMulNaive from [1].
51 */
52 static void
mpfr_mulhigh_n_basecase(mpfr_limb_ptr rp,mpfr_limb_srcptr up,mpfr_limb_srcptr vp,mp_size_t n)53 mpfr_mulhigh_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
54 mpfr_limb_srcptr vp, mp_size_t n)
55 {
56 mp_size_t i;
57
58 rp += n - 1;
59 umul_ppmm (rp[1], rp[0], up[n-1], vp[0]); /* we neglect up[0..n-2]*vp[0],
60 which is less than B^n */
61 for (i = 1 ; i < n ; i++)
62 /* here, we neglect up[0..n-i-2] * vp[i], which is less than B^n too */
63 rp[i + 1] = mpn_addmul_1 (rp, up + (n - i - 1), i + 1, vp[i]);
64 /* in total, we neglect less than n*B^n, i.e., n ulps of rp[n]. */
65 }
66
67 /* Put in rp[0..n] the n+1 low limbs of {up, n} * {vp, n}.
68 Assume 2n limbs are allocated at rp. */
69 static void
mpfr_mullow_n_basecase(mpfr_limb_ptr rp,mpfr_limb_srcptr up,mpfr_limb_srcptr vp,mp_size_t n)70 mpfr_mullow_n_basecase (mpfr_limb_ptr rp, mpfr_limb_srcptr up,
71 mpfr_limb_srcptr vp, mp_size_t n)
72 {
73 mp_size_t i;
74
75 rp[n] = mpn_mul_1 (rp, up, n, vp[0]);
76 for (i = 1 ; i < n ; i++)
77 mpn_addmul_1 (rp + i, up, n - i + 1, vp[i]);
78 }
79
80 /* Put in rp[n..2n-1] an approximation of the n high limbs
81 of {np, n} * {mp, n}. The error is less than n ulps of rp[n] (and the
82 approximation is always less or equal to the truncated full product).
83
84 Implements Algorithm ShortMul from [1].
85 */
86 void
mpfr_mulhigh_n(mpfr_limb_ptr rp,mpfr_limb_srcptr np,mpfr_limb_srcptr mp,mp_size_t n)87 mpfr_mulhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mpfr_limb_srcptr mp,
88 mp_size_t n)
89 {
90 mp_size_t k;
91
92 MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 8); /* so that 3*(n/4) > n/2 */
93 k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4);
94 /* Algorithm ShortMul from [1] requires k >= (n+3)/2, which translates
95 into k >= (n+4)/2 in the C language. */
96 MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
97 if (k < 0)
98 mpn_mul_basecase (rp, np, n, mp, n); /* result is exact, no error */
99 else if (k == 0)
100 mpfr_mulhigh_n_basecase (rp, np, mp, n); /* basecase error < n ulps */
101 else if (n > MUL_FFT_THRESHOLD)
102 mpn_mul_n (rp, np, mp, n); /* result is exact, no error */
103 else
104 {
105 mp_size_t l = n - k;
106 mp_limb_t cy;
107
108 mpn_mul_n (rp + 2 * l, np + l, mp + l, k); /* fills rp[2l..2n-1] */
109 mpfr_mulhigh_n (rp, np + k, mp, l); /* fills rp[l-1..2l-1] */
110 cy = mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
111 mpfr_mulhigh_n (rp, np, mp + k, l); /* fills rp[l-1..2l-1] */
112 cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
113 mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
114 }
115 }
116
117 /* Put in rp[0..n] the n+1 low limbs of {np, n} * {mp, n}.
118 Assume 2n limbs are allocated at rp. */
119 void
mpfr_mullow_n(mpfr_limb_ptr rp,mpfr_limb_srcptr np,mpfr_limb_srcptr mp,mp_size_t n)120 mpfr_mullow_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mpfr_limb_srcptr mp,
121 mp_size_t n)
122 {
123 mp_size_t k;
124
125 MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 8); /* so that 3*(n/4) > n/2 */
126 k = MPFR_LIKELY (n < MPFR_MULHIGH_TAB_SIZE) ? mulhigh_ktab[n] : 3*(n/4);
127 MPFR_ASSERTD (k == -1 || k == 0 || (2 * k >= n && k < n));
128 if (k < 0)
129 mpn_mul_basecase (rp, np, n, mp, n);
130 else if (k == 0)
131 mpfr_mullow_n_basecase (rp, np, mp, n);
132 else if (n > MUL_FFT_THRESHOLD)
133 mpn_mul_n (rp, np, mp, n);
134 else
135 {
136 mp_size_t l = n - k;
137
138 mpn_mul_n (rp, np, mp, k); /* fills rp[0..2k] */
139 mpfr_mullow_n (rp + n, np + k, mp, l); /* fills rp[n..n+2l] */
140 mpn_add_n (rp + k, rp + k, rp + n, l + 1);
141 mpfr_mullow_n (rp + n, np, mp + k, l); /* fills rp[n..n+2l] */
142 mpn_add_n (rp + k, rp + k, rp + n, l + 1);
143 }
144 }
145
146 #ifdef MPFR_SQRHIGH_TAB_SIZE
147 static short sqrhigh_ktab[MPFR_SQRHIGH_TAB_SIZE];
148 #else
149 static short sqrhigh_ktab[] = {MPFR_SQRHIGH_TAB};
150 #define MPFR_SQRHIGH_TAB_SIZE (sizeof(sqrhigh_ktab) / sizeof(sqrhigh_ktab[0]))
151 #endif
152
153 /* Put in rp[n..2n-1] an approximation of the n high limbs
154 of {np, n}^2. The error is less than n ulps of rp[n]. */
155 void
mpfr_sqrhigh_n(mpfr_limb_ptr rp,mpfr_limb_srcptr np,mp_size_t n)156 mpfr_sqrhigh_n (mpfr_limb_ptr rp, mpfr_limb_srcptr np, mp_size_t n)
157 {
158 mp_size_t k;
159
160 MPFR_ASSERTN (MPFR_SQRHIGH_TAB_SIZE > 2); /* ensures k < n */
161 k = MPFR_LIKELY (n < MPFR_SQRHIGH_TAB_SIZE) ? sqrhigh_ktab[n]
162 : (n+4)/2; /* ensures that k >= (n+3)/2 */
163 MPFR_ASSERTD (k == -1 || k == 0 || (k >= (n+4)/2 && k < n));
164 if (k < 0)
165 /* we can't use mpn_sqr_basecase here, since it requires
166 n <= SQR_KARATSUBA_THRESHOLD, where SQR_KARATSUBA_THRESHOLD
167 is not exported by GMP */
168 mpn_sqr_n (rp, np, n);
169 else if (k == 0)
170 mpfr_mulhigh_n_basecase (rp, np, np, n);
171 else
172 {
173 mp_size_t l = n - k;
174 mp_limb_t cy;
175
176 mpn_sqr_n (rp + 2 * l, np + l, k); /* fills rp[2l..2n-1] */
177 mpfr_mulhigh_n (rp, np, np + k, l); /* fills rp[l-1..2l-1] */
178 /* {rp+n-1,l+1} += 2 * {rp+l-1,l+1} */
179 cy = mpn_lshift (rp + l - 1, rp + l - 1, l + 1, 1);
180 cy += mpn_add_n (rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
181 mpn_add_1 (rp + n + l, rp + n + l, k, cy); /* propagate carry */
182 }
183 }
184
185 #ifdef MPFR_DIVHIGH_TAB_SIZE
186 static short divhigh_ktab[MPFR_DIVHIGH_TAB_SIZE];
187 #else
188 static short divhigh_ktab[] = {MPFR_DIVHIGH_TAB};
189 #define MPFR_DIVHIGH_TAB_SIZE (sizeof(divhigh_ktab) / sizeof(divhigh_ktab[0]))
190 #endif
191
192 #ifndef __GMPFR_GMP_H__
193 #define mpfr_pi1_t gmp_pi1_t /* with a GMP build */
194 #endif
195
196 #if !(defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q))
197 /* Put in Q={qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
198 with the most significant limb of the quotient as return value (0 or 1).
199 Assumes the most significant bit of D is set. Clobbers N.
200
201 The approximate quotient Q satisfies - 2(n-1) < N/D - Q <= 4.
202 */
203 static mp_limb_t
mpfr_divhigh_n_basecase(mpfr_limb_ptr qp,mpfr_limb_ptr np,mpfr_limb_srcptr dp,mp_size_t n)204 mpfr_divhigh_n_basecase (mpfr_limb_ptr qp, mpfr_limb_ptr np,
205 mpfr_limb_srcptr dp, mp_size_t n)
206 {
207 mp_limb_t qh, d1, d0, dinv, q2, q1, q0;
208 mpfr_pi1_t dinv2;
209
210 np += n;
211
212 if ((qh = (mpn_cmp (np, dp, n) >= 0)))
213 mpn_sub_n (np, np, dp, n);
214
215 /* now {np, n} is less than D={dp, n}, which implies np[n-1] <= dp[n-1] */
216
217 d1 = dp[n - 1];
218
219 if (n == 1)
220 {
221 invert_limb (dinv, d1);
222 umul_ppmm (q1, q0, np[0], dinv);
223 qp[0] = np[0] + q1;
224 return qh;
225 }
226
227 /* now n >= 2 */
228 d0 = dp[n - 2];
229 invert_pi1 (dinv2, d1, d0);
230 /* dinv2.inv32 = floor ((B^3 - 1) / (d0 + d1 B)) - B */
231 while (n > 1)
232 {
233 /* Invariant: it remains to reduce n limbs from N (in addition to the
234 initial low n limbs).
235 Since n >= 2 here, necessarily we had n >= 2 initially, which means
236 that in addition to the limb np[n-1] to reduce, we have at least 2
237 extra limbs, thus accessing np[n-3] is valid. */
238
239 /* warning: we can have np[n-1]=d1 and np[n-2]=d0, but since {np,n} < D,
240 the largest possible partial quotient is B-1 */
241 if (MPFR_UNLIKELY(np[n - 1] == d1 && np[n - 2] == d0))
242 q2 = ~ (mp_limb_t) 0;
243 else
244 udiv_qr_3by2 (q2, q1, q0, np[n - 1], np[n - 2], np[n - 3],
245 d1, d0, dinv2.inv32);
246 /* since q2 = floor((np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0)),
247 we have q2 <= (np[n-1]*B^2+np[n-2]*B+np[n-3])/(d1*B+d0),
248 thus np[n-1]*B^2+np[n-2]*B+np[n-3] >= q2*(d1*B+d0)
249 and {np-1, n} >= q2*D - q2*B^(n-2) >= q2*D - B^(n-1)
250 thus {np-1, n} - (q2-1)*D >= D - B^(n-1) >= 0
251 which proves that at most one correction is needed */
252 q0 = mpn_submul_1 (np - 1, dp, n, q2);
253 if (MPFR_UNLIKELY(q0 > np[n - 1]))
254 {
255 mpn_add_n (np - 1, np - 1, dp, n);
256 q2 --;
257 }
258 qp[--n] = q2;
259 dp ++;
260 }
261
262 /* we have B+dinv2 = floor((B^3-1)/(d1*B+d0)) < B^2/d1
263 q1 = floor(np[0]*(B+dinv2)/B) <= floor(np[0]*B/d1)
264 <= floor((np[0]*B+np[1])/d1)
265 thus q1 is not larger than the true quotient.
266 q1 > np[0]*(B+dinv2)/B - 1 > np[0]*(B^3-1)/(d1*B+d0)/B - 2
267 For d1*B+d0 <> B^2/2, we have B+dinv2 = floor(B^3/(d1*B+d0))
268 thus q1 > np[0]*B^2/(d1*B+d0) - 2, i.e.,
269 (d1*B+d0)*q1 > np[0]*B^2 - 2*(d1*B+d0)
270 d1*B*q1 > np[0]*B^2 - 2*d1*B - 2*d0 - d0*q1 >= np[0]*B^2 - 2*d1*B - B^2
271 thus q1 > np[0]*B/d1 - 2 - B/d1 > np[0]*B/d1 - 4.
272
273 For d1*B+d0 = B^2/2, dinv2 = B-1 thus q1 > np[0]*(2B-1)/B - 1 >
274 np[0]*B/d1 - 2.
275
276 In all cases, if q = floor((np[0]*B+np[1])/d1), we have:
277 q - 4 <= q1 <= q
278 */
279 umul_ppmm (q1, q0, np[0], dinv2.inv32);
280 qp[0] = np[0] + q1;
281
282 return qh;
283 }
284 #endif
285
286 /* Put in {qp, n} an approximation of N={np, 2*n} divided by D={dp, n},
287 with the most significant limb of the quotient as return value (0 or 1).
288 Assumes the most significant bit of D is set. Clobbers N.
289
290 This implements the ShortDiv algorithm from reference [1].
291 */
292 #if 1
293 mp_limb_t
mpfr_divhigh_n(mpfr_limb_ptr qp,mpfr_limb_ptr np,mpfr_limb_ptr dp,mp_size_t n)294 mpfr_divhigh_n (mpfr_limb_ptr qp, mpfr_limb_ptr np, mpfr_limb_ptr dp,
295 mp_size_t n)
296 {
297 mp_size_t k, l;
298 mp_limb_t qh, cy;
299 mpfr_limb_ptr tp;
300 MPFR_TMP_DECL(marker);
301
302 MPFR_ASSERTN (MPFR_MULHIGH_TAB_SIZE >= 15); /* so that 2*(n/3) >= (n+4)/2 */
303 k = MPFR_LIKELY (n < MPFR_DIVHIGH_TAB_SIZE) ? divhigh_ktab[n] : 2*(n/3);
304
305 if (k == 0)
306 #if defined(WANT_GMP_INTERNALS) && defined(HAVE___GMPN_SBPI1_DIVAPPR_Q)
307 {
308 mpfr_pi1_t dinv2;
309 invert_pi1 (dinv2, dp[n - 1], dp[n - 2]);
310 return __gmpn_sbpi1_divappr_q (qp, np, n + n, dp, n, dinv2.inv32);
311 }
312 #else /* use our own code for base-case short division */
313 return mpfr_divhigh_n_basecase (qp, np, dp, n);
314 #endif
315 else if (k == n)
316 /* for k=n, we use a division with remainder (mpn_divrem),
317 which computes the exact quotient */
318 return mpn_divrem (qp, 0, np, 2 * n, dp, n);
319
320 MPFR_ASSERTD ((n+4)/2 <= k && k < n); /* bounds from [1] */
321 MPFR_TMP_MARK (marker);
322 l = n - k;
323 /* first divide the most significant 2k limbs from N by the most significant
324 k limbs of D */
325 qh = mpn_divrem (qp + l, 0, np + 2 * l, 2 * k, dp + l, k); /* exact */
326
327 /* it remains {np,2l+k} = {np,n+l} as remainder */
328
329 /* now we have to subtract high(Q1)*D0 where Q1=qh*B^k+{qp+l,k} and
330 D0={dp,l} */
331 tp = MPFR_TMP_LIMBS_ALLOC (2 * l);
332 mpfr_mulhigh_n (tp, qp + k, dp, l);
333 /* we are only interested in the upper l limbs from {tp,2l} */
334 cy = mpn_sub_n (np + n, np + n, tp + l, l);
335 if (qh)
336 cy += mpn_sub_n (np + n, np + n, dp, l);
337 while (cy > 0) /* Q1 was too large: subtract 1 to Q1 and add D to np+l */
338 {
339 qh -= mpn_sub_1 (qp + l, qp + l, k, MPFR_LIMB_ONE);
340 cy -= mpn_add_n (np + l, np + l, dp, n);
341 }
342
343 /* now it remains {np,n+l} to divide by D */
344 cy = mpfr_divhigh_n (qp, np + k, dp + k, l);
345 qh += mpn_add_1 (qp + l, qp + l, k, cy);
346 MPFR_TMP_FREE(marker);
347
348 return qh;
349 }
350 #else /* below is the FoldDiv(K) algorithm from [1] */
351 mp_limb_t
mpfr_divhigh_n(mpfr_limb_ptr qp,mpfr_limb_ptr np,mpfr_limb_ptr dp,mp_size_t n)352 mpfr_divhigh_n (mpfr_limb_ptr qp, mpfr_limb_ptr np, mpfr_limb_ptr dp,
353 mp_size_t n)
354 {
355 mp_size_t k, r;
356 mpfr_limb_ptr ip, tp, up;
357 mp_limb_t qh = 0, cy, cc;
358 int count;
359 MPFR_TMP_DECL(marker);
360
361 #define K 3
362 if (n < K)
363 return mpn_divrem (qp, 0, np, 2 * n, dp, n);
364
365 k = (n - 1) / K + 1; /* ceil(n/K) */
366
367 MPFR_TMP_MARK (marker);
368 ip = MPFR_TMP_LIMBS_ALLOC (k + 1);
369 tp = MPFR_TMP_LIMBS_ALLOC (n + k);
370 up = MPFR_TMP_LIMBS_ALLOC (2 * (k + 1));
371 mpn_invert (ip, dp + n - (k + 1), k + 1, NULL); /* takes about 13% for n=1000 */
372 /* {ip, k+1} = floor((B^(2k+2)-1)/D - B^(k+1) where D = {dp+n-(k+1),k+1} */
373 for (r = n, cc = 0UL; r > 0;)
374 {
375 /* cc is the carry at np[n+r] */
376 MPFR_ASSERTD(cc <= 1);
377 /* FIXME: why can we have cc as large as say 8? */
378 count = 0;
379 while (cc > 0)
380 {
381 count ++;
382 MPFR_ASSERTD(count <= 1);
383 /* subtract {dp+n-r,r} from {np+n,r} */
384 cc -= mpn_sub_n (np + n, np + n, dp + n - r, r);
385 /* add 1 at qp[r] */
386 qh += mpn_add_1 (qp + r, qp + r, n - r, 1UL);
387 }
388 /* it remains r limbs to reduce, i.e., the remainder is {np, n+r} */
389 if (r < k)
390 {
391 ip += k - r;
392 k = r;
393 }
394 /* now r >= k */
395 /* qp + r - 2 * k -> up */
396 mpfr_mulhigh_n (up, np + n + r - (k + 1), ip, k + 1);
397 /* take into account the term B^k in the inverse: B^k * {np+n+r-k, k} */
398 cy = mpn_add_n (qp + r - k, up + k + 2, np + n + r - k, k);
399 /* since we need only r limbs of tp (below), it suffices to consider
400 r high limbs of dp */
401 if (r > k)
402 {
403 #if 0
404 mpn_mul (tp, dp + n - r, r, qp + r - k, k);
405 #else /* use a short product for the low k x k limbs */
406 /* we know the upper k limbs of the r-limb product cancel with the
407 remainder, thus we only need to compute the low r-k limbs */
408 if (r - k >= k)
409 mpn_mul (tp + k, dp + n - r + k, r - k, qp + r - k, k);
410 else /* r-k < k */
411 {
412 /* #define LOW */
413 #ifndef LOW
414 mpn_mul (tp + k, qp + r - k, k, dp + n - r + k, r - k);
415 #else
416 mpfr_mullow_n_basecase (tp + k, qp + r - k, dp + n - r + k, r - k);
417 /* take into account qp[2r-2k] * dp[n - r + k] */
418 tp[r] += qp[2*r-2*k] * dp[n - r + k];
419 #endif
420 /* tp[k..r] is filled */
421 }
422 #if 0
423 mpfr_mulhigh_n (up, dp + n - r, qp + r - k, k);
424 #else /* compute one more limb. FIXME: we could add one limb of dp in the
425 above, to save one mpn_addmul_1 call */
426 mpfr_mulhigh_n (up, dp + n - r, qp + r - k, k - 1); /* {up,2k-2} */
427 /* add {qp + r - k, k - 1} * dp[n-r+k-1] */
428 up[2*k-2] = mpn_addmul_1 (up + k - 1, qp + r - k, k-1, dp[n-r+k-1]);
429 /* add {dp+n-r, k} * qp[r-1] */
430 up[2*k-1] = mpn_addmul_1 (up + k - 1, dp + n - r, k, qp[r-1]);
431 #endif
432 #ifndef LOW
433 cc = mpn_add_n (tp + k, tp + k, up + k, k);
434 mpn_add_1 (tp + 2 * k, tp + 2 * k, r - k, cc);
435 #else
436 /* update tp[k..r] */
437 if (r - k + 1 <= k)
438 mpn_add_n (tp + k, tp + k, up + k, r - k + 1);
439 else /* r - k >= k */
440 {
441 cc = mpn_add_n (tp + k, tp + k, up + k, k);
442 mpn_add_1 (tp + 2 * k, tp + 2 * k, r - 2 * k + 1, cc);
443 }
444 #endif
445 #endif
446 }
447 else /* last step: since we only want the quotient, no need to update,
448 just propagate the carry cy */
449 {
450 MPFR_ASSERTD(r < n);
451 if (cy > 0)
452 qh += mpn_add_1 (qp + r, qp + r, n - r, cy);
453 break;
454 }
455 /* subtract {tp, n+k} from {np+r-k, n+k}; however we only want to
456 update {np+n, n} */
457 /* we should have tp[r] = np[n+r-k] up to 1 */
458 MPFR_ASSERTD(tp[r] == np[n + r - k] || tp[r] + 1 == np[n + r - k]);
459 #ifndef LOW
460 cc = mpn_sub_n (np + n - 1, np + n - 1, tp + k - 1, r + 1); /* borrow at np[n+r] */
461 #else
462 cc = mpn_sub_n (np + n - 1, np + n - 1, tp + k - 1, r - k + 2);
463 #endif
464 /* if cy = 1, subtract {dp, n} from {np+r, n}, thus
465 {dp+n-r,r} from {np+n,r} */
466 if (cy)
467 {
468 if (r < n)
469 cc += mpn_sub_n (np + n - 1, np + n - 1, dp + n - r - 1, r + 1);
470 else
471 cc += mpn_sub_n (np + n, np + n, dp + n - r, r);
472 /* propagate cy */
473 if (r == n)
474 qh = cy;
475 else
476 qh += mpn_add_1 (qp + r, qp + r, n - r, cy);
477 }
478 /* cc is the borrow at np[n+r] */
479 count = 0;
480 while (cc > 0) /* quotient was too large */
481 {
482 count++;
483 MPFR_ASSERTD (count <= 1);
484 cy = mpn_add_n (np + n, np + n, dp + n - (r - k), r - k);
485 cc -= mpn_add_1 (np + n + r - k, np + n + r - k, k, cy);
486 qh -= mpn_sub_1 (qp + r - k, qp + r - k, n - (r - k), 1UL);
487 }
488 r -= k;
489 cc = np[n + r];
490 }
491 MPFR_TMP_FREE(marker);
492
493 return qh;
494 }
495 #endif
496