1 /* hgcd.c.
2
3 THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
4 SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
5 GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
6
7 Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
8
9 This file is part of the GNU MP Library.
10
11 The GNU MP Library is free software; you can redistribute it and/or modify
12 it under the terms of the GNU Lesser General Public License as published by
13 the Free Software Foundation; either version 3 of the License, or (at your
14 option) any later version.
15
16 The GNU MP Library is distributed in the hope that it will be useful, but
17 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19 License for more details.
20
21 You should have received a copy of the GNU Lesser General Public License
22 along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
23
24 #include "gmp.h"
25 #include "gmp-impl.h"
26 #include "longlong.h"
27
28 /* For input of size n, matrix elements are of size at most ceil(n/2)
29 - 1, but we need two limbs extra. */
30 void
mpn_hgcd_matrix_init(struct hgcd_matrix * M,mp_size_t n,mp_ptr p)31 mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
32 {
33 mp_size_t s = (n+1)/2 + 1;
34 M->alloc = s;
35 M->n = 1;
36 MPN_ZERO (p, 4 * s);
37 M->p[0][0] = p;
38 M->p[0][1] = p + s;
39 M->p[1][0] = p + 2 * s;
40 M->p[1][1] = p + 3 * s;
41
42 M->p[0][0][0] = M->p[1][1][0] = 1;
43 }
44
45 /* Updated column COL, adding in column (1-COL). */
46 static void
hgcd_matrix_update_1(struct hgcd_matrix * M,unsigned col)47 hgcd_matrix_update_1 (struct hgcd_matrix *M, unsigned col)
48 {
49 mp_limb_t c0, c1;
50 ASSERT (col < 2);
51
52 c0 = mpn_add_n (M->p[0][col], M->p[0][0], M->p[0][1], M->n);
53 c1 = mpn_add_n (M->p[1][col], M->p[1][0], M->p[1][1], M->n);
54
55 M->p[0][col][M->n] = c0;
56 M->p[1][col][M->n] = c1;
57
58 M->n += (c0 | c1) != 0;
59 ASSERT (M->n < M->alloc);
60 }
61
/* Update column COL, adding in column Q * (1-COL). Temporary
 * storage: qn + n <= M->alloc, where n is the size of the largest
 * element in column 1 - COL. */
static void
hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
		      unsigned col, mp_ptr tp)
{
  ASSERT (col < 2);

  if (qn == 1)
    {
      /* Common single-limb quotient: a pair of addmul_1 calls
	 suffices, and no temporary storage is needed. */
      mp_limb_t q = qp[0];
      mp_limb_t c0, c1;

      c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
      c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);

      M->p[0][col][M->n] = c0;
      M->p[1][col][M->n] = c1;

      /* Grow by one limb exactly when some carry limb is non-zero. */
      M->n += (c0 | c1) != 0;
    }
  else
    {
      unsigned row;

      /* Carries for the unlikely case that we get both high words
	 from the multiplication and carries from the addition. */
      mp_limb_t c[2];
      mp_size_t n;

      /* The matrix will not necessarily grow in size by qn, so we
	 need normalization in order not to overflow M. */

      /* Find the normalized size n of the largest element in column
	 1-COL; stop looking once n + qn <= M->n, since then the
	 products cannot extend beyond the current size anyway. */
      for (n = M->n; n + qn > M->n; n--)
	{
	  ASSERT (n > 0);
	  if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
	    break;
	}

      ASSERT (qn + n <= M->alloc);

      for (row = 0; row < 2; row++)
	{
	  /* mpn_mul requires its first operand to be at least as
	     large as its second. */
	  if (qn <= n)
	    mpn_mul (tp, M->p[row][1-col], n, qp, qn);
	  else
	    mpn_mul (tp, qp, qn, M->p[row][1-col], n);

	  ASSERT (n + qn >= M->n);
	  c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
	}
      if (c[0] | c[1])
	{
	  /* Carry out of the addition: elements now occupy
	     n + qn + 1 limbs, with the carries as high limbs. */
	  M->n = n + qn + 1;
	  M->p[0][col][M->n - 1] = c[0];
	  M->p[1][col][M->n - 1] = c[1];
	}
      else
	{
	  /* Strip at most one high zero limb, and grow M->n only if
	     the updated column actually got larger. */
	  n += qn;
	  n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
	  if (n > M->n)
	    M->n = n;
	}
    }

  ASSERT (M->n < M->alloc);
}
132
133 /* Multiply M by M1 from the right. Since the M1 elements fit in
134 GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
135 temporary space M->n */
136 static void
hgcd_matrix_mul_1(struct hgcd_matrix * M,const struct hgcd_matrix1 * M1,mp_ptr tp)137 hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
138 mp_ptr tp)
139 {
140 mp_size_t n0, n1;
141
142 /* Could avoid copy by some swapping of pointers. */
143 MPN_COPY (tp, M->p[0][0], M->n);
144 n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
145 MPN_COPY (tp, M->p[1][0], M->n);
146 n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
147
148 /* Depends on zero initialization */
149 M->n = MAX(n0, n1);
150 ASSERT (M->n < M->alloc);
151 }
152
153 /* Perform a few steps, using some of mpn_hgcd2, subtraction and
154 division. Reduces the size by almost one limb or more, but never
155 below the given size s. Return new size for a and b, or 0 if no
156 more steps are possible.
157
   If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1, M->n
   limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
   fails, needs space for the quotient, qn <= n - s + 1 limbs, and for
   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
   resulting size of M.
163
164 If N is the input size to the calling hgcd, then s = floor(N/2) +
165 1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
166 < N, so N is sufficient.
167 */
168
static mp_size_t
hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
	   struct hgcd_matrix *M, mp_ptr tp)
{
  struct hgcd_matrix1 M1;
  mp_limb_t mask;
  mp_limb_t ah, al, bh, bl;
  mp_size_t an, bn, qn;
  int col;

  ASSERT (n > s);

  mask = ap[n-1] | bp[n-1];
  ASSERT (mask > 0);

  if (n == s + 1)
    {
      /* Only one limb of margin. With fewer than two significant
	 bits in the high limbs, go directly to the subtraction
	 fallback. */
      if (mask < 4)
	goto subtract;

      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else if (mask & GMP_NUMB_HIGHBIT)
    {
      /* High limbs already normalized; use them directly. */
      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else
    {
      /* Shift so that the extracted double-limb values are
	 normalized (high bit set in the combined mask). */
      int shift;

      count_leading_zeros (shift, mask);
      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
    }

  /* Try an mpn_hgcd2 step */
  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
    {
      /* Multiply M <- M * M1 */
      hgcd_matrix_mul_1 (M, &M1, tp);

      /* Can't swap inputs, so we need to copy. */
      MPN_COPY (tp, ap, n);
      /* Multiply M1^{-1} (a;b) */
      return mpn_hgcd_mul_matrix1_inverse_vector (&M1, ap, tp, bp, n);
    }

 subtract:
  /* There are two ways in which mpn_hgcd2 can fail. Either one of ah and
     bh was too small, or ah, bh were (almost) equal. Perform one
     subtraction step (for possible cancellation of high limbs),
     followed by one division. */

  /* Since we must ensure that #(a-b) > s, we handle cancellation of
     high limbs explicitly up front. (FIXME: Or is it better to just
     subtract, normalize, and use an addition to undo if it turns out
     the difference is too small?) */
  for (an = n; an > s; an--)
    if (ap[an-1] != bp[an-1])
      break;

  /* All limbs above position s are equal: no step is possible without
     dropping below s. */
  if (an == s)
    return 0;

  /* Maintain a > b. When needed, swap a and b, and let col keep track
     of how to update M. */
  if (ap[an-1] > bp[an-1])
    {
      /* a is largest. In the subtraction step, we need to update
	 column 1 of M */
      col = 1;
    }
  else
    {
      MP_PTR_SWAP (ap, bp);
      col = 0;
    }

  bn = n;
  MPN_NORMALIZE (bp, bn);
  if (bn <= s)
    return 0;

  /* We have #a, #b > s. When is it possible that #(a-b) < s? For
     cancellation to happen, the numbers must be of the form

       a = x + 1, 0,            ..., 0,            al
       b = x    , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl

     where al, bl denotes the least significant k limbs. If al < bl,
     then #(a-b) < k, and if also high(al) != 0, high(bl) != GMP_NUMB_MAX,
     then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */

  if (ap[an-1] == bp[an-1] + 1)
    {
      mp_size_t k;
      int c;
      /* Find the lowest limb position k above which the subtraction
	 borrows all the way up (a-limbs 0, b-limbs all ones). */
      for (k = an-1; k > s; k--)
	if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX)
	  break;

      MPN_CMP (c, ap, bp, k);
      if (c < 0)
	{
	  mp_limb_t cy;

	  /* The limbs from k and up are cancelled. */
	  if (k == s)
	    return 0;
	  cy = mpn_sub_n (ap, ap, bp, k);
	  ASSERT (cy == 1);
	  an = k;
	}
      else
	{
	  /* No cancellation of limb k itself; the difference is the
	     low k limbs of a - b plus a one limb at position k. */
	  ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k));
	  ap[k] = 1;
	  an = k + 1;
	}
    }
  else
    ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an));

  ASSERT (an > s);
  ASSERT (ap[an-1] > 0);
  ASSERT (bn > s);
  ASSERT (bp[bn-1] > 0);

  /* Account for the subtraction step in M. */
  hgcd_matrix_update_1 (M, col);

  /* Re-establish a >= b before dividing. */
  if (an < bn)
    {
      MPN_PTR_SWAP (ap, an, bp, bn);
      col ^= 1;
    }
  else if (an == bn)
    {
      int c;
      MPN_CMP (c, ap, bp, an);
      if (c < 0)
	{
	  MP_PTR_SWAP (ap, bp);
	  col ^= 1;
	}
    }

  /* Divide a / b. */
  qn = an + 1 - bn;

  /* FIXME: We could use an approximate division, that may return a
     too small quotient, and only guarantee that the size of r is
     almost the size of b. FIXME: Let ap and remainder overlap. */
  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
  qn -= (tp[qn -1] == 0);

  /* Normalize remainder */
  an = bn;
  for ( ; an > s; an--)
    if (ap[an-1] > 0)
      break;

  if (an <= s)
    {
      /* Quotient is too large: the remainder dropped to size <= s.
	 Undo one step by adding b back and decrementing the
	 quotient. */
      mp_limb_t cy;

      cy = mpn_add (ap, bp, bn, ap, an);

      if (cy > 0)
	{
	  ASSERT (bn < n);
	  ap[bn] = cy;
	  bp[bn] = 0;
	  bn++;
	}

      MPN_DECR_U (tp, qn, 1);
      qn -= (tp[qn-1] == 0);
    }

  /* Account for the division step in M (quotient may be zero if the
     division reduced to a pure subtraction). */
  if (qn > 0)
    hgcd_matrix_update_q (M, tp, qn, col, tp + qn);

  return bn;
}
358
359 /* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
360 with elements of size at most (n+1)/2 - 1. Returns new size of a,
361 b, or zero if no reduction is possible. */
362 mp_size_t
mpn_hgcd_lehmer(mp_ptr ap,mp_ptr bp,mp_size_t n,struct hgcd_matrix * M,mp_ptr tp)363 mpn_hgcd_lehmer (mp_ptr ap, mp_ptr bp, mp_size_t n,
364 struct hgcd_matrix *M, mp_ptr tp)
365 {
366 mp_size_t s = n/2 + 1;
367 mp_size_t nn;
368
369 ASSERT (n > s);
370 ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
371
372 nn = hgcd_step (n, ap, bp, s, M, tp);
373 if (!nn)
374 return 0;
375
376 for (;;)
377 {
378 n = nn;
379 ASSERT (n > s);
380 nn = hgcd_step (n, ap, bp, s, M, tp);
381 if (!nn )
382 return n;
383 }
384 }
385
386 /* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
387 of temporary storage (see mpn_matrix22_mul_itch). */
388 void
mpn_hgcd_matrix_mul(struct hgcd_matrix * M,const struct hgcd_matrix * M1,mp_ptr tp)389 mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
390 mp_ptr tp)
391 {
392 mp_size_t n;
393
394 /* About the new size of M:s elements. Since M1's diagonal elements
395 are > 0, no element can decrease. The new elements are of size
396 M->n + M1->n, one limb more or less. The computation of the
397 matrix product produces elements of size M->n + M1->n + 1. But
398 the true size, after normalization, may be three limbs smaller.
399
400 The reason that the product has normalized size >= M->n + M1->n -
401 2 is subtle. It depends on the fact that M and M1 can be factored
402 as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
403 M ending with a large power and M1 starting with a large power of
404 the same matrix. */
405
406 /* FIXME: Strassen multiplication gives only a small speedup. In FFT
407 multiplication range, this function could be sped up quite a lot
408 using invariance. */
409 ASSERT (M->n + M1->n < M->alloc);
410
411 ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
412 | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
413
414 ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
415 | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
416
417 mpn_matrix22_mul (M->p[0][0], M->p[0][1],
418 M->p[1][0], M->p[1][1], M->n,
419 M1->p[0][0], M1->p[0][1],
420 M1->p[1][0], M1->p[1][1], M1->n, tp);
421
422 /* Index of last potentially non-zero limb, size is one greater. */
423 n = M->n + M1->n;
424
425 n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
426 n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
427 n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
428
429 ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
430
431 M->n = n + 1;
432 }
433
/* Multiplies the least significant p limbs of (a;b) by M^-1.
   Temporary space needed: 2 * (p + M->n)*/
mp_size_t
mpn_hgcd_matrix_adjust (struct hgcd_matrix *M,
			mp_size_t n, mp_ptr ap, mp_ptr bp,
			mp_size_t p, mp_ptr tp)
{
  /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
     = (r11 a - r01 b; - r10 a + r00 b */

  mp_ptr t0 = tp;		/* product area, p + M->n limbs */
  mp_ptr t1 = tp + p + M->n;	/* second product area, p + M->n limbs */
  mp_limb_t ah, bh;		/* carry limbs above position n */
  mp_limb_t cy;

  ASSERT (p + M->n < n);

  /* First compute the two values depending on a, before overwriting a */

  /* mpn_mul wants its larger operand first. */
  if (M->n >= p)
    {
      mpn_mul (t0, M->p[1][1], M->n, ap, p);	/* t0 <- r11 * a_low */
      mpn_mul (t1, M->p[1][0], M->n, ap, p);	/* t1 <- r10 * a_low */
    }
  else
    {
      mpn_mul (t0, ap, p, M->p[1][1], M->n);
      mpn_mul (t1, ap, p, M->p[1][0], M->n);
    }

  /* Update a: replace the low p limbs by the low part of r11 * a_low,
     and add the high part into the limbs above. */
  MPN_COPY (ap, t0, p);
  ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);

  if (M->n >= p)
    mpn_mul (t0, M->p[0][1], M->n, bp, p);	/* t0 <- r01 * b_low */
  else
    mpn_mul (t0, bp, p, M->p[0][1], M->n);

  /* a <- r11 a - r01 b; the borrow must be covered by the carry ah. */
  cy = mpn_sub (ap, ap, n, t0, p + M->n);
  ASSERT (cy <= ah);
  ah -= cy;

  /* Update b: same pattern, b <- r00 b - r10 a (using t1 computed
     from a before it was overwritten). */
  if (M->n >= p)
    mpn_mul (t0, M->p[0][0], M->n, bp, p);	/* t0 <- r00 * b_low */
  else
    mpn_mul (t0, bp, p, M->p[0][0], M->n);

  MPN_COPY (bp, t0, p);
  bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
  cy = mpn_sub (bp, bp, n, t1, p + M->n);
  ASSERT (cy <= bh);
  bh -= cy;

  if (ah > 0 || bh > 0)
    {
      /* A carry limb survived; the results occupy n + 1 limbs. */
      ap[n] = ah;
      bp[n] = bh;
      n++;
    }
  else
    {
      /* The subtraction can reduce the size by at most one limb. */
      if (ap[n-1] == 0 && bp[n-1] == 0)
	n--;
    }
  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
  return n;
}
504
505 /* Size analysis for hgcd:
506
507 For the recursive calls, we have n1 <= ceil(n / 2). Then the
508 storage need is determined by the storage for the recursive call
509 computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1
510 (after this, the storage needed for M1 can be recycled).
511
512 Let S(r) denote the required storage. For M1 we need 4 * (ceil(n1/2) + 1)
513 = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
514 and for the hgcd_matrix_mul, we may need 3 ceil(n/2) + 8. In total,
515 4 * ceil(n/4) + 3 ceil(n/2) + 12 <= 10 ceil(n/4) + 12.
516
517 For the recursive call, we need S(n1) = S(ceil(n/2)).
518
519 S(n) <= 10*ceil(n/4) + 12 + S(ceil(n/2))
520 <= 10*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 12k + S(ceil(n/2^k))
521 <= 10*(2 ceil(n/4) + k) + 12k + S(ceil(n/2^k))
522 <= 20 ceil(n/4) + 22k + S(ceil(n/2^k))
523 */
524
525 mp_size_t
mpn_hgcd_itch(mp_size_t n)526 mpn_hgcd_itch (mp_size_t n)
527 {
528 unsigned k;
529 int count;
530 mp_size_t nscaled;
531
532 if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
533 return MPN_HGCD_LEHMER_ITCH (n);
534
535 /* Get the recursion depth. */
536 nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
537 count_leading_zeros (count, nscaled);
538 k = GMP_LIMB_BITS - count;
539
540 return 20 * ((n+3) / 4) + 22 * k
541 + MPN_HGCD_LEHMER_ITCH (HGCD_THRESHOLD);
542 }
543
/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
   with elements of size at most (n+1)/2 - 1. Returns new size of a,
   b, or zero if no reduction is possible. */

mp_size_t
mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
	  struct hgcd_matrix *M, mp_ptr tp)
{
  mp_size_t s = n/2 + 1;	/* never reduce a, b below this size */
  mp_size_t n2 = (3*n)/4 + 1;	/* size at which the second recursion kicks in */

  mp_size_t p, nn;
  int success = 0;		/* set once any reduction has been made */

  if (n <= s)
    /* Happens when n <= 2, a fairly uninteresting case but exercised
       by the random inputs of the testsuite. */
    return 0;

  ASSERT ((ap[n-1] | bp[n-1]) > 0);

  ASSERT ((n+1)/2 - 1 < M->alloc);

  if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
    return mpn_hgcd_lehmer (ap, bp, n, M, tp);

  /* First recursive call, on the high ceil(n/2) limbs. */
  p = n/2;
  nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
  if (nn > 0)
    {
      /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
	 = 2 (n - 1) */
      n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
      success = 1;
    }
  /* Single steps until we are small enough for the second recursive
     call. */
  while (n > n2)
    {
      /* Needs n + 1 storage */
      nn = hgcd_step (n, ap, bp, s, M, tp);
      if (!nn)
	return success ? n : 0;
      n = nn;
      success = 1;
    }

  if (n > s + 2)
    {
      struct hgcd_matrix M1;
      mp_size_t scratch;

      /* Second recursive call, on the high n - p limbs; p is chosen
	 so that the recursion cannot reduce below size s. */
      p = 2*s - n + 1;
      scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);

      mpn_hgcd_matrix_init(&M1, n - p, tp);
      nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
      if (nn > 0)
	{
	  /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
	  ASSERT (M->n + 2 >= M1.n);

	  /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
	     then either q or q + 1 is a correct quotient, and M1 will
	     start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
	     rules out the case that the size of M * M1 is much
	     smaller than the expected M->n + M1->n. */

	  ASSERT (M->n + M1.n < M->alloc);

	  /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
	     = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
	  n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);

	  /* We need a bound for of M->n + M1.n. Let n be the original
	     input size. Then

	       ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2

	     and it follows that

	       M.n + M1.n <= ceil(n/2) + 1

	     Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
	     amount of needed scratch space. */
	  mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
	  success = 1;
	}
    }

  /* This really is the base case */
  for (;;)
    {
      /* Needs s+3 < n */
      nn = hgcd_step (n, ap, bp, s, M, tp);
      if (!nn)
	return success ? n : 0;

      n = nn;
      success = 1;
    }
}
644