/* hgcd.c.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

/* For input of size n, matrix elements are of size at most ceil(n/2)
   - 1, but we need two limbs extra.

   Initializes M as the 2x2 identity matrix, with all four elements
   stored consecutively in the caller-supplied area P, each with room
   for s = ceil(n/2) + 1 limbs.  */
void
mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
{
  mp_size_t s = (n+1)/2 + 1;
  M->alloc = s;
  M->n = 1;
  /* Clear the whole area; later code (e.g. hgcd_matrix_mul_1) depends
     on the limbs above the current size M->n being zero.  */
  MPN_ZERO (p, 4 * s);
  M->p[0][0] = p;
  M->p[0][1] = p + s;
  M->p[1][0] = p + 2 * s;
  M->p[1][1] = p + 3 * s;

  /* Identity matrix: ones on the diagonal, zeros elsewhere.  */
  M->p[0][0][0] = M->p[1][1][0] = 1;
}

/* Update column COL, adding in column (1-COL).
*/ 46 static void 47 hgcd_matrix_update_1 (struct hgcd_matrix *M, unsigned col) 48 { 49 mp_limb_t c0, c1; 50 ASSERT (col < 2); 51 52 c0 = mpn_add_n (M->p[0][col], M->p[0][0], M->p[0][1], M->n); 53 c1 = mpn_add_n (M->p[1][col], M->p[1][0], M->p[1][1], M->n); 54 55 M->p[0][col][M->n] = c0; 56 M->p[1][col][M->n] = c1; 57 58 M->n += (c0 | c1) != 0; 59 ASSERT (M->n < M->alloc); 60 } 61 62 /* Updated column COL, adding in column Q * (1-COL). Temporary 63 * storage: qn + n <= M->alloc, where n is the size of the largest 64 * element in column 1 - COL. */ 65 static void 66 hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn, 67 unsigned col, mp_ptr tp) 68 { 69 ASSERT (col < 2); 70 71 if (qn == 1) 72 { 73 mp_limb_t q = qp[0]; 74 mp_limb_t c0, c1; 75 76 c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q); 77 c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q); 78 79 M->p[0][col][M->n] = c0; 80 M->p[1][col][M->n] = c1; 81 82 M->n += (c0 | c1) != 0; 83 } 84 else 85 { 86 unsigned row; 87 88 /* Carries for the unlikely case that we get both high words 89 from the multiplication and carries from the addition. */ 90 mp_limb_t c[2]; 91 mp_size_t n; 92 93 /* The matrix will not necessarily grow in size by qn, so we 94 need normalization in order not to overflow M. 
*/ 95 96 for (n = M->n; n + qn > M->n; n--) 97 { 98 ASSERT (n > 0); 99 if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0) 100 break; 101 } 102 103 ASSERT (qn + n <= M->alloc); 104 105 for (row = 0; row < 2; row++) 106 { 107 if (qn <= n) 108 mpn_mul (tp, M->p[row][1-col], n, qp, qn); 109 else 110 mpn_mul (tp, qp, qn, M->p[row][1-col], n); 111 112 ASSERT (n + qn >= M->n); 113 c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n); 114 } 115 if (c[0] | c[1]) 116 { 117 M->n = n + qn + 1; 118 M->p[0][col][n-1] = c[0]; 119 M->p[1][col][n-1] = c[1]; 120 } 121 else 122 { 123 n += qn; 124 n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0; 125 if (n > M->n) 126 M->n = n; 127 } 128 } 129 130 ASSERT (M->n < M->alloc); 131 } 132 133 /* Multiply M by M1 from the right. Since the M1 elements fit in 134 GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs 135 temporary space M->n */ 136 static void 137 hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1, 138 mp_ptr tp) 139 { 140 mp_size_t n0, n1; 141 142 /* Could avoid copy by some swapping of pointers. */ 143 MPN_COPY (tp, M->p[0][0], M->n); 144 n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n); 145 MPN_COPY (tp, M->p[1][0], M->n); 146 n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n); 147 148 /* Depends on zero initialization */ 149 M->n = MAX(n0, n1); 150 ASSERT (M->n < M->alloc); 151 } 152 153 /* Perform a few steps, using some of mpn_hgcd2, subtraction and 154 division. Reduces the size by almost one limb or more, but never 155 below the given size s. Return new size for a and b, or 0 if no 156 more steps are possible. 157 158 If hgcd2 succeds, needs temporary space for hgcd_matrix_mul_1, M->n 159 limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2 160 fails, needs space for the quotient, qn <= n - s + 1 limbs, for and 161 hgcd_matrix_update_q, qn + (size of the appropriate column of M) <= 162 resulting size of $. 
   If N is the input size to the calling hgcd, then s = floor(N/2) +
   1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
   < N, so N is sufficient.
*/

static mp_size_t
hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
	   struct hgcd_matrix *M, mp_ptr tp)
{
  struct hgcd_matrix1 M1;
  mp_limb_t mask;
  mp_limb_t ah, al, bh, bl;
  mp_size_t an, bn, qn;
  int col;

  ASSERT (n > s);

  mask = ap[n-1] | bp[n-1];
  ASSERT (mask > 0);

  /* Extract the two most significant limbs of a and b, left-shifted
     into normalized position when neither high limb has its top bit
     set.  */
  if (n == s + 1)
    {
      /* At minimal size we may not read limb n-3; require the high
	 limbs to carry enough bits for hgcd2 to be meaningful.  */
      if (mask < 4)
	goto subtract;

      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else if (mask & GMP_NUMB_HIGHBIT)
    {
      ah = ap[n-1]; al = ap[n-2];
      bh = bp[n-1]; bl = bp[n-2];
    }
  else
    {
      int shift;

      count_leading_zeros (shift, mask);
      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
    }

  /* Try an mpn_hgcd2 step */
  if (mpn_hgcd2 (ah, al, bh, bl, &M1))
    {
      /* Multiply M <- M * M1 */
      hgcd_matrix_mul_1 (M, &M1, tp);

      /* Can't swap inputs, so we need to copy. */
      MPN_COPY (tp, ap, n);
      /* Multiply M1^{-1} (a;b) */
      return mpn_hgcd_mul_matrix1_inverse_vector (&M1, ap, tp, bp, n);
    }

 subtract:
  /* There are two ways in which mpn_hgcd2 can fail. Either one of ah and
     bh was too small, or ah, bh were (almost) equal. Perform one
     subtraction step (for possible cancellation of high limbs),
     followed by one division. */

  /* Since we must ensure that #(a-b) > s, we handle cancellation of
     high limbs explicitly up front. (FIXME: Or is it better to just
     subtract, normalize, and use an addition to undo if it turns out
     the difference is too small?) */
  for (an = n; an > s; an--)
    if (ap[an-1] != bp[an-1])
      break;

  /* All limbs above position s agree; a subtraction would drop below
     the size limit, so no further step is possible.  */
  if (an == s)
    return 0;

  /* Maintain a > b. When needed, swap a and b, and let col keep track
     of how to update M. */
  if (ap[an-1] > bp[an-1])
    {
      /* a is largest. In the subtraction step, we need to update
	 column 1 of M */
      col = 1;
    }
  else
    {
      MP_PTR_SWAP (ap, bp);
      col = 0;
    }

  bn = n;
  MPN_NORMALIZE (bp, bn);
  if (bn <= s)
    return 0;

  /* We have #a, #b > s. When is it possible that #(a-b) < s? For
     cancellation to happen, the numbers must be of the form

       a = x + 1, 0,            ..., 0,            al
       b = x    , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl

     where al, bl denotes the least significant k limbs. If al < bl,
     then #(a-b) < k, and if also high(al) != 0, high(bl) != GMP_NUMB_MAX,
     then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */

  if (ap[an-1] == bp[an-1] + 1)
    {
      mp_size_t k;
      int c;
      /* Find the lowest position k above which the cancellation
	 pattern (a: zeros, b: GMP_NUMB_MAX) holds.  */
      for (k = an-1; k > s; k--)
	if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX)
	  break;

      MPN_CMP (c, ap, bp, k);
      if (c < 0)
	{
	  mp_limb_t cy;

	  /* The limbs from k and up are cancelled. */
	  if (k == s)
	    return 0;
	  cy = mpn_sub_n (ap, ap, bp, k);
	  ASSERT (cy == 1);
	  an = k;
	}
      else
	{
	  /* No borrow out of the low k limbs; the high cancelling
	     part leaves a single 1 at position k.  */
	  ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k));
	  ap[k] = 1;
	  an = k + 1;
	}
    }
  else
    ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an));

  ASSERT (an > s);
  ASSERT (ap[an-1] > 0);
  ASSERT (bn > s);
  ASSERT (bp[bn-1] > 0);

  hgcd_matrix_update_1 (M, col);

  /* Re-establish a > b before dividing.  */
  if (an < bn)
    {
      MPN_PTR_SWAP (ap, an, bp, bn);
      col ^= 1;
    }
  else if (an == bn)
    {
      int c;
      MPN_CMP (c, ap, bp, an);
      if (c < 0)
	{
	  MP_PTR_SWAP (ap, bp);
	  col ^= 1;
	}
    }

  /* Divide a / b. */
  qn = an + 1 - bn;

  /* FIXME: We could use an approximate division, that may return a
     too small quotient, and only guarantee that the size of r is
     almost the size of b. FIXME: Let ap and remainder overlap. */
  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
  qn -= (tp[qn -1] == 0);

  /* Normalize remainder */
  an = bn;
  for ( ; an > s; an--)
    if (ap[an-1] > 0)
      break;

  if (an <= s)
    {
      /* Quotient is too large */
      mp_limb_t cy;

      /* Add b back, i.e. use quotient q - 1 instead of q.  */
      cy = mpn_add (ap, bp, bn, ap, an);

      if (cy > 0)
	{
	  ASSERT (bn < n);
	  ap[bn] = cy;
	  bp[bn] = 0;
	  bn++;
	}

      MPN_DECR_U (tp, qn, 1);
      qn -= (tp[qn-1] == 0);
    }

  if (qn > 0)
    hgcd_matrix_update_q (M, tp, qn, col, tp + qn);

  return bn;
}

/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
   with elements of size at most (n+1)/2 - 1. Returns new size of a,
   b, or zero if no reduction is possible. */
mp_size_t
mpn_hgcd_lehmer (mp_ptr ap, mp_ptr bp, mp_size_t n,
		 struct hgcd_matrix *M, mp_ptr tp)
{
  mp_size_t s = n/2 + 1;
  mp_size_t nn;

  ASSERT (n > s);
  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);

  /* The very first step must succeed for any reduction at all.  */
  nn = hgcd_step (n, ap, bp, s, M, tp);
  if (!nn)
    return 0;

  /* Keep stepping; return the size reached by the last successful
     step.  */
  for (;;)
    {
      n = nn;
      ASSERT (n > s);
      nn = hgcd_step (n, ap, bp, s, M, tp);
      if (!nn)
	return n;
    }
}

/* Multiply M by M1 from the right. Needs 4*(M->n + M1->n) + 5 limbs
   of temporary storage (see mpn_matrix22_mul_itch). */
void
mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
		     mp_ptr tp)
{
  mp_size_t n;

  /* About the new size of M's elements. Since M1's diagonal elements
     are > 0, no element can decrease. The new elements are of size
     M->n + M1->n, one limb more or less. The computation of the
     matrix product produces elements of size M->n + M1->n + 1.
     But the true size, after normalization, may be three limbs
     smaller. */

  /* FIXME: Strassen multiplication gives only a small speedup. In FFT
     multiplication range, this function could be sped up quite a lot
     using invariance. */
  ASSERT (M->n + M1->n < M->alloc);

  /* Both operands must be normalized: at least one element of each
     has a non-zero top limb.  */
  ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
	   | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);

  ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
	   | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);

  mpn_matrix22_mul (M->p[0][0], M->p[0][1],
		    M->p[1][0], M->p[1][1], M->n,
		    M1->p[0][0], M1->p[0][1],
		    M1->p[1][0], M1->p[1][1], M1->n, tp);

  /* Index of last potentially non-zero limb, size is one greater. */
  n = M->n + M1->n;

  /* Strip up to three all-zero top limb positions (see size remark
     above).  */
  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
  n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);

  ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);

  M->n = n + 1;
}

/* Multiplies the least significant p limbs of (a;b) by M^-1.
   Temporary space needed: 2 * (p + M->n) */
mp_size_t
mpn_hgcd_matrix_adjust (struct hgcd_matrix *M,
			mp_size_t n, mp_ptr ap, mp_ptr bp,
			mp_size_t p, mp_ptr tp)
{
  /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
     = (r11 a - r01 b; - r10 a + r00 b) */

  mp_ptr t0 = tp;
  mp_ptr t1 = tp + p + M->n;
  mp_limb_t ah, bh;
  mp_limb_t cy;

  ASSERT (p + M->n < n);

  /* First compute the two values depending on a, before overwriting a */

  /* mpn_mul requires the larger operand first.  */
  if (M->n >= p)
    {
      mpn_mul (t0, M->p[1][1], M->n, ap, p);
      mpn_mul (t1, M->p[1][0], M->n, ap, p);
    }
  else
    {
      mpn_mul (t0, ap, p, M->p[1][1], M->n);
      mpn_mul (t1, ap, p, M->p[1][0], M->n);
    }

  /* Update a: replace the low p limbs by r11 * a_low, then subtract
     r01 * b_low below.  */
  MPN_COPY (ap, t0, p);
  ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);

  if (M->n >= p)
    mpn_mul (t0, M->p[0][1], M->n, bp, p);
  else
    mpn_mul (t0, bp, p, M->p[0][1], M->n);

  cy = mpn_sub (ap, ap, n, t0, p + M->n);
  ASSERT (cy <= ah);
  ah -= cy;

  /* Update b */
  if (M->n >= p)
    mpn_mul (t0, M->p[0][0], M->n, bp, p);
  else
    mpn_mul (t0, bp, p, M->p[0][0], M->n);

  MPN_COPY (bp, t0, p);
  bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
  cy = mpn_sub (bp, bp, n, t1, p + M->n);
  ASSERT (cy <= bh);
  bh -= cy;

  if (ah > 0 || bh > 0)
    {
      /* A high limb remains; the results grow to n + 1 limbs.  */
      ap[n] = ah;
      bp[n] = bh;
      n++;
    }
  else
    {
      /* The subtraction can reduce the size by at most one limb. */
      if (ap[n-1] == 0 && bp[n-1] == 0)
	n--;
    }
  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
  return n;
}

/* Size analysis for hgcd:

   For the recursive calls, we have n1 <= ceil(n / 2). Then the
   storage need is determined by the storage for the recursive call
   computing M1, and hgcd_matrix_adjust and hgcd_matrix_mul calls that use M1
   (after this, the storage needed for M1 can be recycled).

   Let S(r) denote the required storage.
   For M1 we need 4 * (ceil(n1/2) + 1)
   = 4 * (ceil(n/4) + 1), for the hgcd_matrix_adjust call, we need n + 2,
   and for the hgcd_matrix_mul, we may need 4 ceil(n/2) + 1. In total,
   4 * ceil(n/4) + 4 ceil(n/2) + 5 <= 12 ceil(n/4) + 5.

   For the recursive call, we need S(n1) = S(ceil(n/2)).

   S(n) <= 12*ceil(n/4) + 5 + S(ceil(n/2))
	<= 12*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 5k + S(ceil(n/2^k))
	<= 12*(2 ceil(n/4) + k) + 5k + S(n/2^k)
	<= 24 ceil(n/4) + 17k + S(n/2^k)
*/

mp_size_t
mpn_hgcd_itch (mp_size_t n)
{
  unsigned k;
  int count;
  mp_size_t nscaled;

  /* Below the threshold the Lehmer loop is used directly.  */
  if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
    return MPN_HGCD_LEHMER_ITCH (n);

  /* Get the recursion depth. */
  nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
  count_leading_zeros (count, nscaled);
  k = GMP_LIMB_BITS - count;

  /* Matches the bound S(n) <= 24 ceil(n/4) + 17 k + S(base) derived
     in the size analysis above.  */
  return 24 * ((n+3) / 4) + 17 * k
    + MPN_HGCD_LEHMER_ITCH (HGCD_THRESHOLD);
}

/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
   with elements of size at most (n+1)/2 - 1. Returns new size of a,
   b, or zero if no reduction is possible. */

mp_size_t
mpn_hgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
	  struct hgcd_matrix *M, mp_ptr tp)
{
  /* s is the size floor we never reduce below; n2 is the target size
     after the first recursive call plus step loop.  */
  mp_size_t s = n/2 + 1;
  mp_size_t n2 = (3*n)/4 + 1;

  mp_size_t p, nn;
  int success = 0;

  if (n <= s)
    /* Happens when n <= 2, a fairly uninteresting case but exercised
       by the random inputs of the testsuite. */
    return 0;

  ASSERT ((ap[n-1] | bp[n-1]) > 0);

  ASSERT ((n+1)/2 - 1 < M->alloc);

  if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
    return mpn_hgcd_lehmer (ap, bp, n, M, tp);

  /* First recursive call, on the high n - p limbs; M accumulates the
     transformation directly.  */
  p = n/2;
  nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
  if (nn > 0)
    {
      /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
	 = 2 (n - 1) */
      n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
      success = 1;
    }
  while (n > n2)
    {
      /* Needs n + 1 storage */
      nn = hgcd_step (n, ap, bp, s, M, tp);
      if (!nn)
	return success ? n : 0;
      n = nn;
      success = 1;
    }

  if (n > s + 2)
    {
      struct hgcd_matrix M1;
      mp_size_t scratch;

      /* Second recursive call, accumulating into a fresh matrix M1
	 that is multiplied into M afterwards.  */
      p = 2*s - n + 1;
      scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);

      mpn_hgcd_matrix_init(&M1, n - p, tp);
      nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
      if (nn > 0)
	{
	  /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
	  ASSERT (M->n + 2 >= M1.n);

	  /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
	     then either q or q + 1 is a correct quotient, and M1 will
	     start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
	     rules out the case that the size of M * M1 is much
	     smaller than the expected M->n + M1->n. */

	  ASSERT (M->n + M1.n < M->alloc);

	  /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
	     = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
	  n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
	  /* Needs 4 ceil(n/2) + 1 */
	  mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
	  success = 1;
	}
    }

  /* This really is the base case */
  for (;;)
    {
      /* Needs s+3 < n */
      nn = hgcd_step (n, ap, bp, s, M, tp);
      if (!nn)
	return success ? n : 0;

      n = nn;
      success = 1;
    }
}