1 /*- 2 * Copyright (c) 2012 Alistair Crooks <agc@NetBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 /* LibTomMath, multiple-precision integer library -- Tom St Denis 26 * 27 * LibTomMath is a library that provides multiple-precision 28 * integer arithmetic as well as number theoretic functionality. 29 * 30 * The library was designed directly after the MPI library by 31 * Michael Fromberger but has been written from scratch with 32 * additional optimizations in place. 33 * 34 * The library is free for all purposes without any express 35 * guarantee it works. 36 * 37 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 38 */ 39 #include "config.h" 40 41 #include <sys/types.h> 42 #include <sys/param.h> 43 44 #ifdef _KERNEL 45 # include <sys/kmem.h> 46 #else 47 # include <arpa/inet.h> 48 # include <stdarg.h> 49 # include <stdio.h> 50 # include <stdlib.h> 51 # include <string.h> 52 # include <unistd.h> 53 #endif 54 55 #include "bn.h" 56 57 /**************************************************************************/ 58 59 /* LibTomMath, multiple-precision integer library -- Tom St Denis 60 * 61 * LibTomMath is a library that provides multiple-precision 62 * integer arithmetic as well as number theoretic functionality. 63 * 64 * The library was designed directly after the MPI library by 65 * Michael Fromberger but has been written from scratch with 66 * additional optimizations in place. 67 * 68 * The library is free for all purposes without any express 69 * guarantee it works. 70 * 71 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org 72 */ 73 74 #define MP_PREC 32 75 #define DIGIT_BIT 28 76 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1)) 77 78 #define MP_WARRAY /*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1))) 79 80 #define MP_NO 0 81 #define MP_YES 1 82 83 #ifndef USE_ARG 84 #define USE_ARG(x) /*LINTED*/(void)&(x) 85 #endif 86 87 #ifndef __arraycount 88 #define __arraycount(__x) (sizeof(__x) / sizeof(__x[0])) 89 #endif 90 91 #define MP_ISZERO(a) (((a)->used == 0) ? MP_YES : MP_NO) 92 93 typedef int mp_err; 94 95 static int signed_multiply(mp_int * a, mp_int * b, mp_int * c); 96 static int square(mp_int * a, mp_int * b); 97 98 static int signed_subtract_word(mp_int *a, mp_digit b, mp_int *c); 99 100 static inline void * 101 allocate(size_t n, size_t m) 102 { 103 return calloc(n, m); 104 } 105 106 static inline void 107 deallocate(void *v, size_t sz) 108 { 109 USE_ARG(sz); 110 free(v); 111 } 112 113 /* set to zero */ 114 static inline void 115 mp_zero(mp_int *a) 116 { 117 a->sign = MP_ZPOS; 118 a->used = 0; 119 memset(a->dp, 0x0, a->alloc * sizeof(*a->dp)); 120 } 121 122 /* grow as required */ 123 static int 124 mp_grow(mp_int *a, int size) 125 { 126 mp_digit *tmp; 127 128 /* if the alloc size is smaller alloc more ram */ 129 if (a->alloc < size) { 130 /* ensure there are always at least MP_PREC digits extra on top */ 131 size += (MP_PREC * 2) - (size % MP_PREC); 132 133 /* reallocate the array a->dp 134 * 135 * We store the return in a temporary variable 136 * in case the operation failed we don't want 137 * to overwrite the dp member of a. 138 */ 139 tmp = realloc(a->dp, sizeof(*tmp) * size); 140 if (tmp == NULL) { 141 /* reallocation failed but "a" is still valid [can be freed] */ 142 return MP_MEM; 143 } 144 145 /* reallocation succeeded so set a->dp */ 146 a->dp = tmp; 147 /* zero excess digits */ 148 memset(&a->dp[a->alloc], 0x0, (size - a->alloc) * sizeof(*a->dp)); 149 a->alloc = size; 150 } 151 return MP_OKAY; 152 } 153 154 /* shift left a certain amount of digits */ 155 static int 156 lshift_digits(mp_int * a, int b) 157 { 158 mp_digit *top, *bottom; 159 int x, res; 160 161 /* if its less than zero return */ 162 if (b <= 0) { 163 return MP_OKAY; 164 } 165 166 /* grow to fit the new digits */ 167 if (a->alloc < a->used + b) { 168 if ((res = mp_grow(a, a->used + b)) != MP_OKAY) { 169 return res; 170 } 171 } 172 173 /* increment the used by the shift amount then copy upwards */ 174 a->used += b; 175 176 /* top */ 177 top = a->dp + a->used - 1; 178 179 /* base */ 180 bottom = a->dp + a->used - 1 - b; 181 182 /* much like rshift_digits this is implemented using a sliding window 183 * except the window goes the otherway around. Copying from 184 * the bottom to the top. 185 */ 186 for (x = a->used - 1; x >= b; x--) { 187 *top-- = *bottom--; 188 } 189 190 /* zero the lower digits */ 191 memset(a->dp, 0x0, b * sizeof(*a->dp)); 192 return MP_OKAY; 193 } 194 195 /* trim unused digits 196 * 197 * This is used to ensure that leading zero digits are 198 * trimed and the leading "used" digit will be non-zero 199 * Typically very fast. Also fixes the sign if there 200 * are no more leading digits 201 */ 202 static void 203 trim_unused_digits(mp_int * a) 204 { 205 /* decrease used while the most significant digit is 206 * zero. 207 */ 208 while (a->used > 0 && a->dp[a->used - 1] == 0) { 209 a->used -= 1; 210 } 211 /* reset the sign flag if used == 0 */ 212 if (a->used == 0) { 213 a->sign = MP_ZPOS; 214 } 215 } 216 217 /* copy, b = a */ 218 static int 219 mp_copy(BIGNUM *a, BIGNUM *b) 220 { 221 int res; 222 223 /* if dst == src do nothing */ 224 if (a == b) { 225 return MP_OKAY; 226 } 227 if (a == NULL || b == NULL) { 228 return MP_VAL; 229 } 230 231 /* grow dest */ 232 if (b->alloc < a->used) { 233 if ((res = mp_grow(b, a->used)) != MP_OKAY) { 234 return res; 235 } 236 } 237 238 memcpy(b->dp, a->dp, a->used * sizeof(*b->dp)); 239 if (b->used > a->used) { 240 memset(&b->dp[a->used], 0x0, (b->used - a->used) * sizeof(*b->dp)); 241 } 242 243 /* copy used count and sign */ 244 b->used = a->used; 245 b->sign = a->sign; 246 return MP_OKAY; 247 } 248 249 /* shift left by a certain bit count */ 250 static int 251 lshift_bits(mp_int *a, int b, mp_int *c) 252 { 253 mp_digit d; 254 int res; 255 256 /* copy */ 257 if (a != c) { 258 if ((res = mp_copy(a, c)) != MP_OKAY) { 259 return res; 260 } 261 } 262 263 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) { 264 if ((res = mp_grow(c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) { 265 return res; 266 } 267 } 268 269 /* shift by as many digits in the bit count */ 270 if (b >= (int)DIGIT_BIT) { 271 if ((res = lshift_digits(c, b / DIGIT_BIT)) != MP_OKAY) { 272 return res; 273 } 274 } 275 276 /* shift any bit count < DIGIT_BIT */ 277 d = (mp_digit) (b % DIGIT_BIT); 278 if (d != 0) { 279 mp_digit *tmpc, shift, mask, carry, rr; 280 int x; 281 282 /* bitmask for carries */ 283 mask = (((mp_digit)1) << d) - 1; 284 285 /* shift for msbs */ 286 shift = DIGIT_BIT - d; 287 288 /* alias */ 289 tmpc = c->dp; 290 291 /* carry */ 292 carry = 0; 293 for (x = 0; x < c->used; x++) { 294 /* get the higher bits of the current word */ 295 rr = (*tmpc >> shift) & mask; 296 297 /* shift the current word and OR in the carry */ 298 *tmpc = ((*tmpc << d) | carry) & MP_MASK; 299 ++tmpc; 300 301 /* set the carry to the carry bits of the current word */ 302 carry = rr; 303 } 304 305 /* set final carry */ 306 if (carry != 0) { 307 c->dp[c->used++] = carry; 308 } 309 } 310 trim_unused_digits(c); 311 return MP_OKAY; 312 } 313 314 /* reads a unsigned char array, assumes the msb is stored first [big endian] */ 315 static int 316 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c) 317 { 318 int res; 319 320 /* make sure there are at least two digits */ 321 if (a->alloc < 2) { 322 if ((res = mp_grow(a, 2)) != MP_OKAY) { 323 return res; 324 } 325 } 326 327 /* zero the int */ 328 mp_zero(a); 329 330 /* read the bytes in */ 331 while (c-- > 0) { 332 if ((res = lshift_bits(a, 8, a)) != MP_OKAY) { 333 return res; 334 } 335 336 a->dp[0] |= *b++; 337 a->used += 1; 338 } 339 trim_unused_digits(a); 340 return MP_OKAY; 341 } 342 343 /* returns the number of bits in an mpi */ 344 static int 345 mp_count_bits(const mp_int *a) 346 { 347 int r; 348 mp_digit q; 349 350 /* shortcut */ 351 if (a->used == 0) { 352 return 0; 353 } 354 355 /* get number of digits and add that */ 356 r = (a->used - 1) * DIGIT_BIT; 357 358 /* take the last digit and count the bits in it */ 359 for (q = a->dp[a->used - 1]; q > ((mp_digit) 0) ; r++) { 360 q >>= ((mp_digit) 1); 361 } 362 return r; 363 } 364 365 /* compare maginitude of two ints (unsigned) */ 366 static int 367 compare_magnitude(mp_int * a, mp_int * b) 368 { 369 int n; 370 mp_digit *tmpa, *tmpb; 371 372 /* compare based on # of non-zero digits */ 373 if (a->used > b->used) { 374 return MP_GT; 375 } 376 377 if (a->used < b->used) { 378 return MP_LT; 379 } 380 381 /* alias for a */ 382 tmpa = a->dp + (a->used - 1); 383 384 /* alias for b */ 385 tmpb = b->dp + (a->used - 1); 386 387 /* compare based on digits */ 388 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) { 389 if (*tmpa > *tmpb) { 390 return MP_GT; 391 } 392 393 if (*tmpa < *tmpb) { 394 return MP_LT; 395 } 396 } 397 return MP_EQ; 398 } 399 400 /* compare two ints (signed)*/ 401 static int 402 signed_compare(mp_int * a, mp_int * b) 403 { 404 /* compare based on sign */ 405 if (a->sign != b->sign) { 406 return (a->sign == MP_NEG) ? MP_LT : MP_GT; 407 } 408 return (a->sign == MP_NEG) ? compare_magnitude(b, a) : compare_magnitude(a, b); 409 } 410 411 /* get the size for an unsigned equivalent */ 412 static int 413 mp_unsigned_bin_size(mp_int * a) 414 { 415 int size = mp_count_bits(a); 416 417 return (size / 8 + ((size & 7) != 0 ? 1 : 0)); 418 } 419 420 /* init a new mp_int */ 421 static int 422 mp_init(mp_int * a) 423 { 424 /* allocate memory required and clear it */ 425 a->dp = allocate(1, sizeof(*a->dp) * MP_PREC); 426 if (a->dp == NULL) { 427 return MP_MEM; 428 } 429 430 /* set the digits to zero */ 431 memset(a->dp, 0x0, MP_PREC * sizeof(*a->dp)); 432 433 /* set the used to zero, allocated digits to the default precision 434 * and sign to positive */ 435 a->used = 0; 436 a->alloc = MP_PREC; 437 a->sign = MP_ZPOS; 438 439 return MP_OKAY; 440 } 441 442 /* clear one (frees) */ 443 static void 444 mp_clear(mp_int * a) 445 { 446 /* only do anything if a hasn't been freed previously */ 447 if (a->dp != NULL) { 448 memset(a->dp, 0x0, a->used * sizeof(*a->dp)); 449 450 /* free ram */ 451 deallocate(a->dp, (size_t)a->alloc); 452 453 /* reset members to make debugging easier */ 454 a->dp = NULL; 455 a->alloc = a->used = 0; 456 a->sign = MP_ZPOS; 457 } 458 } 459 460 static int 461 mp_init_multi(mp_int *mp, ...) 462 { 463 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */ 464 int n = 0; /* Number of ok inits */ 465 mp_int* cur_arg = mp; 466 va_list args; 467 468 va_start(args, mp); /* init args to next argument from caller */ 469 while (cur_arg != NULL) { 470 if (mp_init(cur_arg) != MP_OKAY) { 471 /* Oops - error! Back-track and mp_clear what we already 472 succeeded in init-ing, then return error. 473 */ 474 va_list clean_args; 475 476 /* end the current list */ 477 va_end(args); 478 479 /* now start cleaning up */ 480 cur_arg = mp; 481 va_start(clean_args, mp); 482 while (n--) { 483 mp_clear(cur_arg); 484 cur_arg = va_arg(clean_args, mp_int*); 485 } 486 va_end(clean_args); 487 res = MP_MEM; 488 break; 489 } 490 n++; 491 cur_arg = va_arg(args, mp_int*); 492 } 493 va_end(args); 494 return res; /* Assumed ok, if error flagged above. */ 495 } 496 497 /* init an mp_init for a given size */ 498 static int 499 mp_init_size(mp_int * a, int size) 500 { 501 /* pad size so there are always extra digits */ 502 size += (MP_PREC * 2) - (size % MP_PREC); 503 504 /* alloc mem */ 505 a->dp = allocate(1, sizeof(*a->dp) * size); 506 if (a->dp == NULL) { 507 return MP_MEM; 508 } 509 510 /* set the members */ 511 a->used = 0; 512 a->alloc = size; 513 a->sign = MP_ZPOS; 514 515 /* zero the digits */ 516 memset(a->dp, 0x0, size * sizeof(*a->dp)); 517 return MP_OKAY; 518 } 519 520 /* creates "a" then copies b into it */ 521 static int 522 mp_init_copy(mp_int * a, mp_int * b) 523 { 524 int res; 525 526 if ((res = mp_init(a)) != MP_OKAY) { 527 return res; 528 } 529 return mp_copy(b, a); 530 } 531 532 /* low level addition, based on HAC pp.594, Algorithm 14.7 */ 533 static int 534 basic_add(mp_int * a, mp_int * b, mp_int * c) 535 { 536 mp_int *x; 537 int olduse, res, min, max; 538 539 /* find sizes, we let |a| <= |b| which means we have to sort 540 * them. "x" will point to the input with the most digits 541 */ 542 if (a->used > b->used) { 543 min = b->used; 544 max = a->used; 545 x = a; 546 } else { 547 min = a->used; 548 max = b->used; 549 x = b; 550 } 551 552 /* init result */ 553 if (c->alloc < max + 1) { 554 if ((res = mp_grow(c, max + 1)) != MP_OKAY) { 555 return res; 556 } 557 } 558 559 /* get old used digit count and set new one */ 560 olduse = c->used; 561 c->used = max + 1; 562 563 { 564 mp_digit carry, *tmpa, *tmpb, *tmpc; 565 int i; 566 567 /* alias for digit pointers */ 568 569 /* first input */ 570 tmpa = a->dp; 571 572 /* second input */ 573 tmpb = b->dp; 574 575 /* destination */ 576 tmpc = c->dp; 577 578 /* zero the carry */ 579 carry = 0; 580 for (i = 0; i < min; i++) { 581 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */ 582 *tmpc = *tmpa++ + *tmpb++ + carry; 583 584 /* U = carry bit of T[i] */ 585 carry = *tmpc >> ((mp_digit)DIGIT_BIT); 586 587 /* take away carry bit from T[i] */ 588 *tmpc++ &= MP_MASK; 589 } 590 591 /* now copy higher words if any, that is in A+B 592 * if A or B has more digits add those in 593 */ 594 if (min != max) { 595 for (; i < max; i++) { 596 /* T[i] = X[i] + U */ 597 *tmpc = x->dp[i] + carry; 598 599 /* U = carry bit of T[i] */ 600 carry = *tmpc >> ((mp_digit)DIGIT_BIT); 601 602 /* take away carry bit from T[i] */ 603 *tmpc++ &= MP_MASK; 604 } 605 } 606 607 /* add carry */ 608 *tmpc++ = carry; 609 610 /* clear digits above oldused */ 611 if (olduse > c->used) { 612 memset(tmpc, 0x0, (olduse - c->used) * sizeof(*c->dp)); 613 } 614 } 615 616 trim_unused_digits(c); 617 return MP_OKAY; 618 } 619 620 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */ 621 static int 622 basic_subtract(mp_int * a, mp_int * b, mp_int * c) 623 { 624 int olduse, res, min, max; 625 626 /* find sizes */ 627 min = b->used; 628 max = a->used; 629 630 /* init result */ 631 if (c->alloc < max) { 632 if ((res = mp_grow(c, max)) != MP_OKAY) { 633 return res; 634 } 635 } 636 olduse = c->used; 637 c->used = max; 638 639 { 640 mp_digit carry, *tmpa, *tmpb, *tmpc; 641 int i; 642 643 /* alias for digit pointers */ 644 tmpa = a->dp; 645 tmpb = b->dp; 646 tmpc = c->dp; 647 648 /* set carry to zero */ 649 carry = 0; 650 for (i = 0; i < min; i++) { 651 /* T[i] = A[i] - B[i] - U */ 652 *tmpc = *tmpa++ - *tmpb++ - carry; 653 654 /* U = carry bit of T[i] 655 * Note this saves performing an AND operation since 656 * if a carry does occur it will propagate all the way to the 657 * MSB. As a result a single shift is enough to get the carry 658 */ 659 carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1)); 660 661 /* Clear carry from T[i] */ 662 *tmpc++ &= MP_MASK; 663 } 664 665 /* now copy higher words if any, e.g. if A has more digits than B */ 666 for (; i < max; i++) { 667 /* T[i] = A[i] - U */ 668 *tmpc = *tmpa++ - carry; 669 670 /* U = carry bit of T[i] */ 671 carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1)); 672 673 /* Clear carry from T[i] */ 674 *tmpc++ &= MP_MASK; 675 } 676 677 /* clear digits above used (since we may not have grown result above) */ 678 if (olduse > c->used) { 679 memset(tmpc, 0x0, (olduse - c->used) * sizeof(*a->dp)); 680 } 681 } 682 683 trim_unused_digits(c); 684 return MP_OKAY; 685 } 686 687 /* high level subtraction (handles signs) */ 688 static int 689 signed_subtract(mp_int * a, mp_int * b, mp_int * c) 690 { 691 int sa, sb, res; 692 693 sa = a->sign; 694 sb = b->sign; 695 696 if (sa != sb) { 697 /* subtract a negative from a positive, OR */ 698 /* subtract a positive from a negative. */ 699 /* In either case, ADD their magnitudes, */ 700 /* and use the sign of the first number. */ 701 c->sign = sa; 702 res = basic_add(a, b, c); 703 } else { 704 /* subtract a positive from a positive, OR */ 705 /* subtract a negative from a negative. */ 706 /* First, take the difference between their */ 707 /* magnitudes, then... */ 708 if (compare_magnitude(a, b) != MP_LT) { 709 /* Copy the sign from the first */ 710 c->sign = sa; 711 /* The first has a larger or equal magnitude */ 712 res = basic_subtract(a, b, c); 713 } else { 714 /* The result has the *opposite* sign from */ 715 /* the first number. */ 716 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS; 717 /* The second has a larger magnitude */ 718 res = basic_subtract(b, a, c); 719 } 720 } 721 return res; 722 } 723 724 /* shift right a certain amount of digits */ 725 static int 726 rshift_digits(mp_int * a, int b) 727 { 728 /* if b <= 0 then ignore it */ 729 if (b <= 0) { 730 return 0; 731 } 732 733 /* if b > used then simply zero it and return */ 734 if (a->used <= b) { 735 mp_zero(a); 736 return 0; 737 } 738 739 /* this is implemented as a sliding window where 740 * the window is b-digits long and digits from 741 * the top of the window are copied to the bottom 742 * 743 * e.g. 744 745 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ----> 746 /\ | ----> 747 \-------------------/ ----> 748 */ 749 memmove(a->dp, &a->dp[b], (a->used - b) * sizeof(*a->dp)); 750 memset(&a->dp[a->used - b], 0x0, b * sizeof(*a->dp)); 751 752 /* remove excess digits */ 753 a->used -= b; 754 return 1; 755 } 756 757 /* multiply by a digit */ 758 static int 759 multiply_digit(mp_int * a, mp_digit b, mp_int * c) 760 { 761 mp_digit carry, *tmpa, *tmpc; 762 mp_word r; 763 int ix, res, olduse; 764 765 /* make sure c is big enough to hold a*b */ 766 if (c->alloc < a->used + 1) { 767 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 768 return res; 769 } 770 } 771 772 /* get the original destinations used count */ 773 olduse = c->used; 774 775 /* set the sign */ 776 c->sign = a->sign; 777 778 /* alias for a->dp [source] */ 779 tmpa = a->dp; 780 781 /* alias for c->dp [dest] */ 782 tmpc = c->dp; 783 784 /* zero carry */ 785 carry = 0; 786 787 /* compute columns */ 788 for (ix = 0; ix < a->used; ix++) { 789 /* compute product and carry sum for this term */ 790 r = ((mp_word) carry) + ((mp_word)*tmpa++) * ((mp_word)b); 791 792 /* mask off higher bits to get a single digit */ 793 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK)); 794 795 /* send carry into next iteration */ 796 carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 797 } 798 799 /* store final carry [if any] and increment ix offset */ 800 *tmpc++ = carry; 801 ++ix; 802 if (olduse > ix) { 803 memset(tmpc, 0x0, (olduse - ix) * sizeof(*tmpc)); 804 } 805 806 /* set used count */ 807 c->used = a->used + 1; 808 trim_unused_digits(c); 809 810 return MP_OKAY; 811 } 812 813 /* high level addition (handles signs) */ 814 static int 815 signed_add(mp_int * a, mp_int * b, mp_int * c) 816 { 817 int asign, bsign, res; 818 819 /* get sign of both inputs */ 820 asign = a->sign; 821 bsign = b->sign; 822 823 /* handle two cases, not four */ 824 if (asign == bsign) { 825 /* both positive or both negative */ 826 /* add their magnitudes, copy the sign */ 827 c->sign = asign; 828 res = basic_add(a, b, c); 829 } else { 830 /* one positive, the other negative */ 831 /* subtract the one with the greater magnitude from */ 832 /* the one of the lesser magnitude. The result gets */ 833 /* the sign of the one with the greater magnitude. */ 834 if (compare_magnitude(a, b) == MP_LT) { 835 c->sign = bsign; 836 res = basic_subtract(b, a, c); 837 } else { 838 c->sign = asign; 839 res = basic_subtract(a, b, c); 840 } 841 } 842 return res; 843 } 844 845 /* swap the elements of two integers, for cases where you can't simply swap the 846 * mp_int pointers around 847 */ 848 static void 849 mp_exch(mp_int *a, mp_int *b) 850 { 851 mp_int t; 852 853 t = *a; 854 *a = *b; 855 *b = t; 856 } 857 858 /* calc a value mod 2**b */ 859 static int 860 modulo_2_to_power(mp_int * a, int b, mp_int * c) 861 { 862 int x, res; 863 864 /* if b is <= 0 then zero the int */ 865 if (b <= 0) { 866 mp_zero(c); 867 return MP_OKAY; 868 } 869 870 /* if the modulus is larger than the value than return */ 871 if (b >= (int) (a->used * DIGIT_BIT)) { 872 res = mp_copy(a, c); 873 return res; 874 } 875 876 /* copy */ 877 if ((res = mp_copy(a, c)) != MP_OKAY) { 878 return res; 879 } 880 881 /* zero digits above the last digit of the modulus */ 882 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) { 883 c->dp[x] = 0; 884 } 885 /* clear the digit that is not completely outside/inside the modulus */ 886 c->dp[b / DIGIT_BIT] &= 887 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1)); 888 trim_unused_digits(c); 889 return MP_OKAY; 890 } 891 892 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */ 893 static int 894 rshift_bits(mp_int * a, int b, mp_int * c, mp_int * d) 895 { 896 mp_digit D, r, rr; 897 int x, res; 898 mp_int t; 899 900 901 /* if the shift count is <= 0 then we do no work */ 902 if (b <= 0) { 903 res = mp_copy(a, c); 904 if (d != NULL) { 905 mp_zero(d); 906 } 907 return res; 908 } 909 910 if ((res = mp_init(&t)) != MP_OKAY) { 911 return res; 912 } 913 914 /* get the remainder */ 915 if (d != NULL) { 916 if ((res = modulo_2_to_power(a, b, &t)) != MP_OKAY) { 917 mp_clear(&t); 918 return res; 919 } 920 } 921 922 /* copy */ 923 if ((res = mp_copy(a, c)) != MP_OKAY) { 924 mp_clear(&t); 925 return res; 926 } 927 928 /* shift by as many digits in the bit count */ 929 if (b >= (int)DIGIT_BIT) { 930 rshift_digits(c, b / DIGIT_BIT); 931 } 932 933 /* shift any bit count < DIGIT_BIT */ 934 D = (mp_digit) (b % DIGIT_BIT); 935 if (D != 0) { 936 mp_digit *tmpc, mask, shift; 937 938 /* mask */ 939 mask = (((mp_digit)1) << D) - 1; 940 941 /* shift for lsb */ 942 shift = DIGIT_BIT - D; 943 944 /* alias */ 945 tmpc = c->dp + (c->used - 1); 946 947 /* carry */ 948 r = 0; 949 for (x = c->used - 1; x >= 0; x--) { 950 /* get the lower bits of this word in a temp */ 951 rr = *tmpc & mask; 952 953 /* shift the current word and mix in the carry bits from the previous word */ 954 *tmpc = (*tmpc >> D) | (r << shift); 955 --tmpc; 956 957 /* set the carry to the carry bits of the current word found above */ 958 r = rr; 959 } 960 } 961 trim_unused_digits(c); 962 if (d != NULL) { 963 mp_exch(&t, d); 964 } 965 mp_clear(&t); 966 return MP_OKAY; 967 } 968 969 /* integer signed division. 970 * c*b + d == a [e.g. a/b, c=quotient, d=remainder] 971 * HAC pp.598 Algorithm 14.20 972 * 973 * Note that the description in HAC is horribly 974 * incomplete. For example, it doesn't consider 975 * the case where digits are removed from 'x' in 976 * the inner loop. It also doesn't consider the 977 * case that y has fewer than three digits, etc.. 978 * 979 * The overall algorithm is as described as 980 * 14.20 from HAC but fixed to treat these cases. 981 */ 982 static int 983 signed_divide(mp_int *c, mp_int *d, mp_int *a, mp_int *b) 984 { 985 mp_int q, x, y, t1, t2; 986 int res, n, t, i, norm, neg; 987 988 /* is divisor zero ? */ 989 if (MP_ISZERO(b) == MP_YES) { 990 return MP_VAL; 991 } 992 993 /* if a < b then q=0, r = a */ 994 if (compare_magnitude(a, b) == MP_LT) { 995 if (d != NULL) { 996 res = mp_copy(a, d); 997 } else { 998 res = MP_OKAY; 999 } 1000 if (c != NULL) { 1001 mp_zero(c); 1002 } 1003 return res; 1004 } 1005 1006 if ((res = mp_init_size(&q, a->used + 2)) != MP_OKAY) { 1007 return res; 1008 } 1009 q.used = a->used + 2; 1010 1011 if ((res = mp_init(&t1)) != MP_OKAY) { 1012 goto LBL_Q; 1013 } 1014 1015 if ((res = mp_init(&t2)) != MP_OKAY) { 1016 goto LBL_T1; 1017 } 1018 1019 if ((res = mp_init_copy(&x, a)) != MP_OKAY) { 1020 goto LBL_T2; 1021 } 1022 1023 if ((res = mp_init_copy(&y, b)) != MP_OKAY) { 1024 goto LBL_X; 1025 } 1026 1027 /* fix the sign */ 1028 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 1029 x.sign = y.sign = MP_ZPOS; 1030 1031 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */ 1032 norm = mp_count_bits(&y) % DIGIT_BIT; 1033 if (norm < (int)(DIGIT_BIT-1)) { 1034 norm = (DIGIT_BIT-1) - norm; 1035 if ((res = lshift_bits(&x, norm, &x)) != MP_OKAY) { 1036 goto LBL_Y; 1037 } 1038 if ((res = lshift_bits(&y, norm, &y)) != MP_OKAY) { 1039 goto LBL_Y; 1040 } 1041 } else { 1042 norm = 0; 1043 } 1044 1045 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */ 1046 n = x.used - 1; 1047 t = y.used - 1; 1048 1049 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */ 1050 if ((res = lshift_digits(&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */ 1051 goto LBL_Y; 1052 } 1053 1054 while (signed_compare(&x, &y) != MP_LT) { 1055 ++(q.dp[n - t]); 1056 if ((res = signed_subtract(&x, &y, &x)) != MP_OKAY) { 1057 goto LBL_Y; 1058 } 1059 } 1060 1061 /* reset y by shifting it back down */ 1062 rshift_digits(&y, n - t); 1063 1064 /* step 3. for i from n down to (t + 1) */ 1065 for (i = n; i >= (t + 1); i--) { 1066 if (i > x.used) { 1067 continue; 1068 } 1069 1070 /* step 3.1 if xi == yt then set q{i-t-1} to b-1, 1071 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */ 1072 if (x.dp[i] == y.dp[t]) { 1073 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1); 1074 } else { 1075 mp_word tmp; 1076 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT); 1077 tmp |= ((mp_word) x.dp[i - 1]); 1078 tmp /= ((mp_word) y.dp[t]); 1079 if (tmp > (mp_word) MP_MASK) { 1080 tmp = MP_MASK; 1081 } 1082 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK)); 1083 } 1084 1085 /* while (q{i-t-1} * (yt * b + y{t-1})) > 1086 xi * b**2 + xi-1 * b + xi-2 1087 do q{i-t-1} -= 1; 1088 */ 1089 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK; 1090 do { 1091 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK; 1092 1093 /* find left hand */ 1094 mp_zero(&t1); 1095 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1]; 1096 t1.dp[1] = y.dp[t]; 1097 t1.used = 2; 1098 if ((res = multiply_digit(&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1099 goto LBL_Y; 1100 } 1101 1102 /* find right hand */ 1103 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2]; 1104 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1]; 1105 t2.dp[2] = x.dp[i]; 1106 t2.used = 3; 1107 } while (compare_magnitude(&t1, &t2) == MP_GT); 1108 1109 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */ 1110 if ((res = multiply_digit(&y, q.dp[i - t - 1], &t1)) != MP_OKAY) { 1111 goto LBL_Y; 1112 } 1113 1114 if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) { 1115 goto LBL_Y; 1116 } 1117 1118 if ((res = signed_subtract(&x, &t1, &x)) != MP_OKAY) { 1119 goto LBL_Y; 1120 } 1121 1122 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */ 1123 if (x.sign == MP_NEG) { 1124 if ((res = mp_copy(&y, &t1)) != MP_OKAY) { 1125 goto LBL_Y; 1126 } 1127 if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) { 1128 goto LBL_Y; 1129 } 1130 if ((res = signed_add(&x, &t1, &x)) != MP_OKAY) { 1131 goto LBL_Y; 1132 } 1133 1134 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK; 1135 } 1136 } 1137 1138 /* now q is the quotient and x is the remainder 1139 * [which we have to normalize] 1140 */ 1141 1142 /* get sign before writing to c */ 1143 x.sign = x.used == 0 ? MP_ZPOS : a->sign; 1144 1145 if (c != NULL) { 1146 trim_unused_digits(&q); 1147 mp_exch(&q, c); 1148 c->sign = neg; 1149 } 1150 1151 if (d != NULL) { 1152 rshift_bits(&x, norm, &x, NULL); 1153 mp_exch(&x, d); 1154 } 1155 1156 res = MP_OKAY; 1157 1158 LBL_Y: 1159 mp_clear(&y); 1160 LBL_X: 1161 mp_clear(&x); 1162 LBL_T2: 1163 mp_clear(&t2); 1164 LBL_T1: 1165 mp_clear(&t1); 1166 LBL_Q: 1167 mp_clear(&q); 1168 return res; 1169 } 1170 1171 /* c = a mod b, 0 <= c < b */ 1172 static int 1173 modulo(mp_int * a, mp_int * b, mp_int * c) 1174 { 1175 mp_int t; 1176 int res; 1177 1178 if ((res = mp_init(&t)) != MP_OKAY) { 1179 return res; 1180 } 1181 1182 if ((res = signed_divide(NULL, &t, a, b)) != MP_OKAY) { 1183 mp_clear(&t); 1184 return res; 1185 } 1186 1187 if (t.sign != b->sign) { 1188 res = signed_add(b, &t, c); 1189 } else { 1190 res = MP_OKAY; 1191 mp_exch(&t, c); 1192 } 1193 1194 mp_clear(&t); 1195 return res; 1196 } 1197 1198 /* set to a digit */ 1199 static void 1200 set_word(mp_int * a, mp_digit b) 1201 { 1202 mp_zero(a); 1203 a->dp[0] = b & MP_MASK; 1204 a->used = (a->dp[0] != 0) ? 1 : 0; 1205 } 1206 1207 /* b = a/2 */ 1208 static int 1209 half(mp_int * a, mp_int * b) 1210 { 1211 int x, res, oldused; 1212 1213 /* copy */ 1214 if (b->alloc < a->used) { 1215 if ((res = mp_grow(b, a->used)) != MP_OKAY) { 1216 return res; 1217 } 1218 } 1219 1220 oldused = b->used; 1221 b->used = a->used; 1222 { 1223 mp_digit r, rr, *tmpa, *tmpb; 1224 1225 /* source alias */ 1226 tmpa = a->dp + b->used - 1; 1227 1228 /* dest alias */ 1229 tmpb = b->dp + b->used - 1; 1230 1231 /* carry */ 1232 r = 0; 1233 for (x = b->used - 1; x >= 0; x--) { 1234 /* get the carry for the next iteration */ 1235 rr = *tmpa & 1; 1236 1237 /* shift the current digit, add in carry and store */ 1238 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1)); 1239 1240 /* forward carry to next iteration */ 1241 r = rr; 1242 } 1243 1244 /* zero excess digits */ 1245 tmpb = b->dp + b->used; 1246 for (x = b->used; x < oldused; x++) { 1247 *tmpb++ = 0; 1248 } 1249 } 1250 b->sign = a->sign; 1251 trim_unused_digits(b); 1252 return MP_OKAY; 1253 } 1254 1255 /* compare a digit */ 1256 static int 1257 compare_digit(mp_int * a, mp_digit b) 1258 { 1259 /* compare based on sign */ 1260 if (a->sign == MP_NEG) { 1261 return MP_LT; 1262 } 1263 1264 /* compare based on magnitude */ 1265 if (a->used > 1) { 1266 return MP_GT; 1267 } 1268 1269 /* compare the only digit of a to b */ 1270 if (a->dp[0] > b) { 1271 return MP_GT; 1272 } else if (a->dp[0] < b) { 1273 return MP_LT; 1274 } else { 1275 return MP_EQ; 1276 } 1277 } 1278 1279 static void 1280 mp_clear_multi(mp_int *mp, ...) 1281 { 1282 mp_int* next_mp = mp; 1283 va_list args; 1284 1285 va_start(args, mp); 1286 while (next_mp != NULL) { 1287 mp_clear(next_mp); 1288 next_mp = va_arg(args, mp_int*); 1289 } 1290 va_end(args); 1291 } 1292 1293 /* computes the modular inverse via binary extended euclidean algorithm, 1294 * that is c = 1/a mod b 1295 * 1296 * Based on slow invmod except this is optimized for the case where b is 1297 * odd as per HAC Note 14.64 on pp. 610 1298 */ 1299 static int 1300 fast_modular_inverse(mp_int * a, mp_int * b, mp_int * c) 1301 { 1302 mp_int x, y, u, v, B, D; 1303 int res, neg; 1304 1305 /* 2. [modified] b must be odd */ 1306 if (MP_ISZERO(b) == MP_YES) { 1307 return MP_VAL; 1308 } 1309 1310 /* init all our temps */ 1311 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) { 1312 return res; 1313 } 1314 1315 /* x == modulus, y == value to invert */ 1316 if ((res = mp_copy(b, &x)) != MP_OKAY) { 1317 goto LBL_ERR; 1318 } 1319 1320 /* we need y = |a| */ 1321 if ((res = modulo(a, b, &y)) != MP_OKAY) { 1322 goto LBL_ERR; 1323 } 1324 1325 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1326 if ((res = mp_copy(&x, &u)) != MP_OKAY) { 1327 goto LBL_ERR; 1328 } 1329 if ((res = mp_copy(&y, &v)) != MP_OKAY) { 1330 goto LBL_ERR; 1331 } 1332 set_word(&D, 1); 1333 1334 top: 1335 /* 4. while u is even do */ 1336 while (BN_is_even(&u) == 1) { 1337 /* 4.1 u = u/2 */ 1338 if ((res = half(&u, &u)) != MP_OKAY) { 1339 goto LBL_ERR; 1340 } 1341 /* 4.2 if B is odd then */ 1342 if (BN_is_odd(&B) == 1) { 1343 if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) { 1344 goto LBL_ERR; 1345 } 1346 } 1347 /* B = B/2 */ 1348 if ((res = half(&B, &B)) != MP_OKAY) { 1349 goto LBL_ERR; 1350 } 1351 } 1352 1353 /* 5. while v is even do */ 1354 while (BN_is_even(&v) == 1) { 1355 /* 5.1 v = v/2 */ 1356 if ((res = half(&v, &v)) != MP_OKAY) { 1357 goto LBL_ERR; 1358 } 1359 /* 5.2 if D is odd then */ 1360 if (BN_is_odd(&D) == 1) { 1361 /* D = (D-x)/2 */ 1362 if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) { 1363 goto LBL_ERR; 1364 } 1365 } 1366 /* D = D/2 */ 1367 if ((res = half(&D, &D)) != MP_OKAY) { 1368 goto LBL_ERR; 1369 } 1370 } 1371 1372 /* 6. if u >= v then */ 1373 if (signed_compare(&u, &v) != MP_LT) { 1374 /* u = u - v, B = B - D */ 1375 if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) { 1376 goto LBL_ERR; 1377 } 1378 1379 if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) { 1380 goto LBL_ERR; 1381 } 1382 } else { 1383 /* v - v - u, D = D - B */ 1384 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 1385 goto LBL_ERR; 1386 } 1387 1388 if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) { 1389 goto LBL_ERR; 1390 } 1391 } 1392 1393 /* if not zero goto step 4 */ 1394 if (MP_ISZERO(&u) == MP_NO) { 1395 goto top; 1396 } 1397 1398 /* now a = C, b = D, gcd == g*v */ 1399 1400 /* if v != 1 then there is no inverse */ 1401 if (compare_digit(&v, 1) != MP_EQ) { 1402 res = MP_VAL; 1403 goto LBL_ERR; 1404 } 1405 1406 /* b is now the inverse */ 1407 neg = a->sign; 1408 while (D.sign == MP_NEG) { 1409 if ((res = signed_add(&D, b, &D)) != MP_OKAY) { 1410 goto LBL_ERR; 1411 } 1412 } 1413 mp_exch(&D, c); 1414 c->sign = neg; 1415 res = MP_OKAY; 1416 1417 LBL_ERR: 1418 mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL); 1419 return res; 1420 } 1421 1422 /* hac 14.61, pp608 */ 1423 static int 1424 slow_modular_inverse(mp_int * a, mp_int * b, mp_int * c) 1425 { 1426 mp_int x, y, u, v, A, B, C, D; 1427 int res; 1428 1429 /* b cannot be negative */ 1430 if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) { 1431 return MP_VAL; 1432 } 1433 1434 /* init temps */ 1435 if ((res = mp_init_multi(&x, &y, &u, &v, 1436 &A, &B, &C, &D, NULL)) != MP_OKAY) { 1437 return res; 1438 } 1439 1440 /* x = a, y = b */ 1441 if ((res = modulo(a, b, &x)) != MP_OKAY) { 1442 goto LBL_ERR; 1443 } 1444 if ((res = mp_copy(b, &y)) != MP_OKAY) { 1445 goto LBL_ERR; 1446 } 1447 1448 /* 2. [modified] if x,y are both even then return an error! */ 1449 if (BN_is_even(&x) == 1 && BN_is_even(&y) == 1) { 1450 res = MP_VAL; 1451 goto LBL_ERR; 1452 } 1453 1454 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */ 1455 if ((res = mp_copy(&x, &u)) != MP_OKAY) { 1456 goto LBL_ERR; 1457 } 1458 if ((res = mp_copy(&y, &v)) != MP_OKAY) { 1459 goto LBL_ERR; 1460 } 1461 set_word(&A, 1); 1462 set_word(&D, 1); 1463 1464 top: 1465 /* 4. while u is even do */ 1466 while (BN_is_even(&u) == 1) { 1467 /* 4.1 u = u/2 */ 1468 if ((res = half(&u, &u)) != MP_OKAY) { 1469 goto LBL_ERR; 1470 } 1471 /* 4.2 if A or B is odd then */ 1472 if (BN_is_odd(&A) == 1 || BN_is_odd(&B) == 1) { 1473 /* A = (A+y)/2, B = (B-x)/2 */ 1474 if ((res = signed_add(&A, &y, &A)) != MP_OKAY) { 1475 goto LBL_ERR; 1476 } 1477 if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) { 1478 goto LBL_ERR; 1479 } 1480 } 1481 /* A = A/2, B = B/2 */ 1482 if ((res = half(&A, &A)) != MP_OKAY) { 1483 goto LBL_ERR; 1484 } 1485 if ((res = half(&B, &B)) != MP_OKAY) { 1486 goto LBL_ERR; 1487 } 1488 } 1489 1490 /* 5. while v is even do */ 1491 while (BN_is_even(&v) == 1) { 1492 /* 5.1 v = v/2 */ 1493 if ((res = half(&v, &v)) != MP_OKAY) { 1494 goto LBL_ERR; 1495 } 1496 /* 5.2 if C or D is odd then */ 1497 if (BN_is_odd(&C) == 1 || BN_is_odd(&D) == 1) { 1498 /* C = (C+y)/2, D = (D-x)/2 */ 1499 if ((res = signed_add(&C, &y, &C)) != MP_OKAY) { 1500 goto LBL_ERR; 1501 } 1502 if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) { 1503 goto LBL_ERR; 1504 } 1505 } 1506 /* C = C/2, D = D/2 */ 1507 if ((res = half(&C, &C)) != MP_OKAY) { 1508 goto LBL_ERR; 1509 } 1510 if ((res = half(&D, &D)) != MP_OKAY) { 1511 goto LBL_ERR; 1512 } 1513 } 1514 1515 /* 6. if u >= v then */ 1516 if (signed_compare(&u, &v) != MP_LT) { 1517 /* u = u - v, A = A - C, B = B - D */ 1518 if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) { 1519 goto LBL_ERR; 1520 } 1521 1522 if ((res = signed_subtract(&A, &C, &A)) != MP_OKAY) { 1523 goto LBL_ERR; 1524 } 1525 1526 if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) { 1527 goto LBL_ERR; 1528 } 1529 } else { 1530 /* v - v - u, C = C - A, D = D - B */ 1531 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) { 1532 goto LBL_ERR; 1533 } 1534 1535 if ((res = signed_subtract(&C, &A, &C)) != MP_OKAY) { 1536 goto LBL_ERR; 1537 } 1538 1539 if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) { 1540 goto LBL_ERR; 1541 } 1542 } 1543 1544 /* if not zero goto step 4 */ 1545 if (BN_is_zero(&u) == 0) { 1546 goto top; 1547 } 1548 /* now a = C, b = D, gcd == g*v */ 1549 1550 /* if v != 1 then there is no inverse */ 1551 if (compare_digit(&v, 1) != MP_EQ) { 1552 res = MP_VAL; 1553 goto LBL_ERR; 1554 } 1555 1556 /* if its too low */ 1557 while (compare_digit(&C, 0) == MP_LT) { 1558 if ((res = signed_add(&C, b, &C)) != MP_OKAY) { 1559 goto LBL_ERR; 1560 } 1561 } 1562 1563 /* too big */ 1564 while (compare_magnitude(&C, b) != MP_LT) { 1565 if ((res = signed_subtract(&C, b, &C)) != MP_OKAY) { 1566 goto LBL_ERR; 1567 } 1568 } 1569 1570 /* C is now the inverse */ 1571 mp_exch(&C, c); 1572 res = MP_OKAY; 1573 LBL_ERR: 1574 mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL); 1575 return res; 1576 } 1577 1578 static int 1579 modular_inverse(mp_int *c, mp_int *a, mp_int *b) 1580 { 1581 /* b cannot be negative */ 1582 if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) { 1583 return MP_VAL; 1584 } 1585 1586 /* if the modulus is odd we can use a faster routine instead */ 1587 if (BN_is_odd(b) == 1) { 1588 return fast_modular_inverse(a, b, c); 1589 } 1590 return slow_modular_inverse(a, b, c); 1591 } 1592 1593 /* b = |a| 1594 * 1595 * Simple function copies the input and fixes the sign to positive 1596 */ 1597 static int 1598 absolute(mp_int * a, mp_int * b) 1599 { 1600 int res; 1601 1602 /* copy a to b */ 1603 if (a != b) { 1604 if ((res = mp_copy(a, b)) != MP_OKAY) { 1605 return res; 1606 } 1607 } 1608 1609 /* force the sign of b to positive */ 1610 b->sign = MP_ZPOS; 1611 1612 return MP_OKAY; 1613 } 1614 1615 /* determines if reduce_2k_l can be used */ 1616 static int 1617 mp_reduce_is_2k_l(mp_int *a) 1618 { 1619 int ix, iy; 1620 1621 if (a->used == 0) { 1622 return MP_NO; 1623 } else if (a->used == 1) { 1624 return MP_YES; 1625 } else if (a->used > 1) { 1626 /* if more than half of the digits are -1 we're sold */ 1627 for (iy = ix = 0; ix < a->used; ix++) { 1628 if (a->dp[ix] == MP_MASK) { 1629 ++iy; 1630 } 1631 } 1632 return (iy >= (a->used/2)) ? MP_YES : MP_NO; 1633 1634 } 1635 return MP_NO; 1636 } 1637 1638 /* computes a = 2**b 1639 * 1640 * Simple algorithm which zeroes the int, grows it then just sets one bit 1641 * as required. 1642 */ 1643 static int 1644 mp_2expt(mp_int * a, int b) 1645 { 1646 int res; 1647 1648 /* zero a as per default */ 1649 mp_zero(a); 1650 1651 /* grow a to accomodate the single bit */ 1652 if ((res = mp_grow(a, b / DIGIT_BIT + 1)) != MP_OKAY) { 1653 return res; 1654 } 1655 1656 /* set the used count of where the bit will go */ 1657 a->used = b / DIGIT_BIT + 1; 1658 1659 /* put the single bit in its place */ 1660 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT); 1661 1662 return MP_OKAY; 1663 } 1664 1665 /* pre-calculate the value required for Barrett reduction 1666 * For a given modulus "b" it calulates the value required in "a" 1667 */ 1668 static int 1669 mp_reduce_setup(mp_int * a, mp_int * b) 1670 { 1671 int res; 1672 1673 if ((res = mp_2expt(a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) { 1674 return res; 1675 } 1676 return signed_divide(a, NULL, a, b); 1677 } 1678 1679 /* b = a*2 */ 1680 static int 1681 doubled(mp_int * a, mp_int * b) 1682 { 1683 int x, res, oldused; 1684 1685 /* grow to accomodate result */ 1686 if (b->alloc < a->used + 1) { 1687 if ((res = mp_grow(b, a->used + 1)) != MP_OKAY) { 1688 return res; 1689 } 1690 } 1691 1692 oldused = b->used; 1693 b->used = a->used; 1694 1695 { 1696 mp_digit r, rr, *tmpa, *tmpb; 1697 1698 /* alias for source */ 1699 tmpa = a->dp; 1700 1701 /* alias for dest */ 1702 tmpb = b->dp; 1703 1704 /* carry */ 1705 r = 0; 1706 for (x = 0; x < a->used; x++) { 1707 1708 /* get what will be the *next* carry bit from the 1709 * MSB of the current digit 1710 */ 1711 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1)); 1712 1713 /* now shift up this digit, add in the carry [from the previous] */ 1714 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK; 1715 1716 /* copy the carry that would be from the source 1717 * digit into the next iteration 1718 */ 1719 r = rr; 1720 } 1721 1722 /* new leading digit? */ 1723 if (r != 0) { 1724 /* add a MSB which is always 1 at this point */ 1725 *tmpb = 1; 1726 ++(b->used); 1727 } 1728 1729 /* now zero any excess digits on the destination 1730 * that we didn't write to 1731 */ 1732 tmpb = b->dp + b->used; 1733 for (x = b->used; x < oldused; x++) { 1734 *tmpb++ = 0; 1735 } 1736 } 1737 b->sign = a->sign; 1738 return MP_OKAY; 1739 } 1740 1741 /* divide by three (based on routine from MPI and the GMP manual) */ 1742 static int 1743 third(mp_int * a, mp_int *c, mp_digit * d) 1744 { 1745 mp_int q; 1746 mp_word w, t; 1747 mp_digit b; 1748 int res, ix; 1749 1750 /* b = 2**DIGIT_BIT / 3 */ 1751 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3); 1752 1753 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 1754 return res; 1755 } 1756 1757 q.used = a->used; 1758 q.sign = a->sign; 1759 w = 0; 1760 for (ix = a->used - 1; ix >= 0; ix--) { 1761 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 1762 1763 if (w >= 3) { 1764 /* multiply w by [1/3] */ 1765 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT); 1766 1767 /* now subtract 3 * [w/3] from w, to get the remainder */ 1768 w -= t+t+t; 1769 1770 /* fixup the remainder as required since 1771 * the optimization is not exact. 1772 */ 1773 while (w >= 3) { 1774 t += 1; 1775 w -= 3; 1776 } 1777 } else { 1778 t = 0; 1779 } 1780 q.dp[ix] = (mp_digit)t; 1781 } 1782 1783 /* [optional] store the remainder */ 1784 if (d != NULL) { 1785 *d = (mp_digit)w; 1786 } 1787 1788 /* [optional] store the quotient */ 1789 if (c != NULL) { 1790 trim_unused_digits(&q); 1791 mp_exch(&q, c); 1792 } 1793 mp_clear(&q); 1794 1795 return res; 1796 } 1797 1798 /* multiplication using the Toom-Cook 3-way algorithm 1799 * 1800 * Much more complicated than Karatsuba but has a lower 1801 * asymptotic running time of O(N**1.464). This algorithm is 1802 * only particularly useful on VERY large inputs 1803 * (we're talking 1000s of digits here...). 1804 */ 1805 static int 1806 toom_cook_multiply(mp_int *a, mp_int *b, mp_int *c) 1807 { 1808 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2; 1809 int res, B; 1810 1811 /* init temps */ 1812 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 1813 &a0, &a1, &a2, &b0, &b1, 1814 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) { 1815 return res; 1816 } 1817 1818 /* B */ 1819 B = MIN(a->used, b->used) / 3; 1820 1821 /* a = a2 * B**2 + a1 * B + a0 */ 1822 if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 1823 goto ERR; 1824 } 1825 1826 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 1827 goto ERR; 1828 } 1829 rshift_digits(&a1, B); 1830 modulo_2_to_power(&a1, DIGIT_BIT * B, &a1); 1831 1832 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 1833 goto ERR; 1834 } 1835 rshift_digits(&a2, B*2); 1836 1837 /* b = b2 * B**2 + b1 * B + b0 */ 1838 if ((res = modulo_2_to_power(b, DIGIT_BIT * B, &b0)) != MP_OKAY) { 1839 goto ERR; 1840 } 1841 1842 if ((res = mp_copy(b, &b1)) != MP_OKAY) { 1843 goto ERR; 1844 } 1845 rshift_digits(&b1, B); 1846 modulo_2_to_power(&b1, DIGIT_BIT * B, &b1); 1847 1848 if ((res = mp_copy(b, &b2)) != MP_OKAY) { 1849 goto ERR; 1850 } 1851 rshift_digits(&b2, B*2); 1852 1853 /* w0 = a0*b0 */ 1854 if ((res = signed_multiply(&a0, &b0, &w0)) != MP_OKAY) { 1855 goto ERR; 1856 } 1857 1858 /* w4 = a2 * b2 */ 1859 if ((res = signed_multiply(&a2, &b2, &w4)) != MP_OKAY) { 1860 goto ERR; 1861 } 1862 1863 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */ 1864 if ((res = doubled(&a0, &tmp1)) != MP_OKAY) { 1865 goto ERR; 1866 } 1867 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1868 goto ERR; 1869 } 1870 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 1871 goto ERR; 1872 } 1873 if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 1874 goto ERR; 1875 } 1876 1877 if ((res = doubled(&b0, &tmp2)) != MP_OKAY) { 1878 goto ERR; 1879 } 1880 if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1881 goto ERR; 1882 } 1883 if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) { 1884 goto ERR; 1885 } 1886 if ((res = signed_add(&tmp2, &b2, &tmp2)) != MP_OKAY) { 1887 goto ERR; 1888 } 1889 1890 if ((res = signed_multiply(&tmp1, &tmp2, &w1)) != MP_OKAY) { 1891 goto ERR; 1892 } 1893 1894 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */ 1895 if ((res = doubled(&a2, &tmp1)) != MP_OKAY) { 1896 goto ERR; 1897 } 1898 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 1899 goto ERR; 1900 } 1901 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 1902 goto ERR; 1903 } 1904 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1905 goto ERR; 1906 } 1907 1908 if ((res = doubled(&b2, &tmp2)) != MP_OKAY) { 1909 goto ERR; 1910 } 1911 if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) { 1912 goto ERR; 1913 } 1914 if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) { 1915 goto ERR; 1916 } 1917 if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1918 goto ERR; 1919 } 1920 1921 if ((res = signed_multiply(&tmp1, &tmp2, &w3)) != MP_OKAY) { 1922 goto ERR; 1923 } 1924 1925 1926 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */ 1927 if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) { 1928 goto ERR; 1929 } 1930 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 1931 goto ERR; 1932 } 1933 if ((res = signed_add(&b2, &b1, &tmp2)) != MP_OKAY) { 1934 goto ERR; 1935 } 1936 if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) { 1937 goto ERR; 1938 } 1939 if ((res = signed_multiply(&tmp1, &tmp2, &w2)) != MP_OKAY) { 1940 goto ERR; 1941 } 1942 1943 /* now solve the matrix 1944 1945 0 0 0 0 1 1946 1 2 4 8 16 1947 1 1 1 1 1 1948 16 8 4 2 1 1949 1 0 0 0 0 1950 1951 using 12 subtractions, 4 shifts, 1952 2 small divisions and 1 small multiplication 1953 */ 1954 1955 /* r1 - r4 */ 1956 if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) { 1957 goto ERR; 1958 } 1959 /* r3 - r0 */ 1960 if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) { 1961 goto ERR; 1962 } 1963 /* r1/2 */ 1964 if ((res = half(&w1, &w1)) != MP_OKAY) { 1965 goto ERR; 1966 } 1967 /* r3/2 */ 1968 if ((res = half(&w3, &w3)) != MP_OKAY) { 1969 goto ERR; 1970 } 1971 /* r2 - r0 - r4 */ 1972 if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) { 1973 goto ERR; 1974 } 1975 if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) { 1976 goto ERR; 1977 } 1978 /* r1 - r2 */ 1979 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 1980 goto ERR; 1981 } 1982 /* r3 - r2 */ 1983 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 1984 goto ERR; 1985 } 1986 /* r1 - 8r0 */ 1987 if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) { 1988 goto ERR; 1989 } 1990 if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) { 1991 goto ERR; 1992 } 1993 /* r3 - 8r4 */ 1994 if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) { 1995 goto ERR; 1996 } 1997 if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) { 1998 goto ERR; 1999 } 2000 /* 3r2 - r1 - r3 */ 2001 if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) { 2002 goto ERR; 2003 } 2004 if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) { 2005 goto ERR; 2006 } 2007 if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) { 2008 goto ERR; 2009 } 2010 /* r1 - r2 */ 2011 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2012 goto ERR; 2013 } 2014 /* r3 - r2 */ 2015 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2016 goto ERR; 2017 } 2018 /* r1/3 */ 2019 if ((res = third(&w1, &w1, NULL)) != MP_OKAY) { 2020 goto ERR; 2021 } 2022 /* r3/3 */ 2023 if ((res = third(&w3, &w3, NULL)) != MP_OKAY) { 2024 goto ERR; 2025 } 2026 2027 /* at this point shift W[n] by B*n */ 2028 if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) { 2029 goto ERR; 2030 } 2031 if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) { 2032 goto ERR; 2033 } 2034 if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) { 2035 goto ERR; 2036 } 2037 if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) { 2038 goto ERR; 2039 } 2040 2041 if ((res = signed_add(&w0, &w1, c)) != MP_OKAY) { 2042 goto ERR; 2043 } 2044 if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2045 goto ERR; 2046 } 2047 if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2048 goto ERR; 2049 } 2050 if ((res = signed_add(&tmp1, c, c)) != MP_OKAY) { 2051 goto ERR; 2052 } 2053 2054 ERR: 2055 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 2056 &a0, &a1, &a2, &b0, &b1, 2057 &b2, &tmp1, &tmp2, NULL); 2058 return res; 2059 } 2060 2061 #define TOOM_MUL_CUTOFF 350 2062 #define KARATSUBA_MUL_CUTOFF 80 2063 2064 /* c = |a| * |b| using Karatsuba Multiplication using 2065 * three half size multiplications 2066 * 2067 * Let B represent the radix [e.g. 2**DIGIT_BIT] and 2068 * let n represent half of the number of digits in 2069 * the min(a,b) 2070 * 2071 * a = a1 * B**n + a0 2072 * b = b1 * B**n + b0 2073 * 2074 * Then, a * b => 2075 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 2076 * 2077 * Note that a1b1 and a0b0 are used twice and only need to be 2078 * computed once. So in total three half size (half # of 2079 * digit) multiplications are performed, a0b0, a1b1 and 2080 * (a1+b1)(a0+b0) 2081 * 2082 * Note that a multiplication of half the digits requires 2083 * 1/4th the number of single precision multiplications so in 2084 * total after one call 25% of the single precision multiplications 2085 * are saved. Note also that the call to signed_multiply can end up back 2086 * in this function if the a0, a1, b0, or b1 are above the threshold. 2087 * This is known as divide-and-conquer and leads to the famous 2088 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than 2089 * the standard O(N**2) that the baseline/comba methods use. 2090 * Generally though the overhead of this method doesn't pay off 2091 * until a certain size (N ~ 80) is reached. 2092 */ 2093 static int 2094 karatsuba_multiply(mp_int * a, mp_int * b, mp_int * c) 2095 { 2096 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; 2097 int B; 2098 int err; 2099 2100 /* default the return code to an error */ 2101 err = MP_MEM; 2102 2103 /* min # of digits */ 2104 B = MIN(a->used, b->used); 2105 2106 /* now divide in two */ 2107 B = (int)((unsigned)B >> 1); 2108 2109 /* init copy all the temps */ 2110 if (mp_init_size(&x0, B) != MP_OKAY) { 2111 goto ERR; 2112 } 2113 if (mp_init_size(&x1, a->used - B) != MP_OKAY) { 2114 goto X0; 2115 } 2116 if (mp_init_size(&y0, B) != MP_OKAY) { 2117 goto X1; 2118 } 2119 if (mp_init_size(&y1, b->used - B) != MP_OKAY) { 2120 goto Y0; 2121 } 2122 /* init temps */ 2123 if (mp_init_size(&t1, B * 2) != MP_OKAY) { 2124 goto Y1; 2125 } 2126 if (mp_init_size(&x0y0, B * 2) != MP_OKAY) { 2127 goto T1; 2128 } 2129 if (mp_init_size(&x1y1, B * 2) != MP_OKAY) { 2130 goto X0Y0; 2131 } 2132 /* now shift the digits */ 2133 x0.used = y0.used = B; 2134 x1.used = a->used - B; 2135 y1.used = b->used - B; 2136 2137 { 2138 int x; 2139 mp_digit *tmpa, *tmpb, *tmpx, *tmpy; 2140 2141 /* we copy the digits directly instead of using higher level functions 2142 * since we also need to shift the digits 2143 */ 2144 tmpa = a->dp; 2145 tmpb = b->dp; 2146 2147 tmpx = x0.dp; 2148 tmpy = y0.dp; 2149 for (x = 0; x < B; x++) { 2150 *tmpx++ = *tmpa++; 2151 *tmpy++ = *tmpb++; 2152 } 2153 2154 tmpx = x1.dp; 2155 for (x = B; x < a->used; x++) { 2156 *tmpx++ = *tmpa++; 2157 } 2158 2159 tmpy = y1.dp; 2160 for (x = B; x < b->used; x++) { 2161 *tmpy++ = *tmpb++; 2162 } 2163 } 2164 2165 /* only need to clamp the lower words since by definition the 2166 * upper words x1/y1 must have a known number of digits 2167 */ 2168 trim_unused_digits(&x0); 2169 trim_unused_digits(&y0); 2170 2171 /* now calc the products x0y0 and x1y1 */ 2172 /* after this x0 is no longer required, free temp [x0==t2]! */ 2173 if (signed_multiply(&x0, &y0, &x0y0) != MP_OKAY) { 2174 goto X1Y1; /* x0y0 = x0*y0 */ 2175 } 2176 if (signed_multiply(&x1, &y1, &x1y1) != MP_OKAY) { 2177 goto X1Y1; /* x1y1 = x1*y1 */ 2178 } 2179 /* now calc x1+x0 and y1+y0 */ 2180 if (basic_add(&x1, &x0, &t1) != MP_OKAY) { 2181 goto X1Y1; /* t1 = x1 - x0 */ 2182 } 2183 if (basic_add(&y1, &y0, &x0) != MP_OKAY) { 2184 goto X1Y1; /* t2 = y1 - y0 */ 2185 } 2186 if (signed_multiply(&t1, &x0, &t1) != MP_OKAY) { 2187 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ 2188 } 2189 /* add x0y0 */ 2190 if (signed_add(&x0y0, &x1y1, &x0) != MP_OKAY) { 2191 goto X1Y1; /* t2 = x0y0 + x1y1 */ 2192 } 2193 if (basic_subtract(&t1, &x0, &t1) != MP_OKAY) { 2194 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ 2195 } 2196 /* shift by B */ 2197 if (lshift_digits(&t1, B) != MP_OKAY) { 2198 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ 2199 } 2200 if (lshift_digits(&x1y1, B * 2) != MP_OKAY) { 2201 goto X1Y1; /* x1y1 = x1y1 << 2*B */ 2202 } 2203 if (signed_add(&x0y0, &t1, &t1) != MP_OKAY) { 2204 goto X1Y1; /* t1 = x0y0 + t1 */ 2205 } 2206 if (signed_add(&t1, &x1y1, c) != MP_OKAY) { 2207 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ 2208 } 2209 /* Algorithm succeeded set the return code to MP_OKAY */ 2210 err = MP_OKAY; 2211 2212 X1Y1: 2213 mp_clear(&x1y1); 2214 X0Y0: 2215 mp_clear(&x0y0); 2216 T1: 2217 mp_clear(&t1); 2218 Y1: 2219 mp_clear(&y1); 2220 Y0: 2221 mp_clear(&y0); 2222 X1: 2223 mp_clear(&x1); 2224 X0: 2225 mp_clear(&x0); 2226 ERR: 2227 return err; 2228 } 2229 2230 /* Fast (comba) multiplier 2231 * 2232 * This is the fast column-array [comba] multiplier. It is 2233 * designed to compute the columns of the product first 2234 * then handle the carries afterwards. This has the effect 2235 * of making the nested loops that compute the columns very 2236 * simple and schedulable on super-scalar processors. 2237 * 2238 * This has been modified to produce a variable number of 2239 * digits of output so if say only a half-product is required 2240 * you don't have to compute the upper half (a feature 2241 * required for fast Barrett reduction). 2242 * 2243 * Based on Algorithm 14.12 on pp.595 of HAC. 2244 * 2245 */ 2246 static int 2247 fast_col_array_multiply(mp_int * a, mp_int * b, mp_int * c, int digs) 2248 { 2249 int olduse, res, pa, ix, iz; 2250 /*LINTED*/ 2251 mp_digit W[MP_WARRAY]; 2252 mp_word _W; 2253 2254 /* grow the destination as required */ 2255 if (c->alloc < digs) { 2256 if ((res = mp_grow(c, digs)) != MP_OKAY) { 2257 return res; 2258 } 2259 } 2260 2261 /* number of output digits to produce */ 2262 pa = MIN(digs, a->used + b->used); 2263 2264 /* clear the carry */ 2265 _W = 0; 2266 for (ix = 0; ix < pa; ix++) { 2267 int tx, ty; 2268 int iy; 2269 mp_digit *tmpx, *tmpy; 2270 2271 /* get offsets into the two bignums */ 2272 ty = MIN(b->used-1, ix); 2273 tx = ix - ty; 2274 2275 /* setup temp aliases */ 2276 tmpx = a->dp + tx; 2277 tmpy = b->dp + ty; 2278 2279 /* this is the number of times the loop will iterrate, essentially 2280 while (tx++ < a->used && ty-- >= 0) { ... } 2281 */ 2282 iy = MIN(a->used-tx, ty+1); 2283 2284 /* execute loop */ 2285 for (iz = 0; iz < iy; ++iz) { 2286 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2287 2288 } 2289 2290 /* store term */ 2291 W[ix] = ((mp_digit)_W) & MP_MASK; 2292 2293 /* make next carry */ 2294 _W = _W >> ((mp_word)DIGIT_BIT); 2295 } 2296 2297 /* setup dest */ 2298 olduse = c->used; 2299 c->used = pa; 2300 2301 { 2302 mp_digit *tmpc; 2303 tmpc = c->dp; 2304 for (ix = 0; ix < pa+1; ix++) { 2305 /* now extract the previous digit [below the carry] */ 2306 *tmpc++ = W[ix]; 2307 } 2308 2309 /* clear unused digits [that existed in the old copy of c] */ 2310 for (; ix < olduse; ix++) { 2311 *tmpc++ = 0; 2312 } 2313 } 2314 trim_unused_digits(c); 2315 return MP_OKAY; 2316 } 2317 2318 /* return 1 if we can use fast column array multiply */ 2319 /* 2320 * The fast multiplier can be used if the output will 2321 * have less than MP_WARRAY digits and the number of 2322 * digits won't affect carry propagation 2323 */ 2324 static inline int 2325 can_use_fast_column_array(int ndigits, int used) 2326 { 2327 return (((unsigned)ndigits < MP_WARRAY) && 2328 used < (1 << (unsigned)((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT)))); 2329 } 2330 2331 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */ 2332 /* Revision: 1.2 $ */ 2333 /* Date: 2011/03/18 16:22:09 $ */ 2334 2335 2336 /* multiplies |a| * |b| and only computes upto digs digits of result 2337 * HAC pp. 595, Algorithm 14.12 Modified so you can control how 2338 * many digits of output are created. 2339 */ 2340 static int 2341 basic_multiply_partial_lower(mp_int * a, mp_int * b, mp_int * c, int digs) 2342 { 2343 mp_int t; 2344 int res, pa, pb, ix, iy; 2345 mp_digit u; 2346 mp_word r; 2347 mp_digit tmpx, *tmpt, *tmpy; 2348 2349 /* can we use the fast multiplier? */ 2350 if (can_use_fast_column_array(digs, MIN(a->used, b->used))) { 2351 return fast_col_array_multiply(a, b, c, digs); 2352 } 2353 2354 if ((res = mp_init_size(&t, digs)) != MP_OKAY) { 2355 return res; 2356 } 2357 t.used = digs; 2358 2359 /* compute the digits of the product directly */ 2360 pa = a->used; 2361 for (ix = 0; ix < pa; ix++) { 2362 /* set the carry to zero */ 2363 u = 0; 2364 2365 /* limit ourselves to making digs digits of output */ 2366 pb = MIN(b->used, digs - ix); 2367 2368 /* setup some aliases */ 2369 /* copy of the digit from a used within the nested loop */ 2370 tmpx = a->dp[ix]; 2371 2372 /* an alias for the destination shifted ix places */ 2373 tmpt = t.dp + ix; 2374 2375 /* an alias for the digits of b */ 2376 tmpy = b->dp; 2377 2378 /* compute the columns of the output and propagate the carry */ 2379 for (iy = 0; iy < pb; iy++) { 2380 /* compute the column as a mp_word */ 2381 r = ((mp_word)*tmpt) + 2382 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2383 ((mp_word) u); 2384 2385 /* the new column is the lower part of the result */ 2386 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2387 2388 /* get the carry word from the result */ 2389 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2390 } 2391 /* set carry if it is placed below digs */ 2392 if (ix + iy < digs) { 2393 *tmpt = u; 2394 } 2395 } 2396 2397 trim_unused_digits(&t); 2398 mp_exch(&t, c); 2399 2400 mp_clear(&t); 2401 return MP_OKAY; 2402 } 2403 2404 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */ 2405 /* Revision: 1.3 $ */ 2406 /* Date: 2011/03/18 16:43:04 $ */ 2407 2408 /* high level multiplication (handles sign) */ 2409 static int 2410 signed_multiply(mp_int * a, mp_int * b, mp_int * c) 2411 { 2412 int res, neg; 2413 2414 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG; 2415 /* use Toom-Cook? */ 2416 if (MIN(a->used, b->used) >= TOOM_MUL_CUTOFF) { 2417 res = toom_cook_multiply(a, b, c); 2418 } else if (MIN(a->used, b->used) >= KARATSUBA_MUL_CUTOFF) { 2419 /* use Karatsuba? */ 2420 res = karatsuba_multiply(a, b, c); 2421 } else { 2422 /* can we use the fast multiplier? */ 2423 int digs = a->used + b->used + 1; 2424 2425 if (can_use_fast_column_array(digs, MIN(a->used, b->used))) { 2426 res = fast_col_array_multiply(a, b, c, digs); 2427 } else { 2428 res = basic_multiply_partial_lower(a, b, c, (a)->used + (b)->used + 1); 2429 } 2430 } 2431 c->sign = (c->used > 0) ? neg : MP_ZPOS; 2432 return res; 2433 } 2434 2435 /* this is a modified version of fast_s_mul_digs that only produces 2436 * output digits *above* digs. See the comments for fast_s_mul_digs 2437 * to see how it works. 2438 * 2439 * This is used in the Barrett reduction since for one of the multiplications 2440 * only the higher digits were needed. This essentially halves the work. 2441 * 2442 * Based on Algorithm 14.12 on pp.595 of HAC. 2443 */ 2444 static int 2445 fast_basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs) 2446 { 2447 int olduse, res, pa, ix, iz; 2448 mp_digit W[MP_WARRAY]; 2449 mp_word _W; 2450 2451 /* grow the destination as required */ 2452 pa = a->used + b->used; 2453 if (c->alloc < pa) { 2454 if ((res = mp_grow(c, pa)) != MP_OKAY) { 2455 return res; 2456 } 2457 } 2458 2459 /* number of output digits to produce */ 2460 pa = a->used + b->used; 2461 _W = 0; 2462 for (ix = digs; ix < pa; ix++) { 2463 int tx, ty, iy; 2464 mp_digit *tmpx, *tmpy; 2465 2466 /* get offsets into the two bignums */ 2467 ty = MIN(b->used-1, ix); 2468 tx = ix - ty; 2469 2470 /* setup temp aliases */ 2471 tmpx = a->dp + tx; 2472 tmpy = b->dp + ty; 2473 2474 /* this is the number of times the loop will iterrate, essentially its 2475 while (tx++ < a->used && ty-- >= 0) { ... } 2476 */ 2477 iy = MIN(a->used-tx, ty+1); 2478 2479 /* execute loop */ 2480 for (iz = 0; iz < iy; iz++) { 2481 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 2482 } 2483 2484 /* store term */ 2485 W[ix] = ((mp_digit)_W) & MP_MASK; 2486 2487 /* make next carry */ 2488 _W = _W >> ((mp_word)DIGIT_BIT); 2489 } 2490 2491 /* setup dest */ 2492 olduse = c->used; 2493 c->used = pa; 2494 2495 { 2496 mp_digit *tmpc; 2497 2498 tmpc = c->dp + digs; 2499 for (ix = digs; ix < pa; ix++) { 2500 /* now extract the previous digit [below the carry] */ 2501 *tmpc++ = W[ix]; 2502 } 2503 2504 /* clear unused digits [that existed in the old copy of c] */ 2505 for (; ix < olduse; ix++) { 2506 *tmpc++ = 0; 2507 } 2508 } 2509 trim_unused_digits(c); 2510 return MP_OKAY; 2511 } 2512 2513 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */ 2514 /* Revision: 1.1.1.1 $ */ 2515 /* Date: 2011/03/12 22:58:18 $ */ 2516 2517 /* multiplies |a| * |b| and does not compute the lower digs digits 2518 * [meant to get the higher part of the product] 2519 */ 2520 static int 2521 basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs) 2522 { 2523 mp_int t; 2524 int res, pa, pb, ix, iy; 2525 mp_digit carry; 2526 mp_word r; 2527 mp_digit tmpx, *tmpt, *tmpy; 2528 2529 /* can we use the fast multiplier? */ 2530 if (can_use_fast_column_array(a->used + b->used + 1, MIN(a->used, b->used))) { 2531 return fast_basic_multiply_partial_upper(a, b, c, digs); 2532 } 2533 2534 if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) { 2535 return res; 2536 } 2537 t.used = a->used + b->used + 1; 2538 2539 pa = a->used; 2540 pb = b->used; 2541 for (ix = 0; ix < pa; ix++) { 2542 /* clear the carry */ 2543 carry = 0; 2544 2545 /* left hand side of A[ix] * B[iy] */ 2546 tmpx = a->dp[ix]; 2547 2548 /* alias to the address of where the digits will be stored */ 2549 tmpt = &(t.dp[digs]); 2550 2551 /* alias for where to read the right hand side from */ 2552 tmpy = b->dp + (digs - ix); 2553 2554 for (iy = digs - ix; iy < pb; iy++) { 2555 /* calculate the double precision result */ 2556 r = ((mp_word)*tmpt) + 2557 ((mp_word)tmpx) * ((mp_word)*tmpy++) + 2558 ((mp_word) carry); 2559 2560 /* get the lower part */ 2561 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 2562 2563 /* carry the carry */ 2564 carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT)); 2565 } 2566 *tmpt = carry; 2567 } 2568 trim_unused_digits(&t); 2569 mp_exch(&t, c); 2570 mp_clear(&t); 2571 return MP_OKAY; 2572 } 2573 2574 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */ 2575 /* Revision: 1.3 $ */ 2576 /* Date: 2011/03/18 16:43:04 $ */ 2577 2578 /* reduces x mod m, assumes 0 < x < m**2, mu is 2579 * precomputed via mp_reduce_setup. 2580 * From HAC pp.604 Algorithm 14.42 2581 */ 2582 static int 2583 mp_reduce(mp_int * x, mp_int * m, mp_int * mu) 2584 { 2585 mp_int q; 2586 int res, um = m->used; 2587 2588 /* q = x */ 2589 if ((res = mp_init_copy(&q, x)) != MP_OKAY) { 2590 return res; 2591 } 2592 2593 /* q1 = x / b**(k-1) */ 2594 rshift_digits(&q, um - 1); 2595 2596 /* according to HAC this optimization is ok */ 2597 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) { 2598 if ((res = signed_multiply(&q, mu, &q)) != MP_OKAY) { 2599 goto CLEANUP; 2600 } 2601 } else { 2602 if ((res = basic_multiply_partial_upper(&q, mu, &q, um)) != MP_OKAY) { 2603 goto CLEANUP; 2604 } 2605 } 2606 2607 /* q3 = q2 / b**(k+1) */ 2608 rshift_digits(&q, um + 1); 2609 2610 /* x = x mod b**(k+1), quick (no division) */ 2611 if ((res = modulo_2_to_power(x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) { 2612 goto CLEANUP; 2613 } 2614 2615 /* q = q * m mod b**(k+1), quick (no division) */ 2616 if ((res = basic_multiply_partial_lower(&q, m, &q, um + 1)) != MP_OKAY) { 2617 goto CLEANUP; 2618 } 2619 2620 /* x = x - q */ 2621 if ((res = signed_subtract(x, &q, x)) != MP_OKAY) { 2622 goto CLEANUP; 2623 } 2624 2625 /* If x < 0, add b**(k+1) to it */ 2626 if (compare_digit(x, 0) == MP_LT) { 2627 set_word(&q, 1); 2628 if ((res = lshift_digits(&q, um + 1)) != MP_OKAY) { 2629 goto CLEANUP; 2630 } 2631 if ((res = signed_add(x, &q, x)) != MP_OKAY) { 2632 goto CLEANUP; 2633 } 2634 } 2635 2636 /* Back off if it's too big */ 2637 while (signed_compare(x, m) != MP_LT) { 2638 if ((res = basic_subtract(x, m, x)) != MP_OKAY) { 2639 goto CLEANUP; 2640 } 2641 } 2642 2643 CLEANUP: 2644 mp_clear(&q); 2645 2646 return res; 2647 } 2648 2649 /* determines the setup value */ 2650 static int 2651 mp_reduce_2k_setup_l(mp_int *a, mp_int *d) 2652 { 2653 int res; 2654 mp_int tmp; 2655 2656 if ((res = mp_init(&tmp)) != MP_OKAY) { 2657 return res; 2658 } 2659 2660 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) { 2661 goto ERR; 2662 } 2663 2664 if ((res = basic_subtract(&tmp, a, d)) != MP_OKAY) { 2665 goto ERR; 2666 } 2667 2668 ERR: 2669 mp_clear(&tmp); 2670 return res; 2671 } 2672 2673 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */ 2674 /* Revision: 1.1.1.1 $ */ 2675 /* Date: 2011/03/12 22:58:18 $ */ 2676 2677 /* reduces a modulo n where n is of the form 2**p - d 2678 This differs from reduce_2k since "d" can be larger 2679 than a single digit. 2680 */ 2681 static int 2682 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d) 2683 { 2684 mp_int q; 2685 int p, res; 2686 2687 if ((res = mp_init(&q)) != MP_OKAY) { 2688 return res; 2689 } 2690 2691 p = mp_count_bits(n); 2692 top: 2693 /* q = a/2**p, a = a mod 2**p */ 2694 if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) { 2695 goto ERR; 2696 } 2697 2698 /* q = q * d */ 2699 if ((res = signed_multiply(&q, d, &q)) != MP_OKAY) { 2700 goto ERR; 2701 } 2702 2703 /* a = a + q */ 2704 if ((res = basic_add(a, &q, a)) != MP_OKAY) { 2705 goto ERR; 2706 } 2707 2708 if (compare_magnitude(a, n) != MP_LT) { 2709 basic_subtract(a, n, a); 2710 goto top; 2711 } 2712 2713 ERR: 2714 mp_clear(&q); 2715 return res; 2716 } 2717 2718 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */ 2719 /* Revision: 1.1.1.1 $ */ 2720 /* Date: 2011/03/12 22:58:18 $ */ 2721 2722 /* squaring using Toom-Cook 3-way algorithm */ 2723 static int 2724 toom_cook_square(mp_int *a, mp_int *b) 2725 { 2726 mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2; 2727 int res, B; 2728 2729 /* init temps */ 2730 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) { 2731 return res; 2732 } 2733 2734 /* B */ 2735 B = a->used / 3; 2736 2737 /* a = a2 * B**2 + a1 * B + a0 */ 2738 if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) { 2739 goto ERR; 2740 } 2741 2742 if ((res = mp_copy(a, &a1)) != MP_OKAY) { 2743 goto ERR; 2744 } 2745 rshift_digits(&a1, B); 2746 modulo_2_to_power(&a1, DIGIT_BIT * B, &a1); 2747 2748 if ((res = mp_copy(a, &a2)) != MP_OKAY) { 2749 goto ERR; 2750 } 2751 rshift_digits(&a2, B*2); 2752 2753 /* w0 = a0*a0 */ 2754 if ((res = square(&a0, &w0)) != MP_OKAY) { 2755 goto ERR; 2756 } 2757 2758 /* w4 = a2 * a2 */ 2759 if ((res = square(&a2, &w4)) != MP_OKAY) { 2760 goto ERR; 2761 } 2762 2763 /* w1 = (a2 + 2(a1 + 2a0))**2 */ 2764 if ((res = doubled(&a0, &tmp1)) != MP_OKAY) { 2765 goto ERR; 2766 } 2767 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2768 goto ERR; 2769 } 2770 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 2771 goto ERR; 2772 } 2773 if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) { 2774 goto ERR; 2775 } 2776 2777 if ((res = square(&tmp1, &w1)) != MP_OKAY) { 2778 goto ERR; 2779 } 2780 2781 /* w3 = (a0 + 2(a1 + 2a2))**2 */ 2782 if ((res = doubled(&a2, &tmp1)) != MP_OKAY) { 2783 goto ERR; 2784 } 2785 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) { 2786 goto ERR; 2787 } 2788 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) { 2789 goto ERR; 2790 } 2791 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2792 goto ERR; 2793 } 2794 2795 if ((res = square(&tmp1, &w3)) != MP_OKAY) { 2796 goto ERR; 2797 } 2798 2799 2800 /* w2 = (a2 + a1 + a0)**2 */ 2801 if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) { 2802 goto ERR; 2803 } 2804 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) { 2805 goto ERR; 2806 } 2807 if ((res = square(&tmp1, &w2)) != MP_OKAY) { 2808 goto ERR; 2809 } 2810 2811 /* now solve the matrix 2812 2813 0 0 0 0 1 2814 1 2 4 8 16 2815 1 1 1 1 1 2816 16 8 4 2 1 2817 1 0 0 0 0 2818 2819 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication. 2820 */ 2821 2822 /* r1 - r4 */ 2823 if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) { 2824 goto ERR; 2825 } 2826 /* r3 - r0 */ 2827 if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) { 2828 goto ERR; 2829 } 2830 /* r1/2 */ 2831 if ((res = half(&w1, &w1)) != MP_OKAY) { 2832 goto ERR; 2833 } 2834 /* r3/2 */ 2835 if ((res = half(&w3, &w3)) != MP_OKAY) { 2836 goto ERR; 2837 } 2838 /* r2 - r0 - r4 */ 2839 if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) { 2840 goto ERR; 2841 } 2842 if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) { 2843 goto ERR; 2844 } 2845 /* r1 - r2 */ 2846 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2847 goto ERR; 2848 } 2849 /* r3 - r2 */ 2850 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2851 goto ERR; 2852 } 2853 /* r1 - 8r0 */ 2854 if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) { 2855 goto ERR; 2856 } 2857 if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) { 2858 goto ERR; 2859 } 2860 /* r3 - 8r4 */ 2861 if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) { 2862 goto ERR; 2863 } 2864 if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) { 2865 goto ERR; 2866 } 2867 /* 3r2 - r1 - r3 */ 2868 if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) { 2869 goto ERR; 2870 } 2871 if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) { 2872 goto ERR; 2873 } 2874 if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) { 2875 goto ERR; 2876 } 2877 /* r1 - r2 */ 2878 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) { 2879 goto ERR; 2880 } 2881 /* r3 - r2 */ 2882 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) { 2883 goto ERR; 2884 } 2885 /* r1/3 */ 2886 if ((res = third(&w1, &w1, NULL)) != MP_OKAY) { 2887 goto ERR; 2888 } 2889 /* r3/3 */ 2890 if ((res = third(&w3, &w3, NULL)) != MP_OKAY) { 2891 goto ERR; 2892 } 2893 2894 /* at this point shift W[n] by B*n */ 2895 if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) { 2896 goto ERR; 2897 } 2898 if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) { 2899 goto ERR; 2900 } 2901 if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) { 2902 goto ERR; 2903 } 2904 if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) { 2905 goto ERR; 2906 } 2907 2908 if ((res = signed_add(&w0, &w1, b)) != MP_OKAY) { 2909 goto ERR; 2910 } 2911 if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) { 2912 goto ERR; 2913 } 2914 if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) { 2915 goto ERR; 2916 } 2917 if ((res = signed_add(&tmp1, b, b)) != MP_OKAY) { 2918 goto ERR; 2919 } 2920 2921 ERR: 2922 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL); 2923 return res; 2924 } 2925 2926 2927 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */ 2928 /* Revision: 1.1.1.1 $ */ 2929 /* Date: 2011/03/12 22:58:18 $ */ 2930 2931 /* Karatsuba squaring, computes b = a*a using three 2932 * half size squarings 2933 * 2934 * See comments of karatsuba_mul for details. It 2935 * is essentially the same algorithm but merely 2936 * tuned to perform recursive squarings. 2937 */ 2938 static int 2939 karatsuba_square(mp_int * a, mp_int * b) 2940 { 2941 mp_int x0, x1, t1, t2, x0x0, x1x1; 2942 int B, err; 2943 2944 err = MP_MEM; 2945 2946 /* min # of digits */ 2947 B = a->used; 2948 2949 /* now divide in two */ 2950 B = (unsigned)B >> 1; 2951 2952 /* init copy all the temps */ 2953 if (mp_init_size(&x0, B) != MP_OKAY) { 2954 goto ERR; 2955 } 2956 if (mp_init_size(&x1, a->used - B) != MP_OKAY) { 2957 goto X0; 2958 } 2959 /* init temps */ 2960 if (mp_init_size(&t1, a->used * 2) != MP_OKAY) { 2961 goto X1; 2962 } 2963 if (mp_init_size(&t2, a->used * 2) != MP_OKAY) { 2964 goto T1; 2965 } 2966 if (mp_init_size(&x0x0, B * 2) != MP_OKAY) { 2967 goto T2; 2968 } 2969 if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY) { 2970 goto X0X0; 2971 } 2972 2973 memcpy(x0.dp, a->dp, B * sizeof(*x0.dp)); 2974 memcpy(x1.dp, &a->dp[B], (a->used - B) * sizeof(*x1.dp)); 2975 2976 x0.used = B; 2977 x1.used = a->used - B; 2978 2979 trim_unused_digits(&x0); 2980 2981 /* now calc the products x0*x0 and x1*x1 */ 2982 if (square(&x0, &x0x0) != MP_OKAY) { 2983 goto X1X1; /* x0x0 = x0*x0 */ 2984 } 2985 if (square(&x1, &x1x1) != MP_OKAY) { 2986 goto X1X1; /* x1x1 = x1*x1 */ 2987 } 2988 /* now calc (x1+x0)**2 */ 2989 if (basic_add(&x1, &x0, &t1) != MP_OKAY) { 2990 goto X1X1; /* t1 = x1 - x0 */ 2991 } 2992 if (square(&t1, &t1) != MP_OKAY) { 2993 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */ 2994 } 2995 /* add x0y0 */ 2996 if (basic_add(&x0x0, &x1x1, &t2) != MP_OKAY) { 2997 goto X1X1; /* t2 = x0x0 + x1x1 */ 2998 } 2999 if (basic_subtract(&t1, &t2, &t1) != MP_OKAY) { 3000 goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */ 3001 } 3002 /* shift by B */ 3003 if (lshift_digits(&t1, B) != MP_OKAY) { 3004 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */ 3005 } 3006 if (lshift_digits(&x1x1, B * 2) != MP_OKAY) { 3007 goto X1X1; /* x1x1 = x1x1 << 2*B */ 3008 } 3009 if (signed_add(&x0x0, &t1, &t1) != MP_OKAY) { 3010 goto X1X1; /* t1 = x0x0 + t1 */ 3011 } 3012 if (signed_add(&t1, &x1x1, b) != MP_OKAY) { 3013 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */ 3014 } 3015 err = MP_OKAY; 3016 3017 X1X1: 3018 mp_clear(&x1x1); 3019 X0X0: 3020 mp_clear(&x0x0); 3021 T2: 3022 mp_clear(&t2); 3023 T1: 3024 mp_clear(&t1); 3025 X1: 3026 mp_clear(&x1); 3027 X0: 3028 mp_clear(&x0); 3029 ERR: 3030 return err; 3031 } 3032 3033 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */ 3034 /* Revision: 1.2 $ */ 3035 /* Date: 2011/03/12 23:43:54 $ */ 3036 3037 /* the jist of squaring... 3038 * you do like mult except the offset of the tmpx [one that 3039 * starts closer to zero] can't equal the offset of tmpy. 3040 * So basically you set up iy like before then you min it with 3041 * (ty-tx) so that it never happens. You double all those 3042 * you add in the inner loop 3043 3044 After that loop you do the squares and add them in. 3045 */ 3046 3047 static int 3048 fast_basic_square(mp_int * a, mp_int * b) 3049 { 3050 int olduse, res, pa, ix, iz; 3051 mp_digit W[MP_WARRAY], *tmpx; 3052 mp_word W1; 3053 3054 /* grow the destination as required */ 3055 pa = a->used + a->used; 3056 if (b->alloc < pa) { 3057 if ((res = mp_grow(b, pa)) != MP_OKAY) { 3058 return res; 3059 } 3060 } 3061 3062 /* number of output digits to produce */ 3063 W1 = 0; 3064 for (ix = 0; ix < pa; ix++) { 3065 int tx, ty, iy; 3066 mp_word _W; 3067 mp_digit *tmpy; 3068 3069 /* clear counter */ 3070 _W = 0; 3071 3072 /* get offsets into the two bignums */ 3073 ty = MIN(a->used-1, ix); 3074 tx = ix - ty; 3075 3076 /* setup temp aliases */ 3077 tmpx = a->dp + tx; 3078 tmpy = a->dp + ty; 3079 3080 /* this is the number of times the loop will iterrate, essentially 3081 while (tx++ < a->used && ty-- >= 0) { ... } 3082 */ 3083 iy = MIN(a->used-tx, ty+1); 3084 3085 /* now for squaring tx can never equal ty 3086 * we halve the distance since they approach at a rate of 2x 3087 * and we have to round because odd cases need to be executed 3088 */ 3089 iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1)); 3090 3091 /* execute loop */ 3092 for (iz = 0; iz < iy; iz++) { 3093 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--); 3094 } 3095 3096 /* double the inner product and add carry */ 3097 _W = _W + _W + W1; 3098 3099 /* even columns have the square term in them */ 3100 if ((ix&1) == 0) { 3101 _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]); 3102 } 3103 3104 /* store it */ 3105 W[ix] = (mp_digit)(_W & MP_MASK); 3106 3107 /* make next carry */ 3108 W1 = _W >> ((mp_word)DIGIT_BIT); 3109 } 3110 3111 /* setup dest */ 3112 olduse = b->used; 3113 b->used = a->used+a->used; 3114 3115 { 3116 mp_digit *tmpb; 3117 tmpb = b->dp; 3118 for (ix = 0; ix < pa; ix++) { 3119 *tmpb++ = W[ix] & MP_MASK; 3120 } 3121 3122 /* clear unused digits [that existed in the old copy of c] */ 3123 for (; ix < olduse; ix++) { 3124 *tmpb++ = 0; 3125 } 3126 } 3127 trim_unused_digits(b); 3128 return MP_OKAY; 3129 } 3130 3131 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */ 3132 /* Revision: 1.3 $ */ 3133 /* Date: 2011/03/18 16:43:04 $ */ 3134 3135 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */ 3136 static int 3137 basic_square(mp_int * a, mp_int * b) 3138 { 3139 mp_int t; 3140 int res, ix, iy, pa; 3141 mp_word r; 3142 mp_digit carry, tmpx, *tmpt; 3143 3144 pa = a->used; 3145 if ((res = mp_init_size(&t, 2*pa + 1)) != MP_OKAY) { 3146 return res; 3147 } 3148 3149 /* default used is maximum possible size */ 3150 t.used = 2*pa + 1; 3151 3152 for (ix = 0; ix < pa; ix++) { 3153 /* first calculate the digit at 2*ix */ 3154 /* calculate double precision result */ 3155 r = ((mp_word) t.dp[2*ix]) + 3156 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]); 3157 3158 /* store lower part in result */ 3159 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK)); 3160 3161 /* get the carry */ 3162 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3163 3164 /* left hand side of A[ix] * A[iy] */ 3165 tmpx = a->dp[ix]; 3166 3167 /* alias for where to store the results */ 3168 tmpt = t.dp + (2*ix + 1); 3169 3170 for (iy = ix + 1; iy < pa; iy++) { 3171 /* first calculate the product */ 3172 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]); 3173 3174 /* now calculate the double precision result, note we use 3175 * addition instead of *2 since it's easier to optimize 3176 */ 3177 r = ((mp_word) *tmpt) + r + r + ((mp_word) carry); 3178 3179 /* store lower part */ 3180 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3181 3182 /* get carry */ 3183 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3184 } 3185 /* propagate upwards */ 3186 while (carry != ((mp_digit) 0)) { 3187 r = ((mp_word) *tmpt) + ((mp_word) carry); 3188 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK)); 3189 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3190 } 3191 } 3192 3193 trim_unused_digits(&t); 3194 mp_exch(&t, b); 3195 mp_clear(&t); 3196 return MP_OKAY; 3197 } 3198 3199 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */ 3200 /* Revision: 1.1.1.1 $ */ 3201 /* Date: 2011/03/12 22:58:18 $ */ 3202 3203 #define TOOM_SQR_CUTOFF 400 3204 #define KARATSUBA_SQR_CUTOFF 120 3205 3206 /* computes b = a*a */ 3207 static int 3208 square(mp_int * a, mp_int * b) 3209 { 3210 int res; 3211 3212 /* use Toom-Cook? */ 3213 if (a->used >= TOOM_SQR_CUTOFF) { 3214 res = toom_cook_square(a, b); 3215 /* Karatsuba? */ 3216 } else if (a->used >= KARATSUBA_SQR_CUTOFF) { 3217 res = karatsuba_square(a, b); 3218 } else { 3219 /* can we use the fast comba multiplier? */ 3220 if (can_use_fast_column_array(a->used + a->used + 1, a->used)) { 3221 res = fast_basic_square(a, b); 3222 } else { 3223 res = basic_square(a, b); 3224 } 3225 } 3226 b->sign = MP_ZPOS; 3227 return res; 3228 } 3229 3230 /* find window size */ 3231 static inline int 3232 find_window_size(mp_int *X) 3233 { 3234 int x; 3235 3236 x = mp_count_bits(X); 3237 return (x <= 7) ? 2 : (x <= 36) ? 3 : (x <= 140) ? 4 : (x <= 450) ? 5 : (x <= 1303) ? 6 : (x <= 3529) ? 7 : 8; 3238 } 3239 3240 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */ 3241 /* Revision: 1.3 $ */ 3242 /* Date: 2011/03/18 16:43:04 $ */ 3243 3244 #define TAB_SIZE 256 3245 3246 static int 3247 basic_exponent_mod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 3248 { 3249 mp_digit buf; 3250 mp_int M[TAB_SIZE], res, mu; 3251 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 3252 int (*redux)(mp_int*,mp_int*,mp_int*); 3253 3254 winsize = find_window_size(X); 3255 3256 /* init M array */ 3257 /* init first cell */ 3258 if ((err = mp_init(&M[1])) != MP_OKAY) { 3259 return err; 3260 } 3261 3262 /* now init the second half of the array */ 3263 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3264 if ((err = mp_init(&M[x])) != MP_OKAY) { 3265 for (y = 1<<(winsize-1); y < x; y++) { 3266 mp_clear(&M[y]); 3267 } 3268 mp_clear(&M[1]); 3269 return err; 3270 } 3271 } 3272 3273 /* create mu, used for Barrett reduction */ 3274 if ((err = mp_init(&mu)) != MP_OKAY) { 3275 goto LBL_M; 3276 } 3277 3278 if (redmode == 0) { 3279 if ((err = mp_reduce_setup(&mu, P)) != MP_OKAY) { 3280 goto LBL_MU; 3281 } 3282 redux = mp_reduce; 3283 } else { 3284 if ((err = mp_reduce_2k_setup_l(P, &mu)) != MP_OKAY) { 3285 goto LBL_MU; 3286 } 3287 redux = mp_reduce_2k_l; 3288 } 3289 3290 /* create M table 3291 * 3292 * The M table contains powers of the base, 3293 * e.g. M[x] = G**x mod P 3294 * 3295 * The first half of the table is not 3296 * computed though accept for M[0] and M[1] 3297 */ 3298 if ((err = modulo(G, P, &M[1])) != MP_OKAY) { 3299 goto LBL_MU; 3300 } 3301 3302 /* compute the value at M[1<<(winsize-1)] by squaring 3303 * M[1] (winsize-1) times 3304 */ 3305 if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 3306 goto LBL_MU; 3307 } 3308 3309 for (x = 0; x < (winsize - 1); x++) { 3310 /* square it */ 3311 if ((err = square(&M[1 << (winsize - 1)], 3312 &M[1 << (winsize - 1)])) != MP_OKAY) { 3313 goto LBL_MU; 3314 } 3315 3316 /* reduce modulo P */ 3317 if ((err = redux(&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) { 3318 goto LBL_MU; 3319 } 3320 } 3321 3322 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P) 3323 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1) 3324 */ 3325 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 3326 if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 3327 goto LBL_MU; 3328 } 3329 if ((err = redux(&M[x], P, &mu)) != MP_OKAY) { 3330 goto LBL_MU; 3331 } 3332 } 3333 3334 /* setup result */ 3335 if ((err = mp_init(&res)) != MP_OKAY) { 3336 goto LBL_MU; 3337 } 3338 set_word(&res, 1); 3339 3340 /* set initial mode and bit cnt */ 3341 mode = 0; 3342 bitcnt = 1; 3343 buf = 0; 3344 digidx = X->used - 1; 3345 bitcpy = 0; 3346 bitbuf = 0; 3347 3348 for (;;) { 3349 /* grab next digit as required */ 3350 if (--bitcnt == 0) { 3351 /* if digidx == -1 we are out of digits */ 3352 if (digidx == -1) { 3353 break; 3354 } 3355 /* read next digit and reset the bitcnt */ 3356 buf = X->dp[digidx--]; 3357 bitcnt = (int) DIGIT_BIT; 3358 } 3359 3360 /* grab the next msb from the exponent */ 3361 y = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1; 3362 buf <<= (mp_digit)1; 3363 3364 /* if the bit is zero and mode == 0 then we ignore it 3365 * These represent the leading zero bits before the first 1 bit 3366 * in the exponent. Technically this opt is not required but it 3367 * does lower the # of trivial squaring/reductions used 3368 */ 3369 if (mode == 0 && y == 0) { 3370 continue; 3371 } 3372 3373 /* if the bit is zero and mode == 1 then we square */ 3374 if (mode == 1 && y == 0) { 3375 if ((err = square(&res, &res)) != MP_OKAY) { 3376 goto LBL_RES; 3377 } 3378 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3379 goto LBL_RES; 3380 } 3381 continue; 3382 } 3383 3384 /* else we add it to the window */ 3385 bitbuf |= (y << (winsize - ++bitcpy)); 3386 mode = 2; 3387 3388 if (bitcpy == winsize) { 3389 /* ok window is filled so square as required and multiply */ 3390 /* square first */ 3391 for (x = 0; x < winsize; x++) { 3392 if ((err = square(&res, &res)) != MP_OKAY) { 3393 goto LBL_RES; 3394 } 3395 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3396 goto LBL_RES; 3397 } 3398 } 3399 3400 /* then multiply */ 3401 if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) { 3402 goto LBL_RES; 3403 } 3404 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3405 goto LBL_RES; 3406 } 3407 3408 /* empty window and reset */ 3409 bitcpy = 0; 3410 bitbuf = 0; 3411 mode = 1; 3412 } 3413 } 3414 3415 /* if bits remain then square/multiply */ 3416 if (mode == 2 && bitcpy > 0) { 3417 /* square then multiply if the bit is set */ 3418 for (x = 0; x < bitcpy; x++) { 3419 if ((err = square(&res, &res)) != MP_OKAY) { 3420 goto LBL_RES; 3421 } 3422 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3423 goto LBL_RES; 3424 } 3425 3426 bitbuf <<= 1; 3427 if ((bitbuf & (1 << winsize)) != 0) { 3428 /* then multiply */ 3429 if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) { 3430 goto LBL_RES; 3431 } 3432 if ((err = redux(&res, P, &mu)) != MP_OKAY) { 3433 goto LBL_RES; 3434 } 3435 } 3436 } 3437 } 3438 3439 mp_exch(&res, Y); 3440 err = MP_OKAY; 3441 LBL_RES: 3442 mp_clear(&res); 3443 LBL_MU: 3444 mp_clear(&mu); 3445 LBL_M: 3446 mp_clear(&M[1]); 3447 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 3448 mp_clear(&M[x]); 3449 } 3450 return err; 3451 } 3452 3453 /* determines if a number is a valid DR modulus */ 3454 static int 3455 is_diminished_radix_modulus(mp_int *a) 3456 { 3457 int ix; 3458 3459 /* must be at least two digits */ 3460 if (a->used < 2) { 3461 return 0; 3462 } 3463 3464 /* must be of the form b**k - a [a <= b] so all 3465 * but the first digit must be equal to -1 (mod b). 3466 */ 3467 for (ix = 1; ix < a->used; ix++) { 3468 if (a->dp[ix] != MP_MASK) { 3469 return 0; 3470 } 3471 } 3472 return 1; 3473 } 3474 3475 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */ 3476 /* Revision: 1.1.1.1 $ */ 3477 /* Date: 2011/03/12 22:58:18 $ */ 3478 3479 /* determines if mp_reduce_2k can be used */ 3480 static int 3481 mp_reduce_is_2k(mp_int *a) 3482 { 3483 int ix, iy, iw; 3484 mp_digit iz; 3485 3486 if (a->used == 0) { 3487 return MP_NO; 3488 } 3489 if (a->used == 1) { 3490 return MP_YES; 3491 } 3492 if (a->used > 1) { 3493 iy = mp_count_bits(a); 3494 iz = 1; 3495 iw = 1; 3496 3497 /* Test every bit from the second digit up, must be 1 */ 3498 for (ix = DIGIT_BIT; ix < iy; ix++) { 3499 if ((a->dp[iw] & iz) == 0) { 3500 return MP_NO; 3501 } 3502 iz <<= 1; 3503 if (iz > (mp_digit)MP_MASK) { 3504 ++iw; 3505 iz = 1; 3506 } 3507 } 3508 } 3509 return MP_YES; 3510 } 3511 3512 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */ 3513 /* Revision: 1.1.1.1 $ */ 3514 /* Date: 2011/03/12 22:58:18 $ */ 3515 3516 3517 /* d = a * b (mod c) */ 3518 static int 3519 multiply_modulo(mp_int *d, mp_int * a, mp_int * b, mp_int * c) 3520 { 3521 mp_int t; 3522 int res; 3523 3524 if ((res = mp_init(&t)) != MP_OKAY) { 3525 return res; 3526 } 3527 3528 if ((res = signed_multiply(a, b, &t)) != MP_OKAY) { 3529 mp_clear(&t); 3530 return res; 3531 } 3532 res = modulo(&t, c, d); 3533 mp_clear(&t); 3534 return res; 3535 } 3536 3537 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */ 3538 /* Revision: 1.1.1.1 $ */ 3539 /* Date: 2011/03/12 22:58:18 $ */ 3540 3541 /* setups the montgomery reduction stuff */ 3542 static int 3543 mp_montgomery_setup(mp_int * n, mp_digit * rho) 3544 { 3545 mp_digit x, b; 3546 3547 /* fast inversion mod 2**k 3548 * 3549 * Based on the fact that 3550 * 3551 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n) 3552 * => 2*X*A - X*X*A*A = 1 3553 * => 2*(1) - (1) = 1 3554 */ 3555 b = n->dp[0]; 3556 3557 if ((b & 1) == 0) { 3558 return MP_VAL; 3559 } 3560 3561 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */ 3562 x *= 2 - b * x; /* here x*a==1 mod 2**8 */ 3563 x *= 2 - b * x; /* here x*a==1 mod 2**16 */ 3564 x *= 2 - b * x; /* here x*a==1 mod 2**32 */ 3565 if (/*CONSTCOND*/sizeof(mp_digit) == 8) { 3566 x *= 2 - b * x; /* here x*a==1 mod 2**64 */ 3567 } 3568 3569 /* rho = -1/m mod b */ 3570 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK; 3571 3572 return MP_OKAY; 3573 } 3574 3575 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */ 3576 /* Revision: 1.1.1.1 $ */ 3577 /* Date: 2011/03/12 22:58:18 $ */ 3578 3579 /* computes xR**-1 == x (mod N) via Montgomery Reduction 3580 * 3581 * This is an optimized implementation of montgomery_reduce 3582 * which uses the comba method to quickly calculate the columns of the 3583 * reduction. 3584 * 3585 * Based on Algorithm 14.32 on pp.601 of HAC. 3586 */ 3587 static int 3588 fast_mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho) 3589 { 3590 int ix, res, olduse; 3591 /*LINTED*/ 3592 mp_word W[MP_WARRAY]; 3593 3594 /* get old used count */ 3595 olduse = x->used; 3596 3597 /* grow a as required */ 3598 if (x->alloc < n->used + 1) { 3599 if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) { 3600 return res; 3601 } 3602 } 3603 3604 /* first we have to get the digits of the input into 3605 * an array of double precision words W[...] 3606 */ 3607 { 3608 mp_word *_W; 3609 mp_digit *tmpx; 3610 3611 /* alias for the W[] array */ 3612 _W = W; 3613 3614 /* alias for the digits of x*/ 3615 tmpx = x->dp; 3616 3617 /* copy the digits of a into W[0..a->used-1] */ 3618 for (ix = 0; ix < x->used; ix++) { 3619 *_W++ = *tmpx++; 3620 } 3621 3622 /* zero the high words of W[a->used..m->used*2] */ 3623 for (; ix < n->used * 2 + 1; ix++) { 3624 *_W++ = 0; 3625 } 3626 } 3627 3628 /* now we proceed to zero successive digits 3629 * from the least significant upwards 3630 */ 3631 for (ix = 0; ix < n->used; ix++) { 3632 /* mu = ai * m' mod b 3633 * 3634 * We avoid a double precision multiplication (which isn't required) 3635 * by casting the value down to a mp_digit. Note this requires 3636 * that W[ix-1] have the carry cleared (see after the inner loop) 3637 */ 3638 mp_digit mu; 3639 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK); 3640 3641 /* a = a + mu * m * b**i 3642 * 3643 * This is computed in place and on the fly. The multiplication 3644 * by b**i is handled by offseting which columns the results 3645 * are added to. 3646 * 3647 * Note the comba method normally doesn't handle carries in the 3648 * inner loop In this case we fix the carry from the previous 3649 * column since the Montgomery reduction requires digits of the 3650 * result (so far) [see above] to work. This is 3651 * handled by fixing up one carry after the inner loop. The 3652 * carry fixups are done in order so after these loops the 3653 * first m->used words of W[] have the carries fixed 3654 */ 3655 { 3656 int iy; 3657 mp_digit *tmpn; 3658 mp_word *_W; 3659 3660 /* alias for the digits of the modulus */ 3661 tmpn = n->dp; 3662 3663 /* Alias for the columns set by an offset of ix */ 3664 _W = W + ix; 3665 3666 /* inner loop */ 3667 for (iy = 0; iy < n->used; iy++) { 3668 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++); 3669 } 3670 } 3671 3672 /* now fix carry for next digit, W[ix+1] */ 3673 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT); 3674 } 3675 3676 /* now we have to propagate the carries and 3677 * shift the words downward [all those least 3678 * significant digits we zeroed]. 3679 */ 3680 { 3681 mp_digit *tmpx; 3682 mp_word *_W, *_W1; 3683 3684 /* nox fix rest of carries */ 3685 3686 /* alias for current word */ 3687 _W1 = W + ix; 3688 3689 /* alias for next word, where the carry goes */ 3690 _W = W + ++ix; 3691 3692 for (; ix <= n->used * 2 + 1; ix++) { 3693 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT); 3694 } 3695 3696 /* copy out, A = A/b**n 3697 * 3698 * The result is A/b**n but instead of converting from an 3699 * array of mp_word to mp_digit than calling rshift_digits 3700 * we just copy them in the right order 3701 */ 3702 3703 /* alias for destination word */ 3704 tmpx = x->dp; 3705 3706 /* alias for shifted double precision result */ 3707 _W = W + n->used; 3708 3709 for (ix = 0; ix < n->used + 1; ix++) { 3710 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK)); 3711 } 3712 3713 /* zero oldused digits, if the input a was larger than 3714 * m->used+1 we'll have to clear the digits 3715 */ 3716 for (; ix < olduse; ix++) { 3717 *tmpx++ = 0; 3718 } 3719 } 3720 3721 /* set the max used and clamp */ 3722 x->used = n->used + 1; 3723 trim_unused_digits(x); 3724 3725 /* if A >= m then A = A - m */ 3726 if (compare_magnitude(x, n) != MP_LT) { 3727 return basic_subtract(x, n, x); 3728 } 3729 return MP_OKAY; 3730 } 3731 3732 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */ 3733 /* Revision: 1.2 $ */ 3734 /* Date: 2011/03/18 16:22:09 $ */ 3735 3736 /* computes xR**-1 == x (mod N) via Montgomery Reduction */ 3737 static int 3738 mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho) 3739 { 3740 int ix, res, digs; 3741 mp_digit mu; 3742 3743 /* can the fast reduction [comba] method be used? 3744 * 3745 * Note that unlike in mul you're safely allowed *less* 3746 * than the available columns [255 per default] since carries 3747 * are fixed up in the inner loop. 3748 */ 3749 digs = n->used * 2 + 1; 3750 if (can_use_fast_column_array(digs, n->used)) { 3751 return fast_mp_montgomery_reduce(x, n, rho); 3752 } 3753 3754 /* grow the input as required */ 3755 if (x->alloc < digs) { 3756 if ((res = mp_grow(x, digs)) != MP_OKAY) { 3757 return res; 3758 } 3759 } 3760 x->used = digs; 3761 3762 for (ix = 0; ix < n->used; ix++) { 3763 /* mu = ai * rho mod b 3764 * 3765 * The value of rho must be precalculated via 3766 * montgomery_setup() such that 3767 * it equals -1/n0 mod b this allows the 3768 * following inner loop to reduce the 3769 * input one digit at a time 3770 */ 3771 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK); 3772 3773 /* a = a + mu * m * b**i */ 3774 { 3775 int iy; 3776 mp_digit *tmpn, *tmpx, carry; 3777 mp_word r; 3778 3779 /* alias for digits of the modulus */ 3780 tmpn = n->dp; 3781 3782 /* alias for the digits of x [the input] */ 3783 tmpx = x->dp + ix; 3784 3785 /* set the carry to zero */ 3786 carry = 0; 3787 3788 /* Multiply and add in place */ 3789 for (iy = 0; iy < n->used; iy++) { 3790 /* compute product and sum */ 3791 r = ((mp_word)mu) * ((mp_word)*tmpn++) + 3792 ((mp_word) carry) + ((mp_word) * tmpx); 3793 3794 /* get carry */ 3795 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT)); 3796 3797 /* fix digit */ 3798 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK)); 3799 } 3800 /* At this point the ix'th digit of x should be zero */ 3801 3802 3803 /* propagate carries upwards as required*/ 3804 while (carry) { 3805 *tmpx += carry; 3806 carry = *tmpx >> DIGIT_BIT; 3807 *tmpx++ &= MP_MASK; 3808 } 3809 } 3810 } 3811 3812 /* at this point the n.used'th least 3813 * significant digits of x are all zero 3814 * which means we can shift x to the 3815 * right by n.used digits and the 3816 * residue is unchanged. 3817 */ 3818 3819 /* x = x/b**n.used */ 3820 trim_unused_digits(x); 3821 rshift_digits(x, n->used); 3822 3823 /* if x >= n then x = x - n */ 3824 if (compare_magnitude(x, n) != MP_LT) { 3825 return basic_subtract(x, n, x); 3826 } 3827 3828 return MP_OKAY; 3829 } 3830 3831 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */ 3832 /* Revision: 1.3 $ */ 3833 /* Date: 2011/03/18 16:43:04 $ */ 3834 3835 /* determines the setup value */ 3836 static void 3837 diminished_radix_setup(mp_int *a, mp_digit *d) 3838 { 3839 /* the casts are required if DIGIT_BIT is one less than 3840 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31] 3841 */ 3842 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) - 3843 ((mp_word)a->dp[0])); 3844 } 3845 3846 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */ 3847 /* Revision: 1.1.1.1 $ */ 3848 /* Date: 2011/03/12 22:58:18 $ */ 3849 3850 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm. 3851 * 3852 * Based on algorithm from the paper 3853 * 3854 * "Generating Efficient Primes for Discrete Log Cryptosystems" 3855 * Chae Hoon Lim, Pil Joong Lee, 3856 * POSTECH Information Research Laboratories 3857 * 3858 * The modulus must be of a special format [see manual] 3859 * 3860 * Has been modified to use algorithm 7.10 from the LTM book instead 3861 * 3862 * Input x must be in the range 0 <= x <= (n-1)**2 3863 */ 3864 static int 3865 diminished_radix_reduce(mp_int * x, mp_int * n, mp_digit k) 3866 { 3867 int err, i, m; 3868 mp_word r; 3869 mp_digit mu, *tmpx1, *tmpx2; 3870 3871 /* m = digits in modulus */ 3872 m = n->used; 3873 3874 /* ensure that "x" has at least 2m digits */ 3875 if (x->alloc < m + m) { 3876 if ((err = mp_grow(x, m + m)) != MP_OKAY) { 3877 return err; 3878 } 3879 } 3880 3881 /* top of loop, this is where the code resumes if 3882 * another reduction pass is required. 3883 */ 3884 top: 3885 /* aliases for digits */ 3886 /* alias for lower half of x */ 3887 tmpx1 = x->dp; 3888 3889 /* alias for upper half of x, or x/B**m */ 3890 tmpx2 = x->dp + m; 3891 3892 /* set carry to zero */ 3893 mu = 0; 3894 3895 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */ 3896 for (i = 0; i < m; i++) { 3897 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu; 3898 *tmpx1++ = (mp_digit)(r & MP_MASK); 3899 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT)); 3900 } 3901 3902 /* set final carry */ 3903 *tmpx1++ = mu; 3904 3905 /* zero words above m */ 3906 for (i = m + 1; i < x->used; i++) { 3907 *tmpx1++ = 0; 3908 } 3909 3910 /* clamp, sub and return */ 3911 trim_unused_digits(x); 3912 3913 /* if x >= n then subtract and reduce again 3914 * Each successive "recursion" makes the input smaller and smaller. 3915 */ 3916 if (compare_magnitude(x, n) != MP_LT) { 3917 basic_subtract(x, n, x); 3918 goto top; 3919 } 3920 return MP_OKAY; 3921 } 3922 3923 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */ 3924 /* Revision: 1.1.1.1 $ */ 3925 /* Date: 2011/03/12 22:58:18 $ */ 3926 3927 /* determines the setup value */ 3928 static int 3929 mp_reduce_2k_setup(mp_int *a, mp_digit *d) 3930 { 3931 int res, p; 3932 mp_int tmp; 3933 3934 if ((res = mp_init(&tmp)) != MP_OKAY) { 3935 return res; 3936 } 3937 3938 p = mp_count_bits(a); 3939 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) { 3940 mp_clear(&tmp); 3941 return res; 3942 } 3943 3944 if ((res = basic_subtract(&tmp, a, &tmp)) != MP_OKAY) { 3945 mp_clear(&tmp); 3946 return res; 3947 } 3948 3949 *d = tmp.dp[0]; 3950 mp_clear(&tmp); 3951 return MP_OKAY; 3952 } 3953 3954 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */ 3955 /* Revision: 1.1.1.1 $ */ 3956 /* Date: 2011/03/12 22:58:18 $ */ 3957 3958 /* reduces a modulo n where n is of the form 2**p - d */ 3959 static int 3960 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d) 3961 { 3962 mp_int q; 3963 int p, res; 3964 3965 if ((res = mp_init(&q)) != MP_OKAY) { 3966 return res; 3967 } 3968 3969 p = mp_count_bits(n); 3970 top: 3971 /* q = a/2**p, a = a mod 2**p */ 3972 if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) { 3973 goto ERR; 3974 } 3975 3976 if (d != 1) { 3977 /* q = q * d */ 3978 if ((res = multiply_digit(&q, d, &q)) != MP_OKAY) { 3979 goto ERR; 3980 } 3981 } 3982 3983 /* a = a + q */ 3984 if ((res = basic_add(a, &q, a)) != MP_OKAY) { 3985 goto ERR; 3986 } 3987 3988 if (compare_magnitude(a, n) != MP_LT) { 3989 basic_subtract(a, n, a); 3990 goto top; 3991 } 3992 3993 ERR: 3994 mp_clear(&q); 3995 return res; 3996 } 3997 3998 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */ 3999 /* Revision: 1.1.1.1 $ */ 4000 /* Date: 2011/03/12 22:58:18 $ */ 4001 4002 /* 4003 * shifts with subtractions when the result is greater than b. 4004 * 4005 * The method is slightly modified to shift B unconditionally upto just under 4006 * the leading bit of b. This saves alot of multiple precision shifting. 4007 */ 4008 static int 4009 mp_montgomery_calc_normalization(mp_int * a, mp_int * b) 4010 { 4011 int x, bits, res; 4012 4013 /* how many bits of last digit does b use */ 4014 bits = mp_count_bits(b) % DIGIT_BIT; 4015 4016 if (b->used > 1) { 4017 if ((res = mp_2expt(a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) { 4018 return res; 4019 } 4020 } else { 4021 set_word(a, 1); 4022 bits = 1; 4023 } 4024 4025 4026 /* now compute C = A * B mod b */ 4027 for (x = bits - 1; x < (int)DIGIT_BIT; x++) { 4028 if ((res = doubled(a, a)) != MP_OKAY) { 4029 return res; 4030 } 4031 if (compare_magnitude(a, b) != MP_LT) { 4032 if ((res = basic_subtract(a, b, a)) != MP_OKAY) { 4033 return res; 4034 } 4035 } 4036 } 4037 4038 return MP_OKAY; 4039 } 4040 4041 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */ 4042 /* Revision: 1.1.1.1 $ */ 4043 /* Date: 2011/03/12 22:58:18 $ */ 4044 4045 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85 4046 * 4047 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation. 4048 * The value of k changes based on the size of the exponent. 4049 * 4050 * Uses Montgomery or Diminished Radix reduction [whichever appropriate] 4051 */ 4052 4053 #define TAB_SIZE 256 4054 4055 static int 4056 fast_exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode) 4057 { 4058 mp_int M[TAB_SIZE], res; 4059 mp_digit buf, mp; 4060 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; 4061 4062 /* use a pointer to the reduction algorithm. This allows us to use 4063 * one of many reduction algorithms without modding the guts of 4064 * the code with if statements everywhere. 4065 */ 4066 int (*redux)(mp_int*,mp_int*,mp_digit); 4067 4068 winsize = find_window_size(X); 4069 4070 /* init M array */ 4071 /* init first cell */ 4072 if ((err = mp_init(&M[1])) != MP_OKAY) { 4073 return err; 4074 } 4075 4076 /* now init the second half of the array */ 4077 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4078 if ((err = mp_init(&M[x])) != MP_OKAY) { 4079 for (y = 1<<(winsize-1); y < x; y++) { 4080 mp_clear(&M[y]); 4081 } 4082 mp_clear(&M[1]); 4083 return err; 4084 } 4085 } 4086 4087 /* determine and setup reduction code */ 4088 if (redmode == 0) { 4089 /* now setup montgomery */ 4090 if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) { 4091 goto LBL_M; 4092 } 4093 4094 /* automatically pick the comba one if available (saves quite a few calls/ifs) */ 4095 if (can_use_fast_column_array(P->used + P->used + 1, P->used)) { 4096 redux = fast_mp_montgomery_reduce; 4097 } else { 4098 /* use slower baseline Montgomery method */ 4099 redux = mp_montgomery_reduce; 4100 } 4101 } else if (redmode == 1) { 4102 /* setup DR reduction for moduli of the form B**k - b */ 4103 diminished_radix_setup(P, &mp); 4104 redux = diminished_radix_reduce; 4105 } else { 4106 /* setup DR reduction for moduli of the form 2**k - b */ 4107 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) { 4108 goto LBL_M; 4109 } 4110 redux = mp_reduce_2k; 4111 } 4112 4113 /* setup result */ 4114 if ((err = mp_init(&res)) != MP_OKAY) { 4115 goto LBL_M; 4116 } 4117 4118 /* create M table 4119 * 4120 4121 * 4122 * The first half of the table is not computed though accept for M[0] and M[1] 4123 */ 4124 4125 if (redmode == 0) { 4126 /* now we need R mod m */ 4127 if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) { 4128 goto LBL_RES; 4129 } 4130 4131 /* now set M[1] to G * R mod m */ 4132 if ((err = multiply_modulo(&M[1], G, &res, P)) != MP_OKAY) { 4133 goto LBL_RES; 4134 } 4135 } else { 4136 set_word(&res, 1); 4137 if ((err = modulo(G, P, &M[1])) != MP_OKAY) { 4138 goto LBL_RES; 4139 } 4140 } 4141 4142 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */ 4143 if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) { 4144 goto LBL_RES; 4145 } 4146 4147 for (x = 0; x < (winsize - 1); x++) { 4148 if ((err = square(&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) { 4149 goto LBL_RES; 4150 } 4151 if ((err = (*redux)(&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) { 4152 goto LBL_RES; 4153 } 4154 } 4155 4156 /* create upper table */ 4157 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) { 4158 if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) { 4159 goto LBL_RES; 4160 } 4161 if ((err = (*redux)(&M[x], P, mp)) != MP_OKAY) { 4162 goto LBL_RES; 4163 } 4164 } 4165 4166 /* set initial mode and bit cnt */ 4167 mode = 0; 4168 bitcnt = 1; 4169 buf = 0; 4170 digidx = X->used - 1; 4171 bitcpy = 0; 4172 bitbuf = 0; 4173 4174 for (;;) { 4175 /* grab next digit as required */ 4176 if (--bitcnt == 0) { 4177 /* if digidx == -1 we are out of digits so break */ 4178 if (digidx == -1) { 4179 break; 4180 } 4181 /* read next digit and reset bitcnt */ 4182 buf = X->dp[digidx--]; 4183 bitcnt = (int)DIGIT_BIT; 4184 } 4185 4186 /* grab the next msb from the exponent */ 4187 y = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1; 4188 buf <<= (mp_digit)1; 4189 4190 /* if the bit is zero and mode == 0 then we ignore it 4191 * These represent the leading zero bits before the first 1 bit 4192 * in the exponent. Technically this opt is not required but it 4193 * does lower the # of trivial squaring/reductions used 4194 */ 4195 if (mode == 0 && y == 0) { 4196 continue; 4197 } 4198 4199 /* if the bit is zero and mode == 1 then we square */ 4200 if (mode == 1 && y == 0) { 4201 if ((err = square(&res, &res)) != MP_OKAY) { 4202 goto LBL_RES; 4203 } 4204 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4205 goto LBL_RES; 4206 } 4207 continue; 4208 } 4209 4210 /* else we add it to the window */ 4211 bitbuf |= (y << (winsize - ++bitcpy)); 4212 mode = 2; 4213 4214 if (bitcpy == winsize) { 4215 /* ok window is filled so square as required and multiply */ 4216 /* square first */ 4217 for (x = 0; x < winsize; x++) { 4218 if ((err = square(&res, &res)) != MP_OKAY) { 4219 goto LBL_RES; 4220 } 4221 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4222 goto LBL_RES; 4223 } 4224 } 4225 4226 /* then multiply */ 4227 if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) { 4228 goto LBL_RES; 4229 } 4230 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4231 goto LBL_RES; 4232 } 4233 4234 /* empty window and reset */ 4235 bitcpy = 0; 4236 bitbuf = 0; 4237 mode = 1; 4238 } 4239 } 4240 4241 /* if bits remain then square/multiply */ 4242 if (mode == 2 && bitcpy > 0) { 4243 /* square then multiply if the bit is set */ 4244 for (x = 0; x < bitcpy; x++) { 4245 if ((err = square(&res, &res)) != MP_OKAY) { 4246 goto LBL_RES; 4247 } 4248 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4249 goto LBL_RES; 4250 } 4251 4252 /* get next bit of the window */ 4253 bitbuf <<= 1; 4254 if ((bitbuf & (1 << winsize)) != 0) { 4255 /* then multiply */ 4256 if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) { 4257 goto LBL_RES; 4258 } 4259 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4260 goto LBL_RES; 4261 } 4262 } 4263 } 4264 } 4265 4266 if (redmode == 0) { 4267 /* fixup result if Montgomery reduction is used 4268 * recall that any value in a Montgomery system is 4269 * actually multiplied by R mod n. So we have 4270 * to reduce one more time to cancel out the factor 4271 * of R. 4272 */ 4273 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) { 4274 goto LBL_RES; 4275 } 4276 } 4277 4278 /* swap res with Y */ 4279 mp_exch(&res, Y); 4280 err = MP_OKAY; 4281 LBL_RES: 4282 mp_clear(&res); 4283 LBL_M: 4284 mp_clear(&M[1]); 4285 for (x = 1<<(winsize-1); x < (1 << winsize); x++) { 4286 mp_clear(&M[x]); 4287 } 4288 return err; 4289 } 4290 4291 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_exponent_modulo.c,v $ */ 4292 /* Revision: 1.4 $ */ 4293 /* Date: 2011/03/18 16:43:04 $ */ 4294 4295 /* this is a shell function that calls either the normal or Montgomery 4296 * exptmod functions. Originally the call to the montgomery code was 4297 * embedded in the normal function but that wasted alot of stack space 4298 * for nothing (since 99% of the time the Montgomery code would be called) 4299 */ 4300 static int 4301 exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int *Y) 4302 { 4303 int diminished_radix; 4304 4305 /* modulus P must be positive */ 4306 if (P->sign == MP_NEG) { 4307 return MP_VAL; 4308 } 4309 4310 /* if exponent X is negative we have to recurse */ 4311 if (X->sign == MP_NEG) { 4312 mp_int tmpG, tmpX; 4313 int err; 4314 4315 /* first compute 1/G mod P */ 4316 if ((err = mp_init(&tmpG)) != MP_OKAY) { 4317 return err; 4318 } 4319 if ((err = modular_inverse(&tmpG, G, P)) != MP_OKAY) { 4320 mp_clear(&tmpG); 4321 return err; 4322 } 4323 4324 /* now get |X| */ 4325 if ((err = mp_init(&tmpX)) != MP_OKAY) { 4326 mp_clear(&tmpG); 4327 return err; 4328 } 4329 if ((err = absolute(X, &tmpX)) != MP_OKAY) { 4330 mp_clear_multi(&tmpG, &tmpX, NULL); 4331 return err; 4332 } 4333 4334 /* and now compute (1/G)**|X| instead of G**X [X < 0] */ 4335 err = exponent_modulo(&tmpG, &tmpX, P, Y); 4336 mp_clear_multi(&tmpG, &tmpX, NULL); 4337 return err; 4338 } 4339 4340 /* modified diminished radix reduction */ 4341 if (mp_reduce_is_2k_l(P) == MP_YES) { 4342 return basic_exponent_mod(G, X, P, Y, 1); 4343 } 4344 4345 /* is it a DR modulus? */ 4346 diminished_radix = is_diminished_radix_modulus(P); 4347 4348 /* if not, is it a unrestricted DR modulus? */ 4349 if (!diminished_radix) { 4350 diminished_radix = mp_reduce_is_2k(P) << 1; 4351 } 4352 4353 /* if the modulus is odd or diminished_radix, use the montgomery method */ 4354 if (BN_is_odd(P) == 1 || diminished_radix) { 4355 return fast_exponent_modulo(G, X, P, Y, diminished_radix); 4356 } 4357 /* otherwise use the generic Barrett reduction technique */ 4358 return basic_exponent_mod(G, X, P, Y, 0); 4359 } 4360 4361 /* reverse an array, used for radix code */ 4362 static void 4363 bn_reverse(unsigned char *s, int len) 4364 { 4365 int ix, iy; 4366 uint8_t t; 4367 4368 for (ix = 0, iy = len - 1; ix < iy ; ix++, --iy) { 4369 t = s[ix]; 4370 s[ix] = s[iy]; 4371 s[iy] = t; 4372 } 4373 } 4374 4375 static inline int 4376 is_power_of_two(mp_digit b, int *p) 4377 { 4378 int x; 4379 4380 /* fast return if no power of two */ 4381 if ((b==0) || (b & (b-1))) { 4382 return 0; 4383 } 4384 4385 for (x = 0; x < DIGIT_BIT; x++) { 4386 if (b == (((mp_digit)1)<<x)) { 4387 *p = x; 4388 return 1; 4389 } 4390 } 4391 return 0; 4392 } 4393 4394 /* single digit division (based on routine from MPI) */ 4395 static int 4396 signed_divide_word(mp_int *a, mp_digit b, mp_int *c, mp_digit *d) 4397 { 4398 mp_int q; 4399 mp_word w; 4400 mp_digit t; 4401 int res, ix; 4402 4403 /* cannot divide by zero */ 4404 if (b == 0) { 4405 return MP_VAL; 4406 } 4407 4408 /* quick outs */ 4409 if (b == 1 || MP_ISZERO(a) == 1) { 4410 if (d != NULL) { 4411 *d = 0; 4412 } 4413 if (c != NULL) { 4414 return mp_copy(a, c); 4415 } 4416 return MP_OKAY; 4417 } 4418 4419 /* power of two ? */ 4420 if (is_power_of_two(b, &ix) == 1) { 4421 if (d != NULL) { 4422 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1); 4423 } 4424 if (c != NULL) { 4425 return rshift_bits(a, ix, c, NULL); 4426 } 4427 return MP_OKAY; 4428 } 4429 4430 /* three? */ 4431 if (b == 3) { 4432 return third(a, c, d); 4433 } 4434 4435 /* no easy answer [c'est la vie]. Just division */ 4436 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) { 4437 return res; 4438 } 4439 4440 q.used = a->used; 4441 q.sign = a->sign; 4442 w = 0; 4443 for (ix = a->used - 1; ix >= 0; ix--) { 4444 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]); 4445 4446 if (w >= b) { 4447 t = (mp_digit)(w / b); 4448 w -= ((mp_word)t) * ((mp_word)b); 4449 } else { 4450 t = 0; 4451 } 4452 q.dp[ix] = (mp_digit)t; 4453 } 4454 4455 if (d != NULL) { 4456 *d = (mp_digit)w; 4457 } 4458 4459 if (c != NULL) { 4460 trim_unused_digits(&q); 4461 mp_exch(&q, c); 4462 } 4463 mp_clear(&q); 4464 4465 return res; 4466 } 4467 4468 static const mp_digit ltm_prime_tab[] = { 4469 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 4470 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, 4471 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, 4472 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 4473 #ifndef MP_8BIT 4474 0x0083, 4475 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, 4476 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, 4477 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, 4478 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, 4479 4480 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, 4481 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, 4482 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, 4483 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, 4484 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, 4485 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265, 4486 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, 4487 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, 4488 4489 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, 4490 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, 4491 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, 4492 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, 4493 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, 4494 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, 4495 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, 4496 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, 4497 4498 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, 4499 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, 4500 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, 4501 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, 4502 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, 4503 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, 4504 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, 4505 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653 4506 #endif 4507 }; 4508 4509 #define PRIME_SIZE __arraycount(ltm_prime_tab) 4510 4511 static inline int 4512 mp_prime_is_divisible(mp_int *a, int *result) 4513 { 4514 int err, ix; 4515 mp_digit res; 4516 4517 /* default to not */ 4518 *result = MP_NO; 4519 4520 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4521 /* what is a mod LBL_prime_tab[ix] */ 4522 if ((err = signed_divide_word(a, ltm_prime_tab[ix], NULL, &res)) != MP_OKAY) { 4523 return err; 4524 } 4525 4526 /* is the residue zero? */ 4527 if (res == 0) { 4528 *result = MP_YES; 4529 return MP_OKAY; 4530 } 4531 } 4532 4533 return MP_OKAY; 4534 } 4535 4536 /* single digit addition */ 4537 static int 4538 add_single_digit(mp_int *a, mp_digit b, mp_int *c) 4539 { 4540 int res, ix, oldused; 4541 mp_digit *tmpa, *tmpc, mu; 4542 4543 /* grow c as required */ 4544 if (c->alloc < a->used + 1) { 4545 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4546 return res; 4547 } 4548 } 4549 4550 /* if a is negative and |a| >= b, call c = |a| - b */ 4551 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) { 4552 /* temporarily fix sign of a */ 4553 a->sign = MP_ZPOS; 4554 4555 /* c = |a| - b */ 4556 res = signed_subtract_word(a, b, c); 4557 4558 /* fix sign */ 4559 a->sign = c->sign = MP_NEG; 4560 4561 /* clamp */ 4562 trim_unused_digits(c); 4563 4564 return res; 4565 } 4566 4567 /* old number of used digits in c */ 4568 oldused = c->used; 4569 4570 /* sign always positive */ 4571 c->sign = MP_ZPOS; 4572 4573 /* source alias */ 4574 tmpa = a->dp; 4575 4576 /* destination alias */ 4577 tmpc = c->dp; 4578 4579 /* if a is positive */ 4580 if (a->sign == MP_ZPOS) { 4581 /* add digit, after this we're propagating 4582 * the carry. 4583 */ 4584 *tmpc = *tmpa++ + b; 4585 mu = *tmpc >> DIGIT_BIT; 4586 *tmpc++ &= MP_MASK; 4587 4588 /* now handle rest of the digits */ 4589 for (ix = 1; ix < a->used; ix++) { 4590 *tmpc = *tmpa++ + mu; 4591 mu = *tmpc >> DIGIT_BIT; 4592 *tmpc++ &= MP_MASK; 4593 } 4594 /* set final carry */ 4595 ix++; 4596 *tmpc++ = mu; 4597 4598 /* setup size */ 4599 c->used = a->used + 1; 4600 } else { 4601 /* a was negative and |a| < b */ 4602 c->used = 1; 4603 4604 /* the result is a single digit */ 4605 if (a->used == 1) { 4606 *tmpc++ = b - a->dp[0]; 4607 } else { 4608 *tmpc++ = b; 4609 } 4610 4611 /* setup count so the clearing of oldused 4612 * can fall through correctly 4613 */ 4614 ix = 1; 4615 } 4616 4617 /* now zero to oldused */ 4618 while (ix++ < oldused) { 4619 *tmpc++ = 0; 4620 } 4621 trim_unused_digits(c); 4622 4623 return MP_OKAY; 4624 } 4625 4626 /* single digit subtraction */ 4627 static int 4628 signed_subtract_word(mp_int *a, mp_digit b, mp_int *c) 4629 { 4630 mp_digit *tmpa, *tmpc, mu; 4631 int res, ix, oldused; 4632 4633 /* grow c as required */ 4634 if (c->alloc < a->used + 1) { 4635 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) { 4636 return res; 4637 } 4638 } 4639 4640 /* if a is negative just do an unsigned 4641 * addition [with fudged signs] 4642 */ 4643 if (a->sign == MP_NEG) { 4644 a->sign = MP_ZPOS; 4645 res = add_single_digit(a, b, c); 4646 a->sign = c->sign = MP_NEG; 4647 4648 /* clamp */ 4649 trim_unused_digits(c); 4650 4651 return res; 4652 } 4653 4654 /* setup regs */ 4655 oldused = c->used; 4656 tmpa = a->dp; 4657 tmpc = c->dp; 4658 4659 /* if a <= b simply fix the single digit */ 4660 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) { 4661 if (a->used == 1) { 4662 *tmpc++ = b - *tmpa; 4663 } else { 4664 *tmpc++ = b; 4665 } 4666 ix = 1; 4667 4668 /* negative/1digit */ 4669 c->sign = MP_NEG; 4670 c->used = 1; 4671 } else { 4672 /* positive/size */ 4673 c->sign = MP_ZPOS; 4674 c->used = a->used; 4675 4676 /* subtract first digit */ 4677 *tmpc = *tmpa++ - b; 4678 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4679 *tmpc++ &= MP_MASK; 4680 4681 /* handle rest of the digits */ 4682 for (ix = 1; ix < a->used; ix++) { 4683 *tmpc = *tmpa++ - mu; 4684 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1); 4685 *tmpc++ &= MP_MASK; 4686 } 4687 } 4688 4689 /* zero excess digits */ 4690 while (ix++ < oldused) { 4691 *tmpc++ = 0; 4692 } 4693 trim_unused_digits(c); 4694 return MP_OKAY; 4695 } 4696 4697 static const int lnz[16] = { 4698 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 4699 }; 4700 4701 /* Counts the number of lsbs which are zero before the first zero bit */ 4702 static int 4703 mp_cnt_lsb(mp_int *a) 4704 { 4705 int x; 4706 mp_digit q, qq; 4707 4708 /* easy out */ 4709 if (MP_ISZERO(a) == 1) { 4710 return 0; 4711 } 4712 4713 /* scan lower digits until non-zero */ 4714 for (x = 0; x < a->used && a->dp[x] == 0; x++) { 4715 } 4716 q = a->dp[x]; 4717 x *= DIGIT_BIT; 4718 4719 /* now scan this digit until a 1 is found */ 4720 if ((q & 1) == 0) { 4721 do { 4722 qq = q & 15; 4723 /* LINTED previous op ensures range of qq */ 4724 x += lnz[qq]; 4725 q >>= 4; 4726 } while (qq == 0); 4727 } 4728 return x; 4729 } 4730 4731 /* c = a * a (mod b) */ 4732 static int 4733 square_modulo(mp_int *a, mp_int *b, mp_int *c) 4734 { 4735 int res; 4736 mp_int t; 4737 4738 if ((res = mp_init(&t)) != MP_OKAY) { 4739 return res; 4740 } 4741 4742 if ((res = square(a, &t)) != MP_OKAY) { 4743 mp_clear(&t); 4744 return res; 4745 } 4746 res = modulo(&t, b, c); 4747 mp_clear(&t); 4748 return res; 4749 } 4750 4751 static int 4752 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result) 4753 { 4754 mp_int n1, y, r; 4755 int s, j, err; 4756 4757 /* default */ 4758 *result = MP_NO; 4759 4760 /* ensure b > 1 */ 4761 if (compare_digit(b, 1) != MP_GT) { 4762 return MP_VAL; 4763 } 4764 4765 /* get n1 = a - 1 */ 4766 if ((err = mp_init_copy(&n1, a)) != MP_OKAY) { 4767 return err; 4768 } 4769 if ((err = signed_subtract_word(&n1, 1, &n1)) != MP_OKAY) { 4770 goto LBL_N1; 4771 } 4772 4773 /* set 2**s * r = n1 */ 4774 if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) { 4775 goto LBL_N1; 4776 } 4777 4778 /* count the number of least significant bits 4779 * which are zero 4780 */ 4781 s = mp_cnt_lsb(&r); 4782 4783 /* now divide n - 1 by 2**s */ 4784 if ((err = rshift_bits(&r, s, &r, NULL)) != MP_OKAY) { 4785 goto LBL_R; 4786 } 4787 4788 /* compute y = b**r mod a */ 4789 if ((err = mp_init(&y)) != MP_OKAY) { 4790 goto LBL_R; 4791 } 4792 if ((err = exponent_modulo(b, &r, a, &y)) != MP_OKAY) { 4793 goto LBL_Y; 4794 } 4795 4796 /* if y != 1 and y != n1 do */ 4797 if (compare_digit(&y, 1) != MP_EQ && signed_compare(&y, &n1) != MP_EQ) { 4798 j = 1; 4799 /* while j <= s-1 and y != n1 */ 4800 while ((j <= (s - 1)) && signed_compare(&y, &n1) != MP_EQ) { 4801 if ((err = square_modulo(&y, a, &y)) != MP_OKAY) { 4802 goto LBL_Y; 4803 } 4804 4805 /* if y == 1 then composite */ 4806 if (compare_digit(&y, 1) == MP_EQ) { 4807 goto LBL_Y; 4808 } 4809 4810 ++j; 4811 } 4812 4813 /* if y != n1 then composite */ 4814 if (signed_compare(&y, &n1) != MP_EQ) { 4815 goto LBL_Y; 4816 } 4817 } 4818 4819 /* probably prime now */ 4820 *result = MP_YES; 4821 LBL_Y: 4822 mp_clear(&y); 4823 LBL_R: 4824 mp_clear(&r); 4825 LBL_N1: 4826 mp_clear(&n1); 4827 return err; 4828 } 4829 4830 /* performs a variable number of rounds of Miller-Rabin 4831 * 4832 * Probability of error after t rounds is no more than 4833 4834 * 4835 * Sets result to 1 if probably prime, 0 otherwise 4836 */ 4837 static int 4838 mp_prime_is_prime(mp_int *a, int t, int *result) 4839 { 4840 mp_int b; 4841 int ix, err, res; 4842 4843 /* default to no */ 4844 *result = MP_NO; 4845 4846 /* valid value of t? */ 4847 if (t <= 0 || t > (int)PRIME_SIZE) { 4848 return MP_VAL; 4849 } 4850 4851 /* is the input equal to one of the primes in the table? */ 4852 for (ix = 0; ix < (int)PRIME_SIZE; ix++) { 4853 if (compare_digit(a, ltm_prime_tab[ix]) == MP_EQ) { 4854 *result = 1; 4855 return MP_OKAY; 4856 } 4857 } 4858 4859 /* first perform trial division */ 4860 if ((err = mp_prime_is_divisible(a, &res)) != MP_OKAY) { 4861 return err; 4862 } 4863 4864 /* return if it was trivially divisible */ 4865 if (res == MP_YES) { 4866 return MP_OKAY; 4867 } 4868 4869 /* now perform the miller-rabin rounds */ 4870 if ((err = mp_init(&b)) != MP_OKAY) { 4871 return err; 4872 } 4873 4874 for (ix = 0; ix < t; ix++) { 4875 /* set the prime */ 4876 set_word(&b, ltm_prime_tab[ix]); 4877 4878 if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) { 4879 goto LBL_B; 4880 } 4881 4882 if (res == MP_NO) { 4883 goto LBL_B; 4884 } 4885 } 4886 4887 /* passed the test */ 4888 *result = MP_YES; 4889 LBL_B: 4890 mp_clear(&b); 4891 return err; 4892 } 4893 4894 /* returns size of ASCII reprensentation */ 4895 static int 4896 mp_radix_size(mp_int *a, int radix, int *size) 4897 { 4898 int res, digs; 4899 mp_int t; 4900 mp_digit d; 4901 4902 *size = 0; 4903 4904 /* special case for binary */ 4905 if (radix == 2) { 4906 *size = mp_count_bits(a) + (a->sign == MP_NEG ? 1 : 0) + 1; 4907 return MP_OKAY; 4908 } 4909 4910 /* make sure the radix is in range */ 4911 if (radix < 2 || radix > 64) { 4912 return MP_VAL; 4913 } 4914 4915 if (MP_ISZERO(a) == MP_YES) { 4916 *size = 2; 4917 return MP_OKAY; 4918 } 4919 4920 /* digs is the digit count */ 4921 digs = 0; 4922 4923 /* if it's negative add one for the sign */ 4924 if (a->sign == MP_NEG) { 4925 ++digs; 4926 } 4927 4928 /* init a copy of the input */ 4929 if ((res = mp_init_copy(&t, a)) != MP_OKAY) { 4930 return res; 4931 } 4932 4933 /* force temp to positive */ 4934 t.sign = MP_ZPOS; 4935 4936 /* fetch out all of the digits */ 4937 while (MP_ISZERO(&t) == MP_NO) { 4938 if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 4939 mp_clear(&t); 4940 return res; 4941 } 4942 ++digs; 4943 } 4944 mp_clear(&t); 4945 4946 /* return digs + 1, the 1 is for the NULL byte that would be required. */ 4947 *size = digs + 1; 4948 return MP_OKAY; 4949 } 4950 4951 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; 4952 4953 /* stores a bignum as a ASCII string in a given radix (2..64) 4954 * 4955 * Stores upto maxlen-1 chars and always a NULL byte 4956 */ 4957 static int 4958 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen) 4959 { 4960 int res, digs; 4961 mp_int t; 4962 mp_digit d; 4963 char *_s = str; 4964 4965 /* check range of the maxlen, radix */ 4966 if (maxlen < 2 || radix < 2 || radix > 64) { 4967 return MP_VAL; 4968 } 4969 4970 /* quick out if its zero */ 4971 if (MP_ISZERO(a) == MP_YES) { 4972 *str++ = '0'; 4973 *str = '\0'; 4974 return MP_OKAY; 4975 } 4976 4977 if ((res = mp_init_copy(&t, a)) != MP_OKAY) { 4978 return res; 4979 } 4980 4981 /* if it is negative output a - */ 4982 if (t.sign == MP_NEG) { 4983 /* we have to reverse our digits later... but not the - sign!! */ 4984 ++_s; 4985 4986 /* store the flag and mark the number as positive */ 4987 *str++ = '-'; 4988 t.sign = MP_ZPOS; 4989 4990 /* subtract a char */ 4991 --maxlen; 4992 } 4993 4994 digs = 0; 4995 while (MP_ISZERO(&t) == 0) { 4996 if (--maxlen < 1) { 4997 /* no more room */ 4998 break; 4999 } 5000 if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) { 5001 mp_clear(&t); 5002 return res; 5003 } 5004 /* LINTED -- radix' range is checked above, limits d's range */ 5005 *str++ = mp_s_rmap[d]; 5006 ++digs; 5007 } 5008 5009 /* reverse the digits of the string. In this case _s points 5010 * to the first digit [exluding the sign] of the number 5011 */ 5012 bn_reverse((unsigned char *)_s, digs); 5013 5014 /* append a NULL so the string is properly terminated */ 5015 *str = '\0'; 5016 5017 mp_clear(&t); 5018 return MP_OKAY; 5019 } 5020 5021 static char * 5022 formatbn(const BIGNUM *a, const int radix) 5023 { 5024 char *s; 5025 int len; 5026 5027 if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) { 5028 return NULL; 5029 } 5030 if ((s = allocate(1, (size_t)len)) != NULL) { 5031 if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) { 5032 deallocate(s, (size_t)len); 5033 return NULL; 5034 } 5035 } 5036 return s; 5037 } 5038 5039 static int 5040 mp_getradix_num(mp_int *a, int radix, char *s) 5041 { 5042 int err, ch, neg, y; 5043 5044 /* clear a */ 5045 mp_zero(a); 5046 5047 /* if first digit is - then set negative */ 5048 if ((ch = *s++) == '-') { 5049 neg = MP_NEG; 5050 ch = *s++; 5051 } else { 5052 neg = MP_ZPOS; 5053 } 5054 5055 for (;;) { 5056 /* find y in the radix map */ 5057 for (y = 0; y < radix; y++) { 5058 if (mp_s_rmap[y] == ch) { 5059 break; 5060 } 5061 } 5062 if (y == radix) { 5063 break; 5064 } 5065 5066 /* shift up and add */ 5067 if ((err = multiply_digit(a, radix, a)) != MP_OKAY) { 5068 return err; 5069 } 5070 if ((err = add_single_digit(a, y, a)) != MP_OKAY) { 5071 return err; 5072 } 5073 5074 ch = *s++; 5075 } 5076 if (compare_digit(a, 0) != MP_EQ) { 5077 a->sign = neg; 5078 } 5079 5080 return MP_OKAY; 5081 } 5082 5083 static int 5084 getbn(BIGNUM **a, const char *str, int radix) 5085 { 5086 int len; 5087 5088 if (a == NULL || str == NULL || (*a = BN_new()) == NULL) { 5089 return 0; 5090 } 5091 if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) { 5092 return 0; 5093 } 5094 mp_radix_size(__UNCONST(*a), radix, &len); 5095 return len - 1; 5096 } 5097 5098 /* d = a - b (mod c) */ 5099 static int 5100 subtract_modulo(mp_int *a, mp_int *b, mp_int *c, mp_int *d) 5101 { 5102 int res; 5103 mp_int t; 5104 5105 5106 if ((res = mp_init(&t)) != MP_OKAY) { 5107 return res; 5108 } 5109 5110 if ((res = signed_subtract(a, b, &t)) != MP_OKAY) { 5111 mp_clear(&t); 5112 return res; 5113 } 5114 res = modulo(&t, c, d); 5115 mp_clear(&t); 5116 return res; 5117 } 5118 5119 /**************************************************************************/ 5120 5121 /* BIGNUM emulation layer */ 5122 5123 /* essentiually, these are just wrappers around the libtommath functions */ 5124 /* usually the order of args changes */ 5125 /* the BIGNUM API tends to have more const poisoning */ 5126 /* these wrappers also check the arguments passed for sanity */ 5127 5128 BIGNUM * 5129 BN_bin2bn(const uint8_t *data, int len, BIGNUM *ret) 5130 { 5131 if (data == NULL) { 5132 return BN_new(); 5133 } 5134 if (ret == NULL) { 5135 ret = BN_new(); 5136 } 5137 return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL; 5138 } 5139 5140 /* store in unsigned [big endian] format */ 5141 int 5142 BN_bn2bin(const BIGNUM *a, unsigned char *b) 5143 { 5144 BIGNUM t; 5145 int x; 5146 5147 if (a == NULL || b == NULL) { 5148 return -1; 5149 } 5150 if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) { 5151 return -1; 5152 } 5153 for (x = 0; !BN_is_zero(&t) ; ) { 5154 b[x++] = (unsigned char) (t.dp[0] & 0xff); 5155 if (rshift_bits(&t, 8, &t, NULL) != MP_OKAY) { 5156 mp_clear(&t); 5157 return -1; 5158 } 5159 } 5160 bn_reverse(b, x); 5161 mp_clear(&t); 5162 return x; 5163 } 5164 5165 void 5166 BN_init(BIGNUM *a) 5167 { 5168 if (a != NULL) { 5169 mp_init(a); 5170 } 5171 } 5172 5173 BIGNUM * 5174 BN_new(void) 5175 { 5176 BIGNUM *a; 5177 5178 if ((a = allocate(1, sizeof(*a))) != NULL) { 5179 mp_init(a); 5180 } 5181 return a; 5182 } 5183 5184 /* copy, b = a */ 5185 int 5186 BN_copy(BIGNUM *b, const BIGNUM *a) 5187 { 5188 if (a == NULL || b == NULL) { 5189 return MP_VAL; 5190 } 5191 return mp_copy(__UNCONST(a), b); 5192 } 5193 5194 BIGNUM * 5195 BN_dup(const BIGNUM *a) 5196 { 5197 BIGNUM *ret; 5198 5199 if (a == NULL) { 5200 return NULL; 5201 } 5202 if ((ret = BN_new()) != NULL) { 5203 BN_copy(ret, a); 5204 } 5205 return ret; 5206 } 5207 5208 void 5209 BN_swap(BIGNUM *a, BIGNUM *b) 5210 { 5211 if (a && b) { 5212 mp_exch(a, b); 5213 } 5214 } 5215 5216 int 5217 BN_lshift(BIGNUM *r, const BIGNUM *a, int n) 5218 { 5219 if (r == NULL || a == NULL || n < 0) { 5220 return 0; 5221 } 5222 BN_copy(r, a); 5223 return lshift_digits(r, n) == MP_OKAY; 5224 } 5225 5226 int 5227 BN_lshift1(BIGNUM *r, BIGNUM *a) 5228 { 5229 if (r == NULL || a == NULL) { 5230 return 0; 5231 } 5232 BN_copy(r, a); 5233 return lshift_digits(r, 1) == MP_OKAY; 5234 } 5235 5236 int 5237 BN_rshift(BIGNUM *r, const BIGNUM *a, int n) 5238 { 5239 if (r == NULL || a == NULL || n < 0) { 5240 return MP_VAL; 5241 } 5242 BN_copy(r, a); 5243 return rshift_digits(r, n) == MP_OKAY; 5244 } 5245 5246 int 5247 BN_rshift1(BIGNUM *r, BIGNUM *a) 5248 { 5249 if (r == NULL || a == NULL) { 5250 return 0; 5251 } 5252 BN_copy(r, a); 5253 return rshift_digits(r, 1) == MP_OKAY; 5254 } 5255 5256 int 5257 BN_set_word(BIGNUM *a, BN_ULONG w) 5258 { 5259 if (a == NULL) { 5260 return 0; 5261 } 5262 set_word(a, w); 5263 return 1; 5264 } 5265 5266 int 5267 BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 5268 { 5269 if (a == NULL || b == NULL || r == NULL) { 5270 return 0; 5271 } 5272 return signed_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5273 } 5274 5275 int 5276 BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 5277 { 5278 if (a == NULL || b == NULL || r == NULL) { 5279 return 0; 5280 } 5281 return signed_subtract(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5282 } 5283 5284 int 5285 BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) 5286 { 5287 if (a == NULL || b == NULL || r == NULL) { 5288 return 0; 5289 } 5290 USE_ARG(ctx); 5291 return signed_multiply(__UNCONST(a), __UNCONST(b), r) == MP_OKAY; 5292 } 5293 5294 int 5295 BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *a, const BIGNUM *d, BN_CTX *ctx) 5296 { 5297 if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) { 5298 return 0; 5299 } 5300 USE_ARG(ctx); 5301 return signed_divide(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY; 5302 } 5303 5304 /* perform a bit operation on the 2 bignums */ 5305 int 5306 BN_bitop(BIGNUM *r, const BIGNUM *a, char op, const BIGNUM *b) 5307 { 5308 unsigned ndigits; 5309 mp_digit ad; 5310 mp_digit bd; 5311 int i; 5312 5313 if (a == NULL || b == NULL || r == NULL) { 5314 return 0; 5315 } 5316 if (BN_cmp(__UNCONST(a), __UNCONST(b)) >= 0) { 5317 BN_copy(r, a); 5318 ndigits = a->used; 5319 } else { 5320 BN_copy(r, b); 5321 ndigits = b->used; 5322 } 5323 for (i = 0 ; i < (int)ndigits ; i++) { 5324 ad = (i > a->used) ? 0 : a->dp[i]; 5325 bd = (i > b->used) ? 0 : b->dp[i]; 5326 switch(op) { 5327 case '&': 5328 r->dp[i] = (ad & bd); 5329 break; 5330 case '|': 5331 r->dp[i] = (ad | bd); 5332 break; 5333 case '^': 5334 r->dp[i] = (ad ^ bd); 5335 break; 5336 default: 5337 break; 5338 } 5339 } 5340 return 1; 5341 } 5342 5343 void 5344 BN_free(BIGNUM *a) 5345 { 5346 if (a) { 5347 mp_clear(a); 5348 } 5349 } 5350 5351 void 5352 BN_clear(BIGNUM *a) 5353 { 5354 if (a) { 5355 mp_clear(a); 5356 } 5357 } 5358 5359 void 5360 BN_clear_free(BIGNUM *a) 5361 { 5362 if (a) { 5363 mp_clear(a); 5364 } 5365 } 5366 5367 int 5368 BN_num_bytes(const BIGNUM *a) 5369 { 5370 if (a == NULL) { 5371 return MP_VAL; 5372 } 5373 return mp_unsigned_bin_size(__UNCONST(a)); 5374 } 5375 5376 int 5377 BN_num_bits(const BIGNUM *a) 5378 { 5379 if (a == NULL) { 5380 return 0; 5381 } 5382 return mp_count_bits(a); 5383 } 5384 5385 void 5386 BN_set_negative(BIGNUM *a, int n) 5387 { 5388 if (a) { 5389 a->sign = (n) ? MP_NEG : 0; 5390 } 5391 } 5392 5393 int 5394 BN_cmp(BIGNUM *a, BIGNUM *b) 5395 { 5396 if (a == NULL || b == NULL) { 5397 return MP_VAL; 5398 } 5399 switch(signed_compare(a, b)) { 5400 case MP_LT: 5401 return -1; 5402 case MP_GT: 5403 return 1; 5404 case MP_EQ: 5405 default: 5406 return 0; 5407 } 5408 } 5409 5410 int 5411 BN_mod_exp(BIGNUM *Y, BIGNUM *G, BIGNUM *X, BIGNUM *P, BN_CTX *ctx) 5412 { 5413 if (Y == NULL || G == NULL || X == NULL || P == NULL) { 5414 return MP_VAL; 5415 } 5416 USE_ARG(ctx); 5417 return exponent_modulo(G, X, P, Y) == MP_OKAY; 5418 } 5419 5420 BIGNUM * 5421 BN_mod_inverse(BIGNUM *r, BIGNUM *a, const BIGNUM *n, BN_CTX *ctx) 5422 { 5423 USE_ARG(ctx); 5424 if (r == NULL || a == NULL || n == NULL) { 5425 return NULL; 5426 } 5427 return (modular_inverse(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL; 5428 } 5429 5430 int 5431 BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 5432 { 5433 USE_ARG(ctx); 5434 if (ret == NULL || a == NULL || b == NULL || m == NULL) { 5435 return 0; 5436 } 5437 return multiply_modulo(ret, a, b, __UNCONST(m)) == MP_OKAY; 5438 } 5439 5440 BN_CTX * 5441 BN_CTX_new(void) 5442 { 5443 return allocate(1, sizeof(BN_CTX)); 5444 } 5445 5446 void 5447 BN_CTX_init(BN_CTX *c) 5448 { 5449 if (c != NULL) { 5450 c->arraysize = 15; 5451 if ((c->v = allocate(sizeof(*c->v), c->arraysize)) == NULL) { 5452 c->arraysize = 0; 5453 } 5454 } 5455 } 5456 5457 BIGNUM * 5458 BN_CTX_get(BN_CTX *ctx) 5459 { 5460 if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) { 5461 return NULL; 5462 } 5463 return ctx->v[ctx->count++] = BN_new(); 5464 } 5465 5466 void 5467 BN_CTX_start(BN_CTX *ctx) 5468 { 5469 BN_CTX_init(ctx); 5470 } 5471 5472 void 5473 BN_CTX_free(BN_CTX *c) 5474 { 5475 unsigned i; 5476 5477 if (c != NULL && c->v != NULL) { 5478 for (i = 0 ; i < c->count ; i++) { 5479 BN_clear_free(c->v[i]); 5480 } 5481 deallocate(c->v, sizeof(*c->v) * c->arraysize); 5482 } 5483 } 5484 5485 void 5486 BN_CTX_end(BN_CTX *ctx) 5487 { 5488 BN_CTX_free(ctx); 5489 } 5490 5491 char * 5492 BN_bn2hex(const BIGNUM *a) 5493 { 5494 return (a == NULL) ? NULL : formatbn(a, 16); 5495 } 5496 5497 char * 5498 BN_bn2dec(const BIGNUM *a) 5499 { 5500 return (a == NULL) ? NULL : formatbn(a, 10); 5501 } 5502 5503 char * 5504 BN_bn2radix(const BIGNUM *a, unsigned radix) 5505 { 5506 return (a == NULL) ? NULL : formatbn(a, (int)radix); 5507 } 5508 5509 #ifndef _KERNEL 5510 int 5511 BN_print_fp(FILE *fp, const BIGNUM *a) 5512 { 5513 char *s; 5514 int ret; 5515 5516 if (fp == NULL || a == NULL) { 5517 return 0; 5518 } 5519 s = BN_bn2hex(a); 5520 ret = fprintf(fp, "%s", s); 5521 deallocate(s, strlen(s) + 1); 5522 return ret; 5523 } 5524 #endif 5525 5526 #ifdef BN_RAND_NEEDED 5527 int 5528 BN_rand(BIGNUM *rnd, int bits, int top, int bottom) 5529 { 5530 uint64_t r; 5531 int digits; 5532 int i; 5533 5534 if (rnd == NULL) { 5535 return 0; 5536 } 5537 mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT)); 5538 for (i = 0 ; i < digits ; i++) { 5539 r = (uint64_t)arc4random(); 5540 r <<= 32; 5541 r |= arc4random(); 5542 rnd->dp[i] = (r & MP_MASK); 5543 } 5544 if (top == 0) { 5545 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5546 } 5547 if (top == 1) { 5548 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT)); 5549 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1))); 5550 } 5551 if (bottom) { 5552 rnd->dp[0] |= 0x1; 5553 } 5554 return 1; 5555 } 5556 5557 int 5558 BN_rand_range(BIGNUM *rnd, BIGNUM *range) 5559 { 5560 if (rnd == NULL || range == NULL || BN_is_zero(range)) { 5561 return 0; 5562 } 5563 BN_rand(rnd, BN_num_bits(range), 1, 0); 5564 return modulo(rnd, range, rnd) == MP_OKAY; 5565 } 5566 #endif 5567 5568 int 5569 BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int, int, void *), BN_CTX *ctx, void *cb_arg) 5570 { 5571 int primality; 5572 5573 if (a == NULL) { 5574 return 0; 5575 } 5576 USE_ARG(ctx); 5577 USE_ARG(cb_arg); 5578 USE_ARG(callback); 5579 return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0; 5580 } 5581 5582 const BIGNUM * 5583 BN_value_one(void) 5584 { 5585 static mp_digit digit = 1UL; 5586 static const BIGNUM one = { &digit, 1, 1, 0 }; 5587 5588 return &one; 5589 } 5590 5591 int 5592 BN_hex2bn(BIGNUM **a, const char *str) 5593 { 5594 return getbn(a, str, 16); 5595 } 5596 5597 int 5598 BN_dec2bn(BIGNUM **a, const char *str) 5599 { 5600 return getbn(a, str, 10); 5601 } 5602 5603 int 5604 BN_radix2bn(BIGNUM **a, const char *str, unsigned radix) 5605 { 5606 return getbn(a, str, (int)radix); 5607 } 5608 5609 int 5610 BN_mod_sub(BIGNUM *r, BIGNUM *a, BIGNUM *b, const BIGNUM *m, BN_CTX *ctx) 5611 { 5612 USE_ARG(ctx); 5613 if (r == NULL || a == NULL || b == NULL || m == NULL) { 5614 return 0; 5615 } 5616 return subtract_modulo(a, b, __UNCONST(m), r) == MP_OKAY; 5617 } 5618 5619 int 5620 BN_is_bit_set(const BIGNUM *a, int n) 5621 { 5622 if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) { 5623 return 0; 5624 } 5625 return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0; 5626 } 5627 5628 /* raise 'a' to power of 'b' */ 5629 int 5630 BN_raise(BIGNUM *res, BIGNUM *a, BIGNUM *b) 5631 { 5632 uint64_t exponent; 5633 BIGNUM *power; 5634 BIGNUM *temp; 5635 char *t; 5636 5637 t = BN_bn2dec(b); 5638 exponent = (uint64_t)strtoull(t, NULL, 10); 5639 free(t); 5640 if (exponent == 0) { 5641 BN_copy(res, BN_value_one()); 5642 } else { 5643 power = BN_dup(a); 5644 for ( ; (exponent & 1) == 0 ; exponent >>= 1) { 5645 BN_mul(power, power, power, NULL); 5646 } 5647 temp = BN_dup(power); 5648 for (exponent >>= 1 ; exponent > 0 ; exponent >>= 1) { 5649 BN_mul(power, power, power, NULL); 5650 if (exponent & 1) { 5651 BN_mul(temp, power, temp, NULL); 5652 } 5653 } 5654 BN_copy(res, temp); 5655 BN_free(power); 5656 BN_free(temp); 5657 } 5658 return 1; 5659 } 5660 5661 /* compute the factorial */ 5662 int 5663 BN_factorial(BIGNUM *res, BIGNUM *f) 5664 { 5665 BIGNUM *one; 5666 BIGNUM *i; 5667 5668 i = BN_dup(f); 5669 one = __UNCONST(BN_value_one()); 5670 BN_sub(i, i, one); 5671 BN_copy(res, f); 5672 while (BN_cmp(i, one) > 0) { 5673 BN_mul(res, res, i, NULL); 5674 BN_sub(i, i, one); 5675 } 5676 BN_free(i); 5677 return 1; 5678 } 5679