1 /*-
2 * Copyright (c) 2012-2019 Alistair Crooks <agc@NetBSD.org>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 /* LibTomMath, multiple-precision integer library -- Tom St Denis
26 *
27 * LibTomMath is a library that provides multiple-precision
28 * integer arithmetic as well as number theoretic functionality.
29 *
30 * The library was designed directly after the MPI library by
31 * Michael Fromberger but has been written from scratch with
32 * additional optimizations in place.
33 *
34 * The library is free for all purposes without any express
35 * guarantee it works.
36 *
37 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
38 */
39 #include "config.h"
40
41 #include <sys/types.h>
42 #include <sys/param.h>
43
44 #ifdef _KERNEL
45 # include <sys/kmem.h>
46 #else
47 # include <arpa/inet.h>
48 # include <limits.h>
49 # include <stdarg.h>
50 # include <stdio.h>
51 # include <stdlib.h>
52 # include <string.h>
53 # include <unistd.h>
54 #endif
55
56 #include "bn.h"
57
58 /**************************************************************************/
59
60 /* LibTomMath, multiple-precision integer library -- Tom St Denis
61 *
62 * LibTomMath is a library that provides multiple-precision
63 * integer arithmetic as well as number theoretic functionality.
64 *
65 * The library was designed directly after the MPI library by
66 * Michael Fromberger but has been written from scratch with
67 * additional optimizations in place.
68 *
69 * The library is free for all purposes without any express
70 * guarantee it works.
71 *
72 * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
73 */
74
75 #define MP_PREC 32
76 #define DIGIT_BIT 28
77 #define MP_MASK ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
78
79 #define MP_WARRAY /*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1)))
80
81 #define MP_NO 0
82 #define MP_YES 1
83
84 #ifndef USE_ARG
85 #define USE_ARG(x) /*LINTED*/(void)&(x)
86 #endif
87
88 #ifndef __arraycount
89 #define __arraycount(__x) (sizeof(__x) / sizeof(__x[0]))
90 #endif
91
92 #ifndef MIN
93 #define MIN(a,b) (((a)<(b))?(a):(b))
94 #endif
95
96 #define MP_ISZERO(a) (((a)->used == 0) ? MP_YES : MP_NO)
97
98 typedef int mp_err;
99
100 static int signed_multiply(mp_int * a, mp_int * b, mp_int * c);
101 static int square(mp_int * a, mp_int * b);
102
103 static int signed_subtract_word(mp_int *a, mp_digit b, mp_int *c);
104
105 static inline void *
allocate(size_t n,size_t m)106 allocate(size_t n, size_t m)
107 {
108 return calloc(n, m);
109 }
110
111 static inline void
deallocate(void * v,size_t sz)112 deallocate(void *v, size_t sz)
113 {
114 USE_ARG(sz);
115 free(v);
116 }
117
118 /* set to zero */
119 static inline void
mp_zero(mp_int * a)120 mp_zero(mp_int *a)
121 {
122 a->sign = MP_ZPOS;
123 a->used = 0;
124 memset(a->dp, 0x0, a->alloc * sizeof(*a->dp));
125 }
126
127 /* grow as required */
128 static int
mp_grow(mp_int * a,int size)129 mp_grow(mp_int *a, int size)
130 {
131 mp_digit *tmp;
132
133 /* if the alloc size is smaller alloc more ram */
134 if (a->alloc < size) {
135 /* ensure there are always at least MP_PREC digits extra on top */
136 size += (MP_PREC * 2) - (size % MP_PREC);
137
138 /* reallocate the array a->dp
139 *
140 * We store the return in a temporary variable
141 * in case the operation failed we don't want
142 * to overwrite the dp member of a.
143 */
144 tmp = realloc(a->dp, sizeof(*tmp) * size);
145 if (tmp == NULL) {
146 /* reallocation failed but "a" is still valid [can be freed] */
147 return MP_MEM;
148 }
149
150 /* reallocation succeeded so set a->dp */
151 a->dp = tmp;
152 /* zero excess digits */
153 memset(&a->dp[a->alloc], 0x0, (size - a->alloc) * sizeof(*a->dp));
154 a->alloc = size;
155 }
156 return MP_OKAY;
157 }
158
159 /* shift left a certain amount of digits */
160 static int
lshift_digits(mp_int * a,int b)161 lshift_digits(mp_int * a, int b)
162 {
163 mp_digit *top, *bottom;
164 int x, res;
165
166 /* if its less than zero return */
167 if (b <= 0) {
168 return MP_OKAY;
169 }
170
171 /* grow to fit the new digits */
172 if (a->alloc < a->used + b) {
173 if ((res = mp_grow(a, a->used + b)) != MP_OKAY) {
174 return res;
175 }
176 }
177
178 /* increment the used by the shift amount then copy upwards */
179 a->used += b;
180
181 /* top */
182 top = a->dp + a->used - 1;
183
184 /* base */
185 bottom = a->dp + a->used - 1 - b;
186
187 /* much like rshift_digits this is implemented using a sliding window
188 * except the window goes the otherway around. Copying from
189 * the bottom to the top.
190 */
191 for (x = a->used - 1; x >= b; x--) {
192 *top-- = *bottom--;
193 }
194
195 /* zero the lower digits */
196 memset(a->dp, 0x0, b * sizeof(*a->dp));
197 return MP_OKAY;
198 }
199
200 /* trim unused digits
201 *
202 * This is used to ensure that leading zero digits are
203 * trimed and the leading "used" digit will be non-zero
204 * Typically very fast. Also fixes the sign if there
205 * are no more leading digits
206 */
207 static void
trim_unused_digits(mp_int * a)208 trim_unused_digits(mp_int * a)
209 {
210 /* decrease used while the most significant digit is
211 * zero.
212 */
213 while (a->used > 0 && a->dp[a->used - 1] == 0) {
214 a->used -= 1;
215 }
216 /* reset the sign flag if used == 0 */
217 if (a->used == 0) {
218 a->sign = MP_ZPOS;
219 }
220 }
221
222 /* copy, b = a */
223 static int
mp_copy(PGPV_BIGNUM * a,PGPV_BIGNUM * b)224 mp_copy(PGPV_BIGNUM *a, PGPV_BIGNUM *b)
225 {
226 int res;
227
228 /* if dst == src do nothing */
229 if (a == b) {
230 return MP_OKAY;
231 }
232 if (a == NULL || b == NULL) {
233 return MP_VAL;
234 }
235
236 /* grow dest */
237 if (b->alloc < a->used) {
238 if ((res = mp_grow(b, a->used)) != MP_OKAY) {
239 return res;
240 }
241 }
242
243 memcpy(b->dp, a->dp, a->used * sizeof(*b->dp));
244 if (b->used > a->used) {
245 memset(&b->dp[a->used], 0x0, (b->used - a->used) * sizeof(*b->dp));
246 }
247
248 /* copy used count and sign */
249 b->used = a->used;
250 b->sign = a->sign;
251 return MP_OKAY;
252 }
253
254 /* shift left by a certain bit count */
255 static int
lshift_bits(mp_int * a,int b,mp_int * c)256 lshift_bits(mp_int *a, int b, mp_int *c)
257 {
258 mp_digit d;
259 int res;
260
261 /* copy */
262 if (a != c) {
263 if ((res = mp_copy(a, c)) != MP_OKAY) {
264 return res;
265 }
266 }
267
268 if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
269 if ((res = mp_grow(c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
270 return res;
271 }
272 }
273
274 /* shift by as many digits in the bit count */
275 if (b >= (int)DIGIT_BIT) {
276 if ((res = lshift_digits(c, b / DIGIT_BIT)) != MP_OKAY) {
277 return res;
278 }
279 }
280
281 /* shift any bit count < DIGIT_BIT */
282 d = (mp_digit) (b % DIGIT_BIT);
283 if (d != 0) {
284 mp_digit *tmpc, shift, mask, carry, rr;
285 int x;
286
287 /* bitmask for carries */
288 mask = (((mp_digit)1) << d) - 1;
289
290 /* shift for msbs */
291 shift = DIGIT_BIT - d;
292
293 /* alias */
294 tmpc = c->dp;
295
296 /* carry */
297 carry = 0;
298 for (x = 0; x < c->used; x++) {
299 /* get the higher bits of the current word */
300 rr = (*tmpc >> shift) & mask;
301
302 /* shift the current word and OR in the carry */
303 *tmpc = ((*tmpc << d) | carry) & MP_MASK;
304 ++tmpc;
305
306 /* set the carry to the carry bits of the current word */
307 carry = rr;
308 }
309
310 /* set final carry */
311 if (carry != 0) {
312 c->dp[c->used++] = carry;
313 }
314 }
315 trim_unused_digits(c);
316 return MP_OKAY;
317 }
318
319 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
320 static int
mp_read_unsigned_bin(mp_int * a,const uint8_t * b,int c)321 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c)
322 {
323 int res;
324
325 /* make sure there are at least two digits */
326 if (a->alloc < 2) {
327 if ((res = mp_grow(a, 2)) != MP_OKAY) {
328 return res;
329 }
330 }
331
332 /* zero the int */
333 mp_zero(a);
334
335 /* read the bytes in */
336 while (c-- > 0) {
337 if ((res = lshift_bits(a, 8, a)) != MP_OKAY) {
338 return res;
339 }
340
341 a->dp[0] |= *b++;
342 a->used += 1;
343 }
344 trim_unused_digits(a);
345 return MP_OKAY;
346 }
347
348 /* returns the number of bits in an mpi */
349 static int
mp_count_bits(const mp_int * a)350 mp_count_bits(const mp_int *a)
351 {
352 int r;
353 mp_digit q;
354
355 /* shortcut */
356 if (a->used == 0) {
357 return 0;
358 }
359
360 /* get number of digits and add that */
361 r = (a->used - 1) * DIGIT_BIT;
362
363 /* take the last digit and count the bits in it */
364 for (q = a->dp[a->used - 1]; q > ((mp_digit) 0) ; r++) {
365 q >>= ((mp_digit) 1);
366 }
367 return r;
368 }
369
370 /* compare maginitude of two ints (unsigned) */
371 static int
compare_magnitude(mp_int * a,mp_int * b)372 compare_magnitude(mp_int * a, mp_int * b)
373 {
374 int n;
375 mp_digit *tmpa, *tmpb;
376
377 /* compare based on # of non-zero digits */
378 if (a->used > b->used) {
379 return MP_GT;
380 }
381
382 if (a->used < b->used) {
383 return MP_LT;
384 }
385
386 /* alias for a */
387 tmpa = a->dp + (a->used - 1);
388
389 /* alias for b */
390 tmpb = b->dp + (a->used - 1);
391
392 /* compare based on digits */
393 for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
394 if (*tmpa > *tmpb) {
395 return MP_GT;
396 }
397
398 if (*tmpa < *tmpb) {
399 return MP_LT;
400 }
401 }
402 return MP_EQ;
403 }
404
405 /* compare two ints (signed)*/
406 static int
signed_compare(mp_int * a,mp_int * b)407 signed_compare(mp_int * a, mp_int * b)
408 {
409 /* compare based on sign */
410 if (a->sign != b->sign) {
411 return (a->sign == MP_NEG) ? MP_LT : MP_GT;
412 }
413 return (a->sign == MP_NEG) ? compare_magnitude(b, a) : compare_magnitude(a, b);
414 }
415
416 /* get the size for an unsigned equivalent */
417 static int
mp_unsigned_bin_size(mp_int * a)418 mp_unsigned_bin_size(mp_int * a)
419 {
420 int size = mp_count_bits(a);
421
422 return (size / 8 + ((size & 7) != 0 ? 1 : 0));
423 }
424
425 /* init a new mp_int */
426 static int
mp_init(mp_int * a)427 mp_init(mp_int * a)
428 {
429 /* allocate memory required and clear it */
430 a->dp = allocate(1, sizeof(*a->dp) * MP_PREC);
431 if (a->dp == NULL) {
432 return MP_MEM;
433 }
434
435 /* set the digits to zero */
436 memset(a->dp, 0x0, MP_PREC * sizeof(*a->dp));
437
438 /* set the used to zero, allocated digits to the default precision
439 * and sign to positive */
440 a->used = 0;
441 a->alloc = MP_PREC;
442 a->sign = MP_ZPOS;
443
444 return MP_OKAY;
445 }
446
447 /* clear one (frees) */
448 static void
mp_clear(mp_int * a)449 mp_clear(mp_int * a)
450 {
451 /* only do anything if a hasn't been freed previously */
452 if (a->dp != NULL) {
453 memset(a->dp, 0x0, a->used * sizeof(*a->dp));
454
455 /* free ram */
456 deallocate(a->dp, (size_t)a->alloc);
457
458 /* reset members to make debugging easier */
459 a->dp = NULL;
460 a->alloc = a->used = 0;
461 a->sign = MP_ZPOS;
462 }
463 }
464
465 static int
mp_init_multi(mp_int * mp,...)466 mp_init_multi(mp_int *mp, ...)
467 {
468 mp_err res = MP_OKAY; /* Assume ok until proven otherwise */
469 int n = 0; /* Number of ok inits */
470 mp_int* cur_arg = mp;
471 va_list args;
472
473 va_start(args, mp); /* init args to next argument from caller */
474 while (cur_arg != NULL) {
475 if (mp_init(cur_arg) != MP_OKAY) {
476 /* Oops - error! Back-track and mp_clear what we already
477 succeeded in init-ing, then return error.
478 */
479 va_list clean_args;
480
481 /* end the current list */
482 va_end(args);
483
484 /* now start cleaning up */
485 cur_arg = mp;
486 va_start(clean_args, mp);
487 while (n--) {
488 mp_clear(cur_arg);
489 cur_arg = va_arg(clean_args, mp_int*);
490 }
491 va_end(clean_args);
492 res = MP_MEM;
493 break;
494 }
495 n++;
496 cur_arg = va_arg(args, mp_int*);
497 }
498 va_end(args);
499 return res; /* Assumed ok, if error flagged above. */
500 }
501
502 /* init an mp_init for a given size */
503 static int
mp_init_size(mp_int * a,int size)504 mp_init_size(mp_int * a, int size)
505 {
506 /* pad size so there are always extra digits */
507 size += (MP_PREC * 2) - (size % MP_PREC);
508
509 /* alloc mem */
510 a->dp = allocate(1, sizeof(*a->dp) * size);
511 if (a->dp == NULL) {
512 return MP_MEM;
513 }
514
515 /* set the members */
516 a->used = 0;
517 a->alloc = size;
518 a->sign = MP_ZPOS;
519
520 /* zero the digits */
521 memset(a->dp, 0x0, size * sizeof(*a->dp));
522 return MP_OKAY;
523 }
524
525 /* creates "a" then copies b into it */
526 static int
mp_init_copy(mp_int * a,mp_int * b)527 mp_init_copy(mp_int * a, mp_int * b)
528 {
529 int res;
530
531 if ((res = mp_init(a)) != MP_OKAY) {
532 return res;
533 }
534 return mp_copy(b, a);
535 }
536
537 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
538 static int
basic_add(mp_int * a,mp_int * b,mp_int * c)539 basic_add(mp_int * a, mp_int * b, mp_int * c)
540 {
541 mp_int *x;
542 int olduse, res, min, max;
543
544 /* find sizes, we let |a| <= |b| which means we have to sort
545 * them. "x" will point to the input with the most digits
546 */
547 if (a->used > b->used) {
548 min = b->used;
549 max = a->used;
550 x = a;
551 } else {
552 min = a->used;
553 max = b->used;
554 x = b;
555 }
556
557 /* init result */
558 if (c->alloc < max + 1) {
559 if ((res = mp_grow(c, max + 1)) != MP_OKAY) {
560 return res;
561 }
562 }
563
564 /* get old used digit count and set new one */
565 olduse = c->used;
566 c->used = max + 1;
567
568 {
569 mp_digit carry, *tmpa, *tmpb, *tmpc;
570 int i;
571
572 /* alias for digit pointers */
573
574 /* first input */
575 tmpa = a->dp;
576
577 /* second input */
578 tmpb = b->dp;
579
580 /* destination */
581 tmpc = c->dp;
582
583 /* zero the carry */
584 carry = 0;
585 for (i = 0; i < min; i++) {
586 /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
587 *tmpc = *tmpa++ + *tmpb++ + carry;
588
589 /* U = carry bit of T[i] */
590 carry = *tmpc >> ((mp_digit)DIGIT_BIT);
591
592 /* take away carry bit from T[i] */
593 *tmpc++ &= MP_MASK;
594 }
595
596 /* now copy higher words if any, that is in A+B
597 * if A or B has more digits add those in
598 */
599 if (min != max) {
600 for (; i < max; i++) {
601 /* T[i] = X[i] + U */
602 *tmpc = x->dp[i] + carry;
603
604 /* U = carry bit of T[i] */
605 carry = *tmpc >> ((mp_digit)DIGIT_BIT);
606
607 /* take away carry bit from T[i] */
608 *tmpc++ &= MP_MASK;
609 }
610 }
611
612 /* add carry */
613 *tmpc++ = carry;
614
615 /* clear digits above oldused */
616 if (olduse > c->used) {
617 memset(tmpc, 0x0, (olduse - c->used) * sizeof(*c->dp));
618 }
619 }
620
621 trim_unused_digits(c);
622 return MP_OKAY;
623 }
624
625 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
626 static int
basic_subtract(mp_int * a,mp_int * b,mp_int * c)627 basic_subtract(mp_int * a, mp_int * b, mp_int * c)
628 {
629 int olduse, res, min, max;
630
631 /* find sizes */
632 min = b->used;
633 max = a->used;
634
635 /* init result */
636 if (c->alloc < max) {
637 if ((res = mp_grow(c, max)) != MP_OKAY) {
638 return res;
639 }
640 }
641 olduse = c->used;
642 c->used = max;
643
644 {
645 mp_digit carry, *tmpa, *tmpb, *tmpc;
646 int i;
647
648 /* alias for digit pointers */
649 tmpa = a->dp;
650 tmpb = b->dp;
651 tmpc = c->dp;
652
653 /* set carry to zero */
654 carry = 0;
655 for (i = 0; i < min; i++) {
656 /* T[i] = A[i] - B[i] - U */
657 *tmpc = *tmpa++ - *tmpb++ - carry;
658
659 /* U = carry bit of T[i]
660 * Note this saves performing an AND operation since
661 * if a carry does occur it will propagate all the way to the
662 * MSB. As a result a single shift is enough to get the carry
663 */
664 carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1));
665
666 /* Clear carry from T[i] */
667 *tmpc++ &= MP_MASK;
668 }
669
670 /* now copy higher words if any, e.g. if A has more digits than B */
671 for (; i < max; i++) {
672 /* T[i] = A[i] - U */
673 *tmpc = *tmpa++ - carry;
674
675 /* U = carry bit of T[i] */
676 carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1));
677
678 /* Clear carry from T[i] */
679 *tmpc++ &= MP_MASK;
680 }
681
682 /* clear digits above used (since we may not have grown result above) */
683 if (olduse > c->used) {
684 memset(tmpc, 0x0, (olduse - c->used) * sizeof(*a->dp));
685 }
686 }
687
688 trim_unused_digits(c);
689 return MP_OKAY;
690 }
691
692 /* high level subtraction (handles signs) */
693 static int
signed_subtract(mp_int * a,mp_int * b,mp_int * c)694 signed_subtract(mp_int * a, mp_int * b, mp_int * c)
695 {
696 int sa, sb, res;
697
698 sa = a->sign;
699 sb = b->sign;
700
701 if (sa != sb) {
702 /* subtract a negative from a positive, OR */
703 /* subtract a positive from a negative. */
704 /* In either case, ADD their magnitudes, */
705 /* and use the sign of the first number. */
706 c->sign = sa;
707 res = basic_add(a, b, c);
708 } else {
709 /* subtract a positive from a positive, OR */
710 /* subtract a negative from a negative. */
711 /* First, take the difference between their */
712 /* magnitudes, then... */
713 if (compare_magnitude(a, b) != MP_LT) {
714 /* Copy the sign from the first */
715 c->sign = sa;
716 /* The first has a larger or equal magnitude */
717 res = basic_subtract(a, b, c);
718 } else {
719 /* The result has the *opposite* sign from */
720 /* the first number. */
721 c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
722 /* The second has a larger magnitude */
723 res = basic_subtract(b, a, c);
724 }
725 }
726 return res;
727 }
728
729 /* shift right a certain amount of digits */
730 static int
rshift_digits(mp_int * a,int b)731 rshift_digits(mp_int * a, int b)
732 {
733 /* if b <= 0 then ignore it */
734 if (b <= 0) {
735 return 0;
736 }
737
738 /* if b > used then simply zero it and return */
739 if (a->used <= b) {
740 mp_zero(a);
741 return 0;
742 }
743
744 /* this is implemented as a sliding window where
745 * the window is b-digits long and digits from
746 * the top of the window are copied to the bottom
747 *
748 * e.g.
749
750 b-2 | b-1 | b0 | b1 | b2 | ... | bb | ---->
751 /\ | ---->
752 \-------------------/ ---->
753 */
754 memmove(a->dp, &a->dp[b], (a->used - b) * sizeof(*a->dp));
755 memset(&a->dp[a->used - b], 0x0, b * sizeof(*a->dp));
756
757 /* remove excess digits */
758 a->used -= b;
759 return 1;
760 }
761
762 /* multiply by a digit */
763 static int
multiply_digit(mp_int * a,mp_digit b,mp_int * c)764 multiply_digit(mp_int * a, mp_digit b, mp_int * c)
765 {
766 mp_digit carry, *tmpa, *tmpc;
767 mp_word r;
768 int ix, res, olduse;
769
770 /* make sure c is big enough to hold a*b */
771 if (c->alloc < a->used + 1) {
772 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
773 return res;
774 }
775 }
776
777 /* get the original destinations used count */
778 olduse = c->used;
779
780 /* set the sign */
781 c->sign = a->sign;
782
783 /* alias for a->dp [source] */
784 tmpa = a->dp;
785
786 /* alias for c->dp [dest] */
787 tmpc = c->dp;
788
789 /* zero carry */
790 carry = 0;
791
792 /* compute columns */
793 for (ix = 0; ix < a->used; ix++) {
794 /* compute product and carry sum for this term */
795 r = ((mp_word) carry) + ((mp_word)*tmpa++) * ((mp_word)b);
796
797 /* mask off higher bits to get a single digit */
798 *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
799
800 /* send carry into next iteration */
801 carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
802 }
803
804 /* store final carry [if any] and increment ix offset */
805 *tmpc++ = carry;
806 ++ix;
807 if (olduse > ix) {
808 memset(tmpc, 0x0, (olduse - ix) * sizeof(*tmpc));
809 }
810
811 /* set used count */
812 c->used = a->used + 1;
813 trim_unused_digits(c);
814
815 return MP_OKAY;
816 }
817
818 /* high level addition (handles signs) */
819 static int
signed_add(mp_int * a,mp_int * b,mp_int * c)820 signed_add(mp_int * a, mp_int * b, mp_int * c)
821 {
822 int asign, bsign, res;
823
824 /* get sign of both inputs */
825 asign = a->sign;
826 bsign = b->sign;
827
828 /* handle two cases, not four */
829 if (asign == bsign) {
830 /* both positive or both negative */
831 /* add their magnitudes, copy the sign */
832 c->sign = asign;
833 res = basic_add(a, b, c);
834 } else {
835 /* one positive, the other negative */
836 /* subtract the one with the greater magnitude from */
837 /* the one of the lesser magnitude. The result gets */
838 /* the sign of the one with the greater magnitude. */
839 if (compare_magnitude(a, b) == MP_LT) {
840 c->sign = bsign;
841 res = basic_subtract(b, a, c);
842 } else {
843 c->sign = asign;
844 res = basic_subtract(a, b, c);
845 }
846 }
847 return res;
848 }
849
850 /* swap the elements of two integers, for cases where you can't simply swap the
851 * mp_int pointers around
852 */
853 static void
mp_exch(mp_int * a,mp_int * b)854 mp_exch(mp_int *a, mp_int *b)
855 {
856 mp_int t;
857
858 t = *a;
859 *a = *b;
860 *b = t;
861 }
862
863 /* calc a value mod 2**b */
864 static int
modulo_2_to_power(mp_int * a,int b,mp_int * c)865 modulo_2_to_power(mp_int * a, int b, mp_int * c)
866 {
867 int x, res;
868
869 /* if b is <= 0 then zero the int */
870 if (b <= 0) {
871 mp_zero(c);
872 return MP_OKAY;
873 }
874
875 /* if the modulus is larger than the value than return */
876 if (b >= (int) (a->used * DIGIT_BIT)) {
877 res = mp_copy(a, c);
878 return res;
879 }
880
881 /* copy */
882 if ((res = mp_copy(a, c)) != MP_OKAY) {
883 return res;
884 }
885
886 /* zero digits above the last digit of the modulus */
887 for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
888 c->dp[x] = 0;
889 }
890 /* clear the digit that is not completely outside/inside the modulus */
891 c->dp[b / DIGIT_BIT] &=
892 (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
893 trim_unused_digits(c);
894 return MP_OKAY;
895 }
896
897 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
898 static int
rshift_bits(mp_int * a,int b,mp_int * c,mp_int * d)899 rshift_bits(mp_int * a, int b, mp_int * c, mp_int * d)
900 {
901 mp_digit D, r, rr;
902 int x, res;
903 mp_int t;
904
905
906 /* if the shift count is <= 0 then we do no work */
907 if (b <= 0) {
908 res = mp_copy(a, c);
909 if (d != NULL) {
910 mp_zero(d);
911 }
912 return res;
913 }
914
915 if ((res = mp_init(&t)) != MP_OKAY) {
916 return res;
917 }
918
919 /* get the remainder */
920 if (d != NULL) {
921 if ((res = modulo_2_to_power(a, b, &t)) != MP_OKAY) {
922 mp_clear(&t);
923 return res;
924 }
925 }
926
927 /* copy */
928 if ((res = mp_copy(a, c)) != MP_OKAY) {
929 mp_clear(&t);
930 return res;
931 }
932
933 /* shift by as many digits in the bit count */
934 if (b >= (int)DIGIT_BIT) {
935 rshift_digits(c, b / DIGIT_BIT);
936 }
937
938 /* shift any bit count < DIGIT_BIT */
939 D = (mp_digit) (b % DIGIT_BIT);
940 if (D != 0) {
941 mp_digit *tmpc, mask, shift;
942
943 /* mask */
944 mask = (((mp_digit)1) << D) - 1;
945
946 /* shift for lsb */
947 shift = DIGIT_BIT - D;
948
949 /* alias */
950 tmpc = c->dp + (c->used - 1);
951
952 /* carry */
953 r = 0;
954 for (x = c->used - 1; x >= 0; x--) {
955 /* get the lower bits of this word in a temp */
956 rr = *tmpc & mask;
957
958 /* shift the current word and mix in the carry bits from the previous word */
959 *tmpc = (*tmpc >> D) | (r << shift);
960 --tmpc;
961
962 /* set the carry to the carry bits of the current word found above */
963 r = rr;
964 }
965 }
966 trim_unused_digits(c);
967 if (d != NULL) {
968 mp_exch(&t, d);
969 }
970 mp_clear(&t);
971 return MP_OKAY;
972 }
973
974 /* integer signed division.
975 * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
976 * HAC pp.598 Algorithm 14.20
977 *
978 * Note that the description in HAC is horribly
979 * incomplete. For example, it doesn't consider
980 * the case where digits are removed from 'x' in
981 * the inner loop. It also doesn't consider the
982 * case that y has fewer than three digits, etc..
983 *
984 * The overall algorithm is as described as
985 * 14.20 from HAC but fixed to treat these cases.
986 */
987 static int
signed_divide(mp_int * c,mp_int * d,mp_int * a,mp_int * b)988 signed_divide(mp_int *c, mp_int *d, mp_int *a, mp_int *b)
989 {
990 mp_int q, x, y, t1, t2;
991 int res, n, t, i, norm, neg;
992
993 /* is divisor zero ? */
994 if (MP_ISZERO(b) == MP_YES) {
995 return MP_VAL;
996 }
997
998 /* if a < b then q=0, r = a */
999 if (compare_magnitude(a, b) == MP_LT) {
1000 if (d != NULL) {
1001 res = mp_copy(a, d);
1002 } else {
1003 res = MP_OKAY;
1004 }
1005 if (c != NULL) {
1006 mp_zero(c);
1007 }
1008 return res;
1009 }
1010
1011 if ((res = mp_init_size(&q, a->used + 2)) != MP_OKAY) {
1012 return res;
1013 }
1014 q.used = a->used + 2;
1015
1016 if ((res = mp_init(&t1)) != MP_OKAY) {
1017 goto LBL_Q;
1018 }
1019
1020 if ((res = mp_init(&t2)) != MP_OKAY) {
1021 goto LBL_T1;
1022 }
1023
1024 if ((res = mp_init_copy(&x, a)) != MP_OKAY) {
1025 goto LBL_T2;
1026 }
1027
1028 if ((res = mp_init_copy(&y, b)) != MP_OKAY) {
1029 goto LBL_X;
1030 }
1031
1032 /* fix the sign */
1033 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1034 x.sign = y.sign = MP_ZPOS;
1035
1036 /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1037 norm = mp_count_bits(&y) % DIGIT_BIT;
1038 if (norm < (int)(DIGIT_BIT-1)) {
1039 norm = (DIGIT_BIT-1) - norm;
1040 if ((res = lshift_bits(&x, norm, &x)) != MP_OKAY) {
1041 goto LBL_Y;
1042 }
1043 if ((res = lshift_bits(&y, norm, &y)) != MP_OKAY) {
1044 goto LBL_Y;
1045 }
1046 } else {
1047 norm = 0;
1048 }
1049
1050 /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1051 n = x.used - 1;
1052 t = y.used - 1;
1053
1054 /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1055 if ((res = lshift_digits(&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1056 goto LBL_Y;
1057 }
1058
1059 while (signed_compare(&x, &y) != MP_LT) {
1060 ++(q.dp[n - t]);
1061 if ((res = signed_subtract(&x, &y, &x)) != MP_OKAY) {
1062 goto LBL_Y;
1063 }
1064 }
1065
1066 /* reset y by shifting it back down */
1067 rshift_digits(&y, n - t);
1068
1069 /* step 3. for i from n down to (t + 1) */
1070 for (i = n; i >= (t + 1); i--) {
1071 if (i > x.used) {
1072 continue;
1073 }
1074
1075 /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1076 * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1077 if (x.dp[i] == y.dp[t]) {
1078 q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1079 } else {
1080 mp_word tmp;
1081 tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1082 tmp |= ((mp_word) x.dp[i - 1]);
1083 tmp /= ((mp_word) y.dp[t]);
1084 if (tmp > (mp_word) MP_MASK) {
1085 tmp = MP_MASK;
1086 }
1087 q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1088 }
1089
1090 /* while (q{i-t-1} * (yt * b + y{t-1})) >
1091 xi * b**2 + xi-1 * b + xi-2
1092 do q{i-t-1} -= 1;
1093 */
1094 q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1095 do {
1096 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1097
1098 /* find left hand */
1099 mp_zero(&t1);
1100 t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1101 t1.dp[1] = y.dp[t];
1102 t1.used = 2;
1103 if ((res = multiply_digit(&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1104 goto LBL_Y;
1105 }
1106
1107 /* find right hand */
1108 t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1109 t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1110 t2.dp[2] = x.dp[i];
1111 t2.used = 3;
1112 } while (compare_magnitude(&t1, &t2) == MP_GT);
1113
1114 /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1115 if ((res = multiply_digit(&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1116 goto LBL_Y;
1117 }
1118
1119 if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) {
1120 goto LBL_Y;
1121 }
1122
1123 if ((res = signed_subtract(&x, &t1, &x)) != MP_OKAY) {
1124 goto LBL_Y;
1125 }
1126
1127 /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1128 if (x.sign == MP_NEG) {
1129 if ((res = mp_copy(&y, &t1)) != MP_OKAY) {
1130 goto LBL_Y;
1131 }
1132 if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) {
1133 goto LBL_Y;
1134 }
1135 if ((res = signed_add(&x, &t1, &x)) != MP_OKAY) {
1136 goto LBL_Y;
1137 }
1138
1139 q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1140 }
1141 }
1142
1143 /* now q is the quotient and x is the remainder
1144 * [which we have to normalize]
1145 */
1146
1147 /* get sign before writing to c */
1148 x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1149
1150 if (c != NULL) {
1151 trim_unused_digits(&q);
1152 mp_exch(&q, c);
1153 c->sign = neg;
1154 }
1155
1156 if (d != NULL) {
1157 rshift_bits(&x, norm, &x, NULL);
1158 mp_exch(&x, d);
1159 }
1160
1161 res = MP_OKAY;
1162
1163 LBL_Y:
1164 mp_clear(&y);
1165 LBL_X:
1166 mp_clear(&x);
1167 LBL_T2:
1168 mp_clear(&t2);
1169 LBL_T1:
1170 mp_clear(&t1);
1171 LBL_Q:
1172 mp_clear(&q);
1173 return res;
1174 }
1175
1176 /* c = a mod b, 0 <= c < b */
1177 static int
modulo(mp_int * a,mp_int * b,mp_int * c)1178 modulo(mp_int * a, mp_int * b, mp_int * c)
1179 {
1180 mp_int t;
1181 int res;
1182
1183 if ((res = mp_init(&t)) != MP_OKAY) {
1184 return res;
1185 }
1186
1187 if ((res = signed_divide(NULL, &t, a, b)) != MP_OKAY) {
1188 mp_clear(&t);
1189 return res;
1190 }
1191
1192 if (t.sign != b->sign) {
1193 res = signed_add(b, &t, c);
1194 } else {
1195 res = MP_OKAY;
1196 mp_exch(&t, c);
1197 }
1198
1199 mp_clear(&t);
1200 return res;
1201 }
1202
1203 /* set to a digit */
1204 static void
set_word(mp_int * a,mp_digit b)1205 set_word(mp_int * a, mp_digit b)
1206 {
1207 mp_zero(a);
1208 a->dp[0] = b & MP_MASK;
1209 a->used = (a->dp[0] != 0) ? 1 : 0;
1210 }
1211
1212 /* b = a/2 */
1213 static int
half(mp_int * a,mp_int * b)1214 half(mp_int * a, mp_int * b)
1215 {
1216 int x, res, oldused;
1217
1218 /* copy */
1219 if (b->alloc < a->used) {
1220 if ((res = mp_grow(b, a->used)) != MP_OKAY) {
1221 return res;
1222 }
1223 }
1224
1225 oldused = b->used;
1226 b->used = a->used;
1227 {
1228 mp_digit r, rr, *tmpa, *tmpb;
1229
1230 /* source alias */
1231 tmpa = a->dp + b->used - 1;
1232
1233 /* dest alias */
1234 tmpb = b->dp + b->used - 1;
1235
1236 /* carry */
1237 r = 0;
1238 for (x = b->used - 1; x >= 0; x--) {
1239 /* get the carry for the next iteration */
1240 rr = *tmpa & 1;
1241
1242 /* shift the current digit, add in carry and store */
1243 *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1244
1245 /* forward carry to next iteration */
1246 r = rr;
1247 }
1248
1249 /* zero excess digits */
1250 tmpb = b->dp + b->used;
1251 for (x = b->used; x < oldused; x++) {
1252 *tmpb++ = 0;
1253 }
1254 }
1255 b->sign = a->sign;
1256 trim_unused_digits(b);
1257 return MP_OKAY;
1258 }
1259
1260 /* compare a digit */
1261 static int
compare_digit(mp_int * a,mp_digit b)1262 compare_digit(mp_int * a, mp_digit b)
1263 {
1264 /* compare based on sign */
1265 if (a->sign == MP_NEG) {
1266 return MP_LT;
1267 }
1268
1269 /* compare based on magnitude */
1270 if (a->used > 1) {
1271 return MP_GT;
1272 }
1273
1274 /* compare the only digit of a to b */
1275 if (a->dp[0] > b) {
1276 return MP_GT;
1277 } else if (a->dp[0] < b) {
1278 return MP_LT;
1279 } else {
1280 return MP_EQ;
1281 }
1282 }
1283
1284 static void
mp_clear_multi(mp_int * mp,...)1285 mp_clear_multi(mp_int *mp, ...)
1286 {
1287 mp_int* next_mp = mp;
1288 va_list args;
1289
1290 va_start(args, mp);
1291 while (next_mp != NULL) {
1292 mp_clear(next_mp);
1293 next_mp = va_arg(args, mp_int*);
1294 }
1295 va_end(args);
1296 }
1297
1298 /* computes the modular inverse via binary extended euclidean algorithm,
1299 * that is c = 1/a mod b
1300 *
1301 * Based on slow invmod except this is optimized for the case where b is
1302 * odd as per HAC Note 14.64 on pp. 610
1303 */
1304 static int
fast_modular_inverse(mp_int * a,mp_int * b,mp_int * c)1305 fast_modular_inverse(mp_int * a, mp_int * b, mp_int * c)
1306 {
1307 mp_int x, y, u, v, B, D;
1308 int res, neg;
1309
1310 /* 2. [modified] b must be odd */
1311 if (MP_ISZERO(b) == MP_YES) {
1312 return MP_VAL;
1313 }
1314
1315 /* init all our temps */
1316 if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
1317 return res;
1318 }
1319
1320 /* x == modulus, y == value to invert */
1321 if ((res = mp_copy(b, &x)) != MP_OKAY) {
1322 goto LBL_ERR;
1323 }
1324
1325 /* we need y = |a| */
1326 if ((res = modulo(a, b, &y)) != MP_OKAY) {
1327 goto LBL_ERR;
1328 }
1329
1330 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1331 if ((res = mp_copy(&x, &u)) != MP_OKAY) {
1332 goto LBL_ERR;
1333 }
1334 if ((res = mp_copy(&y, &v)) != MP_OKAY) {
1335 goto LBL_ERR;
1336 }
1337 set_word(&D, 1);
1338
1339 top:
1340 /* 4. while u is even do */
1341 while (PGPV_BN_is_even(&u) == 1) {
1342 /* 4.1 u = u/2 */
1343 if ((res = half(&u, &u)) != MP_OKAY) {
1344 goto LBL_ERR;
1345 }
1346 /* 4.2 if B is odd then */
1347 if (PGPV_BN_is_odd(&B) == 1) {
1348 if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) {
1349 goto LBL_ERR;
1350 }
1351 }
1352 /* B = B/2 */
1353 if ((res = half(&B, &B)) != MP_OKAY) {
1354 goto LBL_ERR;
1355 }
1356 }
1357
1358 /* 5. while v is even do */
1359 while (PGPV_BN_is_even(&v) == 1) {
1360 /* 5.1 v = v/2 */
1361 if ((res = half(&v, &v)) != MP_OKAY) {
1362 goto LBL_ERR;
1363 }
1364 /* 5.2 if D is odd then */
1365 if (PGPV_BN_is_odd(&D) == 1) {
1366 /* D = (D-x)/2 */
1367 if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) {
1368 goto LBL_ERR;
1369 }
1370 }
1371 /* D = D/2 */
1372 if ((res = half(&D, &D)) != MP_OKAY) {
1373 goto LBL_ERR;
1374 }
1375 }
1376
1377 /* 6. if u >= v then */
1378 if (signed_compare(&u, &v) != MP_LT) {
1379 /* u = u - v, B = B - D */
1380 if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) {
1381 goto LBL_ERR;
1382 }
1383
1384 if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) {
1385 goto LBL_ERR;
1386 }
1387 } else {
1388 /* v - v - u, D = D - B */
1389 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) {
1390 goto LBL_ERR;
1391 }
1392
1393 if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) {
1394 goto LBL_ERR;
1395 }
1396 }
1397
1398 /* if not zero goto step 4 */
1399 if (MP_ISZERO(&u) == MP_NO) {
1400 goto top;
1401 }
1402
1403 /* now a = C, b = D, gcd == g*v */
1404
1405 /* if v != 1 then there is no inverse */
1406 if (compare_digit(&v, 1) != MP_EQ) {
1407 res = MP_VAL;
1408 goto LBL_ERR;
1409 }
1410
1411 /* b is now the inverse */
1412 neg = a->sign;
1413 while (D.sign == MP_NEG) {
1414 if ((res = signed_add(&D, b, &D)) != MP_OKAY) {
1415 goto LBL_ERR;
1416 }
1417 }
1418 mp_exch(&D, c);
1419 c->sign = neg;
1420 res = MP_OKAY;
1421
1422 LBL_ERR:
1423 mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
1424 return res;
1425 }
1426
1427 /* hac 14.61, pp608 */
1428 static int
slow_modular_inverse(mp_int * a,mp_int * b,mp_int * c)1429 slow_modular_inverse(mp_int * a, mp_int * b, mp_int * c)
1430 {
1431 mp_int x, y, u, v, A, B, C, D;
1432 int res;
1433
1434 /* b cannot be negative */
1435 if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) {
1436 return MP_VAL;
1437 }
1438
1439 /* init temps */
1440 if ((res = mp_init_multi(&x, &y, &u, &v,
1441 &A, &B, &C, &D, NULL)) != MP_OKAY) {
1442 return res;
1443 }
1444
1445 /* x = a, y = b */
1446 if ((res = modulo(a, b, &x)) != MP_OKAY) {
1447 goto LBL_ERR;
1448 }
1449 if ((res = mp_copy(b, &y)) != MP_OKAY) {
1450 goto LBL_ERR;
1451 }
1452
1453 /* 2. [modified] if x,y are both even then return an error! */
1454 if (PGPV_BN_is_even(&x) == 1 && PGPV_BN_is_even(&y) == 1) {
1455 res = MP_VAL;
1456 goto LBL_ERR;
1457 }
1458
1459 /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1460 if ((res = mp_copy(&x, &u)) != MP_OKAY) {
1461 goto LBL_ERR;
1462 }
1463 if ((res = mp_copy(&y, &v)) != MP_OKAY) {
1464 goto LBL_ERR;
1465 }
1466 set_word(&A, 1);
1467 set_word(&D, 1);
1468
1469 top:
1470 /* 4. while u is even do */
1471 while (PGPV_BN_is_even(&u) == 1) {
1472 /* 4.1 u = u/2 */
1473 if ((res = half(&u, &u)) != MP_OKAY) {
1474 goto LBL_ERR;
1475 }
1476 /* 4.2 if A or B is odd then */
1477 if (PGPV_BN_is_odd(&A) == 1 || PGPV_BN_is_odd(&B) == 1) {
1478 /* A = (A+y)/2, B = (B-x)/2 */
1479 if ((res = signed_add(&A, &y, &A)) != MP_OKAY) {
1480 goto LBL_ERR;
1481 }
1482 if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) {
1483 goto LBL_ERR;
1484 }
1485 }
1486 /* A = A/2, B = B/2 */
1487 if ((res = half(&A, &A)) != MP_OKAY) {
1488 goto LBL_ERR;
1489 }
1490 if ((res = half(&B, &B)) != MP_OKAY) {
1491 goto LBL_ERR;
1492 }
1493 }
1494
1495 /* 5. while v is even do */
1496 while (PGPV_BN_is_even(&v) == 1) {
1497 /* 5.1 v = v/2 */
1498 if ((res = half(&v, &v)) != MP_OKAY) {
1499 goto LBL_ERR;
1500 }
1501 /* 5.2 if C or D is odd then */
1502 if (PGPV_BN_is_odd(&C) == 1 || PGPV_BN_is_odd(&D) == 1) {
1503 /* C = (C+y)/2, D = (D-x)/2 */
1504 if ((res = signed_add(&C, &y, &C)) != MP_OKAY) {
1505 goto LBL_ERR;
1506 }
1507 if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) {
1508 goto LBL_ERR;
1509 }
1510 }
1511 /* C = C/2, D = D/2 */
1512 if ((res = half(&C, &C)) != MP_OKAY) {
1513 goto LBL_ERR;
1514 }
1515 if ((res = half(&D, &D)) != MP_OKAY) {
1516 goto LBL_ERR;
1517 }
1518 }
1519
1520 /* 6. if u >= v then */
1521 if (signed_compare(&u, &v) != MP_LT) {
1522 /* u = u - v, A = A - C, B = B - D */
1523 if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) {
1524 goto LBL_ERR;
1525 }
1526
1527 if ((res = signed_subtract(&A, &C, &A)) != MP_OKAY) {
1528 goto LBL_ERR;
1529 }
1530
1531 if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) {
1532 goto LBL_ERR;
1533 }
1534 } else {
1535 /* v - v - u, C = C - A, D = D - B */
1536 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) {
1537 goto LBL_ERR;
1538 }
1539
1540 if ((res = signed_subtract(&C, &A, &C)) != MP_OKAY) {
1541 goto LBL_ERR;
1542 }
1543
1544 if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) {
1545 goto LBL_ERR;
1546 }
1547 }
1548
1549 /* if not zero goto step 4 */
1550 if (PGPV_BN_is_zero(&u) == 0) {
1551 goto top;
1552 }
1553 /* now a = C, b = D, gcd == g*v */
1554
1555 /* if v != 1 then there is no inverse */
1556 if (compare_digit(&v, 1) != MP_EQ) {
1557 res = MP_VAL;
1558 goto LBL_ERR;
1559 }
1560
1561 /* if its too low */
1562 while (compare_digit(&C, 0) == MP_LT) {
1563 if ((res = signed_add(&C, b, &C)) != MP_OKAY) {
1564 goto LBL_ERR;
1565 }
1566 }
1567
1568 /* too big */
1569 while (compare_magnitude(&C, b) != MP_LT) {
1570 if ((res = signed_subtract(&C, b, &C)) != MP_OKAY) {
1571 goto LBL_ERR;
1572 }
1573 }
1574
1575 /* C is now the inverse */
1576 mp_exch(&C, c);
1577 res = MP_OKAY;
1578 LBL_ERR:
1579 mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL);
1580 return res;
1581 }
1582
1583 static int
modular_inverse(mp_int * c,mp_int * a,mp_int * b)1584 modular_inverse(mp_int *c, mp_int *a, mp_int *b)
1585 {
1586 /* b cannot be negative */
1587 if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) {
1588 return MP_VAL;
1589 }
1590
1591 /* if the modulus is odd we can use a faster routine instead */
1592 if (PGPV_BN_is_odd(b) == 1) {
1593 return fast_modular_inverse(a, b, c);
1594 }
1595 return slow_modular_inverse(a, b, c);
1596 }
1597
1598 /* b = |a|
1599 *
1600 * Simple function copies the input and fixes the sign to positive
1601 */
1602 static int
absolute(mp_int * a,mp_int * b)1603 absolute(mp_int * a, mp_int * b)
1604 {
1605 int res;
1606
1607 /* copy a to b */
1608 if (a != b) {
1609 if ((res = mp_copy(a, b)) != MP_OKAY) {
1610 return res;
1611 }
1612 }
1613
1614 /* force the sign of b to positive */
1615 b->sign = MP_ZPOS;
1616
1617 return MP_OKAY;
1618 }
1619
1620 /* determines if reduce_2k_l can be used */
1621 static int
mp_reduce_is_2k_l(mp_int * a)1622 mp_reduce_is_2k_l(mp_int *a)
1623 {
1624 int ix, iy;
1625
1626 if (a->used == 0) {
1627 return MP_NO;
1628 } else if (a->used == 1) {
1629 return MP_YES;
1630 } else if (a->used > 1) {
1631 /* if more than half of the digits are -1 we're sold */
1632 for (iy = ix = 0; ix < a->used; ix++) {
1633 if (a->dp[ix] == MP_MASK) {
1634 ++iy;
1635 }
1636 }
1637 return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1638
1639 }
1640 return MP_NO;
1641 }
1642
1643 /* computes a = 2**b
1644 *
1645 * Simple algorithm which zeroes the int, grows it then just sets one bit
1646 * as required.
1647 */
1648 static int
mp_2expt(mp_int * a,int b)1649 mp_2expt(mp_int * a, int b)
1650 {
1651 int res;
1652
1653 /* zero a as per default */
1654 mp_zero(a);
1655
1656 /* grow a to accomodate the single bit */
1657 if ((res = mp_grow(a, b / DIGIT_BIT + 1)) != MP_OKAY) {
1658 return res;
1659 }
1660
1661 /* set the used count of where the bit will go */
1662 a->used = b / DIGIT_BIT + 1;
1663
1664 /* put the single bit in its place */
1665 a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
1666
1667 return MP_OKAY;
1668 }
1669
1670 /* pre-calculate the value required for Barrett reduction
1671 * For a given modulus "b" it calulates the value required in "a"
1672 */
1673 static int
mp_reduce_setup(mp_int * a,mp_int * b)1674 mp_reduce_setup(mp_int * a, mp_int * b)
1675 {
1676 int res;
1677
1678 if ((res = mp_2expt(a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
1679 return res;
1680 }
1681 return signed_divide(a, NULL, a, b);
1682 }
1683
1684 /* b = a*2 */
1685 static int
doubled(mp_int * a,mp_int * b)1686 doubled(mp_int * a, mp_int * b)
1687 {
1688 int x, res, oldused;
1689
1690 /* grow to accomodate result */
1691 if (b->alloc < a->used + 1) {
1692 if ((res = mp_grow(b, a->used + 1)) != MP_OKAY) {
1693 return res;
1694 }
1695 }
1696
1697 oldused = b->used;
1698 b->used = a->used;
1699
1700 {
1701 mp_digit r, rr, *tmpa, *tmpb;
1702
1703 /* alias for source */
1704 tmpa = a->dp;
1705
1706 /* alias for dest */
1707 tmpb = b->dp;
1708
1709 /* carry */
1710 r = 0;
1711 for (x = 0; x < a->used; x++) {
1712
1713 /* get what will be the *next* carry bit from the
1714 * MSB of the current digit
1715 */
1716 rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
1717
1718 /* now shift up this digit, add in the carry [from the previous] */
1719 *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
1720
1721 /* copy the carry that would be from the source
1722 * digit into the next iteration
1723 */
1724 r = rr;
1725 }
1726
1727 /* new leading digit? */
1728 if (r != 0) {
1729 /* add a MSB which is always 1 at this point */
1730 *tmpb = 1;
1731 ++(b->used);
1732 }
1733
1734 /* now zero any excess digits on the destination
1735 * that we didn't write to
1736 */
1737 tmpb = b->dp + b->used;
1738 for (x = b->used; x < oldused; x++) {
1739 *tmpb++ = 0;
1740 }
1741 }
1742 b->sign = a->sign;
1743 return MP_OKAY;
1744 }
1745
1746 /* divide by three (based on routine from MPI and the GMP manual) */
1747 static int
third(mp_int * a,mp_int * c,mp_digit * d)1748 third(mp_int * a, mp_int *c, mp_digit * d)
1749 {
1750 mp_int q;
1751 mp_word w, t;
1752 mp_digit b;
1753 int res, ix;
1754
1755 /* b = 2**DIGIT_BIT / 3 */
1756 b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
1757
1758 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1759 return res;
1760 }
1761
1762 q.used = a->used;
1763 q.sign = a->sign;
1764 w = 0;
1765 for (ix = a->used - 1; ix >= 0; ix--) {
1766 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1767
1768 if (w >= 3) {
1769 /* multiply w by [1/3] */
1770 t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
1771
1772 /* now subtract 3 * [w/3] from w, to get the remainder */
1773 w -= t+t+t;
1774
1775 /* fixup the remainder as required since
1776 * the optimization is not exact.
1777 */
1778 while (w >= 3) {
1779 t += 1;
1780 w -= 3;
1781 }
1782 } else {
1783 t = 0;
1784 }
1785 q.dp[ix] = (mp_digit)t;
1786 }
1787
1788 /* [optional] store the remainder */
1789 if (d != NULL) {
1790 *d = (mp_digit)w;
1791 }
1792
1793 /* [optional] store the quotient */
1794 if (c != NULL) {
1795 trim_unused_digits(&q);
1796 mp_exch(&q, c);
1797 }
1798 mp_clear(&q);
1799
1800 return res;
1801 }
1802
1803 /* multiplication using the Toom-Cook 3-way algorithm
1804 *
1805 * Much more complicated than Karatsuba but has a lower
1806 * asymptotic running time of O(N**1.464). This algorithm is
1807 * only particularly useful on VERY large inputs
1808 * (we're talking 1000s of digits here...).
1809 */
1810 static int
toom_cook_multiply(mp_int * a,mp_int * b,mp_int * c)1811 toom_cook_multiply(mp_int *a, mp_int *b, mp_int *c)
1812 {
1813 mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
1814 int res, B;
1815
1816 /* init temps */
1817 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
1818 &a0, &a1, &a2, &b0, &b1,
1819 &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
1820 return res;
1821 }
1822
1823 /* B */
1824 B = MIN(a->used, b->used) / 3;
1825
1826 /* a = a2 * B**2 + a1 * B + a0 */
1827 if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
1828 goto ERR;
1829 }
1830
1831 if ((res = mp_copy(a, &a1)) != MP_OKAY) {
1832 goto ERR;
1833 }
1834 rshift_digits(&a1, B);
1835 modulo_2_to_power(&a1, DIGIT_BIT * B, &a1);
1836
1837 if ((res = mp_copy(a, &a2)) != MP_OKAY) {
1838 goto ERR;
1839 }
1840 rshift_digits(&a2, B*2);
1841
1842 /* b = b2 * B**2 + b1 * B + b0 */
1843 if ((res = modulo_2_to_power(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
1844 goto ERR;
1845 }
1846
1847 if ((res = mp_copy(b, &b1)) != MP_OKAY) {
1848 goto ERR;
1849 }
1850 rshift_digits(&b1, B);
1851 modulo_2_to_power(&b1, DIGIT_BIT * B, &b1);
1852
1853 if ((res = mp_copy(b, &b2)) != MP_OKAY) {
1854 goto ERR;
1855 }
1856 rshift_digits(&b2, B*2);
1857
1858 /* w0 = a0*b0 */
1859 if ((res = signed_multiply(&a0, &b0, &w0)) != MP_OKAY) {
1860 goto ERR;
1861 }
1862
1863 /* w4 = a2 * b2 */
1864 if ((res = signed_multiply(&a2, &b2, &w4)) != MP_OKAY) {
1865 goto ERR;
1866 }
1867
1868 /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
1869 if ((res = doubled(&a0, &tmp1)) != MP_OKAY) {
1870 goto ERR;
1871 }
1872 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
1873 goto ERR;
1874 }
1875 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) {
1876 goto ERR;
1877 }
1878 if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
1879 goto ERR;
1880 }
1881
1882 if ((res = doubled(&b0, &tmp2)) != MP_OKAY) {
1883 goto ERR;
1884 }
1885 if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
1886 goto ERR;
1887 }
1888 if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) {
1889 goto ERR;
1890 }
1891 if ((res = signed_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
1892 goto ERR;
1893 }
1894
1895 if ((res = signed_multiply(&tmp1, &tmp2, &w1)) != MP_OKAY) {
1896 goto ERR;
1897 }
1898
1899 /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
1900 if ((res = doubled(&a2, &tmp1)) != MP_OKAY) {
1901 goto ERR;
1902 }
1903 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
1904 goto ERR;
1905 }
1906 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) {
1907 goto ERR;
1908 }
1909 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
1910 goto ERR;
1911 }
1912
1913 if ((res = doubled(&b2, &tmp2)) != MP_OKAY) {
1914 goto ERR;
1915 }
1916 if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
1917 goto ERR;
1918 }
1919 if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) {
1920 goto ERR;
1921 }
1922 if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
1923 goto ERR;
1924 }
1925
1926 if ((res = signed_multiply(&tmp1, &tmp2, &w3)) != MP_OKAY) {
1927 goto ERR;
1928 }
1929
1930
1931 /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
1932 if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) {
1933 goto ERR;
1934 }
1935 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
1936 goto ERR;
1937 }
1938 if ((res = signed_add(&b2, &b1, &tmp2)) != MP_OKAY) {
1939 goto ERR;
1940 }
1941 if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
1942 goto ERR;
1943 }
1944 if ((res = signed_multiply(&tmp1, &tmp2, &w2)) != MP_OKAY) {
1945 goto ERR;
1946 }
1947
1948 /* now solve the matrix
1949
1950 0 0 0 0 1
1951 1 2 4 8 16
1952 1 1 1 1 1
1953 16 8 4 2 1
1954 1 0 0 0 0
1955
1956 using 12 subtractions, 4 shifts,
1957 2 small divisions and 1 small multiplication
1958 */
1959
1960 /* r1 - r4 */
1961 if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) {
1962 goto ERR;
1963 }
1964 /* r3 - r0 */
1965 if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) {
1966 goto ERR;
1967 }
1968 /* r1/2 */
1969 if ((res = half(&w1, &w1)) != MP_OKAY) {
1970 goto ERR;
1971 }
1972 /* r3/2 */
1973 if ((res = half(&w3, &w3)) != MP_OKAY) {
1974 goto ERR;
1975 }
1976 /* r2 - r0 - r4 */
1977 if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) {
1978 goto ERR;
1979 }
1980 if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) {
1981 goto ERR;
1982 }
1983 /* r1 - r2 */
1984 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) {
1985 goto ERR;
1986 }
1987 /* r3 - r2 */
1988 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) {
1989 goto ERR;
1990 }
1991 /* r1 - 8r0 */
1992 if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) {
1993 goto ERR;
1994 }
1995 if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) {
1996 goto ERR;
1997 }
1998 /* r3 - 8r4 */
1999 if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) {
2000 goto ERR;
2001 }
2002 if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) {
2003 goto ERR;
2004 }
2005 /* 3r2 - r1 - r3 */
2006 if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) {
2007 goto ERR;
2008 }
2009 if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) {
2010 goto ERR;
2011 }
2012 if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) {
2013 goto ERR;
2014 }
2015 /* r1 - r2 */
2016 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) {
2017 goto ERR;
2018 }
2019 /* r3 - r2 */
2020 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) {
2021 goto ERR;
2022 }
2023 /* r1/3 */
2024 if ((res = third(&w1, &w1, NULL)) != MP_OKAY) {
2025 goto ERR;
2026 }
2027 /* r3/3 */
2028 if ((res = third(&w3, &w3, NULL)) != MP_OKAY) {
2029 goto ERR;
2030 }
2031
2032 /* at this point shift W[n] by B*n */
2033 if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) {
2034 goto ERR;
2035 }
2036 if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) {
2037 goto ERR;
2038 }
2039 if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) {
2040 goto ERR;
2041 }
2042 if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) {
2043 goto ERR;
2044 }
2045
2046 if ((res = signed_add(&w0, &w1, c)) != MP_OKAY) {
2047 goto ERR;
2048 }
2049 if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) {
2050 goto ERR;
2051 }
2052 if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
2053 goto ERR;
2054 }
2055 if ((res = signed_add(&tmp1, c, c)) != MP_OKAY) {
2056 goto ERR;
2057 }
2058
2059 ERR:
2060 mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
2061 &a0, &a1, &a2, &b0, &b1,
2062 &b2, &tmp1, &tmp2, NULL);
2063 return res;
2064 }
2065
2066 #define TOOM_MUL_CUTOFF 350
2067 #define KARATSUBA_MUL_CUTOFF 80
2068
2069 /* c = |a| * |b| using Karatsuba Multiplication using
2070 * three half size multiplications
2071 *
2072 * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2073 * let n represent half of the number of digits in
2074 * the min(a,b)
2075 *
2076 * a = a1 * B**n + a0
2077 * b = b1 * B**n + b0
2078 *
2079 * Then, a * b =>
2080 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
2081 *
2082 * Note that a1b1 and a0b0 are used twice and only need to be
2083 * computed once. So in total three half size (half # of
2084 * digit) multiplications are performed, a0b0, a1b1 and
2085 * (a1+b1)(a0+b0)
2086 *
2087 * Note that a multiplication of half the digits requires
2088 * 1/4th the number of single precision multiplications so in
2089 * total after one call 25% of the single precision multiplications
2090 * are saved. Note also that the call to signed_multiply can end up back
2091 * in this function if the a0, a1, b0, or b1 are above the threshold.
2092 * This is known as divide-and-conquer and leads to the famous
2093 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
2094 * the standard O(N**2) that the baseline/comba methods use.
2095 * Generally though the overhead of this method doesn't pay off
2096 * until a certain size (N ~ 80) is reached.
2097 */
2098 static int
karatsuba_multiply(mp_int * a,mp_int * b,mp_int * c)2099 karatsuba_multiply(mp_int * a, mp_int * b, mp_int * c)
2100 {
2101 mp_int x0, x1, y0, y1, t1, x0y0, x1y1;
2102 int B;
2103 int err;
2104
2105 /* default the return code to an error */
2106 err = MP_MEM;
2107
2108 /* min # of digits */
2109 B = MIN(a->used, b->used);
2110
2111 /* now divide in two */
2112 B = (int)((unsigned)B >> 1);
2113
2114 /* init copy all the temps */
2115 if (mp_init_size(&x0, B) != MP_OKAY) {
2116 goto ERR;
2117 }
2118 if (mp_init_size(&x1, a->used - B) != MP_OKAY) {
2119 goto X0;
2120 }
2121 if (mp_init_size(&y0, B) != MP_OKAY) {
2122 goto X1;
2123 }
2124 if (mp_init_size(&y1, b->used - B) != MP_OKAY) {
2125 goto Y0;
2126 }
2127 /* init temps */
2128 if (mp_init_size(&t1, B * 2) != MP_OKAY) {
2129 goto Y1;
2130 }
2131 if (mp_init_size(&x0y0, B * 2) != MP_OKAY) {
2132 goto T1;
2133 }
2134 if (mp_init_size(&x1y1, B * 2) != MP_OKAY) {
2135 goto X0Y0;
2136 }
2137 /* now shift the digits */
2138 x0.used = y0.used = B;
2139 x1.used = a->used - B;
2140 y1.used = b->used - B;
2141
2142 {
2143 int x;
2144 mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2145
2146 /* we copy the digits directly instead of using higher level functions
2147 * since we also need to shift the digits
2148 */
2149 tmpa = a->dp;
2150 tmpb = b->dp;
2151
2152 tmpx = x0.dp;
2153 tmpy = y0.dp;
2154 for (x = 0; x < B; x++) {
2155 *tmpx++ = *tmpa++;
2156 *tmpy++ = *tmpb++;
2157 }
2158
2159 tmpx = x1.dp;
2160 for (x = B; x < a->used; x++) {
2161 *tmpx++ = *tmpa++;
2162 }
2163
2164 tmpy = y1.dp;
2165 for (x = B; x < b->used; x++) {
2166 *tmpy++ = *tmpb++;
2167 }
2168 }
2169
2170 /* only need to clamp the lower words since by definition the
2171 * upper words x1/y1 must have a known number of digits
2172 */
2173 trim_unused_digits(&x0);
2174 trim_unused_digits(&y0);
2175
2176 /* now calc the products x0y0 and x1y1 */
2177 /* after this x0 is no longer required, free temp [x0==t2]! */
2178 if (signed_multiply(&x0, &y0, &x0y0) != MP_OKAY) {
2179 goto X1Y1; /* x0y0 = x0*y0 */
2180 }
2181 if (signed_multiply(&x1, &y1, &x1y1) != MP_OKAY) {
2182 goto X1Y1; /* x1y1 = x1*y1 */
2183 }
2184 /* now calc x1+x0 and y1+y0 */
2185 if (basic_add(&x1, &x0, &t1) != MP_OKAY) {
2186 goto X1Y1; /* t1 = x1 - x0 */
2187 }
2188 if (basic_add(&y1, &y0, &x0) != MP_OKAY) {
2189 goto X1Y1; /* t2 = y1 - y0 */
2190 }
2191 if (signed_multiply(&t1, &x0, &t1) != MP_OKAY) {
2192 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */
2193 }
2194 /* add x0y0 */
2195 if (signed_add(&x0y0, &x1y1, &x0) != MP_OKAY) {
2196 goto X1Y1; /* t2 = x0y0 + x1y1 */
2197 }
2198 if (basic_subtract(&t1, &x0, &t1) != MP_OKAY) {
2199 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */
2200 }
2201 /* shift by B */
2202 if (lshift_digits(&t1, B) != MP_OKAY) {
2203 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2204 }
2205 if (lshift_digits(&x1y1, B * 2) != MP_OKAY) {
2206 goto X1Y1; /* x1y1 = x1y1 << 2*B */
2207 }
2208 if (signed_add(&x0y0, &t1, &t1) != MP_OKAY) {
2209 goto X1Y1; /* t1 = x0y0 + t1 */
2210 }
2211 if (signed_add(&t1, &x1y1, c) != MP_OKAY) {
2212 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */
2213 }
2214 /* Algorithm succeeded set the return code to MP_OKAY */
2215 err = MP_OKAY;
2216
2217 X1Y1:
2218 mp_clear(&x1y1);
2219 X0Y0:
2220 mp_clear(&x0y0);
2221 T1:
2222 mp_clear(&t1);
2223 Y1:
2224 mp_clear(&y1);
2225 Y0:
2226 mp_clear(&y0);
2227 X1:
2228 mp_clear(&x1);
2229 X0:
2230 mp_clear(&x0);
2231 ERR:
2232 return err;
2233 }
2234
2235 /* Fast (comba) multiplier
2236 *
2237 * This is the fast column-array [comba] multiplier. It is
2238 * designed to compute the columns of the product first
2239 * then handle the carries afterwards. This has the effect
2240 * of making the nested loops that compute the columns very
2241 * simple and schedulable on super-scalar processors.
2242 *
2243 * This has been modified to produce a variable number of
2244 * digits of output so if say only a half-product is required
2245 * you don't have to compute the upper half (a feature
2246 * required for fast Barrett reduction).
2247 *
2248 * Based on Algorithm 14.12 on pp.595 of HAC.
2249 *
2250 */
2251 static int
fast_col_array_multiply(mp_int * a,mp_int * b,mp_int * c,int digs)2252 fast_col_array_multiply(mp_int * a, mp_int * b, mp_int * c, int digs)
2253 {
2254 int olduse, res, pa, ix, iz;
2255 /*LINTED*/
2256 mp_digit W[MP_WARRAY];
2257 mp_word _W;
2258
2259 /* grow the destination as required */
2260 if (c->alloc < digs) {
2261 if ((res = mp_grow(c, digs)) != MP_OKAY) {
2262 return res;
2263 }
2264 }
2265
2266 /* number of output digits to produce */
2267 pa = MIN(digs, a->used + b->used);
2268
2269 /* clear the carry */
2270 _W = 0;
2271 for (ix = 0; ix < pa; ix++) {
2272 int tx, ty;
2273 int iy;
2274 mp_digit *tmpx, *tmpy;
2275
2276 /* get offsets into the two bignums */
2277 ty = MIN(b->used-1, ix);
2278 tx = ix - ty;
2279
2280 /* setup temp aliases */
2281 tmpx = a->dp + tx;
2282 tmpy = b->dp + ty;
2283
2284 /* this is the number of times the loop will iterrate, essentially
2285 while (tx++ < a->used && ty-- >= 0) { ... }
2286 */
2287 iy = MIN(a->used-tx, ty+1);
2288
2289 /* execute loop */
2290 for (iz = 0; iz < iy; ++iz) {
2291 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2292
2293 }
2294
2295 /* store term */
2296 W[ix] = ((mp_digit)_W) & MP_MASK;
2297
2298 /* make next carry */
2299 _W = _W >> ((mp_word)DIGIT_BIT);
2300 }
2301
2302 /* setup dest */
2303 olduse = c->used;
2304 c->used = pa;
2305
2306 {
2307 mp_digit *tmpc;
2308 tmpc = c->dp;
2309 for (ix = 0; ix < pa+1; ix++) {
2310 /* now extract the previous digit [below the carry] */
2311 *tmpc++ = (ix < pa) ? W[ix] : 0;
2312 }
2313
2314 /* clear unused digits [that existed in the old copy of c] */
2315 for (; ix < olduse; ix++) {
2316 *tmpc++ = 0;
2317 }
2318 }
2319 trim_unused_digits(c);
2320 return MP_OKAY;
2321 }
2322
2323 /* return 1 if we can use fast column array multiply */
2324 /*
2325 * The fast multiplier can be used if the output will
2326 * have less than MP_WARRAY digits and the number of
2327 * digits won't affect carry propagation
2328 */
2329 static inline int
can_use_fast_column_array(int ndigits,int used)2330 can_use_fast_column_array(int ndigits, int used)
2331 {
2332 return (((unsigned)ndigits < MP_WARRAY) &&
2333 used < (1 << (unsigned)((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT))));
2334 }
2335
2336 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */
2337 /* Revision: 1.2 $ */
2338 /* Date: 2011/03/18 16:22:09 $ */
2339
2340
2341 /* multiplies |a| * |b| and only computes upto digs digits of result
2342 * HAC pp. 595, Algorithm 14.12 Modified so you can control how
2343 * many digits of output are created.
2344 */
2345 static int
basic_multiply_partial_lower(mp_int * a,mp_int * b,mp_int * c,int digs)2346 basic_multiply_partial_lower(mp_int * a, mp_int * b, mp_int * c, int digs)
2347 {
2348 mp_int t;
2349 int res, pa, pb, ix, iy;
2350 mp_digit u;
2351 mp_word r;
2352 mp_digit tmpx, *tmpt, *tmpy;
2353
2354 /* can we use the fast multiplier? */
2355 if (can_use_fast_column_array(digs, MIN(a->used, b->used))) {
2356 return fast_col_array_multiply(a, b, c, digs);
2357 }
2358
2359 if ((res = mp_init_size(&t, digs)) != MP_OKAY) {
2360 return res;
2361 }
2362 t.used = digs;
2363
2364 /* compute the digits of the product directly */
2365 pa = a->used;
2366 for (ix = 0; ix < pa; ix++) {
2367 /* set the carry to zero */
2368 u = 0;
2369
2370 /* limit ourselves to making digs digits of output */
2371 pb = MIN(b->used, digs - ix);
2372
2373 /* setup some aliases */
2374 /* copy of the digit from a used within the nested loop */
2375 tmpx = a->dp[ix];
2376
2377 /* an alias for the destination shifted ix places */
2378 tmpt = t.dp + ix;
2379
2380 /* an alias for the digits of b */
2381 tmpy = b->dp;
2382
2383 /* compute the columns of the output and propagate the carry */
2384 for (iy = 0; iy < pb; iy++) {
2385 /* compute the column as a mp_word */
2386 r = ((mp_word)*tmpt) +
2387 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2388 ((mp_word) u);
2389
2390 /* the new column is the lower part of the result */
2391 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2392
2393 /* get the carry word from the result */
2394 u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2395 }
2396 /* set carry if it is placed below digs */
2397 if (ix + iy < digs) {
2398 *tmpt = u;
2399 }
2400 }
2401
2402 trim_unused_digits(&t);
2403 mp_exch(&t, c);
2404
2405 mp_clear(&t);
2406 return MP_OKAY;
2407 }
2408
2409 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */
2410 /* Revision: 1.3 $ */
2411 /* Date: 2011/03/18 16:43:04 $ */
2412
2413 /* high level multiplication (handles sign) */
2414 static int
signed_multiply(mp_int * a,mp_int * b,mp_int * c)2415 signed_multiply(mp_int * a, mp_int * b, mp_int * c)
2416 {
2417 int res, neg;
2418
2419 neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2420 /* use Toom-Cook? */
2421 if (MIN(a->used, b->used) >= TOOM_MUL_CUTOFF) {
2422 res = toom_cook_multiply(a, b, c);
2423 } else if (MIN(a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
2424 /* use Karatsuba? */
2425 res = karatsuba_multiply(a, b, c);
2426 } else {
2427 /* can we use the fast multiplier? */
2428 int digs = a->used + b->used + 1;
2429
2430 if (can_use_fast_column_array(digs, MIN(a->used, b->used))) {
2431 res = fast_col_array_multiply(a, b, c, digs);
2432 } else {
2433 res = basic_multiply_partial_lower(a, b, c, (a)->used + (b)->used + 1);
2434 }
2435 }
2436 c->sign = (c->used > 0) ? neg : MP_ZPOS;
2437 return res;
2438 }
2439
2440 /* this is a modified version of fast_s_mul_digs that only produces
2441 * output digits *above* digs. See the comments for fast_s_mul_digs
2442 * to see how it works.
2443 *
2444 * This is used in the Barrett reduction since for one of the multiplications
2445 * only the higher digits were needed. This essentially halves the work.
2446 *
2447 * Based on Algorithm 14.12 on pp.595 of HAC.
2448 */
2449 static int
fast_basic_multiply_partial_upper(mp_int * a,mp_int * b,mp_int * c,int digs)2450 fast_basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs)
2451 {
2452 int olduse, res, pa, ix, iz;
2453 mp_digit W[MP_WARRAY];
2454 mp_word _W;
2455
2456 /* grow the destination as required */
2457 pa = a->used + b->used;
2458 if (c->alloc < pa) {
2459 if ((res = mp_grow(c, pa)) != MP_OKAY) {
2460 return res;
2461 }
2462 }
2463
2464 /* number of output digits to produce */
2465 pa = a->used + b->used;
2466 _W = 0;
2467 for (ix = digs; ix < pa; ix++) {
2468 int tx, ty, iy;
2469 mp_digit *tmpx, *tmpy;
2470
2471 /* get offsets into the two bignums */
2472 ty = MIN(b->used-1, ix);
2473 tx = ix - ty;
2474
2475 /* setup temp aliases */
2476 tmpx = a->dp + tx;
2477 tmpy = b->dp + ty;
2478
2479 /* this is the number of times the loop will iterrate, essentially its
2480 while (tx++ < a->used && ty-- >= 0) { ... }
2481 */
2482 iy = MIN(a->used-tx, ty+1);
2483
2484 /* execute loop */
2485 for (iz = 0; iz < iy; iz++) {
2486 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2487 }
2488
2489 /* store term */
2490 W[ix] = ((mp_digit)_W) & MP_MASK;
2491
2492 /* make next carry */
2493 _W = _W >> ((mp_word)DIGIT_BIT);
2494 }
2495
2496 /* setup dest */
2497 olduse = c->used;
2498 c->used = pa;
2499
2500 {
2501 mp_digit *tmpc;
2502
2503 tmpc = c->dp + digs;
2504 for (ix = digs; ix < pa; ix++) {
2505 /* now extract the previous digit [below the carry] */
2506 *tmpc++ = W[ix];
2507 }
2508
2509 /* clear unused digits [that existed in the old copy of c] */
2510 for (; ix < olduse; ix++) {
2511 *tmpc++ = 0;
2512 }
2513 }
2514 trim_unused_digits(c);
2515 return MP_OKAY;
2516 }
2517
2518 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */
2519 /* Revision: 1.1.1.1 $ */
2520 /* Date: 2011/03/12 22:58:18 $ */
2521
2522 /* multiplies |a| * |b| and does not compute the lower digs digits
2523 * [meant to get the higher part of the product]
2524 */
2525 static int
basic_multiply_partial_upper(mp_int * a,mp_int * b,mp_int * c,int digs)2526 basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs)
2527 {
2528 mp_int t;
2529 int res, pa, pb, ix, iy;
2530 mp_digit carry;
2531 mp_word r;
2532 mp_digit tmpx, *tmpt, *tmpy;
2533
2534 /* can we use the fast multiplier? */
2535 if (can_use_fast_column_array(a->used + b->used + 1, MIN(a->used, b->used))) {
2536 return fast_basic_multiply_partial_upper(a, b, c, digs);
2537 }
2538
2539 if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) {
2540 return res;
2541 }
2542 t.used = a->used + b->used + 1;
2543
2544 pa = a->used;
2545 pb = b->used;
2546 for (ix = 0; ix < pa; ix++) {
2547 /* clear the carry */
2548 carry = 0;
2549
2550 /* left hand side of A[ix] * B[iy] */
2551 tmpx = a->dp[ix];
2552
2553 /* alias to the address of where the digits will be stored */
2554 tmpt = &(t.dp[digs]);
2555
2556 /* alias for where to read the right hand side from */
2557 tmpy = b->dp + (digs - ix);
2558
2559 for (iy = digs - ix; iy < pb; iy++) {
2560 /* calculate the double precision result */
2561 r = ((mp_word)*tmpt) +
2562 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
2563 ((mp_word) carry);
2564
2565 /* get the lower part */
2566 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2567
2568 /* carry the carry */
2569 carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2570 }
2571 *tmpt = carry;
2572 }
2573 trim_unused_digits(&t);
2574 mp_exch(&t, c);
2575 mp_clear(&t);
2576 return MP_OKAY;
2577 }
2578
2579 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */
2580 /* Revision: 1.3 $ */
2581 /* Date: 2011/03/18 16:43:04 $ */
2582
2583 /* reduces x mod m, assumes 0 < x < m**2, mu is
2584 * precomputed via mp_reduce_setup.
2585 * From HAC pp.604 Algorithm 14.42
2586 */
2587 static int
mp_reduce(mp_int * x,mp_int * m,mp_int * mu)2588 mp_reduce(mp_int * x, mp_int * m, mp_int * mu)
2589 {
2590 mp_int q;
2591 int res, um = m->used;
2592
2593 /* q = x */
2594 if ((res = mp_init_copy(&q, x)) != MP_OKAY) {
2595 return res;
2596 }
2597
2598 /* q1 = x / b**(k-1) */
2599 rshift_digits(&q, um - 1);
2600
2601 /* according to HAC this optimization is ok */
2602 if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
2603 if ((res = signed_multiply(&q, mu, &q)) != MP_OKAY) {
2604 goto CLEANUP;
2605 }
2606 } else {
2607 if ((res = basic_multiply_partial_upper(&q, mu, &q, um)) != MP_OKAY) {
2608 goto CLEANUP;
2609 }
2610 }
2611
2612 /* q3 = q2 / b**(k+1) */
2613 rshift_digits(&q, um + 1);
2614
2615 /* x = x mod b**(k+1), quick (no division) */
2616 if ((res = modulo_2_to_power(x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
2617 goto CLEANUP;
2618 }
2619
2620 /* q = q * m mod b**(k+1), quick (no division) */
2621 if ((res = basic_multiply_partial_lower(&q, m, &q, um + 1)) != MP_OKAY) {
2622 goto CLEANUP;
2623 }
2624
2625 /* x = x - q */
2626 if ((res = signed_subtract(x, &q, x)) != MP_OKAY) {
2627 goto CLEANUP;
2628 }
2629
2630 /* If x < 0, add b**(k+1) to it */
2631 if (compare_digit(x, 0) == MP_LT) {
2632 set_word(&q, 1);
2633 if ((res = lshift_digits(&q, um + 1)) != MP_OKAY) {
2634 goto CLEANUP;
2635 }
2636 if ((res = signed_add(x, &q, x)) != MP_OKAY) {
2637 goto CLEANUP;
2638 }
2639 }
2640
2641 /* Back off if it's too big */
2642 while (signed_compare(x, m) != MP_LT) {
2643 if ((res = basic_subtract(x, m, x)) != MP_OKAY) {
2644 goto CLEANUP;
2645 }
2646 }
2647
2648 CLEANUP:
2649 mp_clear(&q);
2650
2651 return res;
2652 }
2653
2654 /* determines the setup value */
2655 static int
mp_reduce_2k_setup_l(mp_int * a,mp_int * d)2656 mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
2657 {
2658 int res;
2659 mp_int tmp;
2660
2661 if ((res = mp_init(&tmp)) != MP_OKAY) {
2662 return res;
2663 }
2664
2665 if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
2666 goto ERR;
2667 }
2668
2669 if ((res = basic_subtract(&tmp, a, d)) != MP_OKAY) {
2670 goto ERR;
2671 }
2672
2673 ERR:
2674 mp_clear(&tmp);
2675 return res;
2676 }
2677
2678 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */
2679 /* Revision: 1.1.1.1 $ */
2680 /* Date: 2011/03/12 22:58:18 $ */
2681
2682 /* reduces a modulo n where n is of the form 2**p - d
2683 This differs from reduce_2k since "d" can be larger
2684 than a single digit.
2685 */
2686 static int
mp_reduce_2k_l(mp_int * a,mp_int * n,mp_int * d)2687 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
2688 {
2689 mp_int q;
2690 int p, res;
2691
2692 if ((res = mp_init(&q)) != MP_OKAY) {
2693 return res;
2694 }
2695
2696 p = mp_count_bits(n);
2697 top:
2698 /* q = a/2**p, a = a mod 2**p */
2699 if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) {
2700 goto ERR;
2701 }
2702
2703 /* q = q * d */
2704 if ((res = signed_multiply(&q, d, &q)) != MP_OKAY) {
2705 goto ERR;
2706 }
2707
2708 /* a = a + q */
2709 if ((res = basic_add(a, &q, a)) != MP_OKAY) {
2710 goto ERR;
2711 }
2712
2713 if (compare_magnitude(a, n) != MP_LT) {
2714 basic_subtract(a, n, a);
2715 goto top;
2716 }
2717
2718 ERR:
2719 mp_clear(&q);
2720 return res;
2721 }
2722
2723 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */
2724 /* Revision: 1.1.1.1 $ */
2725 /* Date: 2011/03/12 22:58:18 $ */
2726
2727 /* squaring using Toom-Cook 3-way algorithm */
2728 static int
toom_cook_square(mp_int * a,mp_int * b)2729 toom_cook_square(mp_int *a, mp_int *b)
2730 {
2731 mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
2732 int res, B;
2733
2734 /* init temps */
2735 if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
2736 return res;
2737 }
2738
2739 /* B */
2740 B = a->used / 3;
2741
2742 /* a = a2 * B**2 + a1 * B + a0 */
2743 if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
2744 goto ERR;
2745 }
2746
2747 if ((res = mp_copy(a, &a1)) != MP_OKAY) {
2748 goto ERR;
2749 }
2750 rshift_digits(&a1, B);
2751 modulo_2_to_power(&a1, DIGIT_BIT * B, &a1);
2752
2753 if ((res = mp_copy(a, &a2)) != MP_OKAY) {
2754 goto ERR;
2755 }
2756 rshift_digits(&a2, B*2);
2757
2758 /* w0 = a0*a0 */
2759 if ((res = square(&a0, &w0)) != MP_OKAY) {
2760 goto ERR;
2761 }
2762
2763 /* w4 = a2 * a2 */
2764 if ((res = square(&a2, &w4)) != MP_OKAY) {
2765 goto ERR;
2766 }
2767
2768 /* w1 = (a2 + 2(a1 + 2a0))**2 */
2769 if ((res = doubled(&a0, &tmp1)) != MP_OKAY) {
2770 goto ERR;
2771 }
2772 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
2773 goto ERR;
2774 }
2775 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) {
2776 goto ERR;
2777 }
2778 if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
2779 goto ERR;
2780 }
2781
2782 if ((res = square(&tmp1, &w1)) != MP_OKAY) {
2783 goto ERR;
2784 }
2785
2786 /* w3 = (a0 + 2(a1 + 2a2))**2 */
2787 if ((res = doubled(&a2, &tmp1)) != MP_OKAY) {
2788 goto ERR;
2789 }
2790 if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
2791 goto ERR;
2792 }
2793 if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) {
2794 goto ERR;
2795 }
2796 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
2797 goto ERR;
2798 }
2799
2800 if ((res = square(&tmp1, &w3)) != MP_OKAY) {
2801 goto ERR;
2802 }
2803
2804
2805 /* w2 = (a2 + a1 + a0)**2 */
2806 if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) {
2807 goto ERR;
2808 }
2809 if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
2810 goto ERR;
2811 }
2812 if ((res = square(&tmp1, &w2)) != MP_OKAY) {
2813 goto ERR;
2814 }
2815
2816 /* now solve the matrix
2817
2818 0 0 0 0 1
2819 1 2 4 8 16
2820 1 1 1 1 1
2821 16 8 4 2 1
2822 1 0 0 0 0
2823
2824 using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
2825 */
2826
2827 /* r1 - r4 */
2828 if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) {
2829 goto ERR;
2830 }
2831 /* r3 - r0 */
2832 if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) {
2833 goto ERR;
2834 }
2835 /* r1/2 */
2836 if ((res = half(&w1, &w1)) != MP_OKAY) {
2837 goto ERR;
2838 }
2839 /* r3/2 */
2840 if ((res = half(&w3, &w3)) != MP_OKAY) {
2841 goto ERR;
2842 }
2843 /* r2 - r0 - r4 */
2844 if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) {
2845 goto ERR;
2846 }
2847 if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) {
2848 goto ERR;
2849 }
2850 /* r1 - r2 */
2851 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) {
2852 goto ERR;
2853 }
2854 /* r3 - r2 */
2855 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) {
2856 goto ERR;
2857 }
2858 /* r1 - 8r0 */
2859 if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) {
2860 goto ERR;
2861 }
2862 if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) {
2863 goto ERR;
2864 }
2865 /* r3 - 8r4 */
2866 if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) {
2867 goto ERR;
2868 }
2869 if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) {
2870 goto ERR;
2871 }
2872 /* 3r2 - r1 - r3 */
2873 if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) {
2874 goto ERR;
2875 }
2876 if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) {
2877 goto ERR;
2878 }
2879 if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) {
2880 goto ERR;
2881 }
2882 /* r1 - r2 */
2883 if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) {
2884 goto ERR;
2885 }
2886 /* r3 - r2 */
2887 if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) {
2888 goto ERR;
2889 }
2890 /* r1/3 */
2891 if ((res = third(&w1, &w1, NULL)) != MP_OKAY) {
2892 goto ERR;
2893 }
2894 /* r3/3 */
2895 if ((res = third(&w3, &w3, NULL)) != MP_OKAY) {
2896 goto ERR;
2897 }
2898
2899 /* at this point shift W[n] by B*n */
2900 if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) {
2901 goto ERR;
2902 }
2903 if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) {
2904 goto ERR;
2905 }
2906 if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) {
2907 goto ERR;
2908 }
2909 if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) {
2910 goto ERR;
2911 }
2912
2913 if ((res = signed_add(&w0, &w1, b)) != MP_OKAY) {
2914 goto ERR;
2915 }
2916 if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) {
2917 goto ERR;
2918 }
2919 if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
2920 goto ERR;
2921 }
2922 if ((res = signed_add(&tmp1, b, b)) != MP_OKAY) {
2923 goto ERR;
2924 }
2925
2926 ERR:
2927 mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
2928 return res;
2929 }
2930
2931
2932 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */
2933 /* Revision: 1.1.1.1 $ */
2934 /* Date: 2011/03/12 22:58:18 $ */
2935
2936 /* Karatsuba squaring, computes b = a*a using three
2937 * half size squarings
2938 *
2939 * See comments of karatsuba_mul for details. It
2940 * is essentially the same algorithm but merely
2941 * tuned to perform recursive squarings.
2942 */
2943 static int
karatsuba_square(mp_int * a,mp_int * b)2944 karatsuba_square(mp_int * a, mp_int * b)
2945 {
2946 mp_int x0, x1, t1, t2, x0x0, x1x1;
2947 int B, err;
2948
2949 err = MP_MEM;
2950
2951 /* min # of digits */
2952 B = a->used;
2953
2954 /* now divide in two */
2955 B = (unsigned)B >> 1;
2956
2957 /* init copy all the temps */
2958 if (mp_init_size(&x0, B) != MP_OKAY) {
2959 goto ERR;
2960 }
2961 if (mp_init_size(&x1, a->used - B) != MP_OKAY) {
2962 goto X0;
2963 }
2964 /* init temps */
2965 if (mp_init_size(&t1, a->used * 2) != MP_OKAY) {
2966 goto X1;
2967 }
2968 if (mp_init_size(&t2, a->used * 2) != MP_OKAY) {
2969 goto T1;
2970 }
2971 if (mp_init_size(&x0x0, B * 2) != MP_OKAY) {
2972 goto T2;
2973 }
2974 if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY) {
2975 goto X0X0;
2976 }
2977
2978 memcpy(x0.dp, a->dp, B * sizeof(*x0.dp));
2979 memcpy(x1.dp, &a->dp[B], (a->used - B) * sizeof(*x1.dp));
2980
2981 x0.used = B;
2982 x1.used = a->used - B;
2983
2984 trim_unused_digits(&x0);
2985
2986 /* now calc the products x0*x0 and x1*x1 */
2987 if (square(&x0, &x0x0) != MP_OKAY) {
2988 goto X1X1; /* x0x0 = x0*x0 */
2989 }
2990 if (square(&x1, &x1x1) != MP_OKAY) {
2991 goto X1X1; /* x1x1 = x1*x1 */
2992 }
2993 /* now calc (x1+x0)**2 */
2994 if (basic_add(&x1, &x0, &t1) != MP_OKAY) {
2995 goto X1X1; /* t1 = x1 - x0 */
2996 }
2997 if (square(&t1, &t1) != MP_OKAY) {
2998 goto X1X1; /* t1 = (x1 - x0) * (x1 - x0) */
2999 }
3000 /* add x0y0 */
3001 if (basic_add(&x0x0, &x1x1, &t2) != MP_OKAY) {
3002 goto X1X1; /* t2 = x0x0 + x1x1 */
3003 }
3004 if (basic_subtract(&t1, &t2, &t1) != MP_OKAY) {
3005 goto X1X1; /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */
3006 }
3007 /* shift by B */
3008 if (lshift_digits(&t1, B) != MP_OKAY) {
3009 goto X1X1; /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
3010 }
3011 if (lshift_digits(&x1x1, B * 2) != MP_OKAY) {
3012 goto X1X1; /* x1x1 = x1x1 << 2*B */
3013 }
3014 if (signed_add(&x0x0, &t1, &t1) != MP_OKAY) {
3015 goto X1X1; /* t1 = x0x0 + t1 */
3016 }
3017 if (signed_add(&t1, &x1x1, b) != MP_OKAY) {
3018 goto X1X1; /* t1 = x0x0 + t1 + x1x1 */
3019 }
3020 err = MP_OKAY;
3021
3022 X1X1:
3023 mp_clear(&x1x1);
3024 X0X0:
3025 mp_clear(&x0x0);
3026 T2:
3027 mp_clear(&t2);
3028 T1:
3029 mp_clear(&t1);
3030 X1:
3031 mp_clear(&x1);
3032 X0:
3033 mp_clear(&x0);
3034 ERR:
3035 return err;
3036 }
3037
3038 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */
3039 /* Revision: 1.2 $ */
3040 /* Date: 2011/03/12 23:43:54 $ */
3041
3042 /* the jist of squaring...
3043 * you do like mult except the offset of the tmpx [one that
3044 * starts closer to zero] can't equal the offset of tmpy.
3045 * So basically you set up iy like before then you min it with
3046 * (ty-tx) so that it never happens. You double all those
3047 * you add in the inner loop
3048
3049 After that loop you do the squares and add them in.
3050 */
3051
3052 static int
fast_basic_square(mp_int * a,mp_int * b)3053 fast_basic_square(mp_int * a, mp_int * b)
3054 {
3055 int olduse, res, pa, ix, iz;
3056 mp_digit W[MP_WARRAY], *tmpx;
3057 mp_word W1;
3058
3059 /* grow the destination as required */
3060 pa = a->used + a->used;
3061 if (b->alloc < pa) {
3062 if ((res = mp_grow(b, pa)) != MP_OKAY) {
3063 return res;
3064 }
3065 }
3066
3067 /* number of output digits to produce */
3068 W1 = 0;
3069 for (ix = 0; ix < pa; ix++) {
3070 int tx, ty, iy;
3071 mp_word _W;
3072 mp_digit *tmpy;
3073
3074 /* clear counter */
3075 _W = 0;
3076
3077 /* get offsets into the two bignums */
3078 ty = MIN(a->used-1, ix);
3079 tx = ix - ty;
3080
3081 /* setup temp aliases */
3082 tmpx = a->dp + tx;
3083 tmpy = a->dp + ty;
3084
3085 /* this is the number of times the loop will iterrate, essentially
3086 while (tx++ < a->used && ty-- >= 0) { ... }
3087 */
3088 iy = MIN(a->used-tx, ty+1);
3089
3090 /* now for squaring tx can never equal ty
3091 * we halve the distance since they approach at a rate of 2x
3092 * and we have to round because odd cases need to be executed
3093 */
3094 iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1));
3095
3096 /* execute loop */
3097 for (iz = 0; iz < iy; iz++) {
3098 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3099 }
3100
3101 /* double the inner product and add carry */
3102 _W = _W + _W + W1;
3103
3104 /* even columns have the square term in them */
3105 if ((ix&1) == 0) {
3106 _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]);
3107 }
3108
3109 /* store it */
3110 W[ix] = (mp_digit)(_W & MP_MASK);
3111
3112 /* make next carry */
3113 W1 = _W >> ((mp_word)DIGIT_BIT);
3114 }
3115
3116 /* setup dest */
3117 olduse = b->used;
3118 b->used = a->used+a->used;
3119
3120 {
3121 mp_digit *tmpb;
3122 tmpb = b->dp;
3123 for (ix = 0; ix < pa; ix++) {
3124 *tmpb++ = W[ix] & MP_MASK;
3125 }
3126
3127 /* clear unused digits [that existed in the old copy of c] */
3128 for (; ix < olduse; ix++) {
3129 *tmpb++ = 0;
3130 }
3131 }
3132 trim_unused_digits(b);
3133 return MP_OKAY;
3134 }
3135
3136 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */
3137 /* Revision: 1.3 $ */
3138 /* Date: 2011/03/18 16:43:04 $ */
3139
3140 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
3141 static int
basic_square(mp_int * a,mp_int * b)3142 basic_square(mp_int * a, mp_int * b)
3143 {
3144 mp_int t;
3145 int res, ix, iy, pa;
3146 mp_word r;
3147 mp_digit carry, tmpx, *tmpt;
3148
3149 pa = a->used;
3150 if ((res = mp_init_size(&t, 2*pa + 1)) != MP_OKAY) {
3151 return res;
3152 }
3153
3154 /* default used is maximum possible size */
3155 t.used = 2*pa + 1;
3156
3157 for (ix = 0; ix < pa; ix++) {
3158 /* first calculate the digit at 2*ix */
3159 /* calculate double precision result */
3160 r = ((mp_word) t.dp[2*ix]) +
3161 ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3162
3163 /* store lower part in result */
3164 t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3165
3166 /* get the carry */
3167 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3168
3169 /* left hand side of A[ix] * A[iy] */
3170 tmpx = a->dp[ix];
3171
3172 /* alias for where to store the results */
3173 tmpt = t.dp + (2*ix + 1);
3174
3175 for (iy = ix + 1; iy < pa; iy++) {
3176 /* first calculate the product */
3177 r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3178
3179 /* now calculate the double precision result, note we use
3180 * addition instead of *2 since it's easier to optimize
3181 */
3182 r = ((mp_word) *tmpt) + r + r + ((mp_word) carry);
3183
3184 /* store lower part */
3185 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3186
3187 /* get carry */
3188 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3189 }
3190 /* propagate upwards */
3191 while (carry != ((mp_digit) 0)) {
3192 r = ((mp_word) *tmpt) + ((mp_word) carry);
3193 *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3194 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3195 }
3196 }
3197
3198 trim_unused_digits(&t);
3199 mp_exch(&t, b);
3200 mp_clear(&t);
3201 return MP_OKAY;
3202 }
3203
3204 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */
3205 /* Revision: 1.1.1.1 $ */
3206 /* Date: 2011/03/12 22:58:18 $ */
3207
3208 #define TOOM_SQR_CUTOFF 400
3209 #define KARATSUBA_SQR_CUTOFF 120
3210
3211 /* computes b = a*a */
3212 static int
square(mp_int * a,mp_int * b)3213 square(mp_int * a, mp_int * b)
3214 {
3215 int res;
3216
3217 /* use Toom-Cook? */
3218 if (a->used >= TOOM_SQR_CUTOFF) {
3219 res = toom_cook_square(a, b);
3220 /* Karatsuba? */
3221 } else if (a->used >= KARATSUBA_SQR_CUTOFF) {
3222 res = karatsuba_square(a, b);
3223 } else {
3224 /* can we use the fast comba multiplier? */
3225 if (can_use_fast_column_array(a->used + a->used + 1, a->used)) {
3226 res = fast_basic_square(a, b);
3227 } else {
3228 res = basic_square(a, b);
3229 }
3230 }
3231 b->sign = MP_ZPOS;
3232 return res;
3233 }
3234
3235 /* find window size */
3236 static inline int
find_window_size(mp_int * X)3237 find_window_size(mp_int *X)
3238 {
3239 int x;
3240
3241 x = mp_count_bits(X);
3242 return (x <= 7) ? 2 : (x <= 36) ? 3 : (x <= 140) ? 4 : (x <= 450) ? 5 : (x <= 1303) ? 6 : (x <= 3529) ? 7 : 8;
3243 }
3244
3245 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */
3246 /* Revision: 1.3 $ */
3247 /* Date: 2011/03/18 16:43:04 $ */
3248
3249 #define TAB_SIZE 256
3250
3251 static int
basic_exponent_mod(mp_int * G,mp_int * X,mp_int * P,mp_int * Y,int redmode)3252 basic_exponent_mod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3253 {
3254 mp_digit buf;
3255 mp_int M[TAB_SIZE], res, mu;
3256 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3257 int (*redux)(mp_int*,mp_int*,mp_int*);
3258
3259 winsize = find_window_size(X);
3260
3261 /* init M array */
3262 /* init first cell */
3263 if ((err = mp_init(&M[1])) != MP_OKAY) {
3264 return err;
3265 }
3266
3267 /* now init the second half of the array */
3268 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3269 if ((err = mp_init(&M[x])) != MP_OKAY) {
3270 for (y = 1<<(winsize-1); y < x; y++) {
3271 mp_clear(&M[y]);
3272 }
3273 mp_clear(&M[1]);
3274 return err;
3275 }
3276 }
3277
3278 /* create mu, used for Barrett reduction */
3279 if ((err = mp_init(&mu)) != MP_OKAY) {
3280 goto LBL_M;
3281 }
3282
3283 if (redmode == 0) {
3284 if ((err = mp_reduce_setup(&mu, P)) != MP_OKAY) {
3285 goto LBL_MU;
3286 }
3287 redux = mp_reduce;
3288 } else {
3289 if ((err = mp_reduce_2k_setup_l(P, &mu)) != MP_OKAY) {
3290 goto LBL_MU;
3291 }
3292 redux = mp_reduce_2k_l;
3293 }
3294
3295 /* create M table
3296 *
3297 * The M table contains powers of the base,
3298 * e.g. M[x] = G**x mod P
3299 *
3300 * The first half of the table is not
3301 * computed though accept for M[0] and M[1]
3302 */
3303 if ((err = modulo(G, P, &M[1])) != MP_OKAY) {
3304 goto LBL_MU;
3305 }
3306
3307 /* compute the value at M[1<<(winsize-1)] by squaring
3308 * M[1] (winsize-1) times
3309 */
3310 if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
3311 goto LBL_MU;
3312 }
3313
3314 for (x = 0; x < (winsize - 1); x++) {
3315 /* square it */
3316 if ((err = square(&M[1 << (winsize - 1)],
3317 &M[1 << (winsize - 1)])) != MP_OKAY) {
3318 goto LBL_MU;
3319 }
3320
3321 /* reduce modulo P */
3322 if ((err = redux(&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
3323 goto LBL_MU;
3324 }
3325 }
3326
3327 /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3328 * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
3329 */
3330 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3331 if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3332 goto LBL_MU;
3333 }
3334 if ((err = redux(&M[x], P, &mu)) != MP_OKAY) {
3335 goto LBL_MU;
3336 }
3337 }
3338
3339 /* setup result */
3340 if ((err = mp_init(&res)) != MP_OKAY) {
3341 goto LBL_MU;
3342 }
3343 set_word(&res, 1);
3344
3345 /* set initial mode and bit cnt */
3346 mode = 0;
3347 bitcnt = 1;
3348 buf = 0;
3349 digidx = X->used - 1;
3350 bitcpy = 0;
3351 bitbuf = 0;
3352
3353 for (;;) {
3354 /* grab next digit as required */
3355 if (--bitcnt == 0) {
3356 /* if digidx == -1 we are out of digits */
3357 if (digidx == -1) {
3358 break;
3359 }
3360 /* read next digit and reset the bitcnt */
3361 buf = X->dp[digidx--];
3362 bitcnt = (int) DIGIT_BIT;
3363 }
3364
3365 /* grab the next msb from the exponent */
3366 y = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3367 buf <<= (mp_digit)1;
3368
3369 /* if the bit is zero and mode == 0 then we ignore it
3370 * These represent the leading zero bits before the first 1 bit
3371 * in the exponent. Technically this opt is not required but it
3372 * does lower the # of trivial squaring/reductions used
3373 */
3374 if (mode == 0 && y == 0) {
3375 continue;
3376 }
3377
3378 /* if the bit is zero and mode == 1 then we square */
3379 if (mode == 1 && y == 0) {
3380 if ((err = square(&res, &res)) != MP_OKAY) {
3381 goto LBL_RES;
3382 }
3383 if ((err = redux(&res, P, &mu)) != MP_OKAY) {
3384 goto LBL_RES;
3385 }
3386 continue;
3387 }
3388
3389 /* else we add it to the window */
3390 bitbuf |= (y << (winsize - ++bitcpy));
3391 mode = 2;
3392
3393 if (bitcpy == winsize) {
3394 /* ok window is filled so square as required and multiply */
3395 /* square first */
3396 for (x = 0; x < winsize; x++) {
3397 if ((err = square(&res, &res)) != MP_OKAY) {
3398 goto LBL_RES;
3399 }
3400 if ((err = redux(&res, P, &mu)) != MP_OKAY) {
3401 goto LBL_RES;
3402 }
3403 }
3404
3405 /* then multiply */
3406 if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) {
3407 goto LBL_RES;
3408 }
3409 if ((err = redux(&res, P, &mu)) != MP_OKAY) {
3410 goto LBL_RES;
3411 }
3412
3413 /* empty window and reset */
3414 bitcpy = 0;
3415 bitbuf = 0;
3416 mode = 1;
3417 }
3418 }
3419
3420 /* if bits remain then square/multiply */
3421 if (mode == 2 && bitcpy > 0) {
3422 /* square then multiply if the bit is set */
3423 for (x = 0; x < bitcpy; x++) {
3424 if ((err = square(&res, &res)) != MP_OKAY) {
3425 goto LBL_RES;
3426 }
3427 if ((err = redux(&res, P, &mu)) != MP_OKAY) {
3428 goto LBL_RES;
3429 }
3430
3431 bitbuf <<= 1;
3432 if ((bitbuf & (1 << winsize)) != 0) {
3433 /* then multiply */
3434 if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) {
3435 goto LBL_RES;
3436 }
3437 if ((err = redux(&res, P, &mu)) != MP_OKAY) {
3438 goto LBL_RES;
3439 }
3440 }
3441 }
3442 }
3443
3444 mp_exch(&res, Y);
3445 err = MP_OKAY;
3446 LBL_RES:
3447 mp_clear(&res);
3448 LBL_MU:
3449 mp_clear(&mu);
3450 LBL_M:
3451 mp_clear(&M[1]);
3452 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3453 mp_clear(&M[x]);
3454 }
3455 return err;
3456 }
3457
3458 /* determines if a number is a valid DR modulus */
3459 static int
is_diminished_radix_modulus(mp_int * a)3460 is_diminished_radix_modulus(mp_int *a)
3461 {
3462 int ix;
3463
3464 /* must be at least two digits */
3465 if (a->used < 2) {
3466 return 0;
3467 }
3468
3469 /* must be of the form b**k - a [a <= b] so all
3470 * but the first digit must be equal to -1 (mod b).
3471 */
3472 for (ix = 1; ix < a->used; ix++) {
3473 if (a->dp[ix] != MP_MASK) {
3474 return 0;
3475 }
3476 }
3477 return 1;
3478 }
3479
3480 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */
3481 /* Revision: 1.1.1.1 $ */
3482 /* Date: 2011/03/12 22:58:18 $ */
3483
3484 /* determines if mp_reduce_2k can be used */
3485 static int
mp_reduce_is_2k(mp_int * a)3486 mp_reduce_is_2k(mp_int *a)
3487 {
3488 int ix, iy, iw;
3489 mp_digit iz;
3490
3491 if (a->used == 0) {
3492 return MP_NO;
3493 }
3494 if (a->used == 1) {
3495 return MP_YES;
3496 }
3497 if (a->used > 1) {
3498 iy = mp_count_bits(a);
3499 iz = 1;
3500 iw = 1;
3501
3502 /* Test every bit from the second digit up, must be 1 */
3503 for (ix = DIGIT_BIT; ix < iy; ix++) {
3504 if ((a->dp[iw] & iz) == 0) {
3505 return MP_NO;
3506 }
3507 iz <<= 1;
3508 if (iz > (mp_digit)MP_MASK) {
3509 ++iw;
3510 iz = 1;
3511 }
3512 }
3513 }
3514 return MP_YES;
3515 }
3516
3517 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */
3518 /* Revision: 1.1.1.1 $ */
3519 /* Date: 2011/03/12 22:58:18 $ */
3520
3521
3522 /* d = a * b (mod c) */
3523 static int
multiply_modulo(mp_int * d,mp_int * a,mp_int * b,mp_int * c)3524 multiply_modulo(mp_int *d, mp_int * a, mp_int * b, mp_int * c)
3525 {
3526 mp_int t;
3527 int res;
3528
3529 if ((res = mp_init(&t)) != MP_OKAY) {
3530 return res;
3531 }
3532
3533 if ((res = signed_multiply(a, b, &t)) != MP_OKAY) {
3534 mp_clear(&t);
3535 return res;
3536 }
3537 res = modulo(&t, c, d);
3538 mp_clear(&t);
3539 return res;
3540 }
3541
3542 /* d = a + b (mod c) */
3543 static int
add_modulo(mp_int * d,mp_int * a,mp_int * b,mp_int * c)3544 add_modulo(mp_int *d, mp_int * a, mp_int * b, mp_int * c)
3545 {
3546 mp_int t;
3547 int res;
3548
3549 if ((res = mp_init(&t)) != MP_OKAY) {
3550 return res;
3551 }
3552 if ((res = signed_add(a, b, &t)) != MP_OKAY) {
3553 mp_clear(&t);
3554 return res;
3555 }
3556 res = modulo(&t, c, d);
3557 mp_clear(&t);
3558 return res;
3559 }
3560
3561 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */
3562 /* Revision: 1.1.1.1 $ */
3563 /* Date: 2011/03/12 22:58:18 $ */
3564
3565 /* setups the montgomery reduction stuff */
3566 static int
mp_montgomery_setup(mp_int * n,mp_digit * rho)3567 mp_montgomery_setup(mp_int * n, mp_digit * rho)
3568 {
3569 mp_digit x, b;
3570
3571 /* fast inversion mod 2**k
3572 *
3573 * Based on the fact that
3574 *
3575 * XA = 1 (mod 2**n) => (X(2-XA)) A = 1 (mod 2**2n)
3576 * => 2*X*A - X*X*A*A = 1
3577 * => 2*(1) - (1) = 1
3578 */
3579 b = n->dp[0];
3580
3581 if ((b & 1) == 0) {
3582 return MP_VAL;
3583 }
3584
3585 x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
3586 x *= 2 - b * x; /* here x*a==1 mod 2**8 */
3587 x *= 2 - b * x; /* here x*a==1 mod 2**16 */
3588 x *= 2 - b * x; /* here x*a==1 mod 2**32 */
3589 if (/*CONSTCOND*/sizeof(mp_digit) == 8) {
3590 x *= 2 - b * x; /* here x*a==1 mod 2**64 */
3591 }
3592
3593 /* rho = -1/m mod b */
3594 *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
3595
3596 return MP_OKAY;
3597 }
3598
3599 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */
3600 /* Revision: 1.1.1.1 $ */
3601 /* Date: 2011/03/12 22:58:18 $ */
3602
3603 /* computes xR**-1 == x (mod N) via Montgomery Reduction
3604 *
3605 * This is an optimized implementation of montgomery_reduce
3606 * which uses the comba method to quickly calculate the columns of the
3607 * reduction.
3608 *
3609 * Based on Algorithm 14.32 on pp.601 of HAC.
3610 */
3611 static int
fast_mp_montgomery_reduce(mp_int * x,mp_int * n,mp_digit rho)3612 fast_mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho)
3613 {
3614 int ix, res, olduse;
3615 /*LINTED*/
3616 mp_word W[MP_WARRAY];
3617
3618 /* get old used count */
3619 olduse = x->used;
3620
3621 /* grow a as required */
3622 if (x->alloc < n->used + 1) {
3623 if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) {
3624 return res;
3625 }
3626 }
3627
3628 /* first we have to get the digits of the input into
3629 * an array of double precision words W[...]
3630 */
3631 {
3632 mp_word *_W;
3633 mp_digit *tmpx;
3634
3635 /* alias for the W[] array */
3636 _W = W;
3637
3638 /* alias for the digits of x*/
3639 tmpx = x->dp;
3640
3641 /* copy the digits of a into W[0..a->used-1] */
3642 for (ix = 0; ix < x->used; ix++) {
3643 *_W++ = *tmpx++;
3644 }
3645
3646 /* zero the high words of W[a->used..m->used*2] */
3647 for (; ix < n->used * 2 + 1; ix++) {
3648 *_W++ = 0;
3649 }
3650 }
3651
3652 /* now we proceed to zero successive digits
3653 * from the least significant upwards
3654 */
3655 for (ix = 0; ix < n->used; ix++) {
3656 /* mu = ai * m' mod b
3657 *
3658 * We avoid a double precision multiplication (which isn't required)
3659 * by casting the value down to a mp_digit. Note this requires
3660 * that W[ix-1] have the carry cleared (see after the inner loop)
3661 */
3662 mp_digit mu;
3663 mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
3664
3665 /* a = a + mu * m * b**i
3666 *
3667 * This is computed in place and on the fly. The multiplication
3668 * by b**i is handled by offseting which columns the results
3669 * are added to.
3670 *
3671 * Note the comba method normally doesn't handle carries in the
3672 * inner loop In this case we fix the carry from the previous
3673 * column since the Montgomery reduction requires digits of the
3674 * result (so far) [see above] to work. This is
3675 * handled by fixing up one carry after the inner loop. The
3676 * carry fixups are done in order so after these loops the
3677 * first m->used words of W[] have the carries fixed
3678 */
3679 {
3680 int iy;
3681 mp_digit *tmpn;
3682 mp_word *_W;
3683
3684 /* alias for the digits of the modulus */
3685 tmpn = n->dp;
3686
3687 /* Alias for the columns set by an offset of ix */
3688 _W = W + ix;
3689
3690 /* inner loop */
3691 for (iy = 0; iy < n->used; iy++) {
3692 *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
3693 }
3694 }
3695
3696 /* now fix carry for next digit, W[ix+1] */
3697 W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
3698 }
3699
3700 /* now we have to propagate the carries and
3701 * shift the words downward [all those least
3702 * significant digits we zeroed].
3703 */
3704 {
3705 mp_digit *tmpx;
3706 mp_word *_W, *_W1;
3707
3708 /* nox fix rest of carries */
3709
3710 /* alias for current word */
3711 _W1 = W + ix;
3712
3713 /* alias for next word, where the carry goes */
3714 _W = W + ++ix;
3715
3716 for (; ix <= n->used * 2 + 1; ix++) {
3717 *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
3718 }
3719
3720 /* copy out, A = A/b**n
3721 *
3722 * The result is A/b**n but instead of converting from an
3723 * array of mp_word to mp_digit than calling rshift_digits
3724 * we just copy them in the right order
3725 */
3726
3727 /* alias for destination word */
3728 tmpx = x->dp;
3729
3730 /* alias for shifted double precision result */
3731 _W = W + n->used;
3732
3733 for (ix = 0; ix < n->used + 1; ix++) {
3734 *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
3735 }
3736
3737 /* zero oldused digits, if the input a was larger than
3738 * m->used+1 we'll have to clear the digits
3739 */
3740 for (; ix < olduse; ix++) {
3741 *tmpx++ = 0;
3742 }
3743 }
3744
3745 /* set the max used and clamp */
3746 x->used = n->used + 1;
3747 trim_unused_digits(x);
3748
3749 /* if A >= m then A = A - m */
3750 if (compare_magnitude(x, n) != MP_LT) {
3751 return basic_subtract(x, n, x);
3752 }
3753 return MP_OKAY;
3754 }
3755
3756 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */
3757 /* Revision: 1.2 $ */
3758 /* Date: 2011/03/18 16:22:09 $ */
3759
3760 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
3761 static int
mp_montgomery_reduce(mp_int * x,mp_int * n,mp_digit rho)3762 mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho)
3763 {
3764 int ix, res, digs;
3765 mp_digit mu;
3766
3767 /* can the fast reduction [comba] method be used?
3768 *
3769 * Note that unlike in mul you're safely allowed *less*
3770 * than the available columns [255 per default] since carries
3771 * are fixed up in the inner loop.
3772 */
3773 digs = n->used * 2 + 1;
3774 if (can_use_fast_column_array(digs, n->used)) {
3775 return fast_mp_montgomery_reduce(x, n, rho);
3776 }
3777
3778 /* grow the input as required */
3779 if (x->alloc < digs) {
3780 if ((res = mp_grow(x, digs)) != MP_OKAY) {
3781 return res;
3782 }
3783 }
3784 x->used = digs;
3785
3786 for (ix = 0; ix < n->used; ix++) {
3787 /* mu = ai * rho mod b
3788 *
3789 * The value of rho must be precalculated via
3790 * montgomery_setup() such that
3791 * it equals -1/n0 mod b this allows the
3792 * following inner loop to reduce the
3793 * input one digit at a time
3794 */
3795 mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
3796
3797 /* a = a + mu * m * b**i */
3798 {
3799 int iy;
3800 mp_digit *tmpn, *tmpx, carry;
3801 mp_word r;
3802
3803 /* alias for digits of the modulus */
3804 tmpn = n->dp;
3805
3806 /* alias for the digits of x [the input] */
3807 tmpx = x->dp + ix;
3808
3809 /* set the carry to zero */
3810 carry = 0;
3811
3812 /* Multiply and add in place */
3813 for (iy = 0; iy < n->used; iy++) {
3814 /* compute product and sum */
3815 r = ((mp_word)mu) * ((mp_word)*tmpn++) +
3816 ((mp_word) carry) + ((mp_word) * tmpx);
3817
3818 /* get carry */
3819 carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3820
3821 /* fix digit */
3822 *tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
3823 }
3824 /* At this point the ix'th digit of x should be zero */
3825
3826
3827 /* propagate carries upwards as required*/
3828 while (carry) {
3829 *tmpx += carry;
3830 carry = *tmpx >> DIGIT_BIT;
3831 *tmpx++ &= MP_MASK;
3832 }
3833 }
3834 }
3835
3836 /* at this point the n.used'th least
3837 * significant digits of x are all zero
3838 * which means we can shift x to the
3839 * right by n.used digits and the
3840 * residue is unchanged.
3841 */
3842
3843 /* x = x/b**n.used */
3844 trim_unused_digits(x);
3845 rshift_digits(x, n->used);
3846
3847 /* if x >= n then x = x - n */
3848 if (compare_magnitude(x, n) != MP_LT) {
3849 return basic_subtract(x, n, x);
3850 }
3851
3852 return MP_OKAY;
3853 }
3854
3855 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */
3856 /* Revision: 1.3 $ */
3857 /* Date: 2011/03/18 16:43:04 $ */
3858
3859 /* determines the setup value */
3860 static void
diminished_radix_setup(mp_int * a,mp_digit * d)3861 diminished_radix_setup(mp_int *a, mp_digit *d)
3862 {
3863 /* the casts are required if DIGIT_BIT is one less than
3864 * the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
3865 */
3866 *d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
3867 ((mp_word)a->dp[0]));
3868 }
3869
3870 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */
3871 /* Revision: 1.1.1.1 $ */
3872 /* Date: 2011/03/12 22:58:18 $ */
3873
3874 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
3875 *
3876 * Based on algorithm from the paper
3877 *
3878 * "Generating Efficient Primes for Discrete Log Cryptosystems"
3879 * Chae Hoon Lim, Pil Joong Lee,
3880 * POSTECH Information Research Laboratories
3881 *
3882 * The modulus must be of a special format [see manual]
3883 *
3884 * Has been modified to use algorithm 7.10 from the LTM book instead
3885 *
3886 * Input x must be in the range 0 <= x <= (n-1)**2
3887 */
3888 static int
diminished_radix_reduce(mp_int * x,mp_int * n,mp_digit k)3889 diminished_radix_reduce(mp_int * x, mp_int * n, mp_digit k)
3890 {
3891 int err, i, m;
3892 mp_word r;
3893 mp_digit mu, *tmpx1, *tmpx2;
3894
3895 /* m = digits in modulus */
3896 m = n->used;
3897
3898 /* ensure that "x" has at least 2m digits */
3899 if (x->alloc < m + m) {
3900 if ((err = mp_grow(x, m + m)) != MP_OKAY) {
3901 return err;
3902 }
3903 }
3904
3905 /* top of loop, this is where the code resumes if
3906 * another reduction pass is required.
3907 */
3908 top:
3909 /* aliases for digits */
3910 /* alias for lower half of x */
3911 tmpx1 = x->dp;
3912
3913 /* alias for upper half of x, or x/B**m */
3914 tmpx2 = x->dp + m;
3915
3916 /* set carry to zero */
3917 mu = 0;
3918
3919 /* compute (x mod B**m) + k * [x/B**m] inline and inplace */
3920 for (i = 0; i < m; i++) {
3921 r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
3922 *tmpx1++ = (mp_digit)(r & MP_MASK);
3923 mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
3924 }
3925
3926 /* set final carry */
3927 *tmpx1++ = mu;
3928
3929 /* zero words above m */
3930 for (i = m + 1; i < x->used; i++) {
3931 *tmpx1++ = 0;
3932 }
3933
3934 /* clamp, sub and return */
3935 trim_unused_digits(x);
3936
3937 /* if x >= n then subtract and reduce again
3938 * Each successive "recursion" makes the input smaller and smaller.
3939 */
3940 if (compare_magnitude(x, n) != MP_LT) {
3941 basic_subtract(x, n, x);
3942 goto top;
3943 }
3944 return MP_OKAY;
3945 }
3946
3947 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */
3948 /* Revision: 1.1.1.1 $ */
3949 /* Date: 2011/03/12 22:58:18 $ */
3950
3951 /* determines the setup value */
3952 static int
mp_reduce_2k_setup(mp_int * a,mp_digit * d)3953 mp_reduce_2k_setup(mp_int *a, mp_digit *d)
3954 {
3955 int res, p;
3956 mp_int tmp;
3957
3958 if ((res = mp_init(&tmp)) != MP_OKAY) {
3959 return res;
3960 }
3961
3962 p = mp_count_bits(a);
3963 if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3964 mp_clear(&tmp);
3965 return res;
3966 }
3967
3968 if ((res = basic_subtract(&tmp, a, &tmp)) != MP_OKAY) {
3969 mp_clear(&tmp);
3970 return res;
3971 }
3972
3973 *d = tmp.dp[0];
3974 mp_clear(&tmp);
3975 return MP_OKAY;
3976 }
3977
3978 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */
3979 /* Revision: 1.1.1.1 $ */
3980 /* Date: 2011/03/12 22:58:18 $ */
3981
3982 /* reduces a modulo n where n is of the form 2**p - d */
3983 static int
mp_reduce_2k(mp_int * a,mp_int * n,mp_digit d)3984 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
3985 {
3986 mp_int q;
3987 int p, res;
3988
3989 if ((res = mp_init(&q)) != MP_OKAY) {
3990 return res;
3991 }
3992
3993 p = mp_count_bits(n);
3994 top:
3995 /* q = a/2**p, a = a mod 2**p */
3996 if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) {
3997 goto ERR;
3998 }
3999
4000 if (d != 1) {
4001 /* q = q * d */
4002 if ((res = multiply_digit(&q, d, &q)) != MP_OKAY) {
4003 goto ERR;
4004 }
4005 }
4006
4007 /* a = a + q */
4008 if ((res = basic_add(a, &q, a)) != MP_OKAY) {
4009 goto ERR;
4010 }
4011
4012 if (compare_magnitude(a, n) != MP_LT) {
4013 basic_subtract(a, n, a);
4014 goto top;
4015 }
4016
4017 ERR:
4018 mp_clear(&q);
4019 return res;
4020 }
4021
4022 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */
4023 /* Revision: 1.1.1.1 $ */
4024 /* Date: 2011/03/12 22:58:18 $ */
4025
4026 /*
4027 * shifts with subtractions when the result is greater than b.
4028 *
4029 * The method is slightly modified to shift B unconditionally upto just under
4030 * the leading bit of b. This saves alot of multiple precision shifting.
4031 */
4032 static int
mp_montgomery_calc_normalization(mp_int * a,mp_int * b)4033 mp_montgomery_calc_normalization(mp_int * a, mp_int * b)
4034 {
4035 int x, bits, res;
4036
4037 /* how many bits of last digit does b use */
4038 bits = mp_count_bits(b) % DIGIT_BIT;
4039
4040 if (b->used > 1) {
4041 if ((res = mp_2expt(a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
4042 return res;
4043 }
4044 } else {
4045 set_word(a, 1);
4046 bits = 1;
4047 }
4048
4049
4050 /* now compute C = A * B mod b */
4051 for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
4052 if ((res = doubled(a, a)) != MP_OKAY) {
4053 return res;
4054 }
4055 if (compare_magnitude(a, b) != MP_LT) {
4056 if ((res = basic_subtract(a, b, a)) != MP_OKAY) {
4057 return res;
4058 }
4059 }
4060 }
4061
4062 return MP_OKAY;
4063 }
4064
4065 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */
4066 /* Revision: 1.1.1.1 $ */
4067 /* Date: 2011/03/12 22:58:18 $ */
4068
4069 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
4070 *
4071 * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
4072 * The value of k changes based on the size of the exponent.
4073 *
4074 * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
4075 */
4076
4077 #define TAB_SIZE 256
4078
4079 static int
fast_exponent_modulo(mp_int * G,mp_int * X,mp_int * P,mp_int * Y,int redmode)4080 fast_exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
4081 {
4082 mp_int M[TAB_SIZE], res;
4083 mp_digit buf, mp;
4084 int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
4085
4086 /* use a pointer to the reduction algorithm. This allows us to use
4087 * one of many reduction algorithms without modding the guts of
4088 * the code with if statements everywhere.
4089 */
4090 int (*redux)(mp_int*,mp_int*,mp_digit);
4091
4092 winsize = find_window_size(X);
4093
4094 /* init M array */
4095 /* init first cell */
4096 if ((err = mp_init(&M[1])) != MP_OKAY) {
4097 return err;
4098 }
4099
4100 /* now init the second half of the array */
4101 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4102 if ((err = mp_init(&M[x])) != MP_OKAY) {
4103 for (y = 1<<(winsize-1); y < x; y++) {
4104 mp_clear(&M[y]);
4105 }
4106 mp_clear(&M[1]);
4107 return err;
4108 }
4109 }
4110
4111 /* determine and setup reduction code */
4112 if (redmode == 0) {
4113 /* now setup montgomery */
4114 if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
4115 goto LBL_M;
4116 }
4117
4118 /* automatically pick the comba one if available (saves quite a few calls/ifs) */
4119 if (can_use_fast_column_array(P->used + P->used + 1, P->used)) {
4120 redux = fast_mp_montgomery_reduce;
4121 } else {
4122 /* use slower baseline Montgomery method */
4123 redux = mp_montgomery_reduce;
4124 }
4125 } else if (redmode == 1) {
4126 /* setup DR reduction for moduli of the form B**k - b */
4127 diminished_radix_setup(P, &mp);
4128 redux = diminished_radix_reduce;
4129 } else {
4130 /* setup DR reduction for moduli of the form 2**k - b */
4131 if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
4132 goto LBL_M;
4133 }
4134 redux = mp_reduce_2k;
4135 }
4136
4137 /* setup result */
4138 if ((err = mp_init(&res)) != MP_OKAY) {
4139 goto LBL_M;
4140 }
4141
4142 /* create M table
4143 *
4144
4145 *
4146 * The first half of the table is not computed though accept for M[0] and M[1]
4147 */
4148
4149 if (redmode == 0) {
4150 /* now we need R mod m */
4151 if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) {
4152 goto LBL_RES;
4153 }
4154
4155 /* now set M[1] to G * R mod m */
4156 if ((err = multiply_modulo(&M[1], G, &res, P)) != MP_OKAY) {
4157 goto LBL_RES;
4158 }
4159 } else {
4160 set_word(&res, 1);
4161 if ((err = modulo(G, P, &M[1])) != MP_OKAY) {
4162 goto LBL_RES;
4163 }
4164 }
4165
4166 /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
4167 if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4168 goto LBL_RES;
4169 }
4170
4171 for (x = 0; x < (winsize - 1); x++) {
4172 if ((err = square(&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
4173 goto LBL_RES;
4174 }
4175 if ((err = (*redux)(&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
4176 goto LBL_RES;
4177 }
4178 }
4179
4180 /* create upper table */
4181 for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4182 if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4183 goto LBL_RES;
4184 }
4185 if ((err = (*redux)(&M[x], P, mp)) != MP_OKAY) {
4186 goto LBL_RES;
4187 }
4188 }
4189
4190 /* set initial mode and bit cnt */
4191 mode = 0;
4192 bitcnt = 1;
4193 buf = 0;
4194 digidx = X->used - 1;
4195 bitcpy = 0;
4196 bitbuf = 0;
4197
4198 for (;;) {
4199 /* grab next digit as required */
4200 if (--bitcnt == 0) {
4201 /* if digidx == -1 we are out of digits so break */
4202 if (digidx == -1) {
4203 break;
4204 }
4205 /* read next digit and reset bitcnt */
4206 buf = X->dp[digidx--];
4207 bitcnt = (int)DIGIT_BIT;
4208 }
4209
4210 /* grab the next msb from the exponent */
4211 y = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1;
4212 buf <<= (mp_digit)1;
4213
4214 /* if the bit is zero and mode == 0 then we ignore it
4215 * These represent the leading zero bits before the first 1 bit
4216 * in the exponent. Technically this opt is not required but it
4217 * does lower the # of trivial squaring/reductions used
4218 */
4219 if (mode == 0 && y == 0) {
4220 continue;
4221 }
4222
4223 /* if the bit is zero and mode == 1 then we square */
4224 if (mode == 1 && y == 0) {
4225 if ((err = square(&res, &res)) != MP_OKAY) {
4226 goto LBL_RES;
4227 }
4228 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4229 goto LBL_RES;
4230 }
4231 continue;
4232 }
4233
4234 /* else we add it to the window */
4235 bitbuf |= (y << (winsize - ++bitcpy));
4236 mode = 2;
4237
4238 if (bitcpy == winsize) {
4239 /* ok window is filled so square as required and multiply */
4240 /* square first */
4241 for (x = 0; x < winsize; x++) {
4242 if ((err = square(&res, &res)) != MP_OKAY) {
4243 goto LBL_RES;
4244 }
4245 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4246 goto LBL_RES;
4247 }
4248 }
4249
4250 /* then multiply */
4251 if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) {
4252 goto LBL_RES;
4253 }
4254 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4255 goto LBL_RES;
4256 }
4257
4258 /* empty window and reset */
4259 bitcpy = 0;
4260 bitbuf = 0;
4261 mode = 1;
4262 }
4263 }
4264
4265 /* if bits remain then square/multiply */
4266 if (mode == 2 && bitcpy > 0) {
4267 /* square then multiply if the bit is set */
4268 for (x = 0; x < bitcpy; x++) {
4269 if ((err = square(&res, &res)) != MP_OKAY) {
4270 goto LBL_RES;
4271 }
4272 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4273 goto LBL_RES;
4274 }
4275
4276 /* get next bit of the window */
4277 bitbuf <<= 1;
4278 if ((bitbuf & (1 << winsize)) != 0) {
4279 /* then multiply */
4280 if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) {
4281 goto LBL_RES;
4282 }
4283 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4284 goto LBL_RES;
4285 }
4286 }
4287 }
4288 }
4289
4290 if (redmode == 0) {
4291 /* fixup result if Montgomery reduction is used
4292 * recall that any value in a Montgomery system is
4293 * actually multiplied by R mod n. So we have
4294 * to reduce one more time to cancel out the factor
4295 * of R.
4296 */
4297 if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4298 goto LBL_RES;
4299 }
4300 }
4301
4302 /* swap res with Y */
4303 mp_exch(&res, Y);
4304 err = MP_OKAY;
4305 LBL_RES:
4306 mp_clear(&res);
4307 LBL_M:
4308 mp_clear(&M[1]);
4309 for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4310 mp_clear(&M[x]);
4311 }
4312 return err;
4313 }
4314
4315 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_exponent_modulo.c,v $ */
4316 /* Revision: 1.4 $ */
4317 /* Date: 2011/03/18 16:43:04 $ */
4318
4319 /* this is a shell function that calls either the normal or Montgomery
4320 * exptmod functions. Originally the call to the montgomery code was
4321 * embedded in the normal function but that wasted alot of stack space
4322 * for nothing (since 99% of the time the Montgomery code would be called)
4323 */
4324 static int
exponent_modulo(mp_int * G,mp_int * X,mp_int * P,mp_int * Y)4325 exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int *Y)
4326 {
4327 int diminished_radix;
4328
4329 /* modulus P must be positive */
4330 if (P->sign == MP_NEG) {
4331 return MP_VAL;
4332 }
4333
4334 /* if exponent X is negative we have to recurse */
4335 if (X->sign == MP_NEG) {
4336 mp_int tmpG, tmpX;
4337 int err;
4338
4339 /* first compute 1/G mod P */
4340 if ((err = mp_init(&tmpG)) != MP_OKAY) {
4341 return err;
4342 }
4343 if ((err = modular_inverse(&tmpG, G, P)) != MP_OKAY) {
4344 mp_clear(&tmpG);
4345 return err;
4346 }
4347
4348 /* now get |X| */
4349 if ((err = mp_init(&tmpX)) != MP_OKAY) {
4350 mp_clear(&tmpG);
4351 return err;
4352 }
4353 if ((err = absolute(X, &tmpX)) != MP_OKAY) {
4354 mp_clear_multi(&tmpG, &tmpX, NULL);
4355 return err;
4356 }
4357
4358 /* and now compute (1/G)**|X| instead of G**X [X < 0] */
4359 err = exponent_modulo(&tmpG, &tmpX, P, Y);
4360 mp_clear_multi(&tmpG, &tmpX, NULL);
4361 return err;
4362 }
4363
4364 /* modified diminished radix reduction */
4365 if (mp_reduce_is_2k_l(P) == MP_YES) {
4366 return basic_exponent_mod(G, X, P, Y, 1);
4367 }
4368
4369 /* is it a DR modulus? */
4370 diminished_radix = is_diminished_radix_modulus(P);
4371
4372 /* if not, is it a unrestricted DR modulus? */
4373 if (!diminished_radix) {
4374 diminished_radix = mp_reduce_is_2k(P) << 1;
4375 }
4376
4377 /* if the modulus is odd or diminished_radix, use the montgomery method */
4378 if (PGPV_BN_is_odd(P) == 1 || diminished_radix) {
4379 return fast_exponent_modulo(G, X, P, Y, diminished_radix);
4380 }
4381 /* otherwise use the generic Barrett reduction technique */
4382 return basic_exponent_mod(G, X, P, Y, 0);
4383 }
4384
4385 /* reverse an array, used for radix code */
4386 static void
bn_reverse(unsigned char * s,int len)4387 bn_reverse(unsigned char *s, int len)
4388 {
4389 int ix, iy;
4390 uint8_t t;
4391
4392 for (ix = 0, iy = len - 1; ix < iy ; ix++, --iy) {
4393 t = s[ix];
4394 s[ix] = s[iy];
4395 s[iy] = t;
4396 }
4397 }
4398
4399 static inline int
is_power_of_two(mp_digit b,int * p)4400 is_power_of_two(mp_digit b, int *p)
4401 {
4402 int x;
4403
4404 /* fast return if no power of two */
4405 if ((b==0) || (b & (b-1))) {
4406 return 0;
4407 }
4408
4409 for (x = 0; x < DIGIT_BIT; x++) {
4410 if (b == (((mp_digit)1)<<x)) {
4411 *p = x;
4412 return 1;
4413 }
4414 }
4415 return 0;
4416 }
4417
4418 /* single digit division (based on routine from MPI) */
4419 static int
signed_divide_word(mp_int * a,mp_digit b,mp_int * c,mp_digit * d)4420 signed_divide_word(mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
4421 {
4422 mp_int q;
4423 mp_word w;
4424 mp_digit t;
4425 int res, ix;
4426
4427 /* cannot divide by zero */
4428 if (b == 0) {
4429 return MP_VAL;
4430 }
4431
4432 /* quick outs */
4433 if (b == 1 || MP_ISZERO(a) == 1) {
4434 if (d != NULL) {
4435 *d = 0;
4436 }
4437 if (c != NULL) {
4438 return mp_copy(a, c);
4439 }
4440 return MP_OKAY;
4441 }
4442
4443 /* power of two ? */
4444 if (is_power_of_two(b, &ix) == 1) {
4445 if (d != NULL) {
4446 *d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
4447 }
4448 if (c != NULL) {
4449 return rshift_bits(a, ix, c, NULL);
4450 }
4451 return MP_OKAY;
4452 }
4453
4454 /* three? */
4455 if (b == 3) {
4456 return third(a, c, d);
4457 }
4458
4459 /* no easy answer [c'est la vie]. Just division */
4460 if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4461 return res;
4462 }
4463
4464 q.used = a->used;
4465 q.sign = a->sign;
4466 w = 0;
4467 for (ix = a->used - 1; ix >= 0; ix--) {
4468 w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
4469
4470 if (w >= b) {
4471 t = (mp_digit)(w / b);
4472 w -= ((mp_word)t) * ((mp_word)b);
4473 } else {
4474 t = 0;
4475 }
4476 q.dp[ix] = (mp_digit)t;
4477 }
4478
4479 if (d != NULL) {
4480 *d = (mp_digit)w;
4481 }
4482
4483 if (c != NULL) {
4484 trim_unused_digits(&q);
4485 mp_exch(&q, c);
4486 }
4487 mp_clear(&q);
4488
4489 return res;
4490 }
4491
4492 static const mp_digit ltm_prime_tab[] = {
4493 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4494 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4495 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4496 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4497 #ifndef MP_8BIT
4498 0x0083,
4499 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4500 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4501 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4502 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4503
4504 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4505 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4506 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4507 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4508 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4509 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4510 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4511 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4512
4513 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4514 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4515 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4516 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4517 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4518 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4519 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4520 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4521
4522 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4523 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4524 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4525 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4526 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4527 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4528 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4529 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4530 #endif
4531 };
4532
4533 #define PRIME_SIZE __arraycount(ltm_prime_tab)
4534
4535 static inline int
mp_prime_is_divisible(mp_int * a,int * result)4536 mp_prime_is_divisible(mp_int *a, int *result)
4537 {
4538 int err, ix;
4539 mp_digit res;
4540
4541 /* default to not */
4542 *result = MP_NO;
4543
4544 for (ix = 0; ix < (int)PRIME_SIZE; ix++) {
4545 /* what is a mod LBL_prime_tab[ix] */
4546 if ((err = signed_divide_word(a, ltm_prime_tab[ix], NULL, &res)) != MP_OKAY) {
4547 return err;
4548 }
4549
4550 /* is the residue zero? */
4551 if (res == 0) {
4552 *result = MP_YES;
4553 return MP_OKAY;
4554 }
4555 }
4556
4557 return MP_OKAY;
4558 }
4559
4560 /* single digit addition */
4561 static int
add_single_digit(mp_int * a,mp_digit b,mp_int * c)4562 add_single_digit(mp_int *a, mp_digit b, mp_int *c)
4563 {
4564 int res, ix, oldused;
4565 mp_digit *tmpa, *tmpc, mu;
4566
4567 /* grow c as required */
4568 if (c->alloc < a->used + 1) {
4569 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4570 return res;
4571 }
4572 }
4573
4574 /* if a is negative and |a| >= b, call c = |a| - b */
4575 if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
4576 /* temporarily fix sign of a */
4577 a->sign = MP_ZPOS;
4578
4579 /* c = |a| - b */
4580 res = signed_subtract_word(a, b, c);
4581
4582 /* fix sign */
4583 a->sign = c->sign = MP_NEG;
4584
4585 /* clamp */
4586 trim_unused_digits(c);
4587
4588 return res;
4589 }
4590
4591 /* old number of used digits in c */
4592 oldused = c->used;
4593
4594 /* sign always positive */
4595 c->sign = MP_ZPOS;
4596
4597 /* source alias */
4598 tmpa = a->dp;
4599
4600 /* destination alias */
4601 tmpc = c->dp;
4602
4603 /* if a is positive */
4604 if (a->sign == MP_ZPOS) {
4605 /* add digit, after this we're propagating
4606 * the carry.
4607 */
4608 *tmpc = *tmpa++ + b;
4609 mu = *tmpc >> DIGIT_BIT;
4610 *tmpc++ &= MP_MASK;
4611
4612 /* now handle rest of the digits */
4613 for (ix = 1; ix < a->used; ix++) {
4614 *tmpc = *tmpa++ + mu;
4615 mu = *tmpc >> DIGIT_BIT;
4616 *tmpc++ &= MP_MASK;
4617 }
4618 /* set final carry */
4619 ix++;
4620 *tmpc++ = mu;
4621
4622 /* setup size */
4623 c->used = a->used + 1;
4624 } else {
4625 /* a was negative and |a| < b */
4626 c->used = 1;
4627
4628 /* the result is a single digit */
4629 if (a->used == 1) {
4630 *tmpc++ = b - a->dp[0];
4631 } else {
4632 *tmpc++ = b;
4633 }
4634
4635 /* setup count so the clearing of oldused
4636 * can fall through correctly
4637 */
4638 ix = 1;
4639 }
4640
4641 /* now zero to oldused */
4642 while (ix++ < oldused) {
4643 *tmpc++ = 0;
4644 }
4645 trim_unused_digits(c);
4646
4647 return MP_OKAY;
4648 }
4649
4650 /* single digit subtraction */
4651 static int
signed_subtract_word(mp_int * a,mp_digit b,mp_int * c)4652 signed_subtract_word(mp_int *a, mp_digit b, mp_int *c)
4653 {
4654 mp_digit *tmpa, *tmpc, mu;
4655 int res, ix, oldused;
4656
4657 /* grow c as required */
4658 if (c->alloc < a->used + 1) {
4659 if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4660 return res;
4661 }
4662 }
4663
4664 /* if a is negative just do an unsigned
4665 * addition [with fudged signs]
4666 */
4667 if (a->sign == MP_NEG) {
4668 a->sign = MP_ZPOS;
4669 res = add_single_digit(a, b, c);
4670 a->sign = c->sign = MP_NEG;
4671
4672 /* clamp */
4673 trim_unused_digits(c);
4674
4675 return res;
4676 }
4677
4678 /* setup regs */
4679 oldused = c->used;
4680 tmpa = a->dp;
4681 tmpc = c->dp;
4682
4683 /* if a <= b simply fix the single digit */
4684 if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
4685 if (a->used == 1) {
4686 *tmpc++ = b - *tmpa;
4687 } else {
4688 *tmpc++ = b;
4689 }
4690 ix = 1;
4691
4692 /* negative/1digit */
4693 c->sign = MP_NEG;
4694 c->used = 1;
4695 } else {
4696 /* positive/size */
4697 c->sign = MP_ZPOS;
4698 c->used = a->used;
4699
4700 /* subtract first digit */
4701 *tmpc = *tmpa++ - b;
4702 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4703 *tmpc++ &= MP_MASK;
4704
4705 /* handle rest of the digits */
4706 for (ix = 1; ix < a->used; ix++) {
4707 *tmpc = *tmpa++ - mu;
4708 mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4709 *tmpc++ &= MP_MASK;
4710 }
4711 }
4712
4713 /* zero excess digits */
4714 while (ix++ < oldused) {
4715 *tmpc++ = 0;
4716 }
4717 trim_unused_digits(c);
4718 return MP_OKAY;
4719 }
4720
4721 static const int lnz[16] = {
4722 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4723 };
4724
4725 /* Counts the number of lsbs which are zero before the first zero bit */
4726 static int
mp_cnt_lsb(mp_int * a)4727 mp_cnt_lsb(mp_int *a)
4728 {
4729 int x;
4730 mp_digit q, qq;
4731
4732 /* easy out */
4733 if (MP_ISZERO(a) == 1) {
4734 return 0;
4735 }
4736
4737 /* scan lower digits until non-zero */
4738 for (x = 0; x < a->used && a->dp[x] == 0; x++) {
4739 }
4740 q = a->dp[x];
4741 x *= DIGIT_BIT;
4742
4743 /* now scan this digit until a 1 is found */
4744 if ((q & 1) == 0) {
4745 do {
4746 qq = q & 15;
4747 /* LINTED previous op ensures range of qq */
4748 x += lnz[qq];
4749 q >>= 4;
4750 } while (qq == 0);
4751 }
4752 return x;
4753 }
4754
4755 /* c = a * a (mod b) */
4756 static int
square_modulo(mp_int * a,mp_int * b,mp_int * c)4757 square_modulo(mp_int *a, mp_int *b, mp_int *c)
4758 {
4759 int res;
4760 mp_int t;
4761
4762 if ((res = mp_init(&t)) != MP_OKAY) {
4763 return res;
4764 }
4765
4766 if ((res = square(a, &t)) != MP_OKAY) {
4767 mp_clear(&t);
4768 return res;
4769 }
4770 res = modulo(&t, b, c);
4771 mp_clear(&t);
4772 return res;
4773 }
4774
4775 static int
mp_prime_miller_rabin(mp_int * a,mp_int * b,int * result)4776 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result)
4777 {
4778 mp_int n1, y, r;
4779 int s, j, err;
4780
4781 /* default */
4782 *result = MP_NO;
4783
4784 /* ensure b > 1 */
4785 if (compare_digit(b, 1) != MP_GT) {
4786 return MP_VAL;
4787 }
4788
4789 /* get n1 = a - 1 */
4790 if ((err = mp_init_copy(&n1, a)) != MP_OKAY) {
4791 return err;
4792 }
4793 if ((err = signed_subtract_word(&n1, 1, &n1)) != MP_OKAY) {
4794 goto LBL_N1;
4795 }
4796
4797 /* set 2**s * r = n1 */
4798 if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) {
4799 goto LBL_N1;
4800 }
4801
4802 /* count the number of least significant bits
4803 * which are zero
4804 */
4805 s = mp_cnt_lsb(&r);
4806
4807 /* now divide n - 1 by 2**s */
4808 if ((err = rshift_bits(&r, s, &r, NULL)) != MP_OKAY) {
4809 goto LBL_R;
4810 }
4811
4812 /* compute y = b**r mod a */
4813 if ((err = mp_init(&y)) != MP_OKAY) {
4814 goto LBL_R;
4815 }
4816 if ((err = exponent_modulo(b, &r, a, &y)) != MP_OKAY) {
4817 goto LBL_Y;
4818 }
4819
4820 /* if y != 1 and y != n1 do */
4821 if (compare_digit(&y, 1) != MP_EQ && signed_compare(&y, &n1) != MP_EQ) {
4822 j = 1;
4823 /* while j <= s-1 and y != n1 */
4824 while ((j <= (s - 1)) && signed_compare(&y, &n1) != MP_EQ) {
4825 if ((err = square_modulo(&y, a, &y)) != MP_OKAY) {
4826 goto LBL_Y;
4827 }
4828
4829 /* if y == 1 then composite */
4830 if (compare_digit(&y, 1) == MP_EQ) {
4831 goto LBL_Y;
4832 }
4833
4834 ++j;
4835 }
4836
4837 /* if y != n1 then composite */
4838 if (signed_compare(&y, &n1) != MP_EQ) {
4839 goto LBL_Y;
4840 }
4841 }
4842
4843 /* probably prime now */
4844 *result = MP_YES;
4845 LBL_Y:
4846 mp_clear(&y);
4847 LBL_R:
4848 mp_clear(&r);
4849 LBL_N1:
4850 mp_clear(&n1);
4851 return err;
4852 }
4853
4854 /* performs a variable number of rounds of Miller-Rabin
4855 *
4856 * Probability of error after t rounds is no more than
4857
4858 *
4859 * Sets result to 1 if probably prime, 0 otherwise
4860 */
4861 static int
mp_prime_is_prime(mp_int * a,int t,int * result)4862 mp_prime_is_prime(mp_int *a, int t, int *result)
4863 {
4864 mp_int b;
4865 int ix, err, res;
4866
4867 /* default to no */
4868 *result = MP_NO;
4869
4870 /* valid value of t? */
4871 if (t <= 0 || t > (int)PRIME_SIZE) {
4872 return MP_VAL;
4873 }
4874
4875 /* is the input equal to one of the primes in the table? */
4876 for (ix = 0; ix < (int)PRIME_SIZE; ix++) {
4877 if (compare_digit(a, ltm_prime_tab[ix]) == MP_EQ) {
4878 *result = 1;
4879 return MP_OKAY;
4880 }
4881 }
4882
4883 /* first perform trial division */
4884 if ((err = mp_prime_is_divisible(a, &res)) != MP_OKAY) {
4885 return err;
4886 }
4887
4888 /* return if it was trivially divisible */
4889 if (res == MP_YES) {
4890 return MP_OKAY;
4891 }
4892
4893 /* now perform the miller-rabin rounds */
4894 if ((err = mp_init(&b)) != MP_OKAY) {
4895 return err;
4896 }
4897
4898 for (ix = 0; ix < t; ix++) {
4899 /* set the prime */
4900 set_word(&b, ltm_prime_tab[ix]);
4901
4902 if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
4903 goto LBL_B;
4904 }
4905
4906 if (res == MP_NO) {
4907 goto LBL_B;
4908 }
4909 }
4910
4911 /* passed the test */
4912 *result = MP_YES;
4913 LBL_B:
4914 mp_clear(&b);
4915 return err;
4916 }
4917
4918 /* returns size of ASCII reprensentation */
4919 static int
mp_radix_size(mp_int * a,int radix,int * size)4920 mp_radix_size(mp_int *a, int radix, int *size)
4921 {
4922 int res, digs;
4923 mp_int t;
4924 mp_digit d;
4925
4926 *size = 0;
4927
4928 /* special case for binary */
4929 if (radix == 2) {
4930 *size = mp_count_bits(a) + (a->sign == MP_NEG ? 1 : 0) + 1;
4931 return MP_OKAY;
4932 }
4933
4934 /* make sure the radix is in range */
4935 if (radix < 2 || radix > 64) {
4936 return MP_VAL;
4937 }
4938
4939 if (MP_ISZERO(a) == MP_YES) {
4940 *size = 2;
4941 return MP_OKAY;
4942 }
4943
4944 /* digs is the digit count */
4945 digs = 0;
4946
4947 /* if it's negative add one for the sign */
4948 if (a->sign == MP_NEG) {
4949 ++digs;
4950 }
4951
4952 /* init a copy of the input */
4953 if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
4954 return res;
4955 }
4956
4957 /* force temp to positive */
4958 t.sign = MP_ZPOS;
4959
4960 /* fetch out all of the digits */
4961 while (MP_ISZERO(&t) == MP_NO) {
4962 if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
4963 mp_clear(&t);
4964 return res;
4965 }
4966 ++digs;
4967 }
4968 mp_clear(&t);
4969
4970 /* return digs + 1, the 1 is for the NULL byte that would be required. */
4971 *size = digs + 1;
4972 return MP_OKAY;
4973 }
4974
4975 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
4976
4977 /* stores a bignum as a ASCII string in a given radix (2..64)
4978 *
4979 * Stores upto maxlen-1 chars and always a NULL byte
4980 */
4981 static int
mp_toradix_n(mp_int * a,char * str,int radix,int maxlen)4982 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen)
4983 {
4984 int res, digs;
4985 mp_int t;
4986 mp_digit d;
4987 char *_s = str;
4988
4989 /* check range of the maxlen, radix */
4990 if (maxlen < 2 || radix < 2 || radix > 64) {
4991 return MP_VAL;
4992 }
4993
4994 /* quick out if its zero */
4995 if (MP_ISZERO(a) == MP_YES) {
4996 *str++ = '0';
4997 *str = '\0';
4998 return MP_OKAY;
4999 }
5000
5001 if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
5002 return res;
5003 }
5004
5005 /* if it is negative output a - */
5006 if (t.sign == MP_NEG) {
5007 /* we have to reverse our digits later... but not the - sign!! */
5008 ++_s;
5009
5010 /* store the flag and mark the number as positive */
5011 *str++ = '-';
5012 t.sign = MP_ZPOS;
5013
5014 /* subtract a char */
5015 --maxlen;
5016 }
5017
5018 digs = 0;
5019 while (MP_ISZERO(&t) == 0) {
5020 if (--maxlen < 1) {
5021 /* no more room */
5022 break;
5023 }
5024 if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
5025 mp_clear(&t);
5026 return res;
5027 }
5028 /* LINTED -- radix' range is checked above, limits d's range */
5029 *str++ = mp_s_rmap[d];
5030 ++digs;
5031 }
5032
5033 /* reverse the digits of the string. In this case _s points
5034 * to the first digit [exluding the sign] of the number
5035 */
5036 bn_reverse((unsigned char *)_s, digs);
5037
5038 /* append a NULL so the string is properly terminated */
5039 *str = '\0';
5040
5041 mp_clear(&t);
5042 return MP_OKAY;
5043 }
5044
5045 static char *
formatbn(const PGPV_BIGNUM * a,const int radix)5046 formatbn(const PGPV_BIGNUM *a, const int radix)
5047 {
5048 char *s;
5049 int len;
5050
5051 if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) {
5052 return NULL;
5053 }
5054 if ((s = allocate(1, (size_t)len)) != NULL) {
5055 if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) {
5056 deallocate(s, (size_t)len);
5057 return NULL;
5058 }
5059 }
5060 return s;
5061 }
5062
5063 static int
mp_getradix_num(mp_int * a,int radix,char * s)5064 mp_getradix_num(mp_int *a, int radix, char *s)
5065 {
5066 int err, ch, neg, y;
5067
5068 /* clear a */
5069 mp_zero(a);
5070 /* if first digit is - then set negative */
5071 if ((ch = *s++) == '-') {
5072 neg = MP_NEG;
5073 ch = *s++;
5074 } else {
5075 neg = MP_ZPOS;
5076 }
5077 for (;;) {
5078 /* fold lower to upper case */
5079 if (ch >= 'a' && ch <= 'z') {
5080 ch = (ch - 'a') + 'A';
5081 }
5082 /* find index y in the radix map */
5083 for (y = 0; y < radix; y++) {
5084 if (mp_s_rmap[y] == ch) {
5085 break;
5086 }
5087 }
5088 if (y == radix) {
5089 break;
5090 }
5091 /* shift up and add */
5092 if ((err = multiply_digit(a, radix, a)) != MP_OKAY) {
5093 return err;
5094 }
5095 if ((err = add_single_digit(a, y, a)) != MP_OKAY) {
5096 return err;
5097 }
5098 ch = *s++;
5099 }
5100 if (compare_digit(a, 0) != MP_EQ) {
5101 a->sign = neg;
5102 }
5103 return MP_OKAY;
5104 }
5105
5106 static int
getbn(PGPV_BIGNUM ** a,const char * str,int radix)5107 getbn(PGPV_BIGNUM **a, const char *str, int radix)
5108 {
5109 int len;
5110
5111 if (a == NULL || str == NULL || (*a = PGPV_BN_new()) == NULL) {
5112 return 0;
5113 }
5114 if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) {
5115 return 0;
5116 }
5117 mp_radix_size(__UNCONST(*a), radix, &len);
5118 return len - 1;
5119 }
5120
5121 /* d = a - b (mod c) */
5122 static int
subtract_modulo(mp_int * a,mp_int * b,mp_int * c,mp_int * d)5123 subtract_modulo(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
5124 {
5125 int res;
5126 mp_int t;
5127
5128
5129 if ((res = mp_init(&t)) != MP_OKAY) {
5130 return res;
5131 }
5132
5133 if ((res = signed_subtract(a, b, &t)) != MP_OKAY) {
5134 mp_clear(&t);
5135 return res;
5136 }
5137 res = modulo(&t, c, d);
5138 mp_clear(&t);
5139 return res;
5140 }
5141
5142 /* bn_mp_gcd.c */
5143 /* Greatest Common Divisor using the binary method */
5144 static int
mp_gcd(mp_int * a,mp_int * b,mp_int * c)5145 mp_gcd(mp_int *a, mp_int *b, mp_int *c)
5146 {
5147 mp_int u, v;
5148 int k, u_lsb, v_lsb, res;
5149
5150 /* either zero than gcd is the largest */
5151 if (PGPV_BN_is_zero(a) == MP_YES) {
5152 return absolute(b, c);
5153 }
5154 if (PGPV_BN_is_zero(b) == MP_YES) {
5155 return absolute(a, c);
5156 }
5157
5158 /* get copies of a and b we can modify */
5159 if ((res = mp_init_copy(&u, a)) != MP_OKAY) {
5160 return res;
5161 }
5162
5163 if ((res = mp_init_copy(&v, b)) != MP_OKAY) {
5164 goto LBL_U;
5165 }
5166
5167 /* must be positive for the remainder of the algorithm */
5168 u.sign = v.sign = MP_ZPOS;
5169
5170 /* B1. Find the common power of two for u and v */
5171 u_lsb = mp_cnt_lsb(&u);
5172 v_lsb = mp_cnt_lsb(&v);
5173 k = MIN(u_lsb, v_lsb);
5174
5175 if (k > 0) {
5176 /* divide the power of two out */
5177 if ((res = rshift_bits(&u, k, &u, NULL)) != MP_OKAY) {
5178 goto LBL_V;
5179 }
5180
5181 if ((res = rshift_bits(&v, k, &v, NULL)) != MP_OKAY) {
5182 goto LBL_V;
5183 }
5184 }
5185
5186 /* divide any remaining factors of two out */
5187 if (u_lsb != k) {
5188 if ((res = rshift_bits(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
5189 goto LBL_V;
5190 }
5191 }
5192
5193 if (v_lsb != k) {
5194 if ((res = rshift_bits(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
5195 goto LBL_V;
5196 }
5197 }
5198
5199 while (PGPV_BN_is_zero(&v) == 0) {
5200 /* make sure v is the largest */
5201 if (compare_magnitude(&u, &v) == MP_GT) {
5202 /* swap u and v to make sure v is >= u */
5203 mp_exch(&u, &v);
5204 }
5205
5206 /* subtract smallest from largest */
5207 if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) {
5208 goto LBL_V;
5209 }
5210
5211 /* Divide out all factors of two */
5212 if ((res = rshift_bits(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
5213 goto LBL_V;
5214 }
5215 }
5216
5217 /* multiply by 2**k which we divided out at the beginning */
5218 if ((res = lshift_bits(&u, k, c)) != MP_OKAY) {
5219 goto LBL_V;
5220 }
5221 c->sign = MP_ZPOS;
5222 res = MP_OKAY;
5223 LBL_V:
5224 mp_clear (&u);
5225 LBL_U:
5226 mp_clear (&v);
5227 return res;
5228 }
5229
5230 /**************************************************************************/
5231
5232 /* PGPV_BIGNUM emulation layer */
5233
5234 /* essentiually, these are just wrappers around the libtommath functions */
5235 /* usually the order of args changes */
5236 /* the PGPV_BIGNUM API tends to have more const poisoning */
5237 /* these wrappers also check the arguments passed for sanity */
5238
5239 PGPV_BIGNUM *
PGPV_BN_bin2bn(const uint8_t * data,int len,PGPV_BIGNUM * ret)5240 PGPV_BN_bin2bn(const uint8_t *data, int len, PGPV_BIGNUM *ret)
5241 {
5242 if (data == NULL) {
5243 return PGPV_BN_new();
5244 }
5245 if (ret == NULL) {
5246 ret = PGPV_BN_new();
5247 }
5248 return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL;
5249 }
5250
5251 /* store in unsigned [big endian] format */
5252 int
PGPV_BN_bn2bin(const PGPV_BIGNUM * a,unsigned char * b)5253 PGPV_BN_bn2bin(const PGPV_BIGNUM *a, unsigned char *b)
5254 {
5255 PGPV_BIGNUM t;
5256 int x;
5257
5258 if (a == NULL || b == NULL) {
5259 return -1;
5260 }
5261 if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) {
5262 return -1;
5263 }
5264 for (x = 0; !PGPV_BN_is_zero(&t) ; ) {
5265 b[x++] = (unsigned char) (t.dp[0] & 0xff);
5266 if (rshift_bits(&t, 8, &t, NULL) != MP_OKAY) {
5267 mp_clear(&t);
5268 return -1;
5269 }
5270 }
5271 bn_reverse(b, x);
5272 mp_clear(&t);
5273 return x;
5274 }
5275
5276 void
PGPV_BN_init(PGPV_BIGNUM * a)5277 PGPV_BN_init(PGPV_BIGNUM *a)
5278 {
5279 if (a != NULL) {
5280 mp_init(a);
5281 }
5282 }
5283
5284 PGPV_BIGNUM *
PGPV_BN_new(void)5285 PGPV_BN_new(void)
5286 {
5287 PGPV_BIGNUM *a;
5288
5289 if ((a = allocate(1, sizeof(*a))) != NULL) {
5290 mp_init(a);
5291 }
5292 return a;
5293 }
5294
5295 /* copy, b = a */
5296 int
PGPV_BN_copy(PGPV_BIGNUM * b,const PGPV_BIGNUM * a)5297 PGPV_BN_copy(PGPV_BIGNUM *b, const PGPV_BIGNUM *a)
5298 {
5299 if (a == NULL || b == NULL) {
5300 return MP_VAL;
5301 }
5302 return mp_copy(__UNCONST(a), b);
5303 }
5304
5305 PGPV_BIGNUM *
PGPV_BN_dup(const PGPV_BIGNUM * a)5306 PGPV_BN_dup(const PGPV_BIGNUM *a)
5307 {
5308 PGPV_BIGNUM *ret;
5309
5310 if (a == NULL) {
5311 return NULL;
5312 }
5313 if ((ret = PGPV_BN_new()) != NULL) {
5314 PGPV_BN_copy(ret, a);
5315 }
5316 return ret;
5317 }
5318
5319 void
PGPV_BN_swap(PGPV_BIGNUM * a,PGPV_BIGNUM * b)5320 PGPV_BN_swap(PGPV_BIGNUM *a, PGPV_BIGNUM *b)
5321 {
5322 if (a && b) {
5323 mp_exch(a, b);
5324 }
5325 }
5326
5327 int
PGPV_BN_lshift(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,int n)5328 PGPV_BN_lshift(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, int n)
5329 {
5330 if (r == NULL || a == NULL || n < 0) {
5331 return 0;
5332 }
5333 PGPV_BN_copy(r, a);
5334 return lshift_digits(r, n) == MP_OKAY;
5335 }
5336
5337 int
PGPV_BN_lshift1(PGPV_BIGNUM * r,PGPV_BIGNUM * a)5338 PGPV_BN_lshift1(PGPV_BIGNUM *r, PGPV_BIGNUM *a)
5339 {
5340 if (r == NULL || a == NULL) {
5341 return 0;
5342 }
5343 PGPV_BN_copy(r, a);
5344 return lshift_digits(r, 1) == MP_OKAY;
5345 }
5346
5347 int
PGPV_BN_rshift(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,int n)5348 PGPV_BN_rshift(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, int n)
5349 {
5350 if (r == NULL || a == NULL || n < 0) {
5351 return MP_VAL;
5352 }
5353 PGPV_BN_copy(r, a);
5354 return rshift_digits(r, n) == MP_OKAY;
5355 }
5356
5357 int
PGPV_BN_rshift1(PGPV_BIGNUM * r,PGPV_BIGNUM * a)5358 PGPV_BN_rshift1(PGPV_BIGNUM *r, PGPV_BIGNUM *a)
5359 {
5360 if (r == NULL || a == NULL) {
5361 return 0;
5362 }
5363 PGPV_BN_copy(r, a);
5364 return rshift_digits(r, 1) == MP_OKAY;
5365 }
5366
5367 int
PGPV_BN_set_word(PGPV_BIGNUM * a,PGPV_BN_ULONG w)5368 PGPV_BN_set_word(PGPV_BIGNUM *a, PGPV_BN_ULONG w)
5369 {
5370 if (a == NULL) {
5371 return 0;
5372 }
5373 set_word(a, w);
5374 return 1;
5375 }
5376
5377 int
PGPV_BN_add(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,const PGPV_BIGNUM * b)5378 PGPV_BN_add(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b)
5379 {
5380 if (a == NULL || b == NULL || r == NULL) {
5381 return 0;
5382 }
5383 return signed_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5384 }
5385
5386 int
PGPV_BN_sub(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,const PGPV_BIGNUM * b)5387 PGPV_BN_sub(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b)
5388 {
5389 if (a == NULL || b == NULL || r == NULL) {
5390 return 0;
5391 }
5392 return signed_subtract(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5393 }
5394
5395 int
PGPV_BN_mul(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,const PGPV_BIGNUM * b,PGPV_BN_CTX * ctx)5396 PGPV_BN_mul(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b, PGPV_BN_CTX *ctx)
5397 {
5398 if (a == NULL || b == NULL || r == NULL) {
5399 return 0;
5400 }
5401 USE_ARG(ctx);
5402 return signed_multiply(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5403 }
5404
5405 int
PGPV_BN_div(PGPV_BIGNUM * dv,PGPV_BIGNUM * rem,const PGPV_BIGNUM * a,const PGPV_BIGNUM * d,PGPV_BN_CTX * ctx)5406 PGPV_BN_div(PGPV_BIGNUM *dv, PGPV_BIGNUM *rem, const PGPV_BIGNUM *a, const PGPV_BIGNUM *d, PGPV_BN_CTX *ctx)
5407 {
5408 if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) {
5409 return 0;
5410 }
5411 USE_ARG(ctx);
5412 return signed_divide(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY;
5413 }
5414
5415 /* perform a bit operation on the 2 bignums */
5416 int
PGPV_BN_bitop(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,char op,const PGPV_BIGNUM * b)5417 PGPV_BN_bitop(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, char op, const PGPV_BIGNUM *b)
5418 {
5419 unsigned ndigits;
5420 mp_digit ad;
5421 mp_digit bd;
5422 int i;
5423
5424 if (a == NULL || b == NULL || r == NULL) {
5425 return 0;
5426 }
5427 if (PGPV_BN_cmp(__UNCONST(a), __UNCONST(b)) >= 0) {
5428 PGPV_BN_copy(r, a);
5429 ndigits = a->used;
5430 } else {
5431 PGPV_BN_copy(r, b);
5432 ndigits = b->used;
5433 }
5434 for (i = 0 ; i < (int)ndigits ; i++) {
5435 ad = (i > a->used) ? 0 : a->dp[i];
5436 bd = (i > b->used) ? 0 : b->dp[i];
5437 switch(op) {
5438 case '&':
5439 r->dp[i] = (ad & bd);
5440 break;
5441 case '|':
5442 r->dp[i] = (ad | bd);
5443 break;
5444 case '^':
5445 r->dp[i] = (ad ^ bd);
5446 break;
5447 default:
5448 break;
5449 }
5450 }
5451 return 1;
5452 }
5453
5454 void
PGPV_BN_free(PGPV_BIGNUM * a)5455 PGPV_BN_free(PGPV_BIGNUM *a)
5456 {
5457 if (a) {
5458 mp_clear(a);
5459 free(a);
5460 }
5461 }
5462
5463 void
PGPV_BN_clear(PGPV_BIGNUM * a)5464 PGPV_BN_clear(PGPV_BIGNUM *a)
5465 {
5466 if (a) {
5467 mp_clear(a);
5468 }
5469 }
5470
5471 void
PGPV_BN_clear_free(PGPV_BIGNUM * a)5472 PGPV_BN_clear_free(PGPV_BIGNUM *a)
5473 {
5474 PGPV_BN_clear(a);
5475 free(a);
5476 }
5477
5478 int
PGPV_BN_num_bytes(const PGPV_BIGNUM * a)5479 PGPV_BN_num_bytes(const PGPV_BIGNUM *a)
5480 {
5481 if (a == NULL) {
5482 return MP_VAL;
5483 }
5484 return mp_unsigned_bin_size(__UNCONST(a));
5485 }
5486
5487 int
PGPV_BN_num_bits(const PGPV_BIGNUM * a)5488 PGPV_BN_num_bits(const PGPV_BIGNUM *a)
5489 {
5490 if (a == NULL) {
5491 return 0;
5492 }
5493 return mp_count_bits(a);
5494 }
5495
5496 void
PGPV_BN_set_negative(PGPV_BIGNUM * a,int n)5497 PGPV_BN_set_negative(PGPV_BIGNUM *a, int n)
5498 {
5499 if (a) {
5500 a->sign = (n) ? MP_NEG : 0;
5501 }
5502 }
5503
5504 int
PGPV_BN_cmp(PGPV_BIGNUM * a,PGPV_BIGNUM * b)5505 PGPV_BN_cmp(PGPV_BIGNUM *a, PGPV_BIGNUM *b)
5506 {
5507 if (a == NULL || b == NULL) {
5508 return MP_VAL;
5509 }
5510 switch(signed_compare(a, b)) {
5511 case MP_LT:
5512 return -1;
5513 case MP_GT:
5514 return 1;
5515 case MP_EQ:
5516 default:
5517 return 0;
5518 }
5519 }
5520
5521 int
PGPV_BN_mod_exp(PGPV_BIGNUM * Y,PGPV_BIGNUM * G,const PGPV_BIGNUM * X,const PGPV_BIGNUM * P,PGPV_BN_CTX * ctx)5522 PGPV_BN_mod_exp(PGPV_BIGNUM *Y, PGPV_BIGNUM *G, const PGPV_BIGNUM *X, const PGPV_BIGNUM *P, PGPV_BN_CTX *ctx)
5523 {
5524 if (Y == NULL || G == NULL || X == NULL || P == NULL) {
5525 return MP_VAL;
5526 }
5527 USE_ARG(ctx);
5528 return exponent_modulo(G, __UNCONST(X), __UNCONST(P), Y) == MP_OKAY;
5529 }
5530
5531 PGPV_BIGNUM *
PGPV_BN_mod_inverse(PGPV_BIGNUM * r,PGPV_BIGNUM * a,const PGPV_BIGNUM * n,PGPV_BN_CTX * ctx)5532 PGPV_BN_mod_inverse(PGPV_BIGNUM *r, PGPV_BIGNUM *a, const PGPV_BIGNUM *n, PGPV_BN_CTX *ctx)
5533 {
5534 USE_ARG(ctx);
5535 if (r == NULL || a == NULL || n == NULL) {
5536 return NULL;
5537 }
5538 return (modular_inverse(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL;
5539 }
5540
5541 int
PGPV_BN_mod_mul(PGPV_BIGNUM * ret,PGPV_BIGNUM * a,PGPV_BIGNUM * b,const PGPV_BIGNUM * m,PGPV_BN_CTX * ctx)5542 PGPV_BN_mod_mul(PGPV_BIGNUM *ret, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx)
5543 {
5544 USE_ARG(ctx);
5545 if (ret == NULL || a == NULL || b == NULL || m == NULL) {
5546 return 0;
5547 }
5548 return multiply_modulo(ret, a, b, __UNCONST(m)) == MP_OKAY;
5549 }
5550
5551 int
PGPV_BN_mod_add(PGPV_BIGNUM * ret,PGPV_BIGNUM * a,PGPV_BIGNUM * b,const PGPV_BIGNUM * m,PGPV_BN_CTX * ctx)5552 PGPV_BN_mod_add(PGPV_BIGNUM *ret, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx)
5553 {
5554 USE_ARG(ctx);
5555 if (ret == NULL || a == NULL || b == NULL || m == NULL) {
5556 return 0;
5557 }
5558 return add_modulo(ret, a, b, __UNCONST(m)) == MP_OKAY;
5559 }
5560
5561 PGPV_BN_CTX *
PGPV_BN_CTX_new(void)5562 PGPV_BN_CTX_new(void)
5563 {
5564 return allocate(1, sizeof(PGPV_BN_CTX));
5565 }
5566
5567 void
PGPV_BN_CTX_init(PGPV_BN_CTX * c)5568 PGPV_BN_CTX_init(PGPV_BN_CTX *c)
5569 {
5570 if (c != NULL) {
5571 c->arraysize = 15;
5572 if ((c->v = allocate(sizeof(*c->v), c->arraysize)) == NULL) {
5573 c->arraysize = 0;
5574 }
5575 }
5576 }
5577
5578 PGPV_BIGNUM *
PGPV_BN_CTX_get(PGPV_BN_CTX * ctx)5579 PGPV_BN_CTX_get(PGPV_BN_CTX *ctx)
5580 {
5581 if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) {
5582 return NULL;
5583 }
5584 return ctx->v[ctx->count++] = PGPV_BN_new();
5585 }
5586
5587 void
PGPV_BN_CTX_start(PGPV_BN_CTX * ctx)5588 PGPV_BN_CTX_start(PGPV_BN_CTX *ctx)
5589 {
5590 PGPV_BN_CTX_init(ctx);
5591 }
5592
5593 void
PGPV_BN_CTX_free(PGPV_BN_CTX * c)5594 PGPV_BN_CTX_free(PGPV_BN_CTX *c)
5595 {
5596 unsigned i;
5597
5598 if (c != NULL && c->v != NULL) {
5599 for (i = 0 ; i < c->count ; i++) {
5600 PGPV_BN_clear_free(c->v[i]);
5601 }
5602 deallocate(c->v, sizeof(*c->v) * c->arraysize);
5603 }
5604 }
5605
5606 void
PGPV_BN_CTX_end(PGPV_BN_CTX * ctx)5607 PGPV_BN_CTX_end(PGPV_BN_CTX *ctx)
5608 {
5609 PGPV_BN_CTX_free(ctx);
5610 }
5611
5612 char *
PGPV_BN_bn2hex(const PGPV_BIGNUM * a)5613 PGPV_BN_bn2hex(const PGPV_BIGNUM *a)
5614 {
5615 return (a == NULL) ? NULL : formatbn(a, 16);
5616 }
5617
5618 char *
PGPV_BN_bn2dec(const PGPV_BIGNUM * a)5619 PGPV_BN_bn2dec(const PGPV_BIGNUM *a)
5620 {
5621 return (a == NULL) ? NULL : formatbn(a, 10);
5622 }
5623
5624 char *
PGPV_BN_bn2radix(const PGPV_BIGNUM * a,unsigned radix)5625 PGPV_BN_bn2radix(const PGPV_BIGNUM *a, unsigned radix)
5626 {
5627 return (a == NULL) ? NULL : formatbn(a, (int)radix);
5628 }
5629
5630 #ifndef _KERNEL
5631 int
PGPV_BN_print_fp(FILE * fp,const PGPV_BIGNUM * a)5632 PGPV_BN_print_fp(FILE *fp, const PGPV_BIGNUM *a)
5633 {
5634 char *s;
5635 int ret;
5636
5637 if (fp == NULL || a == NULL) {
5638 return 0;
5639 }
5640 s = PGPV_BN_bn2hex(a);
5641 ret = fprintf(fp, "%s", s);
5642 deallocate(s, strlen(s) + 1);
5643 return ret;
5644 }
5645 #endif
5646
5647 #ifdef PGPV_BN_RAND_NEEDED
5648 int
PGPV_BN_rand(PGPV_BIGNUM * rnd,int bits,int top,int bottom)5649 PGPV_BN_rand(PGPV_BIGNUM *rnd, int bits, int top, int bottom)
5650 {
5651 uint64_t r;
5652 int digits;
5653 int i;
5654
5655 if (rnd == NULL) {
5656 return 0;
5657 }
5658 mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT));
5659 for (i = 0 ; i < digits ; i++) {
5660 r = (uint64_t)arc4random();
5661 r <<= 32;
5662 r |= arc4random();
5663 rnd->dp[i] = (r & MP_MASK);
5664 rnd->used += 1;
5665 }
5666 if (top == 0) {
5667 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT));
5668 }
5669 if (top == 1) {
5670 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT));
5671 rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1)));
5672 }
5673 if (bottom) {
5674 rnd->dp[0] |= 0x1;
5675 }
5676 return 1;
5677 }
5678
5679 int
PGPV_BN_rand_range(PGPV_BIGNUM * rnd,PGPV_BIGNUM * range)5680 PGPV_BN_rand_range(PGPV_BIGNUM *rnd, PGPV_BIGNUM *range)
5681 {
5682 if (rnd == NULL || range == NULL || PGPV_BN_is_zero(range)) {
5683 return 0;
5684 }
5685 PGPV_BN_rand(rnd, PGPV_BN_num_bits(range), 1, 0);
5686 return modulo(rnd, range, rnd) == MP_OKAY;
5687 }
5688 #endif
5689
5690 int
PGPV_BN_is_prime(const PGPV_BIGNUM * a,int checks,void (* callback)(int,int,void *),PGPV_BN_CTX * ctx,void * cb_arg)5691 PGPV_BN_is_prime(const PGPV_BIGNUM *a, int checks, void (*callback)(int, int, void *), PGPV_BN_CTX *ctx, void *cb_arg)
5692 {
5693 int primality;
5694
5695 if (a == NULL) {
5696 return 0;
5697 }
5698 USE_ARG(ctx);
5699 USE_ARG(cb_arg);
5700 USE_ARG(callback);
5701 return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0;
5702 }
5703
5704 const PGPV_BIGNUM *
PGPV_BN_value_one(void)5705 PGPV_BN_value_one(void)
5706 {
5707 static mp_digit digit = 1UL;
5708 static const PGPV_BIGNUM one = { &digit, 1, 1, 0 };
5709
5710 return &one;
5711 }
5712
5713 int
PGPV_BN_hex2bn(PGPV_BIGNUM ** a,const char * str)5714 PGPV_BN_hex2bn(PGPV_BIGNUM **a, const char *str)
5715 {
5716 return getbn(a, str, 16);
5717 }
5718
5719 int
PGPV_BN_dec2bn(PGPV_BIGNUM ** a,const char * str)5720 PGPV_BN_dec2bn(PGPV_BIGNUM **a, const char *str)
5721 {
5722 return getbn(a, str, 10);
5723 }
5724
5725 int
PGPV_BN_radix2bn(PGPV_BIGNUM ** a,const char * str,unsigned radix)5726 PGPV_BN_radix2bn(PGPV_BIGNUM **a, const char *str, unsigned radix)
5727 {
5728 return getbn(a, str, (int)radix);
5729 }
5730
5731 int
PGPV_BN_mod_sub(PGPV_BIGNUM * r,PGPV_BIGNUM * a,PGPV_BIGNUM * b,const PGPV_BIGNUM * m,PGPV_BN_CTX * ctx)5732 PGPV_BN_mod_sub(PGPV_BIGNUM *r, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx)
5733 {
5734 USE_ARG(ctx);
5735 if (r == NULL || a == NULL || b == NULL || m == NULL) {
5736 return 0;
5737 }
5738 return subtract_modulo(a, b, __UNCONST(m), r) == MP_OKAY;
5739 }
5740
5741 int
PGPV_BN_is_bit_set(const PGPV_BIGNUM * a,int n)5742 PGPV_BN_is_bit_set(const PGPV_BIGNUM *a, int n)
5743 {
5744 if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) {
5745 return 0;
5746 }
5747 return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0;
5748 }
5749
5750 /* raise 'a' to power of 'b' */
5751 int
PGPV_BN_raise(PGPV_BIGNUM * res,PGPV_BIGNUM * a,PGPV_BIGNUM * b)5752 PGPV_BN_raise(PGPV_BIGNUM *res, PGPV_BIGNUM *a, PGPV_BIGNUM *b)
5753 {
5754 uint64_t exponent;
5755 PGPV_BIGNUM *power;
5756 PGPV_BIGNUM *temp;
5757 char *t;
5758
5759 t = PGPV_BN_bn2dec(b);
5760 exponent = (uint64_t)strtoull(t, NULL, 10);
5761 free(t);
5762 if (exponent == 0) {
5763 PGPV_BN_copy(res, PGPV_BN_value_one());
5764 } else {
5765 power = PGPV_BN_dup(a);
5766 for ( ; (exponent & 1) == 0 ; exponent >>= 1) {
5767 PGPV_BN_mul(power, power, power, NULL);
5768 }
5769 temp = PGPV_BN_dup(power);
5770 for (exponent >>= 1 ; exponent > 0 ; exponent >>= 1) {
5771 PGPV_BN_mul(power, power, power, NULL);
5772 if (exponent & 1) {
5773 PGPV_BN_mul(temp, power, temp, NULL);
5774 }
5775 }
5776 PGPV_BN_copy(res, temp);
5777 PGPV_BN_free(power);
5778 PGPV_BN_free(temp);
5779 }
5780 return 1;
5781 }
5782
5783 /* compute the factorial */
5784 int
PGPV_BN_factorial(PGPV_BIGNUM * res,PGPV_BIGNUM * f)5785 PGPV_BN_factorial(PGPV_BIGNUM *res, PGPV_BIGNUM *f)
5786 {
5787 PGPV_BIGNUM *one;
5788 PGPV_BIGNUM *i;
5789
5790 i = PGPV_BN_dup(f);
5791 one = __UNCONST(PGPV_BN_value_one());
5792 PGPV_BN_sub(i, i, one);
5793 PGPV_BN_copy(res, f);
5794 while (PGPV_BN_cmp(i, one) > 0) {
5795 PGPV_BN_mul(res, res, i, NULL);
5796 PGPV_BN_sub(i, i, one);
5797 }
5798 PGPV_BN_free(i);
5799 return 1;
5800 }
5801
5802 /* get greatest common divisor */
5803 int
PGPV_BN_gcd(PGPV_BIGNUM * r,PGPV_BIGNUM * a,PGPV_BIGNUM * b,PGPV_BN_CTX * ctx)5804 PGPV_BN_gcd(PGPV_BIGNUM *r, PGPV_BIGNUM *a, PGPV_BIGNUM *b, PGPV_BN_CTX *ctx)
5805 {
5806 return mp_gcd(a, b, r);
5807 }
5808
5809 int
PGPV_BN_sub_word(PGPV_BIGNUM * a,PGPV_BN_ULONG w)5810 PGPV_BN_sub_word(PGPV_BIGNUM *a, PGPV_BN_ULONG w)
5811 {
5812 PGPV_BIGNUM *bnw;
5813
5814 bnw = PGPV_BN_new();
5815 PGPV_BN_set_word(bnw, w);
5816 PGPV_BN_sub(a, a, bnw);
5817 PGPV_BN_free(bnw);
5818 return 1;
5819 }
5820
5821 int
PGPV_BN_add_word(PGPV_BIGNUM * a,PGPV_BN_ULONG w)5822 PGPV_BN_add_word(PGPV_BIGNUM *a, PGPV_BN_ULONG w)
5823 {
5824 PGPV_BIGNUM *bnw;
5825
5826 bnw = PGPV_BN_new();
5827 PGPV_BN_set_word(bnw, w);
5828 PGPV_BN_add(a, a, bnw);
5829 PGPV_BN_free(bnw);
5830 return 1;
5831 }
5832