1 /*-
2  * Copyright (c) 2012-2019 Alistair Crooks <agc@NetBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 /* LibTomMath, multiple-precision integer library -- Tom St Denis
26  *
27  * LibTomMath is a library that provides multiple-precision
28  * integer arithmetic as well as number theoretic functionality.
29  *
30  * The library was designed directly after the MPI library by
31  * Michael Fromberger but has been written from scratch with
32  * additional optimizations in place.
33  *
34  * The library is free for all purposes without any express
35  * guarantee it works.
36  *
37  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
38  */
39 #include "config.h"
40 
41 #include <sys/types.h>
42 #include <sys/param.h>
43 
44 #ifdef _KERNEL
45 # include <sys/kmem.h>
46 #else
47 # include <arpa/inet.h>
48 # include <limits.h>
49 # include <stdarg.h>
50 # include <stdio.h>
51 # include <stdlib.h>
52 # include <string.h>
53 # include <unistd.h>
54 #endif
55 
56 #include "bn.h"
57 
58 /**************************************************************************/
59 
60 /* LibTomMath, multiple-precision integer library -- Tom St Denis
61  *
62  * LibTomMath is a library that provides multiple-precision
63  * integer arithmetic as well as number theoretic functionality.
64  *
65  * The library was designed directly after the MPI library by
66  * Michael Fromberger but has been written from scratch with
67  * additional optimizations in place.
68  *
69  * The library is free for all purposes without any express
70  * guarantee it works.
71  *
72  * Tom St Denis, tomstdenis@gmail.com, http://libtom.org
73  */
74 
75 #define MP_PREC		32
76 #define DIGIT_BIT	28
77 #define MP_MASK          ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
78 
79 #define MP_WARRAY	/*LINTED*/(1U << (((sizeof(mp_word) * CHAR_BIT) - (2 * DIGIT_BIT) + 1)))
80 
81 #define MP_NO		0
82 #define MP_YES		1
83 
84 #ifndef USE_ARG
85 #define USE_ARG(x)	/*LINTED*/(void)&(x)
86 #endif
87 
88 #ifndef __arraycount
89 #define	__arraycount(__x)	(sizeof(__x) / sizeof(__x[0]))
90 #endif
91 
92 #ifndef MIN
93 #define MIN(a,b)	(((a)<(b))?(a):(b))
94 #endif
95 
96 #define MP_ISZERO(a) (((a)->used == 0) ? MP_YES : MP_NO)
97 
98 typedef int           mp_err;
99 
100 static int signed_multiply(mp_int * a, mp_int * b, mp_int * c);
101 static int square(mp_int * a, mp_int * b);
102 
103 static int signed_subtract_word(mp_int *a, mp_digit b, mp_int *c);
104 
105 static inline void *
allocate(size_t n,size_t m)106 allocate(size_t n, size_t m)
107 {
108 	return calloc(n, m);
109 }
110 
111 static inline void
deallocate(void * v,size_t sz)112 deallocate(void *v, size_t sz)
113 {
114 	USE_ARG(sz);
115 	free(v);
116 }
117 
118 /* set to zero */
119 static inline void
mp_zero(mp_int * a)120 mp_zero(mp_int *a)
121 {
122 	a->sign = MP_ZPOS;
123 	a->used = 0;
124 	memset(a->dp, 0x0, a->alloc * sizeof(*a->dp));
125 }
126 
127 /* grow as required */
128 static int
mp_grow(mp_int * a,int size)129 mp_grow(mp_int *a, int size)
130 {
131 	mp_digit *tmp;
132 
133 	/* if the alloc size is smaller alloc more ram */
134 	if (a->alloc < size) {
135 		/* ensure there are always at least MP_PREC digits extra on top */
136 		size += (MP_PREC * 2) - (size % MP_PREC);
137 
138 		/* reallocate the array a->dp
139 		*
140 		* We store the return in a temporary variable
141 		* in case the operation failed we don't want
142 		* to overwrite the dp member of a.
143 		*/
144 		tmp = realloc(a->dp, sizeof(*tmp) * size);
145 		if (tmp == NULL) {
146 			/* reallocation failed but "a" is still valid [can be freed] */
147 			return MP_MEM;
148 		}
149 
150 		/* reallocation succeeded so set a->dp */
151 		a->dp = tmp;
152 		/* zero excess digits */
153 		memset(&a->dp[a->alloc], 0x0, (size - a->alloc) * sizeof(*a->dp));
154 		a->alloc = size;
155 	}
156 	return MP_OKAY;
157 }
158 
159 /* shift left a certain amount of digits */
160 static int
lshift_digits(mp_int * a,int b)161 lshift_digits(mp_int * a, int b)
162 {
163 	mp_digit *top, *bottom;
164 	int     x, res;
165 
166 	/* if its less than zero return */
167 	if (b <= 0) {
168 		return MP_OKAY;
169 	}
170 
171 	/* grow to fit the new digits */
172 	if (a->alloc < a->used + b) {
173 		if ((res = mp_grow(a, a->used + b)) != MP_OKAY) {
174 			return res;
175 		}
176 	}
177 
178 	/* increment the used by the shift amount then copy upwards */
179 	a->used += b;
180 
181 	/* top */
182 	top = a->dp + a->used - 1;
183 
184 	/* base */
185 	bottom = a->dp + a->used - 1 - b;
186 
187 	/* much like rshift_digits this is implemented using a sliding window
188 	* except the window goes the otherway around.  Copying from
189 	* the bottom to the top.
190 	*/
191 	for (x = a->used - 1; x >= b; x--) {
192 		*top-- = *bottom--;
193 	}
194 
195 	/* zero the lower digits */
196 	memset(a->dp, 0x0, b * sizeof(*a->dp));
197 	return MP_OKAY;
198 }
199 
200 /* trim unused digits
201  *
202  * This is used to ensure that leading zero digits are
203  * trimed and the leading "used" digit will be non-zero
204  * Typically very fast.  Also fixes the sign if there
205  * are no more leading digits
206  */
207 static void
trim_unused_digits(mp_int * a)208 trim_unused_digits(mp_int * a)
209 {
210 	/* decrease used while the most significant digit is
211 	* zero.
212 	*/
213 	while (a->used > 0 && a->dp[a->used - 1] == 0) {
214 		a->used -= 1;
215 	}
216 	/* reset the sign flag if used == 0 */
217 	if (a->used == 0) {
218 		a->sign = MP_ZPOS;
219 	}
220 }
221 
222 /* copy, b = a */
223 static int
mp_copy(PGPV_BIGNUM * a,PGPV_BIGNUM * b)224 mp_copy(PGPV_BIGNUM *a, PGPV_BIGNUM *b)
225 {
226 	int	res;
227 
228 	/* if dst == src do nothing */
229 	if (a == b) {
230 		return MP_OKAY;
231 	}
232 	if (a == NULL || b == NULL) {
233 		return MP_VAL;
234 	}
235 
236 	/* grow dest */
237 	if (b->alloc < a->used) {
238 		if ((res = mp_grow(b, a->used)) != MP_OKAY) {
239 			return res;
240 		}
241 	}
242 
243 	memcpy(b->dp, a->dp, a->used * sizeof(*b->dp));
244 	if (b->used > a->used) {
245 		memset(&b->dp[a->used], 0x0, (b->used - a->used) * sizeof(*b->dp));
246 	}
247 
248 	/* copy used count and sign */
249 	b->used = a->used;
250 	b->sign = a->sign;
251 	return MP_OKAY;
252 }
253 
254 /* shift left by a certain bit count */
255 static int
lshift_bits(mp_int * a,int b,mp_int * c)256 lshift_bits(mp_int *a, int b, mp_int *c)
257 {
258 	mp_digit d;
259 	int      res;
260 
261 	/* copy */
262 	if (a != c) {
263 		if ((res = mp_copy(a, c)) != MP_OKAY) {
264 			return res;
265 		}
266 	}
267 
268 	if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
269 		if ((res = mp_grow(c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
270 			return res;
271 		}
272 	}
273 
274 	/* shift by as many digits in the bit count */
275 	if (b >= (int)DIGIT_BIT) {
276 		if ((res = lshift_digits(c, b / DIGIT_BIT)) != MP_OKAY) {
277 			return res;
278 		}
279 	}
280 
281 	/* shift any bit count < DIGIT_BIT */
282 	d = (mp_digit) (b % DIGIT_BIT);
283 	if (d != 0) {
284 		mp_digit *tmpc, shift, mask, carry, rr;
285 		int x;
286 
287 		/* bitmask for carries */
288 		mask = (((mp_digit)1) << d) - 1;
289 
290 		/* shift for msbs */
291 		shift = DIGIT_BIT - d;
292 
293 		/* alias */
294 		tmpc = c->dp;
295 
296 		/* carry */
297 		carry = 0;
298 		for (x = 0; x < c->used; x++) {
299 			/* get the higher bits of the current word */
300 			rr = (*tmpc >> shift) & mask;
301 
302 			/* shift the current word and OR in the carry */
303 			*tmpc = ((*tmpc << d) | carry) & MP_MASK;
304 			++tmpc;
305 
306 			/* set the carry to the carry bits of the current word */
307 			carry = rr;
308 		}
309 
310 		/* set final carry */
311 		if (carry != 0) {
312 			c->dp[c->used++] = carry;
313 		}
314 	}
315 	trim_unused_digits(c);
316 	return MP_OKAY;
317 }
318 
319 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
320 static int
mp_read_unsigned_bin(mp_int * a,const uint8_t * b,int c)321 mp_read_unsigned_bin(mp_int *a, const uint8_t *b, int c)
322 {
323 	int     res;
324 
325 	/* make sure there are at least two digits */
326 	if (a->alloc < 2) {
327 		if ((res = mp_grow(a, 2)) != MP_OKAY) {
328 			return res;
329 		}
330 	}
331 
332 	/* zero the int */
333 	mp_zero(a);
334 
335 	/* read the bytes in */
336 	while (c-- > 0) {
337 		if ((res = lshift_bits(a, 8, a)) != MP_OKAY) {
338 			return res;
339 		}
340 
341 		a->dp[0] |= *b++;
342 		a->used += 1;
343 	}
344 	trim_unused_digits(a);
345 	return MP_OKAY;
346 }
347 
348 /* returns the number of bits in an mpi */
349 static int
mp_count_bits(const mp_int * a)350 mp_count_bits(const mp_int *a)
351 {
352 	int     r;
353 	mp_digit q;
354 
355 	/* shortcut */
356 	if (a->used == 0) {
357 		return 0;
358 	}
359 
360 	/* get number of digits and add that */
361 	r = (a->used - 1) * DIGIT_BIT;
362 
363 	/* take the last digit and count the bits in it */
364 	for (q = a->dp[a->used - 1]; q > ((mp_digit) 0) ; r++) {
365 		q >>= ((mp_digit) 1);
366 	}
367 	return r;
368 }
369 
370 /* compare maginitude of two ints (unsigned) */
371 static int
compare_magnitude(mp_int * a,mp_int * b)372 compare_magnitude(mp_int * a, mp_int * b)
373 {
374 	int     n;
375 	mp_digit *tmpa, *tmpb;
376 
377 	/* compare based on # of non-zero digits */
378 	if (a->used > b->used) {
379 		return MP_GT;
380 	}
381 
382 	if (a->used < b->used) {
383 		return MP_LT;
384 	}
385 
386 	/* alias for a */
387 	tmpa = a->dp + (a->used - 1);
388 
389 	/* alias for b */
390 	tmpb = b->dp + (a->used - 1);
391 
392 	/* compare based on digits  */
393 	for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
394 		if (*tmpa > *tmpb) {
395 			return MP_GT;
396 		}
397 
398 		if (*tmpa < *tmpb) {
399 			return MP_LT;
400 		}
401 	}
402 	return MP_EQ;
403 }
404 
405 /* compare two ints (signed)*/
406 static int
signed_compare(mp_int * a,mp_int * b)407 signed_compare(mp_int * a, mp_int * b)
408 {
409 	/* compare based on sign */
410 	if (a->sign != b->sign) {
411 		return (a->sign == MP_NEG) ? MP_LT : MP_GT;
412 	}
413 	return (a->sign == MP_NEG) ? compare_magnitude(b, a) : compare_magnitude(a, b);
414 }
415 
416 /* get the size for an unsigned equivalent */
417 static int
mp_unsigned_bin_size(mp_int * a)418 mp_unsigned_bin_size(mp_int * a)
419 {
420 	int     size = mp_count_bits(a);
421 
422 	return (size / 8 + ((size & 7) != 0 ? 1 : 0));
423 }
424 
425 /* init a new mp_int */
426 static int
mp_init(mp_int * a)427 mp_init(mp_int * a)
428 {
429 	/* allocate memory required and clear it */
430 	a->dp = allocate(1, sizeof(*a->dp) * MP_PREC);
431 	if (a->dp == NULL) {
432 		return MP_MEM;
433 	}
434 
435 	/* set the digits to zero */
436 	memset(a->dp, 0x0, MP_PREC * sizeof(*a->dp));
437 
438 	/* set the used to zero, allocated digits to the default precision
439 	* and sign to positive */
440 	a->used  = 0;
441 	a->alloc = MP_PREC;
442 	a->sign  = MP_ZPOS;
443 
444 	return MP_OKAY;
445 }
446 
447 /* clear one (frees)  */
448 static void
mp_clear(mp_int * a)449 mp_clear(mp_int * a)
450 {
451 	/* only do anything if a hasn't been freed previously */
452 	if (a->dp != NULL) {
453 		memset(a->dp, 0x0, a->used * sizeof(*a->dp));
454 
455 		/* free ram */
456 		deallocate(a->dp, (size_t)a->alloc);
457 
458 		/* reset members to make debugging easier */
459 		a->dp = NULL;
460 		a->alloc = a->used = 0;
461 		a->sign  = MP_ZPOS;
462 	}
463 }
464 
465 static int
mp_init_multi(mp_int * mp,...)466 mp_init_multi(mp_int *mp, ...)
467 {
468 	mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
469 	int n = 0;                 /* Number of ok inits */
470 	mp_int* cur_arg = mp;
471 	va_list args;
472 
473 	va_start(args, mp);        /* init args to next argument from caller */
474 	while (cur_arg != NULL) {
475 		if (mp_init(cur_arg) != MP_OKAY) {
476 			/* Oops - error! Back-track and mp_clear what we already
477 			succeeded in init-ing, then return error.
478 			*/
479 			va_list clean_args;
480 
481 			/* end the current list */
482 			va_end(args);
483 
484 			/* now start cleaning up */
485 			cur_arg = mp;
486 			va_start(clean_args, mp);
487 			while (n--) {
488 				mp_clear(cur_arg);
489 				cur_arg = va_arg(clean_args, mp_int*);
490 			}
491 			va_end(clean_args);
492 			res = MP_MEM;
493 			break;
494 		}
495 		n++;
496 		cur_arg = va_arg(args, mp_int*);
497 	}
498 	va_end(args);
499 	return res;                /* Assumed ok, if error flagged above. */
500 }
501 
502 /* init an mp_init for a given size */
503 static int
mp_init_size(mp_int * a,int size)504 mp_init_size(mp_int * a, int size)
505 {
506 	/* pad size so there are always extra digits */
507 	size += (MP_PREC * 2) - (size % MP_PREC);
508 
509 	/* alloc mem */
510 	a->dp = allocate(1, sizeof(*a->dp) * size);
511 	if (a->dp == NULL) {
512 		return MP_MEM;
513 	}
514 
515 	/* set the members */
516 	a->used  = 0;
517 	a->alloc = size;
518 	a->sign  = MP_ZPOS;
519 
520 	/* zero the digits */
521 	memset(a->dp, 0x0, size * sizeof(*a->dp));
522 	return MP_OKAY;
523 }
524 
525 /* creates "a" then copies b into it */
526 static int
mp_init_copy(mp_int * a,mp_int * b)527 mp_init_copy(mp_int * a, mp_int * b)
528 {
529 	int     res;
530 
531 	if ((res = mp_init(a)) != MP_OKAY) {
532 		return res;
533 	}
534 	return mp_copy(b, a);
535 }
536 
537 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
538 static int
basic_add(mp_int * a,mp_int * b,mp_int * c)539 basic_add(mp_int * a, mp_int * b, mp_int * c)
540 {
541 	mp_int *x;
542 	int     olduse, res, min, max;
543 
544 	/* find sizes, we let |a| <= |b| which means we have to sort
545 	* them.  "x" will point to the input with the most digits
546 	*/
547 	if (a->used > b->used) {
548 		min = b->used;
549 		max = a->used;
550 		x = a;
551 	} else {
552 		min = a->used;
553 		max = b->used;
554 		x = b;
555 	}
556 
557 	/* init result */
558 	if (c->alloc < max + 1) {
559 		if ((res = mp_grow(c, max + 1)) != MP_OKAY) {
560 			return res;
561 		}
562 	}
563 
564 	/* get old used digit count and set new one */
565 	olduse = c->used;
566 	c->used = max + 1;
567 
568 	{
569 		mp_digit carry, *tmpa, *tmpb, *tmpc;
570 		int i;
571 
572 		/* alias for digit pointers */
573 
574 		/* first input */
575 		tmpa = a->dp;
576 
577 		/* second input */
578 		tmpb = b->dp;
579 
580 		/* destination */
581 		tmpc = c->dp;
582 
583 		/* zero the carry */
584 		carry = 0;
585 		for (i = 0; i < min; i++) {
586 			/* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
587 			*tmpc = *tmpa++ + *tmpb++ + carry;
588 
589 			/* U = carry bit of T[i] */
590 			carry = *tmpc >> ((mp_digit)DIGIT_BIT);
591 
592 			/* take away carry bit from T[i] */
593 			*tmpc++ &= MP_MASK;
594 		}
595 
596 		/* now copy higher words if any, that is in A+B
597 		* if A or B has more digits add those in
598 		*/
599 		if (min != max) {
600 			for (; i < max; i++) {
601 				/* T[i] = X[i] + U */
602 				*tmpc = x->dp[i] + carry;
603 
604 				/* U = carry bit of T[i] */
605 				carry = *tmpc >> ((mp_digit)DIGIT_BIT);
606 
607 				/* take away carry bit from T[i] */
608 				*tmpc++ &= MP_MASK;
609 			}
610 		}
611 
612 		/* add carry */
613 		*tmpc++ = carry;
614 
615 		/* clear digits above oldused */
616 		if (olduse > c->used) {
617 			memset(tmpc, 0x0, (olduse - c->used) * sizeof(*c->dp));
618 		}
619 	}
620 
621 	trim_unused_digits(c);
622 	return MP_OKAY;
623 }
624 
625 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
626 static int
basic_subtract(mp_int * a,mp_int * b,mp_int * c)627 basic_subtract(mp_int * a, mp_int * b, mp_int * c)
628 {
629 	int     olduse, res, min, max;
630 
631 	/* find sizes */
632 	min = b->used;
633 	max = a->used;
634 
635 	/* init result */
636 	if (c->alloc < max) {
637 		if ((res = mp_grow(c, max)) != MP_OKAY) {
638 			return res;
639 		}
640 	}
641 	olduse = c->used;
642 	c->used = max;
643 
644 	{
645 		mp_digit carry, *tmpa, *tmpb, *tmpc;
646 		int i;
647 
648 		/* alias for digit pointers */
649 		tmpa = a->dp;
650 		tmpb = b->dp;
651 		tmpc = c->dp;
652 
653 		/* set carry to zero */
654 		carry = 0;
655 		for (i = 0; i < min; i++) {
656 			/* T[i] = A[i] - B[i] - U */
657 			*tmpc = *tmpa++ - *tmpb++ - carry;
658 
659 			/* U = carry bit of T[i]
660 			* Note this saves performing an AND operation since
661 			* if a carry does occur it will propagate all the way to the
662 			* MSB.  As a result a single shift is enough to get the carry
663 			*/
664 			carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1));
665 
666 			/* Clear carry from T[i] */
667 			*tmpc++ &= MP_MASK;
668 		}
669 
670 		/* now copy higher words if any, e.g. if A has more digits than B  */
671 		for (; i < max; i++) {
672 			/* T[i] = A[i] - U */
673 			*tmpc = *tmpa++ - carry;
674 
675 			/* U = carry bit of T[i] */
676 			carry = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof(mp_digit) - 1));
677 
678 			/* Clear carry from T[i] */
679 			*tmpc++ &= MP_MASK;
680 		}
681 
682 		/* clear digits above used (since we may not have grown result above) */
683 		if (olduse > c->used) {
684 			memset(tmpc, 0x0, (olduse - c->used) * sizeof(*a->dp));
685 		}
686 	}
687 
688 	trim_unused_digits(c);
689 	return MP_OKAY;
690 }
691 
692 /* high level subtraction (handles signs) */
693 static int
signed_subtract(mp_int * a,mp_int * b,mp_int * c)694 signed_subtract(mp_int * a, mp_int * b, mp_int * c)
695 {
696 	int     sa, sb, res;
697 
698 	sa = a->sign;
699 	sb = b->sign;
700 
701 	if (sa != sb) {
702 		/* subtract a negative from a positive, OR */
703 		/* subtract a positive from a negative. */
704 		/* In either case, ADD their magnitudes, */
705 		/* and use the sign of the first number. */
706 		c->sign = sa;
707 		res = basic_add(a, b, c);
708 	} else {
709 		/* subtract a positive from a positive, OR */
710 		/* subtract a negative from a negative. */
711 		/* First, take the difference between their */
712 		/* magnitudes, then... */
713 		if (compare_magnitude(a, b) != MP_LT) {
714 			/* Copy the sign from the first */
715 			c->sign = sa;
716 			/* The first has a larger or equal magnitude */
717 			res = basic_subtract(a, b, c);
718 		} else {
719 			/* The result has the *opposite* sign from */
720 			/* the first number. */
721 			c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
722 			/* The second has a larger magnitude */
723 			res = basic_subtract(b, a, c);
724 		}
725 	}
726 	return res;
727 }
728 
729 /* shift right a certain amount of digits */
730 static int
rshift_digits(mp_int * a,int b)731 rshift_digits(mp_int * a, int b)
732 {
733 	/* if b <= 0 then ignore it */
734 	if (b <= 0) {
735 		return 0;
736 	}
737 
738 	/* if b > used then simply zero it and return */
739 	if (a->used <= b) {
740 		mp_zero(a);
741 		return 0;
742 	}
743 
744 	/* this is implemented as a sliding window where
745 	* the window is b-digits long and digits from
746 	* the top of the window are copied to the bottom
747 	*
748 	* e.g.
749 
750 	b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
751 		 /\                   |      ---->
752 		  \-------------------/      ---->
753 	*/
754 	memmove(a->dp, &a->dp[b], (a->used - b) * sizeof(*a->dp));
755 	memset(&a->dp[a->used - b], 0x0, b * sizeof(*a->dp));
756 
757 	/* remove excess digits */
758 	a->used -= b;
759 	return 1;
760 }
761 
762 /* multiply by a digit */
763 static int
multiply_digit(mp_int * a,mp_digit b,mp_int * c)764 multiply_digit(mp_int * a, mp_digit b, mp_int * c)
765 {
766 	mp_digit carry, *tmpa, *tmpc;
767 	mp_word  r;
768 	int      ix, res, olduse;
769 
770 	/* make sure c is big enough to hold a*b */
771 	if (c->alloc < a->used + 1) {
772 		if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
773 			return res;
774 		}
775 	}
776 
777 	/* get the original destinations used count */
778 	olduse = c->used;
779 
780 	/* set the sign */
781 	c->sign = a->sign;
782 
783 	/* alias for a->dp [source] */
784 	tmpa = a->dp;
785 
786 	/* alias for c->dp [dest] */
787 	tmpc = c->dp;
788 
789 	/* zero carry */
790 	carry = 0;
791 
792 	/* compute columns */
793 	for (ix = 0; ix < a->used; ix++) {
794 		/* compute product and carry sum for this term */
795 		r = ((mp_word) carry) + ((mp_word)*tmpa++) * ((mp_word)b);
796 
797 		/* mask off higher bits to get a single digit */
798 		*tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
799 
800 		/* send carry into next iteration */
801 		carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
802 	}
803 
804 	/* store final carry [if any] and increment ix offset  */
805 	*tmpc++ = carry;
806 	++ix;
807 	if (olduse > ix) {
808 		memset(tmpc, 0x0, (olduse - ix) * sizeof(*tmpc));
809 	}
810 
811 	/* set used count */
812 	c->used = a->used + 1;
813 	trim_unused_digits(c);
814 
815 	return MP_OKAY;
816 }
817 
818 /* high level addition (handles signs) */
819 static int
signed_add(mp_int * a,mp_int * b,mp_int * c)820 signed_add(mp_int * a, mp_int * b, mp_int * c)
821 {
822 	int     asign, bsign, res;
823 
824 	/* get sign of both inputs */
825 	asign = a->sign;
826 	bsign = b->sign;
827 
828 	/* handle two cases, not four */
829 	if (asign == bsign) {
830 		/* both positive or both negative */
831 		/* add their magnitudes, copy the sign */
832 		c->sign = asign;
833 		res = basic_add(a, b, c);
834 	} else {
835 		/* one positive, the other negative */
836 		/* subtract the one with the greater magnitude from */
837 		/* the one of the lesser magnitude.  The result gets */
838 		/* the sign of the one with the greater magnitude. */
839 		if (compare_magnitude(a, b) == MP_LT) {
840 			c->sign = bsign;
841 			res = basic_subtract(b, a, c);
842 		} else {
843 			c->sign = asign;
844 			res = basic_subtract(a, b, c);
845 		}
846 	}
847 	return res;
848 }
849 
850 /* swap the elements of two integers, for cases where you can't simply swap the
851  * mp_int pointers around
852  */
853 static void
mp_exch(mp_int * a,mp_int * b)854 mp_exch(mp_int *a, mp_int *b)
855 {
856 	mp_int  t;
857 
858 	t  = *a;
859 	*a = *b;
860 	*b = t;
861 }
862 
863 /* calc a value mod 2**b */
864 static int
modulo_2_to_power(mp_int * a,int b,mp_int * c)865 modulo_2_to_power(mp_int * a, int b, mp_int * c)
866 {
867 	int     x, res;
868 
869 	/* if b is <= 0 then zero the int */
870 	if (b <= 0) {
871 		mp_zero(c);
872 		return MP_OKAY;
873 	}
874 
875 	/* if the modulus is larger than the value than return */
876 	if (b >= (int) (a->used * DIGIT_BIT)) {
877 		res = mp_copy(a, c);
878 		return res;
879 	}
880 
881 	/* copy */
882 	if ((res = mp_copy(a, c)) != MP_OKAY) {
883 		return res;
884 	}
885 
886 	/* zero digits above the last digit of the modulus */
887 	for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
888 		c->dp[x] = 0;
889 	}
890 	/* clear the digit that is not completely outside/inside the modulus */
891 	c->dp[b / DIGIT_BIT] &=
892 		(mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
893 	trim_unused_digits(c);
894 	return MP_OKAY;
895 }
896 
897 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
898 static int
rshift_bits(mp_int * a,int b,mp_int * c,mp_int * d)899 rshift_bits(mp_int * a, int b, mp_int * c, mp_int * d)
900 {
901 	mp_digit D, r, rr;
902 	int     x, res;
903 	mp_int  t;
904 
905 
906 	/* if the shift count is <= 0 then we do no work */
907 	if (b <= 0) {
908 		res = mp_copy(a, c);
909 		if (d != NULL) {
910 			mp_zero(d);
911 		}
912 		return res;
913 	}
914 
915 	if ((res = mp_init(&t)) != MP_OKAY) {
916 		return res;
917 	}
918 
919 	/* get the remainder */
920 	if (d != NULL) {
921 		if ((res = modulo_2_to_power(a, b, &t)) != MP_OKAY) {
922 			mp_clear(&t);
923 			return res;
924 		}
925 	}
926 
927 	/* copy */
928 	if ((res = mp_copy(a, c)) != MP_OKAY) {
929 		mp_clear(&t);
930 		return res;
931 	}
932 
933 	/* shift by as many digits in the bit count */
934 	if (b >= (int)DIGIT_BIT) {
935 		rshift_digits(c, b / DIGIT_BIT);
936 	}
937 
938 	/* shift any bit count < DIGIT_BIT */
939 	D = (mp_digit) (b % DIGIT_BIT);
940 	if (D != 0) {
941 		mp_digit *tmpc, mask, shift;
942 
943 		/* mask */
944 		mask = (((mp_digit)1) << D) - 1;
945 
946 		/* shift for lsb */
947 		shift = DIGIT_BIT - D;
948 
949 		/* alias */
950 		tmpc = c->dp + (c->used - 1);
951 
952 		/* carry */
953 		r = 0;
954 		for (x = c->used - 1; x >= 0; x--) {
955 			/* get the lower  bits of this word in a temp */
956 			rr = *tmpc & mask;
957 
958 			/* shift the current word and mix in the carry bits from the previous word */
959 			*tmpc = (*tmpc >> D) | (r << shift);
960 			--tmpc;
961 
962 			/* set the carry to the carry bits of the current word found above */
963 			r = rr;
964 		}
965 	}
966 	trim_unused_digits(c);
967 	if (d != NULL) {
968 		mp_exch(&t, d);
969 	}
970 	mp_clear(&t);
971 	return MP_OKAY;
972 }
973 
974 /* integer signed division.
975  * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
976  * HAC pp.598 Algorithm 14.20
977  *
978  * Note that the description in HAC is horribly
979  * incomplete.  For example, it doesn't consider
980  * the case where digits are removed from 'x' in
981  * the inner loop.  It also doesn't consider the
982  * case that y has fewer than three digits, etc..
983  *
984  * The overall algorithm is as described as
985  * 14.20 from HAC but fixed to treat these cases.
986 */
987 static int
signed_divide(mp_int * c,mp_int * d,mp_int * a,mp_int * b)988 signed_divide(mp_int *c, mp_int *d, mp_int *a, mp_int *b)
989 {
990 	mp_int  q, x, y, t1, t2;
991 	int     res, n, t, i, norm, neg;
992 
993 	/* is divisor zero ? */
994 	if (MP_ISZERO(b) == MP_YES) {
995 		return MP_VAL;
996 	}
997 
998 	/* if a < b then q=0, r = a */
999 	if (compare_magnitude(a, b) == MP_LT) {
1000 		if (d != NULL) {
1001 			res = mp_copy(a, d);
1002 		} else {
1003 			res = MP_OKAY;
1004 		}
1005 		if (c != NULL) {
1006 			mp_zero(c);
1007 		}
1008 		return res;
1009 	}
1010 
1011 	if ((res = mp_init_size(&q, a->used + 2)) != MP_OKAY) {
1012 		return res;
1013 	}
1014 	q.used = a->used + 2;
1015 
1016 	if ((res = mp_init(&t1)) != MP_OKAY) {
1017 		goto LBL_Q;
1018 	}
1019 
1020 	if ((res = mp_init(&t2)) != MP_OKAY) {
1021 		goto LBL_T1;
1022 	}
1023 
1024 	if ((res = mp_init_copy(&x, a)) != MP_OKAY) {
1025 		goto LBL_T2;
1026 	}
1027 
1028 	if ((res = mp_init_copy(&y, b)) != MP_OKAY) {
1029 		goto LBL_X;
1030 	}
1031 
1032 	/* fix the sign */
1033 	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
1034 	x.sign = y.sign = MP_ZPOS;
1035 
1036 	/* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
1037 	norm = mp_count_bits(&y) % DIGIT_BIT;
1038 	if (norm < (int)(DIGIT_BIT-1)) {
1039 		norm = (DIGIT_BIT-1) - norm;
1040 		if ((res = lshift_bits(&x, norm, &x)) != MP_OKAY) {
1041 			goto LBL_Y;
1042 		}
1043 		if ((res = lshift_bits(&y, norm, &y)) != MP_OKAY) {
1044 			goto LBL_Y;
1045 		}
1046 	} else {
1047 		norm = 0;
1048 	}
1049 
1050 	/* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
1051 	n = x.used - 1;
1052 	t = y.used - 1;
1053 
1054 	/* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
1055 	if ((res = lshift_digits(&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
1056 		goto LBL_Y;
1057 	}
1058 
1059 	while (signed_compare(&x, &y) != MP_LT) {
1060 		++(q.dp[n - t]);
1061 		if ((res = signed_subtract(&x, &y, &x)) != MP_OKAY) {
1062 			goto LBL_Y;
1063 		}
1064 	}
1065 
1066 	/* reset y by shifting it back down */
1067 	rshift_digits(&y, n - t);
1068 
1069 	/* step 3. for i from n down to (t + 1) */
1070 	for (i = n; i >= (t + 1); i--) {
1071 		if (i > x.used) {
1072 			continue;
1073 		}
1074 
1075 		/* step 3.1 if xi == yt then set q{i-t-1} to b-1,
1076 		* otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
1077 		if (x.dp[i] == y.dp[t]) {
1078 			q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
1079 		} else {
1080 			mp_word tmp;
1081 			tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
1082 			tmp |= ((mp_word) x.dp[i - 1]);
1083 			tmp /= ((mp_word) y.dp[t]);
1084 			if (tmp > (mp_word) MP_MASK) {
1085 				tmp = MP_MASK;
1086 			}
1087 			q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
1088 		}
1089 
1090 		/* while (q{i-t-1} * (yt * b + y{t-1})) >
1091 		     xi * b**2 + xi-1 * b + xi-2
1092 			do q{i-t-1} -= 1;
1093 		*/
1094 		q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
1095 		do {
1096 			q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
1097 
1098 			/* find left hand */
1099 			mp_zero(&t1);
1100 			t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
1101 			t1.dp[1] = y.dp[t];
1102 			t1.used = 2;
1103 			if ((res = multiply_digit(&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1104 				goto LBL_Y;
1105 			}
1106 
1107 			/* find right hand */
1108 			t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
1109 			t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
1110 			t2.dp[2] = x.dp[i];
1111 			t2.used = 3;
1112 		} while (compare_magnitude(&t1, &t2) == MP_GT);
1113 
1114 		/* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
1115 		if ((res = multiply_digit(&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
1116 			goto LBL_Y;
1117 		}
1118 
1119 		if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) {
1120 			goto LBL_Y;
1121 		}
1122 
1123 		if ((res = signed_subtract(&x, &t1, &x)) != MP_OKAY) {
1124 			goto LBL_Y;
1125 		}
1126 
1127 		/* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
1128 		if (x.sign == MP_NEG) {
1129 			if ((res = mp_copy(&y, &t1)) != MP_OKAY) {
1130 				goto LBL_Y;
1131 			}
1132 			if ((res = lshift_digits(&t1, i - t - 1)) != MP_OKAY) {
1133 				goto LBL_Y;
1134 			}
1135 			if ((res = signed_add(&x, &t1, &x)) != MP_OKAY) {
1136 				goto LBL_Y;
1137 			}
1138 
1139 			q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
1140 		}
1141 	}
1142 
1143 	/* now q is the quotient and x is the remainder
1144 	* [which we have to normalize]
1145 	*/
1146 
1147 	/* get sign before writing to c */
1148 	x.sign = x.used == 0 ? MP_ZPOS : a->sign;
1149 
1150 	if (c != NULL) {
1151 		trim_unused_digits(&q);
1152 		mp_exch(&q, c);
1153 		c->sign = neg;
1154 	}
1155 
1156 	if (d != NULL) {
1157 		rshift_bits(&x, norm, &x, NULL);
1158 		mp_exch(&x, d);
1159 	}
1160 
1161 	res = MP_OKAY;
1162 
1163 LBL_Y:
1164 	mp_clear(&y);
1165 LBL_X:
1166 	mp_clear(&x);
1167 LBL_T2:
1168 	mp_clear(&t2);
1169 LBL_T1:
1170 	mp_clear(&t1);
1171 LBL_Q:
1172 	mp_clear(&q);
1173 	return res;
1174 }
1175 
1176 /* c = a mod b, 0 <= c < b */
1177 static int
modulo(mp_int * a,mp_int * b,mp_int * c)1178 modulo(mp_int * a, mp_int * b, mp_int * c)
1179 {
1180 	mp_int  t;
1181 	int     res;
1182 
1183 	if ((res = mp_init(&t)) != MP_OKAY) {
1184 		return res;
1185 	}
1186 
1187 	if ((res = signed_divide(NULL, &t, a, b)) != MP_OKAY) {
1188 		mp_clear(&t);
1189 		return res;
1190 	}
1191 
1192 	if (t.sign != b->sign) {
1193 		res = signed_add(b, &t, c);
1194 	} else {
1195 		res = MP_OKAY;
1196 		mp_exch(&t, c);
1197 	}
1198 
1199 	mp_clear(&t);
1200 	return res;
1201 }
1202 
1203 /* set to a digit */
1204 static void
set_word(mp_int * a,mp_digit b)1205 set_word(mp_int * a, mp_digit b)
1206 {
1207 	mp_zero(a);
1208 	a->dp[0] = b & MP_MASK;
1209 	a->used = (a->dp[0] != 0) ? 1 : 0;
1210 }
1211 
1212 /* b = a/2 */
1213 static int
half(mp_int * a,mp_int * b)1214 half(mp_int * a, mp_int * b)
1215 {
1216 	int     x, res, oldused;
1217 
1218 	/* copy */
1219 	if (b->alloc < a->used) {
1220 		if ((res = mp_grow(b, a->used)) != MP_OKAY) {
1221 			return res;
1222 		}
1223 	}
1224 
1225 	oldused = b->used;
1226 	b->used = a->used;
1227 	{
1228 		mp_digit r, rr, *tmpa, *tmpb;
1229 
1230 		/* source alias */
1231 		tmpa = a->dp + b->used - 1;
1232 
1233 		/* dest alias */
1234 		tmpb = b->dp + b->used - 1;
1235 
1236 		/* carry */
1237 		r = 0;
1238 		for (x = b->used - 1; x >= 0; x--) {
1239 			/* get the carry for the next iteration */
1240 			rr = *tmpa & 1;
1241 
1242 			/* shift the current digit, add in carry and store */
1243 			*tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
1244 
1245 			/* forward carry to next iteration */
1246 			r = rr;
1247 		}
1248 
1249 		/* zero excess digits */
1250 		tmpb = b->dp + b->used;
1251 		for (x = b->used; x < oldused; x++) {
1252 			*tmpb++ = 0;
1253 		}
1254 	}
1255 	b->sign = a->sign;
1256 	trim_unused_digits(b);
1257 	return MP_OKAY;
1258 }
1259 
1260 /* compare a digit */
1261 static int
compare_digit(mp_int * a,mp_digit b)1262 compare_digit(mp_int * a, mp_digit b)
1263 {
1264 	/* compare based on sign */
1265 	if (a->sign == MP_NEG) {
1266 		return MP_LT;
1267 	}
1268 
1269 	/* compare based on magnitude */
1270 	if (a->used > 1) {
1271 		return MP_GT;
1272 	}
1273 
1274 	/* compare the only digit of a to b */
1275 	if (a->dp[0] > b) {
1276 		return MP_GT;
1277 	} else if (a->dp[0] < b) {
1278 		return MP_LT;
1279 	} else {
1280 		return MP_EQ;
1281 	}
1282 }
1283 
1284 static void
mp_clear_multi(mp_int * mp,...)1285 mp_clear_multi(mp_int *mp, ...)
1286 {
1287 	mp_int* next_mp = mp;
1288 	va_list args;
1289 
1290 	va_start(args, mp);
1291 	while (next_mp != NULL) {
1292 		mp_clear(next_mp);
1293 		next_mp = va_arg(args, mp_int*);
1294 	}
1295 	va_end(args);
1296 }
1297 
1298 /* computes the modular inverse via binary extended euclidean algorithm,
1299  * that is c = 1/a mod b
1300  *
1301  * Based on slow invmod except this is optimized for the case where b is
1302  * odd as per HAC Note 14.64 on pp. 610
1303  */
1304 static int
fast_modular_inverse(mp_int * a,mp_int * b,mp_int * c)1305 fast_modular_inverse(mp_int * a, mp_int * b, mp_int * c)
1306 {
1307 	mp_int  x, y, u, v, B, D;
1308 	int     res, neg;
1309 
1310 	/* 2. [modified] b must be odd   */
1311 	if (MP_ISZERO(b) == MP_YES) {
1312 		return MP_VAL;
1313 	}
1314 
1315 	/* init all our temps */
1316 	if ((res = mp_init_multi(&x, &y, &u, &v, &B, &D, NULL)) != MP_OKAY) {
1317 		return res;
1318 	}
1319 
1320 	/* x == modulus, y == value to invert */
1321 	if ((res = mp_copy(b, &x)) != MP_OKAY) {
1322 		goto LBL_ERR;
1323 	}
1324 
1325 	/* we need y = |a| */
1326 	if ((res = modulo(a, b, &y)) != MP_OKAY) {
1327 		goto LBL_ERR;
1328 	}
1329 
1330 	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1331 	if ((res = mp_copy(&x, &u)) != MP_OKAY) {
1332 		goto LBL_ERR;
1333 	}
1334 	if ((res = mp_copy(&y, &v)) != MP_OKAY) {
1335 		goto LBL_ERR;
1336 	}
1337 	set_word(&D, 1);
1338 
1339 top:
1340 	/* 4.  while u is even do */
1341 	while (PGPV_BN_is_even(&u) == 1) {
1342 		/* 4.1 u = u/2 */
1343 		if ((res = half(&u, &u)) != MP_OKAY) {
1344 			goto LBL_ERR;
1345 		}
1346 		/* 4.2 if B is odd then */
1347 		if (PGPV_BN_is_odd(&B) == 1) {
1348 			if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) {
1349 				goto LBL_ERR;
1350 			}
1351 		}
1352 		/* B = B/2 */
1353 		if ((res = half(&B, &B)) != MP_OKAY) {
1354 			goto LBL_ERR;
1355 		}
1356 	}
1357 
1358 	/* 5.  while v is even do */
1359 	while (PGPV_BN_is_even(&v) == 1) {
1360 		/* 5.1 v = v/2 */
1361 		if ((res = half(&v, &v)) != MP_OKAY) {
1362 			goto LBL_ERR;
1363 		}
1364 		/* 5.2 if D is odd then */
1365 		if (PGPV_BN_is_odd(&D) == 1) {
1366 			/* D = (D-x)/2 */
1367 			if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) {
1368 				goto LBL_ERR;
1369 			}
1370 		}
1371 		/* D = D/2 */
1372 		if ((res = half(&D, &D)) != MP_OKAY) {
1373 			goto LBL_ERR;
1374 		}
1375 	}
1376 
1377 	/* 6.  if u >= v then */
1378 	if (signed_compare(&u, &v) != MP_LT) {
1379 		/* u = u - v, B = B - D */
1380 		if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) {
1381 			goto LBL_ERR;
1382 		}
1383 
1384 		if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) {
1385 			goto LBL_ERR;
1386 		}
1387 	} else {
1388 		/* v - v - u, D = D - B */
1389 		if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) {
1390 			goto LBL_ERR;
1391 		}
1392 
1393 		if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) {
1394 			goto LBL_ERR;
1395 		}
1396 	}
1397 
1398 	/* if not zero goto step 4 */
1399 	if (MP_ISZERO(&u) == MP_NO) {
1400 		goto top;
1401 	}
1402 
1403 	/* now a = C, b = D, gcd == g*v */
1404 
1405 	/* if v != 1 then there is no inverse */
1406 	if (compare_digit(&v, 1) != MP_EQ) {
1407 		res = MP_VAL;
1408 		goto LBL_ERR;
1409 	}
1410 
1411 	/* b is now the inverse */
1412 	neg = a->sign;
1413 	while (D.sign == MP_NEG) {
1414 		if ((res = signed_add(&D, b, &D)) != MP_OKAY) {
1415 			goto LBL_ERR;
1416 		}
1417 	}
1418 	mp_exch(&D, c);
1419 	c->sign = neg;
1420 	res = MP_OKAY;
1421 
1422 LBL_ERR:
1423 	mp_clear_multi (&x, &y, &u, &v, &B, &D, NULL);
1424 	return res;
1425 }
1426 
1427 /* hac 14.61, pp608 */
1428 static int
slow_modular_inverse(mp_int * a,mp_int * b,mp_int * c)1429 slow_modular_inverse(mp_int * a, mp_int * b, mp_int * c)
1430 {
1431 	mp_int  x, y, u, v, A, B, C, D;
1432 	int     res;
1433 
1434 	/* b cannot be negative */
1435 	if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) {
1436 		return MP_VAL;
1437 	}
1438 
1439 	/* init temps */
1440 	if ((res = mp_init_multi(&x, &y, &u, &v,
1441 		   &A, &B, &C, &D, NULL)) != MP_OKAY) {
1442 		return res;
1443 	}
1444 
1445 	/* x = a, y = b */
1446 	if ((res = modulo(a, b, &x)) != MP_OKAY) {
1447 		goto LBL_ERR;
1448 	}
1449 	if ((res = mp_copy(b, &y)) != MP_OKAY) {
1450 		goto LBL_ERR;
1451 	}
1452 
1453 	/* 2. [modified] if x,y are both even then return an error! */
1454 	if (PGPV_BN_is_even(&x) == 1 && PGPV_BN_is_even(&y) == 1) {
1455 		res = MP_VAL;
1456 		goto LBL_ERR;
1457 	}
1458 
1459 	/* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
1460 	if ((res = mp_copy(&x, &u)) != MP_OKAY) {
1461 		goto LBL_ERR;
1462 	}
1463 	if ((res = mp_copy(&y, &v)) != MP_OKAY) {
1464 		goto LBL_ERR;
1465 	}
1466 	set_word(&A, 1);
1467 	set_word(&D, 1);
1468 
1469 top:
1470 	/* 4.  while u is even do */
1471 	while (PGPV_BN_is_even(&u) == 1) {
1472 		/* 4.1 u = u/2 */
1473 		if ((res = half(&u, &u)) != MP_OKAY) {
1474 			goto LBL_ERR;
1475 		}
1476 		/* 4.2 if A or B is odd then */
1477 		if (PGPV_BN_is_odd(&A) == 1 || PGPV_BN_is_odd(&B) == 1) {
1478 			/* A = (A+y)/2, B = (B-x)/2 */
1479 			if ((res = signed_add(&A, &y, &A)) != MP_OKAY) {
1480 				 goto LBL_ERR;
1481 			}
1482 			if ((res = signed_subtract(&B, &x, &B)) != MP_OKAY) {
1483 				 goto LBL_ERR;
1484 			}
1485 		}
1486 		/* A = A/2, B = B/2 */
1487 		if ((res = half(&A, &A)) != MP_OKAY) {
1488 			goto LBL_ERR;
1489 		}
1490 		if ((res = half(&B, &B)) != MP_OKAY) {
1491 			goto LBL_ERR;
1492 		}
1493 	}
1494 
1495 	/* 5.  while v is even do */
1496 	while (PGPV_BN_is_even(&v) == 1) {
1497 		/* 5.1 v = v/2 */
1498 		if ((res = half(&v, &v)) != MP_OKAY) {
1499 			goto LBL_ERR;
1500 		}
1501 		/* 5.2 if C or D is odd then */
1502 		if (PGPV_BN_is_odd(&C) == 1 || PGPV_BN_is_odd(&D) == 1) {
1503 			/* C = (C+y)/2, D = (D-x)/2 */
1504 			if ((res = signed_add(&C, &y, &C)) != MP_OKAY) {
1505 				 goto LBL_ERR;
1506 			}
1507 			if ((res = signed_subtract(&D, &x, &D)) != MP_OKAY) {
1508 				 goto LBL_ERR;
1509 			}
1510 		}
1511 		/* C = C/2, D = D/2 */
1512 		if ((res = half(&C, &C)) != MP_OKAY) {
1513 			goto LBL_ERR;
1514 		}
1515 		if ((res = half(&D, &D)) != MP_OKAY) {
1516 			goto LBL_ERR;
1517 		}
1518 	}
1519 
1520 	/* 6.  if u >= v then */
1521 	if (signed_compare(&u, &v) != MP_LT) {
1522 		/* u = u - v, A = A - C, B = B - D */
1523 		if ((res = signed_subtract(&u, &v, &u)) != MP_OKAY) {
1524 			goto LBL_ERR;
1525 		}
1526 
1527 		if ((res = signed_subtract(&A, &C, &A)) != MP_OKAY) {
1528 			goto LBL_ERR;
1529 		}
1530 
1531 		if ((res = signed_subtract(&B, &D, &B)) != MP_OKAY) {
1532 			goto LBL_ERR;
1533 		}
1534 	} else {
1535 		/* v - v - u, C = C - A, D = D - B */
1536 		if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) {
1537 			goto LBL_ERR;
1538 		}
1539 
1540 		if ((res = signed_subtract(&C, &A, &C)) != MP_OKAY) {
1541 			goto LBL_ERR;
1542 		}
1543 
1544 		if ((res = signed_subtract(&D, &B, &D)) != MP_OKAY) {
1545 			goto LBL_ERR;
1546 		}
1547 	}
1548 
1549 	/* if not zero goto step 4 */
1550 	if (PGPV_BN_is_zero(&u) == 0) {
1551 		goto top;
1552 	}
1553 	/* now a = C, b = D, gcd == g*v */
1554 
1555 	/* if v != 1 then there is no inverse */
1556 	if (compare_digit(&v, 1) != MP_EQ) {
1557 		res = MP_VAL;
1558 		goto LBL_ERR;
1559 	}
1560 
1561 	/* if its too low */
1562 	while (compare_digit(&C, 0) == MP_LT) {
1563 		if ((res = signed_add(&C, b, &C)) != MP_OKAY) {
1564 			 goto LBL_ERR;
1565 		}
1566 	}
1567 
1568 	/* too big */
1569 	while (compare_magnitude(&C, b) != MP_LT) {
1570 		if ((res = signed_subtract(&C, b, &C)) != MP_OKAY) {
1571 			 goto LBL_ERR;
1572 		}
1573 	}
1574 
1575 	/* C is now the inverse */
1576 	mp_exch(&C, c);
1577 	res = MP_OKAY;
1578 LBL_ERR:
1579 	mp_clear_multi(&x, &y, &u, &v, &A, &B, &C, &D, NULL);
1580 	return res;
1581 }
1582 
1583 static int
modular_inverse(mp_int * c,mp_int * a,mp_int * b)1584 modular_inverse(mp_int *c, mp_int *a, mp_int *b)
1585 {
1586 	/* b cannot be negative */
1587 	if (b->sign == MP_NEG || MP_ISZERO(b) == MP_YES) {
1588 		return MP_VAL;
1589 	}
1590 
1591 	/* if the modulus is odd we can use a faster routine instead */
1592 	if (PGPV_BN_is_odd(b) == 1) {
1593 		return fast_modular_inverse(a, b, c);
1594 	}
1595 	return slow_modular_inverse(a, b, c);
1596 }
1597 
1598 /* b = |a|
1599  *
1600  * Simple function copies the input and fixes the sign to positive
1601  */
1602 static int
absolute(mp_int * a,mp_int * b)1603 absolute(mp_int * a, mp_int * b)
1604 {
1605 	int     res;
1606 
1607 	/* copy a to b */
1608 	if (a != b) {
1609 		if ((res = mp_copy(a, b)) != MP_OKAY) {
1610 			return res;
1611 		}
1612 	}
1613 
1614 	/* force the sign of b to positive */
1615 	b->sign = MP_ZPOS;
1616 
1617 	return MP_OKAY;
1618 }
1619 
1620 /* determines if reduce_2k_l can be used */
1621 static int
mp_reduce_is_2k_l(mp_int * a)1622 mp_reduce_is_2k_l(mp_int *a)
1623 {
1624 	int ix, iy;
1625 
1626 	if (a->used == 0) {
1627 		return MP_NO;
1628 	} else if (a->used == 1) {
1629 		return MP_YES;
1630 	} else if (a->used > 1) {
1631 		/* if more than half of the digits are -1 we're sold */
1632 		for (iy = ix = 0; ix < a->used; ix++) {
1633 			if (a->dp[ix] == MP_MASK) {
1634 				++iy;
1635 			}
1636 		}
1637 		return (iy >= (a->used/2)) ? MP_YES : MP_NO;
1638 
1639 	}
1640 	return MP_NO;
1641 }
1642 
1643 /* computes a = 2**b
1644  *
1645  * Simple algorithm which zeroes the int, grows it then just sets one bit
1646  * as required.
1647  */
1648 static int
mp_2expt(mp_int * a,int b)1649 mp_2expt(mp_int * a, int b)
1650 {
1651 	int     res;
1652 
1653 	/* zero a as per default */
1654 	mp_zero(a);
1655 
1656 	/* grow a to accomodate the single bit */
1657 	if ((res = mp_grow(a, b / DIGIT_BIT + 1)) != MP_OKAY) {
1658 		return res;
1659 	}
1660 
1661 	/* set the used count of where the bit will go */
1662 	a->used = b / DIGIT_BIT + 1;
1663 
1664 	/* put the single bit in its place */
1665 	a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
1666 
1667 	return MP_OKAY;
1668 }
1669 
1670 /* pre-calculate the value required for Barrett reduction
1671  * For a given modulus "b" it calulates the value required in "a"
1672  */
1673 static int
mp_reduce_setup(mp_int * a,mp_int * b)1674 mp_reduce_setup(mp_int * a, mp_int * b)
1675 {
1676 	int     res;
1677 
1678 	if ((res = mp_2expt(a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
1679 		return res;
1680 	}
1681 	return signed_divide(a, NULL, a, b);
1682 }
1683 
1684 /* b = a*2 */
1685 static int
doubled(mp_int * a,mp_int * b)1686 doubled(mp_int * a, mp_int * b)
1687 {
1688 	int     x, res, oldused;
1689 
1690 	/* grow to accomodate result */
1691 	if (b->alloc < a->used + 1) {
1692 		if ((res = mp_grow(b, a->used + 1)) != MP_OKAY) {
1693 			return res;
1694 		}
1695 	}
1696 
1697 	oldused = b->used;
1698 	b->used = a->used;
1699 
1700 	{
1701 		mp_digit r, rr, *tmpa, *tmpb;
1702 
1703 		/* alias for source */
1704 		tmpa = a->dp;
1705 
1706 		/* alias for dest */
1707 		tmpb = b->dp;
1708 
1709 		/* carry */
1710 		r = 0;
1711 		for (x = 0; x < a->used; x++) {
1712 
1713 			/* get what will be the *next* carry bit from the
1714 			* MSB of the current digit
1715 			*/
1716 			rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
1717 
1718 			/* now shift up this digit, add in the carry [from the previous] */
1719 			*tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
1720 
1721 			/* copy the carry that would be from the source
1722 			* digit into the next iteration
1723 			*/
1724 			r = rr;
1725 		}
1726 
1727 		/* new leading digit? */
1728 		if (r != 0) {
1729 			/* add a MSB which is always 1 at this point */
1730 			*tmpb = 1;
1731 			++(b->used);
1732 		}
1733 
1734 		/* now zero any excess digits on the destination
1735 		* that we didn't write to
1736 		*/
1737 		tmpb = b->dp + b->used;
1738 		for (x = b->used; x < oldused; x++) {
1739 			*tmpb++ = 0;
1740 		}
1741 	}
1742 	b->sign = a->sign;
1743 	return MP_OKAY;
1744 }
1745 
1746 /* divide by three (based on routine from MPI and the GMP manual) */
1747 static int
third(mp_int * a,mp_int * c,mp_digit * d)1748 third(mp_int * a, mp_int *c, mp_digit * d)
1749 {
1750 	mp_int   q;
1751 	mp_word  w, t;
1752 	mp_digit b;
1753 	int      res, ix;
1754 
1755 	/* b = 2**DIGIT_BIT / 3 */
1756 	b = (((mp_word)1) << ((mp_word)DIGIT_BIT)) / ((mp_word)3);
1757 
1758 	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
1759 		return res;
1760 	}
1761 
1762 	q.used = a->used;
1763 	q.sign = a->sign;
1764 	w = 0;
1765 	for (ix = a->used - 1; ix >= 0; ix--) {
1766 		w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
1767 
1768 		if (w >= 3) {
1769 			/* multiply w by [1/3] */
1770 			t = (w * ((mp_word)b)) >> ((mp_word)DIGIT_BIT);
1771 
1772 			/* now subtract 3 * [w/3] from w, to get the remainder */
1773 			w -= t+t+t;
1774 
1775 			/* fixup the remainder as required since
1776 			* the optimization is not exact.
1777 			*/
1778 			while (w >= 3) {
1779 				t += 1;
1780 				w -= 3;
1781 			}
1782 		} else {
1783 			t = 0;
1784 		}
1785 		q.dp[ix] = (mp_digit)t;
1786 	}
1787 
1788 	/* [optional] store the remainder */
1789 	if (d != NULL) {
1790 		*d = (mp_digit)w;
1791 	}
1792 
1793 	/* [optional] store the quotient */
1794 	if (c != NULL) {
1795 		trim_unused_digits(&q);
1796 		mp_exch(&q, c);
1797 	}
1798 	mp_clear(&q);
1799 
1800 	return res;
1801 }
1802 
1803 /* multiplication using the Toom-Cook 3-way algorithm
1804  *
1805  * Much more complicated than Karatsuba but has a lower
1806  * asymptotic running time of O(N**1.464).  This algorithm is
1807  * only particularly useful on VERY large inputs
1808  * (we're talking 1000s of digits here...).
1809 */
1810 static int
toom_cook_multiply(mp_int * a,mp_int * b,mp_int * c)1811 toom_cook_multiply(mp_int *a, mp_int *b, mp_int *c)
1812 {
1813 	mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
1814 	int res, B;
1815 
1816 	/* init temps */
1817 	if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4,
1818 			&a0, &a1, &a2, &b0, &b1,
1819 			&b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
1820 		return res;
1821 	}
1822 
1823 	/* B */
1824 	B = MIN(a->used, b->used) / 3;
1825 
1826 	/* a = a2 * B**2 + a1 * B + a0 */
1827 	if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
1828 		goto ERR;
1829 	}
1830 
1831 	if ((res = mp_copy(a, &a1)) != MP_OKAY) {
1832 		goto ERR;
1833 	}
1834 	rshift_digits(&a1, B);
1835 	modulo_2_to_power(&a1, DIGIT_BIT * B, &a1);
1836 
1837 	if ((res = mp_copy(a, &a2)) != MP_OKAY) {
1838 		goto ERR;
1839 	}
1840 	rshift_digits(&a2, B*2);
1841 
1842 	/* b = b2 * B**2 + b1 * B + b0 */
1843 	if ((res = modulo_2_to_power(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
1844 		goto ERR;
1845 	}
1846 
1847 	if ((res = mp_copy(b, &b1)) != MP_OKAY) {
1848 		goto ERR;
1849 	}
1850 	rshift_digits(&b1, B);
1851 	modulo_2_to_power(&b1, DIGIT_BIT * B, &b1);
1852 
1853 	if ((res = mp_copy(b, &b2)) != MP_OKAY) {
1854 		goto ERR;
1855 	}
1856 	rshift_digits(&b2, B*2);
1857 
1858 	/* w0 = a0*b0 */
1859 	if ((res = signed_multiply(&a0, &b0, &w0)) != MP_OKAY) {
1860 		goto ERR;
1861 	}
1862 
1863 	/* w4 = a2 * b2 */
1864 	if ((res = signed_multiply(&a2, &b2, &w4)) != MP_OKAY) {
1865 		goto ERR;
1866 	}
1867 
1868 	/* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
1869 	if ((res = doubled(&a0, &tmp1)) != MP_OKAY) {
1870 		goto ERR;
1871 	}
1872 	if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
1873 		goto ERR;
1874 	}
1875 	if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) {
1876 		goto ERR;
1877 	}
1878 	if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
1879 		goto ERR;
1880 	}
1881 
1882 	if ((res = doubled(&b0, &tmp2)) != MP_OKAY) {
1883 		goto ERR;
1884 	}
1885 	if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
1886 		goto ERR;
1887 	}
1888 	if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) {
1889 		goto ERR;
1890 	}
1891 	if ((res = signed_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
1892 		goto ERR;
1893 	}
1894 
1895 	if ((res = signed_multiply(&tmp1, &tmp2, &w1)) != MP_OKAY) {
1896 		goto ERR;
1897 	}
1898 
1899 	/* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
1900 	if ((res = doubled(&a2, &tmp1)) != MP_OKAY) {
1901 		goto ERR;
1902 	}
1903 	if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
1904 		goto ERR;
1905 	}
1906 	if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) {
1907 		goto ERR;
1908 	}
1909 	if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
1910 		goto ERR;
1911 	}
1912 
1913 	if ((res = doubled(&b2, &tmp2)) != MP_OKAY) {
1914 		goto ERR;
1915 	}
1916 	if ((res = signed_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
1917 		goto ERR;
1918 	}
1919 	if ((res = doubled(&tmp2, &tmp2)) != MP_OKAY) {
1920 		goto ERR;
1921 	}
1922 	if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
1923 		goto ERR;
1924 	}
1925 
1926 	if ((res = signed_multiply(&tmp1, &tmp2, &w3)) != MP_OKAY) {
1927 		goto ERR;
1928 	}
1929 
1930 
1931 	/* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
1932 	if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) {
1933 		goto ERR;
1934 	}
1935 	if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
1936 		goto ERR;
1937 	}
1938 	if ((res = signed_add(&b2, &b1, &tmp2)) != MP_OKAY) {
1939 		goto ERR;
1940 	}
1941 	if ((res = signed_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
1942 		goto ERR;
1943 	}
1944 	if ((res = signed_multiply(&tmp1, &tmp2, &w2)) != MP_OKAY) {
1945 		goto ERR;
1946 	}
1947 
1948 	/* now solve the matrix
1949 
1950 	0  0  0  0  1
1951 	1  2  4  8  16
1952 	1  1  1  1  1
1953 	16 8  4  2  1
1954 	1  0  0  0  0
1955 
1956 	using 12 subtractions, 4 shifts,
1957 	2 small divisions and 1 small multiplication
1958 	*/
1959 
1960 	/* r1 - r4 */
1961 	if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) {
1962 		goto ERR;
1963 	}
1964 	/* r3 - r0 */
1965 	if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) {
1966 		goto ERR;
1967 	}
1968 	/* r1/2 */
1969 	if ((res = half(&w1, &w1)) != MP_OKAY) {
1970 		goto ERR;
1971 	}
1972 	/* r3/2 */
1973 	if ((res = half(&w3, &w3)) != MP_OKAY) {
1974 		goto ERR;
1975 	}
1976 	/* r2 - r0 - r4 */
1977 	if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) {
1978 		goto ERR;
1979 	}
1980 	if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) {
1981 		goto ERR;
1982 	}
1983 	/* r1 - r2 */
1984 	if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) {
1985 		goto ERR;
1986 	}
1987 	/* r3 - r2 */
1988 	if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) {
1989 		goto ERR;
1990 	}
1991 	/* r1 - 8r0 */
1992 	if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) {
1993 		goto ERR;
1994 	}
1995 	if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) {
1996 		goto ERR;
1997 	}
1998 	/* r3 - 8r4 */
1999 	if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) {
2000 		goto ERR;
2001 	}
2002 	if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) {
2003 		goto ERR;
2004 	}
2005 	/* 3r2 - r1 - r3 */
2006 	if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) {
2007 		goto ERR;
2008 	}
2009 	if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) {
2010 		goto ERR;
2011 	}
2012 	if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) {
2013 		goto ERR;
2014 	}
2015 	/* r1 - r2 */
2016 	if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) {
2017 		goto ERR;
2018 	}
2019 	/* r3 - r2 */
2020 	if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) {
2021 		goto ERR;
2022 	}
2023 	/* r1/3 */
2024 	if ((res = third(&w1, &w1, NULL)) != MP_OKAY) {
2025 		goto ERR;
2026 	}
2027 	/* r3/3 */
2028 	if ((res = third(&w3, &w3, NULL)) != MP_OKAY) {
2029 		goto ERR;
2030 	}
2031 
2032 	/* at this point shift W[n] by B*n */
2033 	if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) {
2034 		goto ERR;
2035 	}
2036 	if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) {
2037 		goto ERR;
2038 	}
2039 	if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) {
2040 		goto ERR;
2041 	}
2042 	if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) {
2043 		goto ERR;
2044 	}
2045 
2046 	if ((res = signed_add(&w0, &w1, c)) != MP_OKAY) {
2047 		goto ERR;
2048 	}
2049 	if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) {
2050 		goto ERR;
2051 	}
2052 	if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
2053 		goto ERR;
2054 	}
2055 	if ((res = signed_add(&tmp1, c, c)) != MP_OKAY) {
2056 		goto ERR;
2057 	}
2058 
2059 ERR:
2060 	mp_clear_multi(&w0, &w1, &w2, &w3, &w4,
2061 		&a0, &a1, &a2, &b0, &b1,
2062 		&b2, &tmp1, &tmp2, NULL);
2063 	return res;
2064 }
2065 
2066 #define TOOM_MUL_CUTOFF	350
2067 #define KARATSUBA_MUL_CUTOFF 80
2068 
2069 /* c = |a| * |b| using Karatsuba Multiplication using
2070  * three half size multiplications
2071  *
2072  * Let B represent the radix [e.g. 2**DIGIT_BIT] and
2073  * let n represent half of the number of digits in
2074  * the min(a,b)
2075  *
2076  * a = a1 * B**n + a0
2077  * b = b1 * B**n + b0
2078  *
2079  * Then, a * b =>
2080    a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0
2081  *
2082  * Note that a1b1 and a0b0 are used twice and only need to be
2083  * computed once.  So in total three half size (half # of
2084  * digit) multiplications are performed, a0b0, a1b1 and
2085  * (a1+b1)(a0+b0)
2086  *
2087  * Note that a multiplication of half the digits requires
2088  * 1/4th the number of single precision multiplications so in
2089  * total after one call 25% of the single precision multiplications
2090  * are saved.  Note also that the call to signed_multiply can end up back
2091  * in this function if the a0, a1, b0, or b1 are above the threshold.
2092  * This is known as divide-and-conquer and leads to the famous
2093  * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than
2094  * the standard O(N**2) that the baseline/comba methods use.
2095  * Generally though the overhead of this method doesn't pay off
2096  * until a certain size (N ~ 80) is reached.
2097  */
2098 static int
karatsuba_multiply(mp_int * a,mp_int * b,mp_int * c)2099 karatsuba_multiply(mp_int * a, mp_int * b, mp_int * c)
2100 {
2101 	mp_int  x0, x1, y0, y1, t1, x0y0, x1y1;
2102 	int     B;
2103 	int     err;
2104 
2105 	/* default the return code to an error */
2106 	err = MP_MEM;
2107 
2108 	/* min # of digits */
2109 	B = MIN(a->used, b->used);
2110 
2111 	/* now divide in two */
2112 	B = (int)((unsigned)B >> 1);
2113 
2114 	/* init copy all the temps */
2115 	if (mp_init_size(&x0, B) != MP_OKAY) {
2116 		goto ERR;
2117 	}
2118 	if (mp_init_size(&x1, a->used - B) != MP_OKAY) {
2119 		goto X0;
2120 	}
2121 	if (mp_init_size(&y0, B) != MP_OKAY) {
2122 		goto X1;
2123 	}
2124 	if (mp_init_size(&y1, b->used - B) != MP_OKAY) {
2125 		goto Y0;
2126 	}
2127 	/* init temps */
2128 	if (mp_init_size(&t1, B * 2) != MP_OKAY) {
2129 		goto Y1;
2130 	}
2131 	if (mp_init_size(&x0y0, B * 2) != MP_OKAY) {
2132 		goto T1;
2133 	}
2134 	if (mp_init_size(&x1y1, B * 2) != MP_OKAY) {
2135 		goto X0Y0;
2136 	}
2137 	/* now shift the digits */
2138 	x0.used = y0.used = B;
2139 	x1.used = a->used - B;
2140 	y1.used = b->used - B;
2141 
2142 	{
2143 		int x;
2144 		mp_digit *tmpa, *tmpb, *tmpx, *tmpy;
2145 
2146 		/* we copy the digits directly instead of using higher level functions
2147 		* since we also need to shift the digits
2148 		*/
2149 		tmpa = a->dp;
2150 		tmpb = b->dp;
2151 
2152 		tmpx = x0.dp;
2153 		tmpy = y0.dp;
2154 		for (x = 0; x < B; x++) {
2155 			*tmpx++ = *tmpa++;
2156 			*tmpy++ = *tmpb++;
2157 		}
2158 
2159 		tmpx = x1.dp;
2160 		for (x = B; x < a->used; x++) {
2161 			*tmpx++ = *tmpa++;
2162 		}
2163 
2164 		tmpy = y1.dp;
2165 		for (x = B; x < b->used; x++) {
2166 			*tmpy++ = *tmpb++;
2167 		}
2168 	}
2169 
2170 	/* only need to clamp the lower words since by definition the
2171 	* upper words x1/y1 must have a known number of digits
2172 	*/
2173 	trim_unused_digits(&x0);
2174 	trim_unused_digits(&y0);
2175 
2176 	/* now calc the products x0y0 and x1y1 */
2177 	/* after this x0 is no longer required, free temp [x0==t2]! */
2178 	if (signed_multiply(&x0, &y0, &x0y0) != MP_OKAY)  {
2179 		goto X1Y1;          /* x0y0 = x0*y0 */
2180 	}
2181 	if (signed_multiply(&x1, &y1, &x1y1) != MP_OKAY) {
2182 		goto X1Y1;          /* x1y1 = x1*y1 */
2183 	}
2184 	/* now calc x1+x0 and y1+y0 */
2185 	if (basic_add(&x1, &x0, &t1) != MP_OKAY) {
2186 		goto X1Y1;          /* t1 = x1 - x0 */
2187 	}
2188 	if (basic_add(&y1, &y0, &x0) != MP_OKAY) {
2189 		goto X1Y1;          /* t2 = y1 - y0 */
2190 	}
2191 	if (signed_multiply(&t1, &x0, &t1) != MP_OKAY) {
2192 		goto X1Y1;          /* t1 = (x1 + x0) * (y1 + y0) */
2193 	}
2194 	/* add x0y0 */
2195 	if (signed_add(&x0y0, &x1y1, &x0) != MP_OKAY) {
2196 		goto X1Y1;          /* t2 = x0y0 + x1y1 */
2197 	}
2198 	if (basic_subtract(&t1, &x0, &t1) != MP_OKAY) {
2199 		goto X1Y1;          /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */
2200 	}
2201 	/* shift by B */
2202 	if (lshift_digits(&t1, B) != MP_OKAY) {
2203 		goto X1Y1;          /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */
2204 	}
2205 	if (lshift_digits(&x1y1, B * 2) != MP_OKAY) {
2206 		goto X1Y1;          /* x1y1 = x1y1 << 2*B */
2207 	}
2208 	if (signed_add(&x0y0, &t1, &t1) != MP_OKAY) {
2209 		goto X1Y1;          /* t1 = x0y0 + t1 */
2210 	}
2211 	if (signed_add(&t1, &x1y1, c) != MP_OKAY) {
2212 		goto X1Y1;          /* t1 = x0y0 + t1 + x1y1 */
2213 	}
2214 	/* Algorithm succeeded set the return code to MP_OKAY */
2215 	err = MP_OKAY;
2216 
2217 X1Y1:
2218 	mp_clear(&x1y1);
2219 X0Y0:
2220 	mp_clear(&x0y0);
2221 T1:
2222 	mp_clear(&t1);
2223 Y1:
2224 	mp_clear(&y1);
2225 Y0:
2226 	mp_clear(&y0);
2227 X1:
2228 	mp_clear(&x1);
2229 X0:
2230 	mp_clear(&x0);
2231 ERR:
2232 	return err;
2233 }
2234 
2235 /* Fast (comba) multiplier
2236  *
2237  * This is the fast column-array [comba] multiplier.  It is
2238  * designed to compute the columns of the product first
2239  * then handle the carries afterwards.  This has the effect
2240  * of making the nested loops that compute the columns very
2241  * simple and schedulable on super-scalar processors.
2242  *
2243  * This has been modified to produce a variable number of
2244  * digits of output so if say only a half-product is required
2245  * you don't have to compute the upper half (a feature
2246  * required for fast Barrett reduction).
2247  *
2248  * Based on Algorithm 14.12 on pp.595 of HAC.
2249  *
2250  */
2251 static int
fast_col_array_multiply(mp_int * a,mp_int * b,mp_int * c,int digs)2252 fast_col_array_multiply(mp_int * a, mp_int * b, mp_int * c, int digs)
2253 {
2254 	int     olduse, res, pa, ix, iz;
2255 	/*LINTED*/
2256 	mp_digit W[MP_WARRAY];
2257 	mp_word  _W;
2258 
2259 	/* grow the destination as required */
2260 	if (c->alloc < digs) {
2261 		if ((res = mp_grow(c, digs)) != MP_OKAY) {
2262 			return res;
2263 		}
2264 	}
2265 
2266 	/* number of output digits to produce */
2267 	pa = MIN(digs, a->used + b->used);
2268 
2269 	/* clear the carry */
2270 	_W = 0;
2271 	for (ix = 0; ix < pa; ix++) {
2272 		int      tx, ty;
2273 		int      iy;
2274 		mp_digit *tmpx, *tmpy;
2275 
2276 		/* get offsets into the two bignums */
2277 		ty = MIN(b->used-1, ix);
2278 		tx = ix - ty;
2279 
2280 		/* setup temp aliases */
2281 		tmpx = a->dp + tx;
2282 		tmpy = b->dp + ty;
2283 
2284 		/* this is the number of times the loop will iterrate, essentially
2285 		while (tx++ < a->used && ty-- >= 0) { ... }
2286 		*/
2287 		iy = MIN(a->used-tx, ty+1);
2288 
2289 		/* execute loop */
2290 		for (iz = 0; iz < iy; ++iz) {
2291 			_W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2292 
2293 		}
2294 
2295 		/* store term */
2296 		W[ix] = ((mp_digit)_W) & MP_MASK;
2297 
2298 		/* make next carry */
2299 		_W = _W >> ((mp_word)DIGIT_BIT);
2300 	}
2301 
2302 	/* setup dest */
2303 	olduse  = c->used;
2304 	c->used = pa;
2305 
2306 	{
2307 		mp_digit *tmpc;
2308 		tmpc = c->dp;
2309 		for (ix = 0; ix < pa+1; ix++) {
2310 			/* now extract the previous digit [below the carry] */
2311 			*tmpc++ = (ix < pa) ? W[ix] : 0;
2312 		}
2313 
2314 		/* clear unused digits [that existed in the old copy of c] */
2315 		for (; ix < olduse; ix++) {
2316 			*tmpc++ = 0;
2317 		}
2318 	}
2319 	trim_unused_digits(c);
2320 	return MP_OKAY;
2321 }
2322 
2323 /* return 1 if we can use fast column array multiply */
2324 /*
2325 * The fast multiplier can be used if the output will
2326 * have less than MP_WARRAY digits and the number of
2327 * digits won't affect carry propagation
2328 */
2329 static inline int
can_use_fast_column_array(int ndigits,int used)2330 can_use_fast_column_array(int ndigits, int used)
2331 {
2332 	return (((unsigned)ndigits < MP_WARRAY) &&
2333 		used < (1 << (unsigned)((CHAR_BIT * sizeof(mp_word)) - (2 * DIGIT_BIT))));
2334 }
2335 
2336 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_digs.c,v $ */
2337 /* Revision: 1.2 $ */
2338 /* Date: 2011/03/18 16:22:09 $ */
2339 
2340 
2341 /* multiplies |a| * |b| and only computes upto digs digits of result
2342  * HAC pp. 595, Algorithm 14.12  Modified so you can control how
2343  * many digits of output are created.
2344  */
2345 static int
basic_multiply_partial_lower(mp_int * a,mp_int * b,mp_int * c,int digs)2346 basic_multiply_partial_lower(mp_int * a, mp_int * b, mp_int * c, int digs)
2347 {
2348 	mp_int  t;
2349 	int     res, pa, pb, ix, iy;
2350 	mp_digit u;
2351 	mp_word r;
2352 	mp_digit tmpx, *tmpt, *tmpy;
2353 
2354 	/* can we use the fast multiplier? */
2355 	if (can_use_fast_column_array(digs, MIN(a->used, b->used))) {
2356 		return fast_col_array_multiply(a, b, c, digs);
2357 	}
2358 
2359 	if ((res = mp_init_size(&t, digs)) != MP_OKAY) {
2360 		return res;
2361 	}
2362 	t.used = digs;
2363 
2364 	/* compute the digits of the product directly */
2365 	pa = a->used;
2366 	for (ix = 0; ix < pa; ix++) {
2367 		/* set the carry to zero */
2368 		u = 0;
2369 
2370 		/* limit ourselves to making digs digits of output */
2371 		pb = MIN(b->used, digs - ix);
2372 
2373 		/* setup some aliases */
2374 		/* copy of the digit from a used within the nested loop */
2375 		tmpx = a->dp[ix];
2376 
2377 		/* an alias for the destination shifted ix places */
2378 		tmpt = t.dp + ix;
2379 
2380 		/* an alias for the digits of b */
2381 		tmpy = b->dp;
2382 
2383 		/* compute the columns of the output and propagate the carry */
2384 		for (iy = 0; iy < pb; iy++) {
2385 			/* compute the column as a mp_word */
2386 			r = ((mp_word)*tmpt) +
2387 				((mp_word)tmpx) * ((mp_word)*tmpy++) +
2388 				((mp_word) u);
2389 
2390 			/* the new column is the lower part of the result */
2391 			*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2392 
2393 			/* get the carry word from the result */
2394 			u = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2395 		}
2396 		/* set carry if it is placed below digs */
2397 		if (ix + iy < digs) {
2398 			*tmpt = u;
2399 		}
2400 	}
2401 
2402 	trim_unused_digits(&t);
2403 	mp_exch(&t, c);
2404 
2405 	mp_clear(&t);
2406 	return MP_OKAY;
2407 }
2408 
2409 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_digs.c,v $ */
2410 /* Revision: 1.3 $ */
2411 /* Date: 2011/03/18 16:43:04 $ */
2412 
2413 /* high level multiplication (handles sign) */
2414 static int
signed_multiply(mp_int * a,mp_int * b,mp_int * c)2415 signed_multiply(mp_int * a, mp_int * b, mp_int * c)
2416 {
2417 	int     res, neg;
2418 
2419 	neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
2420 	/* use Toom-Cook? */
2421 	if (MIN(a->used, b->used) >= TOOM_MUL_CUTOFF) {
2422 		res = toom_cook_multiply(a, b, c);
2423 	} else if (MIN(a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
2424 		/* use Karatsuba? */
2425 		res = karatsuba_multiply(a, b, c);
2426 	} else {
2427 		/* can we use the fast multiplier? */
2428 		int     digs = a->used + b->used + 1;
2429 
2430 		if (can_use_fast_column_array(digs, MIN(a->used, b->used))) {
2431 			res = fast_col_array_multiply(a, b, c, digs);
2432 		} else  {
2433 			res = basic_multiply_partial_lower(a, b, c, (a)->used + (b)->used + 1);
2434 		}
2435 	}
2436 	c->sign = (c->used > 0) ? neg : MP_ZPOS;
2437 	return res;
2438 }
2439 
2440 /* this is a modified version of fast_s_mul_digs that only produces
2441  * output digits *above* digs.  See the comments for fast_s_mul_digs
2442  * to see how it works.
2443  *
2444  * This is used in the Barrett reduction since for one of the multiplications
2445  * only the higher digits were needed.  This essentially halves the work.
2446  *
2447  * Based on Algorithm 14.12 on pp.595 of HAC.
2448  */
2449 static int
fast_basic_multiply_partial_upper(mp_int * a,mp_int * b,mp_int * c,int digs)2450 fast_basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs)
2451 {
2452 	int     olduse, res, pa, ix, iz;
2453 	mp_digit W[MP_WARRAY];
2454 	mp_word  _W;
2455 
2456 	/* grow the destination as required */
2457 	pa = a->used + b->used;
2458 	if (c->alloc < pa) {
2459 		if ((res = mp_grow(c, pa)) != MP_OKAY) {
2460 			return res;
2461 		}
2462 	}
2463 
2464 	/* number of output digits to produce */
2465 	pa = a->used + b->used;
2466 	_W = 0;
2467 	for (ix = digs; ix < pa; ix++) {
2468 		int      tx, ty, iy;
2469 		mp_digit *tmpx, *tmpy;
2470 
2471 		/* get offsets into the two bignums */
2472 		ty = MIN(b->used-1, ix);
2473 		tx = ix - ty;
2474 
2475 		/* setup temp aliases */
2476 		tmpx = a->dp + tx;
2477 		tmpy = b->dp + ty;
2478 
2479 		/* this is the number of times the loop will iterrate, essentially its
2480 		 while (tx++ < a->used && ty-- >= 0) { ... }
2481 		*/
2482 		iy = MIN(a->used-tx, ty+1);
2483 
2484 		/* execute loop */
2485 		for (iz = 0; iz < iy; iz++) {
2486 			 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
2487 		}
2488 
2489 		/* store term */
2490 		W[ix] = ((mp_digit)_W) & MP_MASK;
2491 
2492 		/* make next carry */
2493 		_W = _W >> ((mp_word)DIGIT_BIT);
2494 	}
2495 
2496 	/* setup dest */
2497 	olduse  = c->used;
2498 	c->used = pa;
2499 
2500 	{
2501 		mp_digit *tmpc;
2502 
2503 		tmpc = c->dp + digs;
2504 		for (ix = digs; ix < pa; ix++) {
2505 			/* now extract the previous digit [below the carry] */
2506 			*tmpc++ = W[ix];
2507 		}
2508 
2509 		/* clear unused digits [that existed in the old copy of c] */
2510 		for (; ix < olduse; ix++) {
2511 			*tmpc++ = 0;
2512 		}
2513 	}
2514 	trim_unused_digits(c);
2515 	return MP_OKAY;
2516 }
2517 
2518 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_mul_high_digs.c,v $ */
2519 /* Revision: 1.1.1.1 $ */
2520 /* Date: 2011/03/12 22:58:18 $ */
2521 
2522 /* multiplies |a| * |b| and does not compute the lower digs digits
2523  * [meant to get the higher part of the product]
2524  */
2525 static int
basic_multiply_partial_upper(mp_int * a,mp_int * b,mp_int * c,int digs)2526 basic_multiply_partial_upper(mp_int * a, mp_int * b, mp_int * c, int digs)
2527 {
2528 	mp_int  t;
2529 	int     res, pa, pb, ix, iy;
2530 	mp_digit carry;
2531 	mp_word r;
2532 	mp_digit tmpx, *tmpt, *tmpy;
2533 
2534 	/* can we use the fast multiplier? */
2535 	if (can_use_fast_column_array(a->used + b->used + 1, MIN(a->used, b->used))) {
2536 		return fast_basic_multiply_partial_upper(a, b, c, digs);
2537 	}
2538 
2539 	if ((res = mp_init_size(&t, a->used + b->used + 1)) != MP_OKAY) {
2540 		return res;
2541 	}
2542 	t.used = a->used + b->used + 1;
2543 
2544 	pa = a->used;
2545 	pb = b->used;
2546 	for (ix = 0; ix < pa; ix++) {
2547 		/* clear the carry */
2548 		carry = 0;
2549 
2550 		/* left hand side of A[ix] * B[iy] */
2551 		tmpx = a->dp[ix];
2552 
2553 		/* alias to the address of where the digits will be stored */
2554 		tmpt = &(t.dp[digs]);
2555 
2556 		/* alias for where to read the right hand side from */
2557 		tmpy = b->dp + (digs - ix);
2558 
2559 		for (iy = digs - ix; iy < pb; iy++) {
2560 			/* calculate the double precision result */
2561 			r = ((mp_word)*tmpt) +
2562 				((mp_word)tmpx) * ((mp_word)*tmpy++) +
2563 				((mp_word) carry);
2564 
2565 			/* get the lower part */
2566 			*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
2567 
2568 			/* carry the carry */
2569 			carry = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
2570 		}
2571 		*tmpt = carry;
2572 	}
2573 	trim_unused_digits(&t);
2574 	mp_exch(&t, c);
2575 	mp_clear(&t);
2576 	return MP_OKAY;
2577 }
2578 
2579 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_mul_high_digs.c,v $ */
2580 /* Revision: 1.3 $ */
2581 /* Date: 2011/03/18 16:43:04 $ */
2582 
2583 /* reduces x mod m, assumes 0 < x < m**2, mu is
2584  * precomputed via mp_reduce_setup.
2585  * From HAC pp.604 Algorithm 14.42
2586  */
2587 static int
mp_reduce(mp_int * x,mp_int * m,mp_int * mu)2588 mp_reduce(mp_int * x, mp_int * m, mp_int * mu)
2589 {
2590 	mp_int  q;
2591 	int     res, um = m->used;
2592 
2593 	/* q = x */
2594 	if ((res = mp_init_copy(&q, x)) != MP_OKAY) {
2595 		return res;
2596 	}
2597 
2598 	/* q1 = x / b**(k-1)  */
2599 	rshift_digits(&q, um - 1);
2600 
2601 	/* according to HAC this optimization is ok */
2602 	if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
2603 		if ((res = signed_multiply(&q, mu, &q)) != MP_OKAY) {
2604 			goto CLEANUP;
2605 		}
2606 	} else {
2607 		if ((res = basic_multiply_partial_upper(&q, mu, &q, um)) != MP_OKAY) {
2608 			goto CLEANUP;
2609 		}
2610 	}
2611 
2612 	/* q3 = q2 / b**(k+1) */
2613 	rshift_digits(&q, um + 1);
2614 
2615 	/* x = x mod b**(k+1), quick (no division) */
2616 	if ((res = modulo_2_to_power(x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
2617 		goto CLEANUP;
2618 	}
2619 
2620 	/* q = q * m mod b**(k+1), quick (no division) */
2621 	if ((res = basic_multiply_partial_lower(&q, m, &q, um + 1)) != MP_OKAY) {
2622 		goto CLEANUP;
2623 	}
2624 
2625 	/* x = x - q */
2626 	if ((res = signed_subtract(x, &q, x)) != MP_OKAY) {
2627 		goto CLEANUP;
2628 	}
2629 
2630 	/* If x < 0, add b**(k+1) to it */
2631 	if (compare_digit(x, 0) == MP_LT) {
2632 		set_word(&q, 1);
2633 		if ((res = lshift_digits(&q, um + 1)) != MP_OKAY) {
2634 			goto CLEANUP;
2635 		}
2636 		if ((res = signed_add(x, &q, x)) != MP_OKAY) {
2637 			goto CLEANUP;
2638 		}
2639 	}
2640 
2641 	/* Back off if it's too big */
2642 	while (signed_compare(x, m) != MP_LT) {
2643 		if ((res = basic_subtract(x, m, x)) != MP_OKAY) {
2644 			goto CLEANUP;
2645 		}
2646 	}
2647 
2648 CLEANUP:
2649 	mp_clear(&q);
2650 
2651 	return res;
2652 }
2653 
2654 /* determines the setup value */
2655 static int
mp_reduce_2k_setup_l(mp_int * a,mp_int * d)2656 mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
2657 {
2658 	int    res;
2659 	mp_int tmp;
2660 
2661 	if ((res = mp_init(&tmp)) != MP_OKAY) {
2662 		return res;
2663 	}
2664 
2665 	if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
2666 		goto ERR;
2667 	}
2668 
2669 	if ((res = basic_subtract(&tmp, a, d)) != MP_OKAY) {
2670 		goto ERR;
2671 	}
2672 
2673 ERR:
2674 	mp_clear(&tmp);
2675 	return res;
2676 }
2677 
2678 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup_l.c,v $ */
2679 /* Revision: 1.1.1.1 $ */
2680 /* Date: 2011/03/12 22:58:18 $ */
2681 
2682 /* reduces a modulo n where n is of the form 2**p - d
2683    This differs from reduce_2k since "d" can be larger
2684    than a single digit.
2685 */
2686 static int
mp_reduce_2k_l(mp_int * a,mp_int * n,mp_int * d)2687 mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
2688 {
2689 	mp_int q;
2690 	int    p, res;
2691 
2692 	if ((res = mp_init(&q)) != MP_OKAY) {
2693 		return res;
2694 	}
2695 
2696 	p = mp_count_bits(n);
2697 top:
2698 	/* q = a/2**p, a = a mod 2**p */
2699 	if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) {
2700 		goto ERR;
2701 	}
2702 
2703 	/* q = q * d */
2704 	if ((res = signed_multiply(&q, d, &q)) != MP_OKAY) {
2705 		goto ERR;
2706 	}
2707 
2708 	/* a = a + q */
2709 	if ((res = basic_add(a, &q, a)) != MP_OKAY) {
2710 		goto ERR;
2711 	}
2712 
2713 	if (compare_magnitude(a, n) != MP_LT) {
2714 		basic_subtract(a, n, a);
2715 		goto top;
2716 	}
2717 
2718 ERR:
2719 	mp_clear(&q);
2720 	return res;
2721 }
2722 
2723 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_l.c,v $ */
2724 /* Revision: 1.1.1.1 $ */
2725 /* Date: 2011/03/12 22:58:18 $ */
2726 
2727 /* squaring using Toom-Cook 3-way algorithm */
2728 static int
toom_cook_square(mp_int * a,mp_int * b)2729 toom_cook_square(mp_int *a, mp_int *b)
2730 {
2731 	mp_int w0, w1, w2, w3, w4, tmp1, a0, a1, a2;
2732 	int res, B;
2733 
2734 	/* init temps */
2735 	if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL)) != MP_OKAY) {
2736 		return res;
2737 	}
2738 
2739 	/* B */
2740 	B = a->used / 3;
2741 
2742 	/* a = a2 * B**2 + a1 * B + a0 */
2743 	if ((res = modulo_2_to_power(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
2744 		goto ERR;
2745 	}
2746 
2747 	if ((res = mp_copy(a, &a1)) != MP_OKAY) {
2748 		goto ERR;
2749 	}
2750 	rshift_digits(&a1, B);
2751 	modulo_2_to_power(&a1, DIGIT_BIT * B, &a1);
2752 
2753 	if ((res = mp_copy(a, &a2)) != MP_OKAY) {
2754 		goto ERR;
2755 	}
2756 	rshift_digits(&a2, B*2);
2757 
2758 	/* w0 = a0*a0 */
2759 	if ((res = square(&a0, &w0)) != MP_OKAY) {
2760 		goto ERR;
2761 	}
2762 
2763 	/* w4 = a2 * a2 */
2764 	if ((res = square(&a2, &w4)) != MP_OKAY) {
2765 		goto ERR;
2766 	}
2767 
2768 	/* w1 = (a2 + 2(a1 + 2a0))**2 */
2769 	if ((res = doubled(&a0, &tmp1)) != MP_OKAY) {
2770 		goto ERR;
2771 	}
2772 	if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
2773 		goto ERR;
2774 	}
2775 	if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) {
2776 		goto ERR;
2777 	}
2778 	if ((res = signed_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
2779 		goto ERR;
2780 	}
2781 
2782 	if ((res = square(&tmp1, &w1)) != MP_OKAY) {
2783 		goto ERR;
2784 	}
2785 
2786 	/* w3 = (a0 + 2(a1 + 2a2))**2 */
2787 	if ((res = doubled(&a2, &tmp1)) != MP_OKAY) {
2788 		goto ERR;
2789 	}
2790 	if ((res = signed_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
2791 		goto ERR;
2792 	}
2793 	if ((res = doubled(&tmp1, &tmp1)) != MP_OKAY) {
2794 		goto ERR;
2795 	}
2796 	if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
2797 		goto ERR;
2798 	}
2799 
2800 	if ((res = square(&tmp1, &w3)) != MP_OKAY) {
2801 		goto ERR;
2802 	}
2803 
2804 
2805 	/* w2 = (a2 + a1 + a0)**2 */
2806 	if ((res = signed_add(&a2, &a1, &tmp1)) != MP_OKAY) {
2807 		goto ERR;
2808 	}
2809 	if ((res = signed_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
2810 		goto ERR;
2811 	}
2812 	if ((res = square(&tmp1, &w2)) != MP_OKAY) {
2813 		goto ERR;
2814 	}
2815 
2816 	/* now solve the matrix
2817 
2818 	0  0  0  0  1
2819 	1  2  4  8  16
2820 	1  1  1  1  1
2821 	16 8  4  2  1
2822 	1  0  0  0  0
2823 
2824 	using 12 subtractions, 4 shifts, 2 small divisions and 1 small multiplication.
2825 	*/
2826 
2827 	/* r1 - r4 */
2828 	if ((res = signed_subtract(&w1, &w4, &w1)) != MP_OKAY) {
2829 		goto ERR;
2830 	}
2831 	/* r3 - r0 */
2832 	if ((res = signed_subtract(&w3, &w0, &w3)) != MP_OKAY) {
2833 		goto ERR;
2834 	}
2835 	/* r1/2 */
2836 	if ((res = half(&w1, &w1)) != MP_OKAY) {
2837 		goto ERR;
2838 	}
2839 	/* r3/2 */
2840 	if ((res = half(&w3, &w3)) != MP_OKAY) {
2841 		goto ERR;
2842 	}
2843 	/* r2 - r0 - r4 */
2844 	if ((res = signed_subtract(&w2, &w0, &w2)) != MP_OKAY) {
2845 		goto ERR;
2846 	}
2847 	if ((res = signed_subtract(&w2, &w4, &w2)) != MP_OKAY) {
2848 		goto ERR;
2849 	}
2850 	/* r1 - r2 */
2851 	if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) {
2852 		goto ERR;
2853 	}
2854 	/* r3 - r2 */
2855 	if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) {
2856 		goto ERR;
2857 	}
2858 	/* r1 - 8r0 */
2859 	if ((res = lshift_bits(&w0, 3, &tmp1)) != MP_OKAY) {
2860 		goto ERR;
2861 	}
2862 	if ((res = signed_subtract(&w1, &tmp1, &w1)) != MP_OKAY) {
2863 		goto ERR;
2864 	}
2865 	/* r3 - 8r4 */
2866 	if ((res = lshift_bits(&w4, 3, &tmp1)) != MP_OKAY) {
2867 		goto ERR;
2868 	}
2869 	if ((res = signed_subtract(&w3, &tmp1, &w3)) != MP_OKAY) {
2870 		goto ERR;
2871 	}
2872 	/* 3r2 - r1 - r3 */
2873 	if ((res = multiply_digit(&w2, 3, &w2)) != MP_OKAY) {
2874 		goto ERR;
2875 	}
2876 	if ((res = signed_subtract(&w2, &w1, &w2)) != MP_OKAY) {
2877 		goto ERR;
2878 	}
2879 	if ((res = signed_subtract(&w2, &w3, &w2)) != MP_OKAY) {
2880 		goto ERR;
2881 	}
2882 	/* r1 - r2 */
2883 	if ((res = signed_subtract(&w1, &w2, &w1)) != MP_OKAY) {
2884 		goto ERR;
2885 	}
2886 	/* r3 - r2 */
2887 	if ((res = signed_subtract(&w3, &w2, &w3)) != MP_OKAY) {
2888 		goto ERR;
2889 	}
2890 	/* r1/3 */
2891 	if ((res = third(&w1, &w1, NULL)) != MP_OKAY) {
2892 		goto ERR;
2893 	}
2894 	/* r3/3 */
2895 	if ((res = third(&w3, &w3, NULL)) != MP_OKAY) {
2896 		goto ERR;
2897 	}
2898 
2899 	/* at this point shift W[n] by B*n */
2900 	if ((res = lshift_digits(&w1, 1*B)) != MP_OKAY) {
2901 		goto ERR;
2902 	}
2903 	if ((res = lshift_digits(&w2, 2*B)) != MP_OKAY) {
2904 		goto ERR;
2905 	}
2906 	if ((res = lshift_digits(&w3, 3*B)) != MP_OKAY) {
2907 		goto ERR;
2908 	}
2909 	if ((res = lshift_digits(&w4, 4*B)) != MP_OKAY) {
2910 		goto ERR;
2911 	}
2912 
2913 	if ((res = signed_add(&w0, &w1, b)) != MP_OKAY) {
2914 		goto ERR;
2915 	}
2916 	if ((res = signed_add(&w2, &w3, &tmp1)) != MP_OKAY) {
2917 		goto ERR;
2918 	}
2919 	if ((res = signed_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
2920 		goto ERR;
2921 	}
2922 	if ((res = signed_add(&tmp1, b, b)) != MP_OKAY) {
2923 		goto ERR;
2924 	}
2925 
2926 ERR:
2927 	mp_clear_multi(&w0, &w1, &w2, &w3, &w4, &a0, &a1, &a2, &tmp1, NULL);
2928 	return res;
2929 }
2930 
2931 
2932 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_toom_sqr.c,v $ */
2933 /* Revision: 1.1.1.1 $ */
2934 /* Date: 2011/03/12 22:58:18 $ */
2935 
2936 /* Karatsuba squaring, computes b = a*a using three
2937  * half size squarings
2938  *
2939  * See comments of karatsuba_mul for details.  It
2940  * is essentially the same algorithm but merely
2941  * tuned to perform recursive squarings.
2942  */
2943 static int
karatsuba_square(mp_int * a,mp_int * b)2944 karatsuba_square(mp_int * a, mp_int * b)
2945 {
2946 	mp_int  x0, x1, t1, t2, x0x0, x1x1;
2947 	int     B, err;
2948 
2949 	err = MP_MEM;
2950 
2951 	/* min # of digits */
2952 	B = a->used;
2953 
2954 	/* now divide in two */
2955 	B = (unsigned)B >> 1;
2956 
2957 	/* init copy all the temps */
2958 	if (mp_init_size(&x0, B) != MP_OKAY) {
2959 		goto ERR;
2960 	}
2961 	if (mp_init_size(&x1, a->used - B) != MP_OKAY) {
2962 		goto X0;
2963 	}
2964 	/* init temps */
2965 	if (mp_init_size(&t1, a->used * 2) != MP_OKAY) {
2966 		goto X1;
2967 	}
2968 	if (mp_init_size(&t2, a->used * 2) != MP_OKAY) {
2969 		goto T1;
2970 	}
2971 	if (mp_init_size(&x0x0, B * 2) != MP_OKAY) {
2972 		goto T2;
2973 	}
2974 	if (mp_init_size(&x1x1, (a->used - B) * 2) != MP_OKAY) {
2975 		goto X0X0;
2976 	}
2977 
2978 	memcpy(x0.dp, a->dp, B * sizeof(*x0.dp));
2979 	memcpy(x1.dp, &a->dp[B], (a->used - B) * sizeof(*x1.dp));
2980 
2981 	x0.used = B;
2982 	x1.used = a->used - B;
2983 
2984 	trim_unused_digits(&x0);
2985 
2986 	/* now calc the products x0*x0 and x1*x1 */
2987 	if (square(&x0, &x0x0) != MP_OKAY) {
2988 		goto X1X1;           /* x0x0 = x0*x0 */
2989 	}
2990 	if (square(&x1, &x1x1) != MP_OKAY) {
2991 		goto X1X1;           /* x1x1 = x1*x1 */
2992 	}
2993 	/* now calc (x1+x0)**2 */
2994 	if (basic_add(&x1, &x0, &t1) != MP_OKAY) {
2995 		goto X1X1;           /* t1 = x1 - x0 */
2996 	}
2997 	if (square(&t1, &t1) != MP_OKAY) {
2998 		goto X1X1;           /* t1 = (x1 - x0) * (x1 - x0) */
2999 	}
3000 	/* add x0y0 */
3001 	if (basic_add(&x0x0, &x1x1, &t2) != MP_OKAY) {
3002 		goto X1X1;           /* t2 = x0x0 + x1x1 */
3003 	}
3004 	if (basic_subtract(&t1, &t2, &t1) != MP_OKAY) {
3005 		goto X1X1;           /* t1 = (x1+x0)**2 - (x0x0 + x1x1) */
3006 	}
3007 	/* shift by B */
3008 	if (lshift_digits(&t1, B) != MP_OKAY) {
3009 		goto X1X1;           /* t1 = (x0x0 + x1x1 - (x1-x0)*(x1-x0))<<B */
3010 	}
3011 	if (lshift_digits(&x1x1, B * 2) != MP_OKAY) {
3012 		goto X1X1;           /* x1x1 = x1x1 << 2*B */
3013 	}
3014 	if (signed_add(&x0x0, &t1, &t1) != MP_OKAY) {
3015 		goto X1X1;           /* t1 = x0x0 + t1 */
3016 	}
3017 	if (signed_add(&t1, &x1x1, b) != MP_OKAY) {
3018 		goto X1X1;           /* t1 = x0x0 + t1 + x1x1 */
3019 	}
3020 	err = MP_OKAY;
3021 
3022 X1X1:
3023 	mp_clear(&x1x1);
3024 X0X0:
3025 	mp_clear(&x0x0);
3026 T2:
3027 	mp_clear(&t2);
3028 T1:
3029 	mp_clear(&t1);
3030 X1:
3031 	mp_clear(&x1);
3032 X0:
3033 	mp_clear(&x0);
3034 ERR:
3035 	return err;
3036 }
3037 
3038 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_karatsuba_sqr.c,v $ */
3039 /* Revision: 1.2 $ */
3040 /* Date: 2011/03/12 23:43:54 $ */
3041 
3042 /* the jist of squaring...
3043  * you do like mult except the offset of the tmpx [one that
3044  * starts closer to zero] can't equal the offset of tmpy.
3045  * So basically you set up iy like before then you min it with
3046  * (ty-tx) so that it never happens.  You double all those
3047  * you add in the inner loop
3048 
3049 After that loop you do the squares and add them in.
3050 */
3051 
3052 static int
fast_basic_square(mp_int * a,mp_int * b)3053 fast_basic_square(mp_int * a, mp_int * b)
3054 {
3055 	int       olduse, res, pa, ix, iz;
3056 	mp_digit   W[MP_WARRAY], *tmpx;
3057 	mp_word   W1;
3058 
3059 	/* grow the destination as required */
3060 	pa = a->used + a->used;
3061 	if (b->alloc < pa) {
3062 		if ((res = mp_grow(b, pa)) != MP_OKAY) {
3063 			return res;
3064 		}
3065 	}
3066 
3067 	/* number of output digits to produce */
3068 	W1 = 0;
3069 	for (ix = 0; ix < pa; ix++) {
3070 		int      tx, ty, iy;
3071 		mp_word  _W;
3072 		mp_digit *tmpy;
3073 
3074 		/* clear counter */
3075 		_W = 0;
3076 
3077 		/* get offsets into the two bignums */
3078 		ty = MIN(a->used-1, ix);
3079 		tx = ix - ty;
3080 
3081 		/* setup temp aliases */
3082 		tmpx = a->dp + tx;
3083 		tmpy = a->dp + ty;
3084 
3085 		/* this is the number of times the loop will iterrate, essentially
3086 		 while (tx++ < a->used && ty-- >= 0) { ... }
3087 		*/
3088 		iy = MIN(a->used-tx, ty+1);
3089 
3090 		/* now for squaring tx can never equal ty
3091 		* we halve the distance since they approach at a rate of 2x
3092 		* and we have to round because odd cases need to be executed
3093 		*/
3094 		iy = MIN(iy, (int)((unsigned)(ty-tx+1)>>1));
3095 
3096 		/* execute loop */
3097 		for (iz = 0; iz < iy; iz++) {
3098 			 _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
3099 		}
3100 
3101 		/* double the inner product and add carry */
3102 		_W = _W + _W + W1;
3103 
3104 		/* even columns have the square term in them */
3105 		if ((ix&1) == 0) {
3106 			 _W += ((mp_word)a->dp[(unsigned)ix>>1])*((mp_word)a->dp[(unsigned)ix>>1]);
3107 		}
3108 
3109 		/* store it */
3110 		W[ix] = (mp_digit)(_W & MP_MASK);
3111 
3112 		/* make next carry */
3113 		W1 = _W >> ((mp_word)DIGIT_BIT);
3114 	}
3115 
3116 	/* setup dest */
3117 	olduse  = b->used;
3118 	b->used = a->used+a->used;
3119 
3120 	{
3121 		mp_digit *tmpb;
3122 		tmpb = b->dp;
3123 		for (ix = 0; ix < pa; ix++) {
3124 			*tmpb++ = W[ix] & MP_MASK;
3125 		}
3126 
3127 		/* clear unused digits [that existed in the old copy of c] */
3128 		for (; ix < olduse; ix++) {
3129 			*tmpb++ = 0;
3130 		}
3131 	}
3132 	trim_unused_digits(b);
3133 	return MP_OKAY;
3134 }
3135 
3136 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_s_mp_sqr.c,v $ */
3137 /* Revision: 1.3 $ */
3138 /* Date: 2011/03/18 16:43:04 $ */
3139 
3140 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
3141 static int
basic_square(mp_int * a,mp_int * b)3142 basic_square(mp_int * a, mp_int * b)
3143 {
3144 	mp_int  t;
3145 	int     res, ix, iy, pa;
3146 	mp_word r;
3147 	mp_digit carry, tmpx, *tmpt;
3148 
3149 	pa = a->used;
3150 	if ((res = mp_init_size(&t, 2*pa + 1)) != MP_OKAY) {
3151 		return res;
3152 	}
3153 
3154 	/* default used is maximum possible size */
3155 	t.used = 2*pa + 1;
3156 
3157 	for (ix = 0; ix < pa; ix++) {
3158 		/* first calculate the digit at 2*ix */
3159 		/* calculate double precision result */
3160 		r = ((mp_word) t.dp[2*ix]) +
3161 		((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
3162 
3163 		/* store lower part in result */
3164 		t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
3165 
3166 		/* get the carry */
3167 		carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3168 
3169 		/* left hand side of A[ix] * A[iy] */
3170 		tmpx = a->dp[ix];
3171 
3172 		/* alias for where to store the results */
3173 		tmpt = t.dp + (2*ix + 1);
3174 
3175 		for (iy = ix + 1; iy < pa; iy++) {
3176 			/* first calculate the product */
3177 			r = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
3178 
3179 			/* now calculate the double precision result, note we use
3180 			* addition instead of *2 since it's easier to optimize
3181 			*/
3182 			r = ((mp_word) *tmpt) + r + r + ((mp_word) carry);
3183 
3184 			/* store lower part */
3185 			*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3186 
3187 			/* get carry */
3188 			carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3189 		}
3190 		/* propagate upwards */
3191 		while (carry != ((mp_digit) 0)) {
3192 			r = ((mp_word) *tmpt) + ((mp_word) carry);
3193 			*tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
3194 			carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3195 		}
3196 	}
3197 
3198 	trim_unused_digits(&t);
3199 	mp_exch(&t, b);
3200 	mp_clear(&t);
3201 	return MP_OKAY;
3202 }
3203 
3204 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_s_mp_sqr.c,v $ */
3205 /* Revision: 1.1.1.1 $ */
3206 /* Date: 2011/03/12 22:58:18 $ */
3207 
3208 #define TOOM_SQR_CUTOFF      400
3209 #define KARATSUBA_SQR_CUTOFF 120
3210 
3211 /* computes b = a*a */
3212 static int
square(mp_int * a,mp_int * b)3213 square(mp_int * a, mp_int * b)
3214 {
3215 	int     res;
3216 
3217 	/* use Toom-Cook? */
3218 	if (a->used >= TOOM_SQR_CUTOFF) {
3219 		res = toom_cook_square(a, b);
3220 		/* Karatsuba? */
3221 	} else if (a->used >= KARATSUBA_SQR_CUTOFF) {
3222 		res = karatsuba_square(a, b);
3223 	} else {
3224 		/* can we use the fast comba multiplier? */
3225 		if (can_use_fast_column_array(a->used + a->used + 1, a->used)) {
3226 			res = fast_basic_square(a, b);
3227 		} else {
3228 			res = basic_square(a, b);
3229 		}
3230 	}
3231 	b->sign = MP_ZPOS;
3232 	return res;
3233 }
3234 
3235 /* find window size */
3236 static inline int
find_window_size(mp_int * X)3237 find_window_size(mp_int *X)
3238 {
3239 	int	x;
3240 
3241 	x = mp_count_bits(X);
3242 	return (x <= 7) ? 2 : (x <= 36) ? 3 : (x <= 140) ? 4 : (x <= 450) ? 5 : (x <= 1303) ? 6 : (x <= 3529) ? 7 : 8;
3243 }
3244 
3245 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_sqr.c,v $ */
3246 /* Revision: 1.3 $ */
3247 /* Date: 2011/03/18 16:43:04 $ */
3248 
3249 #define TAB_SIZE 256
3250 
3251 static int
basic_exponent_mod(mp_int * G,mp_int * X,mp_int * P,mp_int * Y,int redmode)3252 basic_exponent_mod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
3253 {
3254 	mp_digit buf;
3255 	mp_int  M[TAB_SIZE], res, mu;
3256 	int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
3257 	int	(*redux)(mp_int*,mp_int*,mp_int*);
3258 
3259 	winsize = find_window_size(X);
3260 
3261 	/* init M array */
3262 	/* init first cell */
3263 	if ((err = mp_init(&M[1])) != MP_OKAY) {
3264 		return err;
3265 	}
3266 
3267 	/* now init the second half of the array */
3268 	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3269 		if ((err = mp_init(&M[x])) != MP_OKAY) {
3270 			for (y = 1<<(winsize-1); y < x; y++) {
3271 				mp_clear(&M[y]);
3272 			}
3273 			mp_clear(&M[1]);
3274 			return err;
3275 		}
3276 	}
3277 
3278 	/* create mu, used for Barrett reduction */
3279 	if ((err = mp_init(&mu)) != MP_OKAY) {
3280 		goto LBL_M;
3281 	}
3282 
3283 	if (redmode == 0) {
3284 		if ((err = mp_reduce_setup(&mu, P)) != MP_OKAY) {
3285 			goto LBL_MU;
3286 		}
3287 		redux = mp_reduce;
3288 	} else {
3289 		if ((err = mp_reduce_2k_setup_l(P, &mu)) != MP_OKAY) {
3290 			goto LBL_MU;
3291 		}
3292 		redux = mp_reduce_2k_l;
3293 	}
3294 
3295 	/* create M table
3296 	*
3297 	* The M table contains powers of the base,
3298 	* e.g. M[x] = G**x mod P
3299 	*
3300 	* The first half of the table is not
3301 	* computed though accept for M[0] and M[1]
3302 	*/
3303 	if ((err = modulo(G, P, &M[1])) != MP_OKAY) {
3304 		goto LBL_MU;
3305 	}
3306 
3307 	/* compute the value at M[1<<(winsize-1)] by squaring
3308 	* M[1] (winsize-1) times
3309 	*/
3310 	if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
3311 		goto LBL_MU;
3312 	}
3313 
3314 	for (x = 0; x < (winsize - 1); x++) {
3315 		/* square it */
3316 		if ((err = square(&M[1 << (winsize - 1)],
3317 		       &M[1 << (winsize - 1)])) != MP_OKAY) {
3318 			goto LBL_MU;
3319 		}
3320 
3321 		/* reduce modulo P */
3322 		if ((err = redux(&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
3323 			goto LBL_MU;
3324 		}
3325 	}
3326 
3327 	/* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
3328 	* for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
3329 	*/
3330 	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
3331 		if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
3332 			goto LBL_MU;
3333 		}
3334 		if ((err = redux(&M[x], P, &mu)) != MP_OKAY) {
3335 			goto LBL_MU;
3336 		}
3337 	}
3338 
3339 	/* setup result */
3340 	if ((err = mp_init(&res)) != MP_OKAY) {
3341 		goto LBL_MU;
3342 	}
3343 	set_word(&res, 1);
3344 
3345 	/* set initial mode and bit cnt */
3346 	mode = 0;
3347 	bitcnt = 1;
3348 	buf = 0;
3349 	digidx = X->used - 1;
3350 	bitcpy = 0;
3351 	bitbuf = 0;
3352 
3353 	for (;;) {
3354 		/* grab next digit as required */
3355 		if (--bitcnt == 0) {
3356 			/* if digidx == -1 we are out of digits */
3357 			if (digidx == -1) {
3358 				break;
3359 			}
3360 			/* read next digit and reset the bitcnt */
3361 			buf = X->dp[digidx--];
3362 			bitcnt = (int) DIGIT_BIT;
3363 		}
3364 
3365 		/* grab the next msb from the exponent */
3366 		y = (unsigned)(buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
3367 		buf <<= (mp_digit)1;
3368 
3369 		/* if the bit is zero and mode == 0 then we ignore it
3370 		* These represent the leading zero bits before the first 1 bit
3371 		* in the exponent.  Technically this opt is not required but it
3372 		* does lower the # of trivial squaring/reductions used
3373 		*/
3374 		if (mode == 0 && y == 0) {
3375 			continue;
3376 		}
3377 
3378 		/* if the bit is zero and mode == 1 then we square */
3379 		if (mode == 1 && y == 0) {
3380 			if ((err = square(&res, &res)) != MP_OKAY) {
3381 				goto LBL_RES;
3382 			}
3383 			if ((err = redux(&res, P, &mu)) != MP_OKAY) {
3384 				goto LBL_RES;
3385 			}
3386 			continue;
3387 		}
3388 
3389 		/* else we add it to the window */
3390 		bitbuf |= (y << (winsize - ++bitcpy));
3391 		mode = 2;
3392 
3393 		if (bitcpy == winsize) {
3394 			/* ok window is filled so square as required and multiply  */
3395 			/* square first */
3396 			for (x = 0; x < winsize; x++) {
3397 				if ((err = square(&res, &res)) != MP_OKAY) {
3398 					goto LBL_RES;
3399 				}
3400 				if ((err = redux(&res, P, &mu)) != MP_OKAY) {
3401 					goto LBL_RES;
3402 				}
3403 			}
3404 
3405 			/* then multiply */
3406 			if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) {
3407 				goto LBL_RES;
3408 			}
3409 			if ((err = redux(&res, P, &mu)) != MP_OKAY) {
3410 				goto LBL_RES;
3411 			}
3412 
3413 			/* empty window and reset */
3414 			bitcpy = 0;
3415 			bitbuf = 0;
3416 			mode = 1;
3417 		}
3418 	}
3419 
3420 	/* if bits remain then square/multiply */
3421 	if (mode == 2 && bitcpy > 0) {
3422 		/* square then multiply if the bit is set */
3423 		for (x = 0; x < bitcpy; x++) {
3424 			if ((err = square(&res, &res)) != MP_OKAY) {
3425 				goto LBL_RES;
3426 			}
3427 			if ((err = redux(&res, P, &mu)) != MP_OKAY) {
3428 				goto LBL_RES;
3429 			}
3430 
3431 			bitbuf <<= 1;
3432 			if ((bitbuf & (1 << winsize)) != 0) {
3433 				/* then multiply */
3434 				if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) {
3435 					goto LBL_RES;
3436 				}
3437 				if ((err = redux(&res, P, &mu)) != MP_OKAY) {
3438 					goto LBL_RES;
3439 				}
3440 			}
3441 		}
3442 	}
3443 
3444 	mp_exch(&res, Y);
3445 	err = MP_OKAY;
3446 LBL_RES:
3447 	mp_clear(&res);
3448 LBL_MU:
3449 	mp_clear(&mu);
3450 LBL_M:
3451 	mp_clear(&M[1]);
3452 	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
3453 		mp_clear(&M[x]);
3454 	}
3455 	return err;
3456 }
3457 
3458 /* determines if a number is a valid DR modulus */
3459 static int
is_diminished_radix_modulus(mp_int * a)3460 is_diminished_radix_modulus(mp_int *a)
3461 {
3462 	int ix;
3463 
3464 	/* must be at least two digits */
3465 	if (a->used < 2) {
3466 		return 0;
3467 	}
3468 
3469 	/* must be of the form b**k - a [a <= b] so all
3470 	* but the first digit must be equal to -1 (mod b).
3471 	*/
3472 	for (ix = 1; ix < a->used; ix++) {
3473 		if (a->dp[ix] != MP_MASK) {
3474 			  return 0;
3475 		}
3476 	}
3477 	return 1;
3478 }
3479 
3480 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_is_modulus.c,v $ */
3481 /* Revision: 1.1.1.1 $ */
3482 /* Date: 2011/03/12 22:58:18 $ */
3483 
3484 /* determines if mp_reduce_2k can be used */
3485 static int
mp_reduce_is_2k(mp_int * a)3486 mp_reduce_is_2k(mp_int *a)
3487 {
3488 	int ix, iy, iw;
3489 	mp_digit iz;
3490 
3491 	if (a->used == 0) {
3492 		return MP_NO;
3493 	}
3494 	if (a->used == 1) {
3495 		return MP_YES;
3496 	}
3497 	if (a->used > 1) {
3498 		iy = mp_count_bits(a);
3499 		iz = 1;
3500 		iw = 1;
3501 
3502 		/* Test every bit from the second digit up, must be 1 */
3503 		for (ix = DIGIT_BIT; ix < iy; ix++) {
3504 			if ((a->dp[iw] & iz) == 0) {
3505 				return MP_NO;
3506 			}
3507 			iz <<= 1;
3508 			if (iz > (mp_digit)MP_MASK) {
3509 				++iw;
3510 				iz = 1;
3511 			}
3512 		}
3513 	}
3514 	return MP_YES;
3515 }
3516 
3517 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_is_2k.c,v $ */
3518 /* Revision: 1.1.1.1 $ */
3519 /* Date: 2011/03/12 22:58:18 $ */
3520 
3521 
3522 /* d = a * b (mod c) */
3523 static int
multiply_modulo(mp_int * d,mp_int * a,mp_int * b,mp_int * c)3524 multiply_modulo(mp_int *d, mp_int * a, mp_int * b, mp_int * c)
3525 {
3526 	mp_int  t;
3527 	int     res;
3528 
3529 	if ((res = mp_init(&t)) != MP_OKAY) {
3530 		return res;
3531 	}
3532 
3533 	if ((res = signed_multiply(a, b, &t)) != MP_OKAY) {
3534 		mp_clear(&t);
3535 		return res;
3536 	}
3537 	res = modulo(&t, c, d);
3538 	mp_clear(&t);
3539 	return res;
3540 }
3541 
3542 /* d = a + b (mod c) */
3543 static int
add_modulo(mp_int * d,mp_int * a,mp_int * b,mp_int * c)3544 add_modulo(mp_int *d, mp_int * a, mp_int * b, mp_int * c)
3545 {
3546 	mp_int  t;
3547 	int     res;
3548 
3549 	if ((res = mp_init(&t)) != MP_OKAY) {
3550 		return res;
3551 	}
3552 	if ((res = signed_add(a, b, &t)) != MP_OKAY) {
3553 		mp_clear(&t);
3554 		return res;
3555 	}
3556 	res = modulo(&t, c, d);
3557 	mp_clear(&t);
3558 	return res;
3559 }
3560 
3561 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_mulmod.c,v $ */
3562 /* Revision: 1.1.1.1 $ */
3563 /* Date: 2011/03/12 22:58:18 $ */
3564 
3565 /* setups the montgomery reduction stuff */
3566 static int
mp_montgomery_setup(mp_int * n,mp_digit * rho)3567 mp_montgomery_setup(mp_int * n, mp_digit * rho)
3568 {
3569 	mp_digit x, b;
3570 
3571 	/* fast inversion mod 2**k
3572 	*
3573 	* Based on the fact that
3574 	*
3575 	* XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
3576 	*                    =>  2*X*A - X*X*A*A = 1
3577 	*                    =>  2*(1) - (1)     = 1
3578 	*/
3579 	b = n->dp[0];
3580 
3581 	if ((b & 1) == 0) {
3582 		return MP_VAL;
3583 	}
3584 
3585 	x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
3586 	x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
3587 	x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
3588 	x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
3589 	if (/*CONSTCOND*/sizeof(mp_digit) == 8) {
3590 		x *= 2 - b * x;	/* here x*a==1 mod 2**64 */
3591 	}
3592 
3593 	/* rho = -1/m mod b */
3594 	*rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
3595 
3596 	return MP_OKAY;
3597 }
3598 
3599 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_setup.c,v $ */
3600 /* Revision: 1.1.1.1 $ */
3601 /* Date: 2011/03/12 22:58:18 $ */
3602 
3603 /* computes xR**-1 == x (mod N) via Montgomery Reduction
3604  *
3605  * This is an optimized implementation of montgomery_reduce
3606  * which uses the comba method to quickly calculate the columns of the
3607  * reduction.
3608  *
3609  * Based on Algorithm 14.32 on pp.601 of HAC.
3610 */
3611 static int
fast_mp_montgomery_reduce(mp_int * x,mp_int * n,mp_digit rho)3612 fast_mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho)
3613 {
3614 	int     ix, res, olduse;
3615 	/*LINTED*/
3616 	mp_word W[MP_WARRAY];
3617 
3618 	/* get old used count */
3619 	olduse = x->used;
3620 
3621 	/* grow a as required */
3622 	if (x->alloc < n->used + 1) {
3623 		if ((res = mp_grow(x, n->used + 1)) != MP_OKAY) {
3624 			return res;
3625 		}
3626 	}
3627 
3628 	/* first we have to get the digits of the input into
3629 	* an array of double precision words W[...]
3630 	*/
3631 	{
3632 		mp_word *_W;
3633 		mp_digit *tmpx;
3634 
3635 		/* alias for the W[] array */
3636 		_W = W;
3637 
3638 		/* alias for the digits of  x*/
3639 		tmpx = x->dp;
3640 
3641 		/* copy the digits of a into W[0..a->used-1] */
3642 		for (ix = 0; ix < x->used; ix++) {
3643 			*_W++ = *tmpx++;
3644 		}
3645 
3646 		/* zero the high words of W[a->used..m->used*2] */
3647 		for (; ix < n->used * 2 + 1; ix++) {
3648 			*_W++ = 0;
3649 		}
3650 	}
3651 
3652 	/* now we proceed to zero successive digits
3653 	* from the least significant upwards
3654 	*/
3655 	for (ix = 0; ix < n->used; ix++) {
3656 		/* mu = ai * m' mod b
3657 		*
3658 		* We avoid a double precision multiplication (which isn't required)
3659 		* by casting the value down to a mp_digit.  Note this requires
3660 		* that W[ix-1] have  the carry cleared (see after the inner loop)
3661 		*/
3662 		mp_digit mu;
3663 		mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
3664 
3665 		/* a = a + mu * m * b**i
3666 		*
3667 		* This is computed in place and on the fly.  The multiplication
3668 		* by b**i is handled by offseting which columns the results
3669 		* are added to.
3670 		*
3671 		* Note the comba method normally doesn't handle carries in the
3672 		* inner loop In this case we fix the carry from the previous
3673 		* column since the Montgomery reduction requires digits of the
3674 		* result (so far) [see above] to work.  This is
3675 		* handled by fixing up one carry after the inner loop.  The
3676 		* carry fixups are done in order so after these loops the
3677 		* first m->used words of W[] have the carries fixed
3678 		*/
3679 		{
3680 			int iy;
3681 			mp_digit *tmpn;
3682 			mp_word *_W;
3683 
3684 			/* alias for the digits of the modulus */
3685 			tmpn = n->dp;
3686 
3687 			/* Alias for the columns set by an offset of ix */
3688 			_W = W + ix;
3689 
3690 			/* inner loop */
3691 			for (iy = 0; iy < n->used; iy++) {
3692 				  *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
3693 			}
3694 		}
3695 
3696 		/* now fix carry for next digit, W[ix+1] */
3697 		W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
3698 	}
3699 
3700 	/* now we have to propagate the carries and
3701 	* shift the words downward [all those least
3702 	* significant digits we zeroed].
3703 	*/
3704 	{
3705 		mp_digit *tmpx;
3706 		mp_word *_W, *_W1;
3707 
3708 		/* nox fix rest of carries */
3709 
3710 		/* alias for current word */
3711 		_W1 = W + ix;
3712 
3713 		/* alias for next word, where the carry goes */
3714 		_W = W + ++ix;
3715 
3716 		for (; ix <= n->used * 2 + 1; ix++) {
3717 			*_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
3718 		}
3719 
3720 		/* copy out, A = A/b**n
3721 		*
3722 		* The result is A/b**n but instead of converting from an
3723 		* array of mp_word to mp_digit than calling rshift_digits
3724 		* we just copy them in the right order
3725 		*/
3726 
3727 		/* alias for destination word */
3728 		tmpx = x->dp;
3729 
3730 		/* alias for shifted double precision result */
3731 		_W = W + n->used;
3732 
3733 		for (ix = 0; ix < n->used + 1; ix++) {
3734 			*tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
3735 		}
3736 
3737 		/* zero oldused digits, if the input a was larger than
3738 		* m->used+1 we'll have to clear the digits
3739 		*/
3740 		for (; ix < olduse; ix++) {
3741 			*tmpx++ = 0;
3742 		}
3743 	}
3744 
3745 	/* set the max used and clamp */
3746 	x->used = n->used + 1;
3747 	trim_unused_digits(x);
3748 
3749 	/* if A >= m then A = A - m */
3750 	if (compare_magnitude(x, n) != MP_LT) {
3751 		return basic_subtract(x, n, x);
3752 	}
3753 	return MP_OKAY;
3754 }
3755 
3756 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_mp_montgomery_reduce.c,v $ */
3757 /* Revision: 1.2 $ */
3758 /* Date: 2011/03/18 16:22:09 $ */
3759 
3760 /* computes xR**-1 == x (mod N) via Montgomery Reduction */
3761 static int
mp_montgomery_reduce(mp_int * x,mp_int * n,mp_digit rho)3762 mp_montgomery_reduce(mp_int * x, mp_int * n, mp_digit rho)
3763 {
3764 	int     ix, res, digs;
3765 	mp_digit mu;
3766 
3767 	/* can the fast reduction [comba] method be used?
3768 	*
3769 	* Note that unlike in mul you're safely allowed *less*
3770 	* than the available columns [255 per default] since carries
3771 	* are fixed up in the inner loop.
3772 	*/
3773 	digs = n->used * 2 + 1;
3774 	if (can_use_fast_column_array(digs, n->used)) {
3775 		return fast_mp_montgomery_reduce(x, n, rho);
3776 	}
3777 
3778 	/* grow the input as required */
3779 	if (x->alloc < digs) {
3780 		if ((res = mp_grow(x, digs)) != MP_OKAY) {
3781 			return res;
3782 		}
3783 	}
3784 	x->used = digs;
3785 
3786 	for (ix = 0; ix < n->used; ix++) {
3787 		/* mu = ai * rho mod b
3788 		*
3789 		* The value of rho must be precalculated via
3790 		* montgomery_setup() such that
3791 		* it equals -1/n0 mod b this allows the
3792 		* following inner loop to reduce the
3793 		* input one digit at a time
3794 		*/
3795 		mu = (mp_digit) (((mp_word)x->dp[ix]) * ((mp_word)rho) & MP_MASK);
3796 
3797 		/* a = a + mu * m * b**i */
3798 		{
3799 			int iy;
3800 			mp_digit *tmpn, *tmpx, carry;
3801 			mp_word r;
3802 
3803 			/* alias for digits of the modulus */
3804 			tmpn = n->dp;
3805 
3806 			/* alias for the digits of x [the input] */
3807 			tmpx = x->dp + ix;
3808 
3809 			/* set the carry to zero */
3810 			carry = 0;
3811 
3812 			/* Multiply and add in place */
3813 			for (iy = 0; iy < n->used; iy++) {
3814 				/* compute product and sum */
3815 				r = ((mp_word)mu) * ((mp_word)*tmpn++) +
3816 					  ((mp_word) carry) + ((mp_word) * tmpx);
3817 
3818 				/* get carry */
3819 				carry = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
3820 
3821 				/* fix digit */
3822 				*tmpx++ = (mp_digit)(r & ((mp_word) MP_MASK));
3823 			}
3824 			/* At this point the ix'th digit of x should be zero */
3825 
3826 
3827 			/* propagate carries upwards as required*/
3828 			while (carry) {
3829 				*tmpx += carry;
3830 				carry = *tmpx >> DIGIT_BIT;
3831 				*tmpx++ &= MP_MASK;
3832 			}
3833 		}
3834 	}
3835 
3836 	/* at this point the n.used'th least
3837 	* significant digits of x are all zero
3838 	* which means we can shift x to the
3839 	* right by n.used digits and the
3840 	* residue is unchanged.
3841 	*/
3842 
3843 	/* x = x/b**n.used */
3844 	trim_unused_digits(x);
3845 	rshift_digits(x, n->used);
3846 
3847 	/* if x >= n then x = x - n */
3848 	if (compare_magnitude(x, n) != MP_LT) {
3849 		return basic_subtract(x, n, x);
3850 	}
3851 
3852 	return MP_OKAY;
3853 }
3854 
3855 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_reduce.c,v $ */
3856 /* Revision: 1.3 $ */
3857 /* Date: 2011/03/18 16:43:04 $ */
3858 
3859 /* determines the setup value */
3860 static void
diminished_radix_setup(mp_int * a,mp_digit * d)3861 diminished_radix_setup(mp_int *a, mp_digit *d)
3862 {
3863 	/* the casts are required if DIGIT_BIT is one less than
3864 	* the number of bits in a mp_digit [e.g. DIGIT_BIT==31]
3865 	*/
3866 	*d = (mp_digit)((((mp_word)1) << ((mp_word)DIGIT_BIT)) -
3867 		((mp_word)a->dp[0]));
3868 }
3869 
3870 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_setup.c,v $ */
3871 /* Revision: 1.1.1.1 $ */
3872 /* Date: 2011/03/12 22:58:18 $ */
3873 
3874 /* reduce "x" in place modulo "n" using the Diminished Radix algorithm.
3875  *
3876  * Based on algorithm from the paper
3877  *
3878  * "Generating Efficient Primes for Discrete Log Cryptosystems"
3879  *                 Chae Hoon Lim, Pil Joong Lee,
3880  *          POSTECH Information Research Laboratories
3881  *
3882  * The modulus must be of a special format [see manual]
3883  *
3884  * Has been modified to use algorithm 7.10 from the LTM book instead
3885  *
3886  * Input x must be in the range 0 <= x <= (n-1)**2
3887  */
3888 static int
diminished_radix_reduce(mp_int * x,mp_int * n,mp_digit k)3889 diminished_radix_reduce(mp_int * x, mp_int * n, mp_digit k)
3890 {
3891 	int      err, i, m;
3892 	mp_word  r;
3893 	mp_digit mu, *tmpx1, *tmpx2;
3894 
3895 	/* m = digits in modulus */
3896 	m = n->used;
3897 
3898 	/* ensure that "x" has at least 2m digits */
3899 	if (x->alloc < m + m) {
3900 		if ((err = mp_grow(x, m + m)) != MP_OKAY) {
3901 			return err;
3902 		}
3903 	}
3904 
3905 	/* top of loop, this is where the code resumes if
3906 	* another reduction pass is required.
3907 	*/
3908 top:
3909 	/* aliases for digits */
3910 	/* alias for lower half of x */
3911 	tmpx1 = x->dp;
3912 
3913 	/* alias for upper half of x, or x/B**m */
3914 	tmpx2 = x->dp + m;
3915 
3916 	/* set carry to zero */
3917 	mu = 0;
3918 
3919 	/* compute (x mod B**m) + k * [x/B**m] inline and inplace */
3920 	for (i = 0; i < m; i++) {
3921 		r = ((mp_word)*tmpx2++) * ((mp_word)k) + *tmpx1 + mu;
3922 		*tmpx1++  = (mp_digit)(r & MP_MASK);
3923 		mu = (mp_digit)(r >> ((mp_word)DIGIT_BIT));
3924 	}
3925 
3926 	/* set final carry */
3927 	*tmpx1++ = mu;
3928 
3929 	/* zero words above m */
3930 	for (i = m + 1; i < x->used; i++) {
3931 		*tmpx1++ = 0;
3932 	}
3933 
3934 	/* clamp, sub and return */
3935 	trim_unused_digits(x);
3936 
3937 	/* if x >= n then subtract and reduce again
3938 	* Each successive "recursion" makes the input smaller and smaller.
3939 	*/
3940 	if (compare_magnitude(x, n) != MP_LT) {
3941 		basic_subtract(x, n, x);
3942 		goto top;
3943 	}
3944 	return MP_OKAY;
3945 }
3946 
3947 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_dr_reduce.c,v $ */
3948 /* Revision: 1.1.1.1 $ */
3949 /* Date: 2011/03/12 22:58:18 $ */
3950 
3951 /* determines the setup value */
3952 static int
mp_reduce_2k_setup(mp_int * a,mp_digit * d)3953 mp_reduce_2k_setup(mp_int *a, mp_digit *d)
3954 {
3955 	int res, p;
3956 	mp_int tmp;
3957 
3958 	if ((res = mp_init(&tmp)) != MP_OKAY) {
3959 		return res;
3960 	}
3961 
3962 	p = mp_count_bits(a);
3963 	if ((res = mp_2expt(&tmp, p)) != MP_OKAY) {
3964 		mp_clear(&tmp);
3965 		return res;
3966 	}
3967 
3968 	if ((res = basic_subtract(&tmp, a, &tmp)) != MP_OKAY) {
3969 		mp_clear(&tmp);
3970 		return res;
3971 	}
3972 
3973 	*d = tmp.dp[0];
3974 	mp_clear(&tmp);
3975 	return MP_OKAY;
3976 }
3977 
3978 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k_setup.c,v $ */
3979 /* Revision: 1.1.1.1 $ */
3980 /* Date: 2011/03/12 22:58:18 $ */
3981 
3982 /* reduces a modulo n where n is of the form 2**p - d */
3983 static int
mp_reduce_2k(mp_int * a,mp_int * n,mp_digit d)3984 mp_reduce_2k(mp_int *a, mp_int *n, mp_digit d)
3985 {
3986 	mp_int q;
3987 	int    p, res;
3988 
3989 	if ((res = mp_init(&q)) != MP_OKAY) {
3990 		return res;
3991 	}
3992 
3993 	p = mp_count_bits(n);
3994 top:
3995 	/* q = a/2**p, a = a mod 2**p */
3996 	if ((res = rshift_bits(a, p, &q, a)) != MP_OKAY) {
3997 		goto ERR;
3998 	}
3999 
4000 	if (d != 1) {
4001 		/* q = q * d */
4002 		if ((res = multiply_digit(&q, d, &q)) != MP_OKAY) {
4003 			 goto ERR;
4004 		}
4005 	}
4006 
4007 	/* a = a + q */
4008 	if ((res = basic_add(a, &q, a)) != MP_OKAY) {
4009 		goto ERR;
4010 	}
4011 
4012 	if (compare_magnitude(a, n) != MP_LT) {
4013 		basic_subtract(a, n, a);
4014 		goto top;
4015 	}
4016 
4017 ERR:
4018 	mp_clear(&q);
4019 	return res;
4020 }
4021 
4022 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_reduce_2k.c,v $ */
4023 /* Revision: 1.1.1.1 $ */
4024 /* Date: 2011/03/12 22:58:18 $ */
4025 
4026 /*
4027  * shifts with subtractions when the result is greater than b.
4028  *
4029  * The method is slightly modified to shift B unconditionally upto just under
4030  * the leading bit of b.  This saves alot of multiple precision shifting.
4031  */
4032 static int
mp_montgomery_calc_normalization(mp_int * a,mp_int * b)4033 mp_montgomery_calc_normalization(mp_int * a, mp_int * b)
4034 {
4035 	int     x, bits, res;
4036 
4037 	/* how many bits of last digit does b use */
4038 	bits = mp_count_bits(b) % DIGIT_BIT;
4039 
4040 	if (b->used > 1) {
4041 		if ((res = mp_2expt(a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
4042 			return res;
4043 		}
4044 	} else {
4045 		set_word(a, 1);
4046 		bits = 1;
4047 	}
4048 
4049 
4050 	/* now compute C = A * B mod b */
4051 	for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
4052 		if ((res = doubled(a, a)) != MP_OKAY) {
4053 			return res;
4054 		}
4055 		if (compare_magnitude(a, b) != MP_LT) {
4056 			if ((res = basic_subtract(a, b, a)) != MP_OKAY) {
4057 				return res;
4058 			}
4059 		}
4060 	}
4061 
4062 	return MP_OKAY;
4063 }
4064 
4065 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_mp_montgomery_calc_normalization.c,v $ */
4066 /* Revision: 1.1.1.1 $ */
4067 /* Date: 2011/03/12 22:58:18 $ */
4068 
4069 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
4070  *
4071  * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
4072  * The value of k changes based on the size of the exponent.
4073  *
4074  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
4075  */
4076 
4077 #define TAB_SIZE 256
4078 
4079 static int
fast_exponent_modulo(mp_int * G,mp_int * X,mp_int * P,mp_int * Y,int redmode)4080 fast_exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
4081 {
4082 	mp_int  M[TAB_SIZE], res;
4083 	mp_digit buf, mp;
4084 	int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
4085 
4086 	/* use a pointer to the reduction algorithm.  This allows us to use
4087 	* one of many reduction algorithms without modding the guts of
4088 	* the code with if statements everywhere.
4089 	*/
4090 	int     (*redux)(mp_int*,mp_int*,mp_digit);
4091 
4092 	winsize = find_window_size(X);
4093 
4094 	/* init M array */
4095 	/* init first cell */
4096 	if ((err = mp_init(&M[1])) != MP_OKAY) {
4097 		return err;
4098 	}
4099 
4100 	/* now init the second half of the array */
4101 	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4102 		if ((err = mp_init(&M[x])) != MP_OKAY) {
4103 			for (y = 1<<(winsize-1); y < x; y++) {
4104 				mp_clear(&M[y]);
4105 			}
4106 			mp_clear(&M[1]);
4107 			return err;
4108 		}
4109 	}
4110 
4111 	/* determine and setup reduction code */
4112 	if (redmode == 0) {
4113 		/* now setup montgomery  */
4114 		if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
4115 			goto LBL_M;
4116 		}
4117 
4118 		/* automatically pick the comba one if available (saves quite a few calls/ifs) */
4119 		if (can_use_fast_column_array(P->used + P->used + 1, P->used)) {
4120 			redux = fast_mp_montgomery_reduce;
4121 		} else {
4122 			/* use slower baseline Montgomery method */
4123 			redux = mp_montgomery_reduce;
4124 		}
4125 	} else if (redmode == 1) {
4126 		/* setup DR reduction for moduli of the form B**k - b */
4127 		diminished_radix_setup(P, &mp);
4128 		redux = diminished_radix_reduce;
4129 	} else {
4130 		/* setup DR reduction for moduli of the form 2**k - b */
4131 		if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
4132 			goto LBL_M;
4133 		}
4134 		redux = mp_reduce_2k;
4135 	}
4136 
4137 	/* setup result */
4138 	if ((err = mp_init(&res)) != MP_OKAY) {
4139 		goto LBL_M;
4140 	}
4141 
4142 	/* create M table
4143 	*
4144 
4145 	*
4146 	* The first half of the table is not computed though accept for M[0] and M[1]
4147 	*/
4148 
4149 	if (redmode == 0) {
4150 		/* now we need R mod m */
4151 		if ((err = mp_montgomery_calc_normalization(&res, P)) != MP_OKAY) {
4152 			goto LBL_RES;
4153 		}
4154 
4155 		/* now set M[1] to G * R mod m */
4156 		if ((err = multiply_modulo(&M[1], G, &res, P)) != MP_OKAY) {
4157 			goto LBL_RES;
4158 		}
4159 	} else {
4160 		set_word(&res, 1);
4161 		if ((err = modulo(G, P, &M[1])) != MP_OKAY) {
4162 			goto LBL_RES;
4163 		}
4164 	}
4165 
4166 	/* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
4167 	if ((err = mp_copy( &M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
4168 		goto LBL_RES;
4169 	}
4170 
4171 	for (x = 0; x < (winsize - 1); x++) {
4172 		if ((err = square(&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
4173 			goto LBL_RES;
4174 		}
4175 		if ((err = (*redux)(&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
4176 			goto LBL_RES;
4177 		}
4178 	}
4179 
4180 	/* create upper table */
4181 	for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
4182 		if ((err = signed_multiply(&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
4183 			goto LBL_RES;
4184 		}
4185 		if ((err = (*redux)(&M[x], P, mp)) != MP_OKAY) {
4186 			goto LBL_RES;
4187 		}
4188 	}
4189 
4190 	/* set initial mode and bit cnt */
4191 	mode = 0;
4192 	bitcnt = 1;
4193 	buf = 0;
4194 	digidx = X->used - 1;
4195 	bitcpy = 0;
4196 	bitbuf = 0;
4197 
4198 	for (;;) {
4199 		/* grab next digit as required */
4200 		if (--bitcnt == 0) {
4201 			/* if digidx == -1 we are out of digits so break */
4202 			if (digidx == -1) {
4203 				break;
4204 			}
4205 			/* read next digit and reset bitcnt */
4206 			buf = X->dp[digidx--];
4207 			bitcnt = (int)DIGIT_BIT;
4208 		}
4209 
4210 		/* grab the next msb from the exponent */
4211 		y = (int)(mp_digit)((mp_digit)buf >> (unsigned)(DIGIT_BIT - 1)) & 1;
4212 		buf <<= (mp_digit)1;
4213 
4214 		/* if the bit is zero and mode == 0 then we ignore it
4215 		* These represent the leading zero bits before the first 1 bit
4216 		* in the exponent.  Technically this opt is not required but it
4217 		* does lower the # of trivial squaring/reductions used
4218 		*/
4219 		if (mode == 0 && y == 0) {
4220 			continue;
4221 		}
4222 
4223 		/* if the bit is zero and mode == 1 then we square */
4224 		if (mode == 1 && y == 0) {
4225 			if ((err = square(&res, &res)) != MP_OKAY) {
4226 				goto LBL_RES;
4227 			}
4228 			if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4229 				goto LBL_RES;
4230 			}
4231 			continue;
4232 		}
4233 
4234 		/* else we add it to the window */
4235 		bitbuf |= (y << (winsize - ++bitcpy));
4236 		mode = 2;
4237 
4238 		if (bitcpy == winsize) {
4239 			/* ok window is filled so square as required and multiply  */
4240 			/* square first */
4241 			for (x = 0; x < winsize; x++) {
4242 				if ((err = square(&res, &res)) != MP_OKAY) {
4243 					goto LBL_RES;
4244 				}
4245 				if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4246 					goto LBL_RES;
4247 				}
4248 			}
4249 
4250 			/* then multiply */
4251 			if ((err = signed_multiply(&res, &M[bitbuf], &res)) != MP_OKAY) {
4252 				goto LBL_RES;
4253 			}
4254 			if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4255 				goto LBL_RES;
4256 			}
4257 
4258 			/* empty window and reset */
4259 			bitcpy = 0;
4260 			bitbuf = 0;
4261 			mode = 1;
4262 		}
4263 	}
4264 
4265 	/* if bits remain then square/multiply */
4266 	if (mode == 2 && bitcpy > 0) {
4267 		/* square then multiply if the bit is set */
4268 		for (x = 0; x < bitcpy; x++) {
4269 			if ((err = square(&res, &res)) != MP_OKAY) {
4270 				goto LBL_RES;
4271 			}
4272 			if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4273 				goto LBL_RES;
4274 			}
4275 
4276 			/* get next bit of the window */
4277 			bitbuf <<= 1;
4278 			if ((bitbuf & (1 << winsize)) != 0) {
4279 				/* then multiply */
4280 				if ((err = signed_multiply(&res, &M[1], &res)) != MP_OKAY) {
4281 					goto LBL_RES;
4282 				}
4283 				if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4284 					goto LBL_RES;
4285 				}
4286 			}
4287 		}
4288 	}
4289 
4290 	if (redmode == 0) {
4291 		/* fixup result if Montgomery reduction is used
4292 		* recall that any value in a Montgomery system is
4293 		* actually multiplied by R mod n.  So we have
4294 		* to reduce one more time to cancel out the factor
4295 		* of R.
4296 		*/
4297 		if ((err = (*redux)(&res, P, mp)) != MP_OKAY) {
4298 			goto LBL_RES;
4299 		}
4300 	}
4301 
4302 	/* swap res with Y */
4303 	mp_exch(&res, Y);
4304 	err = MP_OKAY;
4305 LBL_RES:
4306 	mp_clear(&res);
4307 LBL_M:
4308 	mp_clear(&M[1]);
4309 	for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
4310 		mp_clear(&M[x]);
4311 	}
4312 	return err;
4313 }
4314 
4315 /* Source: /usr/cvsroot/libtommath/dist/libtommath/bn_fast_exponent_modulo.c,v $ */
4316 /* Revision: 1.4 $ */
4317 /* Date: 2011/03/18 16:43:04 $ */
4318 
4319 /* this is a shell function that calls either the normal or Montgomery
4320  * exptmod functions.  Originally the call to the montgomery code was
4321  * embedded in the normal function but that wasted alot of stack space
4322  * for nothing (since 99% of the time the Montgomery code would be called)
4323  */
4324 static int
exponent_modulo(mp_int * G,mp_int * X,mp_int * P,mp_int * Y)4325 exponent_modulo(mp_int * G, mp_int * X, mp_int * P, mp_int *Y)
4326 {
4327 	int diminished_radix;
4328 
4329 	/* modulus P must be positive */
4330 	if (P->sign == MP_NEG) {
4331 		return MP_VAL;
4332 	}
4333 
4334 	/* if exponent X is negative we have to recurse */
4335 	if (X->sign == MP_NEG) {
4336 		mp_int tmpG, tmpX;
4337 		int err;
4338 
4339 		/* first compute 1/G mod P */
4340 		if ((err = mp_init(&tmpG)) != MP_OKAY) {
4341 			return err;
4342 		}
4343 		if ((err = modular_inverse(&tmpG, G, P)) != MP_OKAY) {
4344 			mp_clear(&tmpG);
4345 			return err;
4346 		}
4347 
4348 		/* now get |X| */
4349 		if ((err = mp_init(&tmpX)) != MP_OKAY) {
4350 			mp_clear(&tmpG);
4351 			return err;
4352 		}
4353 		if ((err = absolute(X, &tmpX)) != MP_OKAY) {
4354 			mp_clear_multi(&tmpG, &tmpX, NULL);
4355 			return err;
4356 		}
4357 
4358 		/* and now compute (1/G)**|X| instead of G**X [X < 0] */
4359 		err = exponent_modulo(&tmpG, &tmpX, P, Y);
4360 		mp_clear_multi(&tmpG, &tmpX, NULL);
4361 		return err;
4362 	}
4363 
4364 	/* modified diminished radix reduction */
4365 	if (mp_reduce_is_2k_l(P) == MP_YES) {
4366 		return basic_exponent_mod(G, X, P, Y, 1);
4367 	}
4368 
4369 	/* is it a DR modulus? */
4370 	diminished_radix = is_diminished_radix_modulus(P);
4371 
4372 	/* if not, is it a unrestricted DR modulus? */
4373 	if (!diminished_radix) {
4374 		diminished_radix = mp_reduce_is_2k(P) << 1;
4375 	}
4376 
4377 	/* if the modulus is odd or diminished_radix, use the montgomery method */
4378 	if (PGPV_BN_is_odd(P) == 1 || diminished_radix) {
4379 		return fast_exponent_modulo(G, X, P, Y, diminished_radix);
4380 	}
4381 	/* otherwise use the generic Barrett reduction technique */
4382 	return basic_exponent_mod(G, X, P, Y, 0);
4383 }
4384 
4385 /* reverse an array, used for radix code */
4386 static void
bn_reverse(unsigned char * s,int len)4387 bn_reverse(unsigned char *s, int len)
4388 {
4389 	int     ix, iy;
4390 	uint8_t t;
4391 
4392 	for (ix = 0, iy = len - 1; ix < iy ; ix++, --iy) {
4393 		t = s[ix];
4394 		s[ix] = s[iy];
4395 		s[iy] = t;
4396 	}
4397 }
4398 
4399 static inline int
is_power_of_two(mp_digit b,int * p)4400 is_power_of_two(mp_digit b, int *p)
4401 {
4402 	int x;
4403 
4404 	/* fast return if no power of two */
4405 	if ((b==0) || (b & (b-1))) {
4406 		return 0;
4407 	}
4408 
4409 	for (x = 0; x < DIGIT_BIT; x++) {
4410 		if (b == (((mp_digit)1)<<x)) {
4411 			*p = x;
4412 			return 1;
4413 		}
4414 	}
4415 	return 0;
4416 }
4417 
4418 /* single digit division (based on routine from MPI) */
4419 static int
signed_divide_word(mp_int * a,mp_digit b,mp_int * c,mp_digit * d)4420 signed_divide_word(mp_int *a, mp_digit b, mp_int *c, mp_digit *d)
4421 {
4422 	mp_int  q;
4423 	mp_word w;
4424 	mp_digit t;
4425 	int     res, ix;
4426 
4427 	/* cannot divide by zero */
4428 	if (b == 0) {
4429 		return MP_VAL;
4430 	}
4431 
4432 	/* quick outs */
4433 	if (b == 1 || MP_ISZERO(a) == 1) {
4434 		if (d != NULL) {
4435 			*d = 0;
4436 		}
4437 		if (c != NULL) {
4438 			return mp_copy(a, c);
4439 		}
4440 		return MP_OKAY;
4441 	}
4442 
4443 	/* power of two ? */
4444 	if (is_power_of_two(b, &ix) == 1) {
4445 		if (d != NULL) {
4446 			*d = a->dp[0] & ((((mp_digit)1)<<ix) - 1);
4447 		}
4448 		if (c != NULL) {
4449 			return rshift_bits(a, ix, c, NULL);
4450 		}
4451 		return MP_OKAY;
4452 	}
4453 
4454 	/* three? */
4455 	if (b == 3) {
4456 		return third(a, c, d);
4457 	}
4458 
4459 	/* no easy answer [c'est la vie].  Just division */
4460 	if ((res = mp_init_size(&q, a->used)) != MP_OKAY) {
4461 		return res;
4462 	}
4463 
4464 	q.used = a->used;
4465 	q.sign = a->sign;
4466 	w = 0;
4467 	for (ix = a->used - 1; ix >= 0; ix--) {
4468 		w = (w << ((mp_word)DIGIT_BIT)) | ((mp_word)a->dp[ix]);
4469 
4470 		if (w >= b) {
4471 			t = (mp_digit)(w / b);
4472 			w -= ((mp_word)t) * ((mp_word)b);
4473 		} else {
4474 			t = 0;
4475 		}
4476 		q.dp[ix] = (mp_digit)t;
4477 	}
4478 
4479 	if (d != NULL) {
4480 		*d = (mp_digit)w;
4481 	}
4482 
4483 	if (c != NULL) {
4484 		trim_unused_digits(&q);
4485 		mp_exch(&q, c);
4486 	}
4487 	mp_clear(&q);
4488 
4489 	return res;
4490 }
4491 
4492 static const mp_digit ltm_prime_tab[] = {
4493 	0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
4494 	0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
4495 	0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
4496 	0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F,
4497 #ifndef MP_8BIT
4498 	0x0083,
4499 	0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
4500 	0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
4501 	0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
4502 	0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
4503 
4504 	0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
4505 	0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
4506 	0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
4507 	0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
4508 	0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
4509 	0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
4510 	0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
4511 	0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
4512 
4513 	0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
4514 	0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
4515 	0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
4516 	0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
4517 	0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
4518 	0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
4519 	0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
4520 	0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
4521 
4522 	0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
4523 	0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
4524 	0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
4525 	0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
4526 	0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
4527 	0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
4528 	0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
4529 	0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
4530 #endif
4531 };
4532 
4533 #define PRIME_SIZE	__arraycount(ltm_prime_tab)
4534 
4535 static inline int
mp_prime_is_divisible(mp_int * a,int * result)4536 mp_prime_is_divisible(mp_int *a, int *result)
4537 {
4538 	int     err, ix;
4539 	mp_digit res;
4540 
4541 	/* default to not */
4542 	*result = MP_NO;
4543 
4544 	for (ix = 0; ix < (int)PRIME_SIZE; ix++) {
4545 		/* what is a mod LBL_prime_tab[ix] */
4546 		if ((err = signed_divide_word(a, ltm_prime_tab[ix], NULL, &res)) != MP_OKAY) {
4547 			return err;
4548 		}
4549 
4550 		/* is the residue zero? */
4551 		if (res == 0) {
4552 			*result = MP_YES;
4553 			return MP_OKAY;
4554 		}
4555 	}
4556 
4557 	return MP_OKAY;
4558 }
4559 
4560 /* single digit addition */
4561 static int
add_single_digit(mp_int * a,mp_digit b,mp_int * c)4562 add_single_digit(mp_int *a, mp_digit b, mp_int *c)
4563 {
4564 	int     res, ix, oldused;
4565 	mp_digit *tmpa, *tmpc, mu;
4566 
4567 	/* grow c as required */
4568 	if (c->alloc < a->used + 1) {
4569 		if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4570 			return res;
4571 		}
4572 	}
4573 
4574 	/* if a is negative and |a| >= b, call c = |a| - b */
4575 	if (a->sign == MP_NEG && (a->used > 1 || a->dp[0] >= b)) {
4576 		/* temporarily fix sign of a */
4577 		a->sign = MP_ZPOS;
4578 
4579 		/* c = |a| - b */
4580 		res = signed_subtract_word(a, b, c);
4581 
4582 		/* fix sign  */
4583 		a->sign = c->sign = MP_NEG;
4584 
4585 		/* clamp */
4586 		trim_unused_digits(c);
4587 
4588 		return res;
4589 	}
4590 
4591 	/* old number of used digits in c */
4592 	oldused = c->used;
4593 
4594 	/* sign always positive */
4595 	c->sign = MP_ZPOS;
4596 
4597 	/* source alias */
4598 	tmpa = a->dp;
4599 
4600 	/* destination alias */
4601 	tmpc = c->dp;
4602 
4603 	/* if a is positive */
4604 	if (a->sign == MP_ZPOS) {
4605 		/* add digit, after this we're propagating
4606 		* the carry.
4607 		*/
4608 		*tmpc = *tmpa++ + b;
4609 		mu = *tmpc >> DIGIT_BIT;
4610 		*tmpc++ &= MP_MASK;
4611 
4612 		/* now handle rest of the digits */
4613 		for (ix = 1; ix < a->used; ix++) {
4614 			*tmpc = *tmpa++ + mu;
4615 			mu = *tmpc >> DIGIT_BIT;
4616 			*tmpc++ &= MP_MASK;
4617 		}
4618 		/* set final carry */
4619 		ix++;
4620 		*tmpc++  = mu;
4621 
4622 		/* setup size */
4623 		c->used = a->used + 1;
4624 	} else {
4625 		/* a was negative and |a| < b */
4626 		c->used  = 1;
4627 
4628 		/* the result is a single digit */
4629 		if (a->used == 1) {
4630 			*tmpc++  =  b - a->dp[0];
4631 		} else {
4632 			*tmpc++  =  b;
4633 		}
4634 
4635 		/* setup count so the clearing of oldused
4636 		* can fall through correctly
4637 		*/
4638 		ix = 1;
4639 	}
4640 
4641 	/* now zero to oldused */
4642 	while (ix++ < oldused) {
4643 		*tmpc++ = 0;
4644 	}
4645 	trim_unused_digits(c);
4646 
4647 	return MP_OKAY;
4648 }
4649 
4650 /* single digit subtraction */
4651 static int
signed_subtract_word(mp_int * a,mp_digit b,mp_int * c)4652 signed_subtract_word(mp_int *a, mp_digit b, mp_int *c)
4653 {
4654 	mp_digit *tmpa, *tmpc, mu;
4655 	int       res, ix, oldused;
4656 
4657 	/* grow c as required */
4658 	if (c->alloc < a->used + 1) {
4659 		if ((res = mp_grow(c, a->used + 1)) != MP_OKAY) {
4660 			return res;
4661 		}
4662 	}
4663 
4664 	/* if a is negative just do an unsigned
4665 	* addition [with fudged signs]
4666 	*/
4667 	if (a->sign == MP_NEG) {
4668 		a->sign = MP_ZPOS;
4669 		res = add_single_digit(a, b, c);
4670 		a->sign = c->sign = MP_NEG;
4671 
4672 		/* clamp */
4673 		trim_unused_digits(c);
4674 
4675 		return res;
4676 	}
4677 
4678 	/* setup regs */
4679 	oldused = c->used;
4680 	tmpa = a->dp;
4681 	tmpc = c->dp;
4682 
4683 	/* if a <= b simply fix the single digit */
4684 	if ((a->used == 1 && a->dp[0] <= b) || a->used == 0) {
4685 		if (a->used == 1) {
4686 			*tmpc++ = b - *tmpa;
4687 		} else {
4688 			*tmpc++ = b;
4689 		}
4690 		ix = 1;
4691 
4692 		/* negative/1digit */
4693 		c->sign = MP_NEG;
4694 		c->used = 1;
4695 	} else {
4696 		/* positive/size */
4697 		c->sign = MP_ZPOS;
4698 		c->used = a->used;
4699 
4700 		/* subtract first digit */
4701 		*tmpc = *tmpa++ - b;
4702 		mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4703 		*tmpc++ &= MP_MASK;
4704 
4705 		/* handle rest of the digits */
4706 		for (ix = 1; ix < a->used; ix++) {
4707 			*tmpc = *tmpa++ - mu;
4708 			mu = *tmpc >> (sizeof(mp_digit) * CHAR_BIT - 1);
4709 			*tmpc++ &= MP_MASK;
4710 		}
4711 	}
4712 
4713 	/* zero excess digits */
4714 	while (ix++ < oldused) {
4715 		*tmpc++ = 0;
4716 	}
4717 	trim_unused_digits(c);
4718 	return MP_OKAY;
4719 }
4720 
4721 static const int lnz[16] = {
4722 	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
4723 };
4724 
4725 /* Counts the number of lsbs which are zero before the first zero bit */
4726 static int
mp_cnt_lsb(mp_int * a)4727 mp_cnt_lsb(mp_int *a)
4728 {
4729 	int x;
4730 	mp_digit q, qq;
4731 
4732 	/* easy out */
4733 	if (MP_ISZERO(a) == 1) {
4734 		return 0;
4735 	}
4736 
4737 	/* scan lower digits until non-zero */
4738 	for (x = 0; x < a->used && a->dp[x] == 0; x++) {
4739 	}
4740 	q = a->dp[x];
4741 	x *= DIGIT_BIT;
4742 
4743 	/* now scan this digit until a 1 is found */
4744 	if ((q & 1) == 0) {
4745 		do {
4746 			 qq  = q & 15;
4747 			 /* LINTED previous op ensures range of qq */
4748 			 x  += lnz[qq];
4749 			 q >>= 4;
4750 		} while (qq == 0);
4751 	}
4752 	return x;
4753 }
4754 
4755 /* c = a * a (mod b) */
4756 static int
square_modulo(mp_int * a,mp_int * b,mp_int * c)4757 square_modulo(mp_int *a, mp_int *b, mp_int *c)
4758 {
4759 	int     res;
4760 	mp_int  t;
4761 
4762 	if ((res = mp_init(&t)) != MP_OKAY) {
4763 		return res;
4764 	}
4765 
4766 	if ((res = square(a, &t)) != MP_OKAY) {
4767 		mp_clear(&t);
4768 		return res;
4769 	}
4770 	res = modulo(&t, b, c);
4771 	mp_clear(&t);
4772 	return res;
4773 }
4774 
4775 static int
mp_prime_miller_rabin(mp_int * a,mp_int * b,int * result)4776 mp_prime_miller_rabin(mp_int *a, mp_int *b, int *result)
4777 {
4778 	mp_int  n1, y, r;
4779 	int     s, j, err;
4780 
4781 	/* default */
4782 	*result = MP_NO;
4783 
4784 	/* ensure b > 1 */
4785 	if (compare_digit(b, 1) != MP_GT) {
4786 		return MP_VAL;
4787 	}
4788 
4789 	/* get n1 = a - 1 */
4790 	if ((err = mp_init_copy(&n1, a)) != MP_OKAY) {
4791 		return err;
4792 	}
4793 	if ((err = signed_subtract_word(&n1, 1, &n1)) != MP_OKAY) {
4794 		goto LBL_N1;
4795 	}
4796 
4797 	/* set 2**s * r = n1 */
4798 	if ((err = mp_init_copy(&r, &n1)) != MP_OKAY) {
4799 		goto LBL_N1;
4800 	}
4801 
4802 	/* count the number of least significant bits
4803 	* which are zero
4804 	*/
4805 	s = mp_cnt_lsb(&r);
4806 
4807 	/* now divide n - 1 by 2**s */
4808 	if ((err = rshift_bits(&r, s, &r, NULL)) != MP_OKAY) {
4809 		goto LBL_R;
4810 	}
4811 
4812 	/* compute y = b**r mod a */
4813 	if ((err = mp_init(&y)) != MP_OKAY) {
4814 		goto LBL_R;
4815 	}
4816 	if ((err = exponent_modulo(b, &r, a, &y)) != MP_OKAY) {
4817 		goto LBL_Y;
4818 	}
4819 
4820 	/* if y != 1 and y != n1 do */
4821 	if (compare_digit(&y, 1) != MP_EQ && signed_compare(&y, &n1) != MP_EQ) {
4822 		j = 1;
4823 		/* while j <= s-1 and y != n1 */
4824 		while ((j <= (s - 1)) && signed_compare(&y, &n1) != MP_EQ) {
4825 			if ((err = square_modulo(&y, a, &y)) != MP_OKAY) {
4826 				goto LBL_Y;
4827 			}
4828 
4829 			/* if y == 1 then composite */
4830 			if (compare_digit(&y, 1) == MP_EQ) {
4831 				goto LBL_Y;
4832 			}
4833 
4834 			++j;
4835 		}
4836 
4837 		/* if y != n1 then composite */
4838 		if (signed_compare(&y, &n1) != MP_EQ) {
4839 			goto LBL_Y;
4840 		}
4841 	}
4842 
4843 	/* probably prime now */
4844 	*result = MP_YES;
4845 LBL_Y:
4846 	mp_clear(&y);
4847 LBL_R:
4848 	mp_clear(&r);
4849 LBL_N1:
4850 	mp_clear(&n1);
4851 	return err;
4852 }
4853 
4854 /* performs a variable number of rounds of Miller-Rabin
4855  *
4856  * Probability of error after t rounds is no more than
4857 
4858  *
4859  * Sets result to 1 if probably prime, 0 otherwise
4860  */
4861 static int
mp_prime_is_prime(mp_int * a,int t,int * result)4862 mp_prime_is_prime(mp_int *a, int t, int *result)
4863 {
4864 	mp_int  b;
4865 	int     ix, err, res;
4866 
4867 	/* default to no */
4868 	*result = MP_NO;
4869 
4870 	/* valid value of t? */
4871 	if (t <= 0 || t > (int)PRIME_SIZE) {
4872 		return MP_VAL;
4873 	}
4874 
4875 	/* is the input equal to one of the primes in the table? */
4876 	for (ix = 0; ix < (int)PRIME_SIZE; ix++) {
4877 		if (compare_digit(a, ltm_prime_tab[ix]) == MP_EQ) {
4878 			*result = 1;
4879 			return MP_OKAY;
4880 		}
4881 	}
4882 
4883 	/* first perform trial division */
4884 	if ((err = mp_prime_is_divisible(a, &res)) != MP_OKAY) {
4885 		return err;
4886 	}
4887 
4888 	/* return if it was trivially divisible */
4889 	if (res == MP_YES) {
4890 		return MP_OKAY;
4891 	}
4892 
4893 	/* now perform the miller-rabin rounds */
4894 	if ((err = mp_init(&b)) != MP_OKAY) {
4895 		return err;
4896 	}
4897 
4898 	for (ix = 0; ix < t; ix++) {
4899 		/* set the prime */
4900 		set_word(&b, ltm_prime_tab[ix]);
4901 
4902 		if ((err = mp_prime_miller_rabin(a, &b, &res)) != MP_OKAY) {
4903 			goto LBL_B;
4904 		}
4905 
4906 		if (res == MP_NO) {
4907 			goto LBL_B;
4908 		}
4909 	}
4910 
4911 	/* passed the test */
4912 	*result = MP_YES;
4913 LBL_B:
4914 	mp_clear(&b);
4915 	return err;
4916 }
4917 
4918 /* returns size of ASCII reprensentation */
4919 static int
mp_radix_size(mp_int * a,int radix,int * size)4920 mp_radix_size(mp_int *a, int radix, int *size)
4921 {
4922 	int     res, digs;
4923 	mp_int  t;
4924 	mp_digit d;
4925 
4926 	*size = 0;
4927 
4928 	/* special case for binary */
4929 	if (radix == 2) {
4930 		*size = mp_count_bits(a) + (a->sign == MP_NEG ? 1 : 0) + 1;
4931 		return MP_OKAY;
4932 	}
4933 
4934 	/* make sure the radix is in range */
4935 	if (radix < 2 || radix > 64) {
4936 		return MP_VAL;
4937 	}
4938 
4939 	if (MP_ISZERO(a) == MP_YES) {
4940 		*size = 2;
4941 		return MP_OKAY;
4942 	}
4943 
4944 	/* digs is the digit count */
4945 	digs = 0;
4946 
4947 	/* if it's negative add one for the sign */
4948 	if (a->sign == MP_NEG) {
4949 		++digs;
4950 	}
4951 
4952 	/* init a copy of the input */
4953 	if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
4954 		return res;
4955 	}
4956 
4957 	/* force temp to positive */
4958 	t.sign = MP_ZPOS;
4959 
4960 	/* fetch out all of the digits */
4961 	while (MP_ISZERO(&t) == MP_NO) {
4962 		if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
4963 			mp_clear(&t);
4964 			return res;
4965 		}
4966 		++digs;
4967 	}
4968 	mp_clear(&t);
4969 
4970 	/* return digs + 1, the 1 is for the NULL byte that would be required. */
4971 	*size = digs + 1;
4972 	return MP_OKAY;
4973 }
4974 
4975 static const char *mp_s_rmap = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
4976 
4977 /* stores a bignum as a ASCII string in a given radix (2..64)
4978  *
4979  * Stores upto maxlen-1 chars and always a NULL byte
4980  */
4981 static int
mp_toradix_n(mp_int * a,char * str,int radix,int maxlen)4982 mp_toradix_n(mp_int * a, char *str, int radix, int maxlen)
4983 {
4984 	int     res, digs;
4985 	mp_int  t;
4986 	mp_digit d;
4987 	char   *_s = str;
4988 
4989 	/* check range of the maxlen, radix */
4990 	if (maxlen < 2 || radix < 2 || radix > 64) {
4991 		return MP_VAL;
4992 	}
4993 
4994 	/* quick out if its zero */
4995 	if (MP_ISZERO(a) == MP_YES) {
4996 		*str++ = '0';
4997 		*str = '\0';
4998 		return MP_OKAY;
4999 	}
5000 
5001 	if ((res = mp_init_copy(&t, a)) != MP_OKAY) {
5002 		return res;
5003 	}
5004 
5005 	/* if it is negative output a - */
5006 	if (t.sign == MP_NEG) {
5007 		/* we have to reverse our digits later... but not the - sign!! */
5008 		++_s;
5009 
5010 		/* store the flag and mark the number as positive */
5011 		*str++ = '-';
5012 		t.sign = MP_ZPOS;
5013 
5014 		/* subtract a char */
5015 		--maxlen;
5016 	}
5017 
5018 	digs = 0;
5019 	while (MP_ISZERO(&t) == 0) {
5020 		if (--maxlen < 1) {
5021 			/* no more room */
5022 			break;
5023 		}
5024 		if ((res = signed_divide_word(&t, (mp_digit) radix, &t, &d)) != MP_OKAY) {
5025 			mp_clear(&t);
5026 			return res;
5027 		}
5028 		/* LINTED -- radix' range is checked above, limits d's range */
5029 		*str++ = mp_s_rmap[d];
5030 		++digs;
5031 	}
5032 
5033 	/* reverse the digits of the string.  In this case _s points
5034 	* to the first digit [exluding the sign] of the number
5035 	*/
5036 	bn_reverse((unsigned char *)_s, digs);
5037 
5038 	/* append a NULL so the string is properly terminated */
5039 	*str = '\0';
5040 
5041 	mp_clear(&t);
5042 	return MP_OKAY;
5043 }
5044 
5045 static char *
formatbn(const PGPV_BIGNUM * a,const int radix)5046 formatbn(const PGPV_BIGNUM *a, const int radix)
5047 {
5048 	char	*s;
5049 	int	 len;
5050 
5051 	if (mp_radix_size(__UNCONST(a), radix, &len) != MP_OKAY) {
5052 		return NULL;
5053 	}
5054 	if ((s = allocate(1, (size_t)len)) != NULL) {
5055 		if (mp_toradix_n(__UNCONST(a), s, radix, len) != MP_OKAY) {
5056 			deallocate(s, (size_t)len);
5057 			return NULL;
5058 		}
5059 	}
5060 	return s;
5061 }
5062 
5063 static int
mp_getradix_num(mp_int * a,int radix,char * s)5064 mp_getradix_num(mp_int *a, int radix, char *s)
5065 {
5066 	int err, ch, neg, y;
5067 
5068 	/* clear a */
5069 	mp_zero(a);
5070 	/* if first digit is - then set negative */
5071 	if ((ch = *s++) == '-') {
5072 		neg = MP_NEG;
5073 		ch = *s++;
5074 	} else {
5075 		neg = MP_ZPOS;
5076 	}
5077 	for (;;) {
5078 		/* fold lower to upper case */
5079 		if (ch >= 'a' && ch <= 'z') {
5080 			ch = (ch - 'a') + 'A';
5081 		}
5082 		/* find index y in the radix map */
5083 		for (y = 0; y < radix; y++) {
5084 			if (mp_s_rmap[y] == ch) {
5085 				break;
5086 			}
5087 		}
5088 		if (y == radix) {
5089 			break;
5090 		}
5091 		/* shift up and add */
5092 		if ((err = multiply_digit(a, radix, a)) != MP_OKAY) {
5093 			return err;
5094 		}
5095 		if ((err = add_single_digit(a, y, a)) != MP_OKAY) {
5096 			return err;
5097 		}
5098 		ch = *s++;
5099 	}
5100 	if (compare_digit(a, 0) != MP_EQ) {
5101 		a->sign = neg;
5102 	}
5103 	return MP_OKAY;
5104 }
5105 
5106 static int
getbn(PGPV_BIGNUM ** a,const char * str,int radix)5107 getbn(PGPV_BIGNUM **a, const char *str, int radix)
5108 {
5109 	int	len;
5110 
5111 	if (a == NULL || str == NULL || (*a = PGPV_BN_new()) == NULL) {
5112 		return 0;
5113 	}
5114 	if (mp_getradix_num(*a, radix, __UNCONST(str)) != MP_OKAY) {
5115 		return 0;
5116 	}
5117 	mp_radix_size(__UNCONST(*a), radix, &len);
5118 	return len - 1;
5119 }
5120 
5121 /* d = a - b (mod c) */
5122 static int
subtract_modulo(mp_int * a,mp_int * b,mp_int * c,mp_int * d)5123 subtract_modulo(mp_int *a, mp_int *b, mp_int *c, mp_int *d)
5124 {
5125 	int     res;
5126 	mp_int  t;
5127 
5128 
5129 	if ((res = mp_init(&t)) != MP_OKAY) {
5130 		return res;
5131 	}
5132 
5133 	if ((res = signed_subtract(a, b, &t)) != MP_OKAY) {
5134 		mp_clear(&t);
5135 		return res;
5136 	}
5137 	res = modulo(&t, c, d);
5138 	mp_clear(&t);
5139 	return res;
5140 }
5141 
5142 /* bn_mp_gcd.c */
5143 /* Greatest Common Divisor using the binary method */
5144 static int
mp_gcd(mp_int * a,mp_int * b,mp_int * c)5145 mp_gcd(mp_int *a, mp_int *b, mp_int *c)
5146 {
5147 	mp_int  u, v;
5148 	int     k, u_lsb, v_lsb, res;
5149 
5150 	/* either zero than gcd is the largest */
5151 	if (PGPV_BN_is_zero(a) == MP_YES) {
5152 		return absolute(b, c);
5153 	}
5154 	if (PGPV_BN_is_zero(b) == MP_YES) {
5155 		return absolute(a, c);
5156 	}
5157 
5158 	/* get copies of a and b we can modify */
5159 	if ((res = mp_init_copy(&u, a)) != MP_OKAY) {
5160 		return res;
5161 	}
5162 
5163 	if ((res = mp_init_copy(&v, b)) != MP_OKAY) {
5164 		goto LBL_U;
5165 	}
5166 
5167 	/* must be positive for the remainder of the algorithm */
5168 	u.sign = v.sign = MP_ZPOS;
5169 
5170 	/* B1.  Find the common power of two for u and v */
5171 	u_lsb = mp_cnt_lsb(&u);
5172 	v_lsb = mp_cnt_lsb(&v);
5173 	k = MIN(u_lsb, v_lsb);
5174 
5175 	if (k > 0) {
5176 		/* divide the power of two out */
5177 		if ((res = rshift_bits(&u, k, &u, NULL)) != MP_OKAY) {
5178 			goto LBL_V;
5179 		}
5180 
5181 		if ((res = rshift_bits(&v, k, &v, NULL)) != MP_OKAY) {
5182 			goto LBL_V;
5183 		}
5184 	}
5185 
5186 	/* divide any remaining factors of two out */
5187 	if (u_lsb != k) {
5188 		if ((res = rshift_bits(&u, u_lsb - k, &u, NULL)) != MP_OKAY) {
5189 			goto LBL_V;
5190 		}
5191 	}
5192 
5193 	if (v_lsb != k) {
5194 		if ((res = rshift_bits(&v, v_lsb - k, &v, NULL)) != MP_OKAY) {
5195 			goto LBL_V;
5196 		}
5197 	}
5198 
5199 	while (PGPV_BN_is_zero(&v) == 0) {
5200 		/* make sure v is the largest */
5201 		if (compare_magnitude(&u, &v) == MP_GT) {
5202 			/* swap u and v to make sure v is >= u */
5203 			mp_exch(&u, &v);
5204 		}
5205 
5206 		/* subtract smallest from largest */
5207 		if ((res = signed_subtract(&v, &u, &v)) != MP_OKAY) {
5208 			goto LBL_V;
5209 		}
5210 
5211 		/* Divide out all factors of two */
5212 		if ((res = rshift_bits(&v, mp_cnt_lsb(&v), &v, NULL)) != MP_OKAY) {
5213 			goto LBL_V;
5214 		}
5215 	}
5216 
5217 	/* multiply by 2**k which we divided out at the beginning */
5218 	if ((res = lshift_bits(&u, k, c)) != MP_OKAY) {
5219 		goto LBL_V;
5220 	}
5221 	c->sign = MP_ZPOS;
5222 	res = MP_OKAY;
5223 LBL_V:
5224 	mp_clear (&u);
5225 LBL_U:
5226 	mp_clear (&v);
5227 	return res;
5228 }
5229 
5230 /**************************************************************************/
5231 
5232 /* PGPV_BIGNUM emulation layer */
5233 
5234 /* essentiually, these are just wrappers around the libtommath functions */
5235 /* usually the order of args changes */
5236 /* the PGPV_BIGNUM API tends to have more const poisoning */
5237 /* these wrappers also check the arguments passed for sanity */
5238 
5239 PGPV_BIGNUM *
PGPV_BN_bin2bn(const uint8_t * data,int len,PGPV_BIGNUM * ret)5240 PGPV_BN_bin2bn(const uint8_t *data, int len, PGPV_BIGNUM *ret)
5241 {
5242 	if (data == NULL) {
5243 		return PGPV_BN_new();
5244 	}
5245 	if (ret == NULL) {
5246 		ret = PGPV_BN_new();
5247 	}
5248 	return (mp_read_unsigned_bin(ret, data, len) == MP_OKAY) ? ret : NULL;
5249 }
5250 
5251 /* store in unsigned [big endian] format */
5252 int
PGPV_BN_bn2bin(const PGPV_BIGNUM * a,unsigned char * b)5253 PGPV_BN_bn2bin(const PGPV_BIGNUM *a, unsigned char *b)
5254 {
5255 	PGPV_BIGNUM	t;
5256 	int    	x;
5257 
5258 	if (a == NULL || b == NULL) {
5259 		return -1;
5260 	}
5261 	if (mp_init_copy (&t, __UNCONST(a)) != MP_OKAY) {
5262 		return -1;
5263 	}
5264 	for (x = 0; !PGPV_BN_is_zero(&t) ; ) {
5265 		b[x++] = (unsigned char) (t.dp[0] & 0xff);
5266 		if (rshift_bits(&t, 8, &t, NULL) != MP_OKAY) {
5267 			mp_clear(&t);
5268 			return -1;
5269 		}
5270 	}
5271 	bn_reverse(b, x);
5272 	mp_clear(&t);
5273 	return x;
5274 }
5275 
5276 void
PGPV_BN_init(PGPV_BIGNUM * a)5277 PGPV_BN_init(PGPV_BIGNUM *a)
5278 {
5279 	if (a != NULL) {
5280 		mp_init(a);
5281 	}
5282 }
5283 
5284 PGPV_BIGNUM *
PGPV_BN_new(void)5285 PGPV_BN_new(void)
5286 {
5287 	PGPV_BIGNUM	*a;
5288 
5289 	if ((a = allocate(1, sizeof(*a))) != NULL) {
5290 		mp_init(a);
5291 	}
5292 	return a;
5293 }
5294 
5295 /* copy, b = a */
5296 int
PGPV_BN_copy(PGPV_BIGNUM * b,const PGPV_BIGNUM * a)5297 PGPV_BN_copy(PGPV_BIGNUM *b, const PGPV_BIGNUM *a)
5298 {
5299 	if (a == NULL || b == NULL) {
5300 		return MP_VAL;
5301 	}
5302 	return mp_copy(__UNCONST(a), b);
5303 }
5304 
5305 PGPV_BIGNUM *
PGPV_BN_dup(const PGPV_BIGNUM * a)5306 PGPV_BN_dup(const PGPV_BIGNUM *a)
5307 {
5308 	PGPV_BIGNUM	*ret;
5309 
5310 	if (a == NULL) {
5311 		return NULL;
5312 	}
5313 	if ((ret = PGPV_BN_new()) != NULL) {
5314 		PGPV_BN_copy(ret, a);
5315 	}
5316 	return ret;
5317 }
5318 
5319 void
PGPV_BN_swap(PGPV_BIGNUM * a,PGPV_BIGNUM * b)5320 PGPV_BN_swap(PGPV_BIGNUM *a, PGPV_BIGNUM *b)
5321 {
5322 	if (a && b) {
5323 		mp_exch(a, b);
5324 	}
5325 }
5326 
5327 int
PGPV_BN_lshift(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,int n)5328 PGPV_BN_lshift(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, int n)
5329 {
5330 	if (r == NULL || a == NULL || n < 0) {
5331 		return 0;
5332 	}
5333 	PGPV_BN_copy(r, a);
5334 	return lshift_digits(r, n) == MP_OKAY;
5335 }
5336 
5337 int
PGPV_BN_lshift1(PGPV_BIGNUM * r,PGPV_BIGNUM * a)5338 PGPV_BN_lshift1(PGPV_BIGNUM *r, PGPV_BIGNUM *a)
5339 {
5340 	if (r == NULL || a == NULL) {
5341 		return 0;
5342 	}
5343 	PGPV_BN_copy(r, a);
5344 	return lshift_digits(r, 1) == MP_OKAY;
5345 }
5346 
5347 int
PGPV_BN_rshift(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,int n)5348 PGPV_BN_rshift(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, int n)
5349 {
5350 	if (r == NULL || a == NULL || n < 0) {
5351 		return MP_VAL;
5352 	}
5353 	PGPV_BN_copy(r, a);
5354 	return rshift_digits(r, n) == MP_OKAY;
5355 }
5356 
5357 int
PGPV_BN_rshift1(PGPV_BIGNUM * r,PGPV_BIGNUM * a)5358 PGPV_BN_rshift1(PGPV_BIGNUM *r, PGPV_BIGNUM *a)
5359 {
5360 	if (r == NULL || a == NULL) {
5361 		return 0;
5362 	}
5363 	PGPV_BN_copy(r, a);
5364 	return rshift_digits(r, 1) == MP_OKAY;
5365 }
5366 
5367 int
PGPV_BN_set_word(PGPV_BIGNUM * a,PGPV_BN_ULONG w)5368 PGPV_BN_set_word(PGPV_BIGNUM *a, PGPV_BN_ULONG w)
5369 {
5370 	if (a == NULL) {
5371 		return 0;
5372 	}
5373 	set_word(a, w);
5374 	return 1;
5375 }
5376 
5377 int
PGPV_BN_add(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,const PGPV_BIGNUM * b)5378 PGPV_BN_add(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b)
5379 {
5380 	if (a == NULL || b == NULL || r == NULL) {
5381 		return 0;
5382 	}
5383 	return signed_add(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5384 }
5385 
5386 int
PGPV_BN_sub(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,const PGPV_BIGNUM * b)5387 PGPV_BN_sub(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b)
5388 {
5389 	if (a == NULL || b == NULL || r == NULL) {
5390 		return 0;
5391 	}
5392 	return signed_subtract(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5393 }
5394 
5395 int
PGPV_BN_mul(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,const PGPV_BIGNUM * b,PGPV_BN_CTX * ctx)5396 PGPV_BN_mul(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, const PGPV_BIGNUM *b, PGPV_BN_CTX *ctx)
5397 {
5398 	if (a == NULL || b == NULL || r == NULL) {
5399 		return 0;
5400 	}
5401 	USE_ARG(ctx);
5402 	return signed_multiply(__UNCONST(a), __UNCONST(b), r) == MP_OKAY;
5403 }
5404 
5405 int
PGPV_BN_div(PGPV_BIGNUM * dv,PGPV_BIGNUM * rem,const PGPV_BIGNUM * a,const PGPV_BIGNUM * d,PGPV_BN_CTX * ctx)5406 PGPV_BN_div(PGPV_BIGNUM *dv, PGPV_BIGNUM *rem, const PGPV_BIGNUM *a, const PGPV_BIGNUM *d, PGPV_BN_CTX *ctx)
5407 {
5408 	if ((dv == NULL && rem == NULL) || a == NULL || d == NULL) {
5409 		return 0;
5410 	}
5411 	USE_ARG(ctx);
5412 	return signed_divide(dv, rem, __UNCONST(a), __UNCONST(d)) == MP_OKAY;
5413 }
5414 
5415 /* perform a bit operation on the 2 bignums */
5416 int
PGPV_BN_bitop(PGPV_BIGNUM * r,const PGPV_BIGNUM * a,char op,const PGPV_BIGNUM * b)5417 PGPV_BN_bitop(PGPV_BIGNUM *r, const PGPV_BIGNUM *a, char op, const PGPV_BIGNUM *b)
5418 {
5419 	unsigned	ndigits;
5420 	mp_digit	ad;
5421 	mp_digit	bd;
5422 	int		i;
5423 
5424 	if (a == NULL || b == NULL || r == NULL) {
5425 		return 0;
5426 	}
5427 	if (PGPV_BN_cmp(__UNCONST(a), __UNCONST(b)) >= 0) {
5428 		PGPV_BN_copy(r, a);
5429 		ndigits = a->used;
5430 	} else {
5431 		PGPV_BN_copy(r, b);
5432 		ndigits = b->used;
5433 	}
5434 	for (i = 0 ; i < (int)ndigits ; i++) {
5435 		ad = (i > a->used) ? 0 : a->dp[i];
5436 		bd = (i > b->used) ? 0 : b->dp[i];
5437 		switch(op) {
5438 		case '&':
5439 			r->dp[i] = (ad & bd);
5440 			break;
5441 		case '|':
5442 			r->dp[i] = (ad | bd);
5443 			break;
5444 		case '^':
5445 			r->dp[i] = (ad ^ bd);
5446 			break;
5447 		default:
5448 			break;
5449 		}
5450 	}
5451 	return 1;
5452 }
5453 
5454 void
PGPV_BN_free(PGPV_BIGNUM * a)5455 PGPV_BN_free(PGPV_BIGNUM *a)
5456 {
5457 	if (a) {
5458 		mp_clear(a);
5459 		free(a);
5460 	}
5461 }
5462 
5463 void
PGPV_BN_clear(PGPV_BIGNUM * a)5464 PGPV_BN_clear(PGPV_BIGNUM *a)
5465 {
5466 	if (a) {
5467 		mp_clear(a);
5468 	}
5469 }
5470 
5471 void
PGPV_BN_clear_free(PGPV_BIGNUM * a)5472 PGPV_BN_clear_free(PGPV_BIGNUM *a)
5473 {
5474 	PGPV_BN_clear(a);
5475 	free(a);
5476 }
5477 
5478 int
PGPV_BN_num_bytes(const PGPV_BIGNUM * a)5479 PGPV_BN_num_bytes(const PGPV_BIGNUM *a)
5480 {
5481 	if (a == NULL) {
5482 		return MP_VAL;
5483 	}
5484 	return mp_unsigned_bin_size(__UNCONST(a));
5485 }
5486 
5487 int
PGPV_BN_num_bits(const PGPV_BIGNUM * a)5488 PGPV_BN_num_bits(const PGPV_BIGNUM *a)
5489 {
5490 	if (a == NULL) {
5491 		return 0;
5492 	}
5493 	return mp_count_bits(a);
5494 }
5495 
5496 void
PGPV_BN_set_negative(PGPV_BIGNUM * a,int n)5497 PGPV_BN_set_negative(PGPV_BIGNUM *a, int n)
5498 {
5499 	if (a) {
5500 		a->sign = (n) ? MP_NEG : 0;
5501 	}
5502 }
5503 
5504 int
PGPV_BN_cmp(PGPV_BIGNUM * a,PGPV_BIGNUM * b)5505 PGPV_BN_cmp(PGPV_BIGNUM *a, PGPV_BIGNUM *b)
5506 {
5507 	if (a == NULL || b == NULL) {
5508 		return MP_VAL;
5509 	}
5510 	switch(signed_compare(a, b)) {
5511 	case MP_LT:
5512 		return -1;
5513 	case MP_GT:
5514 		return 1;
5515 	case MP_EQ:
5516 	default:
5517 		return 0;
5518 	}
5519 }
5520 
5521 int
PGPV_BN_mod_exp(PGPV_BIGNUM * Y,PGPV_BIGNUM * G,const PGPV_BIGNUM * X,const PGPV_BIGNUM * P,PGPV_BN_CTX * ctx)5522 PGPV_BN_mod_exp(PGPV_BIGNUM *Y, PGPV_BIGNUM *G, const PGPV_BIGNUM *X, const PGPV_BIGNUM *P, PGPV_BN_CTX *ctx)
5523 {
5524 	if (Y == NULL || G == NULL || X == NULL || P == NULL) {
5525 		return MP_VAL;
5526 	}
5527 	USE_ARG(ctx);
5528 	return exponent_modulo(G, __UNCONST(X), __UNCONST(P), Y) == MP_OKAY;
5529 }
5530 
5531 PGPV_BIGNUM *
PGPV_BN_mod_inverse(PGPV_BIGNUM * r,PGPV_BIGNUM * a,const PGPV_BIGNUM * n,PGPV_BN_CTX * ctx)5532 PGPV_BN_mod_inverse(PGPV_BIGNUM *r, PGPV_BIGNUM *a, const PGPV_BIGNUM *n, PGPV_BN_CTX *ctx)
5533 {
5534 	USE_ARG(ctx);
5535 	if (r == NULL || a == NULL || n == NULL) {
5536 		return NULL;
5537 	}
5538 	return (modular_inverse(r, a, __UNCONST(n)) == MP_OKAY) ? r : NULL;
5539 }
5540 
5541 int
PGPV_BN_mod_mul(PGPV_BIGNUM * ret,PGPV_BIGNUM * a,PGPV_BIGNUM * b,const PGPV_BIGNUM * m,PGPV_BN_CTX * ctx)5542 PGPV_BN_mod_mul(PGPV_BIGNUM *ret, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx)
5543 {
5544 	USE_ARG(ctx);
5545 	if (ret == NULL || a == NULL || b == NULL || m == NULL) {
5546 		return 0;
5547 	}
5548 	return multiply_modulo(ret, a, b, __UNCONST(m)) == MP_OKAY;
5549 }
5550 
5551 int
PGPV_BN_mod_add(PGPV_BIGNUM * ret,PGPV_BIGNUM * a,PGPV_BIGNUM * b,const PGPV_BIGNUM * m,PGPV_BN_CTX * ctx)5552 PGPV_BN_mod_add(PGPV_BIGNUM *ret, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx)
5553 {
5554 	USE_ARG(ctx);
5555 	if (ret == NULL || a == NULL || b == NULL || m == NULL) {
5556 		return 0;
5557 	}
5558 	return add_modulo(ret, a, b, __UNCONST(m)) == MP_OKAY;
5559 }
5560 
5561 PGPV_BN_CTX *
PGPV_BN_CTX_new(void)5562 PGPV_BN_CTX_new(void)
5563 {
5564 	return allocate(1, sizeof(PGPV_BN_CTX));
5565 }
5566 
5567 void
PGPV_BN_CTX_init(PGPV_BN_CTX * c)5568 PGPV_BN_CTX_init(PGPV_BN_CTX *c)
5569 {
5570 	if (c != NULL) {
5571 		c->arraysize = 15;
5572 		if ((c->v = allocate(sizeof(*c->v), c->arraysize)) == NULL) {
5573 			c->arraysize = 0;
5574 		}
5575 	}
5576 }
5577 
5578 PGPV_BIGNUM *
PGPV_BN_CTX_get(PGPV_BN_CTX * ctx)5579 PGPV_BN_CTX_get(PGPV_BN_CTX *ctx)
5580 {
5581 	if (ctx == NULL || ctx->v == NULL || ctx->arraysize == 0 || ctx->count == ctx->arraysize - 1) {
5582 		return NULL;
5583 	}
5584 	return ctx->v[ctx->count++] = PGPV_BN_new();
5585 }
5586 
5587 void
PGPV_BN_CTX_start(PGPV_BN_CTX * ctx)5588 PGPV_BN_CTX_start(PGPV_BN_CTX *ctx)
5589 {
5590 	PGPV_BN_CTX_init(ctx);
5591 }
5592 
5593 void
PGPV_BN_CTX_free(PGPV_BN_CTX * c)5594 PGPV_BN_CTX_free(PGPV_BN_CTX *c)
5595 {
5596 	unsigned	i;
5597 
5598 	if (c != NULL && c->v != NULL) {
5599 		for (i = 0 ; i < c->count ; i++) {
5600 			PGPV_BN_clear_free(c->v[i]);
5601 		}
5602 		deallocate(c->v, sizeof(*c->v) * c->arraysize);
5603 	}
5604 }
5605 
5606 void
PGPV_BN_CTX_end(PGPV_BN_CTX * ctx)5607 PGPV_BN_CTX_end(PGPV_BN_CTX *ctx)
5608 {
5609 	PGPV_BN_CTX_free(ctx);
5610 }
5611 
5612 char *
PGPV_BN_bn2hex(const PGPV_BIGNUM * a)5613 PGPV_BN_bn2hex(const PGPV_BIGNUM *a)
5614 {
5615 	return (a == NULL) ? NULL : formatbn(a, 16);
5616 }
5617 
5618 char *
PGPV_BN_bn2dec(const PGPV_BIGNUM * a)5619 PGPV_BN_bn2dec(const PGPV_BIGNUM *a)
5620 {
5621 	return (a == NULL) ? NULL : formatbn(a, 10);
5622 }
5623 
5624 char *
PGPV_BN_bn2radix(const PGPV_BIGNUM * a,unsigned radix)5625 PGPV_BN_bn2radix(const PGPV_BIGNUM *a, unsigned radix)
5626 {
5627 	return (a == NULL) ? NULL : formatbn(a, (int)radix);
5628 }
5629 
5630 #ifndef _KERNEL
5631 int
PGPV_BN_print_fp(FILE * fp,const PGPV_BIGNUM * a)5632 PGPV_BN_print_fp(FILE *fp, const PGPV_BIGNUM *a)
5633 {
5634 	char	*s;
5635 	int	 ret;
5636 
5637 	if (fp == NULL || a == NULL) {
5638 		return 0;
5639 	}
5640 	s = PGPV_BN_bn2hex(a);
5641 	ret = fprintf(fp, "%s", s);
5642 	deallocate(s, strlen(s) + 1);
5643 	return ret;
5644 }
5645 #endif
5646 
5647 #ifdef PGPV_BN_RAND_NEEDED
5648 int
PGPV_BN_rand(PGPV_BIGNUM * rnd,int bits,int top,int bottom)5649 PGPV_BN_rand(PGPV_BIGNUM *rnd, int bits, int top, int bottom)
5650 {
5651 	uint64_t	r;
5652 	int		digits;
5653 	int		i;
5654 
5655 	if (rnd == NULL) {
5656 		return 0;
5657 	}
5658 	mp_init_size(rnd, digits = howmany(bits, DIGIT_BIT));
5659 	for (i = 0 ; i < digits ; i++) {
5660 		r = (uint64_t)arc4random();
5661 		r <<= 32;
5662 		r |= arc4random();
5663 		rnd->dp[i] = (r & MP_MASK);
5664 		rnd->used += 1;
5665 	}
5666 	if (top == 0) {
5667 		rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT));
5668 	}
5669 	if (top == 1) {
5670 		rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)DIGIT_BIT));
5671 		rnd->dp[rnd->used - 1] |= (((mp_digit)1)<<((mp_digit)(DIGIT_BIT - 1)));
5672 	}
5673 	if (bottom) {
5674 		rnd->dp[0] |= 0x1;
5675 	}
5676 	return 1;
5677 }
5678 
5679 int
PGPV_BN_rand_range(PGPV_BIGNUM * rnd,PGPV_BIGNUM * range)5680 PGPV_BN_rand_range(PGPV_BIGNUM *rnd, PGPV_BIGNUM *range)
5681 {
5682 	if (rnd == NULL || range == NULL || PGPV_BN_is_zero(range)) {
5683 		return 0;
5684 	}
5685 	PGPV_BN_rand(rnd, PGPV_BN_num_bits(range), 1, 0);
5686 	return modulo(rnd, range, rnd) == MP_OKAY;
5687 }
5688 #endif
5689 
5690 int
PGPV_BN_is_prime(const PGPV_BIGNUM * a,int checks,void (* callback)(int,int,void *),PGPV_BN_CTX * ctx,void * cb_arg)5691 PGPV_BN_is_prime(const PGPV_BIGNUM *a, int checks, void (*callback)(int, int, void *), PGPV_BN_CTX *ctx, void *cb_arg)
5692 {
5693 	int	primality;
5694 
5695 	if (a == NULL) {
5696 		return 0;
5697 	}
5698 	USE_ARG(ctx);
5699 	USE_ARG(cb_arg);
5700 	USE_ARG(callback);
5701 	return (mp_prime_is_prime(__UNCONST(a), checks, &primality) == MP_OKAY) ? primality : 0;
5702 }
5703 
5704 const PGPV_BIGNUM *
PGPV_BN_value_one(void)5705 PGPV_BN_value_one(void)
5706 {
5707 	static mp_digit		digit = 1UL;
5708 	static const PGPV_BIGNUM	one = { &digit, 1, 1, 0 };
5709 
5710 	return &one;
5711 }
5712 
5713 int
PGPV_BN_hex2bn(PGPV_BIGNUM ** a,const char * str)5714 PGPV_BN_hex2bn(PGPV_BIGNUM **a, const char *str)
5715 {
5716 	return getbn(a, str, 16);
5717 }
5718 
5719 int
PGPV_BN_dec2bn(PGPV_BIGNUM ** a,const char * str)5720 PGPV_BN_dec2bn(PGPV_BIGNUM **a, const char *str)
5721 {
5722 	return getbn(a, str, 10);
5723 }
5724 
5725 int
PGPV_BN_radix2bn(PGPV_BIGNUM ** a,const char * str,unsigned radix)5726 PGPV_BN_radix2bn(PGPV_BIGNUM **a, const char *str, unsigned radix)
5727 {
5728 	return getbn(a, str, (int)radix);
5729 }
5730 
5731 int
PGPV_BN_mod_sub(PGPV_BIGNUM * r,PGPV_BIGNUM * a,PGPV_BIGNUM * b,const PGPV_BIGNUM * m,PGPV_BN_CTX * ctx)5732 PGPV_BN_mod_sub(PGPV_BIGNUM *r, PGPV_BIGNUM *a, PGPV_BIGNUM *b, const PGPV_BIGNUM *m, PGPV_BN_CTX *ctx)
5733 {
5734 	USE_ARG(ctx);
5735 	if (r == NULL || a == NULL || b == NULL || m == NULL) {
5736 		return 0;
5737 	}
5738 	return subtract_modulo(a, b, __UNCONST(m), r) == MP_OKAY;
5739 }
5740 
5741 int
PGPV_BN_is_bit_set(const PGPV_BIGNUM * a,int n)5742 PGPV_BN_is_bit_set(const PGPV_BIGNUM *a, int n)
5743 {
5744 	if (a == NULL || n < 0 || n >= a->used * DIGIT_BIT) {
5745 		return 0;
5746 	}
5747 	return (a->dp[n / DIGIT_BIT] & (1 << (n % DIGIT_BIT))) ? 1 : 0;
5748 }
5749 
5750 /* raise 'a' to power of 'b' */
5751 int
PGPV_BN_raise(PGPV_BIGNUM * res,PGPV_BIGNUM * a,PGPV_BIGNUM * b)5752 PGPV_BN_raise(PGPV_BIGNUM *res, PGPV_BIGNUM *a, PGPV_BIGNUM *b)
5753 {
5754 	uint64_t	 exponent;
5755 	PGPV_BIGNUM		*power;
5756 	PGPV_BIGNUM		*temp;
5757 	char		*t;
5758 
5759 	t = PGPV_BN_bn2dec(b);
5760 	exponent = (uint64_t)strtoull(t, NULL, 10);
5761 	free(t);
5762 	if (exponent == 0) {
5763 		PGPV_BN_copy(res, PGPV_BN_value_one());
5764 	} else {
5765 		power = PGPV_BN_dup(a);
5766 		for ( ; (exponent & 1) == 0 ; exponent >>= 1) {
5767 			PGPV_BN_mul(power, power, power, NULL);
5768 		}
5769 		temp = PGPV_BN_dup(power);
5770 		for (exponent >>= 1 ; exponent > 0 ; exponent >>= 1) {
5771 			PGPV_BN_mul(power, power, power, NULL);
5772 			if (exponent & 1) {
5773 				PGPV_BN_mul(temp, power, temp, NULL);
5774 			}
5775 		}
5776 		PGPV_BN_copy(res, temp);
5777 		PGPV_BN_free(power);
5778 		PGPV_BN_free(temp);
5779 	}
5780 	return 1;
5781 }
5782 
5783 /* compute the factorial */
5784 int
PGPV_BN_factorial(PGPV_BIGNUM * res,PGPV_BIGNUM * f)5785 PGPV_BN_factorial(PGPV_BIGNUM *res, PGPV_BIGNUM *f)
5786 {
5787 	PGPV_BIGNUM	*one;
5788 	PGPV_BIGNUM	*i;
5789 
5790 	i = PGPV_BN_dup(f);
5791 	one = __UNCONST(PGPV_BN_value_one());
5792 	PGPV_BN_sub(i, i, one);
5793 	PGPV_BN_copy(res, f);
5794 	while (PGPV_BN_cmp(i, one) > 0) {
5795 		PGPV_BN_mul(res, res, i, NULL);
5796 		PGPV_BN_sub(i, i, one);
5797 	}
5798 	PGPV_BN_free(i);
5799 	return 1;
5800 }
5801 
5802 /* get greatest common divisor */
5803 int
PGPV_BN_gcd(PGPV_BIGNUM * r,PGPV_BIGNUM * a,PGPV_BIGNUM * b,PGPV_BN_CTX * ctx)5804 PGPV_BN_gcd(PGPV_BIGNUM *r, PGPV_BIGNUM *a, PGPV_BIGNUM *b, PGPV_BN_CTX *ctx)
5805 {
5806 	return mp_gcd(a, b, r);
5807 }
5808 
5809 int
PGPV_BN_sub_word(PGPV_BIGNUM * a,PGPV_BN_ULONG w)5810 PGPV_BN_sub_word(PGPV_BIGNUM *a, PGPV_BN_ULONG w)
5811 {
5812 	PGPV_BIGNUM	*bnw;
5813 
5814 	bnw = PGPV_BN_new();
5815 	PGPV_BN_set_word(bnw, w);
5816 	PGPV_BN_sub(a, a, bnw);
5817 	PGPV_BN_free(bnw);
5818 	return 1;
5819 }
5820 
5821 int
PGPV_BN_add_word(PGPV_BIGNUM * a,PGPV_BN_ULONG w)5822 PGPV_BN_add_word(PGPV_BIGNUM *a, PGPV_BN_ULONG w)
5823 {
5824 	PGPV_BIGNUM	*bnw;
5825 
5826 	bnw = PGPV_BN_new();
5827 	PGPV_BN_set_word(bnw, w);
5828 	PGPV_BN_add(a, a, bnw);
5829 	PGPV_BN_free(bnw);
5830 	return 1;
5831 }
5832