xref: /dragonfly/crypto/libressl/crypto/bn/bn_mul.c (revision 72c33676)
1 /* $OpenBSD: bn_mul.c,v 1.20 2015/02/09 15:49:22 jsing Exp $ */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 
59 #ifndef BN_DEBUG
60 # undef NDEBUG /* avoid conflicting definitions */
61 # define NDEBUG
62 #endif
63 
64 #include <assert.h>
65 #include <stdio.h>
66 #include <string.h>
67 
68 #include <openssl/opensslconf.h>
69 
70 #include "bn_lcl.h"
71 
72 #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
73 /* Here follows specialised variants of bn_add_words() and
74    bn_sub_words().  They have the property performing operations on
75    arrays of different sizes.  The sizes of those arrays is expressed through
76    cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
77    which is the delta between the two lengths, calculated as len(a)-len(b).
78    All lengths are the number of BN_ULONGs...  For the operations that require
79    a result array as parameter, it must have the length cl+abs(dl).
80    These functions should probably end up in bn_asm.c as soon as there are
81    assembler counterparts for the systems that use assembler files.  */
82 
83 BN_ULONG
bn_sub_part_words(BN_ULONG * r,const BN_ULONG * a,const BN_ULONG * b,int cl,int dl)84 bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
85     int dl)
86 {
87 	BN_ULONG c, t;
88 
89 	assert(cl >= 0);
90 	c = bn_sub_words(r, a, b, cl);
91 
92 	if (dl == 0)
93 		return c;
94 
95 	r += cl;
96 	a += cl;
97 	b += cl;
98 
99 	if (dl < 0) {
100 #ifdef BN_COUNT
101 		fprintf(stderr,
102 		    "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n",
103 		    cl, dl, c);
104 #endif
105 		for (;;) {
106 			t = b[0];
107 			r[0] = (0 - t - c) & BN_MASK2;
108 			if (t != 0)
109 				c = 1;
110 			if (++dl >= 0)
111 				break;
112 
113 			t = b[1];
114 			r[1] = (0 - t - c) & BN_MASK2;
115 			if (t != 0)
116 				c = 1;
117 			if (++dl >= 0)
118 				break;
119 
120 			t = b[2];
121 			r[2] = (0 - t - c) & BN_MASK2;
122 			if (t != 0)
123 				c = 1;
124 			if (++dl >= 0)
125 				break;
126 
127 			t = b[3];
128 			r[3] = (0 - t - c) & BN_MASK2;
129 			if (t != 0)
130 				c = 1;
131 			if (++dl >= 0)
132 				break;
133 
134 			b += 4;
135 			r += 4;
136 		}
137 	} else {
138 		int save_dl = dl;
139 #ifdef BN_COUNT
140 		fprintf(stderr,
141 		    "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n",
142 		    cl, dl, c);
143 #endif
144 		while (c) {
145 			t = a[0];
146 			r[0] = (t - c) & BN_MASK2;
147 			if (t != 0)
148 				c = 0;
149 			if (--dl <= 0)
150 				break;
151 
152 			t = a[1];
153 			r[1] = (t - c) & BN_MASK2;
154 			if (t != 0)
155 				c = 0;
156 			if (--dl <= 0)
157 				break;
158 
159 			t = a[2];
160 			r[2] = (t - c) & BN_MASK2;
161 			if (t != 0)
162 				c = 0;
163 			if (--dl <= 0)
164 				break;
165 
166 			t = a[3];
167 			r[3] = (t - c) & BN_MASK2;
168 			if (t != 0)
169 				c = 0;
170 			if (--dl <= 0)
171 				break;
172 
173 			save_dl = dl;
174 			a += 4;
175 			r += 4;
176 		}
177 		if (dl > 0) {
178 #ifdef BN_COUNT
179 			fprintf(stderr,
180 			    "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n",
181 			    cl, dl);
182 #endif
183 			if (save_dl > dl) {
184 				switch (save_dl - dl) {
185 				case 1:
186 					r[1] = a[1];
187 					if (--dl <= 0)
188 						break;
189 				case 2:
190 					r[2] = a[2];
191 					if (--dl <= 0)
192 						break;
193 				case 3:
194 					r[3] = a[3];
195 					if (--dl <= 0)
196 						break;
197 				}
198 				a += 4;
199 				r += 4;
200 			}
201 		}
202 		if (dl > 0) {
203 #ifdef BN_COUNT
204 			fprintf(stderr,
205 			    "  bn_sub_part_words %d + %d (dl > 0, copy)\n",
206 			    cl, dl);
207 #endif
208 			for (;;) {
209 				r[0] = a[0];
210 				if (--dl <= 0)
211 					break;
212 				r[1] = a[1];
213 				if (--dl <= 0)
214 					break;
215 				r[2] = a[2];
216 				if (--dl <= 0)
217 					break;
218 				r[3] = a[3];
219 				if (--dl <= 0)
220 					break;
221 
222 				a += 4;
223 				r += 4;
224 			}
225 		}
226 	}
227 	return c;
228 }
229 #endif
230 
231 BN_ULONG
bn_add_part_words(BN_ULONG * r,const BN_ULONG * a,const BN_ULONG * b,int cl,int dl)232 bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
233     int dl)
234 {
235 	BN_ULONG c, l, t;
236 
237 	assert(cl >= 0);
238 	c = bn_add_words(r, a, b, cl);
239 
240 	if (dl == 0)
241 		return c;
242 
243 	r += cl;
244 	a += cl;
245 	b += cl;
246 
247 	if (dl < 0) {
248 		int save_dl = dl;
249 #ifdef BN_COUNT
250 		fprintf(stderr,
251 		    "  bn_add_part_words %d + %d (dl < 0, c = %d)\n",
252 		    cl, dl, c);
253 #endif
254 		while (c) {
255 			l = (c + b[0]) & BN_MASK2;
256 			c = (l < c);
257 			r[0] = l;
258 			if (++dl >= 0)
259 				break;
260 
261 			l = (c + b[1]) & BN_MASK2;
262 			c = (l < c);
263 			r[1] = l;
264 			if (++dl >= 0)
265 				break;
266 
267 			l = (c + b[2]) & BN_MASK2;
268 			c = (l < c);
269 			r[2] = l;
270 			if (++dl >= 0)
271 				break;
272 
273 			l = (c + b[3]) & BN_MASK2;
274 			c = (l < c);
275 			r[3] = l;
276 			if (++dl >= 0)
277 				break;
278 
279 			save_dl = dl;
280 			b += 4;
281 			r += 4;
282 		}
283 		if (dl < 0) {
284 #ifdef BN_COUNT
285 			fprintf(stderr,
286 			    "  bn_add_part_words %d + %d (dl < 0, c == 0)\n",
287 			    cl, dl);
288 #endif
289 			if (save_dl < dl) {
290 				switch (dl - save_dl) {
291 				case 1:
292 					r[1] = b[1];
293 					if (++dl >= 0)
294 						break;
295 				case 2:
296 					r[2] = b[2];
297 					if (++dl >= 0)
298 						break;
299 				case 3:
300 					r[3] = b[3];
301 					if (++dl >= 0)
302 						break;
303 				}
304 				b += 4;
305 				r += 4;
306 			}
307 		}
308 		if (dl < 0) {
309 #ifdef BN_COUNT
310 			fprintf(stderr,
311 			    "  bn_add_part_words %d + %d (dl < 0, copy)\n",
312 			    cl, dl);
313 #endif
314 			for (;;) {
315 				r[0] = b[0];
316 				if (++dl >= 0)
317 					break;
318 				r[1] = b[1];
319 				if (++dl >= 0)
320 					break;
321 				r[2] = b[2];
322 				if (++dl >= 0)
323 					break;
324 				r[3] = b[3];
325 				if (++dl >= 0)
326 					break;
327 
328 				b += 4;
329 				r += 4;
330 			}
331 		}
332 	} else {
333 		int save_dl = dl;
334 #ifdef BN_COUNT
335 		fprintf(stderr,
336 		    "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
337 #endif
338 		while (c) {
339 			t = (a[0] + c) & BN_MASK2;
340 			c = (t < c);
341 			r[0] = t;
342 			if (--dl <= 0)
343 				break;
344 
345 			t = (a[1] + c) & BN_MASK2;
346 			c = (t < c);
347 			r[1] = t;
348 			if (--dl <= 0)
349 				break;
350 
351 			t = (a[2] + c) & BN_MASK2;
352 			c = (t < c);
353 			r[2] = t;
354 			if (--dl <= 0)
355 				break;
356 
357 			t = (a[3] + c) & BN_MASK2;
358 			c = (t < c);
359 			r[3] = t;
360 			if (--dl <= 0)
361 				break;
362 
363 			save_dl = dl;
364 			a += 4;
365 			r += 4;
366 		}
367 #ifdef BN_COUNT
368 		fprintf(stderr,
369 		    "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
370 #endif
371 		if (dl > 0) {
372 			if (save_dl > dl) {
373 				switch (save_dl - dl) {
374 				case 1:
375 					r[1] = a[1];
376 					if (--dl <= 0)
377 						break;
378 				case 2:
379 					r[2] = a[2];
380 					if (--dl <= 0)
381 						break;
382 				case 3:
383 					r[3] = a[3];
384 					if (--dl <= 0)
385 						break;
386 				}
387 				a += 4;
388 				r += 4;
389 			}
390 		}
391 		if (dl > 0) {
392 #ifdef BN_COUNT
393 			fprintf(stderr,
394 			    "  bn_add_part_words %d + %d (dl > 0, copy)\n",
395 			    cl, dl);
396 #endif
397 			for (;;) {
398 				r[0] = a[0];
399 				if (--dl <= 0)
400 					break;
401 				r[1] = a[1];
402 				if (--dl <= 0)
403 					break;
404 				r[2] = a[2];
405 				if (--dl <= 0)
406 					break;
407 				r[3] = a[3];
408 				if (--dl <= 0)
409 					break;
410 
411 				a += 4;
412 				r += 4;
413 			}
414 		}
415 	}
416 	return c;
417 }
418 
419 #ifdef BN_RECURSION
420 /* Karatsuba recursive multiplication algorithm
421  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
422 
423 /* r is 2*n2 words in size,
424  * a and b are both n2 words in size.
425  * n2 must be a power of 2.
426  * We multiply and return the result.
427  * t must be 2*n2 words in size
428  * We calculate
429  * a[0]*b[0]
430  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
431  * a[1]*b[1]
432  */
433 /* dnX may not be positive, but n2/2+dnX has to be */
434 void
bn_mul_recursive(BN_ULONG * r,BN_ULONG * a,BN_ULONG * b,int n2,int dna,int dnb,BN_ULONG * t)435 bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
436     int dnb, BN_ULONG *t)
437 {
438 	int n = n2 / 2, c1, c2;
439 	int tna = n + dna, tnb = n + dnb;
440 	unsigned int neg, zero;
441 	BN_ULONG ln, lo, *p;
442 
443 # ifdef BN_COUNT
444 	fprintf(stderr, " bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
445 # endif
446 # ifdef BN_MUL_COMBA
447 #  if 0
448 	if (n2 == 4) {
449 		bn_mul_comba4(r, a, b);
450 		return;
451 	}
452 #  endif
453 	/* Only call bn_mul_comba 8 if n2 == 8 and the
454 	 * two arrays are complete [steve]
455 	 */
456 	if (n2 == 8 && dna == 0 && dnb == 0) {
457 		bn_mul_comba8(r, a, b);
458 		return;
459 	}
460 # endif /* BN_MUL_COMBA */
461 	/* Else do normal multiply */
462 	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
463 		bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
464 		if ((dna + dnb) < 0)
465 			memset(&r[2*n2 + dna + dnb], 0,
466 			    sizeof(BN_ULONG) * -(dna + dnb));
467 		return;
468 	}
469 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
470 	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
471 	c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n);
472 	zero = neg = 0;
473 	switch (c1 * 3 + c2) {
474 	case -4:
475 		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
476 		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
477 		break;
478 	case -3:
479 		zero = 1;
480 		break;
481 	case -2:
482 		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
483 		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
484 		neg = 1;
485 		break;
486 	case -1:
487 	case 0:
488 	case 1:
489 		zero = 1;
490 		break;
491 	case 2:
492 		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
493 		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
494 		neg = 1;
495 		break;
496 	case 3:
497 		zero = 1;
498 		break;
499 	case 4:
500 		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
501 		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
502 		break;
503 	}
504 
505 # ifdef BN_MUL_COMBA
506 	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
507 					       extra args to do this well */
508 	{
509 		if (!zero)
510 			bn_mul_comba4(&(t[n2]), t, &(t[n]));
511 		else
512 			memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
513 
514 		bn_mul_comba4(r, a, b);
515 		bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
516 	} else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
517 						    take extra args to do this
518 						    well */
519 	{
520 		if (!zero)
521 			bn_mul_comba8(&(t[n2]), t, &(t[n]));
522 		else
523 			memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
524 
525 		bn_mul_comba8(r, a, b);
526 		bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
527 	} else
528 # endif /* BN_MUL_COMBA */
529 	{
530 		p = &(t[n2 * 2]);
531 		if (!zero)
532 			bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
533 		else
534 			memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
535 		bn_mul_recursive(r, a, b, n, 0, 0, p);
536 		bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
537 	}
538 
539 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
540 	 * r[10] holds (a[0]*b[0])
541 	 * r[32] holds (b[1]*b[1])
542 	 */
543 
544 	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
545 
546 	if (neg) /* if t[32] is negative */
547 	{
548 		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
549 	} else {
550 		/* Might have a carry */
551 		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
552 	}
553 
554 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
555 	 * r[10] holds (a[0]*b[0])
556 	 * r[32] holds (b[1]*b[1])
557 	 * c1 holds the carry bits
558 	 */
559 	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
560 	if (c1) {
561 		p = &(r[n + n2]);
562 		lo= *p;
563 		ln = (lo + c1) & BN_MASK2;
564 		*p = ln;
565 
566 		/* The overflow will stop before we over write
567 		 * words we should not overwrite */
568 		if (ln < (BN_ULONG)c1) {
569 			do {
570 				p++;
571 				lo= *p;
572 				ln = (lo + 1) & BN_MASK2;
573 				*p = ln;
574 			} while (ln == 0);
575 		}
576 	}
577 }
578 
579 /* n+tn is the word length
580  * t needs to be n*4 is size, as does r */
581 /* tnX may not be negative but less than n */
582 void
bn_mul_part_recursive(BN_ULONG * r,BN_ULONG * a,BN_ULONG * b,int n,int tna,int tnb,BN_ULONG * t)583 bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
584     int tnb, BN_ULONG *t)
585 {
586 	int i, j, n2 = n * 2;
587 	int c1, c2, neg;
588 	BN_ULONG ln, lo, *p;
589 
590 # ifdef BN_COUNT
591 	fprintf(stderr, " bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
592 	    n, tna, n, tnb);
593 # endif
594 	if (n < 8) {
595 		bn_mul_normal(r, a, n + tna, b, n + tnb);
596 		return;
597 	}
598 
599 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
600 	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
601 	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
602 	neg = 0;
603 	switch (c1 * 3 + c2) {
604 	case -4:
605 		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
606 		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
607 		break;
608 	case -3:
609 		/* break; */
610 	case -2:
611 		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
612 		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
613 		neg = 1;
614 		break;
615 	case -1:
616 	case 0:
617 	case 1:
618 		/* break; */
619 	case 2:
620 		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
621 		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
622 		neg = 1;
623 		break;
624 	case 3:
625 		/* break; */
626 	case 4:
627 		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
628 		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
629 		break;
630 	}
631 		/* The zero case isn't yet implemented here. The speedup
632 		   would probably be negligible. */
633 # if 0
634 	if (n == 4) {
635 		bn_mul_comba4(&(t[n2]), t, &(t[n]));
636 		bn_mul_comba4(r, a, b);
637 		bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
638 		memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
639 	} else
640 # endif
641 		if (n == 8) {
642 		bn_mul_comba8(&(t[n2]), t, &(t[n]));
643 		bn_mul_comba8(r, a, b);
644 		bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
645 		memset(&(r[n2 + tna + tnb]), 0,
646 		    sizeof(BN_ULONG) * (n2 - tna - tnb));
647 	} else {
648 		p = &(t[n2*2]);
649 		bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
650 		bn_mul_recursive(r, a, b, n, 0, 0, p);
651 		i = n / 2;
652 		/* If there is only a bottom half to the number,
653 		 * just do it */
654 		if (tna > tnb)
655 			j = tna - i;
656 		else
657 			j = tnb - i;
658 		if (j == 0) {
659 			bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
660 			    i, tna - i, tnb - i, p);
661 			memset(&(r[n2 + i * 2]), 0,
662 			    sizeof(BN_ULONG) * (n2 - i * 2));
663 		}
664 		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
665 		{
666 			bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
667 			    i, tna - i, tnb - i, p);
668 			memset(&(r[n2 + tna + tnb]), 0,
669 			    sizeof(BN_ULONG) * (n2 - tna - tnb));
670 		}
671 		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
672 		{
673 			memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
674 			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
675 			    tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
676 				bn_mul_normal(&(r[n2]), &(a[n]), tna,
677 				    &(b[n]), tnb);
678 			} else {
679 				for (;;) {
680 					i /= 2;
681 					/* these simplified conditions work
682 					 * exclusively because difference
683 					 * between tna and tnb is 1 or 0 */
684 					if (i < tna || i < tnb) {
685 						bn_mul_part_recursive(&(r[n2]),
686 						    &(a[n]), &(b[n]), i,
687 						    tna - i, tnb - i, p);
688 						break;
689 					} else if (i == tna || i == tnb) {
690 						bn_mul_recursive(&(r[n2]),
691 						    &(a[n]), &(b[n]), i,
692 						    tna - i, tnb - i, p);
693 						break;
694 					}
695 				}
696 			}
697 		}
698 	}
699 
700 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
701 	 * r[10] holds (a[0]*b[0])
702 	 * r[32] holds (b[1]*b[1])
703 	 */
704 
705 	c1 = (int)(bn_add_words(t, r,&(r[n2]), n2));
706 
707 	if (neg) /* if t[32] is negative */
708 	{
709 		c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2));
710 	} else {
711 		/* Might have a carry */
712 		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
713 	}
714 
715 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
716 	 * r[10] holds (a[0]*b[0])
717 	 * r[32] holds (b[1]*b[1])
718 	 * c1 holds the carry bits
719 	 */
720 	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
721 	if (c1) {
722 		p = &(r[n + n2]);
723 		lo= *p;
724 		ln = (lo + c1)&BN_MASK2;
725 		*p = ln;
726 
727 		/* The overflow will stop before we over write
728 		 * words we should not overwrite */
729 		if (ln < (BN_ULONG)c1) {
730 			do {
731 				p++;
732 				lo= *p;
733 				ln = (lo + 1) & BN_MASK2;
734 				*p = ln;
735 			} while (ln == 0);
736 		}
737 	}
738 }
739 
740 /* a and b must be the same size, which is n2.
741  * r needs to be n2 words and t needs to be n2*2
742  */
743 void
bn_mul_low_recursive(BN_ULONG * r,BN_ULONG * a,BN_ULONG * b,int n2,BN_ULONG * t)744 bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, BN_ULONG *t)
745 {
746 	int n = n2 / 2;
747 
748 # ifdef BN_COUNT
749 	fprintf(stderr, " bn_mul_low_recursive %d * %d\n",n2,n2);
750 # endif
751 
752 	bn_mul_recursive(r, a, b, n, 0, 0, &(t[0]));
753 	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL) {
754 		bn_mul_low_recursive(&(t[0]), &(a[0]), &(b[n]), n, &(t[n2]));
755 		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
756 		bn_mul_low_recursive(&(t[0]), &(a[n]), &(b[0]), n, &(t[n2]));
757 		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
758 	} else {
759 		bn_mul_low_normal(&(t[0]), &(a[0]), &(b[n]), n);
760 		bn_mul_low_normal(&(t[n]), &(a[n]), &(b[0]), n);
761 		bn_add_words(&(r[n]), &(r[n]), &(t[0]), n);
762 		bn_add_words(&(r[n]), &(r[n]), &(t[n]), n);
763 	}
764 }
765 
766 /* a and b must be the same size, which is n2.
767  * r needs to be n2 words and t needs to be n2*2
768  * l is the low words of the output.
769  * t needs to be n2*3
770  */
771 void
bn_mul_high(BN_ULONG * r,BN_ULONG * a,BN_ULONG * b,BN_ULONG * l,int n2,BN_ULONG * t)772 bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
773     BN_ULONG *t)
774 {
775 	int i, n;
776 	int c1, c2;
777 	int neg, oneg, zero;
778 	BN_ULONG ll, lc, *lp, *mp;
779 
780 # ifdef BN_COUNT
781 	fprintf(stderr, " bn_mul_high %d * %d\n",n2,n2);
782 # endif
783 	n = n2 / 2;
784 
785 	/* Calculate (al-ah)*(bh-bl) */
786 	neg = zero = 0;
787 	c1 = bn_cmp_words(&(a[0]), &(a[n]), n);
788 	c2 = bn_cmp_words(&(b[n]), &(b[0]), n);
789 	switch (c1 * 3 + c2) {
790 	case -4:
791 		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
792 		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
793 		break;
794 	case -3:
795 		zero = 1;
796 		break;
797 	case -2:
798 		bn_sub_words(&(r[0]), &(a[n]), &(a[0]), n);
799 		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
800 		neg = 1;
801 		break;
802 	case -1:
803 	case 0:
804 	case 1:
805 		zero = 1;
806 		break;
807 	case 2:
808 		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
809 		bn_sub_words(&(r[n]), &(b[0]), &(b[n]), n);
810 		neg = 1;
811 		break;
812 	case 3:
813 		zero = 1;
814 		break;
815 	case 4:
816 		bn_sub_words(&(r[0]), &(a[0]), &(a[n]), n);
817 		bn_sub_words(&(r[n]), &(b[n]), &(b[0]), n);
818 		break;
819 	}
820 
821 	oneg = neg;
822 	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
823 	/* r[10] = (a[1]*b[1]) */
824 # ifdef BN_MUL_COMBA
825 	if (n == 8) {
826 		bn_mul_comba8(&(t[0]), &(r[0]), &(r[n]));
827 		bn_mul_comba8(r, &(a[n]), &(b[n]));
828 	} else
829 # endif
830 	{
831 		bn_mul_recursive(&(t[0]), &(r[0]), &(r[n]), n, 0, 0, &(t[n2]));
832 		bn_mul_recursive(r, &(a[n]), &(b[n]), n, 0, 0, &(t[n2]));
833 	}
834 
835 	/* s0 == low(al*bl)
836 	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
837 	 * We know s0 and s1 so the only unknown is high(al*bl)
838 	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
839 	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
840 	 */
841 	if (l != NULL) {
842 		lp = &(t[n2 + n]);
843 		c1 = (int)(bn_add_words(lp, &(r[0]), &(l[0]), n));
844 	} else {
845 		c1 = 0;
846 		lp = &(r[0]);
847 	}
848 
849 	if (neg)
850 		neg = (int)(bn_sub_words(&(t[n2]), lp, &(t[0]), n));
851 	else {
852 		bn_add_words(&(t[n2]), lp, &(t[0]), n);
853 		neg = 0;
854 	}
855 
856 	if (l != NULL) {
857 		bn_sub_words(&(t[n2 + n]), &(l[n]), &(t[n2]), n);
858 	} else {
859 		lp = &(t[n2 + n]);
860 		mp = &(t[n2]);
861 		for (i = 0; i < n; i++)
862 			lp[i] = ((~mp[i]) + 1) & BN_MASK2;
863 	}
864 
865 	/* s[0] = low(al*bl)
866 	 * t[3] = high(al*bl)
867 	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
868 	 * r[10] = (a[1]*b[1])
869 	 */
870 	/* R[10] = al*bl
871 	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
872 	 * R[32] = ah*bh
873 	 */
874 	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
875 	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
876 	 * R[3]=r[1]+(carry/borrow)
877 	 */
878 	if (l != NULL) {
879 		lp = &(t[n2]);
880 		c1 = (int)(bn_add_words(lp, &(t[n2 + n]), &(l[0]), n));
881 	} else {
882 		lp = &(t[n2 + n]);
883 		c1 = 0;
884 	}
885 	c1 += (int)(bn_add_words(&(t[n2]), lp, &(r[0]), n));
886 	if (oneg)
887 		c1 -= (int)(bn_sub_words(&(t[n2]), &(t[n2]), &(t[0]), n));
888 	else
889 		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), &(t[0]), n));
890 
891 	c2 = (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n2 + n]), n));
892 	c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(r[n]), n));
893 	if (oneg)
894 		c2 -= (int)(bn_sub_words(&(r[0]), &(r[0]), &(t[n]), n));
895 	else
896 		c2 += (int)(bn_add_words(&(r[0]), &(r[0]), &(t[n]), n));
897 
898 	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
899 	{
900 		i = 0;
901 		if (c1 > 0) {
902 			lc = c1;
903 			do {
904 				ll = (r[i] + lc) & BN_MASK2;
905 				r[i++] = ll;
906 				lc = (lc > ll);
907 			} while (lc);
908 		} else {
909 			lc = -c1;
910 			do {
911 				ll = r[i];
912 				r[i++] = (ll - lc) & BN_MASK2;
913 				lc = (lc > ll);
914 			} while (lc);
915 		}
916 	}
917 	if (c2 != 0) /* Add starting at r[1] */
918 	{
919 		i = n;
920 		if (c2 > 0) {
921 			lc = c2;
922 			do {
923 				ll = (r[i] + lc) & BN_MASK2;
924 				r[i++] = ll;
925 				lc = (lc > ll);
926 			} while (lc);
927 		} else {
928 			lc = -c2;
929 			do {
930 				ll = r[i];
931 				r[i++] = (ll - lc) & BN_MASK2;
932 				lc = (lc > ll);
933 			} while (lc);
934 		}
935 	}
936 }
937 #endif /* BN_RECURSION */
938 
939 int
BN_mul(BIGNUM * r,const BIGNUM * a,const BIGNUM * b,BN_CTX * ctx)940 BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
941 {
942 	int ret = 0;
943 	int top, al, bl;
944 	BIGNUM *rr;
945 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
946 	int i;
947 #endif
948 #ifdef BN_RECURSION
949 	BIGNUM *t = NULL;
950 	int j = 0, k;
951 #endif
952 
953 #ifdef BN_COUNT
954 	fprintf(stderr, "BN_mul %d * %d\n",a->top,b->top);
955 #endif
956 
957 	bn_check_top(a);
958 	bn_check_top(b);
959 	bn_check_top(r);
960 
961 	al = a->top;
962 	bl = b->top;
963 
964 	if ((al == 0) || (bl == 0)) {
965 		BN_zero(r);
966 		return (1);
967 	}
968 	top = al + bl;
969 
970 	BN_CTX_start(ctx);
971 	if ((r == a) || (r == b)) {
972 		if ((rr = BN_CTX_get(ctx)) == NULL)
973 			goto err;
974 	} else
975 		rr = r;
976 	rr->neg = a->neg ^ b->neg;
977 
978 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
979 	i = al - bl;
980 #endif
981 #ifdef BN_MUL_COMBA
982 	if (i == 0) {
983 # if 0
984 		if (al == 4) {
985 			if (bn_wexpand(rr, 8) == NULL)
986 				goto err;
987 			rr->top = 8;
988 			bn_mul_comba4(rr->d, a->d, b->d);
989 			goto end;
990 		}
991 # endif
992 		if (al == 8) {
993 			if (bn_wexpand(rr, 16) == NULL)
994 				goto err;
995 			rr->top = 16;
996 			bn_mul_comba8(rr->d, a->d, b->d);
997 			goto end;
998 		}
999 	}
1000 #endif /* BN_MUL_COMBA */
1001 #ifdef BN_RECURSION
1002 	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
1003 		if (i >= -1 && i <= 1) {
1004 			/* Find out the power of two lower or equal
1005 			   to the longest of the two numbers */
1006 			if (i >= 0) {
1007 				j = BN_num_bits_word((BN_ULONG)al);
1008 			}
1009 			if (i == -1) {
1010 				j = BN_num_bits_word((BN_ULONG)bl);
1011 			}
1012 			j = 1 << (j - 1);
1013 			assert(j <= al || j <= bl);
1014 			k = j + j;
1015 			if ((t = BN_CTX_get(ctx)) == NULL)
1016 				goto err;
1017 			if (al > j || bl > j) {
1018 				if (bn_wexpand(t, k * 4) == NULL)
1019 					goto err;
1020 				if (bn_wexpand(rr, k * 4) == NULL)
1021 					goto err;
1022 				bn_mul_part_recursive(rr->d, a->d, b->d,
1023 				    j, al - j, bl - j, t->d);
1024 			}
1025 			else	/* al <= j || bl <= j */
1026 			{
1027 				if (bn_wexpand(t, k * 2) == NULL)
1028 					goto err;
1029 				if (bn_wexpand(rr, k * 2) == NULL)
1030 					goto err;
1031 				bn_mul_recursive(rr->d, a->d, b->d,
1032 				    j, al - j, bl - j, t->d);
1033 			}
1034 			rr->top = top;
1035 			goto end;
1036 		}
1037 #if 0
1038 		if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
1039 			BIGNUM *tmp_bn = (BIGNUM *)b;
1040 			if (bn_wexpand(tmp_bn, al) == NULL)
1041 				goto err;
1042 			tmp_bn->d[bl] = 0;
1043 			bl++;
1044 			i--;
1045 		} else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
1046 			BIGNUM *tmp_bn = (BIGNUM *)a;
1047 			if (bn_wexpand(tmp_bn, bl) == NULL)
1048 				goto err;
1049 			tmp_bn->d[al] = 0;
1050 			al++;
1051 			i++;
1052 		}
1053 		if (i == 0) {
1054 			/* symmetric and > 4 */
1055 			/* 16 or larger */
1056 			j = BN_num_bits_word((BN_ULONG)al);
1057 			j = 1 << (j - 1);
1058 			k = j + j;
1059 			if ((t = BN_CTX_get(ctx)) == NULL)
1060 				goto err;
1061 			if (al == j) /* exact multiple */
1062 			{
1063 				if (bn_wexpand(t, k * 2) == NULL)
1064 					goto err;
1065 				if (bn_wexpand(rr, k * 2) == NULL)
1066 					goto err;
1067 				bn_mul_recursive(rr->d, a->d, b->d, al, t->d);
1068 			} else {
1069 				if (bn_wexpand(t, k * 4) == NULL)
1070 					goto err;
1071 				if (bn_wexpand(rr, k * 4) == NULL)
1072 					goto err;
1073 				bn_mul_part_recursive(rr->d, a->d, b->d,
1074 				    al - j, j, t->d);
1075 			}
1076 			rr->top = top;
1077 			goto end;
1078 		}
1079 #endif
1080 	}
1081 #endif /* BN_RECURSION */
1082 	if (bn_wexpand(rr, top) == NULL)
1083 		goto err;
1084 	rr->top = top;
1085 	bn_mul_normal(rr->d, a->d, al, b->d, bl);
1086 
1087 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1088 end:
1089 #endif
1090 	bn_correct_top(rr);
1091 	if (r != rr)
1092 		BN_copy(r, rr);
1093 	ret = 1;
1094 err:
1095 	bn_check_top(r);
1096 	BN_CTX_end(ctx);
1097 	return (ret);
1098 }
1099 
1100 void
bn_mul_normal(BN_ULONG * r,BN_ULONG * a,int na,BN_ULONG * b,int nb)1101 bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1102 {
1103 	BN_ULONG *rr;
1104 
1105 #ifdef BN_COUNT
1106 	fprintf(stderr, " bn_mul_normal %d * %d\n", na, nb);
1107 #endif
1108 
1109 	if (na < nb) {
1110 		int itmp;
1111 		BN_ULONG *ltmp;
1112 
1113 		itmp = na;
1114 		na = nb;
1115 		nb = itmp;
1116 		ltmp = a;
1117 		a = b;
1118 		b = ltmp;
1119 
1120 	}
1121 	rr = &(r[na]);
1122 	if (nb <= 0) {
1123 		(void)bn_mul_words(r, a, na, 0);
1124 		return;
1125 	} else
1126 		rr[0] = bn_mul_words(r, a, na, b[0]);
1127 
1128 	for (;;) {
1129 		if (--nb <= 0)
1130 			return;
1131 		rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
1132 		if (--nb <= 0)
1133 			return;
1134 		rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
1135 		if (--nb <= 0)
1136 			return;
1137 		rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
1138 		if (--nb <= 0)
1139 			return;
1140 		rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
1141 		rr += 4;
1142 		r += 4;
1143 		b += 4;
1144 	}
1145 }
1146 
1147 void
bn_mul_low_normal(BN_ULONG * r,BN_ULONG * a,BN_ULONG * b,int n)1148 bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1149 {
1150 #ifdef BN_COUNT
1151 	fprintf(stderr, " bn_mul_low_normal %d * %d\n", n, n);
1152 #endif
1153 	bn_mul_words(r, a, n, b[0]);
1154 
1155 	for (;;) {
1156 		if (--n <= 0)
1157 			return;
1158 		bn_mul_add_words(&(r[1]), a, n, b[1]);
1159 		if (--n <= 0)
1160 			return;
1161 		bn_mul_add_words(&(r[2]), a, n, b[2]);
1162 		if (--n <= 0)
1163 			return;
1164 		bn_mul_add_words(&(r[3]), a, n, b[3]);
1165 		if (--n <= 0)
1166 			return;
1167 		bn_mul_add_words(&(r[4]), a, n, b[4]);
1168 		r += 4;
1169 		b += 4;
1170 	}
1171 }
1172