1 /* crypto/bn/bn_mul.c */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 
59 #ifndef BN_DEBUG
60 # undef NDEBUG /* avoid conflicting definitions */
61 # define NDEBUG
62 #endif
63 
64 #include <stdio.h>
65 #include <assert.h>
66 #include "cryptlib.h"
67 #include "bn_lcl.h"
68 
69 #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
70 /* Here follows specialised variants of bn_add_words() and
71    bn_sub_words().  They have the property performing operations on
72    arrays of different sizes.  The sizes of those arrays is expressed through
73    cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
74    which is the delta between the two lengths, calculated as len(a)-len(b).
75    All lengths are the number of BN_ULONGs...  For the operations that require
76    a result array as parameter, it must have the length cl+abs(dl).
77    These functions should probably end up in bn_asm.c as soon as there are
78    assembler counterparts for the systems that use assembler files.  */
79 
80 BN_ULONG bn_sub_part_words(BN_ULONG *r,
81 	const BN_ULONG *a, const BN_ULONG *b,
82 	int cl, int dl)
83 	{
84 	BN_ULONG c, t;
85 
86 	assert(cl >= 0);
87 	c = bn_sub_words(r, a, b, cl);
88 
89 	if (dl == 0)
90 		return c;
91 
92 	r += cl;
93 	a += cl;
94 	b += cl;
95 
96 	if (dl < 0)
97 		{
98 #ifdef BN_COUNT
99 		fprintf(stderr, "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
100 #endif
101 		for (;;)
102 			{
103 			t = b[0];
104 			r[0] = (0-t-c)&BN_MASK2;
105 			if (t != 0) c=1;
106 			if (++dl >= 0) break;
107 
108 			t = b[1];
109 			r[1] = (0-t-c)&BN_MASK2;
110 			if (t != 0) c=1;
111 			if (++dl >= 0) break;
112 
113 			t = b[2];
114 			r[2] = (0-t-c)&BN_MASK2;
115 			if (t != 0) c=1;
116 			if (++dl >= 0) break;
117 
118 			t = b[3];
119 			r[3] = (0-t-c)&BN_MASK2;
120 			if (t != 0) c=1;
121 			if (++dl >= 0) break;
122 
123 			b += 4;
124 			r += 4;
125 			}
126 		}
127 	else
128 		{
129 		int save_dl = dl;
130 #ifdef BN_COUNT
131 		fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
132 #endif
133 		while(c)
134 			{
135 			t = a[0];
136 			r[0] = (t-c)&BN_MASK2;
137 			if (t != 0) c=0;
138 			if (--dl <= 0) break;
139 
140 			t = a[1];
141 			r[1] = (t-c)&BN_MASK2;
142 			if (t != 0) c=0;
143 			if (--dl <= 0) break;
144 
145 			t = a[2];
146 			r[2] = (t-c)&BN_MASK2;
147 			if (t != 0) c=0;
148 			if (--dl <= 0) break;
149 
150 			t = a[3];
151 			r[3] = (t-c)&BN_MASK2;
152 			if (t != 0) c=0;
153 			if (--dl <= 0) break;
154 
155 			save_dl = dl;
156 			a += 4;
157 			r += 4;
158 			}
159 		if (dl > 0)
160 			{
161 #ifdef BN_COUNT
162 			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
163 #endif
164 			if (save_dl > dl)
165 				{
166 				switch (save_dl - dl)
167 					{
168 				case 1:
169 					r[1] = a[1];
170 					if (--dl <= 0) break;
171 				case 2:
172 					r[2] = a[2];
173 					if (--dl <= 0) break;
174 				case 3:
175 					r[3] = a[3];
176 					if (--dl <= 0) break;
177 					}
178 				a += 4;
179 				r += 4;
180 				}
181 			}
182 		if (dl > 0)
183 			{
184 #ifdef BN_COUNT
185 			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
186 #endif
187 			for(;;)
188 				{
189 				r[0] = a[0];
190 				if (--dl <= 0) break;
191 				r[1] = a[1];
192 				if (--dl <= 0) break;
193 				r[2] = a[2];
194 				if (--dl <= 0) break;
195 				r[3] = a[3];
196 				if (--dl <= 0) break;
197 
198 				a += 4;
199 				r += 4;
200 				}
201 			}
202 		}
203 	return c;
204 	}
205 #endif
206 
207 BN_ULONG bn_add_part_words(BN_ULONG *r,
208 	const BN_ULONG *a, const BN_ULONG *b,
209 	int cl, int dl)
210 	{
211 	BN_ULONG c, l, t;
212 
213 	assert(cl >= 0);
214 	c = bn_add_words(r, a, b, cl);
215 
216 	if (dl == 0)
217 		return c;
218 
219 	r += cl;
220 	a += cl;
221 	b += cl;
222 
223 	if (dl < 0)
224 		{
225 		int save_dl = dl;
226 #ifdef BN_COUNT
227 		fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
228 #endif
229 		while (c)
230 			{
231 			l=(c+b[0])&BN_MASK2;
232 			c=(l < c);
233 			r[0]=l;
234 			if (++dl >= 0) break;
235 
236 			l=(c+b[1])&BN_MASK2;
237 			c=(l < c);
238 			r[1]=l;
239 			if (++dl >= 0) break;
240 
241 			l=(c+b[2])&BN_MASK2;
242 			c=(l < c);
243 			r[2]=l;
244 			if (++dl >= 0) break;
245 
246 			l=(c+b[3])&BN_MASK2;
247 			c=(l < c);
248 			r[3]=l;
249 			if (++dl >= 0) break;
250 
251 			save_dl = dl;
252 			b+=4;
253 			r+=4;
254 			}
255 		if (dl < 0)
256 			{
257 #ifdef BN_COUNT
258 			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
259 #endif
260 			if (save_dl < dl)
261 				{
262 				switch (dl - save_dl)
263 					{
264 				case 1:
265 					r[1] = b[1];
266 					if (++dl >= 0) break;
267 				case 2:
268 					r[2] = b[2];
269 					if (++dl >= 0) break;
270 				case 3:
271 					r[3] = b[3];
272 					if (++dl >= 0) break;
273 					}
274 				b += 4;
275 				r += 4;
276 				}
277 			}
278 		if (dl < 0)
279 			{
280 #ifdef BN_COUNT
281 			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
282 #endif
283 			for(;;)
284 				{
285 				r[0] = b[0];
286 				if (++dl >= 0) break;
287 				r[1] = b[1];
288 				if (++dl >= 0) break;
289 				r[2] = b[2];
290 				if (++dl >= 0) break;
291 				r[3] = b[3];
292 				if (++dl >= 0) break;
293 
294 				b += 4;
295 				r += 4;
296 				}
297 			}
298 		}
299 	else
300 		{
301 		int save_dl = dl;
302 #ifdef BN_COUNT
303 		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
304 #endif
305 		while (c)
306 			{
307 			t=(a[0]+c)&BN_MASK2;
308 			c=(t < c);
309 			r[0]=t;
310 			if (--dl <= 0) break;
311 
312 			t=(a[1]+c)&BN_MASK2;
313 			c=(t < c);
314 			r[1]=t;
315 			if (--dl <= 0) break;
316 
317 			t=(a[2]+c)&BN_MASK2;
318 			c=(t < c);
319 			r[2]=t;
320 			if (--dl <= 0) break;
321 
322 			t=(a[3]+c)&BN_MASK2;
323 			c=(t < c);
324 			r[3]=t;
325 			if (--dl <= 0) break;
326 
327 			save_dl = dl;
328 			a+=4;
329 			r+=4;
330 			}
331 #ifdef BN_COUNT
332 		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
333 #endif
334 		if (dl > 0)
335 			{
336 			if (save_dl > dl)
337 				{
338 				switch (save_dl - dl)
339 					{
340 				case 1:
341 					r[1] = a[1];
342 					if (--dl <= 0) break;
343 				case 2:
344 					r[2] = a[2];
345 					if (--dl <= 0) break;
346 				case 3:
347 					r[3] = a[3];
348 					if (--dl <= 0) break;
349 					}
350 				a += 4;
351 				r += 4;
352 				}
353 			}
354 		if (dl > 0)
355 			{
356 #ifdef BN_COUNT
357 			fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
358 #endif
359 			for(;;)
360 				{
361 				r[0] = a[0];
362 				if (--dl <= 0) break;
363 				r[1] = a[1];
364 				if (--dl <= 0) break;
365 				r[2] = a[2];
366 				if (--dl <= 0) break;
367 				r[3] = a[3];
368 				if (--dl <= 0) break;
369 
370 				a += 4;
371 				r += 4;
372 				}
373 			}
374 		}
375 	return c;
376 	}
377 
378 #ifdef BN_RECURSION
379 /* Karatsuba recursive multiplication algorithm
380  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
381 
382 /* r is 2*n2 words in size,
383  * a and b are both n2 words in size.
384  * n2 must be a power of 2.
385  * We multiply and return the result.
386  * t must be 2*n2 words in size
387  * We calculate
388  * a[0]*b[0]
389  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
390  * a[1]*b[1]
391  */
392 /* dnX may not be positive, but n2/2+dnX has to be */
393 void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
394 	int dna, int dnb, BN_ULONG *t)
395 	{
396 	int n=n2/2,c1,c2;
397 	int tna=n+dna, tnb=n+dnb;
398 	unsigned int neg,zero;
399 	BN_ULONG ln,lo,*p;
400 
401 # ifdef BN_COUNT
402 	fprintf(stderr," bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
403 # endif
404 # ifdef BN_MUL_COMBA
405 #  if 0
406 	if (n2 == 4)
407 		{
408 		bn_mul_comba4(r,a,b);
409 		return;
410 		}
411 #  endif
412 	/* Only call bn_mul_comba 8 if n2 == 8 and the
413 	 * two arrays are complete [steve]
414 	 */
415 	if (n2 == 8 && dna == 0 && dnb == 0)
416 		{
417 		bn_mul_comba8(r,a,b);
418 		return;
419 		}
420 # endif /* BN_MUL_COMBA */
421 	/* Else do normal multiply */
422 	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
423 		{
424 		bn_mul_normal(r,a,n2+dna,b,n2+dnb);
425 		if ((dna + dnb) < 0)
426 			memset(&r[2*n2 + dna + dnb], 0,
427 				sizeof(BN_ULONG) * -(dna + dnb));
428 		return;
429 		}
430 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
431 	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
432 	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
433 	zero=neg=0;
434 	switch (c1*3+c2)
435 		{
436 	case -4:
437 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
438 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
439 		break;
440 	case -3:
441 		zero=1;
442 		break;
443 	case -2:
444 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
445 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
446 		neg=1;
447 		break;
448 	case -1:
449 	case 0:
450 	case 1:
451 		zero=1;
452 		break;
453 	case 2:
454 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
455 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
456 		neg=1;
457 		break;
458 	case 3:
459 		zero=1;
460 		break;
461 	case 4:
462 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
463 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
464 		break;
465 		}
466 
467 # ifdef BN_MUL_COMBA
468 	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
469 					       extra args to do this well */
470 		{
471 		if (!zero)
472 			bn_mul_comba4(&(t[n2]),t,&(t[n]));
473 		else
474 			memset(&(t[n2]),0,8*sizeof(BN_ULONG));
475 
476 		bn_mul_comba4(r,a,b);
477 		bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
478 		}
479 	else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
480 						    take extra args to do this
481 						    well */
482 		{
483 		if (!zero)
484 			bn_mul_comba8(&(t[n2]),t,&(t[n]));
485 		else
486 			memset(&(t[n2]),0,16*sizeof(BN_ULONG));
487 
488 		bn_mul_comba8(r,a,b);
489 		bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
490 		}
491 	else
492 # endif /* BN_MUL_COMBA */
493 		{
494 		p= &(t[n2*2]);
495 		if (!zero)
496 			bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
497 		else
498 			memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
499 		bn_mul_recursive(r,a,b,n,0,0,p);
500 		bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
501 		}
502 
503 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
504 	 * r[10] holds (a[0]*b[0])
505 	 * r[32] holds (b[1]*b[1])
506 	 */
507 
508 	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
509 
510 	if (neg) /* if t[32] is negative */
511 		{
512 		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
513 		}
514 	else
515 		{
516 		/* Might have a carry */
517 		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
518 		}
519 
520 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
521 	 * r[10] holds (a[0]*b[0])
522 	 * r[32] holds (b[1]*b[1])
523 	 * c1 holds the carry bits
524 	 */
525 	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
526 	if (c1)
527 		{
528 		p= &(r[n+n2]);
529 		lo= *p;
530 		ln=(lo+c1)&BN_MASK2;
531 		*p=ln;
532 
533 		/* The overflow will stop before we over write
534 		 * words we should not overwrite */
535 		if (ln < (BN_ULONG)c1)
536 			{
537 			do	{
538 				p++;
539 				lo= *p;
540 				ln=(lo+1)&BN_MASK2;
541 				*p=ln;
542 				} while (ln == 0);
543 			}
544 		}
545 	}
546 
547 /* n+tn is the word length
548  * t needs to be n*4 is size, as does r */
549 /* tnX may not be negative but less than n */
550 void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
551 	     int tna, int tnb, BN_ULONG *t)
552 	{
553 	int i,j,n2=n*2;
554 	int c1,c2,neg,zero;
555 	BN_ULONG ln,lo,*p;
556 
557 # ifdef BN_COUNT
558 	fprintf(stderr," bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
559 		n, tna, n, tnb);
560 # endif
561 	if (n < 8)
562 		{
563 		bn_mul_normal(r,a,n+tna,b,n+tnb);
564 		return;
565 		}
566 
567 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
568 	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
569 	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
570 	zero=neg=0;
571 	switch (c1*3+c2)
572 		{
573 	case -4:
574 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
575 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
576 		break;
577 	case -3:
578 		zero=1;
579 		/* break; */
580 	case -2:
581 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
582 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
583 		neg=1;
584 		break;
585 	case -1:
586 	case 0:
587 	case 1:
588 		zero=1;
589 		/* break; */
590 	case 2:
591 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
592 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
593 		neg=1;
594 		break;
595 	case 3:
596 		zero=1;
597 		/* break; */
598 	case 4:
599 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
600 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
601 		break;
602 		}
603 		/* The zero case isn't yet implemented here. The speedup
604 		   would probably be negligible. */
605 # if 0
606 	if (n == 4)
607 		{
608 		bn_mul_comba4(&(t[n2]),t,&(t[n]));
609 		bn_mul_comba4(r,a,b);
610 		bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
611 		memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
612 		}
613 	else
614 # endif
615 	if (n == 8)
616 		{
617 		bn_mul_comba8(&(t[n2]),t,&(t[n]));
618 		bn_mul_comba8(r,a,b);
619 		bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
620 		memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
621 		}
622 	else
623 		{
624 		p= &(t[n2*2]);
625 		bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
626 		bn_mul_recursive(r,a,b,n,0,0,p);
627 		i=n/2;
628 		/* If there is only a bottom half to the number,
629 		 * just do it */
630 		if (tna > tnb)
631 			j = tna - i;
632 		else
633 			j = tnb - i;
634 		if (j == 0)
635 			{
636 			bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
637 				i,tna-i,tnb-i,p);
638 			memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
639 			}
640 		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
641 				{
642 				bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
643 					i,tna-i,tnb-i,p);
644 				memset(&(r[n2+tna+tnb]),0,
645 					sizeof(BN_ULONG)*(n2-tna-tnb));
646 				}
647 		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
648 			{
649 			memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
650 			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
651 				&& tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
652 				{
653 				bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
654 				}
655 			else
656 				{
657 				for (;;)
658 					{
659 					i/=2;
660 					/* these simplified conditions work
661 					 * exclusively because difference
662 					 * between tna and tnb is 1 or 0 */
663 					if (i < tna || i < tnb)
664 						{
665 						bn_mul_part_recursive(&(r[n2]),
666 							&(a[n]),&(b[n]),
667 							i,tna-i,tnb-i,p);
668 						break;
669 						}
670 					else if (i == tna || i == tnb)
671 						{
672 						bn_mul_recursive(&(r[n2]),
673 							&(a[n]),&(b[n]),
674 							i,tna-i,tnb-i,p);
675 						break;
676 						}
677 					}
678 				}
679 			}
680 		}
681 
682 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
683 	 * r[10] holds (a[0]*b[0])
684 	 * r[32] holds (b[1]*b[1])
685 	 */
686 
687 	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
688 
689 	if (neg) /* if t[32] is negative */
690 		{
691 		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
692 		}
693 	else
694 		{
695 		/* Might have a carry */
696 		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
697 		}
698 
699 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
700 	 * r[10] holds (a[0]*b[0])
701 	 * r[32] holds (b[1]*b[1])
702 	 * c1 holds the carry bits
703 	 */
704 	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
705 	if (c1)
706 		{
707 		p= &(r[n+n2]);
708 		lo= *p;
709 		ln=(lo+c1)&BN_MASK2;
710 		*p=ln;
711 
712 		/* The overflow will stop before we over write
713 		 * words we should not overwrite */
714 		if (ln < (BN_ULONG)c1)
715 			{
716 			do	{
717 				p++;
718 				lo= *p;
719 				ln=(lo+1)&BN_MASK2;
720 				*p=ln;
721 				} while (ln == 0);
722 			}
723 		}
724 	}
725 
726 /* a and b must be the same size, which is n2.
727  * r needs to be n2 words and t needs to be n2*2
728  */
729 void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
730 	     BN_ULONG *t)
731 	{
732 	int n=n2/2;
733 
734 # ifdef BN_COUNT
735 	fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
736 # endif
737 
738 	bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
739 	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
740 		{
741 		bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
742 		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
743 		bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
744 		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
745 		}
746 	else
747 		{
748 		bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
749 		bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
750 		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
751 		bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
752 		}
753 	}
754 
755 /* a and b must be the same size, which is n2.
756  * r needs to be n2 words and t needs to be n2*2
757  * l is the low words of the output.
758  * t needs to be n2*3
759  */
760 void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
761 	     BN_ULONG *t)
762 	{
763 	int i,n;
764 	int c1,c2;
765 	int neg,oneg,zero;
766 	BN_ULONG ll,lc,*lp,*mp;
767 
768 # ifdef BN_COUNT
769 	fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
770 # endif
771 	n=n2/2;
772 
773 	/* Calculate (al-ah)*(bh-bl) */
774 	neg=zero=0;
775 	c1=bn_cmp_words(&(a[0]),&(a[n]),n);
776 	c2=bn_cmp_words(&(b[n]),&(b[0]),n);
777 	switch (c1*3+c2)
778 		{
779 	case -4:
780 		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
781 		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
782 		break;
783 	case -3:
784 		zero=1;
785 		break;
786 	case -2:
787 		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
788 		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
789 		neg=1;
790 		break;
791 	case -1:
792 	case 0:
793 	case 1:
794 		zero=1;
795 		break;
796 	case 2:
797 		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
798 		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
799 		neg=1;
800 		break;
801 	case 3:
802 		zero=1;
803 		break;
804 	case 4:
805 		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
806 		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
807 		break;
808 		}
809 
810 	oneg=neg;
811 	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
812 	/* r[10] = (a[1]*b[1]) */
813 # ifdef BN_MUL_COMBA
814 	if (n == 8)
815 		{
816 		bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
817 		bn_mul_comba8(r,&(a[n]),&(b[n]));
818 		}
819 	else
820 # endif
821 		{
822 		bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
823 		bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
824 		}
825 
826 	/* s0 == low(al*bl)
827 	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
828 	 * We know s0 and s1 so the only unknown is high(al*bl)
829 	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
830 	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
831 	 */
832 	if (l != NULL)
833 		{
834 		lp= &(t[n2+n]);
835 		c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
836 		}
837 	else
838 		{
839 		c1=0;
840 		lp= &(r[0]);
841 		}
842 
843 	if (neg)
844 		neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
845 	else
846 		{
847 		bn_add_words(&(t[n2]),lp,&(t[0]),n);
848 		neg=0;
849 		}
850 
851 	if (l != NULL)
852 		{
853 		bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
854 		}
855 	else
856 		{
857 		lp= &(t[n2+n]);
858 		mp= &(t[n2]);
859 		for (i=0; i<n; i++)
860 			lp[i]=((~mp[i])+1)&BN_MASK2;
861 		}
862 
863 	/* s[0] = low(al*bl)
864 	 * t[3] = high(al*bl)
865 	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
866 	 * r[10] = (a[1]*b[1])
867 	 */
868 	/* R[10] = al*bl
869 	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
870 	 * R[32] = ah*bh
871 	 */
872 	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
873 	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
874 	 * R[3]=r[1]+(carry/borrow)
875 	 */
876 	if (l != NULL)
877 		{
878 		lp= &(t[n2]);
879 		c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
880 		}
881 	else
882 		{
883 		lp= &(t[n2+n]);
884 		c1=0;
885 		}
886 	c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
887 	if (oneg)
888 		c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
889 	else
890 		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
891 
892 	c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
893 	c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
894 	if (oneg)
895 		c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
896 	else
897 		c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
898 
899 	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
900 		{
901 		i=0;
902 		if (c1 > 0)
903 			{
904 			lc=c1;
905 			do	{
906 				ll=(r[i]+lc)&BN_MASK2;
907 				r[i++]=ll;
908 				lc=(lc > ll);
909 				} while (lc);
910 			}
911 		else
912 			{
913 			lc= -c1;
914 			do	{
915 				ll=r[i];
916 				r[i++]=(ll-lc)&BN_MASK2;
917 				lc=(lc > ll);
918 				} while (lc);
919 			}
920 		}
921 	if (c2 != 0) /* Add starting at r[1] */
922 		{
923 		i=n;
924 		if (c2 > 0)
925 			{
926 			lc=c2;
927 			do	{
928 				ll=(r[i]+lc)&BN_MASK2;
929 				r[i++]=ll;
930 				lc=(lc > ll);
931 				} while (lc);
932 			}
933 		else
934 			{
935 			lc= -c2;
936 			do	{
937 				ll=r[i];
938 				r[i++]=(ll-lc)&BN_MASK2;
939 				lc=(lc > ll);
940 				} while (lc);
941 			}
942 		}
943 	}
944 #endif /* BN_RECURSION */
945 
946 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
947 	{
948 	int ret=0;
949 	int top,al,bl;
950 	BIGNUM *rr;
951 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
952 	int i;
953 #endif
954 #ifdef BN_RECURSION
955 	BIGNUM *t=NULL;
956 	int j=0,k;
957 #endif
958 
959 #ifdef BN_COUNT
960 	fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
961 #endif
962 
963 	bn_check_top(a);
964 	bn_check_top(b);
965 	bn_check_top(r);
966 
967 	al=a->top;
968 	bl=b->top;
969 
970 	if ((al == 0) || (bl == 0))
971 		{
972 		BN_zero(r);
973 		return(1);
974 		}
975 	top=al+bl;
976 
977 	BN_CTX_start(ctx);
978 	if ((r == a) || (r == b))
979 		{
980 		if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
981 		}
982 	else
983 		rr = r;
984 	rr->neg=a->neg^b->neg;
985 
986 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
987 	i = al-bl;
988 #endif
989 #ifdef BN_MUL_COMBA
990 	if (i == 0)
991 		{
992 # if 0
993 		if (al == 4)
994 			{
995 			if (bn_wexpand(rr,8) == NULL) goto err;
996 			rr->top=8;
997 			bn_mul_comba4(rr->d,a->d,b->d);
998 			goto end;
999 			}
1000 # endif
1001 		if (al == 8)
1002 			{
1003 			if (bn_wexpand(rr,16) == NULL) goto err;
1004 			rr->top=16;
1005 			bn_mul_comba8(rr->d,a->d,b->d);
1006 			goto end;
1007 			}
1008 		}
1009 #endif /* BN_MUL_COMBA */
1010 #ifdef BN_RECURSION
1011 	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
1012 		{
1013 		if (i >= -1 && i <= 1)
1014 			{
1015 			int sav_j =0;
1016 			/* Find out the power of two lower or equal
1017 			   to the longest of the two numbers */
1018 			if (i >= 0)
1019 				{
1020 				j = BN_num_bits_word((BN_ULONG)al);
1021 				}
1022 			if (i == -1)
1023 				{
1024 				j = BN_num_bits_word((BN_ULONG)bl);
1025 				}
1026 			sav_j = j;
1027 			j = 1<<(j-1);
1028 			assert(j <= al || j <= bl);
1029 			k = j+j;
1030 			t = BN_CTX_get(ctx);
1031 			if (t == NULL)
1032 				goto err;
1033 			if (al > j || bl > j)
1034 				{
1035 				if (bn_wexpand(t,k*4) == NULL) goto err;
1036 				if (bn_wexpand(rr,k*4) == NULL) goto err;
1037 				bn_mul_part_recursive(rr->d,a->d,b->d,
1038 					j,al-j,bl-j,t->d);
1039 				}
1040 			else	/* al <= j || bl <= j */
1041 				{
1042 				if (bn_wexpand(t,k*2) == NULL) goto err;
1043 				if (bn_wexpand(rr,k*2) == NULL) goto err;
1044 				bn_mul_recursive(rr->d,a->d,b->d,
1045 					j,al-j,bl-j,t->d);
1046 				}
1047 			rr->top=top;
1048 			goto end;
1049 			}
1050 #if 0
1051 		if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
1052 			{
1053 			BIGNUM *tmp_bn = (BIGNUM *)b;
1054 			if (bn_wexpand(tmp_bn,al) == NULL) goto err;
1055 			tmp_bn->d[bl]=0;
1056 			bl++;
1057 			i--;
1058 			}
1059 		else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1060 			{
1061 			BIGNUM *tmp_bn = (BIGNUM *)a;
1062 			if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
1063 			tmp_bn->d[al]=0;
1064 			al++;
1065 			i++;
1066 			}
1067 		if (i == 0)
1068 			{
1069 			/* symmetric and > 4 */
1070 			/* 16 or larger */
1071 			j=BN_num_bits_word((BN_ULONG)al);
1072 			j=1<<(j-1);
1073 			k=j+j;
1074 			t = BN_CTX_get(ctx);
1075 			if (al == j) /* exact multiple */
1076 				{
1077 				if (bn_wexpand(t,k*2) == NULL) goto err;
1078 				if (bn_wexpand(rr,k*2) == NULL) goto err;
1079 				bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1080 				}
1081 			else
1082 				{
1083 				if (bn_wexpand(t,k*4) == NULL) goto err;
1084 				if (bn_wexpand(rr,k*4) == NULL) goto err;
1085 				bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1086 				}
1087 			rr->top=top;
1088 			goto end;
1089 			}
1090 #endif
1091 		}
1092 #endif /* BN_RECURSION */
1093 	if (bn_wexpand(rr,top) == NULL) goto err;
1094 	rr->top=top;
1095 	bn_mul_normal(rr->d,a->d,al,b->d,bl);
1096 
1097 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1098 end:
1099 #endif
1100 	bn_correct_top(rr);
1101 	if (r != rr) BN_copy(r,rr);
1102 	ret=1;
1103 err:
1104 	bn_check_top(r);
1105 	BN_CTX_end(ctx);
1106 	return(ret);
1107 	}
1108 
1109 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1110 	{
1111 	BN_ULONG *rr;
1112 
1113 #ifdef BN_COUNT
1114 	fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
1115 #endif
1116 
1117 	if (na < nb)
1118 		{
1119 		int itmp;
1120 		BN_ULONG *ltmp;
1121 
1122 		itmp=na; na=nb; nb=itmp;
1123 		ltmp=a;   a=b;   b=ltmp;
1124 
1125 		}
1126 	rr= &(r[na]);
1127 	if (nb <= 0)
1128 		{
1129 		(void)bn_mul_words(r,a,na,0);
1130 		return;
1131 		}
1132 	else
1133 		rr[0]=bn_mul_words(r,a,na,b[0]);
1134 
1135 	for (;;)
1136 		{
1137 		if (--nb <= 0) return;
1138 		rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1139 		if (--nb <= 0) return;
1140 		rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1141 		if (--nb <= 0) return;
1142 		rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1143 		if (--nb <= 0) return;
1144 		rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1145 		rr+=4;
1146 		r+=4;
1147 		b+=4;
1148 		}
1149 	}
1150 
1151 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1152 	{
1153 #ifdef BN_COUNT
1154 	fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
1155 #endif
1156 	bn_mul_words(r,a,n,b[0]);
1157 
1158 	for (;;)
1159 		{
1160 		if (--n <= 0) return;
1161 		bn_mul_add_words(&(r[1]),a,n,b[1]);
1162 		if (--n <= 0) return;
1163 		bn_mul_add_words(&(r[2]),a,n,b[2]);
1164 		if (--n <= 0) return;
1165 		bn_mul_add_words(&(r[3]),a,n,b[3]);
1166 		if (--n <= 0) return;
1167 		bn_mul_add_words(&(r[4]),a,n,b[4]);
1168 		r+=4;
1169 		b+=4;
1170 		}
1171 	}
1172