xref: /openbsd/lib/libcrypto/bn/bn_mul.c (revision 4fcf65c5)
15b37fcf3Sryker /* crypto/bn/bn_mul.c */
25b37fcf3Sryker /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
35b37fcf3Sryker  * All rights reserved.
45b37fcf3Sryker  *
55b37fcf3Sryker  * This package is an SSL implementation written
65b37fcf3Sryker  * by Eric Young (eay@cryptsoft.com).
75b37fcf3Sryker  * The implementation was written so as to conform with Netscapes SSL.
85b37fcf3Sryker  *
95b37fcf3Sryker  * This library is free for commercial and non-commercial use as long as
105b37fcf3Sryker  * the following conditions are aheared to.  The following conditions
115b37fcf3Sryker  * apply to all code found in this distribution, be it the RC4, RSA,
125b37fcf3Sryker  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
135b37fcf3Sryker  * included with this distribution is covered by the same copyright terms
145b37fcf3Sryker  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
155b37fcf3Sryker  *
165b37fcf3Sryker  * Copyright remains Eric Young's, and as such any Copyright notices in
175b37fcf3Sryker  * the code are not to be removed.
185b37fcf3Sryker  * If this package is used in a product, Eric Young should be given attribution
195b37fcf3Sryker  * as the author of the parts of the library used.
205b37fcf3Sryker  * This can be in the form of a textual message at program startup or
215b37fcf3Sryker  * in documentation (online or textual) provided with the package.
225b37fcf3Sryker  *
235b37fcf3Sryker  * Redistribution and use in source and binary forms, with or without
245b37fcf3Sryker  * modification, are permitted provided that the following conditions
255b37fcf3Sryker  * are met:
265b37fcf3Sryker  * 1. Redistributions of source code must retain the copyright
275b37fcf3Sryker  *    notice, this list of conditions and the following disclaimer.
285b37fcf3Sryker  * 2. Redistributions in binary form must reproduce the above copyright
295b37fcf3Sryker  *    notice, this list of conditions and the following disclaimer in the
305b37fcf3Sryker  *    documentation and/or other materials provided with the distribution.
315b37fcf3Sryker  * 3. All advertising materials mentioning features or use of this software
325b37fcf3Sryker  *    must display the following acknowledgement:
335b37fcf3Sryker  *    "This product includes cryptographic software written by
345b37fcf3Sryker  *     Eric Young (eay@cryptsoft.com)"
355b37fcf3Sryker  *    The word 'cryptographic' can be left out if the rouines from the library
365b37fcf3Sryker  *    being used are not cryptographic related :-).
375b37fcf3Sryker  * 4. If you include any Windows specific code (or a derivative thereof) from
385b37fcf3Sryker  *    the apps directory (application code) you must include an acknowledgement:
395b37fcf3Sryker  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
405b37fcf3Sryker  *
415b37fcf3Sryker  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
425b37fcf3Sryker  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
435b37fcf3Sryker  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
445b37fcf3Sryker  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
455b37fcf3Sryker  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
465b37fcf3Sryker  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
475b37fcf3Sryker  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
485b37fcf3Sryker  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
495b37fcf3Sryker  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
505b37fcf3Sryker  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
515b37fcf3Sryker  * SUCH DAMAGE.
525b37fcf3Sryker  *
535b37fcf3Sryker  * The licence and distribution terms for any publically available version or
545b37fcf3Sryker  * derivative of this code cannot be changed.  i.e. this code cannot simply be
555b37fcf3Sryker  * copied and put under another distribution licence
565b37fcf3Sryker  * [including the GNU Public Licence.]
575b37fcf3Sryker  */
585b37fcf3Sryker 
59*4fcf65c5Sdjm #ifndef BN_DEBUG
60*4fcf65c5Sdjm # undef NDEBUG /* avoid conflicting definitions */
61*4fcf65c5Sdjm # define NDEBUG
62*4fcf65c5Sdjm #endif
63*4fcf65c5Sdjm 
645b37fcf3Sryker #include <stdio.h>
65*4fcf65c5Sdjm #include <assert.h>
665b37fcf3Sryker #include "cryptlib.h"
675b37fcf3Sryker #include "bn_lcl.h"
685b37fcf3Sryker 
69*4fcf65c5Sdjm #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
70*4fcf65c5Sdjm /* Here follows specialised variants of bn_add_words() and
71*4fcf65c5Sdjm    bn_sub_words().  They have the property performing operations on
72*4fcf65c5Sdjm    arrays of different sizes.  The sizes of those arrays is expressed through
73*4fcf65c5Sdjm    cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
74*4fcf65c5Sdjm    which is the delta between the two lengths, calculated as len(a)-len(b).
75*4fcf65c5Sdjm    All lengths are the number of BN_ULONGs...  For the operations that require
76*4fcf65c5Sdjm    a result array as parameter, it must have the length cl+abs(dl).
77*4fcf65c5Sdjm    These functions should probably end up in bn_asm.c as soon as there are
78*4fcf65c5Sdjm    assembler counterparts for the systems that use assembler files.  */
79*4fcf65c5Sdjm 
80*4fcf65c5Sdjm BN_ULONG bn_sub_part_words(BN_ULONG *r,
81*4fcf65c5Sdjm 	const BN_ULONG *a, const BN_ULONG *b,
82*4fcf65c5Sdjm 	int cl, int dl)
83*4fcf65c5Sdjm 	{
84*4fcf65c5Sdjm 	BN_ULONG c, t;
85*4fcf65c5Sdjm 
86*4fcf65c5Sdjm 	assert(cl >= 0);
87*4fcf65c5Sdjm 	c = bn_sub_words(r, a, b, cl);
88*4fcf65c5Sdjm 
89*4fcf65c5Sdjm 	if (dl == 0)
90*4fcf65c5Sdjm 		return c;
91*4fcf65c5Sdjm 
92*4fcf65c5Sdjm 	r += cl;
93*4fcf65c5Sdjm 	a += cl;
94*4fcf65c5Sdjm 	b += cl;
95*4fcf65c5Sdjm 
96*4fcf65c5Sdjm 	if (dl < 0)
97*4fcf65c5Sdjm 		{
98*4fcf65c5Sdjm #ifdef BN_COUNT
99*4fcf65c5Sdjm 		fprintf(stderr, "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
100*4fcf65c5Sdjm #endif
101*4fcf65c5Sdjm 		for (;;)
102*4fcf65c5Sdjm 			{
103*4fcf65c5Sdjm 			t = b[0];
104*4fcf65c5Sdjm 			r[0] = (0-t-c)&BN_MASK2;
105*4fcf65c5Sdjm 			if (t != 0) c=1;
106*4fcf65c5Sdjm 			if (++dl >= 0) break;
107*4fcf65c5Sdjm 
108*4fcf65c5Sdjm 			t = b[1];
109*4fcf65c5Sdjm 			r[1] = (0-t-c)&BN_MASK2;
110*4fcf65c5Sdjm 			if (t != 0) c=1;
111*4fcf65c5Sdjm 			if (++dl >= 0) break;
112*4fcf65c5Sdjm 
113*4fcf65c5Sdjm 			t = b[2];
114*4fcf65c5Sdjm 			r[2] = (0-t-c)&BN_MASK2;
115*4fcf65c5Sdjm 			if (t != 0) c=1;
116*4fcf65c5Sdjm 			if (++dl >= 0) break;
117*4fcf65c5Sdjm 
118*4fcf65c5Sdjm 			t = b[3];
119*4fcf65c5Sdjm 			r[3] = (0-t-c)&BN_MASK2;
120*4fcf65c5Sdjm 			if (t != 0) c=1;
121*4fcf65c5Sdjm 			if (++dl >= 0) break;
122*4fcf65c5Sdjm 
123*4fcf65c5Sdjm 			b += 4;
124*4fcf65c5Sdjm 			r += 4;
125*4fcf65c5Sdjm 			}
126*4fcf65c5Sdjm 		}
127*4fcf65c5Sdjm 	else
128*4fcf65c5Sdjm 		{
129*4fcf65c5Sdjm 		int save_dl = dl;
130*4fcf65c5Sdjm #ifdef BN_COUNT
131*4fcf65c5Sdjm 		fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
132*4fcf65c5Sdjm #endif
133*4fcf65c5Sdjm 		while(c)
134*4fcf65c5Sdjm 			{
135*4fcf65c5Sdjm 			t = a[0];
136*4fcf65c5Sdjm 			r[0] = (t-c)&BN_MASK2;
137*4fcf65c5Sdjm 			if (t != 0) c=0;
138*4fcf65c5Sdjm 			if (--dl <= 0) break;
139*4fcf65c5Sdjm 
140*4fcf65c5Sdjm 			t = a[1];
141*4fcf65c5Sdjm 			r[1] = (t-c)&BN_MASK2;
142*4fcf65c5Sdjm 			if (t != 0) c=0;
143*4fcf65c5Sdjm 			if (--dl <= 0) break;
144*4fcf65c5Sdjm 
145*4fcf65c5Sdjm 			t = a[2];
146*4fcf65c5Sdjm 			r[2] = (t-c)&BN_MASK2;
147*4fcf65c5Sdjm 			if (t != 0) c=0;
148*4fcf65c5Sdjm 			if (--dl <= 0) break;
149*4fcf65c5Sdjm 
150*4fcf65c5Sdjm 			t = a[3];
151*4fcf65c5Sdjm 			r[3] = (t-c)&BN_MASK2;
152*4fcf65c5Sdjm 			if (t != 0) c=0;
153*4fcf65c5Sdjm 			if (--dl <= 0) break;
154*4fcf65c5Sdjm 
155*4fcf65c5Sdjm 			save_dl = dl;
156*4fcf65c5Sdjm 			a += 4;
157*4fcf65c5Sdjm 			r += 4;
158*4fcf65c5Sdjm 			}
159*4fcf65c5Sdjm 		if (dl > 0)
160*4fcf65c5Sdjm 			{
161*4fcf65c5Sdjm #ifdef BN_COUNT
162*4fcf65c5Sdjm 			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
163*4fcf65c5Sdjm #endif
164*4fcf65c5Sdjm 			if (save_dl > dl)
165*4fcf65c5Sdjm 				{
166*4fcf65c5Sdjm 				switch (save_dl - dl)
167*4fcf65c5Sdjm 					{
168*4fcf65c5Sdjm 				case 1:
169*4fcf65c5Sdjm 					r[1] = a[1];
170*4fcf65c5Sdjm 					if (--dl <= 0) break;
171*4fcf65c5Sdjm 				case 2:
172*4fcf65c5Sdjm 					r[2] = a[2];
173*4fcf65c5Sdjm 					if (--dl <= 0) break;
174*4fcf65c5Sdjm 				case 3:
175*4fcf65c5Sdjm 					r[3] = a[3];
176*4fcf65c5Sdjm 					if (--dl <= 0) break;
177*4fcf65c5Sdjm 					}
178*4fcf65c5Sdjm 				a += 4;
179*4fcf65c5Sdjm 				r += 4;
180*4fcf65c5Sdjm 				}
181*4fcf65c5Sdjm 			}
182*4fcf65c5Sdjm 		if (dl > 0)
183*4fcf65c5Sdjm 			{
184*4fcf65c5Sdjm #ifdef BN_COUNT
185*4fcf65c5Sdjm 			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
186*4fcf65c5Sdjm #endif
187*4fcf65c5Sdjm 			for(;;)
188*4fcf65c5Sdjm 				{
189*4fcf65c5Sdjm 				r[0] = a[0];
190*4fcf65c5Sdjm 				if (--dl <= 0) break;
191*4fcf65c5Sdjm 				r[1] = a[1];
192*4fcf65c5Sdjm 				if (--dl <= 0) break;
193*4fcf65c5Sdjm 				r[2] = a[2];
194*4fcf65c5Sdjm 				if (--dl <= 0) break;
195*4fcf65c5Sdjm 				r[3] = a[3];
196*4fcf65c5Sdjm 				if (--dl <= 0) break;
197*4fcf65c5Sdjm 
198*4fcf65c5Sdjm 				a += 4;
199*4fcf65c5Sdjm 				r += 4;
200*4fcf65c5Sdjm 				}
201*4fcf65c5Sdjm 			}
202*4fcf65c5Sdjm 		}
203*4fcf65c5Sdjm 	return c;
204*4fcf65c5Sdjm 	}
205*4fcf65c5Sdjm #endif
206*4fcf65c5Sdjm 
207*4fcf65c5Sdjm BN_ULONG bn_add_part_words(BN_ULONG *r,
208*4fcf65c5Sdjm 	const BN_ULONG *a, const BN_ULONG *b,
209*4fcf65c5Sdjm 	int cl, int dl)
210*4fcf65c5Sdjm 	{
211*4fcf65c5Sdjm 	BN_ULONG c, l, t;
212*4fcf65c5Sdjm 
213*4fcf65c5Sdjm 	assert(cl >= 0);
214*4fcf65c5Sdjm 	c = bn_add_words(r, a, b, cl);
215*4fcf65c5Sdjm 
216*4fcf65c5Sdjm 	if (dl == 0)
217*4fcf65c5Sdjm 		return c;
218*4fcf65c5Sdjm 
219*4fcf65c5Sdjm 	r += cl;
220*4fcf65c5Sdjm 	a += cl;
221*4fcf65c5Sdjm 	b += cl;
222*4fcf65c5Sdjm 
223*4fcf65c5Sdjm 	if (dl < 0)
224*4fcf65c5Sdjm 		{
225*4fcf65c5Sdjm 		int save_dl = dl;
226*4fcf65c5Sdjm #ifdef BN_COUNT
227*4fcf65c5Sdjm 		fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
228*4fcf65c5Sdjm #endif
229*4fcf65c5Sdjm 		while (c)
230*4fcf65c5Sdjm 			{
231*4fcf65c5Sdjm 			l=(c+b[0])&BN_MASK2;
232*4fcf65c5Sdjm 			c=(l < c);
233*4fcf65c5Sdjm 			r[0]=l;
234*4fcf65c5Sdjm 			if (++dl >= 0) break;
235*4fcf65c5Sdjm 
236*4fcf65c5Sdjm 			l=(c+b[1])&BN_MASK2;
237*4fcf65c5Sdjm 			c=(l < c);
238*4fcf65c5Sdjm 			r[1]=l;
239*4fcf65c5Sdjm 			if (++dl >= 0) break;
240*4fcf65c5Sdjm 
241*4fcf65c5Sdjm 			l=(c+b[2])&BN_MASK2;
242*4fcf65c5Sdjm 			c=(l < c);
243*4fcf65c5Sdjm 			r[2]=l;
244*4fcf65c5Sdjm 			if (++dl >= 0) break;
245*4fcf65c5Sdjm 
246*4fcf65c5Sdjm 			l=(c+b[3])&BN_MASK2;
247*4fcf65c5Sdjm 			c=(l < c);
248*4fcf65c5Sdjm 			r[3]=l;
249*4fcf65c5Sdjm 			if (++dl >= 0) break;
250*4fcf65c5Sdjm 
251*4fcf65c5Sdjm 			save_dl = dl;
252*4fcf65c5Sdjm 			b+=4;
253*4fcf65c5Sdjm 			r+=4;
254*4fcf65c5Sdjm 			}
255*4fcf65c5Sdjm 		if (dl < 0)
256*4fcf65c5Sdjm 			{
257*4fcf65c5Sdjm #ifdef BN_COUNT
258*4fcf65c5Sdjm 			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
259*4fcf65c5Sdjm #endif
260*4fcf65c5Sdjm 			if (save_dl < dl)
261*4fcf65c5Sdjm 				{
262*4fcf65c5Sdjm 				switch (dl - save_dl)
263*4fcf65c5Sdjm 					{
264*4fcf65c5Sdjm 				case 1:
265*4fcf65c5Sdjm 					r[1] = b[1];
266*4fcf65c5Sdjm 					if (++dl >= 0) break;
267*4fcf65c5Sdjm 				case 2:
268*4fcf65c5Sdjm 					r[2] = b[2];
269*4fcf65c5Sdjm 					if (++dl >= 0) break;
270*4fcf65c5Sdjm 				case 3:
271*4fcf65c5Sdjm 					r[3] = b[3];
272*4fcf65c5Sdjm 					if (++dl >= 0) break;
273*4fcf65c5Sdjm 					}
274*4fcf65c5Sdjm 				b += 4;
275*4fcf65c5Sdjm 				r += 4;
276*4fcf65c5Sdjm 				}
277*4fcf65c5Sdjm 			}
278*4fcf65c5Sdjm 		if (dl < 0)
279*4fcf65c5Sdjm 			{
280*4fcf65c5Sdjm #ifdef BN_COUNT
281*4fcf65c5Sdjm 			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
282*4fcf65c5Sdjm #endif
283*4fcf65c5Sdjm 			for(;;)
284*4fcf65c5Sdjm 				{
285*4fcf65c5Sdjm 				r[0] = b[0];
286*4fcf65c5Sdjm 				if (++dl >= 0) break;
287*4fcf65c5Sdjm 				r[1] = b[1];
288*4fcf65c5Sdjm 				if (++dl >= 0) break;
289*4fcf65c5Sdjm 				r[2] = b[2];
290*4fcf65c5Sdjm 				if (++dl >= 0) break;
291*4fcf65c5Sdjm 				r[3] = b[3];
292*4fcf65c5Sdjm 				if (++dl >= 0) break;
293*4fcf65c5Sdjm 
294*4fcf65c5Sdjm 				b += 4;
295*4fcf65c5Sdjm 				r += 4;
296*4fcf65c5Sdjm 				}
297*4fcf65c5Sdjm 			}
298*4fcf65c5Sdjm 		}
299*4fcf65c5Sdjm 	else
300*4fcf65c5Sdjm 		{
301*4fcf65c5Sdjm 		int save_dl = dl;
302*4fcf65c5Sdjm #ifdef BN_COUNT
303*4fcf65c5Sdjm 		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
304*4fcf65c5Sdjm #endif
305*4fcf65c5Sdjm 		while (c)
306*4fcf65c5Sdjm 			{
307*4fcf65c5Sdjm 			t=(a[0]+c)&BN_MASK2;
308*4fcf65c5Sdjm 			c=(t < c);
309*4fcf65c5Sdjm 			r[0]=t;
310*4fcf65c5Sdjm 			if (--dl <= 0) break;
311*4fcf65c5Sdjm 
312*4fcf65c5Sdjm 			t=(a[1]+c)&BN_MASK2;
313*4fcf65c5Sdjm 			c=(t < c);
314*4fcf65c5Sdjm 			r[1]=t;
315*4fcf65c5Sdjm 			if (--dl <= 0) break;
316*4fcf65c5Sdjm 
317*4fcf65c5Sdjm 			t=(a[2]+c)&BN_MASK2;
318*4fcf65c5Sdjm 			c=(t < c);
319*4fcf65c5Sdjm 			r[2]=t;
320*4fcf65c5Sdjm 			if (--dl <= 0) break;
321*4fcf65c5Sdjm 
322*4fcf65c5Sdjm 			t=(a[3]+c)&BN_MASK2;
323*4fcf65c5Sdjm 			c=(t < c);
324*4fcf65c5Sdjm 			r[3]=t;
325*4fcf65c5Sdjm 			if (--dl <= 0) break;
326*4fcf65c5Sdjm 
327*4fcf65c5Sdjm 			save_dl = dl;
328*4fcf65c5Sdjm 			a+=4;
329*4fcf65c5Sdjm 			r+=4;
330*4fcf65c5Sdjm 			}
331*4fcf65c5Sdjm #ifdef BN_COUNT
332*4fcf65c5Sdjm 		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
333*4fcf65c5Sdjm #endif
334*4fcf65c5Sdjm 		if (dl > 0)
335*4fcf65c5Sdjm 			{
336*4fcf65c5Sdjm 			if (save_dl > dl)
337*4fcf65c5Sdjm 				{
338*4fcf65c5Sdjm 				switch (save_dl - dl)
339*4fcf65c5Sdjm 					{
340*4fcf65c5Sdjm 				case 1:
341*4fcf65c5Sdjm 					r[1] = a[1];
342*4fcf65c5Sdjm 					if (--dl <= 0) break;
343*4fcf65c5Sdjm 				case 2:
344*4fcf65c5Sdjm 					r[2] = a[2];
345*4fcf65c5Sdjm 					if (--dl <= 0) break;
346*4fcf65c5Sdjm 				case 3:
347*4fcf65c5Sdjm 					r[3] = a[3];
348*4fcf65c5Sdjm 					if (--dl <= 0) break;
349*4fcf65c5Sdjm 					}
350*4fcf65c5Sdjm 				a += 4;
351*4fcf65c5Sdjm 				r += 4;
352*4fcf65c5Sdjm 				}
353*4fcf65c5Sdjm 			}
354*4fcf65c5Sdjm 		if (dl > 0)
355*4fcf65c5Sdjm 			{
356*4fcf65c5Sdjm #ifdef BN_COUNT
357*4fcf65c5Sdjm 			fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
358*4fcf65c5Sdjm #endif
359*4fcf65c5Sdjm 			for(;;)
360*4fcf65c5Sdjm 				{
361*4fcf65c5Sdjm 				r[0] = a[0];
362*4fcf65c5Sdjm 				if (--dl <= 0) break;
363*4fcf65c5Sdjm 				r[1] = a[1];
364*4fcf65c5Sdjm 				if (--dl <= 0) break;
365*4fcf65c5Sdjm 				r[2] = a[2];
366*4fcf65c5Sdjm 				if (--dl <= 0) break;
367*4fcf65c5Sdjm 				r[3] = a[3];
368*4fcf65c5Sdjm 				if (--dl <= 0) break;
369*4fcf65c5Sdjm 
370*4fcf65c5Sdjm 				a += 4;
371*4fcf65c5Sdjm 				r += 4;
372*4fcf65c5Sdjm 				}
373*4fcf65c5Sdjm 			}
374*4fcf65c5Sdjm 		}
375*4fcf65c5Sdjm 	return c;
376*4fcf65c5Sdjm 	}
377*4fcf65c5Sdjm 
378913ec974Sbeck #ifdef BN_RECURSION
379f6e3f262Sbeck /* Karatsuba recursive multiplication algorithm
380f6e3f262Sbeck  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
381f6e3f262Sbeck 
382913ec974Sbeck /* r is 2*n2 words in size,
383913ec974Sbeck  * a and b are both n2 words in size.
384913ec974Sbeck  * n2 must be a power of 2.
385913ec974Sbeck  * We multiply and return the result.
386913ec974Sbeck  * t must be 2*n2 words in size
387ba5406e9Sbeck  * We calculate
388913ec974Sbeck  * a[0]*b[0]
389913ec974Sbeck  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
390913ec974Sbeck  * a[1]*b[1]
391913ec974Sbeck  */
392*4fcf65c5Sdjm /* dnX may not be positive, but n2/2+dnX has to be */
393913ec974Sbeck void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
394*4fcf65c5Sdjm 	int dna, int dnb, BN_ULONG *t)
3955b37fcf3Sryker 	{
396913ec974Sbeck 	int n=n2/2,c1,c2;
397*4fcf65c5Sdjm 	int tna=n+dna, tnb=n+dnb;
398913ec974Sbeck 	unsigned int neg,zero;
399913ec974Sbeck 	BN_ULONG ln,lo,*p;
4005b37fcf3Sryker 
401913ec974Sbeck # ifdef BN_COUNT
402*4fcf65c5Sdjm 	fprintf(stderr," bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
403913ec974Sbeck # endif
404913ec974Sbeck # ifdef BN_MUL_COMBA
405ba5406e9Sbeck #  if 0
406ba5406e9Sbeck 	if (n2 == 4)
4075b37fcf3Sryker 		{
408913ec974Sbeck 		bn_mul_comba4(r,a,b);
409913ec974Sbeck 		return;
410913ec974Sbeck 		}
411ba5406e9Sbeck #  endif
412*4fcf65c5Sdjm 	/* Only call bn_mul_comba 8 if n2 == 8 and the
413*4fcf65c5Sdjm 	 * two arrays are complete [steve]
414*4fcf65c5Sdjm 	 */
415*4fcf65c5Sdjm 	if (n2 == 8 && dna == 0 && dnb == 0)
416913ec974Sbeck 		{
417913ec974Sbeck 		bn_mul_comba8(r,a,b);
418913ec974Sbeck 		return;
419913ec974Sbeck 		}
420ba5406e9Sbeck # endif /* BN_MUL_COMBA */
421*4fcf65c5Sdjm 	/* Else do normal multiply */
422913ec974Sbeck 	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
423913ec974Sbeck 		{
424*4fcf65c5Sdjm 		bn_mul_normal(r,a,n2+dna,b,n2+dnb);
425*4fcf65c5Sdjm 		if ((dna + dnb) < 0)
426*4fcf65c5Sdjm 			memset(&r[2*n2 + dna + dnb], 0,
427*4fcf65c5Sdjm 				sizeof(BN_ULONG) * -(dna + dnb));
428913ec974Sbeck 		return;
429913ec974Sbeck 		}
430913ec974Sbeck 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
431*4fcf65c5Sdjm 	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
432*4fcf65c5Sdjm 	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
433913ec974Sbeck 	zero=neg=0;
434913ec974Sbeck 	switch (c1*3+c2)
435913ec974Sbeck 		{
436913ec974Sbeck 	case -4:
437*4fcf65c5Sdjm 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
438*4fcf65c5Sdjm 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
439913ec974Sbeck 		break;
440913ec974Sbeck 	case -3:
441913ec974Sbeck 		zero=1;
442913ec974Sbeck 		break;
443913ec974Sbeck 	case -2:
444*4fcf65c5Sdjm 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
445*4fcf65c5Sdjm 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
446913ec974Sbeck 		neg=1;
447913ec974Sbeck 		break;
448913ec974Sbeck 	case -1:
449913ec974Sbeck 	case 0:
450913ec974Sbeck 	case 1:
451913ec974Sbeck 		zero=1;
452913ec974Sbeck 		break;
453913ec974Sbeck 	case 2:
454*4fcf65c5Sdjm 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
455*4fcf65c5Sdjm 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
456913ec974Sbeck 		neg=1;
457913ec974Sbeck 		break;
458913ec974Sbeck 	case 3:
459913ec974Sbeck 		zero=1;
460913ec974Sbeck 		break;
461913ec974Sbeck 	case 4:
462*4fcf65c5Sdjm 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
463*4fcf65c5Sdjm 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
464913ec974Sbeck 		break;
4655b37fcf3Sryker 		}
4665b37fcf3Sryker 
467913ec974Sbeck # ifdef BN_MUL_COMBA
468*4fcf65c5Sdjm 	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
469*4fcf65c5Sdjm 					       extra args to do this well */
4705b37fcf3Sryker 		{
471913ec974Sbeck 		if (!zero)
472913ec974Sbeck 			bn_mul_comba4(&(t[n2]),t,&(t[n]));
473913ec974Sbeck 		else
474913ec974Sbeck 			memset(&(t[n2]),0,8*sizeof(BN_ULONG));
475913ec974Sbeck 
476913ec974Sbeck 		bn_mul_comba4(r,a,b);
477913ec974Sbeck 		bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
4785b37fcf3Sryker 		}
479*4fcf65c5Sdjm 	else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
480*4fcf65c5Sdjm 						    take extra args to do this
481*4fcf65c5Sdjm 						    well */
482913ec974Sbeck 		{
483913ec974Sbeck 		if (!zero)
484913ec974Sbeck 			bn_mul_comba8(&(t[n2]),t,&(t[n]));
485913ec974Sbeck 		else
486913ec974Sbeck 			memset(&(t[n2]),0,16*sizeof(BN_ULONG));
487913ec974Sbeck 
488913ec974Sbeck 		bn_mul_comba8(r,a,b);
489913ec974Sbeck 		bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
490913ec974Sbeck 		}
491913ec974Sbeck 	else
492ba5406e9Sbeck # endif /* BN_MUL_COMBA */
493913ec974Sbeck 		{
494913ec974Sbeck 		p= &(t[n2*2]);
495913ec974Sbeck 		if (!zero)
496*4fcf65c5Sdjm 			bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
497913ec974Sbeck 		else
498913ec974Sbeck 			memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
499*4fcf65c5Sdjm 		bn_mul_recursive(r,a,b,n,0,0,p);
500*4fcf65c5Sdjm 		bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
5015b37fcf3Sryker 		}
5025b37fcf3Sryker 
503913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
504913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
505913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
506913ec974Sbeck 	 */
5075b37fcf3Sryker 
508913ec974Sbeck 	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
5095b37fcf3Sryker 
510913ec974Sbeck 	if (neg) /* if t[32] is negative */
5115b37fcf3Sryker 		{
512913ec974Sbeck 		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
5135b37fcf3Sryker 		}
5145b37fcf3Sryker 	else
5155b37fcf3Sryker 		{
516913ec974Sbeck 		/* Might have a carry */
517913ec974Sbeck 		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
5185b37fcf3Sryker 		}
5195b37fcf3Sryker 
520913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
521913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
522913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
523913ec974Sbeck 	 * c1 holds the carry bits
524913ec974Sbeck 	 */
525913ec974Sbeck 	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
526913ec974Sbeck 	if (c1)
5275b37fcf3Sryker 		{
528913ec974Sbeck 		p= &(r[n+n2]);
529913ec974Sbeck 		lo= *p;
530913ec974Sbeck 		ln=(lo+c1)&BN_MASK2;
531913ec974Sbeck 		*p=ln;
532913ec974Sbeck 
533913ec974Sbeck 		/* The overflow will stop before we over write
534913ec974Sbeck 		 * words we should not overwrite */
535913ec974Sbeck 		if (ln < (BN_ULONG)c1)
536913ec974Sbeck 			{
537913ec974Sbeck 			do	{
538913ec974Sbeck 				p++;
539913ec974Sbeck 				lo= *p;
540913ec974Sbeck 				ln=(lo+1)&BN_MASK2;
541913ec974Sbeck 				*p=ln;
542913ec974Sbeck 				} while (ln == 0);
543913ec974Sbeck 			}
544913ec974Sbeck 		}
5455b37fcf3Sryker 	}
5465b37fcf3Sryker 
547913ec974Sbeck /* n+tn is the word length
548913ec974Sbeck  * t needs to be n*4 is size, as does r */
549*4fcf65c5Sdjm /* tnX may not be negative but less than n */
550*4fcf65c5Sdjm void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
551*4fcf65c5Sdjm 	     int tna, int tnb, BN_ULONG *t)
5525b37fcf3Sryker 	{
553913ec974Sbeck 	int i,j,n2=n*2;
554cdc51833Smarkus 	int c1,c2,neg,zero;
555913ec974Sbeck 	BN_ULONG ln,lo,*p;
5565b37fcf3Sryker 
557913ec974Sbeck # ifdef BN_COUNT
558*4fcf65c5Sdjm 	fprintf(stderr," bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
559*4fcf65c5Sdjm 		n, tna, n, tnb);
560913ec974Sbeck # endif
561913ec974Sbeck 	if (n < 8)
5625b37fcf3Sryker 		{
563*4fcf65c5Sdjm 		bn_mul_normal(r,a,n+tna,b,n+tnb);
564913ec974Sbeck 		return;
5655b37fcf3Sryker 		}
5665b37fcf3Sryker 
567913ec974Sbeck 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
568*4fcf65c5Sdjm 	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
569*4fcf65c5Sdjm 	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
570ba5406e9Sbeck 	zero=neg=0;
571ba5406e9Sbeck 	switch (c1*3+c2)
572ba5406e9Sbeck 		{
573ba5406e9Sbeck 	case -4:
574*4fcf65c5Sdjm 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
575*4fcf65c5Sdjm 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
576ba5406e9Sbeck 		break;
577ba5406e9Sbeck 	case -3:
578ba5406e9Sbeck 		zero=1;
579ba5406e9Sbeck 		/* break; */
580ba5406e9Sbeck 	case -2:
581*4fcf65c5Sdjm 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
582*4fcf65c5Sdjm 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
583ba5406e9Sbeck 		neg=1;
584ba5406e9Sbeck 		break;
585ba5406e9Sbeck 	case -1:
586ba5406e9Sbeck 	case 0:
587ba5406e9Sbeck 	case 1:
588ba5406e9Sbeck 		zero=1;
589ba5406e9Sbeck 		/* break; */
590ba5406e9Sbeck 	case 2:
591*4fcf65c5Sdjm 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
592*4fcf65c5Sdjm 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
593ba5406e9Sbeck 		neg=1;
594ba5406e9Sbeck 		break;
595ba5406e9Sbeck 	case 3:
596ba5406e9Sbeck 		zero=1;
597ba5406e9Sbeck 		/* break; */
598ba5406e9Sbeck 	case 4:
599*4fcf65c5Sdjm 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
600*4fcf65c5Sdjm 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
601ba5406e9Sbeck 		break;
602ba5406e9Sbeck 		}
603ba5406e9Sbeck 		/* The zero case isn't yet implemented here. The speedup
604ba5406e9Sbeck 		   would probably be negligible. */
605ba5406e9Sbeck # if 0
606ba5406e9Sbeck 	if (n == 4)
6075b37fcf3Sryker 		{
608913ec974Sbeck 		bn_mul_comba4(&(t[n2]),t,&(t[n]));
609913ec974Sbeck 		bn_mul_comba4(r,a,b);
610913ec974Sbeck 		bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
611913ec974Sbeck 		memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
612913ec974Sbeck 		}
613ba5406e9Sbeck 	else
614ba5406e9Sbeck # endif
615ba5406e9Sbeck 	if (n == 8)
616913ec974Sbeck 		{
617913ec974Sbeck 		bn_mul_comba8(&(t[n2]),t,&(t[n]));
618913ec974Sbeck 		bn_mul_comba8(r,a,b);
619*4fcf65c5Sdjm 		bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
620*4fcf65c5Sdjm 		memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
621913ec974Sbeck 		}
622913ec974Sbeck 	else
623913ec974Sbeck 		{
624913ec974Sbeck 		p= &(t[n2*2]);
625*4fcf65c5Sdjm 		bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
626*4fcf65c5Sdjm 		bn_mul_recursive(r,a,b,n,0,0,p);
627913ec974Sbeck 		i=n/2;
628913ec974Sbeck 		/* If there is only a bottom half to the number,
629913ec974Sbeck 		 * just do it */
630*4fcf65c5Sdjm 		if (tna > tnb)
631*4fcf65c5Sdjm 			j = tna - i;
632*4fcf65c5Sdjm 		else
633*4fcf65c5Sdjm 			j = tnb - i;
634913ec974Sbeck 		if (j == 0)
635913ec974Sbeck 			{
636*4fcf65c5Sdjm 			bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
637*4fcf65c5Sdjm 				i,tna-i,tnb-i,p);
638913ec974Sbeck 			memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
639913ec974Sbeck 			}
640913ec974Sbeck 		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
641913ec974Sbeck 				{
642913ec974Sbeck 				bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
643*4fcf65c5Sdjm 					i,tna-i,tnb-i,p);
644*4fcf65c5Sdjm 				memset(&(r[n2+tna+tnb]),0,
645*4fcf65c5Sdjm 					sizeof(BN_ULONG)*(n2-tna-tnb));
646913ec974Sbeck 				}
647913ec974Sbeck 		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
648913ec974Sbeck 			{
649913ec974Sbeck 			memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
650*4fcf65c5Sdjm 			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
651*4fcf65c5Sdjm 				&& tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
652913ec974Sbeck 				{
653*4fcf65c5Sdjm 				bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
654913ec974Sbeck 				}
655913ec974Sbeck 			else
656913ec974Sbeck 				{
657913ec974Sbeck 				for (;;)
658913ec974Sbeck 					{
659913ec974Sbeck 					i/=2;
660*4fcf65c5Sdjm 					/* these simplified conditions work
661*4fcf65c5Sdjm 					 * exclusively because difference
662*4fcf65c5Sdjm 					 * between tna and tnb is 1 or 0 */
663*4fcf65c5Sdjm 					if (i < tna || i < tnb)
664913ec974Sbeck 						{
665913ec974Sbeck 						bn_mul_part_recursive(&(r[n2]),
666913ec974Sbeck 							&(a[n]),&(b[n]),
667*4fcf65c5Sdjm 							i,tna-i,tnb-i,p);
668913ec974Sbeck 						break;
669913ec974Sbeck 						}
670*4fcf65c5Sdjm 					else if (i == tna || i == tnb)
671913ec974Sbeck 						{
672913ec974Sbeck 						bn_mul_recursive(&(r[n2]),
673913ec974Sbeck 							&(a[n]),&(b[n]),
674*4fcf65c5Sdjm 							i,tna-i,tnb-i,p);
675913ec974Sbeck 						break;
676913ec974Sbeck 						}
677913ec974Sbeck 					}
678913ec974Sbeck 				}
679913ec974Sbeck 			}
6805b37fcf3Sryker 		}
6815b37fcf3Sryker 
682913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
683913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
684913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
685913ec974Sbeck 	 */
6865b37fcf3Sryker 
687913ec974Sbeck 	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
688ba5406e9Sbeck 
689ba5406e9Sbeck 	if (neg) /* if t[32] is negative */
690ba5406e9Sbeck 		{
691913ec974Sbeck 		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
692ba5406e9Sbeck 		}
693ba5406e9Sbeck 	else
694ba5406e9Sbeck 		{
695ba5406e9Sbeck 		/* Might have a carry */
696ba5406e9Sbeck 		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
697ba5406e9Sbeck 		}
6985b37fcf3Sryker 
699913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
700913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
701913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
702913ec974Sbeck 	 * c1 holds the carry bits
703913ec974Sbeck 	 */
704913ec974Sbeck 	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
705913ec974Sbeck 	if (c1)
706913ec974Sbeck 		{
707913ec974Sbeck 		p= &(r[n+n2]);
708913ec974Sbeck 		lo= *p;
709913ec974Sbeck 		ln=(lo+c1)&BN_MASK2;
710913ec974Sbeck 		*p=ln;
7115b37fcf3Sryker 
712913ec974Sbeck 		/* The overflow will stop before we over write
713913ec974Sbeck 		 * words we should not overwrite */
714cdc51833Smarkus 		if (ln < (BN_ULONG)c1)
715913ec974Sbeck 			{
716913ec974Sbeck 			do	{
717913ec974Sbeck 				p++;
718913ec974Sbeck 				lo= *p;
719913ec974Sbeck 				ln=(lo+1)&BN_MASK2;
720913ec974Sbeck 				*p=ln;
721913ec974Sbeck 				} while (ln == 0);
722913ec974Sbeck 			}
723913ec974Sbeck 		}
724913ec974Sbeck 	}
7255b37fcf3Sryker 
726913ec974Sbeck /* a and b must be the same size, which is n2.
727913ec974Sbeck  * r needs to be n2 words and t needs to be n2*2
728913ec974Sbeck  */
729913ec974Sbeck void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
730913ec974Sbeck 	     BN_ULONG *t)
731913ec974Sbeck 	{
732913ec974Sbeck 	int n=n2/2;
7335b37fcf3Sryker 
734913ec974Sbeck # ifdef BN_COUNT
735*4fcf65c5Sdjm 	fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
736913ec974Sbeck # endif
7375b37fcf3Sryker 
738*4fcf65c5Sdjm 	bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
739913ec974Sbeck 	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
740913ec974Sbeck 		{
741913ec974Sbeck 		bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
742913ec974Sbeck 		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
743913ec974Sbeck 		bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
744913ec974Sbeck 		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
745913ec974Sbeck 		}
746913ec974Sbeck 	else
747913ec974Sbeck 		{
748913ec974Sbeck 		bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
749913ec974Sbeck 		bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
750913ec974Sbeck 		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
751913ec974Sbeck 		bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
752913ec974Sbeck 		}
753913ec974Sbeck 	}
7545b37fcf3Sryker 
755913ec974Sbeck /* a and b must be the same size, which is n2.
756913ec974Sbeck  * r needs to be n2 words and t needs to be n2*2
757913ec974Sbeck  * l is the low words of the output.
758913ec974Sbeck  * t needs to be n2*3
759913ec974Sbeck  */
760913ec974Sbeck void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
761913ec974Sbeck 	     BN_ULONG *t)
762913ec974Sbeck 	{
763913ec974Sbeck 	int i,n;
764913ec974Sbeck 	int c1,c2;
765913ec974Sbeck 	int neg,oneg,zero;
766913ec974Sbeck 	BN_ULONG ll,lc,*lp,*mp;
767913ec974Sbeck 
768913ec974Sbeck # ifdef BN_COUNT
769*4fcf65c5Sdjm 	fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
770913ec974Sbeck # endif
771913ec974Sbeck 	n=n2/2;
772913ec974Sbeck 
773913ec974Sbeck 	/* Calculate (al-ah)*(bh-bl) */
774913ec974Sbeck 	neg=zero=0;
775913ec974Sbeck 	c1=bn_cmp_words(&(a[0]),&(a[n]),n);
776913ec974Sbeck 	c2=bn_cmp_words(&(b[n]),&(b[0]),n);
777913ec974Sbeck 	switch (c1*3+c2)
778913ec974Sbeck 		{
779913ec974Sbeck 	case -4:
780913ec974Sbeck 		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
781913ec974Sbeck 		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
782913ec974Sbeck 		break;
783913ec974Sbeck 	case -3:
784913ec974Sbeck 		zero=1;
785913ec974Sbeck 		break;
786913ec974Sbeck 	case -2:
787913ec974Sbeck 		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
788913ec974Sbeck 		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
789913ec974Sbeck 		neg=1;
790913ec974Sbeck 		break;
791913ec974Sbeck 	case -1:
792913ec974Sbeck 	case 0:
793913ec974Sbeck 	case 1:
794913ec974Sbeck 		zero=1;
795913ec974Sbeck 		break;
796913ec974Sbeck 	case 2:
797913ec974Sbeck 		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
798913ec974Sbeck 		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
799913ec974Sbeck 		neg=1;
800913ec974Sbeck 		break;
801913ec974Sbeck 	case 3:
802913ec974Sbeck 		zero=1;
803913ec974Sbeck 		break;
804913ec974Sbeck 	case 4:
805913ec974Sbeck 		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
806913ec974Sbeck 		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
807913ec974Sbeck 		break;
808913ec974Sbeck 		}
809913ec974Sbeck 
810913ec974Sbeck 	oneg=neg;
811913ec974Sbeck 	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
812913ec974Sbeck 	/* r[10] = (a[1]*b[1]) */
813913ec974Sbeck # ifdef BN_MUL_COMBA
814913ec974Sbeck 	if (n == 8)
815913ec974Sbeck 		{
816913ec974Sbeck 		bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
817913ec974Sbeck 		bn_mul_comba8(r,&(a[n]),&(b[n]));
818913ec974Sbeck 		}
819913ec974Sbeck 	else
820913ec974Sbeck # endif
821913ec974Sbeck 		{
822*4fcf65c5Sdjm 		bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
823*4fcf65c5Sdjm 		bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
824913ec974Sbeck 		}
825913ec974Sbeck 
826913ec974Sbeck 	/* s0 == low(al*bl)
827913ec974Sbeck 	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
828913ec974Sbeck 	 * We know s0 and s1 so the only unknown is high(al*bl)
829913ec974Sbeck 	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
830913ec974Sbeck 	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
831913ec974Sbeck 	 */
832913ec974Sbeck 	if (l != NULL)
833913ec974Sbeck 		{
834913ec974Sbeck 		lp= &(t[n2+n]);
835913ec974Sbeck 		c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
836913ec974Sbeck 		}
837913ec974Sbeck 	else
838913ec974Sbeck 		{
839913ec974Sbeck 		c1=0;
840913ec974Sbeck 		lp= &(r[0]);
841913ec974Sbeck 		}
842913ec974Sbeck 
843913ec974Sbeck 	if (neg)
844913ec974Sbeck 		neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
845913ec974Sbeck 	else
846913ec974Sbeck 		{
847913ec974Sbeck 		bn_add_words(&(t[n2]),lp,&(t[0]),n);
848913ec974Sbeck 		neg=0;
849913ec974Sbeck 		}
850913ec974Sbeck 
851913ec974Sbeck 	if (l != NULL)
852913ec974Sbeck 		{
853913ec974Sbeck 		bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
854913ec974Sbeck 		}
855913ec974Sbeck 	else
856913ec974Sbeck 		{
857913ec974Sbeck 		lp= &(t[n2+n]);
858913ec974Sbeck 		mp= &(t[n2]);
859913ec974Sbeck 		for (i=0; i<n; i++)
860913ec974Sbeck 			lp[i]=((~mp[i])+1)&BN_MASK2;
861913ec974Sbeck 		}
862913ec974Sbeck 
863913ec974Sbeck 	/* s[0] = low(al*bl)
864913ec974Sbeck 	 * t[3] = high(al*bl)
865913ec974Sbeck 	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
866913ec974Sbeck 	 * r[10] = (a[1]*b[1])
867913ec974Sbeck 	 */
868913ec974Sbeck 	/* R[10] = al*bl
869913ec974Sbeck 	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
870913ec974Sbeck 	 * R[32] = ah*bh
871913ec974Sbeck 	 */
872913ec974Sbeck 	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
873913ec974Sbeck 	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
874913ec974Sbeck 	 * R[3]=r[1]+(carry/borrow)
875913ec974Sbeck 	 */
876913ec974Sbeck 	if (l != NULL)
877913ec974Sbeck 		{
878913ec974Sbeck 		lp= &(t[n2]);
879913ec974Sbeck 		c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
880913ec974Sbeck 		}
881913ec974Sbeck 	else
882913ec974Sbeck 		{
883913ec974Sbeck 		lp= &(t[n2+n]);
884913ec974Sbeck 		c1=0;
885913ec974Sbeck 		}
886913ec974Sbeck 	c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
887913ec974Sbeck 	if (oneg)
888913ec974Sbeck 		c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
889913ec974Sbeck 	else
890913ec974Sbeck 		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
891913ec974Sbeck 
892913ec974Sbeck 	c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
893913ec974Sbeck 	c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
894913ec974Sbeck 	if (oneg)
895913ec974Sbeck 		c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
896913ec974Sbeck 	else
897913ec974Sbeck 		c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
898913ec974Sbeck 
899913ec974Sbeck 	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
900913ec974Sbeck 		{
901913ec974Sbeck 		i=0;
902913ec974Sbeck 		if (c1 > 0)
903913ec974Sbeck 			{
904913ec974Sbeck 			lc=c1;
905913ec974Sbeck 			do	{
906913ec974Sbeck 				ll=(r[i]+lc)&BN_MASK2;
907913ec974Sbeck 				r[i++]=ll;
908913ec974Sbeck 				lc=(lc > ll);
909913ec974Sbeck 				} while (lc);
910913ec974Sbeck 			}
911913ec974Sbeck 		else
912913ec974Sbeck 			{
913913ec974Sbeck 			lc= -c1;
914913ec974Sbeck 			do	{
915913ec974Sbeck 				ll=r[i];
916913ec974Sbeck 				r[i++]=(ll-lc)&BN_MASK2;
917913ec974Sbeck 				lc=(lc > ll);
918913ec974Sbeck 				} while (lc);
919913ec974Sbeck 			}
920913ec974Sbeck 		}
921913ec974Sbeck 	if (c2 != 0) /* Add starting at r[1] */
922913ec974Sbeck 		{
923913ec974Sbeck 		i=n;
924913ec974Sbeck 		if (c2 > 0)
925913ec974Sbeck 			{
926913ec974Sbeck 			lc=c2;
927913ec974Sbeck 			do	{
928913ec974Sbeck 				ll=(r[i]+lc)&BN_MASK2;
929913ec974Sbeck 				r[i++]=ll;
930913ec974Sbeck 				lc=(lc > ll);
931913ec974Sbeck 				} while (lc);
932913ec974Sbeck 			}
933913ec974Sbeck 		else
934913ec974Sbeck 			{
935913ec974Sbeck 			lc= -c2;
936913ec974Sbeck 			do	{
937913ec974Sbeck 				ll=r[i];
938913ec974Sbeck 				r[i++]=(ll-lc)&BN_MASK2;
939913ec974Sbeck 				lc=(lc > ll);
940913ec974Sbeck 				} while (lc);
941913ec974Sbeck 			}
942913ec974Sbeck 		}
9435b37fcf3Sryker 	}
944ba5406e9Sbeck #endif /* BN_RECURSION */
945913ec974Sbeck 
946da347917Sbeck int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
947913ec974Sbeck 	{
948*4fcf65c5Sdjm 	int ret=0;
949913ec974Sbeck 	int top,al,bl;
950913ec974Sbeck 	BIGNUM *rr;
951ba5406e9Sbeck #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
952ba5406e9Sbeck 	int i;
953ba5406e9Sbeck #endif
954913ec974Sbeck #ifdef BN_RECURSION
955*4fcf65c5Sdjm 	BIGNUM *t=NULL;
956*4fcf65c5Sdjm 	int j=0,k;
957913ec974Sbeck #endif
958913ec974Sbeck 
959913ec974Sbeck #ifdef BN_COUNT
960*4fcf65c5Sdjm 	fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
961913ec974Sbeck #endif
962913ec974Sbeck 
963913ec974Sbeck 	bn_check_top(a);
964913ec974Sbeck 	bn_check_top(b);
965913ec974Sbeck 	bn_check_top(r);
966913ec974Sbeck 
967913ec974Sbeck 	al=a->top;
968913ec974Sbeck 	bl=b->top;
969913ec974Sbeck 
970913ec974Sbeck 	if ((al == 0) || (bl == 0))
971913ec974Sbeck 		{
972*4fcf65c5Sdjm 		BN_zero(r);
973913ec974Sbeck 		return(1);
974913ec974Sbeck 		}
975913ec974Sbeck 	top=al+bl;
976913ec974Sbeck 
977ba5406e9Sbeck 	BN_CTX_start(ctx);
978913ec974Sbeck 	if ((r == a) || (r == b))
979ba5406e9Sbeck 		{
980ba5406e9Sbeck 		if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
981ba5406e9Sbeck 		}
982913ec974Sbeck 	else
983913ec974Sbeck 		rr = r;
984c109e398Sbeck 	rr->neg=a->neg^b->neg;
985913ec974Sbeck 
986913ec974Sbeck #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
987ba5406e9Sbeck 	i = al-bl;
988ba5406e9Sbeck #endif
989913ec974Sbeck #ifdef BN_MUL_COMBA
990ba5406e9Sbeck 	if (i == 0)
991913ec974Sbeck 		{
992ba5406e9Sbeck # if 0
993ba5406e9Sbeck 		if (al == 4)
994ba5406e9Sbeck 			{
995ba5406e9Sbeck 			if (bn_wexpand(rr,8) == NULL) goto err;
996913ec974Sbeck 			rr->top=8;
997913ec974Sbeck 			bn_mul_comba4(rr->d,a->d,b->d);
998913ec974Sbeck 			goto end;
999913ec974Sbeck 			}
1000ba5406e9Sbeck # endif
1001ba5406e9Sbeck 		if (al == 8)
1002913ec974Sbeck 			{
1003ba5406e9Sbeck 			if (bn_wexpand(rr,16) == NULL) goto err;
1004913ec974Sbeck 			rr->top=16;
1005913ec974Sbeck 			bn_mul_comba8(rr->d,a->d,b->d);
1006913ec974Sbeck 			goto end;
1007913ec974Sbeck 			}
1008913ec974Sbeck 		}
1009ba5406e9Sbeck #endif /* BN_MUL_COMBA */
1010913ec974Sbeck #ifdef BN_RECURSION
1011ba5406e9Sbeck 	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
1012913ec974Sbeck 		{
1013*4fcf65c5Sdjm 		if (i >= -1 && i <= 1)
1014da347917Sbeck 			{
1015*4fcf65c5Sdjm 			int sav_j =0;
1016*4fcf65c5Sdjm 			/* Find out the power of two lower or equal
1017*4fcf65c5Sdjm 			   to the longest of the two numbers */
1018*4fcf65c5Sdjm 			if (i >= 0)
1019*4fcf65c5Sdjm 				{
1020*4fcf65c5Sdjm 				j = BN_num_bits_word((BN_ULONG)al);
1021*4fcf65c5Sdjm 				}
1022*4fcf65c5Sdjm 			if (i == -1)
1023*4fcf65c5Sdjm 				{
1024*4fcf65c5Sdjm 				j = BN_num_bits_word((BN_ULONG)bl);
1025*4fcf65c5Sdjm 				}
1026*4fcf65c5Sdjm 			sav_j = j;
1027*4fcf65c5Sdjm 			j = 1<<(j-1);
1028*4fcf65c5Sdjm 			assert(j <= al || j <= bl);
1029*4fcf65c5Sdjm 			k = j+j;
1030*4fcf65c5Sdjm 			t = BN_CTX_get(ctx);
1031*4fcf65c5Sdjm 			if (al > j || bl > j)
1032*4fcf65c5Sdjm 				{
1033*4fcf65c5Sdjm 				bn_wexpand(t,k*4);
1034*4fcf65c5Sdjm 				bn_wexpand(rr,k*4);
1035*4fcf65c5Sdjm 				bn_mul_part_recursive(rr->d,a->d,b->d,
1036*4fcf65c5Sdjm 					j,al-j,bl-j,t->d);
1037*4fcf65c5Sdjm 				}
1038*4fcf65c5Sdjm 			else	/* al <= j || bl <= j */
1039*4fcf65c5Sdjm 				{
1040*4fcf65c5Sdjm 				bn_wexpand(t,k*2);
1041*4fcf65c5Sdjm 				bn_wexpand(rr,k*2);
1042*4fcf65c5Sdjm 				bn_mul_recursive(rr->d,a->d,b->d,
1043*4fcf65c5Sdjm 					j,al-j,bl-j,t->d);
1044*4fcf65c5Sdjm 				}
1045*4fcf65c5Sdjm 			rr->top=top;
1046*4fcf65c5Sdjm 			goto end;
1047*4fcf65c5Sdjm 			}
1048*4fcf65c5Sdjm #if 0
1049*4fcf65c5Sdjm 		if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
1050*4fcf65c5Sdjm 			{
1051*4fcf65c5Sdjm 			BIGNUM *tmp_bn = (BIGNUM *)b;
1052*4fcf65c5Sdjm 			if (bn_wexpand(tmp_bn,al) == NULL) goto err;
1053*4fcf65c5Sdjm 			tmp_bn->d[bl]=0;
1054913ec974Sbeck 			bl++;
1055ba5406e9Sbeck 			i--;
1056913ec974Sbeck 			}
1057*4fcf65c5Sdjm 		else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
1058913ec974Sbeck 			{
1059*4fcf65c5Sdjm 			BIGNUM *tmp_bn = (BIGNUM *)a;
1060*4fcf65c5Sdjm 			if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
1061*4fcf65c5Sdjm 			tmp_bn->d[al]=0;
1062913ec974Sbeck 			al++;
1063ba5406e9Sbeck 			i++;
1064913ec974Sbeck 			}
1065ba5406e9Sbeck 		if (i == 0)
1066913ec974Sbeck 			{
1067ba5406e9Sbeck 			/* symmetric and > 4 */
1068913ec974Sbeck 			/* 16 or larger */
1069913ec974Sbeck 			j=BN_num_bits_word((BN_ULONG)al);
1070913ec974Sbeck 			j=1<<(j-1);
1071913ec974Sbeck 			k=j+j;
1072ba5406e9Sbeck 			t = BN_CTX_get(ctx);
1073913ec974Sbeck 			if (al == j) /* exact multiple */
1074913ec974Sbeck 				{
1075c58501deSbeck 				if (bn_wexpand(t,k*2) == NULL) goto err;
1076c58501deSbeck 				if (bn_wexpand(rr,k*2) == NULL) goto err;
1077913ec974Sbeck 				bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
1078913ec974Sbeck 				}
1079913ec974Sbeck 			else
1080913ec974Sbeck 				{
1081c58501deSbeck 				if (bn_wexpand(t,k*4) == NULL) goto err;
1082c58501deSbeck 				if (bn_wexpand(rr,k*4) == NULL) goto err;
1083913ec974Sbeck 				bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
1084913ec974Sbeck 				}
1085913ec974Sbeck 			rr->top=top;
1086ba5406e9Sbeck 			goto end;
1087ba5406e9Sbeck 			}
1088*4fcf65c5Sdjm #endif
1089767fe2ffSmarkus 		}
1090ba5406e9Sbeck #endif /* BN_RECURSION */
1091ba5406e9Sbeck 	if (bn_wexpand(rr,top) == NULL) goto err;
1092ba5406e9Sbeck 	rr->top=top;
1093ba5406e9Sbeck 	bn_mul_normal(rr->d,a->d,al,b->d,bl);
1094ba5406e9Sbeck 
1095913ec974Sbeck #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
1096913ec974Sbeck end:
1097913ec974Sbeck #endif
1098*4fcf65c5Sdjm 	bn_correct_top(rr);
1099913ec974Sbeck 	if (r != rr) BN_copy(r,rr);
1100ba5406e9Sbeck 	ret=1;
1101ba5406e9Sbeck err:
1102*4fcf65c5Sdjm 	bn_check_top(r);
1103ba5406e9Sbeck 	BN_CTX_end(ctx);
1104ba5406e9Sbeck 	return(ret);
1105913ec974Sbeck 	}
1106913ec974Sbeck 
1107913ec974Sbeck void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
1108913ec974Sbeck 	{
1109913ec974Sbeck 	BN_ULONG *rr;
1110913ec974Sbeck 
1111913ec974Sbeck #ifdef BN_COUNT
1112*4fcf65c5Sdjm 	fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
1113913ec974Sbeck #endif
1114913ec974Sbeck 
1115913ec974Sbeck 	if (na < nb)
1116913ec974Sbeck 		{
1117913ec974Sbeck 		int itmp;
1118913ec974Sbeck 		BN_ULONG *ltmp;
1119913ec974Sbeck 
1120913ec974Sbeck 		itmp=na; na=nb; nb=itmp;
1121913ec974Sbeck 		ltmp=a;   a=b;   b=ltmp;
1122913ec974Sbeck 
1123913ec974Sbeck 		}
1124913ec974Sbeck 	rr= &(r[na]);
1125*4fcf65c5Sdjm 	if (nb <= 0)
1126*4fcf65c5Sdjm 		{
1127*4fcf65c5Sdjm 		(void)bn_mul_words(r,a,na,0);
1128*4fcf65c5Sdjm 		return;
1129*4fcf65c5Sdjm 		}
1130*4fcf65c5Sdjm 	else
1131913ec974Sbeck 		rr[0]=bn_mul_words(r,a,na,b[0]);
1132913ec974Sbeck 
1133913ec974Sbeck 	for (;;)
1134913ec974Sbeck 		{
1135913ec974Sbeck 		if (--nb <= 0) return;
1136913ec974Sbeck 		rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
1137913ec974Sbeck 		if (--nb <= 0) return;
1138913ec974Sbeck 		rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
1139913ec974Sbeck 		if (--nb <= 0) return;
1140913ec974Sbeck 		rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
1141913ec974Sbeck 		if (--nb <= 0) return;
1142913ec974Sbeck 		rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
1143913ec974Sbeck 		rr+=4;
1144913ec974Sbeck 		r+=4;
1145913ec974Sbeck 		b+=4;
1146913ec974Sbeck 		}
1147913ec974Sbeck 	}
1148913ec974Sbeck 
1149913ec974Sbeck void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
1150913ec974Sbeck 	{
1151913ec974Sbeck #ifdef BN_COUNT
1152*4fcf65c5Sdjm 	fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
1153913ec974Sbeck #endif
1154913ec974Sbeck 	bn_mul_words(r,a,n,b[0]);
1155913ec974Sbeck 
1156913ec974Sbeck 	for (;;)
1157913ec974Sbeck 		{
1158913ec974Sbeck 		if (--n <= 0) return;
1159913ec974Sbeck 		bn_mul_add_words(&(r[1]),a,n,b[1]);
1160913ec974Sbeck 		if (--n <= 0) return;
1161913ec974Sbeck 		bn_mul_add_words(&(r[2]),a,n,b[2]);
1162913ec974Sbeck 		if (--n <= 0) return;
1163913ec974Sbeck 		bn_mul_add_words(&(r[3]),a,n,b[3]);
1164913ec974Sbeck 		if (--n <= 0) return;
1165913ec974Sbeck 		bn_mul_add_words(&(r[4]),a,n,b[4]);
1166913ec974Sbeck 		r+=4;
1167913ec974Sbeck 		b+=4;
1168913ec974Sbeck 		}
1169913ec974Sbeck 	}
1170