xref: /openbsd/lib/libcrypto/bn/bn_mul.c (revision 8889fb99)
1*8889fb99Sjsing /* $OpenBSD: bn_mul.c,v 1.30 2023/01/23 12:17:57 jsing Exp $ */
25b37fcf3Sryker /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
35b37fcf3Sryker  * All rights reserved.
45b37fcf3Sryker  *
55b37fcf3Sryker  * This package is an SSL implementation written
65b37fcf3Sryker  * by Eric Young (eay@cryptsoft.com).
75b37fcf3Sryker  * The implementation was written so as to conform with Netscapes SSL.
85b37fcf3Sryker  *
95b37fcf3Sryker  * This library is free for commercial and non-commercial use as long as
105b37fcf3Sryker  * the following conditions are aheared to.  The following conditions
115b37fcf3Sryker  * apply to all code found in this distribution, be it the RC4, RSA,
125b37fcf3Sryker  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
135b37fcf3Sryker  * included with this distribution is covered by the same copyright terms
145b37fcf3Sryker  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
155b37fcf3Sryker  *
165b37fcf3Sryker  * Copyright remains Eric Young's, and as such any Copyright notices in
175b37fcf3Sryker  * the code are not to be removed.
185b37fcf3Sryker  * If this package is used in a product, Eric Young should be given attribution
195b37fcf3Sryker  * as the author of the parts of the library used.
205b37fcf3Sryker  * This can be in the form of a textual message at program startup or
215b37fcf3Sryker  * in documentation (online or textual) provided with the package.
225b37fcf3Sryker  *
235b37fcf3Sryker  * Redistribution and use in source and binary forms, with or without
245b37fcf3Sryker  * modification, are permitted provided that the following conditions
255b37fcf3Sryker  * are met:
265b37fcf3Sryker  * 1. Redistributions of source code must retain the copyright
275b37fcf3Sryker  *    notice, this list of conditions and the following disclaimer.
285b37fcf3Sryker  * 2. Redistributions in binary form must reproduce the above copyright
295b37fcf3Sryker  *    notice, this list of conditions and the following disclaimer in the
305b37fcf3Sryker  *    documentation and/or other materials provided with the distribution.
315b37fcf3Sryker  * 3. All advertising materials mentioning features or use of this software
325b37fcf3Sryker  *    must display the following acknowledgement:
335b37fcf3Sryker  *    "This product includes cryptographic software written by
345b37fcf3Sryker  *     Eric Young (eay@cryptsoft.com)"
355b37fcf3Sryker  *    The word 'cryptographic' can be left out if the rouines from the library
365b37fcf3Sryker  *    being used are not cryptographic related :-).
375b37fcf3Sryker  * 4. If you include any Windows specific code (or a derivative thereof) from
385b37fcf3Sryker  *    the apps directory (application code) you must include an acknowledgement:
395b37fcf3Sryker  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
405b37fcf3Sryker  *
415b37fcf3Sryker  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
425b37fcf3Sryker  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
435b37fcf3Sryker  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
445b37fcf3Sryker  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
455b37fcf3Sryker  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
465b37fcf3Sryker  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
475b37fcf3Sryker  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
485b37fcf3Sryker  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
495b37fcf3Sryker  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
505b37fcf3Sryker  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
515b37fcf3Sryker  * SUCH DAMAGE.
525b37fcf3Sryker  *
535b37fcf3Sryker  * The licence and distribution terms for any publically available version or
545b37fcf3Sryker  * derivative of this code cannot be changed.  i.e. this code cannot simply be
555b37fcf3Sryker  * copied and put under another distribution licence
565b37fcf3Sryker  * [including the GNU Public Licence.]
575b37fcf3Sryker  */
585b37fcf3Sryker 
594fcf65c5Sdjm #include <assert.h>
60a8913c44Sjsing #include <stdio.h>
61a8913c44Sjsing #include <string.h>
62a8913c44Sjsing 
638cf4d6a6Sjsing #include <openssl/opensslconf.h>
648cf4d6a6Sjsing 
65de344ea3Sjsing #include "bn_arch.h"
66c9675a23Stb #include "bn_local.h"
675b37fcf3Sryker 
68*8889fb99Sjsing #ifndef HAVE_BN_MUL_ADD_WORDS
69*8889fb99Sjsing #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
70*8889fb99Sjsing 
71*8889fb99Sjsing BN_ULONG
72*8889fb99Sjsing bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
73*8889fb99Sjsing {
74*8889fb99Sjsing 	BN_ULONG c1 = 0;
75*8889fb99Sjsing 
76*8889fb99Sjsing 	assert(num >= 0);
77*8889fb99Sjsing 	if (num <= 0)
78*8889fb99Sjsing 		return (c1);
79*8889fb99Sjsing 
80*8889fb99Sjsing #ifndef OPENSSL_SMALL_FOOTPRINT
81*8889fb99Sjsing 	while (num & ~3) {
82*8889fb99Sjsing 		mul_add(rp[0], ap[0], w, c1);
83*8889fb99Sjsing 		mul_add(rp[1], ap[1], w, c1);
84*8889fb99Sjsing 		mul_add(rp[2], ap[2], w, c1);
85*8889fb99Sjsing 		mul_add(rp[3], ap[3], w, c1);
86*8889fb99Sjsing 		ap += 4;
87*8889fb99Sjsing 		rp += 4;
88*8889fb99Sjsing 		num -= 4;
89*8889fb99Sjsing 	}
90*8889fb99Sjsing #endif
91*8889fb99Sjsing 	while (num) {
92*8889fb99Sjsing 		mul_add(rp[0], ap[0], w, c1);
93*8889fb99Sjsing 		ap++;
94*8889fb99Sjsing 		rp++;
95*8889fb99Sjsing 		num--;
96*8889fb99Sjsing 	}
97*8889fb99Sjsing 
98*8889fb99Sjsing 	return (c1);
99*8889fb99Sjsing }
100*8889fb99Sjsing 
101*8889fb99Sjsing #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
102*8889fb99Sjsing 
103*8889fb99Sjsing BN_ULONG
104*8889fb99Sjsing bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
105*8889fb99Sjsing {
106*8889fb99Sjsing 	BN_ULONG c = 0;
107*8889fb99Sjsing 	BN_ULONG bl, bh;
108*8889fb99Sjsing 
109*8889fb99Sjsing 	assert(num >= 0);
110*8889fb99Sjsing 	if (num <= 0)
111*8889fb99Sjsing 		return ((BN_ULONG)0);
112*8889fb99Sjsing 
113*8889fb99Sjsing 	bl = LBITS(w);
114*8889fb99Sjsing 	bh = HBITS(w);
115*8889fb99Sjsing 
116*8889fb99Sjsing #ifndef OPENSSL_SMALL_FOOTPRINT
117*8889fb99Sjsing 	while (num & ~3) {
118*8889fb99Sjsing 		mul_add(rp[0], ap[0], bl, bh, c);
119*8889fb99Sjsing 		mul_add(rp[1], ap[1], bl, bh, c);
120*8889fb99Sjsing 		mul_add(rp[2], ap[2], bl, bh, c);
121*8889fb99Sjsing 		mul_add(rp[3], ap[3], bl, bh, c);
122*8889fb99Sjsing 		ap += 4;
123*8889fb99Sjsing 		rp += 4;
124*8889fb99Sjsing 		num -= 4;
125*8889fb99Sjsing 	}
126*8889fb99Sjsing #endif
127*8889fb99Sjsing 	while (num) {
128*8889fb99Sjsing 		mul_add(rp[0], ap[0], bl, bh, c);
129*8889fb99Sjsing 		ap++;
130*8889fb99Sjsing 		rp++;
131*8889fb99Sjsing 		num--;
132*8889fb99Sjsing 	}
133*8889fb99Sjsing 	return (c);
134*8889fb99Sjsing }
135*8889fb99Sjsing 
136*8889fb99Sjsing #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
137*8889fb99Sjsing #endif
138*8889fb99Sjsing 
139de344ea3Sjsing #ifndef HAVE_BN_MUL_COMBA4
140de344ea3Sjsing void
141de344ea3Sjsing bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
142de344ea3Sjsing {
143de344ea3Sjsing 	BN_ULONG c1, c2, c3;
144de344ea3Sjsing 
145de344ea3Sjsing 	c1 = 0;
146de344ea3Sjsing 	c2 = 0;
147de344ea3Sjsing 	c3 = 0;
148de344ea3Sjsing 	mul_add_c(a[0], b[0], c1, c2, c3);
149de344ea3Sjsing 	r[0] = c1;
150de344ea3Sjsing 	c1 = 0;
151de344ea3Sjsing 	mul_add_c(a[0], b[1], c2, c3, c1);
152de344ea3Sjsing 	mul_add_c(a[1], b[0], c2, c3, c1);
153de344ea3Sjsing 	r[1] = c2;
154de344ea3Sjsing 	c2 = 0;
155de344ea3Sjsing 	mul_add_c(a[2], b[0], c3, c1, c2);
156de344ea3Sjsing 	mul_add_c(a[1], b[1], c3, c1, c2);
157de344ea3Sjsing 	mul_add_c(a[0], b[2], c3, c1, c2);
158de344ea3Sjsing 	r[2] = c3;
159de344ea3Sjsing 	c3 = 0;
160de344ea3Sjsing 	mul_add_c(a[0], b[3], c1, c2, c3);
161de344ea3Sjsing 	mul_add_c(a[1], b[2], c1, c2, c3);
162de344ea3Sjsing 	mul_add_c(a[2], b[1], c1, c2, c3);
163de344ea3Sjsing 	mul_add_c(a[3], b[0], c1, c2, c3);
164de344ea3Sjsing 	r[3] = c1;
165de344ea3Sjsing 	c1 = 0;
166de344ea3Sjsing 	mul_add_c(a[3], b[1], c2, c3, c1);
167de344ea3Sjsing 	mul_add_c(a[2], b[2], c2, c3, c1);
168de344ea3Sjsing 	mul_add_c(a[1], b[3], c2, c3, c1);
169de344ea3Sjsing 	r[4] = c2;
170de344ea3Sjsing 	c2 = 0;
171de344ea3Sjsing 	mul_add_c(a[2], b[3], c3, c1, c2);
172de344ea3Sjsing 	mul_add_c(a[3], b[2], c3, c1, c2);
173de344ea3Sjsing 	r[5] = c3;
174de344ea3Sjsing 	c3 = 0;
175de344ea3Sjsing 	mul_add_c(a[3], b[3], c1, c2, c3);
176de344ea3Sjsing 	r[6] = c1;
177de344ea3Sjsing 	r[7] = c2;
178de344ea3Sjsing }
179de344ea3Sjsing #endif
180de344ea3Sjsing 
181de344ea3Sjsing #ifndef HAVE_BN_MUL_COMBA8
182de344ea3Sjsing void
183de344ea3Sjsing bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
184de344ea3Sjsing {
185de344ea3Sjsing 	BN_ULONG c1, c2, c3;
186de344ea3Sjsing 
187de344ea3Sjsing 	c1 = 0;
188de344ea3Sjsing 	c2 = 0;
189de344ea3Sjsing 	c3 = 0;
190de344ea3Sjsing 	mul_add_c(a[0], b[0], c1, c2, c3);
191de344ea3Sjsing 	r[0] = c1;
192de344ea3Sjsing 	c1 = 0;
193de344ea3Sjsing 	mul_add_c(a[0], b[1], c2, c3, c1);
194de344ea3Sjsing 	mul_add_c(a[1], b[0], c2, c3, c1);
195de344ea3Sjsing 	r[1] = c2;
196de344ea3Sjsing 	c2 = 0;
197de344ea3Sjsing 	mul_add_c(a[2], b[0], c3, c1, c2);
198de344ea3Sjsing 	mul_add_c(a[1], b[1], c3, c1, c2);
199de344ea3Sjsing 	mul_add_c(a[0], b[2], c3, c1, c2);
200de344ea3Sjsing 	r[2] = c3;
201de344ea3Sjsing 	c3 = 0;
202de344ea3Sjsing 	mul_add_c(a[0], b[3], c1, c2, c3);
203de344ea3Sjsing 	mul_add_c(a[1], b[2], c1, c2, c3);
204de344ea3Sjsing 	mul_add_c(a[2], b[1], c1, c2, c3);
205de344ea3Sjsing 	mul_add_c(a[3], b[0], c1, c2, c3);
206de344ea3Sjsing 	r[3] = c1;
207de344ea3Sjsing 	c1 = 0;
208de344ea3Sjsing 	mul_add_c(a[4], b[0], c2, c3, c1);
209de344ea3Sjsing 	mul_add_c(a[3], b[1], c2, c3, c1);
210de344ea3Sjsing 	mul_add_c(a[2], b[2], c2, c3, c1);
211de344ea3Sjsing 	mul_add_c(a[1], b[3], c2, c3, c1);
212de344ea3Sjsing 	mul_add_c(a[0], b[4], c2, c3, c1);
213de344ea3Sjsing 	r[4] = c2;
214de344ea3Sjsing 	c2 = 0;
215de344ea3Sjsing 	mul_add_c(a[0], b[5], c3, c1, c2);
216de344ea3Sjsing 	mul_add_c(a[1], b[4], c3, c1, c2);
217de344ea3Sjsing 	mul_add_c(a[2], b[3], c3, c1, c2);
218de344ea3Sjsing 	mul_add_c(a[3], b[2], c3, c1, c2);
219de344ea3Sjsing 	mul_add_c(a[4], b[1], c3, c1, c2);
220de344ea3Sjsing 	mul_add_c(a[5], b[0], c3, c1, c2);
221de344ea3Sjsing 	r[5] = c3;
222de344ea3Sjsing 	c3 = 0;
223de344ea3Sjsing 	mul_add_c(a[6], b[0], c1, c2, c3);
224de344ea3Sjsing 	mul_add_c(a[5], b[1], c1, c2, c3);
225de344ea3Sjsing 	mul_add_c(a[4], b[2], c1, c2, c3);
226de344ea3Sjsing 	mul_add_c(a[3], b[3], c1, c2, c3);
227de344ea3Sjsing 	mul_add_c(a[2], b[4], c1, c2, c3);
228de344ea3Sjsing 	mul_add_c(a[1], b[5], c1, c2, c3);
229de344ea3Sjsing 	mul_add_c(a[0], b[6], c1, c2, c3);
230de344ea3Sjsing 	r[6] = c1;
231de344ea3Sjsing 	c1 = 0;
232de344ea3Sjsing 	mul_add_c(a[0], b[7], c2, c3, c1);
233de344ea3Sjsing 	mul_add_c(a[1], b[6], c2, c3, c1);
234de344ea3Sjsing 	mul_add_c(a[2], b[5], c2, c3, c1);
235de344ea3Sjsing 	mul_add_c(a[3], b[4], c2, c3, c1);
236de344ea3Sjsing 	mul_add_c(a[4], b[3], c2, c3, c1);
237de344ea3Sjsing 	mul_add_c(a[5], b[2], c2, c3, c1);
238de344ea3Sjsing 	mul_add_c(a[6], b[1], c2, c3, c1);
239de344ea3Sjsing 	mul_add_c(a[7], b[0], c2, c3, c1);
240de344ea3Sjsing 	r[7] = c2;
241de344ea3Sjsing 	c2 = 0;
242de344ea3Sjsing 	mul_add_c(a[7], b[1], c3, c1, c2);
243de344ea3Sjsing 	mul_add_c(a[6], b[2], c3, c1, c2);
244de344ea3Sjsing 	mul_add_c(a[5], b[3], c3, c1, c2);
245de344ea3Sjsing 	mul_add_c(a[4], b[4], c3, c1, c2);
246de344ea3Sjsing 	mul_add_c(a[3], b[5], c3, c1, c2);
247de344ea3Sjsing 	mul_add_c(a[2], b[6], c3, c1, c2);
248de344ea3Sjsing 	mul_add_c(a[1], b[7], c3, c1, c2);
249de344ea3Sjsing 	r[8] = c3;
250de344ea3Sjsing 	c3 = 0;
251de344ea3Sjsing 	mul_add_c(a[2], b[7], c1, c2, c3);
252de344ea3Sjsing 	mul_add_c(a[3], b[6], c1, c2, c3);
253de344ea3Sjsing 	mul_add_c(a[4], b[5], c1, c2, c3);
254de344ea3Sjsing 	mul_add_c(a[5], b[4], c1, c2, c3);
255de344ea3Sjsing 	mul_add_c(a[6], b[3], c1, c2, c3);
256de344ea3Sjsing 	mul_add_c(a[7], b[2], c1, c2, c3);
257de344ea3Sjsing 	r[9] = c1;
258de344ea3Sjsing 	c1 = 0;
259de344ea3Sjsing 	mul_add_c(a[7], b[3], c2, c3, c1);
260de344ea3Sjsing 	mul_add_c(a[6], b[4], c2, c3, c1);
261de344ea3Sjsing 	mul_add_c(a[5], b[5], c2, c3, c1);
262de344ea3Sjsing 	mul_add_c(a[4], b[6], c2, c3, c1);
263de344ea3Sjsing 	mul_add_c(a[3], b[7], c2, c3, c1);
264de344ea3Sjsing 	r[10] = c2;
265de344ea3Sjsing 	c2 = 0;
266de344ea3Sjsing 	mul_add_c(a[4], b[7], c3, c1, c2);
267de344ea3Sjsing 	mul_add_c(a[5], b[6], c3, c1, c2);
268de344ea3Sjsing 	mul_add_c(a[6], b[5], c3, c1, c2);
269de344ea3Sjsing 	mul_add_c(a[7], b[4], c3, c1, c2);
270de344ea3Sjsing 	r[11] = c3;
271de344ea3Sjsing 	c3 = 0;
272de344ea3Sjsing 	mul_add_c(a[7], b[5], c1, c2, c3);
273de344ea3Sjsing 	mul_add_c(a[6], b[6], c1, c2, c3);
274de344ea3Sjsing 	mul_add_c(a[5], b[7], c1, c2, c3);
275de344ea3Sjsing 	r[12] = c1;
276de344ea3Sjsing 	c1 = 0;
277de344ea3Sjsing 	mul_add_c(a[6], b[7], c2, c3, c1);
278de344ea3Sjsing 	mul_add_c(a[7], b[6], c2, c3, c1);
279de344ea3Sjsing 	r[13] = c2;
280de344ea3Sjsing 	c2 = 0;
281de344ea3Sjsing 	mul_add_c(a[7], b[7], c3, c1, c2);
282de344ea3Sjsing 	r[14] = c3;
283de344ea3Sjsing 	r[15] = c1;
284de344ea3Sjsing }
285de344ea3Sjsing #endif
286de344ea3Sjsing 
287*8889fb99Sjsing #ifndef HAVE_BN_MUL_WORDS
288*8889fb99Sjsing #if defined(BN_LLONG) || defined(BN_UMULT_HIGH)
289*8889fb99Sjsing 
290*8889fb99Sjsing BN_ULONG
291*8889fb99Sjsing bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
292*8889fb99Sjsing {
293*8889fb99Sjsing 	BN_ULONG c1 = 0;
294*8889fb99Sjsing 
295*8889fb99Sjsing 	assert(num >= 0);
296*8889fb99Sjsing 	if (num <= 0)
297*8889fb99Sjsing 		return (c1);
298*8889fb99Sjsing 
299*8889fb99Sjsing #ifndef OPENSSL_SMALL_FOOTPRINT
300*8889fb99Sjsing 	while (num & ~3) {
301*8889fb99Sjsing 		mul(rp[0], ap[0], w, c1);
302*8889fb99Sjsing 		mul(rp[1], ap[1], w, c1);
303*8889fb99Sjsing 		mul(rp[2], ap[2], w, c1);
304*8889fb99Sjsing 		mul(rp[3], ap[3], w, c1);
305*8889fb99Sjsing 		ap += 4;
306*8889fb99Sjsing 		rp += 4;
307*8889fb99Sjsing 		num -= 4;
308*8889fb99Sjsing 	}
309*8889fb99Sjsing #endif
310*8889fb99Sjsing 	while (num) {
311*8889fb99Sjsing 		mul(rp[0], ap[0], w, c1);
312*8889fb99Sjsing 		ap++;
313*8889fb99Sjsing 		rp++;
314*8889fb99Sjsing 		num--;
315*8889fb99Sjsing 	}
316*8889fb99Sjsing 	return (c1);
317*8889fb99Sjsing }
318*8889fb99Sjsing #else /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
319*8889fb99Sjsing 
320*8889fb99Sjsing BN_ULONG
321*8889fb99Sjsing bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
322*8889fb99Sjsing {
323*8889fb99Sjsing 	BN_ULONG carry = 0;
324*8889fb99Sjsing 	BN_ULONG bl, bh;
325*8889fb99Sjsing 
326*8889fb99Sjsing 	assert(num >= 0);
327*8889fb99Sjsing 	if (num <= 0)
328*8889fb99Sjsing 		return ((BN_ULONG)0);
329*8889fb99Sjsing 
330*8889fb99Sjsing 	bl = LBITS(w);
331*8889fb99Sjsing 	bh = HBITS(w);
332*8889fb99Sjsing 
333*8889fb99Sjsing #ifndef OPENSSL_SMALL_FOOTPRINT
334*8889fb99Sjsing 	while (num & ~3) {
335*8889fb99Sjsing 		mul(rp[0], ap[0], bl, bh, carry);
336*8889fb99Sjsing 		mul(rp[1], ap[1], bl, bh, carry);
337*8889fb99Sjsing 		mul(rp[2], ap[2], bl, bh, carry);
338*8889fb99Sjsing 		mul(rp[3], ap[3], bl, bh, carry);
339*8889fb99Sjsing 		ap += 4;
340*8889fb99Sjsing 		rp += 4;
341*8889fb99Sjsing 		num -= 4;
342*8889fb99Sjsing 	}
343*8889fb99Sjsing #endif
344*8889fb99Sjsing 	while (num) {
345*8889fb99Sjsing 		mul(rp[0], ap[0], bl, bh, carry);
346*8889fb99Sjsing 		ap++;
347*8889fb99Sjsing 		rp++;
348*8889fb99Sjsing 		num--;
349*8889fb99Sjsing 	}
350*8889fb99Sjsing 	return (carry);
351*8889fb99Sjsing }
352*8889fb99Sjsing #endif /* !(defined(BN_LLONG) || defined(BN_UMULT_HIGH)) */
353*8889fb99Sjsing #endif
354*8889fb99Sjsing 
355e8d08ebaSjsing #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
356fa7eac9fSjsing /*
357fa7eac9fSjsing  * Here follows a specialised variant of bn_sub_words(), which has the property
358fa7eac9fSjsing  * performing operations on arrays of different sizes. The sizes of those arrays
359fa7eac9fSjsing  * is expressed through cl, which is the common length (basically,
360fa7eac9fSjsing  * min(len(a),len(b))), and dl, which is the delta between the two lengths,
361fa7eac9fSjsing  * calculated as len(a)-len(b). All lengths are the number of BN_ULONGs. For the
362fa7eac9fSjsing  * operations that require a result array as parameter, it must have the length
363fa7eac9fSjsing  * cl+abs(dl).
364fa7eac9fSjsing  */
365e8d08ebaSjsing BN_ULONG
366e8d08ebaSjsing bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int cl,
367e8d08ebaSjsing     int dl)
368e8d08ebaSjsing {
369e8d08ebaSjsing 	BN_ULONG c, t;
370e8d08ebaSjsing 
371e8d08ebaSjsing 	assert(cl >= 0);
372e8d08ebaSjsing 	c = bn_sub_words(r, a, b, cl);
373e8d08ebaSjsing 
374e8d08ebaSjsing 	if (dl == 0)
375e8d08ebaSjsing 		return c;
376e8d08ebaSjsing 
377e8d08ebaSjsing 	r += cl;
378e8d08ebaSjsing 	a += cl;
379e8d08ebaSjsing 	b += cl;
380e8d08ebaSjsing 
381e8d08ebaSjsing 	if (dl < 0) {
382e8d08ebaSjsing 		for (;;) {
383e8d08ebaSjsing 			t = b[0];
384e8d08ebaSjsing 			r[0] = (0 - t - c) & BN_MASK2;
385e8d08ebaSjsing 			if (t != 0)
386e8d08ebaSjsing 				c = 1;
387e8d08ebaSjsing 			if (++dl >= 0)
388e8d08ebaSjsing 				break;
389e8d08ebaSjsing 
390e8d08ebaSjsing 			t = b[1];
391e8d08ebaSjsing 			r[1] = (0 - t - c) & BN_MASK2;
392e8d08ebaSjsing 			if (t != 0)
393e8d08ebaSjsing 				c = 1;
394e8d08ebaSjsing 			if (++dl >= 0)
395e8d08ebaSjsing 				break;
396e8d08ebaSjsing 
397e8d08ebaSjsing 			t = b[2];
398e8d08ebaSjsing 			r[2] = (0 - t - c) & BN_MASK2;
399e8d08ebaSjsing 			if (t != 0)
400e8d08ebaSjsing 				c = 1;
401e8d08ebaSjsing 			if (++dl >= 0)
402e8d08ebaSjsing 				break;
403e8d08ebaSjsing 
404e8d08ebaSjsing 			t = b[3];
405e8d08ebaSjsing 			r[3] = (0 - t - c) & BN_MASK2;
406e8d08ebaSjsing 			if (t != 0)
407e8d08ebaSjsing 				c = 1;
408e8d08ebaSjsing 			if (++dl >= 0)
409e8d08ebaSjsing 				break;
410e8d08ebaSjsing 
411e8d08ebaSjsing 			b += 4;
412e8d08ebaSjsing 			r += 4;
413e8d08ebaSjsing 		}
414e8d08ebaSjsing 	} else {
415e8d08ebaSjsing 		int save_dl = dl;
416e8d08ebaSjsing 		while (c) {
417e8d08ebaSjsing 			t = a[0];
418e8d08ebaSjsing 			r[0] = (t - c) & BN_MASK2;
419e8d08ebaSjsing 			if (t != 0)
420e8d08ebaSjsing 				c = 0;
421e8d08ebaSjsing 			if (--dl <= 0)
422e8d08ebaSjsing 				break;
423e8d08ebaSjsing 
424e8d08ebaSjsing 			t = a[1];
425e8d08ebaSjsing 			r[1] = (t - c) & BN_MASK2;
426e8d08ebaSjsing 			if (t != 0)
427e8d08ebaSjsing 				c = 0;
428e8d08ebaSjsing 			if (--dl <= 0)
429e8d08ebaSjsing 				break;
430e8d08ebaSjsing 
431e8d08ebaSjsing 			t = a[2];
432e8d08ebaSjsing 			r[2] = (t - c) & BN_MASK2;
433e8d08ebaSjsing 			if (t != 0)
434e8d08ebaSjsing 				c = 0;
435e8d08ebaSjsing 			if (--dl <= 0)
436e8d08ebaSjsing 				break;
437e8d08ebaSjsing 
438e8d08ebaSjsing 			t = a[3];
439e8d08ebaSjsing 			r[3] = (t - c) & BN_MASK2;
440e8d08ebaSjsing 			if (t != 0)
441e8d08ebaSjsing 				c = 0;
442e8d08ebaSjsing 			if (--dl <= 0)
443e8d08ebaSjsing 				break;
444e8d08ebaSjsing 
445e8d08ebaSjsing 			save_dl = dl;
446e8d08ebaSjsing 			a += 4;
447e8d08ebaSjsing 			r += 4;
448e8d08ebaSjsing 		}
449e8d08ebaSjsing 		if (dl > 0) {
450e8d08ebaSjsing 			if (save_dl > dl) {
451e8d08ebaSjsing 				switch (save_dl - dl) {
452e8d08ebaSjsing 				case 1:
453e8d08ebaSjsing 					r[1] = a[1];
454e8d08ebaSjsing 					if (--dl <= 0)
455e8d08ebaSjsing 						break;
456e8d08ebaSjsing 				case 2:
457e8d08ebaSjsing 					r[2] = a[2];
458e8d08ebaSjsing 					if (--dl <= 0)
459e8d08ebaSjsing 						break;
460e8d08ebaSjsing 				case 3:
461e8d08ebaSjsing 					r[3] = a[3];
462e8d08ebaSjsing 					if (--dl <= 0)
463e8d08ebaSjsing 						break;
464e8d08ebaSjsing 				}
465e8d08ebaSjsing 				a += 4;
466e8d08ebaSjsing 				r += 4;
467e8d08ebaSjsing 			}
468e8d08ebaSjsing 		}
469e8d08ebaSjsing 		if (dl > 0) {
470e8d08ebaSjsing 			for (;;) {
471e8d08ebaSjsing 				r[0] = a[0];
472e8d08ebaSjsing 				if (--dl <= 0)
473e8d08ebaSjsing 					break;
474e8d08ebaSjsing 				r[1] = a[1];
475e8d08ebaSjsing 				if (--dl <= 0)
476e8d08ebaSjsing 					break;
477e8d08ebaSjsing 				r[2] = a[2];
478e8d08ebaSjsing 				if (--dl <= 0)
479e8d08ebaSjsing 					break;
480e8d08ebaSjsing 				r[3] = a[3];
481e8d08ebaSjsing 				if (--dl <= 0)
482e8d08ebaSjsing 					break;
483e8d08ebaSjsing 
484e8d08ebaSjsing 				a += 4;
485e8d08ebaSjsing 				r += 4;
486e8d08ebaSjsing 			}
487e8d08ebaSjsing 		}
488e8d08ebaSjsing 	}
489e8d08ebaSjsing 	return c;
490e8d08ebaSjsing }
491e8d08ebaSjsing #endif
492e8d08ebaSjsing 
493e8d08ebaSjsing void
494e8d08ebaSjsing bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
495e8d08ebaSjsing {
496e8d08ebaSjsing 	BN_ULONG *rr;
497e8d08ebaSjsing 
498e8d08ebaSjsing 
499e8d08ebaSjsing 	if (na < nb) {
500e8d08ebaSjsing 		int itmp;
501e8d08ebaSjsing 		BN_ULONG *ltmp;
502e8d08ebaSjsing 
503e8d08ebaSjsing 		itmp = na;
504e8d08ebaSjsing 		na = nb;
505e8d08ebaSjsing 		nb = itmp;
506e8d08ebaSjsing 		ltmp = a;
507e8d08ebaSjsing 		a = b;
508e8d08ebaSjsing 		b = ltmp;
509e8d08ebaSjsing 
510e8d08ebaSjsing 	}
511e8d08ebaSjsing 	rr = &(r[na]);
512e8d08ebaSjsing 	if (nb <= 0) {
513e8d08ebaSjsing 		(void)bn_mul_words(r, a, na, 0);
514e8d08ebaSjsing 		return;
515e8d08ebaSjsing 	} else
516e8d08ebaSjsing 		rr[0] = bn_mul_words(r, a, na, b[0]);
517e8d08ebaSjsing 
518e8d08ebaSjsing 	for (;;) {
519e8d08ebaSjsing 		if (--nb <= 0)
520e8d08ebaSjsing 			return;
521e8d08ebaSjsing 		rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
522e8d08ebaSjsing 		if (--nb <= 0)
523e8d08ebaSjsing 			return;
524e8d08ebaSjsing 		rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
525e8d08ebaSjsing 		if (--nb <= 0)
526e8d08ebaSjsing 			return;
527e8d08ebaSjsing 		rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
528e8d08ebaSjsing 		if (--nb <= 0)
529e8d08ebaSjsing 			return;
530e8d08ebaSjsing 		rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
531e8d08ebaSjsing 		rr += 4;
532e8d08ebaSjsing 		r += 4;
533e8d08ebaSjsing 		b += 4;
534e8d08ebaSjsing 	}
535e8d08ebaSjsing }
536e8d08ebaSjsing 
537913ec974Sbeck #ifdef BN_RECURSION
538f6e3f262Sbeck /* Karatsuba recursive multiplication algorithm
539f6e3f262Sbeck  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
540f6e3f262Sbeck 
541913ec974Sbeck /* r is 2*n2 words in size,
542913ec974Sbeck  * a and b are both n2 words in size.
543913ec974Sbeck  * n2 must be a power of 2.
544913ec974Sbeck  * We multiply and return the result.
545913ec974Sbeck  * t must be 2*n2 words in size
546ba5406e9Sbeck  * We calculate
547913ec974Sbeck  * a[0]*b[0]
548913ec974Sbeck  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
549913ec974Sbeck  * a[1]*b[1]
550913ec974Sbeck  */
5514fcf65c5Sdjm /* dnX may not be positive, but n2/2+dnX has to be */
5522bd9bb84Sjsing void
5532bd9bb84Sjsing bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
5542bd9bb84Sjsing     int dnb, BN_ULONG *t)
5555b37fcf3Sryker {
556913ec974Sbeck 	int n = n2 / 2, c1, c2;
5574fcf65c5Sdjm 	int tna = n + dna, tnb = n + dnb;
558913ec974Sbeck 	unsigned int neg, zero;
559913ec974Sbeck 	BN_ULONG ln, lo, *p;
5605b37fcf3Sryker 
561913ec974Sbeck # ifdef BN_MUL_COMBA
562ba5406e9Sbeck #  if 0
5632bd9bb84Sjsing 	if (n2 == 4) {
564913ec974Sbeck 		bn_mul_comba4(r, a, b);
565913ec974Sbeck 		return;
566913ec974Sbeck 	}
567ba5406e9Sbeck #  endif
5684fcf65c5Sdjm 	/* Only call bn_mul_comba 8 if n2 == 8 and the
5694fcf65c5Sdjm 	 * two arrays are complete [steve]
5704fcf65c5Sdjm 	 */
5712bd9bb84Sjsing 	if (n2 == 8 && dna == 0 && dnb == 0) {
572913ec974Sbeck 		bn_mul_comba8(r, a, b);
573913ec974Sbeck 		return;
574913ec974Sbeck 	}
575ba5406e9Sbeck # endif /* BN_MUL_COMBA */
5764fcf65c5Sdjm 	/* Else do normal multiply */
5772bd9bb84Sjsing 	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
5784fcf65c5Sdjm 		bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
5794fcf65c5Sdjm 		if ((dna + dnb) < 0)
5804fcf65c5Sdjm 			memset(&r[2*n2 + dna + dnb], 0,
5814fcf65c5Sdjm 			    sizeof(BN_ULONG) * -(dna + dnb));
582913ec974Sbeck 		return;
583913ec974Sbeck 	}
584913ec974Sbeck 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
5854fcf65c5Sdjm 	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
5864fcf65c5Sdjm 	c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n);
587913ec974Sbeck 	zero = neg = 0;
5882bd9bb84Sjsing 	switch (c1 * 3 + c2) {
589913ec974Sbeck 	case -4:
5904fcf65c5Sdjm 		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
5914fcf65c5Sdjm 		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
592913ec974Sbeck 		break;
593913ec974Sbeck 	case -3:
594913ec974Sbeck 		zero = 1;
595913ec974Sbeck 		break;
596913ec974Sbeck 	case -2:
5974fcf65c5Sdjm 		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
5984fcf65c5Sdjm 		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
599913ec974Sbeck 		neg = 1;
600913ec974Sbeck 		break;
601913ec974Sbeck 	case -1:
602913ec974Sbeck 	case 0:
603913ec974Sbeck 	case 1:
604913ec974Sbeck 		zero = 1;
605913ec974Sbeck 		break;
606913ec974Sbeck 	case 2:
6074fcf65c5Sdjm 		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
6084fcf65c5Sdjm 		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
609913ec974Sbeck 		neg = 1;
610913ec974Sbeck 		break;
611913ec974Sbeck 	case 3:
612913ec974Sbeck 		zero = 1;
613913ec974Sbeck 		break;
614913ec974Sbeck 	case 4:
6154fcf65c5Sdjm 		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
6164fcf65c5Sdjm 		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
617913ec974Sbeck 		break;
6185b37fcf3Sryker 	}
6195b37fcf3Sryker 
620913ec974Sbeck # ifdef BN_MUL_COMBA
6214fcf65c5Sdjm 	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
6224fcf65c5Sdjm 					       extra args to do this well */
6235b37fcf3Sryker 	{
624913ec974Sbeck 		if (!zero)
625913ec974Sbeck 			bn_mul_comba4(&(t[n2]), t, &(t[n]));
626913ec974Sbeck 		else
627913ec974Sbeck 			memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
628913ec974Sbeck 
629913ec974Sbeck 		bn_mul_comba4(r, a, b);
630913ec974Sbeck 		bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
6312bd9bb84Sjsing 	} else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
6324fcf65c5Sdjm 						    take extra args to do this
6334fcf65c5Sdjm 						    well */
634913ec974Sbeck 	{
635913ec974Sbeck 		if (!zero)
636913ec974Sbeck 			bn_mul_comba8(&(t[n2]), t, &(t[n]));
637913ec974Sbeck 		else
638913ec974Sbeck 			memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
639913ec974Sbeck 
640913ec974Sbeck 		bn_mul_comba8(r, a, b);
641913ec974Sbeck 		bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
6422bd9bb84Sjsing 	} else
643ba5406e9Sbeck # endif /* BN_MUL_COMBA */
644913ec974Sbeck 	{
645913ec974Sbeck 		p = &(t[n2 * 2]);
646913ec974Sbeck 		if (!zero)
6474fcf65c5Sdjm 			bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
648913ec974Sbeck 		else
649913ec974Sbeck 			memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
6504fcf65c5Sdjm 		bn_mul_recursive(r, a, b, n, 0, 0, p);
6514fcf65c5Sdjm 		bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
6525b37fcf3Sryker 	}
6535b37fcf3Sryker 
654913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
655913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
656913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
657913ec974Sbeck 	 */
6585b37fcf3Sryker 
659913ec974Sbeck 	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
6605b37fcf3Sryker 
661913ec974Sbeck 	if (neg) /* if t[32] is negative */
6625b37fcf3Sryker 	{
663913ec974Sbeck 		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
6642bd9bb84Sjsing 	} else {
665913ec974Sbeck 		/* Might have a carry */
666913ec974Sbeck 		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
6675b37fcf3Sryker 	}
6685b37fcf3Sryker 
669913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
670913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
671913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
672913ec974Sbeck 	 * c1 holds the carry bits
673913ec974Sbeck 	 */
674913ec974Sbeck 	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
6752bd9bb84Sjsing 	if (c1) {
676913ec974Sbeck 		p = &(r[n + n2]);
677913ec974Sbeck 		lo= *p;
678913ec974Sbeck 		ln = (lo + c1) & BN_MASK2;
679913ec974Sbeck 		*p = ln;
680913ec974Sbeck 
681913ec974Sbeck 		/* The overflow will stop before we over write
682913ec974Sbeck 		 * words we should not overwrite */
6832bd9bb84Sjsing 		if (ln < (BN_ULONG)c1) {
684913ec974Sbeck 			do {
685913ec974Sbeck 				p++;
686913ec974Sbeck 				lo= *p;
687913ec974Sbeck 				ln = (lo + 1) & BN_MASK2;
688913ec974Sbeck 				*p = ln;
689913ec974Sbeck 			} while (ln == 0);
690913ec974Sbeck 		}
691913ec974Sbeck 	}
6925b37fcf3Sryker }
6935b37fcf3Sryker 
694913ec974Sbeck /* n+tn is the word length
695913ec974Sbeck  * t needs to be n*4 is size, as does r */
6964fcf65c5Sdjm /* tnX may not be negative but less than n */
6972bd9bb84Sjsing void
6982bd9bb84Sjsing bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
6992bd9bb84Sjsing     int tnb, BN_ULONG *t)
7005b37fcf3Sryker {
701913ec974Sbeck 	int i, j, n2 = n * 2;
702c32db552Sdjm 	int c1, c2, neg;
703913ec974Sbeck 	BN_ULONG ln, lo, *p;
7045b37fcf3Sryker 
7052bd9bb84Sjsing 	if (n < 8) {
7064fcf65c5Sdjm 		bn_mul_normal(r, a, n + tna, b, n + tnb);
707913ec974Sbeck 		return;
7085b37fcf3Sryker 	}
7095b37fcf3Sryker 
710913ec974Sbeck 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
7114fcf65c5Sdjm 	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
7124fcf65c5Sdjm 	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
713c32db552Sdjm 	neg = 0;
7142bd9bb84Sjsing 	switch (c1 * 3 + c2) {
715ba5406e9Sbeck 	case -4:
7164fcf65c5Sdjm 		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
7174fcf65c5Sdjm 		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
718ba5406e9Sbeck 		break;
719ba5406e9Sbeck 	case -3:
720ba5406e9Sbeck 		/* break; */
721ba5406e9Sbeck 	case -2:
7224fcf65c5Sdjm 		bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */
7234fcf65c5Sdjm 		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
724ba5406e9Sbeck 		neg = 1;
725ba5406e9Sbeck 		break;
726ba5406e9Sbeck 	case -1:
727ba5406e9Sbeck 	case 0:
728ba5406e9Sbeck 	case 1:
729ba5406e9Sbeck 		/* break; */
730ba5406e9Sbeck 	case 2:
7314fcf65c5Sdjm 		bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */
7324fcf65c5Sdjm 		bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
733ba5406e9Sbeck 		neg = 1;
734ba5406e9Sbeck 		break;
735ba5406e9Sbeck 	case 3:
736ba5406e9Sbeck 		/* break; */
737ba5406e9Sbeck 	case 4:
7384fcf65c5Sdjm 		bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
7394fcf65c5Sdjm 		bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
740ba5406e9Sbeck 		break;
741ba5406e9Sbeck 	}
742ba5406e9Sbeck 		/* The zero case isn't yet implemented here. The speedup
743ba5406e9Sbeck 		   would probably be negligible. */
744ba5406e9Sbeck # if 0
7452bd9bb84Sjsing 	if (n == 4) {
746913ec974Sbeck 		bn_mul_comba4(&(t[n2]), t, &(t[n]));
747913ec974Sbeck 		bn_mul_comba4(r, a, b);
748913ec974Sbeck 		bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
749913ec974Sbeck 		memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
7502bd9bb84Sjsing 	} else
751ba5406e9Sbeck # endif
7522bd9bb84Sjsing 		if (n == 8) {
753913ec974Sbeck 		bn_mul_comba8(&(t[n2]), t, &(t[n]));
754913ec974Sbeck 		bn_mul_comba8(r, a, b);
7554fcf65c5Sdjm 		bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
7562bd9bb84Sjsing 		memset(&(r[n2 + tna + tnb]), 0,
7572bd9bb84Sjsing 		    sizeof(BN_ULONG) * (n2 - tna - tnb));
7582bd9bb84Sjsing 	} else {
759913ec974Sbeck 		p = &(t[n2*2]);
7604fcf65c5Sdjm 		bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
7614fcf65c5Sdjm 		bn_mul_recursive(r, a, b, n, 0, 0, p);
762913ec974Sbeck 		i = n / 2;
763913ec974Sbeck 		/* If there is only a bottom half to the number,
764913ec974Sbeck 		 * just do it */
7654fcf65c5Sdjm 		if (tna > tnb)
7664fcf65c5Sdjm 			j = tna - i;
7674fcf65c5Sdjm 		else
7684fcf65c5Sdjm 			j = tnb - i;
7692bd9bb84Sjsing 		if (j == 0) {
7704fcf65c5Sdjm 			bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
7714fcf65c5Sdjm 			    i, tna - i, tnb - i, p);
7722bd9bb84Sjsing 			memset(&(r[n2 + i * 2]), 0,
7732bd9bb84Sjsing 			    sizeof(BN_ULONG) * (n2 - i * 2));
774913ec974Sbeck 		}
775913ec974Sbeck 		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
776913ec974Sbeck 		{
777913ec974Sbeck 			bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
7784fcf65c5Sdjm 			    i, tna - i, tnb - i, p);
7794fcf65c5Sdjm 			memset(&(r[n2 + tna + tnb]), 0,
7804fcf65c5Sdjm 			    sizeof(BN_ULONG) * (n2 - tna - tnb));
781913ec974Sbeck 		}
782913ec974Sbeck 		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
783913ec974Sbeck 		{
784913ec974Sbeck 			memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
7852bd9bb84Sjsing 			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
7862bd9bb84Sjsing 			    tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
7872bd9bb84Sjsing 				bn_mul_normal(&(r[n2]), &(a[n]), tna,
7882bd9bb84Sjsing 				    &(b[n]), tnb);
7892bd9bb84Sjsing 			} else {
7902bd9bb84Sjsing 				for (;;) {
791913ec974Sbeck 					i /= 2;
7924fcf65c5Sdjm 					/* these simplified conditions work
7934fcf65c5Sdjm 					 * exclusively because difference
7944fcf65c5Sdjm 					 * between tna and tnb is 1 or 0 */
7952bd9bb84Sjsing 					if (i < tna || i < tnb) {
796913ec974Sbeck 						bn_mul_part_recursive(&(r[n2]),
7972bd9bb84Sjsing 						    &(a[n]), &(b[n]), i,
7982bd9bb84Sjsing 						    tna - i, tnb - i, p);
799913ec974Sbeck 						break;
8002bd9bb84Sjsing 					} else if (i == tna || i == tnb) {
801913ec974Sbeck 						bn_mul_recursive(&(r[n2]),
8022bd9bb84Sjsing 						    &(a[n]), &(b[n]), i,
8032bd9bb84Sjsing 						    tna - i, tnb - i, p);
804913ec974Sbeck 						break;
805913ec974Sbeck 					}
806913ec974Sbeck 				}
807913ec974Sbeck 			}
808913ec974Sbeck 		}
8095b37fcf3Sryker 	}
8105b37fcf3Sryker 
811913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
812913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
813913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
814913ec974Sbeck 	 */
8155b37fcf3Sryker 
816913ec974Sbeck 	c1 = (int)(bn_add_words(t, r,&(r[n2]), n2));
817ba5406e9Sbeck 
818ba5406e9Sbeck 	if (neg) /* if t[32] is negative */
819ba5406e9Sbeck 	{
820913ec974Sbeck 		c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2));
8212bd9bb84Sjsing 	} else {
822ba5406e9Sbeck 		/* Might have a carry */
823ba5406e9Sbeck 		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
824ba5406e9Sbeck 	}
8255b37fcf3Sryker 
826913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
827913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
828913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
829913ec974Sbeck 	 * c1 holds the carry bits
830913ec974Sbeck 	 */
831913ec974Sbeck 	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
8322bd9bb84Sjsing 	if (c1) {
833913ec974Sbeck 		p = &(r[n + n2]);
834913ec974Sbeck 		lo= *p;
835913ec974Sbeck 		ln = (lo + c1)&BN_MASK2;
836913ec974Sbeck 		*p = ln;
8375b37fcf3Sryker 
838913ec974Sbeck 		/* The overflow will stop before we over write
839913ec974Sbeck 		 * words we should not overwrite */
8402bd9bb84Sjsing 		if (ln < (BN_ULONG)c1) {
841913ec974Sbeck 			do {
842913ec974Sbeck 				p++;
843913ec974Sbeck 				lo= *p;
844913ec974Sbeck 				ln = (lo + 1) & BN_MASK2;
845913ec974Sbeck 				*p = ln;
846913ec974Sbeck 			} while (ln == 0);
847913ec974Sbeck 		}
848913ec974Sbeck 	}
849913ec974Sbeck }
850ba5406e9Sbeck #endif /* BN_RECURSION */
851913ec974Sbeck 
8529554b5edSjsing #ifndef HAVE_BN_MUL
8539554b5edSjsing #ifndef BN_RECURSION
8542bd9bb84Sjsing int
8559554b5edSjsing bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx)
856913ec974Sbeck {
8579554b5edSjsing 	bn_mul_normal(r->d, a->d, a->top, b->d, b->top);
8589554b5edSjsing 
8599554b5edSjsing 	return 1;
8609554b5edSjsing }
8619554b5edSjsing 
8629554b5edSjsing #else /* BN_RECURSION */
8639554b5edSjsing int
8649554b5edSjsing bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx)
8659554b5edSjsing {
8664fcf65c5Sdjm 	BIGNUM *t = NULL;
8679554b5edSjsing 	int al, bl, i, k;
8689554b5edSjsing 	int j = 0;
8699554b5edSjsing 	int ret = 0;
870913ec974Sbeck 
8719554b5edSjsing 	BN_CTX_start(ctx);
872913ec974Sbeck 
873913ec974Sbeck 	al = a->top;
874913ec974Sbeck 	bl = b->top;
875913ec974Sbeck 
876ba5406e9Sbeck 	i = al - bl;
8779554b5edSjsing 
8782bd9bb84Sjsing 	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
8792bd9bb84Sjsing 		if (i >= -1 && i <= 1) {
8804fcf65c5Sdjm 			/* Find out the power of two lower or equal
8814fcf65c5Sdjm 			   to the longest of the two numbers */
8822bd9bb84Sjsing 			if (i >= 0) {
8834fcf65c5Sdjm 				j = BN_num_bits_word((BN_ULONG)al);
8844fcf65c5Sdjm 			}
8852bd9bb84Sjsing 			if (i == -1) {
8864fcf65c5Sdjm 				j = BN_num_bits_word((BN_ULONG)bl);
8874fcf65c5Sdjm 			}
8884fcf65c5Sdjm 			j = 1 << (j - 1);
8894fcf65c5Sdjm 			assert(j <= al || j <= bl);
8904fcf65c5Sdjm 			k = j + j;
891aa389b8cSjsing 			if ((t = BN_CTX_get(ctx)) == NULL)
8920a5d6edeSdjm 				goto err;
8932bd9bb84Sjsing 			if (al > j || bl > j) {
894e729bd5aSjsing 				if (!bn_wexpand(t, k * 4))
8952bd9bb84Sjsing 					goto err;
8969554b5edSjsing 				if (!bn_wexpand(r, k * 4))
8972bd9bb84Sjsing 					goto err;
8989554b5edSjsing 				bn_mul_part_recursive(r->d, a->d, b->d,
8994fcf65c5Sdjm 				    j, al - j, bl - j, t->d);
9004fcf65c5Sdjm 			}
9014fcf65c5Sdjm 			else	/* al <= j || bl <= j */
9024fcf65c5Sdjm 			{
903e729bd5aSjsing 				if (!bn_wexpand(t, k * 2))
9042bd9bb84Sjsing 					goto err;
9059554b5edSjsing 				if (!bn_wexpand(r, k * 2))
9062bd9bb84Sjsing 					goto err;
9079554b5edSjsing 				bn_mul_recursive(r->d, a->d, b->d,
9084fcf65c5Sdjm 				    j, al - j, bl - j, t->d);
9094fcf65c5Sdjm 			}
9109554b5edSjsing 			r->top = rn;
9114fcf65c5Sdjm 			goto end;
9124fcf65c5Sdjm 		}
9134fcf65c5Sdjm #if 0
9142bd9bb84Sjsing 		if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
9154fcf65c5Sdjm 			BIGNUM *tmp_bn = (BIGNUM *)b;
916e729bd5aSjsing 			if (!bn_wexpand(tmp_bn, al))
9172bd9bb84Sjsing 				goto err;
9184fcf65c5Sdjm 			tmp_bn->d[bl] = 0;
919913ec974Sbeck 			bl++;
920ba5406e9Sbeck 			i--;
9212bd9bb84Sjsing 		} else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
9224fcf65c5Sdjm 			BIGNUM *tmp_bn = (BIGNUM *)a;
923e729bd5aSjsing 			if (!bn_wexpand(tmp_bn, bl))
9242bd9bb84Sjsing 				goto err;
9254fcf65c5Sdjm 			tmp_bn->d[al] = 0;
926913ec974Sbeck 			al++;
927ba5406e9Sbeck 			i++;
928913ec974Sbeck 		}
9292bd9bb84Sjsing 		if (i == 0) {
930ba5406e9Sbeck 			/* symmetric and > 4 */
931913ec974Sbeck 			/* 16 or larger */
932913ec974Sbeck 			j = BN_num_bits_word((BN_ULONG)al);
933913ec974Sbeck 			j = 1 << (j - 1);
934913ec974Sbeck 			k = j + j;
935aa389b8cSjsing 			if ((t = BN_CTX_get(ctx)) == NULL)
936aa389b8cSjsing 				goto err;
937913ec974Sbeck 			if (al == j) /* exact multiple */
938913ec974Sbeck 			{
939e729bd5aSjsing 				if (!bn_wexpand(t, k * 2))
9402bd9bb84Sjsing 					goto err;
9419554b5edSjsing 				if (!bn_wexpand(r, k * 2))
9422bd9bb84Sjsing 					goto err;
9439554b5edSjsing 				bn_mul_recursive(r->d, a->d, b->d, al, t->d);
9442bd9bb84Sjsing 			} else {
945e729bd5aSjsing 				if (!bn_wexpand(t, k * 4))
9462bd9bb84Sjsing 					goto err;
9479554b5edSjsing 				if (!bn_wexpand(r, k * 4))
9482bd9bb84Sjsing 					goto err;
9499554b5edSjsing 				bn_mul_part_recursive(r->d, a->d, b->d,
9502bd9bb84Sjsing 				    al - j, j, t->d);
951913ec974Sbeck 			}
9529554b5edSjsing 			r->top = top;
953ba5406e9Sbeck 			goto end;
954ba5406e9Sbeck 		}
9554fcf65c5Sdjm #endif
956767fe2ffSmarkus 	}
957ba5406e9Sbeck 
9589554b5edSjsing 	bn_mul_normal(r->d, a->d, al, b->d, bl);
9599554b5edSjsing 
960913ec974Sbeck  end:
961ba5406e9Sbeck 	ret = 1;
962ba5406e9Sbeck  err:
963ba5406e9Sbeck 	BN_CTX_end(ctx);
9649554b5edSjsing 
9659554b5edSjsing 	return ret;
9669554b5edSjsing }
9679554b5edSjsing #endif /* BN_RECURSION */
9689554b5edSjsing #endif /* HAVE_BN_MUL */
9699554b5edSjsing 
9709554b5edSjsing int
9719554b5edSjsing BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
9729554b5edSjsing {
9739554b5edSjsing 	BIGNUM *rr;
9749554b5edSjsing 	int rn;
9759554b5edSjsing 	int ret = 0;
9769554b5edSjsing 
9779554b5edSjsing 	BN_CTX_start(ctx);
9789554b5edSjsing 
9799554b5edSjsing 	if (BN_is_zero(a) || BN_is_zero(b)) {
9809554b5edSjsing 		BN_zero(r);
9819554b5edSjsing 		goto done;
9829554b5edSjsing 	}
9839554b5edSjsing 
9849554b5edSjsing 	rr = r;
9859554b5edSjsing 	if (rr == a || rr == b)
9869554b5edSjsing 		rr = BN_CTX_get(ctx);
9879554b5edSjsing 	if (rr == NULL)
9889554b5edSjsing 		goto err;
9899554b5edSjsing 
9909554b5edSjsing 	rn = a->top + b->top;
9919554b5edSjsing 	if (rn < a->top)
9929554b5edSjsing 		goto err;
9939554b5edSjsing 	if (!bn_wexpand(rr, rn))
9949554b5edSjsing 		goto err;
9959554b5edSjsing 
9969554b5edSjsing 	if (a->top == 4 && b->top == 4) {
9979554b5edSjsing 		bn_mul_comba4(rr->d, a->d, b->d);
9989554b5edSjsing 	} else if (a->top == 8 && b->top == 8) {
9999554b5edSjsing 		bn_mul_comba8(rr->d, a->d, b->d);
10009554b5edSjsing 	} else {
10019554b5edSjsing 		if (!bn_mul(rr, a, b, rn, ctx))
10029554b5edSjsing 			goto err;
10039554b5edSjsing 	}
10049554b5edSjsing 
10059554b5edSjsing 	rr->top = rn;
10069554b5edSjsing 	rr->neg = a->neg ^ b->neg;
10079554b5edSjsing 
10089554b5edSjsing 	bn_correct_top(rr);
10099554b5edSjsing 
10109554b5edSjsing 	if (r != rr)
10119554b5edSjsing 		BN_copy(r, rr);
10129554b5edSjsing  done:
10139554b5edSjsing 	ret = 1;
10149554b5edSjsing  err:
10159554b5edSjsing 	BN_CTX_end(ctx);
10169554b5edSjsing 
10179554b5edSjsing 	return ret;
1018913ec974Sbeck }
1019