xref: /openbsd/lib/libcrypto/bn/bn_mul.c (revision 363923ba)
1*363923baStb /* $OpenBSD: bn_mul.c,v 1.35 2023/03/27 10:22:47 tb Exp $ */
25b37fcf3Sryker /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
35b37fcf3Sryker  * All rights reserved.
45b37fcf3Sryker  *
55b37fcf3Sryker  * This package is an SSL implementation written
65b37fcf3Sryker  * by Eric Young (eay@cryptsoft.com).
75b37fcf3Sryker  * The implementation was written so as to conform with Netscapes SSL.
85b37fcf3Sryker  *
95b37fcf3Sryker  * This library is free for commercial and non-commercial use as long as
105b37fcf3Sryker  * the following conditions are aheared to.  The following conditions
115b37fcf3Sryker  * apply to all code found in this distribution, be it the RC4, RSA,
125b37fcf3Sryker  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
135b37fcf3Sryker  * included with this distribution is covered by the same copyright terms
145b37fcf3Sryker  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
155b37fcf3Sryker  *
165b37fcf3Sryker  * Copyright remains Eric Young's, and as such any Copyright notices in
175b37fcf3Sryker  * the code are not to be removed.
185b37fcf3Sryker  * If this package is used in a product, Eric Young should be given attribution
195b37fcf3Sryker  * as the author of the parts of the library used.
205b37fcf3Sryker  * This can be in the form of a textual message at program startup or
215b37fcf3Sryker  * in documentation (online or textual) provided with the package.
225b37fcf3Sryker  *
235b37fcf3Sryker  * Redistribution and use in source and binary forms, with or without
245b37fcf3Sryker  * modification, are permitted provided that the following conditions
255b37fcf3Sryker  * are met:
265b37fcf3Sryker  * 1. Redistributions of source code must retain the copyright
275b37fcf3Sryker  *    notice, this list of conditions and the following disclaimer.
285b37fcf3Sryker  * 2. Redistributions in binary form must reproduce the above copyright
295b37fcf3Sryker  *    notice, this list of conditions and the following disclaimer in the
305b37fcf3Sryker  *    documentation and/or other materials provided with the distribution.
315b37fcf3Sryker  * 3. All advertising materials mentioning features or use of this software
325b37fcf3Sryker  *    must display the following acknowledgement:
335b37fcf3Sryker  *    "This product includes cryptographic software written by
345b37fcf3Sryker  *     Eric Young (eay@cryptsoft.com)"
355b37fcf3Sryker  *    The word 'cryptographic' can be left out if the rouines from the library
365b37fcf3Sryker  *    being used are not cryptographic related :-).
375b37fcf3Sryker  * 4. If you include any Windows specific code (or a derivative thereof) from
385b37fcf3Sryker  *    the apps directory (application code) you must include an acknowledgement:
395b37fcf3Sryker  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
405b37fcf3Sryker  *
415b37fcf3Sryker  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
425b37fcf3Sryker  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
435b37fcf3Sryker  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
445b37fcf3Sryker  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
455b37fcf3Sryker  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
465b37fcf3Sryker  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
475b37fcf3Sryker  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
485b37fcf3Sryker  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
495b37fcf3Sryker  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
505b37fcf3Sryker  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
515b37fcf3Sryker  * SUCH DAMAGE.
525b37fcf3Sryker  *
535b37fcf3Sryker  * The licence and distribution terms for any publically available version or
545b37fcf3Sryker  * derivative of this code cannot be changed.  i.e. this code cannot simply be
555b37fcf3Sryker  * copied and put under another distribution licence
565b37fcf3Sryker  * [including the GNU Public Licence.]
575b37fcf3Sryker  */
585b37fcf3Sryker 
594fcf65c5Sdjm #include <assert.h>
60a8913c44Sjsing #include <stdio.h>
61a8913c44Sjsing #include <string.h>
62a8913c44Sjsing 
638cf4d6a6Sjsing #include <openssl/opensslconf.h>
648cf4d6a6Sjsing 
65de344ea3Sjsing #include "bn_arch.h"
66df1d0ed5Sjsing #include "bn_internal.h"
67c9675a23Stb #include "bn_local.h"
685b37fcf3Sryker 
69df1d0ed5Sjsing /*
70df1d0ed5Sjsing  * bn_mul_comba4() computes r[] = a[] * b[] using Comba multiplication
71df1d0ed5Sjsing  * (https://everything2.com/title/Comba+multiplication), where a and b are both
72df1d0ed5Sjsing  * four word arrays, producing an eight word array result.
73df1d0ed5Sjsing  */
74de344ea3Sjsing #ifndef HAVE_BN_MUL_COMBA4
75de344ea3Sjsing void
76de344ea3Sjsing bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
77de344ea3Sjsing {
78df1d0ed5Sjsing 	BN_ULONG c0, c1, c2;
79de344ea3Sjsing 
80df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[0],  0,  0,  0, &c2, &c1, &r[0]);
81df1d0ed5Sjsing 
82df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[1],  0, c2, c1, &c2, &c1, &c0);
83df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]);
84df1d0ed5Sjsing 
85df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[0],  0, c2, c1, &c2, &c1, &c0);
86df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0);
87df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]);
88df1d0ed5Sjsing 
89df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[3],  0, c2, c1, &c2, &c1, &c0);
90df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0);
91df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0);
92df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]);
93df1d0ed5Sjsing 
94df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[1],  0, c2, c1, &c2, &c1, &c0);
95df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0);
96df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &r[4]);
97df1d0ed5Sjsing 
98df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[3],  0, c2, c1, &c2, &c1, &c0);
99df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &r[5]);
100df1d0ed5Sjsing 
101df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[3],  0, c2, c1, &c2, &r[7], &r[6]);
102de344ea3Sjsing }
103de344ea3Sjsing #endif
104de344ea3Sjsing 
105df1d0ed5Sjsing /*
106df1d0ed5Sjsing  * bn_mul_comba8() computes r[] = a[] * b[] using Comba multiplication
107df1d0ed5Sjsing  * (https://everything2.com/title/Comba+multiplication), where a and b are both
108df1d0ed5Sjsing  * eight word arrays, producing a 16 word array result.
109df1d0ed5Sjsing  */
110de344ea3Sjsing #ifndef HAVE_BN_MUL_COMBA8
111de344ea3Sjsing void
112de344ea3Sjsing bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
113de344ea3Sjsing {
114df1d0ed5Sjsing 	BN_ULONG c0, c1, c2;
115de344ea3Sjsing 
116df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[0],  0,  0,  0, &c2, &c1, &r[0]);
117df1d0ed5Sjsing 
118df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[1],  0, c2, c1, &c2, &c1, &c0);
119df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[0], c2, c1, c0, &c2, &c1, &r[1]);
120df1d0ed5Sjsing 
121df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[0],  0, c2, c1, &c2, &c1, &c0);
122df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[1], c2, c1, c0, &c2, &c1, &c0);
123df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[2], c2, c1, c0, &c2, &c1, &r[2]);
124df1d0ed5Sjsing 
125df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[3],  0, c2, c1, &c2, &c1, &c0);
126df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[2], c2, c1, c0, &c2, &c1, &c0);
127df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[1], c2, c1, c0, &c2, &c1, &c0);
128df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[0], c2, c1, c0, &c2, &c1, &r[3]);
129df1d0ed5Sjsing 
130df1d0ed5Sjsing 	bn_mulw_addtw(a[4], b[0],  0, c2, c1, &c2, &c1, &c0);
131df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[1], c2, c1, c0, &c2, &c1, &c0);
132df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[2], c2, c1, c0, &c2, &c1, &c0);
133df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[3], c2, c1, c0, &c2, &c1, &c0);
134df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[4], c2, c1, c0, &c2, &c1, &r[4]);
135df1d0ed5Sjsing 
136df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[5],  0, c2, c1, &c2, &c1, &c0);
137df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[4], c2, c1, c0, &c2, &c1, &c0);
138df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[3], c2, c1, c0, &c2, &c1, &c0);
139df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[2], c2, c1, c0, &c2, &c1, &c0);
140df1d0ed5Sjsing 	bn_mulw_addtw(a[4], b[1], c2, c1, c0, &c2, &c1, &c0);
141df1d0ed5Sjsing 	bn_mulw_addtw(a[5], b[0], c2, c1, c0, &c2, &c1, &r[5]);
142df1d0ed5Sjsing 
143df1d0ed5Sjsing 	bn_mulw_addtw(a[6], b[0],  0, c2, c1, &c2, &c1, &c0);
144df1d0ed5Sjsing 	bn_mulw_addtw(a[5], b[1], c2, c1, c0, &c2, &c1, &c0);
145df1d0ed5Sjsing 	bn_mulw_addtw(a[4], b[2], c2, c1, c0, &c2, &c1, &c0);
146df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[3], c2, c1, c0, &c2, &c1, &c0);
147df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[4], c2, c1, c0, &c2, &c1, &c0);
148df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[5], c2, c1, c0, &c2, &c1, &c0);
149df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[6], c2, c1, c0, &c2, &c1, &r[6]);
150df1d0ed5Sjsing 
151df1d0ed5Sjsing 	bn_mulw_addtw(a[0], b[7],  0, c2, c1, &c2, &c1, &c0);
152df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[6], c2, c1, c0, &c2, &c1, &c0);
153df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[5], c2, c1, c0, &c2, &c1, &c0);
154df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[4], c2, c1, c0, &c2, &c1, &c0);
155df1d0ed5Sjsing 	bn_mulw_addtw(a[4], b[3], c2, c1, c0, &c2, &c1, &c0);
156df1d0ed5Sjsing 	bn_mulw_addtw(a[5], b[2], c2, c1, c0, &c2, &c1, &c0);
157df1d0ed5Sjsing 	bn_mulw_addtw(a[6], b[1], c2, c1, c0, &c2, &c1, &c0);
158df1d0ed5Sjsing 	bn_mulw_addtw(a[7], b[0], c2, c1, c0, &c2, &c1, &r[7]);
159df1d0ed5Sjsing 
160df1d0ed5Sjsing 	bn_mulw_addtw(a[7], b[1],  0, c2, c1, &c2, &c1, &c0);
161df1d0ed5Sjsing 	bn_mulw_addtw(a[6], b[2], c2, c1, c0, &c2, &c1, &c0);
162df1d0ed5Sjsing 	bn_mulw_addtw(a[5], b[3], c2, c1, c0, &c2, &c1, &c0);
163df1d0ed5Sjsing 	bn_mulw_addtw(a[4], b[4], c2, c1, c0, &c2, &c1, &c0);
164df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[5], c2, c1, c0, &c2, &c1, &c0);
165df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[6], c2, c1, c0, &c2, &c1, &c0);
166df1d0ed5Sjsing 	bn_mulw_addtw(a[1], b[7], c2, c1, c0, &c2, &c1, &r[8]);
167df1d0ed5Sjsing 
168df1d0ed5Sjsing 	bn_mulw_addtw(a[2], b[7],  0, c2, c1, &c2, &c1, &c0);
169df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[6], c2, c1, c0, &c2, &c1, &c0);
170df1d0ed5Sjsing 	bn_mulw_addtw(a[4], b[5], c2, c1, c0, &c2, &c1, &c0);
171df1d0ed5Sjsing 	bn_mulw_addtw(a[5], b[4], c2, c1, c0, &c2, &c1, &c0);
172df1d0ed5Sjsing 	bn_mulw_addtw(a[6], b[3], c2, c1, c0, &c2, &c1, &c0);
173df1d0ed5Sjsing 	bn_mulw_addtw(a[7], b[2], c2, c1, c0, &c2, &c1, &r[9]);
174df1d0ed5Sjsing 
175df1d0ed5Sjsing 	bn_mulw_addtw(a[7], b[3],  0, c2, c1, &c2, &c1, &c0);
176df1d0ed5Sjsing 	bn_mulw_addtw(a[6], b[4], c2, c1, c0, &c2, &c1, &c0);
177df1d0ed5Sjsing 	bn_mulw_addtw(a[5], b[5], c2, c1, c0, &c2, &c1, &c0);
178df1d0ed5Sjsing 	bn_mulw_addtw(a[4], b[6], c2, c1, c0, &c2, &c1, &c0);
179df1d0ed5Sjsing 	bn_mulw_addtw(a[3], b[7], c2, c1, c0, &c2, &c1, &r[10]);
180df1d0ed5Sjsing 
181df1d0ed5Sjsing 	bn_mulw_addtw(a[4], b[7],  0, c2, c1, &c2, &c1, &c0);
182df1d0ed5Sjsing 	bn_mulw_addtw(a[5], b[6], c2, c1, c0, &c2, &c1, &c0);
183df1d0ed5Sjsing 	bn_mulw_addtw(a[6], b[5], c2, c1, c0, &c2, &c1, &c0);
184df1d0ed5Sjsing 	bn_mulw_addtw(a[7], b[4], c2, c1, c0, &c2, &c1, &r[11]);
185df1d0ed5Sjsing 
186df1d0ed5Sjsing 	bn_mulw_addtw(a[7], b[5],  0, c2, c1, &c2, &c1, &c0);
187df1d0ed5Sjsing 	bn_mulw_addtw(a[6], b[6], c2, c1, c0, &c2, &c1, &c0);
188df1d0ed5Sjsing 	bn_mulw_addtw(a[5], b[7], c2, c1, c0, &c2, &c1, &r[12]);
189df1d0ed5Sjsing 
190df1d0ed5Sjsing 	bn_mulw_addtw(a[6], b[7],  0, c2, c1, &c2, &c1, &c0);
191df1d0ed5Sjsing 	bn_mulw_addtw(a[7], b[6], c2, c1, c0, &c2, &c1, &r[13]);
192df1d0ed5Sjsing 
193df1d0ed5Sjsing 	bn_mulw_addtw(a[7], b[7],  0, c2, c1, &c2, &r[15], &r[14]);
194de344ea3Sjsing }
195de344ea3Sjsing #endif
196de344ea3Sjsing 
197df1d0ed5Sjsing /*
198df1d0ed5Sjsing  * bn_mul_words() computes (carry:r[i]) = a[i] * w + carry, where a is an array
199df1d0ed5Sjsing  * of words and w is a single word. This should really be called bn_mulw_words()
200df1d0ed5Sjsing  * since only one input is an array. This is used as a step in the multiplication
201df1d0ed5Sjsing  * of word arrays.
202df1d0ed5Sjsing  */
2038889fb99Sjsing #ifndef HAVE_BN_MUL_WORDS
2048889fb99Sjsing BN_ULONG
205df1d0ed5Sjsing bn_mul_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
2068889fb99Sjsing {
2078889fb99Sjsing 	BN_ULONG carry = 0;
2088889fb99Sjsing 
2098889fb99Sjsing 	assert(num >= 0);
2108889fb99Sjsing 	if (num <= 0)
211df1d0ed5Sjsing 		return 0;
2128889fb99Sjsing 
2138889fb99Sjsing #ifndef OPENSSL_SMALL_FOOTPRINT
2148889fb99Sjsing 	while (num & ~3) {
215df1d0ed5Sjsing 		bn_mulw_addw(a[0], w, carry, &carry, &r[0]);
216df1d0ed5Sjsing 		bn_mulw_addw(a[1], w, carry, &carry, &r[1]);
217df1d0ed5Sjsing 		bn_mulw_addw(a[2], w, carry, &carry, &r[2]);
218df1d0ed5Sjsing 		bn_mulw_addw(a[3], w, carry, &carry, &r[3]);
219df1d0ed5Sjsing 		a += 4;
220df1d0ed5Sjsing 		r += 4;
2218889fb99Sjsing 		num -= 4;
2228889fb99Sjsing 	}
2238889fb99Sjsing #endif
2248889fb99Sjsing 	while (num) {
225df1d0ed5Sjsing 		bn_mulw_addw(a[0], w, carry, &carry, &r[0]);
226df1d0ed5Sjsing 		a++;
227df1d0ed5Sjsing 		r++;
2288889fb99Sjsing 		num--;
2298889fb99Sjsing 	}
230df1d0ed5Sjsing 	return carry;
2318889fb99Sjsing }
2328889fb99Sjsing #endif
2338889fb99Sjsing 
23403b14b3bSjsing /*
23503b14b3bSjsing  * bn_mul_add_words() computes (carry:r[i]) = a[i] * w + r[i] + carry, where
23603b14b3bSjsing  * a is an array of words and w is a single word. This should really be called
23703b14b3bSjsing  * bn_mulw_add_words() since only one input is an array. This is used as a step
23803b14b3bSjsing  * in the multiplication of word arrays.
23903b14b3bSjsing  */
24003b14b3bSjsing #ifndef HAVE_BN_MUL_ADD_WORDS
24103b14b3bSjsing BN_ULONG
24203b14b3bSjsing bn_mul_add_words(BN_ULONG *r, const BN_ULONG *a, int num, BN_ULONG w)
24303b14b3bSjsing {
24403b14b3bSjsing 	BN_ULONG carry = 0;
24503b14b3bSjsing 
24603b14b3bSjsing 	assert(num >= 0);
24703b14b3bSjsing 	if (num <= 0)
24803b14b3bSjsing 		return 0;
24903b14b3bSjsing 
25003b14b3bSjsing #ifndef OPENSSL_SMALL_FOOTPRINT
25103b14b3bSjsing 	while (num & ~3) {
25203b14b3bSjsing 		bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]);
25303b14b3bSjsing 		bn_mulw_addw_addw(a[1], w, r[1], carry, &carry, &r[1]);
25403b14b3bSjsing 		bn_mulw_addw_addw(a[2], w, r[2], carry, &carry, &r[2]);
25503b14b3bSjsing 		bn_mulw_addw_addw(a[3], w, r[3], carry, &carry, &r[3]);
25603b14b3bSjsing 		a += 4;
25703b14b3bSjsing 		r += 4;
25803b14b3bSjsing 		num -= 4;
25903b14b3bSjsing 	}
26003b14b3bSjsing #endif
26103b14b3bSjsing 	while (num) {
26203b14b3bSjsing 		bn_mulw_addw_addw(a[0], w, r[0], carry, &carry, &r[0]);
26303b14b3bSjsing 		a++;
26403b14b3bSjsing 		r++;
26503b14b3bSjsing 		num--;
26603b14b3bSjsing 	}
26703b14b3bSjsing 
26803b14b3bSjsing 	return carry;
26903b14b3bSjsing }
27003b14b3bSjsing #endif
27103b14b3bSjsing 
272e8d08ebaSjsing void
273e8d08ebaSjsing bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
274e8d08ebaSjsing {
275e8d08ebaSjsing 	BN_ULONG *rr;
276e8d08ebaSjsing 
277e8d08ebaSjsing 
278e8d08ebaSjsing 	if (na < nb) {
279e8d08ebaSjsing 		int itmp;
280e8d08ebaSjsing 		BN_ULONG *ltmp;
281e8d08ebaSjsing 
282e8d08ebaSjsing 		itmp = na;
283e8d08ebaSjsing 		na = nb;
284e8d08ebaSjsing 		nb = itmp;
285e8d08ebaSjsing 		ltmp = a;
286e8d08ebaSjsing 		a = b;
287e8d08ebaSjsing 		b = ltmp;
288e8d08ebaSjsing 
289e8d08ebaSjsing 	}
290e8d08ebaSjsing 	rr = &(r[na]);
291e8d08ebaSjsing 	if (nb <= 0) {
292e8d08ebaSjsing 		(void)bn_mul_words(r, a, na, 0);
293e8d08ebaSjsing 		return;
294e8d08ebaSjsing 	} else
295e8d08ebaSjsing 		rr[0] = bn_mul_words(r, a, na, b[0]);
296e8d08ebaSjsing 
297e8d08ebaSjsing 	for (;;) {
298e8d08ebaSjsing 		if (--nb <= 0)
299e8d08ebaSjsing 			return;
300e8d08ebaSjsing 		rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
301e8d08ebaSjsing 		if (--nb <= 0)
302e8d08ebaSjsing 			return;
303e8d08ebaSjsing 		rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
304e8d08ebaSjsing 		if (--nb <= 0)
305e8d08ebaSjsing 			return;
306e8d08ebaSjsing 		rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
307e8d08ebaSjsing 		if (--nb <= 0)
308e8d08ebaSjsing 			return;
309e8d08ebaSjsing 		rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
310e8d08ebaSjsing 		rr += 4;
311e8d08ebaSjsing 		r += 4;
312e8d08ebaSjsing 		b += 4;
313e8d08ebaSjsing 	}
314e8d08ebaSjsing }
315e8d08ebaSjsing 
316913ec974Sbeck #ifdef BN_RECURSION
317f6e3f262Sbeck /* Karatsuba recursive multiplication algorithm
318f6e3f262Sbeck  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
319f6e3f262Sbeck 
320913ec974Sbeck /* r is 2*n2 words in size,
321913ec974Sbeck  * a and b are both n2 words in size.
322913ec974Sbeck  * n2 must be a power of 2.
323913ec974Sbeck  * We multiply and return the result.
324913ec974Sbeck  * t must be 2*n2 words in size
325ba5406e9Sbeck  * We calculate
326913ec974Sbeck  * a[0]*b[0]
327913ec974Sbeck  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
328913ec974Sbeck  * a[1]*b[1]
329913ec974Sbeck  */
3304fcf65c5Sdjm /* dnX may not be positive, but n2/2+dnX has to be */
3312bd9bb84Sjsing void
3322bd9bb84Sjsing bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, int dna,
3332bd9bb84Sjsing     int dnb, BN_ULONG *t)
3345b37fcf3Sryker {
335913ec974Sbeck 	int n = n2 / 2, c1, c2;
3364fcf65c5Sdjm 	int tna = n + dna, tnb = n + dnb;
337913ec974Sbeck 	unsigned int neg, zero;
338913ec974Sbeck 	BN_ULONG ln, lo, *p;
3395b37fcf3Sryker 
340913ec974Sbeck # ifdef BN_MUL_COMBA
341ba5406e9Sbeck #  if 0
3422bd9bb84Sjsing 	if (n2 == 4) {
343913ec974Sbeck 		bn_mul_comba4(r, a, b);
344913ec974Sbeck 		return;
345913ec974Sbeck 	}
346ba5406e9Sbeck #  endif
3474fcf65c5Sdjm 	/* Only call bn_mul_comba 8 if n2 == 8 and the
3484fcf65c5Sdjm 	 * two arrays are complete [steve]
3494fcf65c5Sdjm 	 */
3502bd9bb84Sjsing 	if (n2 == 8 && dna == 0 && dnb == 0) {
351913ec974Sbeck 		bn_mul_comba8(r, a, b);
352913ec974Sbeck 		return;
353913ec974Sbeck 	}
354ba5406e9Sbeck # endif /* BN_MUL_COMBA */
3554fcf65c5Sdjm 	/* Else do normal multiply */
3562bd9bb84Sjsing 	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
3574fcf65c5Sdjm 		bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
3584fcf65c5Sdjm 		if ((dna + dnb) < 0)
3594fcf65c5Sdjm 			memset(&r[2*n2 + dna + dnb], 0,
3604fcf65c5Sdjm 			    sizeof(BN_ULONG) * -(dna + dnb));
361913ec974Sbeck 		return;
362913ec974Sbeck 	}
363913ec974Sbeck 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
3644fcf65c5Sdjm 	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
3654fcf65c5Sdjm 	c2 = bn_cmp_part_words(&(b[n]), b,tnb, tnb - n);
366913ec974Sbeck 	zero = neg = 0;
3672bd9bb84Sjsing 	switch (c1 * 3 + c2) {
368913ec974Sbeck 	case -4:
369a70818d0Sjsing 		bn_sub(t, n, &a[n], tna, a, n);	/* - */
370a70818d0Sjsing 		bn_sub(&t[n], n, b, n, &b[n], tnb);	/* - */
371913ec974Sbeck 		break;
372913ec974Sbeck 	case -3:
373913ec974Sbeck 		zero = 1;
374913ec974Sbeck 		break;
375913ec974Sbeck 	case -2:
376a70818d0Sjsing 		bn_sub(t, n, &a[n], tna, a, n);	/* - */
377a70818d0Sjsing 		bn_sub(&t[n], n, &b[n], tnb, b, n);	/* + */
378913ec974Sbeck 		neg = 1;
379913ec974Sbeck 		break;
380913ec974Sbeck 	case -1:
381913ec974Sbeck 	case 0:
382913ec974Sbeck 	case 1:
383913ec974Sbeck 		zero = 1;
384913ec974Sbeck 		break;
385913ec974Sbeck 	case 2:
386a70818d0Sjsing 		bn_sub(t, n, a, n, &a[n], tna);	/* + */
387a70818d0Sjsing 		bn_sub(&t[n], n, b, n, &b[n], tnb);	/* - */
388913ec974Sbeck 		neg = 1;
389913ec974Sbeck 		break;
390913ec974Sbeck 	case 3:
391913ec974Sbeck 		zero = 1;
392913ec974Sbeck 		break;
393913ec974Sbeck 	case 4:
394a70818d0Sjsing 		bn_sub(t, n, a, n, &a[n], tna);
395a70818d0Sjsing 		bn_sub(&t[n], n, &b[n], tnb, b, n);
396913ec974Sbeck 		break;
3975b37fcf3Sryker 	}
3985b37fcf3Sryker 
399913ec974Sbeck # ifdef BN_MUL_COMBA
4004fcf65c5Sdjm 	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
4014fcf65c5Sdjm 					       extra args to do this well */
4025b37fcf3Sryker 	{
403913ec974Sbeck 		if (!zero)
404913ec974Sbeck 			bn_mul_comba4(&(t[n2]), t, &(t[n]));
405913ec974Sbeck 		else
406913ec974Sbeck 			memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
407913ec974Sbeck 
408913ec974Sbeck 		bn_mul_comba4(r, a, b);
409913ec974Sbeck 		bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
4102bd9bb84Sjsing 	} else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
4114fcf65c5Sdjm 						    take extra args to do this
4124fcf65c5Sdjm 						    well */
413913ec974Sbeck 	{
414913ec974Sbeck 		if (!zero)
415913ec974Sbeck 			bn_mul_comba8(&(t[n2]), t, &(t[n]));
416913ec974Sbeck 		else
417913ec974Sbeck 			memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
418913ec974Sbeck 
419913ec974Sbeck 		bn_mul_comba8(r, a, b);
420913ec974Sbeck 		bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
4212bd9bb84Sjsing 	} else
422ba5406e9Sbeck # endif /* BN_MUL_COMBA */
423913ec974Sbeck 	{
424913ec974Sbeck 		p = &(t[n2 * 2]);
425913ec974Sbeck 		if (!zero)
4264fcf65c5Sdjm 			bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
427913ec974Sbeck 		else
428913ec974Sbeck 			memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
4294fcf65c5Sdjm 		bn_mul_recursive(r, a, b, n, 0, 0, p);
4304fcf65c5Sdjm 		bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
4315b37fcf3Sryker 	}
4325b37fcf3Sryker 
433913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
434913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
435913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
436913ec974Sbeck 	 */
4375b37fcf3Sryker 
438913ec974Sbeck 	c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
4395b37fcf3Sryker 
440913ec974Sbeck 	if (neg) /* if t[32] is negative */
4415b37fcf3Sryker 	{
442913ec974Sbeck 		c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
4432bd9bb84Sjsing 	} else {
444913ec974Sbeck 		/* Might have a carry */
445913ec974Sbeck 		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
4465b37fcf3Sryker 	}
4475b37fcf3Sryker 
448913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
449913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
450913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
451913ec974Sbeck 	 * c1 holds the carry bits
452913ec974Sbeck 	 */
453913ec974Sbeck 	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
4542bd9bb84Sjsing 	if (c1) {
455913ec974Sbeck 		p = &(r[n + n2]);
456913ec974Sbeck 		lo= *p;
457913ec974Sbeck 		ln = (lo + c1) & BN_MASK2;
458913ec974Sbeck 		*p = ln;
459913ec974Sbeck 
460913ec974Sbeck 		/* The overflow will stop before we over write
461913ec974Sbeck 		 * words we should not overwrite */
4622bd9bb84Sjsing 		if (ln < (BN_ULONG)c1) {
463913ec974Sbeck 			do {
464913ec974Sbeck 				p++;
465913ec974Sbeck 				lo= *p;
466913ec974Sbeck 				ln = (lo + 1) & BN_MASK2;
467913ec974Sbeck 				*p = ln;
468913ec974Sbeck 			} while (ln == 0);
469913ec974Sbeck 		}
470913ec974Sbeck 	}
4715b37fcf3Sryker }
4725b37fcf3Sryker 
473913ec974Sbeck /* n+tn is the word length
474913ec974Sbeck  * t needs to be n*4 is size, as does r */
4754fcf65c5Sdjm /* tnX may not be negative but less than n */
4762bd9bb84Sjsing void
4772bd9bb84Sjsing bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, int tna,
4782bd9bb84Sjsing     int tnb, BN_ULONG *t)
4795b37fcf3Sryker {
480913ec974Sbeck 	int i, j, n2 = n * 2;
481c32db552Sdjm 	int c1, c2, neg;
482913ec974Sbeck 	BN_ULONG ln, lo, *p;
4835b37fcf3Sryker 
4842bd9bb84Sjsing 	if (n < 8) {
4854fcf65c5Sdjm 		bn_mul_normal(r, a, n + tna, b, n + tnb);
486913ec974Sbeck 		return;
4875b37fcf3Sryker 	}
4885b37fcf3Sryker 
489913ec974Sbeck 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
4904fcf65c5Sdjm 	c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
4914fcf65c5Sdjm 	c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
492c32db552Sdjm 	neg = 0;
4932bd9bb84Sjsing 	switch (c1 * 3 + c2) {
494ba5406e9Sbeck 	case -4:
495a70818d0Sjsing 		bn_sub(t, n, &a[n], tna, a, n);		/* - */
496a70818d0Sjsing 		bn_sub(&t[n], n, b, n, &b[n], tnb);	/* - */
497ba5406e9Sbeck 		break;
498ba5406e9Sbeck 	case -3:
499ba5406e9Sbeck 		/* break; */
500ba5406e9Sbeck 	case -2:
501a70818d0Sjsing 		bn_sub(t, n, &a[n], tna, a, n);		/* - */
502a70818d0Sjsing 		bn_sub(&t[n], n, &b[n], tnb, b, n);	/* + */
503ba5406e9Sbeck 		neg = 1;
504ba5406e9Sbeck 		break;
505ba5406e9Sbeck 	case -1:
506ba5406e9Sbeck 	case 0:
507ba5406e9Sbeck 	case 1:
508ba5406e9Sbeck 		/* break; */
509ba5406e9Sbeck 	case 2:
510a70818d0Sjsing 		bn_sub(t, n, a, n, &a[n], tna);		/* + */
511a70818d0Sjsing 		bn_sub(&t[n], n, b, n, &b[n], tnb);	/* - */
512ba5406e9Sbeck 		neg = 1;
513ba5406e9Sbeck 		break;
514ba5406e9Sbeck 	case 3:
515ba5406e9Sbeck 		/* break; */
516ba5406e9Sbeck 	case 4:
517a70818d0Sjsing 		bn_sub(t, n, a, n, &a[n], tna);
518a70818d0Sjsing 		bn_sub(&t[n], n, &b[n], tnb, b, n);
519ba5406e9Sbeck 		break;
520ba5406e9Sbeck 	}
521ba5406e9Sbeck 		/* The zero case isn't yet implemented here. The speedup
522ba5406e9Sbeck 		   would probably be negligible. */
523ba5406e9Sbeck # if 0
5242bd9bb84Sjsing 	if (n == 4) {
525913ec974Sbeck 		bn_mul_comba4(&(t[n2]), t, &(t[n]));
526913ec974Sbeck 		bn_mul_comba4(r, a, b);
527913ec974Sbeck 		bn_mul_normal(&(r[n2]), &(a[n]), tn, &(b[n]), tn);
528913ec974Sbeck 		memset(&(r[n2 + tn * 2]), 0, sizeof(BN_ULONG) * (n2 - tn * 2));
5292bd9bb84Sjsing 	} else
530ba5406e9Sbeck # endif
5312bd9bb84Sjsing 		if (n == 8) {
532913ec974Sbeck 		bn_mul_comba8(&(t[n2]), t, &(t[n]));
533913ec974Sbeck 		bn_mul_comba8(r, a, b);
5344fcf65c5Sdjm 		bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
5352bd9bb84Sjsing 		memset(&(r[n2 + tna + tnb]), 0,
5362bd9bb84Sjsing 		    sizeof(BN_ULONG) * (n2 - tna - tnb));
5372bd9bb84Sjsing 	} else {
538913ec974Sbeck 		p = &(t[n2*2]);
5394fcf65c5Sdjm 		bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
5404fcf65c5Sdjm 		bn_mul_recursive(r, a, b, n, 0, 0, p);
541913ec974Sbeck 		i = n / 2;
542913ec974Sbeck 		/* If there is only a bottom half to the number,
543913ec974Sbeck 		 * just do it */
5444fcf65c5Sdjm 		if (tna > tnb)
5454fcf65c5Sdjm 			j = tna - i;
5464fcf65c5Sdjm 		else
5474fcf65c5Sdjm 			j = tnb - i;
5482bd9bb84Sjsing 		if (j == 0) {
5494fcf65c5Sdjm 			bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]),
5504fcf65c5Sdjm 			    i, tna - i, tnb - i, p);
5512bd9bb84Sjsing 			memset(&(r[n2 + i * 2]), 0,
5522bd9bb84Sjsing 			    sizeof(BN_ULONG) * (n2 - i * 2));
553913ec974Sbeck 		}
554913ec974Sbeck 		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
555913ec974Sbeck 		{
556913ec974Sbeck 			bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]),
5574fcf65c5Sdjm 			    i, tna - i, tnb - i, p);
5584fcf65c5Sdjm 			memset(&(r[n2 + tna + tnb]), 0,
5594fcf65c5Sdjm 			    sizeof(BN_ULONG) * (n2 - tna - tnb));
560913ec974Sbeck 		}
561913ec974Sbeck 		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
562913ec974Sbeck 		{
563913ec974Sbeck 			memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
5642bd9bb84Sjsing 			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
5652bd9bb84Sjsing 			    tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
5662bd9bb84Sjsing 				bn_mul_normal(&(r[n2]), &(a[n]), tna,
5672bd9bb84Sjsing 				    &(b[n]), tnb);
5682bd9bb84Sjsing 			} else {
5692bd9bb84Sjsing 				for (;;) {
570913ec974Sbeck 					i /= 2;
5714fcf65c5Sdjm 					/* these simplified conditions work
5724fcf65c5Sdjm 					 * exclusively because difference
5734fcf65c5Sdjm 					 * between tna and tnb is 1 or 0 */
5742bd9bb84Sjsing 					if (i < tna || i < tnb) {
575913ec974Sbeck 						bn_mul_part_recursive(&(r[n2]),
5762bd9bb84Sjsing 						    &(a[n]), &(b[n]), i,
5772bd9bb84Sjsing 						    tna - i, tnb - i, p);
578913ec974Sbeck 						break;
5792bd9bb84Sjsing 					} else if (i == tna || i == tnb) {
580913ec974Sbeck 						bn_mul_recursive(&(r[n2]),
5812bd9bb84Sjsing 						    &(a[n]), &(b[n]), i,
5822bd9bb84Sjsing 						    tna - i, tnb - i, p);
583913ec974Sbeck 						break;
584913ec974Sbeck 					}
585913ec974Sbeck 				}
586913ec974Sbeck 			}
587913ec974Sbeck 		}
5885b37fcf3Sryker 	}
5895b37fcf3Sryker 
590913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
591913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
592913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
593913ec974Sbeck 	 */
5945b37fcf3Sryker 
595913ec974Sbeck 	c1 = (int)(bn_add_words(t, r,&(r[n2]), n2));
596ba5406e9Sbeck 
597ba5406e9Sbeck 	if (neg) /* if t[32] is negative */
598ba5406e9Sbeck 	{
599913ec974Sbeck 		c1 -= (int)(bn_sub_words(&(t[n2]), t,&(t[n2]), n2));
6002bd9bb84Sjsing 	} else {
601ba5406e9Sbeck 		/* Might have a carry */
602ba5406e9Sbeck 		c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
603ba5406e9Sbeck 	}
6045b37fcf3Sryker 
605913ec974Sbeck 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
606913ec974Sbeck 	 * r[10] holds (a[0]*b[0])
607913ec974Sbeck 	 * r[32] holds (b[1]*b[1])
608913ec974Sbeck 	 * c1 holds the carry bits
609913ec974Sbeck 	 */
610913ec974Sbeck 	c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
6112bd9bb84Sjsing 	if (c1) {
612913ec974Sbeck 		p = &(r[n + n2]);
613913ec974Sbeck 		lo= *p;
614913ec974Sbeck 		ln = (lo + c1)&BN_MASK2;
615913ec974Sbeck 		*p = ln;
6165b37fcf3Sryker 
617913ec974Sbeck 		/* The overflow will stop before we over write
618913ec974Sbeck 		 * words we should not overwrite */
6192bd9bb84Sjsing 		if (ln < (BN_ULONG)c1) {
620913ec974Sbeck 			do {
621913ec974Sbeck 				p++;
622913ec974Sbeck 				lo= *p;
623913ec974Sbeck 				ln = (lo + 1) & BN_MASK2;
624913ec974Sbeck 				*p = ln;
625913ec974Sbeck 			} while (ln == 0);
626913ec974Sbeck 		}
627913ec974Sbeck 	}
628913ec974Sbeck }
629ba5406e9Sbeck #endif /* BN_RECURSION */
630913ec974Sbeck 
6319554b5edSjsing #ifndef HAVE_BN_MUL
6329554b5edSjsing #ifndef BN_RECURSION
6332bd9bb84Sjsing int
6349554b5edSjsing bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx)
635913ec974Sbeck {
6369554b5edSjsing 	bn_mul_normal(r->d, a->d, a->top, b->d, b->top);
6379554b5edSjsing 
6389554b5edSjsing 	return 1;
6399554b5edSjsing }
6409554b5edSjsing 
6419554b5edSjsing #else /* BN_RECURSION */
6429554b5edSjsing int
6439554b5edSjsing bn_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, int rn, BN_CTX *ctx)
6449554b5edSjsing {
6454fcf65c5Sdjm 	BIGNUM *t = NULL;
6469554b5edSjsing 	int al, bl, i, k;
6479554b5edSjsing 	int j = 0;
6489554b5edSjsing 	int ret = 0;
649913ec974Sbeck 
6509554b5edSjsing 	BN_CTX_start(ctx);
651913ec974Sbeck 
652913ec974Sbeck 	al = a->top;
653913ec974Sbeck 	bl = b->top;
654913ec974Sbeck 
655ba5406e9Sbeck 	i = al - bl;
6569554b5edSjsing 
6572bd9bb84Sjsing 	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
6582bd9bb84Sjsing 		if (i >= -1 && i <= 1) {
6594fcf65c5Sdjm 			/* Find out the power of two lower or equal
6604fcf65c5Sdjm 			   to the longest of the two numbers */
6612bd9bb84Sjsing 			if (i >= 0) {
6624fcf65c5Sdjm 				j = BN_num_bits_word((BN_ULONG)al);
6634fcf65c5Sdjm 			}
6642bd9bb84Sjsing 			if (i == -1) {
6654fcf65c5Sdjm 				j = BN_num_bits_word((BN_ULONG)bl);
6664fcf65c5Sdjm 			}
6674fcf65c5Sdjm 			j = 1 << (j - 1);
6684fcf65c5Sdjm 			assert(j <= al || j <= bl);
6694fcf65c5Sdjm 			k = j + j;
670aa389b8cSjsing 			if ((t = BN_CTX_get(ctx)) == NULL)
6710a5d6edeSdjm 				goto err;
6722bd9bb84Sjsing 			if (al > j || bl > j) {
673e729bd5aSjsing 				if (!bn_wexpand(t, k * 4))
6742bd9bb84Sjsing 					goto err;
6759554b5edSjsing 				if (!bn_wexpand(r, k * 4))
6762bd9bb84Sjsing 					goto err;
6779554b5edSjsing 				bn_mul_part_recursive(r->d, a->d, b->d,
6784fcf65c5Sdjm 				    j, al - j, bl - j, t->d);
6794fcf65c5Sdjm 			}
6804fcf65c5Sdjm 			else	/* al <= j || bl <= j */
6814fcf65c5Sdjm 			{
682e729bd5aSjsing 				if (!bn_wexpand(t, k * 2))
6832bd9bb84Sjsing 					goto err;
6849554b5edSjsing 				if (!bn_wexpand(r, k * 2))
6852bd9bb84Sjsing 					goto err;
6869554b5edSjsing 				bn_mul_recursive(r->d, a->d, b->d,
6874fcf65c5Sdjm 				    j, al - j, bl - j, t->d);
6884fcf65c5Sdjm 			}
6899554b5edSjsing 			r->top = rn;
6904fcf65c5Sdjm 			goto end;
6914fcf65c5Sdjm 		}
6924fcf65c5Sdjm #if 0
6932bd9bb84Sjsing 		if (i == 1 && !BN_get_flags(b, BN_FLG_STATIC_DATA)) {
6944fcf65c5Sdjm 			BIGNUM *tmp_bn = (BIGNUM *)b;
695e729bd5aSjsing 			if (!bn_wexpand(tmp_bn, al))
6962bd9bb84Sjsing 				goto err;
6974fcf65c5Sdjm 			tmp_bn->d[bl] = 0;
698913ec974Sbeck 			bl++;
699ba5406e9Sbeck 			i--;
7002bd9bb84Sjsing 		} else if (i == -1 && !BN_get_flags(a, BN_FLG_STATIC_DATA)) {
7014fcf65c5Sdjm 			BIGNUM *tmp_bn = (BIGNUM *)a;
702e729bd5aSjsing 			if (!bn_wexpand(tmp_bn, bl))
7032bd9bb84Sjsing 				goto err;
7044fcf65c5Sdjm 			tmp_bn->d[al] = 0;
705913ec974Sbeck 			al++;
706ba5406e9Sbeck 			i++;
707913ec974Sbeck 		}
7082bd9bb84Sjsing 		if (i == 0) {
709ba5406e9Sbeck 			/* symmetric and > 4 */
710913ec974Sbeck 			/* 16 or larger */
711913ec974Sbeck 			j = BN_num_bits_word((BN_ULONG)al);
712913ec974Sbeck 			j = 1 << (j - 1);
713913ec974Sbeck 			k = j + j;
714aa389b8cSjsing 			if ((t = BN_CTX_get(ctx)) == NULL)
715aa389b8cSjsing 				goto err;
716913ec974Sbeck 			if (al == j) /* exact multiple */
717913ec974Sbeck 			{
718e729bd5aSjsing 				if (!bn_wexpand(t, k * 2))
7192bd9bb84Sjsing 					goto err;
7209554b5edSjsing 				if (!bn_wexpand(r, k * 2))
7212bd9bb84Sjsing 					goto err;
7229554b5edSjsing 				bn_mul_recursive(r->d, a->d, b->d, al, t->d);
7232bd9bb84Sjsing 			} else {
724e729bd5aSjsing 				if (!bn_wexpand(t, k * 4))
7252bd9bb84Sjsing 					goto err;
7269554b5edSjsing 				if (!bn_wexpand(r, k * 4))
7272bd9bb84Sjsing 					goto err;
7289554b5edSjsing 				bn_mul_part_recursive(r->d, a->d, b->d,
7292bd9bb84Sjsing 				    al - j, j, t->d);
730913ec974Sbeck 			}
7319554b5edSjsing 			r->top = top;
732ba5406e9Sbeck 			goto end;
733ba5406e9Sbeck 		}
7344fcf65c5Sdjm #endif
735767fe2ffSmarkus 	}
736ba5406e9Sbeck 
7379554b5edSjsing 	bn_mul_normal(r->d, a->d, al, b->d, bl);
7389554b5edSjsing 
739913ec974Sbeck  end:
740ba5406e9Sbeck 	ret = 1;
741ba5406e9Sbeck  err:
742ba5406e9Sbeck 	BN_CTX_end(ctx);
7439554b5edSjsing 
7449554b5edSjsing 	return ret;
7459554b5edSjsing }
7469554b5edSjsing #endif /* BN_RECURSION */
7479554b5edSjsing #endif /* HAVE_BN_MUL */
7489554b5edSjsing 
7499554b5edSjsing int
7509554b5edSjsing BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
7519554b5edSjsing {
7529554b5edSjsing 	BIGNUM *rr;
7539554b5edSjsing 	int rn;
7549554b5edSjsing 	int ret = 0;
7559554b5edSjsing 
7569554b5edSjsing 	BN_CTX_start(ctx);
7579554b5edSjsing 
7589554b5edSjsing 	if (BN_is_zero(a) || BN_is_zero(b)) {
7599554b5edSjsing 		BN_zero(r);
7609554b5edSjsing 		goto done;
7619554b5edSjsing 	}
7629554b5edSjsing 
7639554b5edSjsing 	rr = r;
7649554b5edSjsing 	if (rr == a || rr == b)
7659554b5edSjsing 		rr = BN_CTX_get(ctx);
7669554b5edSjsing 	if (rr == NULL)
7679554b5edSjsing 		goto err;
7689554b5edSjsing 
7699554b5edSjsing 	rn = a->top + b->top;
7709554b5edSjsing 	if (rn < a->top)
7719554b5edSjsing 		goto err;
7729554b5edSjsing 	if (!bn_wexpand(rr, rn))
7739554b5edSjsing 		goto err;
7749554b5edSjsing 
7759554b5edSjsing 	if (a->top == 4 && b->top == 4) {
7769554b5edSjsing 		bn_mul_comba4(rr->d, a->d, b->d);
7779554b5edSjsing 	} else if (a->top == 8 && b->top == 8) {
7789554b5edSjsing 		bn_mul_comba8(rr->d, a->d, b->d);
7799554b5edSjsing 	} else {
7809554b5edSjsing 		if (!bn_mul(rr, a, b, rn, ctx))
7819554b5edSjsing 			goto err;
7829554b5edSjsing 	}
7839554b5edSjsing 
7849554b5edSjsing 	rr->top = rn;
7859554b5edSjsing 	bn_correct_top(rr);
7869554b5edSjsing 
787896da13fSjsing 	BN_set_negative(rr, a->neg ^ b->neg);
788896da13fSjsing 
789*363923baStb 	if (r != rr) {
790*363923baStb 		if (!bn_copy(r, rr))
791*363923baStb 			goto err;
792*363923baStb 	}
7939554b5edSjsing  done:
7949554b5edSjsing 	ret = 1;
7959554b5edSjsing  err:
7969554b5edSjsing 	BN_CTX_end(ctx);
7979554b5edSjsing 
7989554b5edSjsing 	return ret;
799913ec974Sbeck }
800