xref: /openbsd/lib/libcrypto/bn/bn_sqr.c (revision 3bef86f7)
1 /* $OpenBSD: bn_sqr.c,v 1.36 2023/07/08 12:21:58 beck Exp $ */
2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3  * All rights reserved.
4  *
5  * This package is an SSL implementation written
6  * by Eric Young (eay@cryptsoft.com).
7  * The implementation was written so as to conform with Netscapes SSL.
8  *
9  * This library is free for commercial and non-commercial use as long as
10  * the following conditions are aheared to.  The following conditions
11  * apply to all code found in this distribution, be it the RC4, RSA,
12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13  * included with this distribution is covered by the same copyright terms
14  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15  *
16  * Copyright remains Eric Young's, and as such any Copyright notices in
17  * the code are not to be removed.
18  * If this package is used in a product, Eric Young should be given attribution
19  * as the author of the parts of the library used.
20  * This can be in the form of a textual message at program startup or
21  * in documentation (online or textual) provided with the package.
22  *
23  * Redistribution and use in source and binary forms, with or without
24  * modification, are permitted provided that the following conditions
25  * are met:
26  * 1. Redistributions of source code must retain the copyright
27  *    notice, this list of conditions and the following disclaimer.
28  * 2. Redistributions in binary form must reproduce the above copyright
29  *    notice, this list of conditions and the following disclaimer in the
30  *    documentation and/or other materials provided with the distribution.
31  * 3. All advertising materials mentioning features or use of this software
32  *    must display the following acknowledgement:
33  *    "This product includes cryptographic software written by
34  *     Eric Young (eay@cryptsoft.com)"
35  *    The word 'cryptographic' can be left out if the rouines from the library
36  *    being used are not cryptographic related :-).
37  * 4. If you include any Windows specific code (or a derivative thereof) from
38  *    the apps directory (application code) you must include an acknowledgement:
39  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40  *
41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51  * SUCH DAMAGE.
52  *
53  * The licence and distribution terms for any publically available version or
54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
55  * copied and put under another distribution licence
56  * [including the GNU Public Licence.]
57  */
58 
59 #include <assert.h>
60 #include <stdio.h>
61 #include <string.h>
62 
63 #include "bn_arch.h"
64 #include "bn_local.h"
65 #include "bn_internal.h"
66 
67 int bn_sqr(BIGNUM *r, const BIGNUM *a, int max, BN_CTX *ctx);
68 
69 /*
70  * bn_sqr_comba4() computes r[] = a[] * a[] using Comba multiplication
71  * (https://everything2.com/title/Comba+multiplication), where a is a
72  * four word array, producing an eight word array result.
73  */
74 #ifndef HAVE_BN_SQR_COMBA4
75 void
76 bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
77 {
78 	BN_ULONG c2, c1, c0;
79 
80 	bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]);
81 
82 	bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]);
83 
84 	bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0);
85 	bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]);
86 
87 	bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0);
88 	bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]);
89 
90 	bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0);
91 	bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &r[4]);
92 
93 	bn_mul2_mulw_addtw(a[3], a[2], 0, c2, c1, &c2, &c1, &r[5]);
94 
95 	bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &r[7], &r[6]);
96 }
97 #endif
98 
99 /*
100  * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication
101  * (https://everything2.com/title/Comba+multiplication), where a is an
102  * eight word array, producing an 16 word array result.
103  */
104 #ifndef HAVE_BN_SQR_COMBA8
105 void
106 bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
107 {
108 	BN_ULONG c2, c1, c0;
109 
110 	bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]);
111 
112 	bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]);
113 
114 	bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0);
115 	bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]);
116 
117 	bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0);
118 	bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]);
119 
120 	bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0);
121 	bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &c0);
122 	bn_mul2_mulw_addtw(a[4], a[0], c2, c1, c0, &c2, &c1, &r[4]);
123 
124 	bn_mul2_mulw_addtw(a[5], a[0], 0, c2, c1, &c2, &c1, &c0);
125 	bn_mul2_mulw_addtw(a[4], a[1], c2, c1, c0, &c2, &c1, &c0);
126 	bn_mul2_mulw_addtw(a[3], a[2], c2, c1, c0, &c2, &c1, &r[5]);
127 
128 	bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &c1, &c0);
129 	bn_mul2_mulw_addtw(a[4], a[2], c2, c1, c0, &c2, &c1, &c0);
130 	bn_mul2_mulw_addtw(a[5], a[1], c2, c1, c0, &c2, &c1, &c0);
131 	bn_mul2_mulw_addtw(a[6], a[0], c2, c1, c0, &c2, &c1, &r[6]);
132 
133 	bn_mul2_mulw_addtw(a[7], a[0], 0, c2, c1, &c2, &c1, &c0);
134 	bn_mul2_mulw_addtw(a[6], a[1], c2, c1, c0, &c2, &c1, &c0);
135 	bn_mul2_mulw_addtw(a[5], a[2], c2, c1, c0, &c2, &c1, &c0);
136 	bn_mul2_mulw_addtw(a[4], a[3], c2, c1, c0, &c2, &c1, &r[7]);
137 
138 	bn_mulw_addtw(a[4], a[4], 0, c2, c1, &c2, &c1, &c0);
139 	bn_mul2_mulw_addtw(a[5], a[3], c2, c1, c0, &c2, &c1, &c0);
140 	bn_mul2_mulw_addtw(a[6], a[2], c2, c1, c0, &c2, &c1, &c0);
141 	bn_mul2_mulw_addtw(a[7], a[1], c2, c1, c0, &c2, &c1, &r[8]);
142 
143 	bn_mul2_mulw_addtw(a[7], a[2], 0, c2, c1, &c2, &c1, &c0);
144 	bn_mul2_mulw_addtw(a[6], a[3], c2, c1, c0, &c2, &c1, &c0);
145 	bn_mul2_mulw_addtw(a[5], a[4], c2, c1, c0, &c2, &c1, &r[9]);
146 
147 	bn_mulw_addtw(a[5], a[5], 0, c2, c1, &c2, &c1, &c0);
148 	bn_mul2_mulw_addtw(a[6], a[4], c2, c1, c0, &c2, &c1, &c0);
149 	bn_mul2_mulw_addtw(a[7], a[3], c2, c1, c0, &c2, &c1, &r[10]);
150 
151 	bn_mul2_mulw_addtw(a[7], a[4], 0, c2, c1, &c2, &c1, &c0);
152 	bn_mul2_mulw_addtw(a[6], a[5], c2, c1, c0, &c2, &c1, &r[11]);
153 
154 	bn_mulw_addtw(a[6], a[6], 0, c2, c1, &c2, &c1, &c0);
155 	bn_mul2_mulw_addtw(a[7], a[5], c2, c1, c0, &c2, &c1, &r[12]);
156 
157 	bn_mul2_mulw_addtw(a[7], a[6], 0, c2, c1, &c2, &c1, &r[13]);
158 
159 	bn_mulw_addtw(a[7], a[7], 0, c2, c1, &c2, &r[15], &r[14]);
160 }
161 #endif
162 
163 #ifndef HAVE_BN_SQR
164 /*
165  * bn_sqr_add_words() computes (r[i*2+1]:r[i*2]) = (r[i*2+1]:r[i*2]) + a[i] * a[i].
166  */
167 static void
168 bn_sqr_add_words(BN_ULONG *r, const BN_ULONG *a, int n)
169 {
170 	BN_ULONG x3, x2, x1, x0;
171 	BN_ULONG carry = 0;
172 
173 	assert(n >= 0);
174 	if (n <= 0)
175 		return;
176 
177 	while (n & ~3) {
178 		bn_mulw(a[0], a[0], &x1, &x0);
179 		bn_mulw(a[1], a[1], &x3, &x2);
180 		bn_qwaddqw(x3, x2, x1, x0, r[3], r[2], r[1], r[0], carry,
181 		    &carry, &r[3], &r[2], &r[1], &r[0]);
182 		bn_mulw(a[2], a[2], &x1, &x0);
183 		bn_mulw(a[3], a[3], &x3, &x2);
184 		bn_qwaddqw(x3, x2, x1, x0, r[7], r[6], r[5], r[4], carry,
185 		    &carry, &r[7], &r[6], &r[5], &r[4]);
186 
187 		a += 4;
188 		r += 8;
189 		n -= 4;
190 	}
191 	while (n) {
192 		bn_mulw_addw_addw(a[0], a[0], r[0], carry, &carry, &r[0]);
193 		bn_addw(r[1], carry, &carry, &r[1]);
194 		a++;
195 		r += 2;
196 		n--;
197 	}
198 }
199 
200 static void
201 bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len)
202 {
203 	const BN_ULONG *ap;
204 	BN_ULONG *rp;
205 	BN_ULONG w;
206 	int n;
207 
208 	if (a_len <= 0)
209 		return;
210 
211 	ap = a;
212 	w = ap[0];
213 	ap++;
214 
215 	rp = r;
216 	rp[0] = rp[r_len - 1] = 0;
217 	rp++;
218 
219 	/* Compute initial product - r[n:1] = a[n:1] * a[0] */
220 	n = a_len - 1;
221 	if (n > 0) {
222 		rp[n] = bn_mul_words(rp, ap, n, w);
223 	}
224 	rp += 2;
225 	n--;
226 
227 	/* Compute and sum remaining products. */
228 	while (n > 0) {
229 		w = ap[0];
230 		ap++;
231 
232 		rp[n] = bn_mul_add_words(rp, ap, n, w);
233 		rp += 2;
234 		n--;
235 	}
236 
237 	/* Double the sum of products. */
238 	bn_add_words(r, r, r, r_len);
239 
240 	/* Add squares. */
241 	bn_sqr_add_words(r, a, a_len);
242 }
243 
244 /*
245  * bn_sqr() computes a * a, storing the result in r. The caller must ensure that
246  * r is not the same BIGNUM as a and that r has been expanded to rn = a->top * 2
247  * words.
248  */
249 int
250 bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx)
251 {
252 	bn_sqr_normal(r->d, r_len, a->d, a->top);
253 
254 	return 1;
255 }
256 #endif
257 
258 int
259 BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
260 {
261 	BIGNUM *rr;
262 	int r_len;
263 	int ret = 1;
264 
265 	BN_CTX_start(ctx);
266 
267 	if (a->top < 1) {
268 		BN_zero(r);
269 		goto done;
270 	}
271 
272 	if ((rr = r) == a)
273 		rr = BN_CTX_get(ctx);
274 	if (rr == NULL)
275 		goto err;
276 
277 	if ((r_len = a->top * 2) < a->top)
278 		goto err;
279 	if (!bn_wexpand(rr, r_len))
280 		goto err;
281 
282 	if (a->top == 4) {
283 		bn_sqr_comba4(rr->d, a->d);
284 	} else if (a->top == 8) {
285 		bn_sqr_comba8(rr->d, a->d);
286 	} else {
287 		if (!bn_sqr(rr, a, r_len, ctx))
288 			goto err;
289 	}
290 
291 	rr->top = r_len;
292 	bn_correct_top(rr);
293 
294 	rr->neg = 0;
295 
296 	if (!bn_copy(r, rr))
297 		goto err;
298  done:
299 	ret = 1;
300  err:
301 	BN_CTX_end(ctx);
302 
303 	return ret;
304 }
305 LCRYPTO_ALIAS(BN_sqr);
306