1 /* $OpenBSD: bn_sqr.c,v 1.36 2023/07/08 12:21:58 beck Exp $ */ 2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 59 #include <assert.h> 60 #include <stdio.h> 61 #include <string.h> 62 63 #include "bn_arch.h" 64 #include "bn_local.h" 65 #include "bn_internal.h" 66 67 int bn_sqr(BIGNUM *r, const BIGNUM *a, int max, BN_CTX *ctx); 68 69 /* 70 * bn_sqr_comba4() computes r[] = a[] * a[] using Comba multiplication 71 * (https://everything2.com/title/Comba+multiplication), where a is a 72 * four word array, producing an eight word array result. 73 */ 74 #ifndef HAVE_BN_SQR_COMBA4 75 void 76 bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) 77 { 78 BN_ULONG c2, c1, c0; 79 80 bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]); 81 82 bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]); 83 84 bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0); 85 bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]); 86 87 bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0); 88 bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]); 89 90 bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0); 91 bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &r[4]); 92 93 bn_mul2_mulw_addtw(a[3], a[2], 0, c2, c1, &c2, &c1, &r[5]); 94 95 bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &r[7], &r[6]); 96 } 97 #endif 98 99 /* 100 * bn_sqr_comba8() computes r[] = a[] * a[] using Comba multiplication 101 * (https://everything2.com/title/Comba+multiplication), where a is an 102 * eight word array, producing an 16 word array result. 103 */ 104 #ifndef HAVE_BN_SQR_COMBA8 105 void 106 bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) 107 { 108 BN_ULONG c2, c1, c0; 109 110 bn_mulw_addtw(a[0], a[0], 0, 0, 0, &c2, &c1, &r[0]); 111 112 bn_mul2_mulw_addtw(a[1], a[0], 0, c2, c1, &c2, &c1, &r[1]); 113 114 bn_mulw_addtw(a[1], a[1], 0, c2, c1, &c2, &c1, &c0); 115 bn_mul2_mulw_addtw(a[2], a[0], c2, c1, c0, &c2, &c1, &r[2]); 116 117 bn_mul2_mulw_addtw(a[3], a[0], 0, c2, c1, &c2, &c1, &c0); 118 bn_mul2_mulw_addtw(a[2], a[1], c2, c1, c0, &c2, &c1, &r[3]); 119 120 bn_mulw_addtw(a[2], a[2], 0, c2, c1, &c2, &c1, &c0); 121 bn_mul2_mulw_addtw(a[3], a[1], c2, c1, c0, &c2, &c1, &c0); 122 bn_mul2_mulw_addtw(a[4], a[0], c2, c1, c0, &c2, &c1, &r[4]); 123 124 bn_mul2_mulw_addtw(a[5], a[0], 0, c2, c1, &c2, &c1, &c0); 125 bn_mul2_mulw_addtw(a[4], a[1], c2, c1, c0, &c2, &c1, &c0); 126 bn_mul2_mulw_addtw(a[3], a[2], c2, c1, c0, &c2, &c1, &r[5]); 127 128 bn_mulw_addtw(a[3], a[3], 0, c2, c1, &c2, &c1, &c0); 129 bn_mul2_mulw_addtw(a[4], a[2], c2, c1, c0, &c2, &c1, &c0); 130 bn_mul2_mulw_addtw(a[5], a[1], c2, c1, c0, &c2, &c1, &c0); 131 bn_mul2_mulw_addtw(a[6], a[0], c2, c1, c0, &c2, &c1, &r[6]); 132 133 bn_mul2_mulw_addtw(a[7], a[0], 0, c2, c1, &c2, &c1, &c0); 134 bn_mul2_mulw_addtw(a[6], a[1], c2, c1, c0, &c2, &c1, &c0); 135 bn_mul2_mulw_addtw(a[5], a[2], c2, c1, c0, &c2, &c1, &c0); 136 bn_mul2_mulw_addtw(a[4], a[3], c2, c1, c0, &c2, &c1, &r[7]); 137 138 bn_mulw_addtw(a[4], a[4], 0, c2, c1, &c2, &c1, &c0); 139 bn_mul2_mulw_addtw(a[5], a[3], c2, c1, c0, &c2, &c1, &c0); 140 bn_mul2_mulw_addtw(a[6], a[2], c2, c1, c0, &c2, &c1, &c0); 141 bn_mul2_mulw_addtw(a[7], a[1], c2, c1, c0, &c2, &c1, &r[8]); 142 143 bn_mul2_mulw_addtw(a[7], a[2], 0, c2, c1, &c2, &c1, &c0); 144 bn_mul2_mulw_addtw(a[6], a[3], c2, c1, c0, &c2, &c1, &c0); 145 bn_mul2_mulw_addtw(a[5], a[4], c2, c1, c0, &c2, &c1, &r[9]); 146 147 bn_mulw_addtw(a[5], a[5], 0, c2, c1, &c2, &c1, &c0); 148 bn_mul2_mulw_addtw(a[6], a[4], c2, c1, c0, &c2, &c1, &c0); 149 bn_mul2_mulw_addtw(a[7], a[3], c2, c1, c0, &c2, &c1, &r[10]); 150 151 bn_mul2_mulw_addtw(a[7], a[4], 0, c2, c1, &c2, &c1, &c0); 152 bn_mul2_mulw_addtw(a[6], a[5], c2, c1, c0, &c2, &c1, &r[11]); 153 154 bn_mulw_addtw(a[6], a[6], 0, c2, c1, &c2, &c1, &c0); 155 bn_mul2_mulw_addtw(a[7], a[5], c2, c1, c0, &c2, &c1, &r[12]); 156 157 bn_mul2_mulw_addtw(a[7], a[6], 0, c2, c1, &c2, &c1, &r[13]); 158 159 bn_mulw_addtw(a[7], a[7], 0, c2, c1, &c2, &r[15], &r[14]); 160 } 161 #endif 162 163 #ifndef HAVE_BN_SQR 164 /* 165 * bn_sqr_add_words() computes (r[i*2+1]:r[i*2]) = (r[i*2+1]:r[i*2]) + a[i] * a[i]. 166 */ 167 static void 168 bn_sqr_add_words(BN_ULONG *r, const BN_ULONG *a, int n) 169 { 170 BN_ULONG x3, x2, x1, x0; 171 BN_ULONG carry = 0; 172 173 assert(n >= 0); 174 if (n <= 0) 175 return; 176 177 while (n & ~3) { 178 bn_mulw(a[0], a[0], &x1, &x0); 179 bn_mulw(a[1], a[1], &x3, &x2); 180 bn_qwaddqw(x3, x2, x1, x0, r[3], r[2], r[1], r[0], carry, 181 &carry, &r[3], &r[2], &r[1], &r[0]); 182 bn_mulw(a[2], a[2], &x1, &x0); 183 bn_mulw(a[3], a[3], &x3, &x2); 184 bn_qwaddqw(x3, x2, x1, x0, r[7], r[6], r[5], r[4], carry, 185 &carry, &r[7], &r[6], &r[5], &r[4]); 186 187 a += 4; 188 r += 8; 189 n -= 4; 190 } 191 while (n) { 192 bn_mulw_addw_addw(a[0], a[0], r[0], carry, &carry, &r[0]); 193 bn_addw(r[1], carry, &carry, &r[1]); 194 a++; 195 r += 2; 196 n--; 197 } 198 } 199 200 static void 201 bn_sqr_normal(BN_ULONG *r, int r_len, const BN_ULONG *a, int a_len) 202 { 203 const BN_ULONG *ap; 204 BN_ULONG *rp; 205 BN_ULONG w; 206 int n; 207 208 if (a_len <= 0) 209 return; 210 211 ap = a; 212 w = ap[0]; 213 ap++; 214 215 rp = r; 216 rp[0] = rp[r_len - 1] = 0; 217 rp++; 218 219 /* Compute initial product - r[n:1] = a[n:1] * a[0] */ 220 n = a_len - 1; 221 if (n > 0) { 222 rp[n] = bn_mul_words(rp, ap, n, w); 223 } 224 rp += 2; 225 n--; 226 227 /* Compute and sum remaining products. */ 228 while (n > 0) { 229 w = ap[0]; 230 ap++; 231 232 rp[n] = bn_mul_add_words(rp, ap, n, w); 233 rp += 2; 234 n--; 235 } 236 237 /* Double the sum of products. */ 238 bn_add_words(r, r, r, r_len); 239 240 /* Add squares. */ 241 bn_sqr_add_words(r, a, a_len); 242 } 243 244 /* 245 * bn_sqr() computes a * a, storing the result in r. The caller must ensure that 246 * r is not the same BIGNUM as a and that r has been expanded to rn = a->top * 2 247 * words. 248 */ 249 int 250 bn_sqr(BIGNUM *r, const BIGNUM *a, int r_len, BN_CTX *ctx) 251 { 252 bn_sqr_normal(r->d, r_len, a->d, a->top); 253 254 return 1; 255 } 256 #endif 257 258 int 259 BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) 260 { 261 BIGNUM *rr; 262 int r_len; 263 int ret = 1; 264 265 BN_CTX_start(ctx); 266 267 if (a->top < 1) { 268 BN_zero(r); 269 goto done; 270 } 271 272 if ((rr = r) == a) 273 rr = BN_CTX_get(ctx); 274 if (rr == NULL) 275 goto err; 276 277 if ((r_len = a->top * 2) < a->top) 278 goto err; 279 if (!bn_wexpand(rr, r_len)) 280 goto err; 281 282 if (a->top == 4) { 283 bn_sqr_comba4(rr->d, a->d); 284 } else if (a->top == 8) { 285 bn_sqr_comba8(rr->d, a->d); 286 } else { 287 if (!bn_sqr(rr, a, r_len, ctx)) 288 goto err; 289 } 290 291 rr->top = r_len; 292 bn_correct_top(rr); 293 294 rr->neg = 0; 295 296 if (!bn_copy(r, rr)) 297 goto err; 298 done: 299 ret = 1; 300 err: 301 BN_CTX_end(ctx); 302 303 return ret; 304 } 305 LCRYPTO_ALIAS(BN_sqr); 306