1 /* $OpenBSD: bn_gf2m.c,v 1.20 2015/06/11 15:55:28 jsing Exp $ */ 2 /* ==================================================================== 3 * Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED. 4 * 5 * The Elliptic Curve Public-Key Crypto Library (ECC Code) included 6 * herein is developed by SUN MICROSYSTEMS, INC., and is contributed 7 * to the OpenSSL project. 8 * 9 * The ECC Code is licensed pursuant to the OpenSSL open source 10 * license provided below. 11 * 12 * In addition, Sun covenants to all licensees who provide a reciprocal 13 * covenant with respect to their own patents if any, not to sue under 14 * current and future patent claims necessarily infringed by the making, 15 * using, practicing, selling, offering for sale and/or otherwise 16 * disposing of the ECC Code as delivered hereunder (or portions thereof), 17 * provided that such covenant shall not apply: 18 * 1) for code that a licensee deletes from the ECC Code; 19 * 2) separates from the ECC Code; or 20 * 3) for infringements caused by: 21 * i) the modification of the ECC Code or 22 * ii) the combination of the ECC Code with other software or 23 * devices where such combination causes the infringement. 24 * 25 * The software is originally written by Sheueling Chang Shantz and 26 * Douglas Stebila of Sun Microsystems Laboratories. 27 * 28 */ 29 30 /* NOTE: This file is licensed pursuant to the OpenSSL license below 31 * and may be modified; but after modifications, the above covenant 32 * may no longer apply! In such cases, the corresponding paragraph 33 * ["In addition, Sun covenants ... causes the infringement."] and 34 * this note can be edited out; but please keep the Sun copyright 35 * notice and attribution. */ 36 37 /* ==================================================================== 38 * Copyright (c) 1998-2002 The OpenSSL Project. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in 49 * the documentation and/or other materials provided with the 50 * distribution. 51 * 52 * 3. All advertising materials mentioning features or use of this 53 * software must display the following acknowledgment: 54 * "This product includes software developed by the OpenSSL Project 55 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 56 * 57 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 58 * endorse or promote products derived from this software without 59 * prior written permission. For written permission, please contact 60 * openssl-core@openssl.org. 61 * 62 * 5. Products derived from this software may not be called "OpenSSL" 63 * nor may "OpenSSL" appear in their names without prior written 64 * permission of the OpenSSL Project. 65 * 66 * 6. Redistributions of any form whatsoever must retain the following 67 * acknowledgment: 68 * "This product includes software developed by the OpenSSL Project 69 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 70 * 71 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 72 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 73 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 74 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 75 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 76 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 77 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 78 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 79 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 80 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 81 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 82 * OF THE POSSIBILITY OF SUCH DAMAGE. 83 * ==================================================================== 84 * 85 * This product includes cryptographic software written by Eric Young 86 * (eay@cryptsoft.com). This product includes software written by Tim 87 * Hudson (tjh@cryptsoft.com). 88 * 89 */ 90 91 #include <limits.h> 92 #include <stdio.h> 93 94 #include <openssl/opensslconf.h> 95 96 #include <openssl/err.h> 97 98 #include "bn_lcl.h" 99 100 #ifndef OPENSSL_NO_EC2M 101 102 /* Maximum number of iterations before BN_GF2m_mod_solve_quad_arr should fail. */ 103 #define MAX_ITERATIONS 50 104 105 static const BN_ULONG SQR_tb[16] = 106 { 0, 1, 4, 5, 16, 17, 20, 21, 107 64, 65, 68, 69, 80, 81, 84, 85 }; 108 /* Platform-specific macros to accelerate squaring. */ 109 #ifdef _LP64 110 #define SQR1(w) \ 111 SQR_tb[(w) >> 60 & 0xF] << 56 | SQR_tb[(w) >> 56 & 0xF] << 48 | \ 112 SQR_tb[(w) >> 52 & 0xF] << 40 | SQR_tb[(w) >> 48 & 0xF] << 32 | \ 113 SQR_tb[(w) >> 44 & 0xF] << 24 | SQR_tb[(w) >> 40 & 0xF] << 16 | \ 114 SQR_tb[(w) >> 36 & 0xF] << 8 | SQR_tb[(w) >> 32 & 0xF] 115 #define SQR0(w) \ 116 SQR_tb[(w) >> 28 & 0xF] << 56 | SQR_tb[(w) >> 24 & 0xF] << 48 | \ 117 SQR_tb[(w) >> 20 & 0xF] << 40 | SQR_tb[(w) >> 16 & 0xF] << 32 | \ 118 SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \ 119 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] 120 #else 121 #define SQR1(w) \ 122 SQR_tb[(w) >> 28 & 0xF] << 24 | SQR_tb[(w) >> 24 & 0xF] << 16 | \ 123 SQR_tb[(w) >> 20 & 0xF] << 8 | SQR_tb[(w) >> 16 & 0xF] 124 #define SQR0(w) \ 125 SQR_tb[(w) >> 12 & 0xF] << 24 | SQR_tb[(w) >> 8 & 0xF] << 16 | \ 126 SQR_tb[(w) >> 4 & 0xF] << 8 | SQR_tb[(w) & 0xF] 127 #endif 128 129 #if !defined(OPENSSL_BN_ASM_GF2m) 130 /* Product of two polynomials a, b each with degree < BN_BITS2 - 1, 131 * result is a polynomial r with degree < 2 * BN_BITS - 1 132 * The caller MUST ensure that the variables have the right amount 133 * of space allocated. 134 */ 135 static void 136 bn_GF2m_mul_1x1(BN_ULONG *r1, BN_ULONG *r0, const BN_ULONG a, const BN_ULONG b) 137 { 138 #ifndef _LP64 139 BN_ULONG h, l, s; 140 BN_ULONG tab[8], top2b = a >> 30; 141 BN_ULONG a1, a2, a4; 142 143 a1 = a & (0x3FFFFFFF); 144 a2 = a1 << 1; 145 a4 = a2 << 1; 146 147 tab[0] = 0; 148 tab[1] = a1; 149 tab[2] = a2; 150 tab[3] = a1 ^ a2; 151 tab[4] = a4; 152 tab[5] = a1 ^ a4; 153 tab[6] = a2 ^ a4; 154 tab[7] = a1 ^ a2 ^ a4; 155 156 s = tab[b & 0x7]; 157 l = s; 158 s = tab[b >> 3 & 0x7]; 159 l ^= s << 3; 160 h = s >> 29; 161 s = tab[b >> 6 & 0x7]; 162 l ^= s << 6; 163 h ^= s >> 26; 164 s = tab[b >> 9 & 0x7]; 165 l ^= s << 9; 166 h ^= s >> 23; 167 s = tab[b >> 12 & 0x7]; 168 l ^= s << 12; 169 h ^= s >> 20; 170 s = tab[b >> 15 & 0x7]; 171 l ^= s << 15; 172 h ^= s >> 17; 173 s = tab[b >> 18 & 0x7]; 174 l ^= s << 18; 175 h ^= s >> 14; 176 s = tab[b >> 21 & 0x7]; 177 l ^= s << 21; 178 h ^= s >> 11; 179 s = tab[b >> 24 & 0x7]; 180 l ^= s << 24; 181 h ^= s >> 8; 182 s = tab[b >> 27 & 0x7]; 183 l ^= s << 27; 184 h ^= s >> 5; 185 s = tab[b >> 30]; 186 l ^= s << 30; 187 h ^= s >> 2; 188 189 /* compensate for the top two bits of a */ 190 if (top2b & 01) { 191 l ^= b << 30; 192 h ^= b >> 2; 193 } 194 if (top2b & 02) { 195 l ^= b << 31; 196 h ^= b >> 1; 197 } 198 199 *r1 = h; 200 *r0 = l; 201 #else 202 BN_ULONG h, l, s; 203 BN_ULONG tab[16], top3b = a >> 61; 204 BN_ULONG a1, a2, a4, a8; 205 206 a1 = a & (0x1FFFFFFFFFFFFFFFULL); 207 a2 = a1 << 1; 208 a4 = a2 << 1; 209 a8 = a4 << 1; 210 211 tab[0] = 0; 212 tab[1] = a1; 213 tab[2] = a2; 214 tab[3] = a1 ^ a2; 215 tab[4] = a4; 216 tab[5] = a1 ^ a4; 217 tab[6] = a2 ^ a4; 218 tab[7] = a1 ^ a2 ^ a4; 219 tab[8] = a8; 220 tab[9] = a1 ^ a8; 221 tab[10] = a2 ^ a8; 222 tab[11] = a1 ^ a2 ^ a8; 223 tab[12] = a4 ^ a8; 224 tab[13] = a1 ^ a4 ^ a8; 225 tab[14] = a2 ^ a4 ^ a8; 226 tab[15] = a1 ^ a2 ^ a4 ^ a8; 227 228 s = tab[b & 0xF]; 229 l = s; 230 s = tab[b >> 4 & 0xF]; 231 l ^= s << 4; 232 h = s >> 60; 233 s = tab[b >> 8 & 0xF]; 234 l ^= s << 8; 235 h ^= s >> 56; 236 s = tab[b >> 12 & 0xF]; 237 l ^= s << 12; 238 h ^= s >> 52; 239 s = tab[b >> 16 & 0xF]; 240 l ^= s << 16; 241 h ^= s >> 48; 242 s = tab[b >> 20 & 0xF]; 243 l ^= s << 20; 244 h ^= s >> 44; 245 s = tab[b >> 24 & 0xF]; 246 l ^= s << 24; 247 h ^= s >> 40; 248 s = tab[b >> 28 & 0xF]; 249 l ^= s << 28; 250 h ^= s >> 36; 251 s = tab[b >> 32 & 0xF]; 252 l ^= s << 32; 253 h ^= s >> 32; 254 s = tab[b >> 36 & 0xF]; 255 l ^= s << 36; 256 h ^= s >> 28; 257 s = tab[b >> 40 & 0xF]; 258 l ^= s << 40; 259 h ^= s >> 24; 260 s = tab[b >> 44 & 0xF]; 261 l ^= s << 44; 262 h ^= s >> 20; 263 s = tab[b >> 48 & 0xF]; 264 l ^= s << 48; 265 h ^= s >> 16; 266 s = tab[b >> 52 & 0xF]; 267 l ^= s << 52; 268 h ^= s >> 12; 269 s = tab[b >> 56 & 0xF]; 270 l ^= s << 56; 271 h ^= s >> 8; 272 s = tab[b >> 60]; 273 l ^= s << 60; 274 h ^= s >> 4; 275 276 /* compensate for the top three bits of a */ 277 if (top3b & 01) { 278 l ^= b << 61; 279 h ^= b >> 3; 280 } 281 if (top3b & 02) { 282 l ^= b << 62; 283 h ^= b >> 2; 284 } 285 if (top3b & 04) { 286 l ^= b << 63; 287 h ^= b >> 1; 288 } 289 290 *r1 = h; 291 *r0 = l; 292 #endif 293 } 294 295 /* Product of two polynomials a, b each with degree < 2 * BN_BITS2 - 1, 296 * result is a polynomial r with degree < 4 * BN_BITS2 - 1 297 * The caller MUST ensure that the variables have the right amount 298 * of space allocated. 299 */ 300 static void 301 bn_GF2m_mul_2x2(BN_ULONG *r, const BN_ULONG a1, const BN_ULONG a0, 302 const BN_ULONG b1, const BN_ULONG b0) 303 { 304 BN_ULONG m1, m0; 305 306 /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */ 307 bn_GF2m_mul_1x1(r + 3, r + 2, a1, b1); 308 bn_GF2m_mul_1x1(r + 1, r, a0, b0); 309 bn_GF2m_mul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1); 310 /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */ 311 r[2] ^= m1 ^ r[1] ^ r[3]; /* h0 ^= m1 ^ l1 ^ h1; */ 312 r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */ 313 } 314 #else 315 void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, 316 BN_ULONG b0); 317 #endif 318 319 /* Add polynomials a and b and store result in r; r could be a or b, a and b 320 * could be equal; r is the bitwise XOR of a and b. 321 */ 322 int 323 BN_GF2m_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) 324 { 325 int i; 326 const BIGNUM *at, *bt; 327 328 bn_check_top(a); 329 bn_check_top(b); 330 331 if (a->top < b->top) { 332 at = b; 333 bt = a; 334 } else { 335 at = a; 336 bt = b; 337 } 338 339 if (bn_wexpand(r, at->top) == NULL) 340 return 0; 341 342 for (i = 0; i < bt->top; i++) { 343 r->d[i] = at->d[i] ^ bt->d[i]; 344 } 345 for (; i < at->top; i++) { 346 r->d[i] = at->d[i]; 347 } 348 349 r->top = at->top; 350 bn_correct_top(r); 351 352 return 1; 353 } 354 355 356 /* Some functions allow for representation of the irreducible polynomials 357 * as an int[], say p. The irreducible f(t) is then of the form: 358 * t^p[0] + t^p[1] + ... + t^p[k] 359 * where m = p[0] > p[1] > ... > p[k] = 0. 360 */ 361 362 363 /* Performs modular reduction of a and store result in r. r could be a. */ 364 int 365 BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]) 366 { 367 int j, k; 368 int n, dN, d0, d1; 369 BN_ULONG zz, *z; 370 371 bn_check_top(a); 372 373 if (!p[0]) { 374 /* reduction mod 1 => return 0 */ 375 BN_zero(r); 376 return 1; 377 } 378 379 /* Since the algorithm does reduction in the r value, if a != r, copy 380 * the contents of a into r so we can do reduction in r. 381 */ 382 if (a != r) { 383 if (!bn_wexpand(r, a->top)) 384 return 0; 385 for (j = 0; j < a->top; j++) { 386 r->d[j] = a->d[j]; 387 } 388 r->top = a->top; 389 } 390 z = r->d; 391 392 /* start reduction */ 393 dN = p[0] / BN_BITS2; 394 for (j = r->top - 1; j > dN; ) { 395 zz = z[j]; 396 if (z[j] == 0) { 397 j--; 398 continue; 399 } 400 z[j] = 0; 401 402 for (k = 1; p[k] != 0; k++) { 403 /* reducing component t^p[k] */ 404 n = p[0] - p[k]; 405 d0 = n % BN_BITS2; 406 d1 = BN_BITS2 - d0; 407 n /= BN_BITS2; 408 z[j - n] ^= (zz >> d0); 409 if (d0) 410 z[j - n - 1] ^= (zz << d1); 411 } 412 413 /* reducing component t^0 */ 414 n = dN; 415 d0 = p[0] % BN_BITS2; 416 d1 = BN_BITS2 - d0; 417 z[j - n] ^= (zz >> d0); 418 if (d0) 419 z[j - n - 1] ^= (zz << d1); 420 } 421 422 /* final round of reduction */ 423 while (j == dN) { 424 425 d0 = p[0] % BN_BITS2; 426 zz = z[dN] >> d0; 427 if (zz == 0) 428 break; 429 d1 = BN_BITS2 - d0; 430 431 /* clear up the top d1 bits */ 432 if (d0) 433 z[dN] = (z[dN] << d1) >> d1; 434 else 435 z[dN] = 0; 436 z[0] ^= zz; /* reduction t^0 component */ 437 438 for (k = 1; p[k] != 0; k++) { 439 BN_ULONG tmp_ulong; 440 441 /* reducing component t^p[k]*/ 442 n = p[k] / BN_BITS2; 443 d0 = p[k] % BN_BITS2; 444 d1 = BN_BITS2 - d0; 445 z[n] ^= (zz << d0); 446 tmp_ulong = zz >> d1; 447 if (d0 && tmp_ulong) 448 z[n + 1] ^= tmp_ulong; 449 } 450 451 452 } 453 454 bn_correct_top(r); 455 return 1; 456 } 457 458 /* Performs modular reduction of a by p and store result in r. r could be a. 459 * 460 * This function calls down to the BN_GF2m_mod_arr implementation; this wrapper 461 * function is only provided for convenience; for best performance, use the 462 * BN_GF2m_mod_arr function. 463 */ 464 int 465 BN_GF2m_mod(BIGNUM *r, const BIGNUM *a, const BIGNUM *p) 466 { 467 int ret = 0; 468 int arr[6]; 469 470 bn_check_top(a); 471 bn_check_top(p); 472 ret = BN_GF2m_poly2arr(p, arr, sizeof(arr) / sizeof(arr[0])); 473 if (!ret || ret > (int)(sizeof(arr) / sizeof(arr[0]))) { 474 BNerr(BN_F_BN_GF2M_MOD, BN_R_INVALID_LENGTH); 475 return 0; 476 } 477 ret = BN_GF2m_mod_arr(r, a, arr); 478 bn_check_top(r); 479 return ret; 480 } 481 482 483 /* Compute the product of two polynomials a and b, reduce modulo p, and store 484 * the result in r. r could be a or b; a could be b. 485 */ 486 int 487 BN_GF2m_mod_mul_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], 488 BN_CTX *ctx) 489 { 490 int zlen, i, j, k, ret = 0; 491 BIGNUM *s; 492 BN_ULONG x1, x0, y1, y0, zz[4]; 493 494 bn_check_top(a); 495 bn_check_top(b); 496 497 if (a == b) { 498 return BN_GF2m_mod_sqr_arr(r, a, p, ctx); 499 } 500 501 BN_CTX_start(ctx); 502 if ((s = BN_CTX_get(ctx)) == NULL) 503 goto err; 504 505 zlen = a->top + b->top + 4; 506 if (!bn_wexpand(s, zlen)) 507 goto err; 508 s->top = zlen; 509 510 for (i = 0; i < zlen; i++) 511 s->d[i] = 0; 512 513 for (j = 0; j < b->top; j += 2) { 514 y0 = b->d[j]; 515 y1 = ((j + 1) == b->top) ? 0 : b->d[j + 1]; 516 for (i = 0; i < a->top; i += 2) { 517 x0 = a->d[i]; 518 x1 = ((i + 1) == a->top) ? 0 : a->d[i + 1]; 519 bn_GF2m_mul_2x2(zz, x1, x0, y1, y0); 520 for (k = 0; k < 4; k++) 521 s->d[i + j + k] ^= zz[k]; 522 } 523 } 524 525 bn_correct_top(s); 526 if (BN_GF2m_mod_arr(r, s, p)) 527 ret = 1; 528 bn_check_top(r); 529 530 err: 531 BN_CTX_end(ctx); 532 return ret; 533 } 534 535 /* Compute the product of two polynomials a and b, reduce modulo p, and store 536 * the result in r. r could be a or b; a could equal b. 537 * 538 * This function calls down to the BN_GF2m_mod_mul_arr implementation; this wrapper 539 * function is only provided for convenience; for best performance, use the 540 * BN_GF2m_mod_mul_arr function. 541 */ 542 int 543 BN_GF2m_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, 544 BN_CTX *ctx) 545 { 546 int ret = 0; 547 const int max = BN_num_bits(p) + 1; 548 int *arr = NULL; 549 550 bn_check_top(a); 551 bn_check_top(b); 552 bn_check_top(p); 553 if ((arr = reallocarray(NULL, max, sizeof(int))) == NULL) 554 goto err; 555 ret = BN_GF2m_poly2arr(p, arr, max); 556 if (!ret || ret > max) { 557 BNerr(BN_F_BN_GF2M_MOD_MUL, BN_R_INVALID_LENGTH); 558 goto err; 559 } 560 ret = BN_GF2m_mod_mul_arr(r, a, b, arr, ctx); 561 bn_check_top(r); 562 563 err: 564 free(arr); 565 return ret; 566 } 567 568 569 /* Square a, reduce the result mod p, and store it in a. r could be a. */ 570 int 571 BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx) 572 { 573 int i, ret = 0; 574 BIGNUM *s; 575 576 bn_check_top(a); 577 BN_CTX_start(ctx); 578 if ((s = BN_CTX_get(ctx)) == NULL) 579 goto err; 580 if (!bn_wexpand(s, 2 * a->top)) 581 goto err; 582 583 for (i = a->top - 1; i >= 0; i--) { 584 s->d[2 * i + 1] = SQR1(a->d[i]); 585 s->d[2 * i] = SQR0(a->d[i]); 586 } 587 588 s->top = 2 * a->top; 589 bn_correct_top(s); 590 if (!BN_GF2m_mod_arr(r, s, p)) 591 goto err; 592 bn_check_top(r); 593 ret = 1; 594 595 err: 596 BN_CTX_end(ctx); 597 return ret; 598 } 599 600 /* Square a, reduce the result mod p, and store it in a. r could be a. 601 * 602 * This function calls down to the BN_GF2m_mod_sqr_arr implementation; this wrapper 603 * function is only provided for convenience; for best performance, use the 604 * BN_GF2m_mod_sqr_arr function. 605 */ 606 int 607 BN_GF2m_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 608 { 609 int ret = 0; 610 const int max = BN_num_bits(p) + 1; 611 int *arr = NULL; 612 613 bn_check_top(a); 614 bn_check_top(p); 615 if ((arr = reallocarray(NULL, max, sizeof(int))) == NULL) 616 goto err; 617 ret = BN_GF2m_poly2arr(p, arr, max); 618 if (!ret || ret > max) { 619 BNerr(BN_F_BN_GF2M_MOD_SQR, BN_R_INVALID_LENGTH); 620 goto err; 621 } 622 ret = BN_GF2m_mod_sqr_arr(r, a, arr, ctx); 623 bn_check_top(r); 624 625 err: 626 free(arr); 627 return ret; 628 } 629 630 631 /* Invert a, reduce modulo p, and store the result in r. r could be a. 632 * Uses Modified Almost Inverse Algorithm (Algorithm 10) from 633 * Hankerson, D., Hernandez, J.L., and Menezes, A. "Software Implementation 634 * of Elliptic Curve Cryptography Over Binary Fields". 635 */ 636 int 637 BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 638 { 639 BIGNUM *b, *c = NULL, *u = NULL, *v = NULL, *tmp; 640 int ret = 0; 641 642 bn_check_top(a); 643 bn_check_top(p); 644 645 BN_CTX_start(ctx); 646 647 if ((b = BN_CTX_get(ctx)) == NULL) 648 goto err; 649 if ((c = BN_CTX_get(ctx)) == NULL) 650 goto err; 651 if ((u = BN_CTX_get(ctx)) == NULL) 652 goto err; 653 if ((v = BN_CTX_get(ctx)) == NULL) 654 goto err; 655 656 if (!BN_GF2m_mod(u, a, p)) 657 goto err; 658 if (BN_is_zero(u)) 659 goto err; 660 661 if (!BN_copy(v, p)) 662 goto err; 663 #if 0 664 if (!BN_one(b)) 665 goto err; 666 667 while (1) { 668 while (!BN_is_odd(u)) { 669 if (BN_is_zero(u)) 670 goto err; 671 if (!BN_rshift1(u, u)) 672 goto err; 673 if (BN_is_odd(b)) { 674 if (!BN_GF2m_add(b, b, p)) 675 goto err; 676 } 677 if (!BN_rshift1(b, b)) 678 goto err; 679 } 680 681 if (BN_abs_is_word(u, 1)) 682 break; 683 684 if (BN_num_bits(u) < BN_num_bits(v)) { 685 tmp = u; 686 u = v; 687 v = tmp; 688 tmp = b; 689 b = c; 690 c = tmp; 691 } 692 693 if (!BN_GF2m_add(u, u, v)) 694 goto err; 695 if (!BN_GF2m_add(b, b, c)) 696 goto err; 697 } 698 #else 699 { 700 int i, ubits = BN_num_bits(u), 701 vbits = BN_num_bits(v), /* v is copy of p */ 702 top = p->top; 703 BN_ULONG *udp, *bdp, *vdp, *cdp; 704 705 if (!bn_wexpand(u, top)) 706 goto err; 707 udp = u->d; 708 for (i = u->top; i < top; i++) 709 udp[i] = 0; 710 u->top = top; 711 if (!bn_wexpand(b, top)) 712 goto err; 713 bdp = b->d; 714 bdp[0] = 1; 715 for (i = 1; i < top; i++) 716 bdp[i] = 0; 717 b->top = top; 718 if (!bn_wexpand(c, top)) 719 goto err; 720 cdp = c->d; 721 for (i = 0; i < top; i++) 722 cdp[i] = 0; 723 c->top = top; 724 vdp = v->d; /* It pays off to "cache" *->d pointers, because 725 * it allows optimizer to be more aggressive. 726 * But we don't have to "cache" p->d, because *p 727 * is declared 'const'... */ 728 while (1) { 729 while (ubits && !(udp[0]&1)) { 730 BN_ULONG u0, u1, b0, b1, mask; 731 732 u0 = udp[0]; 733 b0 = bdp[0]; 734 mask = (BN_ULONG)0 - (b0 & 1); 735 b0 ^= p->d[0] & mask; 736 for (i = 0; i < top - 1; i++) { 737 u1 = udp[i + 1]; 738 udp[i] = ((u0 >> 1) | 739 (u1 << (BN_BITS2 - 1))) & BN_MASK2; 740 u0 = u1; 741 b1 = bdp[i + 1] ^ (p->d[i + 1] & mask); 742 bdp[i] = ((b0 >> 1) | 743 (b1 << (BN_BITS2 - 1))) & BN_MASK2; 744 b0 = b1; 745 } 746 udp[i] = u0 >> 1; 747 bdp[i] = b0 >> 1; 748 ubits--; 749 } 750 751 if (ubits <= BN_BITS2) { 752 /* See if poly was reducible. */ 753 if (udp[0] == 0) 754 goto err; 755 if (udp[0] == 1) 756 break; 757 } 758 759 if (ubits < vbits) { 760 i = ubits; 761 ubits = vbits; 762 vbits = i; 763 tmp = u; 764 u = v; 765 v = tmp; 766 tmp = b; 767 b = c; 768 c = tmp; 769 udp = vdp; 770 vdp = v->d; 771 bdp = cdp; 772 cdp = c->d; 773 } 774 for (i = 0; i < top; i++) { 775 udp[i] ^= vdp[i]; 776 bdp[i] ^= cdp[i]; 777 } 778 if (ubits == vbits) { 779 BN_ULONG ul; 780 int utop = (ubits - 1) / BN_BITS2; 781 782 while ((ul = udp[utop]) == 0 && utop) 783 utop--; 784 ubits = utop*BN_BITS2 + BN_num_bits_word(ul); 785 } 786 } 787 bn_correct_top(b); 788 } 789 #endif 790 791 if (!BN_copy(r, b)) 792 goto err; 793 bn_check_top(r); 794 ret = 1; 795 796 err: 797 #ifdef BN_DEBUG /* BN_CTX_end would complain about the expanded form */ 798 bn_correct_top(c); 799 bn_correct_top(u); 800 bn_correct_top(v); 801 #endif 802 BN_CTX_end(ctx); 803 return ret; 804 } 805 806 /* Invert xx, reduce modulo p, and store the result in r. r could be xx. 807 * 808 * This function calls down to the BN_GF2m_mod_inv implementation; this wrapper 809 * function is only provided for convenience; for best performance, use the 810 * BN_GF2m_mod_inv function. 811 */ 812 int 813 BN_GF2m_mod_inv_arr(BIGNUM *r, const BIGNUM *xx, const int p[], BN_CTX *ctx) 814 { 815 BIGNUM *field; 816 int ret = 0; 817 818 bn_check_top(xx); 819 BN_CTX_start(ctx); 820 if ((field = BN_CTX_get(ctx)) == NULL) 821 goto err; 822 if (!BN_GF2m_arr2poly(p, field)) 823 goto err; 824 825 ret = BN_GF2m_mod_inv(r, xx, field, ctx); 826 bn_check_top(r); 827 828 err: 829 BN_CTX_end(ctx); 830 return ret; 831 } 832 833 834 #ifndef OPENSSL_SUN_GF2M_DIV 835 /* Divide y by x, reduce modulo p, and store the result in r. r could be x 836 * or y, x could equal y. 837 */ 838 int 839 BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p, 840 BN_CTX *ctx) 841 { 842 BIGNUM *xinv = NULL; 843 int ret = 0; 844 845 bn_check_top(y); 846 bn_check_top(x); 847 bn_check_top(p); 848 849 BN_CTX_start(ctx); 850 if ((xinv = BN_CTX_get(ctx)) == NULL) 851 goto err; 852 853 if (!BN_GF2m_mod_inv(xinv, x, p, ctx)) 854 goto err; 855 if (!BN_GF2m_mod_mul(r, y, xinv, p, ctx)) 856 goto err; 857 bn_check_top(r); 858 ret = 1; 859 860 err: 861 BN_CTX_end(ctx); 862 return ret; 863 } 864 #else 865 /* Divide y by x, reduce modulo p, and store the result in r. r could be x 866 * or y, x could equal y. 867 * Uses algorithm Modular_Division_GF(2^m) from 868 * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to 869 * the Great Divide". 870 */ 871 int 872 BN_GF2m_mod_div(BIGNUM *r, const BIGNUM *y, const BIGNUM *x, const BIGNUM *p, 873 BN_CTX *ctx) 874 { 875 BIGNUM *a, *b, *u, *v; 876 int ret = 0; 877 878 bn_check_top(y); 879 bn_check_top(x); 880 bn_check_top(p); 881 882 BN_CTX_start(ctx); 883 884 if ((a = BN_CTX_get(ctx)) == NULL) 885 goto err; 886 if ((b = BN_CTX_get(ctx)) == NULL) 887 goto err; 888 if ((u = BN_CTX_get(ctx)) == NULL) 889 goto err; 890 if ((v = BN_CTX_get(ctx)) == NULL) 891 goto err; 892 893 /* reduce x and y mod p */ 894 if (!BN_GF2m_mod(u, y, p)) 895 goto err; 896 if (!BN_GF2m_mod(a, x, p)) 897 goto err; 898 if (!BN_copy(b, p)) 899 goto err; 900 901 while (!BN_is_odd(a)) { 902 if (!BN_rshift1(a, a)) 903 goto err; 904 if (BN_is_odd(u)) 905 if (!BN_GF2m_add(u, u, p)) 906 goto err; 907 if (!BN_rshift1(u, u)) 908 goto err; 909 } 910 911 do { 912 if (BN_GF2m_cmp(b, a) > 0) { 913 if (!BN_GF2m_add(b, b, a)) 914 goto err; 915 if (!BN_GF2m_add(v, v, u)) 916 goto err; 917 do { 918 if (!BN_rshift1(b, b)) 919 goto err; 920 if (BN_is_odd(v)) 921 if (!BN_GF2m_add(v, v, p)) 922 goto err; 923 if (!BN_rshift1(v, v)) 924 goto err; 925 } while (!BN_is_odd(b)); 926 } else if (BN_abs_is_word(a, 1)) 927 break; 928 else { 929 if (!BN_GF2m_add(a, a, b)) 930 goto err; 931 if (!BN_GF2m_add(u, u, v)) 932 goto err; 933 do { 934 if (!BN_rshift1(a, a)) 935 goto err; 936 if (BN_is_odd(u)) 937 if (!BN_GF2m_add(u, u, p)) 938 goto err; 939 if (!BN_rshift1(u, u)) 940 goto err; 941 } while (!BN_is_odd(a)); 942 } 943 } while (1); 944 945 if (!BN_copy(r, u)) 946 goto err; 947 bn_check_top(r); 948 ret = 1; 949 950 err: 951 BN_CTX_end(ctx); 952 return ret; 953 } 954 #endif 955 956 /* Divide yy by xx, reduce modulo p, and store the result in r. r could be xx 957 * or yy, xx could equal yy. 958 * 959 * This function calls down to the BN_GF2m_mod_div implementation; this wrapper 960 * function is only provided for convenience; for best performance, use the 961 * BN_GF2m_mod_div function. 962 */ 963 int 964 BN_GF2m_mod_div_arr(BIGNUM *r, const BIGNUM *yy, const BIGNUM *xx, 965 const int p[], BN_CTX *ctx) 966 { 967 BIGNUM *field; 968 int ret = 0; 969 970 bn_check_top(yy); 971 bn_check_top(xx); 972 973 BN_CTX_start(ctx); 974 if ((field = BN_CTX_get(ctx)) == NULL) 975 goto err; 976 if (!BN_GF2m_arr2poly(p, field)) 977 goto err; 978 979 ret = BN_GF2m_mod_div(r, yy, xx, field, ctx); 980 bn_check_top(r); 981 982 err: 983 BN_CTX_end(ctx); 984 return ret; 985 } 986 987 988 /* Compute the bth power of a, reduce modulo p, and store 989 * the result in r. r could be a. 990 * Uses simple square-and-multiply algorithm A.5.1 from IEEE P1363. 991 */ 992 int 993 BN_GF2m_mod_exp_arr(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const int p[], 994 BN_CTX *ctx) 995 { 996 int ret = 0, i, n; 997 BIGNUM *u; 998 999 bn_check_top(a); 1000 bn_check_top(b); 1001 1002 if (BN_is_zero(b)) 1003 return (BN_one(r)); 1004 1005 if (BN_abs_is_word(b, 1)) 1006 return (BN_copy(r, a) != NULL); 1007 1008 BN_CTX_start(ctx); 1009 if ((u = BN_CTX_get(ctx)) == NULL) 1010 goto err; 1011 1012 if (!BN_GF2m_mod_arr(u, a, p)) 1013 goto err; 1014 1015 n = BN_num_bits(b) - 1; 1016 for (i = n - 1; i >= 0; i--) { 1017 if (!BN_GF2m_mod_sqr_arr(u, u, p, ctx)) 1018 goto err; 1019 if (BN_is_bit_set(b, i)) { 1020 if (!BN_GF2m_mod_mul_arr(u, u, a, p, ctx)) 1021 goto err; 1022 } 1023 } 1024 if (!BN_copy(r, u)) 1025 goto err; 1026 bn_check_top(r); 1027 ret = 1; 1028 1029 err: 1030 BN_CTX_end(ctx); 1031 return ret; 1032 } 1033 1034 /* Compute the bth power of a, reduce modulo p, and store 1035 * the result in r. r could be a. 1036 * 1037 * This function calls down to the BN_GF2m_mod_exp_arr implementation; this wrapper 1038 * function is only provided for convenience; for best performance, use the 1039 * BN_GF2m_mod_exp_arr function. 1040 */ 1041 int 1042 BN_GF2m_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *p, 1043 BN_CTX *ctx) 1044 { 1045 int ret = 0; 1046 const int max = BN_num_bits(p) + 1; 1047 int *arr = NULL; 1048 1049 bn_check_top(a); 1050 bn_check_top(b); 1051 bn_check_top(p); 1052 if ((arr = reallocarray(NULL, max, sizeof(int))) == NULL) 1053 goto err; 1054 ret = BN_GF2m_poly2arr(p, arr, max); 1055 if (!ret || ret > max) { 1056 BNerr(BN_F_BN_GF2M_MOD_EXP, BN_R_INVALID_LENGTH); 1057 goto err; 1058 } 1059 ret = BN_GF2m_mod_exp_arr(r, a, b, arr, ctx); 1060 bn_check_top(r); 1061 1062 err: 1063 free(arr); 1064 return ret; 1065 } 1066 1067 /* Compute the square root of a, reduce modulo p, and store 1068 * the result in r. r could be a. 1069 * Uses exponentiation as in algorithm A.4.1 from IEEE P1363. 1070 */ 1071 int 1072 BN_GF2m_mod_sqrt_arr(BIGNUM *r, const BIGNUM *a, const int p[], BN_CTX *ctx) 1073 { 1074 int ret = 0; 1075 BIGNUM *u; 1076 1077 bn_check_top(a); 1078 1079 if (!p[0]) { 1080 /* reduction mod 1 => return 0 */ 1081 BN_zero(r); 1082 return 1; 1083 } 1084 1085 BN_CTX_start(ctx); 1086 if ((u = BN_CTX_get(ctx)) == NULL) 1087 goto err; 1088 1089 if (!BN_set_bit(u, p[0] - 1)) 1090 goto err; 1091 ret = BN_GF2m_mod_exp_arr(r, a, u, p, ctx); 1092 bn_check_top(r); 1093 1094 err: 1095 BN_CTX_end(ctx); 1096 return ret; 1097 } 1098 1099 /* Compute the square root of a, reduce modulo p, and store 1100 * the result in r. r could be a. 1101 * 1102 * This function calls down to the BN_GF2m_mod_sqrt_arr implementation; this wrapper 1103 * function is only provided for convenience; for best performance, use the 1104 * BN_GF2m_mod_sqrt_arr function. 1105 */ 1106 int 1107 BN_GF2m_mod_sqrt(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 1108 { 1109 int ret = 0; 1110 const int max = BN_num_bits(p) + 1; 1111 int *arr = NULL; 1112 bn_check_top(a); 1113 bn_check_top(p); 1114 if ((arr = reallocarray(NULL, max, sizeof(int))) == NULL) 1115 goto err; 1116 ret = BN_GF2m_poly2arr(p, arr, max); 1117 if (!ret || ret > max) { 1118 BNerr(BN_F_BN_GF2M_MOD_SQRT, BN_R_INVALID_LENGTH); 1119 goto err; 1120 } 1121 ret = BN_GF2m_mod_sqrt_arr(r, a, arr, ctx); 1122 bn_check_top(r); 1123 1124 err: 1125 free(arr); 1126 return ret; 1127 } 1128 1129 /* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0. 1130 * Uses algorithms A.4.7 and A.4.6 from IEEE P1363. 1131 */ 1132 int 1133 BN_GF2m_mod_solve_quad_arr(BIGNUM *r, const BIGNUM *a_, const int p[], 1134 BN_CTX *ctx) 1135 { 1136 int ret = 0, count = 0, j; 1137 BIGNUM *a, *z, *rho, *w, *w2, *tmp; 1138 1139 bn_check_top(a_); 1140 1141 if (!p[0]) { 1142 /* reduction mod 1 => return 0 */ 1143 BN_zero(r); 1144 return 1; 1145 } 1146 1147 BN_CTX_start(ctx); 1148 if ((a = BN_CTX_get(ctx)) == NULL) 1149 goto err; 1150 if ((z = BN_CTX_get(ctx)) == NULL) 1151 goto err; 1152 if ((w = BN_CTX_get(ctx)) == NULL) 1153 goto err; 1154 1155 if (!BN_GF2m_mod_arr(a, a_, p)) 1156 goto err; 1157 1158 if (BN_is_zero(a)) { 1159 BN_zero(r); 1160 ret = 1; 1161 goto err; 1162 } 1163 1164 if (p[0] & 0x1) /* m is odd */ 1165 { 1166 /* compute half-trace of a */ 1167 if (!BN_copy(z, a)) 1168 goto err; 1169 for (j = 1; j <= (p[0] - 1) / 2; j++) { 1170 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) 1171 goto err; 1172 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) 1173 goto err; 1174 if (!BN_GF2m_add(z, z, a)) 1175 goto err; 1176 } 1177 1178 } 1179 else /* m is even */ 1180 { 1181 if ((rho = BN_CTX_get(ctx)) == NULL) 1182 goto err; 1183 if ((w2 = BN_CTX_get(ctx)) == NULL) 1184 goto err; 1185 if ((tmp = BN_CTX_get(ctx)) == NULL) 1186 goto err; 1187 do { 1188 if (!BN_rand(rho, p[0], 0, 0)) 1189 goto err; 1190 if (!BN_GF2m_mod_arr(rho, rho, p)) 1191 goto err; 1192 BN_zero(z); 1193 if (!BN_copy(w, rho)) 1194 goto err; 1195 for (j = 1; j <= p[0] - 1; j++) { 1196 if (!BN_GF2m_mod_sqr_arr(z, z, p, ctx)) 1197 goto err; 1198 if (!BN_GF2m_mod_sqr_arr(w2, w, p, ctx)) 1199 goto err; 1200 if (!BN_GF2m_mod_mul_arr(tmp, w2, a, p, ctx)) 1201 goto err; 1202 if (!BN_GF2m_add(z, z, tmp)) 1203 goto err; 1204 if (!BN_GF2m_add(w, w2, rho)) 1205 goto err; 1206 } 1207 count++; 1208 } while (BN_is_zero(w) && (count < MAX_ITERATIONS)); 1209 if (BN_is_zero(w)) { 1210 BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, 1211 BN_R_TOO_MANY_ITERATIONS); 1212 goto err; 1213 } 1214 } 1215 1216 if (!BN_GF2m_mod_sqr_arr(w, z, p, ctx)) 1217 goto err; 1218 if (!BN_GF2m_add(w, z, w)) 1219 goto err; 1220 if (BN_GF2m_cmp(w, a)) { 1221 BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR, BN_R_NO_SOLUTION); 1222 goto err; 1223 } 1224 1225 if (!BN_copy(r, z)) 1226 goto err; 1227 bn_check_top(r); 1228 1229 ret = 1; 1230 1231 err: 1232 BN_CTX_end(ctx); 1233 return ret; 1234 } 1235 1236 /* Find r such that r^2 + r = a mod p. r could be a. If no r exists returns 0. 1237 * 1238 * This function calls down to the BN_GF2m_mod_solve_quad_arr implementation; this wrapper 1239 * function is only provided for convenience; for best performance, use the 1240 * BN_GF2m_mod_solve_quad_arr function. 1241 */ 1242 int 1243 BN_GF2m_mod_solve_quad(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 1244 { 1245 int ret = 0; 1246 const int max = BN_num_bits(p) + 1; 1247 int *arr = NULL; 1248 1249 bn_check_top(a); 1250 bn_check_top(p); 1251 if ((arr = reallocarray(NULL, max, sizeof(int))) == NULL) 1252 goto err; 1253 ret = BN_GF2m_poly2arr(p, arr, max); 1254 if (!ret || ret > max) { 1255 BNerr(BN_F_BN_GF2M_MOD_SOLVE_QUAD, BN_R_INVALID_LENGTH); 1256 goto err; 1257 } 1258 ret = BN_GF2m_mod_solve_quad_arr(r, a, arr, ctx); 1259 bn_check_top(r); 1260 1261 err: 1262 free(arr); 1263 return ret; 1264 } 1265 1266 /* Convert the bit-string representation of a polynomial 1267 * ( \sum_{i=0}^n a_i * x^i) into an array of integers corresponding 1268 * to the bits with non-zero coefficient. Array is terminated with -1. 1269 * Up to max elements of the array will be filled. Return value is total 1270 * number of array elements that would be filled if array was large enough. 1271 */ 1272 int 1273 BN_GF2m_poly2arr(const BIGNUM *a, int p[], int max) 1274 { 1275 int i, j, k = 0; 1276 BN_ULONG mask; 1277 1278 if (BN_is_zero(a)) 1279 return 0; 1280 1281 for (i = a->top - 1; i >= 0; i--) { 1282 if (!a->d[i]) 1283 /* skip word if a->d[i] == 0 */ 1284 continue; 1285 mask = BN_TBIT; 1286 for (j = BN_BITS2 - 1; j >= 0; j--) { 1287 if (a->d[i] & mask) { 1288 if (k < max) 1289 p[k] = BN_BITS2 * i + j; 1290 k++; 1291 } 1292 mask >>= 1; 1293 } 1294 } 1295 1296 if (k < max) { 1297 p[k] = -1; 1298 k++; 1299 } 1300 1301 return k; 1302 } 1303 1304 /* Convert the coefficient array representation of a polynomial to a 1305 * bit-string. The array must be terminated by -1. 1306 */ 1307 int 1308 BN_GF2m_arr2poly(const int p[], BIGNUM *a) 1309 { 1310 int i; 1311 1312 bn_check_top(a); 1313 BN_zero(a); 1314 for (i = 0; p[i] != -1; i++) { 1315 if (BN_set_bit(a, p[i]) == 0) 1316 return 0; 1317 } 1318 bn_check_top(a); 1319 1320 return 1; 1321 } 1322 1323 #endif 1324