1 /* $OpenBSD: bn_exp.c,v 1.22 2015/03/21 08:05:20 doug Exp $ */ 2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 /* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112 #include <stdlib.h> 113 #include <string.h> 114 115 #include <openssl/err.h> 116 117 #include "bn_lcl.h" 118 119 /* maximum precomputation table size for *variable* sliding windows */ 120 #define TABLE_SIZE 32 121 122 /* this one works - simple but works */ 123 int 124 BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 125 { 126 int i, bits, ret = 0; 127 BIGNUM *v, *rr; 128 129 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 130 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 131 BNerr(BN_F_BN_EXP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 132 return -1; 133 } 134 135 BN_CTX_start(ctx); 136 if ((r == a) || (r == p)) 137 rr = BN_CTX_get(ctx); 138 else 139 rr = r; 140 v = BN_CTX_get(ctx); 141 if (rr == NULL || v == NULL) 142 goto err; 143 144 if (BN_copy(v, a) == NULL) 145 goto err; 146 bits = BN_num_bits(p); 147 148 if (BN_is_odd(p)) { 149 if (BN_copy(rr, a) == NULL) 150 goto err; 151 } else { 152 if (!BN_one(rr)) 153 goto err; 154 } 155 156 for (i = 1; i < bits; i++) { 157 if (!BN_sqr(v, v, ctx)) 158 goto err; 159 if (BN_is_bit_set(p, i)) { 160 if (!BN_mul(rr, rr, v, ctx)) 161 goto err; 162 } 163 } 164 ret = 1; 165 166 err: 167 if (r != rr && rr != NULL) 168 BN_copy(r, rr); 169 BN_CTX_end(ctx); 170 bn_check_top(r); 171 return (ret); 172 } 173 174 int 175 BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 176 BN_CTX *ctx) 177 { 178 int ret; 179 180 bn_check_top(a); 181 bn_check_top(p); 182 bn_check_top(m); 183 184 /* For even modulus m = 2^k*m_odd, it might make sense to compute 185 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 186 * exponentiation for the odd part), using appropriate exponent 187 * reductions, and combine the results using the CRT. 188 * 189 * For now, we use Montgomery only if the modulus is odd; otherwise, 190 * exponentiation using the reciprocal-based quick remaindering 191 * algorithm is used. 192 * 193 * (Timing obtained with expspeed.c [computations a^p mod m 194 * where a, p, m are of the same length: 256, 512, 1024, 2048, 195 * 4096, 8192 bits], compared to the running time of the 196 * standard algorithm: 197 * 198 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 199 * 55 .. 77 % [UltraSparc processor, but 200 * debug-solaris-sparcv8-gcc conf.] 201 * 202 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 203 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 204 * 205 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 206 * at 2048 and more bits, but at 512 and 1024 bits, it was 207 * slower even than the standard algorithm! 208 * 209 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 210 * should be obtained when the new Montgomery reduction code 211 * has been integrated into OpenSSL.) 212 */ 213 214 #define MONT_MUL_MOD 215 #define MONT_EXP_WORD 216 #define RECP_MUL_MOD 217 218 #ifdef MONT_MUL_MOD 219 /* I have finally been able to take out this pre-condition of 220 * the top bit being set. It was caused by an error in BN_div 221 * with negatives. There was also another problem when for a^b%m 222 * a >= m. eay 07-May-97 */ 223 /* if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */ 224 225 if (BN_is_odd(m)) { 226 # ifdef MONT_EXP_WORD 227 if (a->top == 1 && !a->neg && 228 (BN_get_flags(p, BN_FLG_CONSTTIME) == 0)) { 229 BN_ULONG A = a->d[0]; 230 ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); 231 } else 232 # endif 233 ret = BN_mod_exp_mont(r, a,p, m,ctx, NULL); 234 } else 235 #endif 236 #ifdef RECP_MUL_MOD 237 { 238 ret = BN_mod_exp_recp(r, a,p, m, ctx); 239 } 240 #else 241 { 242 ret = BN_mod_exp_simple(r, a,p, m, ctx); 243 } 244 #endif 245 246 bn_check_top(r); 247 return (ret); 248 } 249 250 int 251 BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 252 BN_CTX *ctx) 253 { 254 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 255 int start = 1; 256 BIGNUM *aa; 257 /* Table of variables obtained from 'ctx' */ 258 BIGNUM *val[TABLE_SIZE]; 259 BN_RECP_CTX recp; 260 261 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 262 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 263 BNerr(BN_F_BN_MOD_EXP_RECP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 264 return -1; 265 } 266 267 bits = BN_num_bits(p); 268 269 if (bits == 0) { 270 ret = BN_one(r); 271 return ret; 272 } 273 274 BN_CTX_start(ctx); 275 if ((aa = BN_CTX_get(ctx)) == NULL) 276 goto err; 277 if ((val[0] = BN_CTX_get(ctx)) == NULL) 278 goto err; 279 280 BN_RECP_CTX_init(&recp); 281 if (m->neg) { 282 /* ignore sign of 'm' */ 283 if (!BN_copy(aa, m)) 284 goto err; 285 aa->neg = 0; 286 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 287 goto err; 288 } else { 289 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 290 goto err; 291 } 292 293 if (!BN_nnmod(val[0], a, m, ctx)) 294 goto err; /* 1 */ 295 if (BN_is_zero(val[0])) { 296 BN_zero(r); 297 ret = 1; 298 goto err; 299 } 300 301 window = BN_window_bits_for_exponent_size(bits); 302 if (window > 1) { 303 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 304 goto err; /* 2 */ 305 j = 1 << (window - 1); 306 for (i = 1; i < j; i++) { 307 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 308 !BN_mod_mul_reciprocal(val[i], val[i - 1], 309 aa, &recp, ctx)) 310 goto err; 311 } 312 } 313 314 start = 1; /* This is used to avoid multiplication etc 315 * when there is only the value '1' in the 316 * buffer. */ 317 wvalue = 0; /* The 'value' of the window */ 318 wstart = bits - 1; /* The top bit of the window */ 319 wend = 0; /* The bottom bit of the window */ 320 321 if (!BN_one(r)) 322 goto err; 323 324 for (;;) { 325 if (BN_is_bit_set(p, wstart) == 0) { 326 if (!start) 327 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 328 goto err; 329 if (wstart == 0) 330 break; 331 wstart--; 332 continue; 333 } 334 /* We now have wstart on a 'set' bit, we now need to work out 335 * how bit a window to do. To do this we need to scan 336 * forward until the last set bit before the end of the 337 * window */ 338 j = wstart; 339 wvalue = 1; 340 wend = 0; 341 for (i = 1; i < window; i++) { 342 if (wstart - i < 0) 343 break; 344 if (BN_is_bit_set(p, wstart - i)) { 345 wvalue <<= (i - wend); 346 wvalue |= 1; 347 wend = i; 348 } 349 } 350 351 /* wend is the size of the current window */ 352 j = wend + 1; 353 /* add the 'bytes above' */ 354 if (!start) 355 for (i = 0; i < j; i++) { 356 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 357 goto err; 358 } 359 360 /* wvalue will be an odd number < 2^window */ 361 if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx)) 362 goto err; 363 364 /* move the 'window' down further */ 365 wstart -= wend + 1; 366 wvalue = 0; 367 start = 0; 368 if (wstart < 0) 369 break; 370 } 371 ret = 1; 372 373 err: 374 BN_CTX_end(ctx); 375 BN_RECP_CTX_free(&recp); 376 bn_check_top(r); 377 return (ret); 378 } 379 380 int 381 BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 382 BN_CTX *ctx, BN_MONT_CTX *in_mont) 383 { 384 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 385 int start = 1; 386 BIGNUM *d, *r; 387 const BIGNUM *aa; 388 /* Table of variables obtained from 'ctx' */ 389 BIGNUM *val[TABLE_SIZE]; 390 BN_MONT_CTX *mont = NULL; 391 392 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 393 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 394 } 395 396 bn_check_top(a); 397 bn_check_top(p); 398 bn_check_top(m); 399 400 if (!BN_is_odd(m)) { 401 BNerr(BN_F_BN_MOD_EXP_MONT, BN_R_CALLED_WITH_EVEN_MODULUS); 402 return (0); 403 } 404 bits = BN_num_bits(p); 405 if (bits == 0) { 406 ret = BN_one(rr); 407 return ret; 408 } 409 410 BN_CTX_start(ctx); 411 if ((d = BN_CTX_get(ctx)) == NULL) 412 goto err; 413 if ((r = BN_CTX_get(ctx)) == NULL) 414 goto err; 415 if ((val[0] = BN_CTX_get(ctx)) == NULL) 416 goto err; 417 418 /* If this is not done, things will break in the montgomery 419 * part */ 420 421 if (in_mont != NULL) 422 mont = in_mont; 423 else { 424 if ((mont = BN_MONT_CTX_new()) == NULL) 425 goto err; 426 if (!BN_MONT_CTX_set(mont, m, ctx)) 427 goto err; 428 } 429 430 if (a->neg || BN_ucmp(a, m) >= 0) { 431 if (!BN_nnmod(val[0], a,m, ctx)) 432 goto err; 433 aa = val[0]; 434 } else 435 aa = a; 436 if (BN_is_zero(aa)) { 437 BN_zero(rr); 438 ret = 1; 439 goto err; 440 } 441 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 442 goto err; /* 1 */ 443 444 window = BN_window_bits_for_exponent_size(bits); 445 if (window > 1) { 446 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 447 goto err; /* 2 */ 448 j = 1 << (window - 1); 449 for (i = 1; i < j; i++) { 450 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 451 !BN_mod_mul_montgomery(val[i], val[i - 1], 452 d, mont, ctx)) 453 goto err; 454 } 455 } 456 457 start = 1; /* This is used to avoid multiplication etc 458 * when there is only the value '1' in the 459 * buffer. */ 460 wvalue = 0; /* The 'value' of the window */ 461 wstart = bits - 1; /* The top bit of the window */ 462 wend = 0; /* The bottom bit of the window */ 463 464 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 465 goto err; 466 for (;;) { 467 if (BN_is_bit_set(p, wstart) == 0) { 468 if (!start) { 469 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 470 goto err; 471 } 472 if (wstart == 0) 473 break; 474 wstart--; 475 continue; 476 } 477 /* We now have wstart on a 'set' bit, we now need to work out 478 * how bit a window to do. To do this we need to scan 479 * forward until the last set bit before the end of the 480 * window */ 481 j = wstart; 482 wvalue = 1; 483 wend = 0; 484 for (i = 1; i < window; i++) { 485 if (wstart - i < 0) 486 break; 487 if (BN_is_bit_set(p, wstart - i)) { 488 wvalue <<= (i - wend); 489 wvalue |= 1; 490 wend = i; 491 } 492 } 493 494 /* wend is the size of the current window */ 495 j = wend + 1; 496 /* add the 'bytes above' */ 497 if (!start) 498 for (i = 0; i < j; i++) { 499 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 500 goto err; 501 } 502 503 /* wvalue will be an odd number < 2^window */ 504 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 505 goto err; 506 507 /* move the 'window' down further */ 508 wstart -= wend + 1; 509 wvalue = 0; 510 start = 0; 511 if (wstart < 0) 512 break; 513 } 514 if (!BN_from_montgomery(rr, r,mont, ctx)) 515 goto err; 516 ret = 1; 517 518 err: 519 if ((in_mont == NULL) && (mont != NULL)) 520 BN_MONT_CTX_free(mont); 521 BN_CTX_end(ctx); 522 bn_check_top(rr); 523 return (ret); 524 } 525 526 527 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout 528 * so that accessing any of these table values shows the same access pattern as far 529 * as cache lines are concerned. The following functions are used to transfer a BIGNUM 530 * from/to that table. */ 531 532 static int 533 MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, 534 int idx, int width) 535 { 536 size_t i, j; 537 538 if (top > b->top) 539 top = b->top; /* this works because 'buf' is explicitly zeroed */ 540 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 541 buf[j] = ((unsigned char*)b->d)[i]; 542 } 543 544 return 1; 545 } 546 547 static int 548 MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, 549 int width) 550 { 551 size_t i, j; 552 553 if (bn_wexpand(b, top) == NULL) 554 return 0; 555 556 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 557 ((unsigned char*)b->d)[i] = buf[j]; 558 } 559 560 b->top = top; 561 bn_correct_top(b); 562 return 1; 563 } 564 565 /* Given a pointer value, compute the next address that is a cache line multiple. */ 566 #define MOD_EXP_CTIME_ALIGN(x_) \ 567 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 568 569 /* This variant of BN_mod_exp_mont() uses fixed windows and the special 570 * precomputation memory layout to limit data-dependency to a minimum 571 * to protect secret exponents (cf. the hyper-threading timing attacks 572 * pointed out by Colin Percival, 573 * http://www.daemonology.net/hyperthreading-considered-harmful/) 574 */ 575 int 576 BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 577 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 578 { 579 int i, bits, ret = 0, window, wvalue; 580 int top; 581 BN_MONT_CTX *mont = NULL; 582 int numPowers; 583 unsigned char *powerbufFree = NULL; 584 int powerbufLen = 0; 585 unsigned char *powerbuf = NULL; 586 BIGNUM tmp, am; 587 588 bn_check_top(a); 589 bn_check_top(p); 590 bn_check_top(m); 591 592 top = m->top; 593 594 if (!(m->d[0] & 1)) { 595 BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, 596 BN_R_CALLED_WITH_EVEN_MODULUS); 597 return (0); 598 } 599 bits = BN_num_bits(p); 600 if (bits == 0) { 601 ret = BN_one(rr); 602 return ret; 603 } 604 605 BN_CTX_start(ctx); 606 607 /* Allocate a montgomery context if it was not supplied by the caller. 608 * If this is not done, things will break in the montgomery part. 609 */ 610 if (in_mont != NULL) 611 mont = in_mont; 612 else { 613 if ((mont = BN_MONT_CTX_new()) == NULL) 614 goto err; 615 if (!BN_MONT_CTX_set(mont, m, ctx)) 616 goto err; 617 } 618 619 /* Get the window size to use with size of p. */ 620 window = BN_window_bits_for_ctime_exponent_size(bits); 621 #if defined(OPENSSL_BN_ASM_MONT5) 622 if (window == 6 && bits <= 1024) 623 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 624 #endif 625 626 /* Allocate a buffer large enough to hold all of the pre-computed 627 * powers of am, am itself and tmp. 628 */ 629 numPowers = 1 << window; 630 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 631 ((2*top) > numPowers ? (2*top) : numPowers)); 632 if ((powerbufFree = malloc(powerbufLen + 633 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) 634 goto err; 635 636 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 637 memset(powerbuf, 0, powerbufLen); 638 639 /* lay down tmp and am right after powers table */ 640 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 641 am.d = tmp.d + top; 642 tmp.top = am.top = 0; 643 tmp.dmax = am.dmax = top; 644 tmp.neg = am.neg = 0; 645 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 646 647 /* prepare a^0 in Montgomery domain */ 648 #if 1 649 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 650 goto err; 651 #else 652 tmp.d[0] = (0 - m - >d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 653 for (i = 1; i < top; i++) 654 tmp.d[i] = (~m->d[i]) & BN_MASK2; 655 tmp.top = top; 656 #endif 657 658 /* prepare a^1 in Montgomery domain */ 659 if (a->neg || BN_ucmp(a, m) >= 0) { 660 if (!BN_mod(&am, a,m, ctx)) 661 goto err; 662 if (!BN_to_montgomery(&am, &am, mont, ctx)) 663 goto err; 664 } else if (!BN_to_montgomery(&am, a,mont, ctx)) 665 goto err; 666 667 #if defined(OPENSSL_BN_ASM_MONT5) 668 /* This optimization uses ideas from http://eprint.iacr.org/2011/239, 669 * specifically optimization of cache-timing attack countermeasures 670 * and pre-computation optimization. */ 671 672 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 673 * 512-bit RSA is hardly relevant, we omit it to spare size... */ 674 if (window == 5 && top > 1) { 675 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 676 const void *table, const BN_ULONG *np, 677 const BN_ULONG *n0, int num, int power); 678 void bn_scatter5(const BN_ULONG *inp, size_t num, 679 void *table, size_t power); 680 void bn_gather5(BN_ULONG *out, size_t num, 681 void *table, size_t power); 682 683 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 684 685 /* BN_to_montgomery can contaminate words above .top 686 * [in BN_DEBUG[_DEBUG] build]... */ 687 for (i = am.top; i < top; i++) 688 am.d[i] = 0; 689 for (i = tmp.top; i < top; i++) 690 tmp.d[i] = 0; 691 692 bn_scatter5(tmp.d, top, powerbuf, 0); 693 bn_scatter5(am.d, am.top, powerbuf, 1); 694 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 695 bn_scatter5(tmp.d, top, powerbuf, 2); 696 697 #if 0 698 for (i = 3; i < 32; i++) { 699 /* Calculate a^i = a^(i-1) * a */ 700 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 701 n0, top, i - 1); 702 bn_scatter5(tmp.d, top, powerbuf, i); 703 } 704 #else 705 /* same as above, but uses squaring for 1/2 of operations */ 706 for (i = 4; i < 32; i*=2) { 707 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 708 bn_scatter5(tmp.d, top, powerbuf, i); 709 } 710 for (i = 3; i < 8; i += 2) { 711 int j; 712 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 713 n0, top, i - 1); 714 bn_scatter5(tmp.d, top, powerbuf, i); 715 for (j = 2 * i; j < 32; j *= 2) { 716 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 717 bn_scatter5(tmp.d, top, powerbuf, j); 718 } 719 } 720 for (; i < 16; i += 2) { 721 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 722 n0, top, i - 1); 723 bn_scatter5(tmp.d, top, powerbuf, i); 724 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 725 bn_scatter5(tmp.d, top, powerbuf, 2*i); 726 } 727 for (; i < 32; i += 2) { 728 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 729 n0, top, i - 1); 730 bn_scatter5(tmp.d, top, powerbuf, i); 731 } 732 #endif 733 bits--; 734 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 735 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 736 bn_gather5(tmp.d, top, powerbuf, wvalue); 737 738 /* Scan the exponent one window at a time starting from the most 739 * significant bits. 740 */ 741 while (bits >= 0) { 742 for (wvalue = 0, i = 0; i < 5; i++, bits--) 743 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 744 745 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 746 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 747 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 748 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 749 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 750 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 751 } 752 753 tmp.top = top; 754 bn_correct_top(&tmp); 755 } else 756 #endif 757 { 758 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, 759 numPowers)) 760 goto err; 761 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, 762 numPowers)) 763 goto err; 764 765 /* If the window size is greater than 1, then calculate 766 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 767 * (even powers could instead be computed as (a^(i/2))^2 768 * to use the slight performance advantage of sqr over mul). 769 */ 770 if (window > 1) { 771 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 772 goto err; 773 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 774 2, numPowers)) 775 goto err; 776 for (i = 3; i < numPowers; i++) { 777 /* Calculate a^i = a^(i-1) * a */ 778 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, 779 mont, ctx)) 780 goto err; 781 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, 782 powerbuf, i, numPowers)) 783 goto err; 784 } 785 } 786 787 bits--; 788 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 789 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 790 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, 791 wvalue, numPowers)) 792 goto err; 793 794 /* Scan the exponent one window at a time starting from the most 795 * significant bits. 796 */ 797 while (bits >= 0) { 798 wvalue = 0; /* The 'value' of the window */ 799 800 /* Scan the window, squaring the result as we go */ 801 for (i = 0; i < window; i++, bits--) { 802 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, 803 mont, ctx)) 804 goto err; 805 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 806 } 807 808 /* Fetch the appropriate pre-computed value from the pre-buf */ 809 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, 810 wvalue, numPowers)) 811 goto err; 812 813 /* Multiply the result into the intermediate result */ 814 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 815 goto err; 816 } 817 } 818 819 /* Convert the final result from montgomery to standard format */ 820 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 821 goto err; 822 ret = 1; 823 824 err: 825 if ((in_mont == NULL) && (mont != NULL)) 826 BN_MONT_CTX_free(mont); 827 if (powerbuf != NULL) { 828 explicit_bzero(powerbuf, powerbufLen); 829 free(powerbufFree); 830 } 831 BN_CTX_end(ctx); 832 return (ret); 833 } 834 835 int 836 BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, 837 BN_CTX *ctx, BN_MONT_CTX *in_mont) 838 { 839 BN_MONT_CTX *mont = NULL; 840 int b, bits, ret = 0; 841 int r_is_one; 842 BN_ULONG w, next_w; 843 BIGNUM *d, *r, *t; 844 BIGNUM *swap_tmp; 845 846 #define BN_MOD_MUL_WORD(r, w, m) \ 847 (BN_mul_word(r, (w)) && \ 848 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 849 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 850 /* BN_MOD_MUL_WORD is only used with 'w' large, 851 * so the BN_ucmp test is probably more overhead 852 * than always using BN_mod (which uses BN_copy if 853 * a similar test returns true). */ 854 /* We can use BN_mod and do not need BN_nnmod because our 855 * accumulator is never negative (the result of BN_mod does 856 * not depend on the sign of the modulus). 857 */ 858 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 859 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 860 861 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 862 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 863 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, 864 ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 865 return -1; 866 } 867 868 bn_check_top(p); 869 bn_check_top(m); 870 871 if (!BN_is_odd(m)) { 872 BNerr(BN_F_BN_MOD_EXP_MONT_WORD, BN_R_CALLED_WITH_EVEN_MODULUS); 873 return (0); 874 } 875 if (m->top == 1) 876 a %= m->d[0]; /* make sure that 'a' is reduced */ 877 878 bits = BN_num_bits(p); 879 if (bits == 0) { 880 ret = BN_one(rr); 881 return ret; 882 } 883 if (a == 0) { 884 BN_zero(rr); 885 ret = 1; 886 return ret; 887 } 888 889 BN_CTX_start(ctx); 890 if ((d = BN_CTX_get(ctx)) == NULL) 891 goto err; 892 if ((r = BN_CTX_get(ctx)) == NULL) 893 goto err; 894 if ((t = BN_CTX_get(ctx)) == NULL) 895 goto err; 896 897 if (in_mont != NULL) 898 mont = in_mont; 899 else { 900 if ((mont = BN_MONT_CTX_new()) == NULL) 901 goto err; 902 if (!BN_MONT_CTX_set(mont, m, ctx)) 903 goto err; 904 } 905 906 r_is_one = 1; /* except for Montgomery factor */ 907 908 /* bits-1 >= 0 */ 909 910 /* The result is accumulated in the product r*w. */ 911 w = a; /* bit 'bits-1' of 'p' is always set */ 912 for (b = bits - 2; b >= 0; b--) { 913 /* First, square r*w. */ 914 next_w = w * w; 915 if ((next_w / w) != w) /* overflow */ 916 { 917 if (r_is_one) { 918 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 919 goto err; 920 r_is_one = 0; 921 } else { 922 if (!BN_MOD_MUL_WORD(r, w, m)) 923 goto err; 924 } 925 next_w = 1; 926 } 927 w = next_w; 928 if (!r_is_one) { 929 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 930 goto err; 931 } 932 933 /* Second, multiply r*w by 'a' if exponent bit is set. */ 934 if (BN_is_bit_set(p, b)) { 935 next_w = w * a; 936 if ((next_w / a) != w) /* overflow */ 937 { 938 if (r_is_one) { 939 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 940 goto err; 941 r_is_one = 0; 942 } else { 943 if (!BN_MOD_MUL_WORD(r, w, m)) 944 goto err; 945 } 946 next_w = a; 947 } 948 w = next_w; 949 } 950 } 951 952 /* Finally, set r:=r*w. */ 953 if (w != 1) { 954 if (r_is_one) { 955 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 956 goto err; 957 r_is_one = 0; 958 } else { 959 if (!BN_MOD_MUL_WORD(r, w, m)) 960 goto err; 961 } 962 } 963 964 if (r_is_one) /* can happen only if a == 1*/ 965 { 966 if (!BN_one(rr)) 967 goto err; 968 } else { 969 if (!BN_from_montgomery(rr, r, mont, ctx)) 970 goto err; 971 } 972 ret = 1; 973 974 err: 975 if ((in_mont == NULL) && (mont != NULL)) 976 BN_MONT_CTX_free(mont); 977 BN_CTX_end(ctx); 978 bn_check_top(rr); 979 return (ret); 980 } 981 982 983 /* The old fallback, simple version :-) */ 984 int 985 BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 986 BN_CTX *ctx) 987 { 988 int i, j,bits, ret = 0, wstart, wend, window, wvalue; 989 int start = 1; 990 BIGNUM *d; 991 /* Table of variables obtained from 'ctx' */ 992 BIGNUM *val[TABLE_SIZE]; 993 994 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 995 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 996 BNerr(BN_F_BN_MOD_EXP_SIMPLE, 997 ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 998 return -1; 999 } 1000 1001 bits = BN_num_bits(p); 1002 1003 if (bits == 0) { 1004 ret = BN_one(r); 1005 return ret; 1006 } 1007 1008 BN_CTX_start(ctx); 1009 if ((d = BN_CTX_get(ctx)) == NULL) 1010 goto err; 1011 if ((val[0] = BN_CTX_get(ctx)) == NULL) 1012 goto err; 1013 1014 if (!BN_nnmod(val[0],a,m,ctx)) 1015 goto err; /* 1 */ 1016 if (BN_is_zero(val[0])) { 1017 BN_zero(r); 1018 ret = 1; 1019 goto err; 1020 } 1021 1022 window = BN_window_bits_for_exponent_size(bits); 1023 if (window > 1) { 1024 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1025 goto err; /* 2 */ 1026 j = 1 << (window - 1); 1027 for (i = 1; i < j; i++) { 1028 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1029 !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) 1030 goto err; 1031 } 1032 } 1033 1034 start = 1; /* This is used to avoid multiplication etc 1035 * when there is only the value '1' in the 1036 * buffer. */ 1037 wvalue = 0; /* The 'value' of the window */ 1038 wstart = bits - 1; /* The top bit of the window */ 1039 wend = 0; /* The bottom bit of the window */ 1040 1041 if (!BN_one(r)) 1042 goto err; 1043 1044 for (;;) { 1045 if (BN_is_bit_set(p, wstart) == 0) { 1046 if (!start) 1047 if (!BN_mod_mul(r, r, r, m, ctx)) 1048 goto err; 1049 if (wstart == 0) 1050 break; 1051 wstart--; 1052 continue; 1053 } 1054 /* We now have wstart on a 'set' bit, we now need to work out 1055 * how bit a window to do. To do this we need to scan 1056 * forward until the last set bit before the end of the 1057 * window */ 1058 j = wstart; 1059 wvalue = 1; 1060 wend = 0; 1061 for (i = 1; i < window; i++) { 1062 if (wstart - i < 0) 1063 break; 1064 if (BN_is_bit_set(p, wstart - i)) { 1065 wvalue <<= (i - wend); 1066 wvalue |= 1; 1067 wend = i; 1068 } 1069 } 1070 1071 /* wend is the size of the current window */ 1072 j = wend + 1; 1073 /* add the 'bytes above' */ 1074 if (!start) 1075 for (i = 0; i < j; i++) { 1076 if (!BN_mod_mul(r, r, r, m, ctx)) 1077 goto err; 1078 } 1079 1080 /* wvalue will be an odd number < 2^window */ 1081 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1082 goto err; 1083 1084 /* move the 'window' down further */ 1085 wstart -= wend + 1; 1086 wvalue = 0; 1087 start = 0; 1088 if (wstart < 0) 1089 break; 1090 } 1091 ret = 1; 1092 1093 err: 1094 BN_CTX_end(ctx); 1095 bn_check_top(r); 1096 return (ret); 1097 } 1098