1 /* $OpenBSD: bn_exp.c,v 1.32 2022/04/20 13:32:34 tb Exp $ */ 2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 /* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112 #include <stdlib.h> 113 #include <string.h> 114 115 #include <openssl/err.h> 116 117 #include "bn_lcl.h" 118 #include "constant_time_locl.h" 119 120 /* maximum precomputation table size for *variable* sliding windows */ 121 #define TABLE_SIZE 32 122 123 /* this one works - simple but works */ 124 int 125 BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 126 { 127 int i, bits, ret = 0; 128 BIGNUM *v, *rr; 129 130 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 131 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 132 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 133 return -1; 134 } 135 136 BN_CTX_start(ctx); 137 if ((r == a) || (r == p)) 138 rr = BN_CTX_get(ctx); 139 else 140 rr = r; 141 v = BN_CTX_get(ctx); 142 if (rr == NULL || v == NULL) 143 goto err; 144 145 if (BN_copy(v, a) == NULL) 146 goto err; 147 bits = BN_num_bits(p); 148 149 if (BN_is_odd(p)) { 150 if (BN_copy(rr, a) == NULL) 151 goto err; 152 } else { 153 if (!BN_one(rr)) 154 goto err; 155 } 156 157 for (i = 1; i < bits; i++) { 158 if (!BN_sqr(v, v, ctx)) 159 goto err; 160 if (BN_is_bit_set(p, i)) { 161 if (!BN_mul(rr, rr, v, ctx)) 162 goto err; 163 } 164 } 165 ret = 1; 166 167 err: 168 if (r != rr && rr != NULL) 169 BN_copy(r, rr); 170 BN_CTX_end(ctx); 171 bn_check_top(r); 172 return (ret); 173 } 174 175 static int 176 BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 177 BN_CTX *ctx, int ct) 178 { 179 int ret; 180 181 bn_check_top(a); 182 bn_check_top(p); 183 bn_check_top(m); 184 185 /* For even modulus m = 2^k*m_odd, it might make sense to compute 186 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 187 * exponentiation for the odd part), using appropriate exponent 188 * reductions, and combine the results using the CRT. 189 * 190 * For now, we use Montgomery only if the modulus is odd; otherwise, 191 * exponentiation using the reciprocal-based quick remaindering 192 * algorithm is used. 193 * 194 * (Timing obtained with expspeed.c [computations a^p mod m 195 * where a, p, m are of the same length: 256, 512, 1024, 2048, 196 * 4096, 8192 bits], compared to the running time of the 197 * standard algorithm: 198 * 199 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 200 * 55 .. 77 % [UltraSparc processor, but 201 * debug-solaris-sparcv8-gcc conf.] 202 * 203 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 204 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 205 * 206 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 207 * at 2048 and more bits, but at 512 and 1024 bits, it was 208 * slower even than the standard algorithm! 209 * 210 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 211 * should be obtained when the new Montgomery reduction code 212 * has been integrated into OpenSSL.) 213 */ 214 215 if (BN_is_odd(m)) { 216 if (a->top == 1 && !a->neg && !ct) { 217 BN_ULONG A = a->d[0]; 218 ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); 219 } else 220 ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL); 221 } else { 222 ret = BN_mod_exp_recp(r, a,p, m, ctx); 223 } 224 225 bn_check_top(r); 226 return (ret); 227 } 228 229 int 230 BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 231 BN_CTX *ctx) 232 { 233 return BN_mod_exp_internal(r, a, p, m, ctx, 234 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); 235 } 236 237 int 238 BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 239 BN_CTX *ctx) 240 { 241 return BN_mod_exp_internal(r, a, p, m, ctx, 1); 242 } 243 244 245 int 246 BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 247 BN_CTX *ctx) 248 { 249 return BN_mod_exp_internal(r, a, p, m, ctx, 0); 250 } 251 252 253 int 254 BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 255 BN_CTX *ctx) 256 { 257 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 258 int start = 1; 259 BIGNUM *aa; 260 /* Table of variables obtained from 'ctx' */ 261 BIGNUM *val[TABLE_SIZE]; 262 BN_RECP_CTX recp; 263 264 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 265 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 266 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 267 return -1; 268 } 269 270 bits = BN_num_bits(p); 271 if (bits == 0) { 272 /* x**0 mod 1 is still zero. */ 273 if (BN_is_one(m)) { 274 ret = 1; 275 BN_zero(r); 276 } else 277 ret = BN_one(r); 278 return ret; 279 } 280 281 BN_RECP_CTX_init(&recp); 282 283 BN_CTX_start(ctx); 284 if ((aa = BN_CTX_get(ctx)) == NULL) 285 goto err; 286 if ((val[0] = BN_CTX_get(ctx)) == NULL) 287 goto err; 288 289 if (m->neg) { 290 /* ignore sign of 'm' */ 291 if (!BN_copy(aa, m)) 292 goto err; 293 aa->neg = 0; 294 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 295 goto err; 296 } else { 297 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 298 goto err; 299 } 300 301 if (!BN_nnmod(val[0], a, m, ctx)) 302 goto err; /* 1 */ 303 if (BN_is_zero(val[0])) { 304 BN_zero(r); 305 ret = 1; 306 goto err; 307 } 308 309 window = BN_window_bits_for_exponent_size(bits); 310 if (window > 1) { 311 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 312 goto err; /* 2 */ 313 j = 1 << (window - 1); 314 for (i = 1; i < j; i++) { 315 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 316 !BN_mod_mul_reciprocal(val[i], val[i - 1], 317 aa, &recp, ctx)) 318 goto err; 319 } 320 } 321 322 start = 1; /* This is used to avoid multiplication etc 323 * when there is only the value '1' in the 324 * buffer. */ 325 wvalue = 0; /* The 'value' of the window */ 326 wstart = bits - 1; /* The top bit of the window */ 327 wend = 0; /* The bottom bit of the window */ 328 329 if (!BN_one(r)) 330 goto err; 331 332 for (;;) { 333 if (BN_is_bit_set(p, wstart) == 0) { 334 if (!start) 335 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 336 goto err; 337 if (wstart == 0) 338 break; 339 wstart--; 340 continue; 341 } 342 /* We now have wstart on a 'set' bit, we now need to work out 343 * how bit a window to do. To do this we need to scan 344 * forward until the last set bit before the end of the 345 * window */ 346 j = wstart; 347 wvalue = 1; 348 wend = 0; 349 for (i = 1; i < window; i++) { 350 if (wstart - i < 0) 351 break; 352 if (BN_is_bit_set(p, wstart - i)) { 353 wvalue <<= (i - wend); 354 wvalue |= 1; 355 wend = i; 356 } 357 } 358 359 /* wend is the size of the current window */ 360 j = wend + 1; 361 /* add the 'bytes above' */ 362 if (!start) 363 for (i = 0; i < j; i++) { 364 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 365 goto err; 366 } 367 368 /* wvalue will be an odd number < 2^window */ 369 if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx)) 370 goto err; 371 372 /* move the 'window' down further */ 373 wstart -= wend + 1; 374 wvalue = 0; 375 start = 0; 376 if (wstart < 0) 377 break; 378 } 379 ret = 1; 380 381 err: 382 BN_CTX_end(ctx); 383 BN_RECP_CTX_free(&recp); 384 bn_check_top(r); 385 return (ret); 386 } 387 388 static int 389 BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 390 BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct) 391 { 392 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 393 int start = 1; 394 BIGNUM *d, *r; 395 const BIGNUM *aa; 396 /* Table of variables obtained from 'ctx' */ 397 BIGNUM *val[TABLE_SIZE]; 398 BN_MONT_CTX *mont = NULL; 399 400 if (ct) { 401 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 402 } 403 404 bn_check_top(a); 405 bn_check_top(p); 406 bn_check_top(m); 407 408 if (!BN_is_odd(m)) { 409 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 410 return (0); 411 } 412 413 bits = BN_num_bits(p); 414 if (bits == 0) { 415 /* x**0 mod 1 is still zero. */ 416 if (BN_is_one(m)) { 417 ret = 1; 418 BN_zero(rr); 419 } else 420 ret = BN_one(rr); 421 return ret; 422 } 423 424 BN_CTX_start(ctx); 425 if ((d = BN_CTX_get(ctx)) == NULL) 426 goto err; 427 if ((r = BN_CTX_get(ctx)) == NULL) 428 goto err; 429 if ((val[0] = BN_CTX_get(ctx)) == NULL) 430 goto err; 431 432 /* If this is not done, things will break in the montgomery 433 * part */ 434 435 if (in_mont != NULL) 436 mont = in_mont; 437 else { 438 if ((mont = BN_MONT_CTX_new()) == NULL) 439 goto err; 440 if (!BN_MONT_CTX_set(mont, m, ctx)) 441 goto err; 442 } 443 444 if (a->neg || BN_ucmp(a, m) >= 0) { 445 if (!BN_nnmod(val[0], a,m, ctx)) 446 goto err; 447 aa = val[0]; 448 } else 449 aa = a; 450 if (BN_is_zero(aa)) { 451 BN_zero(rr); 452 ret = 1; 453 goto err; 454 } 455 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 456 goto err; /* 1 */ 457 458 window = BN_window_bits_for_exponent_size(bits); 459 if (window > 1) { 460 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 461 goto err; /* 2 */ 462 j = 1 << (window - 1); 463 for (i = 1; i < j; i++) { 464 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 465 !BN_mod_mul_montgomery(val[i], val[i - 1], 466 d, mont, ctx)) 467 goto err; 468 } 469 } 470 471 start = 1; /* This is used to avoid multiplication etc 472 * when there is only the value '1' in the 473 * buffer. */ 474 wvalue = 0; /* The 'value' of the window */ 475 wstart = bits - 1; /* The top bit of the window */ 476 wend = 0; /* The bottom bit of the window */ 477 478 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 479 goto err; 480 for (;;) { 481 if (BN_is_bit_set(p, wstart) == 0) { 482 if (!start) { 483 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 484 goto err; 485 } 486 if (wstart == 0) 487 break; 488 wstart--; 489 continue; 490 } 491 /* We now have wstart on a 'set' bit, we now need to work out 492 * how bit a window to do. To do this we need to scan 493 * forward until the last set bit before the end of the 494 * window */ 495 j = wstart; 496 wvalue = 1; 497 wend = 0; 498 for (i = 1; i < window; i++) { 499 if (wstart - i < 0) 500 break; 501 if (BN_is_bit_set(p, wstart - i)) { 502 wvalue <<= (i - wend); 503 wvalue |= 1; 504 wend = i; 505 } 506 } 507 508 /* wend is the size of the current window */ 509 j = wend + 1; 510 /* add the 'bytes above' */ 511 if (!start) 512 for (i = 0; i < j; i++) { 513 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 514 goto err; 515 } 516 517 /* wvalue will be an odd number < 2^window */ 518 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 519 goto err; 520 521 /* move the 'window' down further */ 522 wstart -= wend + 1; 523 wvalue = 0; 524 start = 0; 525 if (wstart < 0) 526 break; 527 } 528 if (!BN_from_montgomery(rr, r,mont, ctx)) 529 goto err; 530 ret = 1; 531 532 err: 533 if ((in_mont == NULL) && (mont != NULL)) 534 BN_MONT_CTX_free(mont); 535 BN_CTX_end(ctx); 536 bn_check_top(rr); 537 return (ret); 538 } 539 540 int 541 BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 542 BN_CTX *ctx, BN_MONT_CTX *in_mont) 543 { 544 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 545 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); 546 } 547 548 int 549 BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 550 BN_CTX *ctx, BN_MONT_CTX *in_mont) 551 { 552 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1); 553 } 554 555 int 556 BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 557 BN_CTX *ctx, BN_MONT_CTX *in_mont) 558 { 559 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0); 560 } 561 562 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout 563 * so that accessing any of these table values shows the same access pattern as far 564 * as cache lines are concerned. The following functions are used to transfer a BIGNUM 565 * from/to that table. */ 566 567 static int 568 MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, 569 int idx, int window) 570 { 571 int i, j; 572 int width = 1 << window; 573 BN_ULONG *table = (BN_ULONG *)buf; 574 575 if (top > b->top) 576 top = b->top; /* this works because 'buf' is explicitly zeroed */ 577 578 for (i = 0, j = idx; i < top; i++, j += width) { 579 table[j] = b->d[i]; 580 } 581 582 return 1; 583 } 584 585 static int 586 MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, 587 int window) 588 { 589 int i, j; 590 int width = 1 << window; 591 volatile BN_ULONG *table = (volatile BN_ULONG *)buf; 592 593 if (bn_wexpand(b, top) == NULL) 594 return 0; 595 596 if (window <= 3) { 597 for (i = 0; i < top; i++, table += width) { 598 BN_ULONG acc = 0; 599 600 for (j = 0; j < width; j++) { 601 acc |= table[j] & 602 ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 603 } 604 605 b->d[i] = acc; 606 } 607 } else { 608 int xstride = 1 << (window - 2); 609 BN_ULONG y0, y1, y2, y3; 610 611 i = idx >> (window - 2); /* equivalent of idx / xstride */ 612 idx &= xstride - 1; /* equivalent of idx % xstride */ 613 614 y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); 615 y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); 616 y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); 617 y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); 618 619 for (i = 0; i < top; i++, table += width) { 620 BN_ULONG acc = 0; 621 622 for (j = 0; j < xstride; j++) { 623 acc |= ( (table[j + 0 * xstride] & y0) | 624 (table[j + 1 * xstride] & y1) | 625 (table[j + 2 * xstride] & y2) | 626 (table[j + 3 * xstride] & y3) ) 627 & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 628 } 629 630 b->d[i] = acc; 631 } 632 } 633 b->top = top; 634 bn_correct_top(b); 635 return 1; 636 } 637 638 /* Given a pointer value, compute the next address that is a cache line multiple. */ 639 #define MOD_EXP_CTIME_ALIGN(x_) \ 640 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 641 642 /* This variant of BN_mod_exp_mont() uses fixed windows and the special 643 * precomputation memory layout to limit data-dependency to a minimum 644 * to protect secret exponents (cf. the hyper-threading timing attacks 645 * pointed out by Colin Percival, 646 * http://www.daemonology.net/hyperthreading-considered-harmful/) 647 */ 648 int 649 BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 650 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 651 { 652 int i, bits, ret = 0, window, wvalue; 653 int top; 654 BN_MONT_CTX *mont = NULL; 655 int numPowers; 656 unsigned char *powerbufFree = NULL; 657 int powerbufLen = 0; 658 unsigned char *powerbuf = NULL; 659 BIGNUM tmp, am; 660 661 bn_check_top(a); 662 bn_check_top(p); 663 bn_check_top(m); 664 665 if (!BN_is_odd(m)) { 666 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 667 return (0); 668 } 669 670 top = m->top; 671 672 bits = BN_num_bits(p); 673 if (bits == 0) { 674 /* x**0 mod 1 is still zero. */ 675 if (BN_is_one(m)) { 676 ret = 1; 677 BN_zero(rr); 678 } else 679 ret = BN_one(rr); 680 return ret; 681 } 682 683 BN_CTX_start(ctx); 684 685 /* Allocate a montgomery context if it was not supplied by the caller. 686 * If this is not done, things will break in the montgomery part. 687 */ 688 if (in_mont != NULL) 689 mont = in_mont; 690 else { 691 if ((mont = BN_MONT_CTX_new()) == NULL) 692 goto err; 693 if (!BN_MONT_CTX_set(mont, m, ctx)) 694 goto err; 695 } 696 697 /* Get the window size to use with size of p. */ 698 window = BN_window_bits_for_ctime_exponent_size(bits); 699 #if defined(OPENSSL_BN_ASM_MONT5) 700 if (window == 6 && bits <= 1024) 701 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 702 #endif 703 704 /* Allocate a buffer large enough to hold all of the pre-computed 705 * powers of am, am itself and tmp. 706 */ 707 numPowers = 1 << window; 708 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 709 ((2*top) > numPowers ? (2*top) : numPowers)); 710 if ((powerbufFree = calloc(powerbufLen + 711 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL) 712 goto err; 713 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 714 715 /* lay down tmp and am right after powers table */ 716 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 717 am.d = tmp.d + top; 718 tmp.top = am.top = 0; 719 tmp.dmax = am.dmax = top; 720 tmp.neg = am.neg = 0; 721 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 722 723 /* prepare a^0 in Montgomery domain */ 724 #if 1 725 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 726 goto err; 727 #else 728 tmp.d[0] = (0 - m - >d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 729 for (i = 1; i < top; i++) 730 tmp.d[i] = (~m->d[i]) & BN_MASK2; 731 tmp.top = top; 732 #endif 733 734 /* prepare a^1 in Montgomery domain */ 735 if (a->neg || BN_ucmp(a, m) >= 0) { 736 if (!BN_mod_ct(&am, a,m, ctx)) 737 goto err; 738 if (!BN_to_montgomery(&am, &am, mont, ctx)) 739 goto err; 740 } else if (!BN_to_montgomery(&am, a,mont, ctx)) 741 goto err; 742 743 #if defined(OPENSSL_BN_ASM_MONT5) 744 /* This optimization uses ideas from http://eprint.iacr.org/2011/239, 745 * specifically optimization of cache-timing attack countermeasures 746 * and pre-computation optimization. */ 747 748 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 749 * 512-bit RSA is hardly relevant, we omit it to spare size... */ 750 if (window == 5 && top > 1) { 751 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 752 const void *table, const BN_ULONG *np, 753 const BN_ULONG *n0, int num, int power); 754 void bn_scatter5(const BN_ULONG *inp, size_t num, 755 void *table, size_t power); 756 void bn_gather5(BN_ULONG *out, size_t num, 757 void *table, size_t power); 758 759 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 760 761 /* BN_to_montgomery can contaminate words above .top 762 * [in BN_DEBUG[_DEBUG] build]... */ 763 for (i = am.top; i < top; i++) 764 am.d[i] = 0; 765 for (i = tmp.top; i < top; i++) 766 tmp.d[i] = 0; 767 768 bn_scatter5(tmp.d, top, powerbuf, 0); 769 bn_scatter5(am.d, am.top, powerbuf, 1); 770 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 771 bn_scatter5(tmp.d, top, powerbuf, 2); 772 773 #if 0 774 for (i = 3; i < 32; i++) { 775 /* Calculate a^i = a^(i-1) * a */ 776 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 777 n0, top, i - 1); 778 bn_scatter5(tmp.d, top, powerbuf, i); 779 } 780 #else 781 /* same as above, but uses squaring for 1/2 of operations */ 782 for (i = 4; i < 32; i*=2) { 783 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 784 bn_scatter5(tmp.d, top, powerbuf, i); 785 } 786 for (i = 3; i < 8; i += 2) { 787 int j; 788 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 789 n0, top, i - 1); 790 bn_scatter5(tmp.d, top, powerbuf, i); 791 for (j = 2 * i; j < 32; j *= 2) { 792 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 793 bn_scatter5(tmp.d, top, powerbuf, j); 794 } 795 } 796 for (; i < 16; i += 2) { 797 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 798 n0, top, i - 1); 799 bn_scatter5(tmp.d, top, powerbuf, i); 800 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 801 bn_scatter5(tmp.d, top, powerbuf, 2*i); 802 } 803 for (; i < 32; i += 2) { 804 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 805 n0, top, i - 1); 806 bn_scatter5(tmp.d, top, powerbuf, i); 807 } 808 #endif 809 bits--; 810 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 811 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 812 bn_gather5(tmp.d, top, powerbuf, wvalue); 813 814 /* Scan the exponent one window at a time starting from the most 815 * significant bits. 816 */ 817 while (bits >= 0) { 818 for (wvalue = 0, i = 0; i < 5; i++, bits--) 819 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 820 821 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 822 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 823 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 824 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 825 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 826 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 827 } 828 829 tmp.top = top; 830 bn_correct_top(&tmp); 831 } else 832 #endif 833 { 834 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, 835 window)) 836 goto err; 837 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, 838 window)) 839 goto err; 840 841 /* If the window size is greater than 1, then calculate 842 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 843 * (even powers could instead be computed as (a^(i/2))^2 844 * to use the slight performance advantage of sqr over mul). 845 */ 846 if (window > 1) { 847 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 848 goto err; 849 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 850 2, window)) 851 goto err; 852 for (i = 3; i < numPowers; i++) { 853 /* Calculate a^i = a^(i-1) * a */ 854 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, 855 mont, ctx)) 856 goto err; 857 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, 858 powerbuf, i, window)) 859 goto err; 860 } 861 } 862 863 bits--; 864 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 865 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 866 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, 867 wvalue, window)) 868 goto err; 869 870 /* Scan the exponent one window at a time starting from the most 871 * significant bits. 872 */ 873 while (bits >= 0) { 874 wvalue = 0; /* The 'value' of the window */ 875 876 /* Scan the window, squaring the result as we go */ 877 for (i = 0; i < window; i++, bits--) { 878 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, 879 mont, ctx)) 880 goto err; 881 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 882 } 883 884 /* Fetch the appropriate pre-computed value from the pre-buf */ 885 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, 886 wvalue, window)) 887 goto err; 888 889 /* Multiply the result into the intermediate result */ 890 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 891 goto err; 892 } 893 } 894 895 /* Convert the final result from montgomery to standard format */ 896 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 897 goto err; 898 ret = 1; 899 900 err: 901 if ((in_mont == NULL) && (mont != NULL)) 902 BN_MONT_CTX_free(mont); 903 freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 904 BN_CTX_end(ctx); 905 return (ret); 906 } 907 908 int 909 BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, 910 BN_CTX *ctx, BN_MONT_CTX *in_mont) 911 { 912 BN_MONT_CTX *mont = NULL; 913 int b, bits, ret = 0; 914 int r_is_one; 915 BN_ULONG w, next_w; 916 BIGNUM *d, *r, *t; 917 BIGNUM *swap_tmp; 918 919 #define BN_MOD_MUL_WORD(r, w, m) \ 920 (BN_mul_word(r, (w)) && \ 921 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 922 (BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 923 /* BN_MOD_MUL_WORD is only used with 'w' large, 924 * so the BN_ucmp test is probably more overhead 925 * than always using BN_mod (which uses BN_copy if 926 * a similar test returns true). */ 927 /* We can use BN_mod and do not need BN_nnmod because our 928 * accumulator is never negative (the result of BN_mod does 929 * not depend on the sign of the modulus). 930 */ 931 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 932 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 933 934 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 935 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 936 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 937 return -1; 938 } 939 940 bn_check_top(p); 941 bn_check_top(m); 942 943 if (!BN_is_odd(m)) { 944 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 945 return (0); 946 } 947 if (m->top == 1) 948 a %= m->d[0]; /* make sure that 'a' is reduced */ 949 950 bits = BN_num_bits(p); 951 if (bits == 0) { 952 /* x**0 mod 1 is still zero. */ 953 if (BN_is_one(m)) { 954 ret = 1; 955 BN_zero(rr); 956 } else 957 ret = BN_one(rr); 958 return ret; 959 } 960 if (a == 0) { 961 BN_zero(rr); 962 ret = 1; 963 return ret; 964 } 965 966 BN_CTX_start(ctx); 967 if ((d = BN_CTX_get(ctx)) == NULL) 968 goto err; 969 if ((r = BN_CTX_get(ctx)) == NULL) 970 goto err; 971 if ((t = BN_CTX_get(ctx)) == NULL) 972 goto err; 973 974 if (in_mont != NULL) 975 mont = in_mont; 976 else { 977 if ((mont = BN_MONT_CTX_new()) == NULL) 978 goto err; 979 if (!BN_MONT_CTX_set(mont, m, ctx)) 980 goto err; 981 } 982 983 r_is_one = 1; /* except for Montgomery factor */ 984 985 /* bits-1 >= 0 */ 986 987 /* The result is accumulated in the product r*w. */ 988 w = a; /* bit 'bits-1' of 'p' is always set */ 989 for (b = bits - 2; b >= 0; b--) { 990 /* First, square r*w. */ 991 next_w = w * w; 992 if ((next_w / w) != w) /* overflow */ 993 { 994 if (r_is_one) { 995 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 996 goto err; 997 r_is_one = 0; 998 } else { 999 if (!BN_MOD_MUL_WORD(r, w, m)) 1000 goto err; 1001 } 1002 next_w = 1; 1003 } 1004 w = next_w; 1005 if (!r_is_one) { 1006 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 1007 goto err; 1008 } 1009 1010 /* Second, multiply r*w by 'a' if exponent bit is set. */ 1011 if (BN_is_bit_set(p, b)) { 1012 next_w = w * a; 1013 if ((next_w / a) != w) /* overflow */ 1014 { 1015 if (r_is_one) { 1016 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1017 goto err; 1018 r_is_one = 0; 1019 } else { 1020 if (!BN_MOD_MUL_WORD(r, w, m)) 1021 goto err; 1022 } 1023 next_w = a; 1024 } 1025 w = next_w; 1026 } 1027 } 1028 1029 /* Finally, set r:=r*w. */ 1030 if (w != 1) { 1031 if (r_is_one) { 1032 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1033 goto err; 1034 r_is_one = 0; 1035 } else { 1036 if (!BN_MOD_MUL_WORD(r, w, m)) 1037 goto err; 1038 } 1039 } 1040 1041 if (r_is_one) /* can happen only if a == 1*/ 1042 { 1043 if (!BN_one(rr)) 1044 goto err; 1045 } else { 1046 if (!BN_from_montgomery(rr, r, mont, ctx)) 1047 goto err; 1048 } 1049 ret = 1; 1050 1051 err: 1052 if ((in_mont == NULL) && (mont != NULL)) 1053 BN_MONT_CTX_free(mont); 1054 BN_CTX_end(ctx); 1055 bn_check_top(rr); 1056 return (ret); 1057 } 1058 1059 1060 /* The old fallback, simple version :-) */ 1061 int 1062 BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1063 BN_CTX *ctx) 1064 { 1065 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 1066 int start = 1; 1067 BIGNUM *d; 1068 /* Table of variables obtained from 'ctx' */ 1069 BIGNUM *val[TABLE_SIZE]; 1070 1071 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1072 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1073 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1074 return -1; 1075 } 1076 1077 bits = BN_num_bits(p); 1078 if (bits == 0) { 1079 /* x**0 mod 1 is still zero. */ 1080 if (BN_is_one(m)) { 1081 ret = 1; 1082 BN_zero(r); 1083 } else 1084 ret = BN_one(r); 1085 return ret; 1086 } 1087 1088 BN_CTX_start(ctx); 1089 if ((d = BN_CTX_get(ctx)) == NULL) 1090 goto err; 1091 if ((val[0] = BN_CTX_get(ctx)) == NULL) 1092 goto err; 1093 1094 if (!BN_nnmod(val[0],a,m,ctx)) 1095 goto err; /* 1 */ 1096 if (BN_is_zero(val[0])) { 1097 BN_zero(r); 1098 ret = 1; 1099 goto err; 1100 } 1101 1102 window = BN_window_bits_for_exponent_size(bits); 1103 if (window > 1) { 1104 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1105 goto err; /* 2 */ 1106 j = 1 << (window - 1); 1107 for (i = 1; i < j; i++) { 1108 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1109 !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) 1110 goto err; 1111 } 1112 } 1113 1114 start = 1; /* This is used to avoid multiplication etc 1115 * when there is only the value '1' in the 1116 * buffer. */ 1117 wvalue = 0; /* The 'value' of the window */ 1118 wstart = bits - 1; /* The top bit of the window */ 1119 wend = 0; /* The bottom bit of the window */ 1120 1121 if (!BN_one(r)) 1122 goto err; 1123 1124 for (;;) { 1125 if (BN_is_bit_set(p, wstart) == 0) { 1126 if (!start) 1127 if (!BN_mod_mul(r, r, r, m, ctx)) 1128 goto err; 1129 if (wstart == 0) 1130 break; 1131 wstart--; 1132 continue; 1133 } 1134 /* We now have wstart on a 'set' bit, we now need to work out 1135 * how bit a window to do. To do this we need to scan 1136 * forward until the last set bit before the end of the 1137 * window */ 1138 j = wstart; 1139 wvalue = 1; 1140 wend = 0; 1141 for (i = 1; i < window; i++) { 1142 if (wstart - i < 0) 1143 break; 1144 if (BN_is_bit_set(p, wstart - i)) { 1145 wvalue <<= (i - wend); 1146 wvalue |= 1; 1147 wend = i; 1148 } 1149 } 1150 1151 /* wend is the size of the current window */ 1152 j = wend + 1; 1153 /* add the 'bytes above' */ 1154 if (!start) 1155 for (i = 0; i < j; i++) { 1156 if (!BN_mod_mul(r, r, r, m, ctx)) 1157 goto err; 1158 } 1159 1160 /* wvalue will be an odd number < 2^window */ 1161 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1162 goto err; 1163 1164 /* move the 'window' down further */ 1165 wstart -= wend + 1; 1166 wvalue = 0; 1167 start = 0; 1168 if (wstart < 0) 1169 break; 1170 } 1171 ret = 1; 1172 1173 err: 1174 BN_CTX_end(ctx); 1175 bn_check_top(r); 1176 return (ret); 1177 } 1178