1 /* $OpenBSD: bn_exp.c,v 1.31 2017/05/02 03:59:44 deraadt Exp $ */ 2 /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) 3 * All rights reserved. 4 * 5 * This package is an SSL implementation written 6 * by Eric Young (eay@cryptsoft.com). 7 * The implementation was written so as to conform with Netscapes SSL. 8 * 9 * This library is free for commercial and non-commercial use as long as 10 * the following conditions are aheared to. The following conditions 11 * apply to all code found in this distribution, be it the RC4, RSA, 12 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 13 * included with this distribution is covered by the same copyright terms 14 * except that the holder is Tim Hudson (tjh@cryptsoft.com). 15 * 16 * Copyright remains Eric Young's, and as such any Copyright notices in 17 * the code are not to be removed. 18 * If this package is used in a product, Eric Young should be given attribution 19 * as the author of the parts of the library used. 20 * This can be in the form of a textual message at program startup or 21 * in documentation (online or textual) provided with the package. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 1. Redistributions of source code must retain the copyright 27 * notice, this list of conditions and the following disclaimer. 28 * 2. Redistributions in binary form must reproduce the above copyright 29 * notice, this list of conditions and the following disclaimer in the 30 * documentation and/or other materials provided with the distribution. 31 * 3. All advertising materials mentioning features or use of this software 32 * must display the following acknowledgement: 33 * "This product includes cryptographic software written by 34 * Eric Young (eay@cryptsoft.com)" 35 * The word 'cryptographic' can be left out if the rouines from the library 36 * being used are not cryptographic related :-). 37 * 4. If you include any Windows specific code (or a derivative thereof) from 38 * the apps directory (application code) you must include an acknowledgement: 39 * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" 40 * 41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 44 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 51 * SUCH DAMAGE. 52 * 53 * The licence and distribution terms for any publically available version or 54 * derivative of this code cannot be changed. i.e. this code cannot simply be 55 * copied and put under another distribution licence 56 * [including the GNU Public Licence.] 57 */ 58 /* ==================================================================== 59 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 60 * 61 * Redistribution and use in source and binary forms, with or without 62 * modification, are permitted provided that the following conditions 63 * are met: 64 * 65 * 1. Redistributions of source code must retain the above copyright 66 * notice, this list of conditions and the following disclaimer. 67 * 68 * 2. Redistributions in binary form must reproduce the above copyright 69 * notice, this list of conditions and the following disclaimer in 70 * the documentation and/or other materials provided with the 71 * distribution. 72 * 73 * 3. All advertising materials mentioning features or use of this 74 * software must display the following acknowledgment: 75 * "This product includes software developed by the OpenSSL Project 76 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 77 * 78 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 79 * endorse or promote products derived from this software without 80 * prior written permission. For written permission, please contact 81 * openssl-core@openssl.org. 82 * 83 * 5. Products derived from this software may not be called "OpenSSL" 84 * nor may "OpenSSL" appear in their names without prior written 85 * permission of the OpenSSL Project. 86 * 87 * 6. Redistributions of any form whatsoever must retain the following 88 * acknowledgment: 89 * "This product includes software developed by the OpenSSL Project 90 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 93 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 95 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 96 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 97 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 98 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 99 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 101 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 102 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 103 * OF THE POSSIBILITY OF SUCH DAMAGE. 104 * ==================================================================== 105 * 106 * This product includes cryptographic software written by Eric Young 107 * (eay@cryptsoft.com). This product includes software written by Tim 108 * Hudson (tjh@cryptsoft.com). 109 * 110 */ 111 112 #include <stdlib.h> 113 #include <string.h> 114 115 #include <openssl/err.h> 116 117 #include "bn_lcl.h" 118 #include "constant_time_locl.h" 119 120 /* maximum precomputation table size for *variable* sliding windows */ 121 #define TABLE_SIZE 32 122 123 /* this one works - simple but works */ 124 int 125 BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) 126 { 127 int i, bits, ret = 0; 128 BIGNUM *v, *rr; 129 130 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 131 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 132 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 133 return -1; 134 } 135 136 BN_CTX_start(ctx); 137 if ((r == a) || (r == p)) 138 rr = BN_CTX_get(ctx); 139 else 140 rr = r; 141 v = BN_CTX_get(ctx); 142 if (rr == NULL || v == NULL) 143 goto err; 144 145 if (BN_copy(v, a) == NULL) 146 goto err; 147 bits = BN_num_bits(p); 148 149 if (BN_is_odd(p)) { 150 if (BN_copy(rr, a) == NULL) 151 goto err; 152 } else { 153 if (!BN_one(rr)) 154 goto err; 155 } 156 157 for (i = 1; i < bits; i++) { 158 if (!BN_sqr(v, v, ctx)) 159 goto err; 160 if (BN_is_bit_set(p, i)) { 161 if (!BN_mul(rr, rr, v, ctx)) 162 goto err; 163 } 164 } 165 ret = 1; 166 167 err: 168 if (r != rr && rr != NULL) 169 BN_copy(r, rr); 170 BN_CTX_end(ctx); 171 bn_check_top(r); 172 return (ret); 173 } 174 175 static int 176 BN_mod_exp_internal(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 177 BN_CTX *ctx, int ct) 178 { 179 int ret; 180 181 bn_check_top(a); 182 bn_check_top(p); 183 bn_check_top(m); 184 185 /* For even modulus m = 2^k*m_odd, it might make sense to compute 186 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 187 * exponentiation for the odd part), using appropriate exponent 188 * reductions, and combine the results using the CRT. 189 * 190 * For now, we use Montgomery only if the modulus is odd; otherwise, 191 * exponentiation using the reciprocal-based quick remaindering 192 * algorithm is used. 193 * 194 * (Timing obtained with expspeed.c [computations a^p mod m 195 * where a, p, m are of the same length: 256, 512, 1024, 2048, 196 * 4096, 8192 bits], compared to the running time of the 197 * standard algorithm: 198 * 199 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 200 * 55 .. 77 % [UltraSparc processor, but 201 * debug-solaris-sparcv8-gcc conf.] 202 * 203 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 204 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 205 * 206 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 207 * at 2048 and more bits, but at 512 and 1024 bits, it was 208 * slower even than the standard algorithm! 209 * 210 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 211 * should be obtained when the new Montgomery reduction code 212 * has been integrated into OpenSSL.) 213 */ 214 215 if (BN_is_odd(m)) { 216 if (a->top == 1 && !a->neg && !ct) { 217 BN_ULONG A = a->d[0]; 218 ret = BN_mod_exp_mont_word(r, A,p, m,ctx, NULL); 219 } else 220 ret = BN_mod_exp_mont_ct(r, a,p, m,ctx, NULL); 221 } else { 222 ret = BN_mod_exp_recp(r, a,p, m, ctx); 223 } 224 225 bn_check_top(r); 226 return (ret); 227 } 228 229 int 230 BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 231 BN_CTX *ctx) 232 { 233 return BN_mod_exp_internal(r, a, p, m, ctx, 234 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); 235 } 236 237 int 238 BN_mod_exp_ct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 239 BN_CTX *ctx) 240 { 241 return BN_mod_exp_internal(r, a, p, m, ctx, 1); 242 } 243 244 245 int 246 BN_mod_exp_nonct(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 247 BN_CTX *ctx) 248 { 249 return BN_mod_exp_internal(r, a, p, m, ctx, 0); 250 } 251 252 253 int 254 BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 255 BN_CTX *ctx) 256 { 257 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 258 int start = 1; 259 BIGNUM *aa; 260 /* Table of variables obtained from 'ctx' */ 261 BIGNUM *val[TABLE_SIZE]; 262 BN_RECP_CTX recp; 263 264 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 265 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 266 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 267 return -1; 268 } 269 270 bits = BN_num_bits(p); 271 if (bits == 0) { 272 /* x**0 mod 1 is still zero. */ 273 if (BN_is_one(m)) { 274 ret = 1; 275 BN_zero(r); 276 } else 277 ret = BN_one(r); 278 return ret; 279 } 280 281 BN_CTX_start(ctx); 282 if ((aa = BN_CTX_get(ctx)) == NULL) 283 goto err; 284 if ((val[0] = BN_CTX_get(ctx)) == NULL) 285 goto err; 286 287 BN_RECP_CTX_init(&recp); 288 if (m->neg) { 289 /* ignore sign of 'm' */ 290 if (!BN_copy(aa, m)) 291 goto err; 292 aa->neg = 0; 293 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) 294 goto err; 295 } else { 296 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) 297 goto err; 298 } 299 300 if (!BN_nnmod(val[0], a, m, ctx)) 301 goto err; /* 1 */ 302 if (BN_is_zero(val[0])) { 303 BN_zero(r); 304 ret = 1; 305 goto err; 306 } 307 308 window = BN_window_bits_for_exponent_size(bits); 309 if (window > 1) { 310 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) 311 goto err; /* 2 */ 312 j = 1 << (window - 1); 313 for (i = 1; i < j; i++) { 314 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 315 !BN_mod_mul_reciprocal(val[i], val[i - 1], 316 aa, &recp, ctx)) 317 goto err; 318 } 319 } 320 321 start = 1; /* This is used to avoid multiplication etc 322 * when there is only the value '1' in the 323 * buffer. */ 324 wvalue = 0; /* The 'value' of the window */ 325 wstart = bits - 1; /* The top bit of the window */ 326 wend = 0; /* The bottom bit of the window */ 327 328 if (!BN_one(r)) 329 goto err; 330 331 for (;;) { 332 if (BN_is_bit_set(p, wstart) == 0) { 333 if (!start) 334 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 335 goto err; 336 if (wstart == 0) 337 break; 338 wstart--; 339 continue; 340 } 341 /* We now have wstart on a 'set' bit, we now need to work out 342 * how bit a window to do. To do this we need to scan 343 * forward until the last set bit before the end of the 344 * window */ 345 j = wstart; 346 wvalue = 1; 347 wend = 0; 348 for (i = 1; i < window; i++) { 349 if (wstart - i < 0) 350 break; 351 if (BN_is_bit_set(p, wstart - i)) { 352 wvalue <<= (i - wend); 353 wvalue |= 1; 354 wend = i; 355 } 356 } 357 358 /* wend is the size of the current window */ 359 j = wend + 1; 360 /* add the 'bytes above' */ 361 if (!start) 362 for (i = 0; i < j; i++) { 363 if (!BN_mod_mul_reciprocal(r, r,r, &recp, ctx)) 364 goto err; 365 } 366 367 /* wvalue will be an odd number < 2^window */ 368 if (!BN_mod_mul_reciprocal(r, r,val[wvalue >> 1], &recp, ctx)) 369 goto err; 370 371 /* move the 'window' down further */ 372 wstart -= wend + 1; 373 wvalue = 0; 374 start = 0; 375 if (wstart < 0) 376 break; 377 } 378 ret = 1; 379 380 err: 381 BN_CTX_end(ctx); 382 BN_RECP_CTX_free(&recp); 383 bn_check_top(r); 384 return (ret); 385 } 386 387 static int 388 BN_mod_exp_mont_internal(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 389 BN_CTX *ctx, BN_MONT_CTX *in_mont, int ct) 390 { 391 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 392 int start = 1; 393 BIGNUM *d, *r; 394 const BIGNUM *aa; 395 /* Table of variables obtained from 'ctx' */ 396 BIGNUM *val[TABLE_SIZE]; 397 BN_MONT_CTX *mont = NULL; 398 399 if (ct) { 400 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 401 } 402 403 bn_check_top(a); 404 bn_check_top(p); 405 bn_check_top(m); 406 407 if (!BN_is_odd(m)) { 408 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 409 return (0); 410 } 411 412 bits = BN_num_bits(p); 413 if (bits == 0) { 414 /* x**0 mod 1 is still zero. */ 415 if (BN_is_one(m)) { 416 ret = 1; 417 BN_zero(rr); 418 } else 419 ret = BN_one(rr); 420 return ret; 421 } 422 423 BN_CTX_start(ctx); 424 if ((d = BN_CTX_get(ctx)) == NULL) 425 goto err; 426 if ((r = BN_CTX_get(ctx)) == NULL) 427 goto err; 428 if ((val[0] = BN_CTX_get(ctx)) == NULL) 429 goto err; 430 431 /* If this is not done, things will break in the montgomery 432 * part */ 433 434 if (in_mont != NULL) 435 mont = in_mont; 436 else { 437 if ((mont = BN_MONT_CTX_new()) == NULL) 438 goto err; 439 if (!BN_MONT_CTX_set(mont, m, ctx)) 440 goto err; 441 } 442 443 if (a->neg || BN_ucmp(a, m) >= 0) { 444 if (!BN_nnmod(val[0], a,m, ctx)) 445 goto err; 446 aa = val[0]; 447 } else 448 aa = a; 449 if (BN_is_zero(aa)) { 450 BN_zero(rr); 451 ret = 1; 452 goto err; 453 } 454 if (!BN_to_montgomery(val[0], aa, mont, ctx)) 455 goto err; /* 1 */ 456 457 window = BN_window_bits_for_exponent_size(bits); 458 if (window > 1) { 459 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) 460 goto err; /* 2 */ 461 j = 1 << (window - 1); 462 for (i = 1; i < j; i++) { 463 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 464 !BN_mod_mul_montgomery(val[i], val[i - 1], 465 d, mont, ctx)) 466 goto err; 467 } 468 } 469 470 start = 1; /* This is used to avoid multiplication etc 471 * when there is only the value '1' in the 472 * buffer. */ 473 wvalue = 0; /* The 'value' of the window */ 474 wstart = bits - 1; /* The top bit of the window */ 475 wend = 0; /* The bottom bit of the window */ 476 477 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) 478 goto err; 479 for (;;) { 480 if (BN_is_bit_set(p, wstart) == 0) { 481 if (!start) { 482 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 483 goto err; 484 } 485 if (wstart == 0) 486 break; 487 wstart--; 488 continue; 489 } 490 /* We now have wstart on a 'set' bit, we now need to work out 491 * how bit a window to do. To do this we need to scan 492 * forward until the last set bit before the end of the 493 * window */ 494 j = wstart; 495 wvalue = 1; 496 wend = 0; 497 for (i = 1; i < window; i++) { 498 if (wstart - i < 0) 499 break; 500 if (BN_is_bit_set(p, wstart - i)) { 501 wvalue <<= (i - wend); 502 wvalue |= 1; 503 wend = i; 504 } 505 } 506 507 /* wend is the size of the current window */ 508 j = wend + 1; 509 /* add the 'bytes above' */ 510 if (!start) 511 for (i = 0; i < j; i++) { 512 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 513 goto err; 514 } 515 516 /* wvalue will be an odd number < 2^window */ 517 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) 518 goto err; 519 520 /* move the 'window' down further */ 521 wstart -= wend + 1; 522 wvalue = 0; 523 start = 0; 524 if (wstart < 0) 525 break; 526 } 527 if (!BN_from_montgomery(rr, r,mont, ctx)) 528 goto err; 529 ret = 1; 530 531 err: 532 if ((in_mont == NULL) && (mont != NULL)) 533 BN_MONT_CTX_free(mont); 534 BN_CTX_end(ctx); 535 bn_check_top(rr); 536 return (ret); 537 } 538 539 int 540 BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 541 BN_CTX *ctx, BN_MONT_CTX *in_mont) 542 { 543 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 544 (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)); 545 } 546 547 int 548 BN_mod_exp_mont_ct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 549 BN_CTX *ctx, BN_MONT_CTX *in_mont) 550 { 551 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 1); 552 } 553 554 int 555 BN_mod_exp_mont_nonct(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 556 BN_CTX *ctx, BN_MONT_CTX *in_mont) 557 { 558 return BN_mod_exp_mont_internal(rr, a, p, m, ctx, in_mont, 0); 559 } 560 561 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout 562 * so that accessing any of these table values shows the same access pattern as far 563 * as cache lines are concerned. The following functions are used to transfer a BIGNUM 564 * from/to that table. */ 565 566 static int 567 MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, 568 int idx, int window) 569 { 570 int i, j; 571 int width = 1 << window; 572 BN_ULONG *table = (BN_ULONG *)buf; 573 574 if (top > b->top) 575 top = b->top; /* this works because 'buf' is explicitly zeroed */ 576 577 for (i = 0, j = idx; i < top; i++, j += width) { 578 table[j] = b->d[i]; 579 } 580 581 return 1; 582 } 583 584 static int 585 MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, 586 int window) 587 { 588 int i, j; 589 int width = 1 << window; 590 volatile BN_ULONG *table = (volatile BN_ULONG *)buf; 591 592 if (bn_wexpand(b, top) == NULL) 593 return 0; 594 595 if (window <= 3) { 596 for (i = 0; i < top; i++, table += width) { 597 BN_ULONG acc = 0; 598 599 for (j = 0; j < width; j++) { 600 acc |= table[j] & 601 ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 602 } 603 604 b->d[i] = acc; 605 } 606 } else { 607 int xstride = 1 << (window - 2); 608 BN_ULONG y0, y1, y2, y3; 609 610 i = idx >> (window - 2); /* equivalent of idx / xstride */ 611 idx &= xstride - 1; /* equivalent of idx % xstride */ 612 613 y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1); 614 y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1); 615 y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1); 616 y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1); 617 618 for (i = 0; i < top; i++, table += width) { 619 BN_ULONG acc = 0; 620 621 for (j = 0; j < xstride; j++) { 622 acc |= ( (table[j + 0 * xstride] & y0) | 623 (table[j + 1 * xstride] & y1) | 624 (table[j + 2 * xstride] & y2) | 625 (table[j + 3 * xstride] & y3) ) 626 & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1)); 627 } 628 629 b->d[i] = acc; 630 } 631 } 632 b->top = top; 633 bn_correct_top(b); 634 return 1; 635 } 636 637 /* Given a pointer value, compute the next address that is a cache line multiple. */ 638 #define MOD_EXP_CTIME_ALIGN(x_) \ 639 ((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 640 641 /* This variant of BN_mod_exp_mont() uses fixed windows and the special 642 * precomputation memory layout to limit data-dependency to a minimum 643 * to protect secret exponents (cf. the hyper-threading timing attacks 644 * pointed out by Colin Percival, 645 * http://www.daemonology.net/hyperthreading-considered-harmful/) 646 */ 647 int 648 BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 649 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) 650 { 651 int i, bits, ret = 0, window, wvalue; 652 int top; 653 BN_MONT_CTX *mont = NULL; 654 int numPowers; 655 unsigned char *powerbufFree = NULL; 656 int powerbufLen = 0; 657 unsigned char *powerbuf = NULL; 658 BIGNUM tmp, am; 659 660 bn_check_top(a); 661 bn_check_top(p); 662 bn_check_top(m); 663 664 if (!BN_is_odd(m)) { 665 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 666 return (0); 667 } 668 669 top = m->top; 670 671 bits = BN_num_bits(p); 672 if (bits == 0) { 673 /* x**0 mod 1 is still zero. */ 674 if (BN_is_one(m)) { 675 ret = 1; 676 BN_zero(rr); 677 } else 678 ret = BN_one(rr); 679 return ret; 680 } 681 682 BN_CTX_start(ctx); 683 684 /* Allocate a montgomery context if it was not supplied by the caller. 685 * If this is not done, things will break in the montgomery part. 686 */ 687 if (in_mont != NULL) 688 mont = in_mont; 689 else { 690 if ((mont = BN_MONT_CTX_new()) == NULL) 691 goto err; 692 if (!BN_MONT_CTX_set(mont, m, ctx)) 693 goto err; 694 } 695 696 /* Get the window size to use with size of p. */ 697 window = BN_window_bits_for_ctime_exponent_size(bits); 698 #if defined(OPENSSL_BN_ASM_MONT5) 699 if (window == 6 && bits <= 1024) 700 window = 5; /* ~5% improvement of 2048-bit RSA sign */ 701 #endif 702 703 /* Allocate a buffer large enough to hold all of the pre-computed 704 * powers of am, am itself and tmp. 705 */ 706 numPowers = 1 << window; 707 powerbufLen = sizeof(m->d[0]) * (top * numPowers + 708 ((2*top) > numPowers ? (2*top) : numPowers)); 709 if ((powerbufFree = calloc(powerbufLen + 710 MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, 1)) == NULL) 711 goto err; 712 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 713 714 /* lay down tmp and am right after powers table */ 715 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 716 am.d = tmp.d + top; 717 tmp.top = am.top = 0; 718 tmp.dmax = am.dmax = top; 719 tmp.neg = am.neg = 0; 720 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 721 722 /* prepare a^0 in Montgomery domain */ 723 #if 1 724 if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) 725 goto err; 726 #else 727 tmp.d[0] = (0 - m - >d[0]) & BN_MASK2; /* 2^(top*BN_BITS2) - m */ 728 for (i = 1; i < top; i++) 729 tmp.d[i] = (~m->d[i]) & BN_MASK2; 730 tmp.top = top; 731 #endif 732 733 /* prepare a^1 in Montgomery domain */ 734 if (a->neg || BN_ucmp(a, m) >= 0) { 735 if (!BN_mod_ct(&am, a,m, ctx)) 736 goto err; 737 if (!BN_to_montgomery(&am, &am, mont, ctx)) 738 goto err; 739 } else if (!BN_to_montgomery(&am, a,mont, ctx)) 740 goto err; 741 742 #if defined(OPENSSL_BN_ASM_MONT5) 743 /* This optimization uses ideas from http://eprint.iacr.org/2011/239, 744 * specifically optimization of cache-timing attack countermeasures 745 * and pre-computation optimization. */ 746 747 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 748 * 512-bit RSA is hardly relevant, we omit it to spare size... */ 749 if (window == 5 && top > 1) { 750 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, 751 const void *table, const BN_ULONG *np, 752 const BN_ULONG *n0, int num, int power); 753 void bn_scatter5(const BN_ULONG *inp, size_t num, 754 void *table, size_t power); 755 void bn_gather5(BN_ULONG *out, size_t num, 756 void *table, size_t power); 757 758 BN_ULONG *np = mont->N.d, *n0 = mont->n0; 759 760 /* BN_to_montgomery can contaminate words above .top 761 * [in BN_DEBUG[_DEBUG] build]... */ 762 for (i = am.top; i < top; i++) 763 am.d[i] = 0; 764 for (i = tmp.top; i < top; i++) 765 tmp.d[i] = 0; 766 767 bn_scatter5(tmp.d, top, powerbuf, 0); 768 bn_scatter5(am.d, am.top, powerbuf, 1); 769 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 770 bn_scatter5(tmp.d, top, powerbuf, 2); 771 772 #if 0 773 for (i = 3; i < 32; i++) { 774 /* Calculate a^i = a^(i-1) * a */ 775 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 776 n0, top, i - 1); 777 bn_scatter5(tmp.d, top, powerbuf, i); 778 } 779 #else 780 /* same as above, but uses squaring for 1/2 of operations */ 781 for (i = 4; i < 32; i*=2) { 782 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 783 bn_scatter5(tmp.d, top, powerbuf, i); 784 } 785 for (i = 3; i < 8; i += 2) { 786 int j; 787 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 788 n0, top, i - 1); 789 bn_scatter5(tmp.d, top, powerbuf, i); 790 for (j = 2 * i; j < 32; j *= 2) { 791 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 792 bn_scatter5(tmp.d, top, powerbuf, j); 793 } 794 } 795 for (; i < 16; i += 2) { 796 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 797 n0, top, i - 1); 798 bn_scatter5(tmp.d, top, powerbuf, i); 799 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 800 bn_scatter5(tmp.d, top, powerbuf, 2*i); 801 } 802 for (; i < 32; i += 2) { 803 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, 804 n0, top, i - 1); 805 bn_scatter5(tmp.d, top, powerbuf, i); 806 } 807 #endif 808 bits--; 809 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) 810 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 811 bn_gather5(tmp.d, top, powerbuf, wvalue); 812 813 /* Scan the exponent one window at a time starting from the most 814 * significant bits. 815 */ 816 while (bits >= 0) { 817 for (wvalue = 0, i = 0; i < 5; i++, bits--) 818 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 819 820 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 821 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 822 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 823 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 824 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 825 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 826 } 827 828 tmp.top = top; 829 bn_correct_top(&tmp); 830 } else 831 #endif 832 { 833 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, 834 window)) 835 goto err; 836 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, 837 window)) 838 goto err; 839 840 /* If the window size is greater than 1, then calculate 841 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 842 * (even powers could instead be computed as (a^(i/2))^2 843 * to use the slight performance advantage of sqr over mul). 844 */ 845 if (window > 1) { 846 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) 847 goto err; 848 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 849 2, window)) 850 goto err; 851 for (i = 3; i < numPowers; i++) { 852 /* Calculate a^i = a^(i-1) * a */ 853 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, 854 mont, ctx)) 855 goto err; 856 if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, 857 powerbuf, i, window)) 858 goto err; 859 } 860 } 861 862 bits--; 863 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) 864 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 865 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, 866 wvalue, window)) 867 goto err; 868 869 /* Scan the exponent one window at a time starting from the most 870 * significant bits. 871 */ 872 while (bits >= 0) { 873 wvalue = 0; /* The 'value' of the window */ 874 875 /* Scan the window, squaring the result as we go */ 876 for (i = 0; i < window; i++, bits--) { 877 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, 878 mont, ctx)) 879 goto err; 880 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 881 } 882 883 /* Fetch the appropriate pre-computed value from the pre-buf */ 884 if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, 885 wvalue, window)) 886 goto err; 887 888 /* Multiply the result into the intermediate result */ 889 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) 890 goto err; 891 } 892 } 893 894 /* Convert the final result from montgomery to standard format */ 895 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) 896 goto err; 897 ret = 1; 898 899 err: 900 if ((in_mont == NULL) && (mont != NULL)) 901 BN_MONT_CTX_free(mont); 902 freezero(powerbufFree, powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 903 BN_CTX_end(ctx); 904 return (ret); 905 } 906 907 int 908 BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, const BIGNUM *m, 909 BN_CTX *ctx, BN_MONT_CTX *in_mont) 910 { 911 BN_MONT_CTX *mont = NULL; 912 int b, bits, ret = 0; 913 int r_is_one; 914 BN_ULONG w, next_w; 915 BIGNUM *d, *r, *t; 916 BIGNUM *swap_tmp; 917 918 #define BN_MOD_MUL_WORD(r, w, m) \ 919 (BN_mul_word(r, (w)) && \ 920 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 921 (BN_mod_ct(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 922 /* BN_MOD_MUL_WORD is only used with 'w' large, 923 * so the BN_ucmp test is probably more overhead 924 * than always using BN_mod (which uses BN_copy if 925 * a similar test returns true). */ 926 /* We can use BN_mod and do not need BN_nnmod because our 927 * accumulator is never negative (the result of BN_mod does 928 * not depend on the sign of the modulus). 929 */ 930 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 931 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 932 933 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 934 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 935 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 936 return -1; 937 } 938 939 bn_check_top(p); 940 bn_check_top(m); 941 942 if (!BN_is_odd(m)) { 943 BNerror(BN_R_CALLED_WITH_EVEN_MODULUS); 944 return (0); 945 } 946 if (m->top == 1) 947 a %= m->d[0]; /* make sure that 'a' is reduced */ 948 949 bits = BN_num_bits(p); 950 if (bits == 0) { 951 /* x**0 mod 1 is still zero. */ 952 if (BN_is_one(m)) { 953 ret = 1; 954 BN_zero(rr); 955 } else 956 ret = BN_one(rr); 957 return ret; 958 } 959 if (a == 0) { 960 BN_zero(rr); 961 ret = 1; 962 return ret; 963 } 964 965 BN_CTX_start(ctx); 966 if ((d = BN_CTX_get(ctx)) == NULL) 967 goto err; 968 if ((r = BN_CTX_get(ctx)) == NULL) 969 goto err; 970 if ((t = BN_CTX_get(ctx)) == NULL) 971 goto err; 972 973 if (in_mont != NULL) 974 mont = in_mont; 975 else { 976 if ((mont = BN_MONT_CTX_new()) == NULL) 977 goto err; 978 if (!BN_MONT_CTX_set(mont, m, ctx)) 979 goto err; 980 } 981 982 r_is_one = 1; /* except for Montgomery factor */ 983 984 /* bits-1 >= 0 */ 985 986 /* The result is accumulated in the product r*w. */ 987 w = a; /* bit 'bits-1' of 'p' is always set */ 988 for (b = bits - 2; b >= 0; b--) { 989 /* First, square r*w. */ 990 next_w = w * w; 991 if ((next_w / w) != w) /* overflow */ 992 { 993 if (r_is_one) { 994 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 995 goto err; 996 r_is_one = 0; 997 } else { 998 if (!BN_MOD_MUL_WORD(r, w, m)) 999 goto err; 1000 } 1001 next_w = 1; 1002 } 1003 w = next_w; 1004 if (!r_is_one) { 1005 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) 1006 goto err; 1007 } 1008 1009 /* Second, multiply r*w by 'a' if exponent bit is set. */ 1010 if (BN_is_bit_set(p, b)) { 1011 next_w = w * a; 1012 if ((next_w / a) != w) /* overflow */ 1013 { 1014 if (r_is_one) { 1015 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1016 goto err; 1017 r_is_one = 0; 1018 } else { 1019 if (!BN_MOD_MUL_WORD(r, w, m)) 1020 goto err; 1021 } 1022 next_w = a; 1023 } 1024 w = next_w; 1025 } 1026 } 1027 1028 /* Finally, set r:=r*w. */ 1029 if (w != 1) { 1030 if (r_is_one) { 1031 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) 1032 goto err; 1033 r_is_one = 0; 1034 } else { 1035 if (!BN_MOD_MUL_WORD(r, w, m)) 1036 goto err; 1037 } 1038 } 1039 1040 if (r_is_one) /* can happen only if a == 1*/ 1041 { 1042 if (!BN_one(rr)) 1043 goto err; 1044 } else { 1045 if (!BN_from_montgomery(rr, r, mont, ctx)) 1046 goto err; 1047 } 1048 ret = 1; 1049 1050 err: 1051 if ((in_mont == NULL) && (mont != NULL)) 1052 BN_MONT_CTX_free(mont); 1053 BN_CTX_end(ctx); 1054 bn_check_top(rr); 1055 return (ret); 1056 } 1057 1058 1059 /* The old fallback, simple version :-) */ 1060 int 1061 BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 1062 BN_CTX *ctx) 1063 { 1064 int i, j, bits, ret = 0, wstart, wend, window, wvalue; 1065 int start = 1; 1066 BIGNUM *d; 1067 /* Table of variables obtained from 'ctx' */ 1068 BIGNUM *val[TABLE_SIZE]; 1069 1070 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1071 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1072 BNerror(ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1073 return -1; 1074 } 1075 1076 bits = BN_num_bits(p); 1077 if (bits == 0) { 1078 /* x**0 mod 1 is still zero. */ 1079 if (BN_is_one(m)) { 1080 ret = 1; 1081 BN_zero(r); 1082 } else 1083 ret = BN_one(r); 1084 return ret; 1085 } 1086 1087 BN_CTX_start(ctx); 1088 if ((d = BN_CTX_get(ctx)) == NULL) 1089 goto err; 1090 if ((val[0] = BN_CTX_get(ctx)) == NULL) 1091 goto err; 1092 1093 if (!BN_nnmod(val[0],a,m,ctx)) 1094 goto err; /* 1 */ 1095 if (BN_is_zero(val[0])) { 1096 BN_zero(r); 1097 ret = 1; 1098 goto err; 1099 } 1100 1101 window = BN_window_bits_for_exponent_size(bits); 1102 if (window > 1) { 1103 if (!BN_mod_mul(d, val[0], val[0], m, ctx)) 1104 goto err; /* 2 */ 1105 j = 1 << (window - 1); 1106 for (i = 1; i < j; i++) { 1107 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 1108 !BN_mod_mul(val[i], val[i - 1], d,m, ctx)) 1109 goto err; 1110 } 1111 } 1112 1113 start = 1; /* This is used to avoid multiplication etc 1114 * when there is only the value '1' in the 1115 * buffer. */ 1116 wvalue = 0; /* The 'value' of the window */ 1117 wstart = bits - 1; /* The top bit of the window */ 1118 wend = 0; /* The bottom bit of the window */ 1119 1120 if (!BN_one(r)) 1121 goto err; 1122 1123 for (;;) { 1124 if (BN_is_bit_set(p, wstart) == 0) { 1125 if (!start) 1126 if (!BN_mod_mul(r, r, r, m, ctx)) 1127 goto err; 1128 if (wstart == 0) 1129 break; 1130 wstart--; 1131 continue; 1132 } 1133 /* We now have wstart on a 'set' bit, we now need to work out 1134 * how bit a window to do. To do this we need to scan 1135 * forward until the last set bit before the end of the 1136 * window */ 1137 j = wstart; 1138 wvalue = 1; 1139 wend = 0; 1140 for (i = 1; i < window; i++) { 1141 if (wstart - i < 0) 1142 break; 1143 if (BN_is_bit_set(p, wstart - i)) { 1144 wvalue <<= (i - wend); 1145 wvalue |= 1; 1146 wend = i; 1147 } 1148 } 1149 1150 /* wend is the size of the current window */ 1151 j = wend + 1; 1152 /* add the 'bytes above' */ 1153 if (!start) 1154 for (i = 0; i < j; i++) { 1155 if (!BN_mod_mul(r, r, r, m, ctx)) 1156 goto err; 1157 } 1158 1159 /* wvalue will be an odd number < 2^window */ 1160 if (!BN_mod_mul(r, r, val[wvalue >> 1], m, ctx)) 1161 goto err; 1162 1163 /* move the 'window' down further */ 1164 wstart -= wend + 1; 1165 wvalue = 0; 1166 start = 0; 1167 if (wstart < 0) 1168 break; 1169 } 1170 ret = 1; 1171 1172 err: 1173 BN_CTX_end(ctx); 1174 bn_check_top(r); 1175 return (ret); 1176 } 1177