1 /* 2 * Copyright 2001-2020 The OpenSSL Project Authors. All Rights Reserved. 3 * Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved 4 * 5 * Licensed under the OpenSSL license (the "License"). You may not use 6 * this file except in compliance with the License. You can obtain a copy 7 * in the file LICENSE in the source distribution or at 8 * https://www.openssl.org/source/license.html 9 */ 10 11 #include <string.h> 12 #include <openssl/err.h> 13 14 #include "internal/cryptlib.h" 15 #include "crypto/bn.h" 16 #include "ec_local.h" 17 #include "internal/refcount.h" 18 19 /* 20 * This file implements the wNAF-based interleaving multi-exponentiation method 21 * Formerly at: 22 * http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#multiexp 23 * You might now find it here: 24 * http://link.springer.com/chapter/10.1007%2F3-540-45537-X_13 25 * http://www.bmoeller.de/pdf/TI-01-08.multiexp.pdf 26 * For multiplication with precomputation, we use wNAF splitting, formerly at: 27 * http://www.informatik.tu-darmstadt.de/TI/Mitarbeiter/moeller.html#fastexp 28 */ 29 30 /* structure for precomputed multiples of the generator */ 31 struct ec_pre_comp_st { 32 const EC_GROUP *group; /* parent EC_GROUP object */ 33 size_t blocksize; /* block size for wNAF splitting */ 34 size_t numblocks; /* max. number of blocks for which we have 35 * precomputation */ 36 size_t w; /* window size */ 37 EC_POINT **points; /* array with pre-calculated multiples of 38 * generator: 'num' pointers to EC_POINT 39 * objects followed by a NULL */ 40 size_t num; /* numblocks * 2^(w-1) */ 41 CRYPTO_REF_COUNT references; 42 CRYPTO_RWLOCK *lock; 43 }; 44 45 static EC_PRE_COMP *ec_pre_comp_new(const EC_GROUP *group) 46 { 47 EC_PRE_COMP *ret = NULL; 48 49 if (!group) 50 return NULL; 51 52 ret = OPENSSL_zalloc(sizeof(*ret)); 53 if (ret == NULL) { 54 ECerr(EC_F_EC_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 55 return ret; 56 } 57 58 ret->group = group; 59 ret->blocksize = 8; /* default */ 60 ret->w = 4; /* default */ 61 ret->references = 1; 62 63 ret->lock = CRYPTO_THREAD_lock_new(); 64 if (ret->lock == NULL) { 65 ECerr(EC_F_EC_PRE_COMP_NEW, ERR_R_MALLOC_FAILURE); 66 OPENSSL_free(ret); 67 return NULL; 68 } 69 return ret; 70 } 71 72 EC_PRE_COMP *EC_ec_pre_comp_dup(EC_PRE_COMP *pre) 73 { 74 int i; 75 if (pre != NULL) 76 CRYPTO_UP_REF(&pre->references, &i, pre->lock); 77 return pre; 78 } 79 80 void EC_ec_pre_comp_free(EC_PRE_COMP *pre) 81 { 82 int i; 83 84 if (pre == NULL) 85 return; 86 87 CRYPTO_DOWN_REF(&pre->references, &i, pre->lock); 88 REF_PRINT_COUNT("EC_ec", pre); 89 if (i > 0) 90 return; 91 REF_ASSERT_ISNT(i < 0); 92 93 if (pre->points != NULL) { 94 EC_POINT **pts; 95 96 for (pts = pre->points; *pts != NULL; pts++) 97 EC_POINT_free(*pts); 98 OPENSSL_free(pre->points); 99 } 100 CRYPTO_THREAD_lock_free(pre->lock); 101 OPENSSL_free(pre); 102 } 103 104 #define EC_POINT_BN_set_flags(P, flags) do { \ 105 BN_set_flags((P)->X, (flags)); \ 106 BN_set_flags((P)->Y, (flags)); \ 107 BN_set_flags((P)->Z, (flags)); \ 108 } while(0) 109 110 /*- 111 * This functions computes a single point multiplication over the EC group, 112 * using, at a high level, a Montgomery ladder with conditional swaps, with 113 * various timing attack defenses. 114 * 115 * It performs either a fixed point multiplication 116 * (scalar * generator) 117 * when point is NULL, or a variable point multiplication 118 * (scalar * point) 119 * when point is not NULL. 120 * 121 * `scalar` cannot be NULL and should be in the range [0,n) otherwise all 122 * constant time bets are off (where n is the cardinality of the EC group). 123 * 124 * This function expects `group->order` and `group->cardinality` to be well 125 * defined and non-zero: it fails with an error code otherwise. 126 * 127 * NB: This says nothing about the constant-timeness of the ladder step 128 * implementation (i.e., the default implementation is based on EC_POINT_add and 129 * EC_POINT_dbl, which of course are not constant time themselves) or the 130 * underlying multiprecision arithmetic. 131 * 132 * The product is stored in `r`. 133 * 134 * This is an internal function: callers are in charge of ensuring that the 135 * input parameters `group`, `r`, `scalar` and `ctx` are not NULL. 136 * 137 * Returns 1 on success, 0 otherwise. 138 */ 139 int ec_scalar_mul_ladder(const EC_GROUP *group, EC_POINT *r, 140 const BIGNUM *scalar, const EC_POINT *point, 141 BN_CTX *ctx) 142 { 143 int i, cardinality_bits, group_top, kbit, pbit, Z_is_one; 144 EC_POINT *p = NULL; 145 EC_POINT *s = NULL; 146 BIGNUM *k = NULL; 147 BIGNUM *lambda = NULL; 148 BIGNUM *cardinality = NULL; 149 int ret = 0; 150 151 /* early exit if the input point is the point at infinity */ 152 if (point != NULL && EC_POINT_is_at_infinity(group, point)) 153 return EC_POINT_set_to_infinity(group, r); 154 155 if (BN_is_zero(group->order)) { 156 ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_UNKNOWN_ORDER); 157 return 0; 158 } 159 if (BN_is_zero(group->cofactor)) { 160 ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_UNKNOWN_COFACTOR); 161 return 0; 162 } 163 164 BN_CTX_start(ctx); 165 166 if (((p = EC_POINT_new(group)) == NULL) 167 || ((s = EC_POINT_new(group)) == NULL)) { 168 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_MALLOC_FAILURE); 169 goto err; 170 } 171 172 if (point == NULL) { 173 if (!EC_POINT_copy(p, group->generator)) { 174 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_EC_LIB); 175 goto err; 176 } 177 } else { 178 if (!EC_POINT_copy(p, point)) { 179 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_EC_LIB); 180 goto err; 181 } 182 } 183 184 EC_POINT_BN_set_flags(p, BN_FLG_CONSTTIME); 185 EC_POINT_BN_set_flags(r, BN_FLG_CONSTTIME); 186 EC_POINT_BN_set_flags(s, BN_FLG_CONSTTIME); 187 188 cardinality = BN_CTX_get(ctx); 189 lambda = BN_CTX_get(ctx); 190 k = BN_CTX_get(ctx); 191 if (k == NULL) { 192 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_MALLOC_FAILURE); 193 goto err; 194 } 195 196 if (!BN_mul(cardinality, group->order, group->cofactor, ctx)) { 197 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB); 198 goto err; 199 } 200 201 /* 202 * Group cardinalities are often on a word boundary. 203 * So when we pad the scalar, some timing diff might 204 * pop if it needs to be expanded due to carries. 205 * So expand ahead of time. 206 */ 207 cardinality_bits = BN_num_bits(cardinality); 208 group_top = bn_get_top(cardinality); 209 if ((bn_wexpand(k, group_top + 2) == NULL) 210 || (bn_wexpand(lambda, group_top + 2) == NULL)) { 211 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB); 212 goto err; 213 } 214 215 if (!BN_copy(k, scalar)) { 216 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB); 217 goto err; 218 } 219 220 BN_set_flags(k, BN_FLG_CONSTTIME); 221 222 if ((BN_num_bits(k) > cardinality_bits) || (BN_is_negative(k))) { 223 /*- 224 * this is an unusual input, and we don't guarantee 225 * constant-timeness 226 */ 227 if (!BN_nnmod(k, k, cardinality, ctx)) { 228 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB); 229 goto err; 230 } 231 } 232 233 if (!BN_add(lambda, k, cardinality)) { 234 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB); 235 goto err; 236 } 237 BN_set_flags(lambda, BN_FLG_CONSTTIME); 238 if (!BN_add(k, lambda, cardinality)) { 239 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB); 240 goto err; 241 } 242 /* 243 * lambda := scalar + cardinality 244 * k := scalar + 2*cardinality 245 */ 246 kbit = BN_is_bit_set(lambda, cardinality_bits); 247 BN_consttime_swap(kbit, k, lambda, group_top + 2); 248 249 group_top = bn_get_top(group->field); 250 if ((bn_wexpand(s->X, group_top) == NULL) 251 || (bn_wexpand(s->Y, group_top) == NULL) 252 || (bn_wexpand(s->Z, group_top) == NULL) 253 || (bn_wexpand(r->X, group_top) == NULL) 254 || (bn_wexpand(r->Y, group_top) == NULL) 255 || (bn_wexpand(r->Z, group_top) == NULL) 256 || (bn_wexpand(p->X, group_top) == NULL) 257 || (bn_wexpand(p->Y, group_top) == NULL) 258 || (bn_wexpand(p->Z, group_top) == NULL)) { 259 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_BN_LIB); 260 goto err; 261 } 262 263 /* ensure input point is in affine coords for ladder step efficiency */ 264 if (!p->Z_is_one && !EC_POINT_make_affine(group, p, ctx)) { 265 ECerr(EC_F_EC_SCALAR_MUL_LADDER, ERR_R_EC_LIB); 266 goto err; 267 } 268 269 /* Initialize the Montgomery ladder */ 270 if (!ec_point_ladder_pre(group, r, s, p, ctx)) { 271 ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_LADDER_PRE_FAILURE); 272 goto err; 273 } 274 275 /* top bit is a 1, in a fixed pos */ 276 pbit = 1; 277 278 #define EC_POINT_CSWAP(c, a, b, w, t) do { \ 279 BN_consttime_swap(c, (a)->X, (b)->X, w); \ 280 BN_consttime_swap(c, (a)->Y, (b)->Y, w); \ 281 BN_consttime_swap(c, (a)->Z, (b)->Z, w); \ 282 t = ((a)->Z_is_one ^ (b)->Z_is_one) & (c); \ 283 (a)->Z_is_one ^= (t); \ 284 (b)->Z_is_one ^= (t); \ 285 } while(0) 286 287 /*- 288 * The ladder step, with branches, is 289 * 290 * k[i] == 0: S = add(R, S), R = dbl(R) 291 * k[i] == 1: R = add(S, R), S = dbl(S) 292 * 293 * Swapping R, S conditionally on k[i] leaves you with state 294 * 295 * k[i] == 0: T, U = R, S 296 * k[i] == 1: T, U = S, R 297 * 298 * Then perform the ECC ops. 299 * 300 * U = add(T, U) 301 * T = dbl(T) 302 * 303 * Which leaves you with state 304 * 305 * k[i] == 0: U = add(R, S), T = dbl(R) 306 * k[i] == 1: U = add(S, R), T = dbl(S) 307 * 308 * Swapping T, U conditionally on k[i] leaves you with state 309 * 310 * k[i] == 0: R, S = T, U 311 * k[i] == 1: R, S = U, T 312 * 313 * Which leaves you with state 314 * 315 * k[i] == 0: S = add(R, S), R = dbl(R) 316 * k[i] == 1: R = add(S, R), S = dbl(S) 317 * 318 * So we get the same logic, but instead of a branch it's a 319 * conditional swap, followed by ECC ops, then another conditional swap. 320 * 321 * Optimization: The end of iteration i and start of i-1 looks like 322 * 323 * ... 324 * CSWAP(k[i], R, S) 325 * ECC 326 * CSWAP(k[i], R, S) 327 * (next iteration) 328 * CSWAP(k[i-1], R, S) 329 * ECC 330 * CSWAP(k[i-1], R, S) 331 * ... 332 * 333 * So instead of two contiguous swaps, you can merge the condition 334 * bits and do a single swap. 335 * 336 * k[i] k[i-1] Outcome 337 * 0 0 No Swap 338 * 0 1 Swap 339 * 1 0 Swap 340 * 1 1 No Swap 341 * 342 * This is XOR. pbit tracks the previous bit of k. 343 */ 344 345 for (i = cardinality_bits - 1; i >= 0; i--) { 346 kbit = BN_is_bit_set(k, i) ^ pbit; 347 EC_POINT_CSWAP(kbit, r, s, group_top, Z_is_one); 348 349 /* Perform a single step of the Montgomery ladder */ 350 if (!ec_point_ladder_step(group, r, s, p, ctx)) { 351 ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_LADDER_STEP_FAILURE); 352 goto err; 353 } 354 /* 355 * pbit logic merges this cswap with that of the 356 * next iteration 357 */ 358 pbit ^= kbit; 359 } 360 /* one final cswap to move the right value into r */ 361 EC_POINT_CSWAP(pbit, r, s, group_top, Z_is_one); 362 #undef EC_POINT_CSWAP 363 364 /* Finalize ladder (and recover full point coordinates) */ 365 if (!ec_point_ladder_post(group, r, s, p, ctx)) { 366 ECerr(EC_F_EC_SCALAR_MUL_LADDER, EC_R_LADDER_POST_FAILURE); 367 goto err; 368 } 369 370 ret = 1; 371 372 err: 373 EC_POINT_free(p); 374 EC_POINT_clear_free(s); 375 BN_CTX_end(ctx); 376 377 return ret; 378 } 379 380 #undef EC_POINT_BN_set_flags 381 382 /* 383 * TODO: table should be optimised for the wNAF-based implementation, 384 * sometimes smaller windows will give better performance (thus the 385 * boundaries should be increased) 386 */ 387 #define EC_window_bits_for_scalar_size(b) \ 388 ((size_t) \ 389 ((b) >= 2000 ? 6 : \ 390 (b) >= 800 ? 5 : \ 391 (b) >= 300 ? 4 : \ 392 (b) >= 70 ? 3 : \ 393 (b) >= 20 ? 2 : \ 394 1)) 395 396 /*- 397 * Compute 398 * \sum scalars[i]*points[i], 399 * also including 400 * scalar*generator 401 * in the addition if scalar != NULL 402 */ 403 int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, 404 size_t num, const EC_POINT *points[], const BIGNUM *scalars[], 405 BN_CTX *ctx) 406 { 407 const EC_POINT *generator = NULL; 408 EC_POINT *tmp = NULL; 409 size_t totalnum; 410 size_t blocksize = 0, numblocks = 0; /* for wNAF splitting */ 411 size_t pre_points_per_block = 0; 412 size_t i, j; 413 int k; 414 int r_is_inverted = 0; 415 int r_is_at_infinity = 1; 416 size_t *wsize = NULL; /* individual window sizes */ 417 signed char **wNAF = NULL; /* individual wNAFs */ 418 size_t *wNAF_len = NULL; 419 size_t max_len = 0; 420 size_t num_val; 421 EC_POINT **val = NULL; /* precomputation */ 422 EC_POINT **v; 423 EC_POINT ***val_sub = NULL; /* pointers to sub-arrays of 'val' or 424 * 'pre_comp->points' */ 425 const EC_PRE_COMP *pre_comp = NULL; 426 int num_scalar = 0; /* flag: will be set to 1 if 'scalar' must be 427 * treated like other scalars, i.e. 428 * precomputation is not available */ 429 int ret = 0; 430 431 if (!BN_is_zero(group->order) && !BN_is_zero(group->cofactor)) { 432 /*- 433 * Handle the common cases where the scalar is secret, enforcing a 434 * scalar multiplication implementation based on a Montgomery ladder, 435 * with various timing attack defenses. 436 */ 437 if ((scalar != group->order) && (scalar != NULL) && (num == 0)) { 438 /*- 439 * In this case we want to compute scalar * GeneratorPoint: this 440 * codepath is reached most prominently by (ephemeral) key 441 * generation of EC cryptosystems (i.e. ECDSA keygen and sign setup, 442 * ECDH keygen/first half), where the scalar is always secret. This 443 * is why we ignore if BN_FLG_CONSTTIME is actually set and we 444 * always call the ladder version. 445 */ 446 return ec_scalar_mul_ladder(group, r, scalar, NULL, ctx); 447 } 448 if ((scalar == NULL) && (num == 1) && (scalars[0] != group->order)) { 449 /*- 450 * In this case we want to compute scalar * VariablePoint: this 451 * codepath is reached most prominently by the second half of ECDH, 452 * where the secret scalar is multiplied by the peer's public point. 453 * To protect the secret scalar, we ignore if BN_FLG_CONSTTIME is 454 * actually set and we always call the ladder version. 455 */ 456 return ec_scalar_mul_ladder(group, r, scalars[0], points[0], ctx); 457 } 458 } 459 460 if (scalar != NULL) { 461 generator = EC_GROUP_get0_generator(group); 462 if (generator == NULL) { 463 ECerr(EC_F_EC_WNAF_MUL, EC_R_UNDEFINED_GENERATOR); 464 goto err; 465 } 466 467 /* look if we can use precomputed multiples of generator */ 468 469 pre_comp = group->pre_comp.ec; 470 if (pre_comp && pre_comp->numblocks 471 && (EC_POINT_cmp(group, generator, pre_comp->points[0], ctx) == 472 0)) { 473 blocksize = pre_comp->blocksize; 474 475 /* 476 * determine maximum number of blocks that wNAF splitting may 477 * yield (NB: maximum wNAF length is bit length plus one) 478 */ 479 numblocks = (BN_num_bits(scalar) / blocksize) + 1; 480 481 /* 482 * we cannot use more blocks than we have precomputation for 483 */ 484 if (numblocks > pre_comp->numblocks) 485 numblocks = pre_comp->numblocks; 486 487 pre_points_per_block = (size_t)1 << (pre_comp->w - 1); 488 489 /* check that pre_comp looks sane */ 490 if (pre_comp->num != (pre_comp->numblocks * pre_points_per_block)) { 491 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 492 goto err; 493 } 494 } else { 495 /* can't use precomputation */ 496 pre_comp = NULL; 497 numblocks = 1; 498 num_scalar = 1; /* treat 'scalar' like 'num'-th element of 499 * 'scalars' */ 500 } 501 } 502 503 totalnum = num + numblocks; 504 505 wsize = OPENSSL_malloc(totalnum * sizeof(wsize[0])); 506 wNAF_len = OPENSSL_malloc(totalnum * sizeof(wNAF_len[0])); 507 /* include space for pivot */ 508 wNAF = OPENSSL_malloc((totalnum + 1) * sizeof(wNAF[0])); 509 val_sub = OPENSSL_malloc(totalnum * sizeof(val_sub[0])); 510 511 /* Ensure wNAF is initialised in case we end up going to err */ 512 if (wNAF != NULL) 513 wNAF[0] = NULL; /* preliminary pivot */ 514 515 if (wsize == NULL || wNAF_len == NULL || wNAF == NULL || val_sub == NULL) { 516 ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE); 517 goto err; 518 } 519 520 /* 521 * num_val will be the total number of temporarily precomputed points 522 */ 523 num_val = 0; 524 525 for (i = 0; i < num + num_scalar; i++) { 526 size_t bits; 527 528 bits = i < num ? BN_num_bits(scalars[i]) : BN_num_bits(scalar); 529 wsize[i] = EC_window_bits_for_scalar_size(bits); 530 num_val += (size_t)1 << (wsize[i] - 1); 531 wNAF[i + 1] = NULL; /* make sure we always have a pivot */ 532 wNAF[i] = 533 bn_compute_wNAF((i < num ? scalars[i] : scalar), wsize[i], 534 &wNAF_len[i]); 535 if (wNAF[i] == NULL) 536 goto err; 537 if (wNAF_len[i] > max_len) 538 max_len = wNAF_len[i]; 539 } 540 541 if (numblocks) { 542 /* we go here iff scalar != NULL */ 543 544 if (pre_comp == NULL) { 545 if (num_scalar != 1) { 546 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 547 goto err; 548 } 549 /* we have already generated a wNAF for 'scalar' */ 550 } else { 551 signed char *tmp_wNAF = NULL; 552 size_t tmp_len = 0; 553 554 if (num_scalar != 0) { 555 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 556 goto err; 557 } 558 559 /* 560 * use the window size for which we have precomputation 561 */ 562 wsize[num] = pre_comp->w; 563 tmp_wNAF = bn_compute_wNAF(scalar, wsize[num], &tmp_len); 564 if (!tmp_wNAF) 565 goto err; 566 567 if (tmp_len <= max_len) { 568 /* 569 * One of the other wNAFs is at least as long as the wNAF 570 * belonging to the generator, so wNAF splitting will not buy 571 * us anything. 572 */ 573 574 numblocks = 1; 575 totalnum = num + 1; /* don't use wNAF splitting */ 576 wNAF[num] = tmp_wNAF; 577 wNAF[num + 1] = NULL; 578 wNAF_len[num] = tmp_len; 579 /* 580 * pre_comp->points starts with the points that we need here: 581 */ 582 val_sub[num] = pre_comp->points; 583 } else { 584 /* 585 * don't include tmp_wNAF directly into wNAF array - use wNAF 586 * splitting and include the blocks 587 */ 588 589 signed char *pp; 590 EC_POINT **tmp_points; 591 592 if (tmp_len < numblocks * blocksize) { 593 /* 594 * possibly we can do with fewer blocks than estimated 595 */ 596 numblocks = (tmp_len + blocksize - 1) / blocksize; 597 if (numblocks > pre_comp->numblocks) { 598 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 599 OPENSSL_free(tmp_wNAF); 600 goto err; 601 } 602 totalnum = num + numblocks; 603 } 604 605 /* split wNAF in 'numblocks' parts */ 606 pp = tmp_wNAF; 607 tmp_points = pre_comp->points; 608 609 for (i = num; i < totalnum; i++) { 610 if (i < totalnum - 1) { 611 wNAF_len[i] = blocksize; 612 if (tmp_len < blocksize) { 613 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 614 OPENSSL_free(tmp_wNAF); 615 goto err; 616 } 617 tmp_len -= blocksize; 618 } else 619 /* 620 * last block gets whatever is left (this could be 621 * more or less than 'blocksize'!) 622 */ 623 wNAF_len[i] = tmp_len; 624 625 wNAF[i + 1] = NULL; 626 wNAF[i] = OPENSSL_malloc(wNAF_len[i]); 627 if (wNAF[i] == NULL) { 628 ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE); 629 OPENSSL_free(tmp_wNAF); 630 goto err; 631 } 632 memcpy(wNAF[i], pp, wNAF_len[i]); 633 if (wNAF_len[i] > max_len) 634 max_len = wNAF_len[i]; 635 636 if (*tmp_points == NULL) { 637 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 638 OPENSSL_free(tmp_wNAF); 639 goto err; 640 } 641 val_sub[i] = tmp_points; 642 tmp_points += pre_points_per_block; 643 pp += blocksize; 644 } 645 OPENSSL_free(tmp_wNAF); 646 } 647 } 648 } 649 650 /* 651 * All points we precompute now go into a single array 'val'. 652 * 'val_sub[i]' is a pointer to the subarray for the i-th point, or to a 653 * subarray of 'pre_comp->points' if we already have precomputation. 654 */ 655 val = OPENSSL_malloc((num_val + 1) * sizeof(val[0])); 656 if (val == NULL) { 657 ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE); 658 goto err; 659 } 660 val[num_val] = NULL; /* pivot element */ 661 662 /* allocate points for precomputation */ 663 v = val; 664 for (i = 0; i < num + num_scalar; i++) { 665 val_sub[i] = v; 666 for (j = 0; j < ((size_t)1 << (wsize[i] - 1)); j++) { 667 *v = EC_POINT_new(group); 668 if (*v == NULL) 669 goto err; 670 v++; 671 } 672 } 673 if (!(v == val + num_val)) { 674 ECerr(EC_F_EC_WNAF_MUL, ERR_R_INTERNAL_ERROR); 675 goto err; 676 } 677 678 if ((tmp = EC_POINT_new(group)) == NULL) 679 goto err; 680 681 /*- 682 * prepare precomputed values: 683 * val_sub[i][0] := points[i] 684 * val_sub[i][1] := 3 * points[i] 685 * val_sub[i][2] := 5 * points[i] 686 * ... 687 */ 688 for (i = 0; i < num + num_scalar; i++) { 689 if (i < num) { 690 if (!EC_POINT_copy(val_sub[i][0], points[i])) 691 goto err; 692 } else { 693 if (!EC_POINT_copy(val_sub[i][0], generator)) 694 goto err; 695 } 696 697 if (wsize[i] > 1) { 698 if (!EC_POINT_dbl(group, tmp, val_sub[i][0], ctx)) 699 goto err; 700 for (j = 1; j < ((size_t)1 << (wsize[i] - 1)); j++) { 701 if (!EC_POINT_add 702 (group, val_sub[i][j], val_sub[i][j - 1], tmp, ctx)) 703 goto err; 704 } 705 } 706 } 707 708 if (!EC_POINTs_make_affine(group, num_val, val, ctx)) 709 goto err; 710 711 r_is_at_infinity = 1; 712 713 for (k = max_len - 1; k >= 0; k--) { 714 if (!r_is_at_infinity) { 715 if (!EC_POINT_dbl(group, r, r, ctx)) 716 goto err; 717 } 718 719 for (i = 0; i < totalnum; i++) { 720 if (wNAF_len[i] > (size_t)k) { 721 int digit = wNAF[i][k]; 722 int is_neg; 723 724 if (digit) { 725 is_neg = digit < 0; 726 727 if (is_neg) 728 digit = -digit; 729 730 if (is_neg != r_is_inverted) { 731 if (!r_is_at_infinity) { 732 if (!EC_POINT_invert(group, r, ctx)) 733 goto err; 734 } 735 r_is_inverted = !r_is_inverted; 736 } 737 738 /* digit > 0 */ 739 740 if (r_is_at_infinity) { 741 if (!EC_POINT_copy(r, val_sub[i][digit >> 1])) 742 goto err; 743 744 /*- 745 * Apply coordinate blinding for EC_POINT. 746 * 747 * The underlying EC_METHOD can optionally implement this function: 748 * ec_point_blind_coordinates() returns 0 in case of errors or 1 on 749 * success or if coordinate blinding is not implemented for this 750 * group. 751 */ 752 if (!ec_point_blind_coordinates(group, r, ctx)) { 753 ECerr(EC_F_EC_WNAF_MUL, EC_R_POINT_COORDINATES_BLIND_FAILURE); 754 goto err; 755 } 756 757 r_is_at_infinity = 0; 758 } else { 759 if (!EC_POINT_add 760 (group, r, r, val_sub[i][digit >> 1], ctx)) 761 goto err; 762 } 763 } 764 } 765 } 766 } 767 768 if (r_is_at_infinity) { 769 if (!EC_POINT_set_to_infinity(group, r)) 770 goto err; 771 } else { 772 if (r_is_inverted) 773 if (!EC_POINT_invert(group, r, ctx)) 774 goto err; 775 } 776 777 ret = 1; 778 779 err: 780 EC_POINT_free(tmp); 781 OPENSSL_free(wsize); 782 OPENSSL_free(wNAF_len); 783 if (wNAF != NULL) { 784 signed char **w; 785 786 for (w = wNAF; *w != NULL; w++) 787 OPENSSL_free(*w); 788 789 OPENSSL_free(wNAF); 790 } 791 if (val != NULL) { 792 for (v = val; *v != NULL; v++) 793 EC_POINT_clear_free(*v); 794 795 OPENSSL_free(val); 796 } 797 OPENSSL_free(val_sub); 798 return ret; 799 } 800 801 /*- 802 * ec_wNAF_precompute_mult() 803 * creates an EC_PRE_COMP object with preprecomputed multiples of the generator 804 * for use with wNAF splitting as implemented in ec_wNAF_mul(). 805 * 806 * 'pre_comp->points' is an array of multiples of the generator 807 * of the following form: 808 * points[0] = generator; 809 * points[1] = 3 * generator; 810 * ... 811 * points[2^(w-1)-1] = (2^(w-1)-1) * generator; 812 * points[2^(w-1)] = 2^blocksize * generator; 813 * points[2^(w-1)+1] = 3 * 2^blocksize * generator; 814 * ... 815 * points[2^(w-1)*(numblocks-1)-1] = (2^(w-1)) * 2^(blocksize*(numblocks-2)) * generator 816 * points[2^(w-1)*(numblocks-1)] = 2^(blocksize*(numblocks-1)) * generator 817 * ... 818 * points[2^(w-1)*numblocks-1] = (2^(w-1)) * 2^(blocksize*(numblocks-1)) * generator 819 * points[2^(w-1)*numblocks] = NULL 820 */ 821 int ec_wNAF_precompute_mult(EC_GROUP *group, BN_CTX *ctx) 822 { 823 const EC_POINT *generator; 824 EC_POINT *tmp_point = NULL, *base = NULL, **var; 825 BN_CTX *new_ctx = NULL; 826 const BIGNUM *order; 827 size_t i, bits, w, pre_points_per_block, blocksize, numblocks, num; 828 EC_POINT **points = NULL; 829 EC_PRE_COMP *pre_comp; 830 int ret = 0; 831 832 /* if there is an old EC_PRE_COMP object, throw it away */ 833 EC_pre_comp_free(group); 834 if ((pre_comp = ec_pre_comp_new(group)) == NULL) 835 return 0; 836 837 generator = EC_GROUP_get0_generator(group); 838 if (generator == NULL) { 839 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, EC_R_UNDEFINED_GENERATOR); 840 goto err; 841 } 842 843 if (ctx == NULL) { 844 ctx = new_ctx = BN_CTX_new(); 845 if (ctx == NULL) 846 goto err; 847 } 848 849 BN_CTX_start(ctx); 850 851 order = EC_GROUP_get0_order(group); 852 if (order == NULL) 853 goto err; 854 if (BN_is_zero(order)) { 855 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, EC_R_UNKNOWN_ORDER); 856 goto err; 857 } 858 859 bits = BN_num_bits(order); 860 /* 861 * The following parameters mean we precompute (approximately) one point 862 * per bit. TBD: The combination 8, 4 is perfect for 160 bits; for other 863 * bit lengths, other parameter combinations might provide better 864 * efficiency. 865 */ 866 blocksize = 8; 867 w = 4; 868 if (EC_window_bits_for_scalar_size(bits) > w) { 869 /* let's not make the window too small ... */ 870 w = EC_window_bits_for_scalar_size(bits); 871 } 872 873 numblocks = (bits + blocksize - 1) / blocksize; /* max. number of blocks 874 * to use for wNAF 875 * splitting */ 876 877 pre_points_per_block = (size_t)1 << (w - 1); 878 num = pre_points_per_block * numblocks; /* number of points to compute 879 * and store */ 880 881 points = OPENSSL_malloc(sizeof(*points) * (num + 1)); 882 if (points == NULL) { 883 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE); 884 goto err; 885 } 886 887 var = points; 888 var[num] = NULL; /* pivot */ 889 for (i = 0; i < num; i++) { 890 if ((var[i] = EC_POINT_new(group)) == NULL) { 891 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE); 892 goto err; 893 } 894 } 895 896 if ((tmp_point = EC_POINT_new(group)) == NULL 897 || (base = EC_POINT_new(group)) == NULL) { 898 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_MALLOC_FAILURE); 899 goto err; 900 } 901 902 if (!EC_POINT_copy(base, generator)) 903 goto err; 904 905 /* do the precomputation */ 906 for (i = 0; i < numblocks; i++) { 907 size_t j; 908 909 if (!EC_POINT_dbl(group, tmp_point, base, ctx)) 910 goto err; 911 912 if (!EC_POINT_copy(*var++, base)) 913 goto err; 914 915 for (j = 1; j < pre_points_per_block; j++, var++) { 916 /* 917 * calculate odd multiples of the current base point 918 */ 919 if (!EC_POINT_add(group, *var, tmp_point, *(var - 1), ctx)) 920 goto err; 921 } 922 923 if (i < numblocks - 1) { 924 /* 925 * get the next base (multiply current one by 2^blocksize) 926 */ 927 size_t k; 928 929 if (blocksize <= 2) { 930 ECerr(EC_F_EC_WNAF_PRECOMPUTE_MULT, ERR_R_INTERNAL_ERROR); 931 goto err; 932 } 933 934 if (!EC_POINT_dbl(group, base, tmp_point, ctx)) 935 goto err; 936 for (k = 2; k < blocksize; k++) { 937 if (!EC_POINT_dbl(group, base, base, ctx)) 938 goto err; 939 } 940 } 941 } 942 943 if (!EC_POINTs_make_affine(group, num, points, ctx)) 944 goto err; 945 946 pre_comp->group = group; 947 pre_comp->blocksize = blocksize; 948 pre_comp->numblocks = numblocks; 949 pre_comp->w = w; 950 pre_comp->points = points; 951 points = NULL; 952 pre_comp->num = num; 953 SETPRECOMP(group, ec, pre_comp); 954 pre_comp = NULL; 955 ret = 1; 956 957 err: 958 BN_CTX_end(ctx); 959 BN_CTX_free(new_ctx); 960 EC_ec_pre_comp_free(pre_comp); 961 if (points) { 962 EC_POINT **p; 963 964 for (p = points; *p != NULL; p++) 965 EC_POINT_free(*p); 966 OPENSSL_free(points); 967 } 968 EC_POINT_free(tmp_point); 969 EC_POINT_free(base); 970 return ret; 971 } 972 973 int ec_wNAF_have_precompute_mult(const EC_GROUP *group) 974 { 975 return HAVEPRECOMP(group, ec); 976 } 977