1 /* 2 * Copyright 1995-2018 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the OpenSSL license (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10 /* 11 * Details about Montgomery multiplication algorithms can be found at 12 * http://security.ece.orst.edu/publications.html, e.g. 13 * http://security.ece.orst.edu/koc/papers/j37acmon.pdf and 14 * sections 3.8 and 4.2 in http://security.ece.orst.edu/koc/papers/r01rsasw.pdf 15 */ 16 17 #include "internal/cryptlib.h" 18 #include "bn_lcl.h" 19 20 #define MONT_WORD /* use the faster word-based algorithm */ 21 22 #ifdef MONT_WORD 23 static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont); 24 #endif 25 26 int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, 27 BN_MONT_CTX *mont, BN_CTX *ctx) 28 { 29 int ret = bn_mul_mont_fixed_top(r, a, b, mont, ctx); 30 31 bn_correct_top(r); 32 bn_check_top(r); 33 34 return ret; 35 } 36 37 int bn_mul_mont_fixed_top(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, 38 BN_MONT_CTX *mont, BN_CTX *ctx) 39 { 40 BIGNUM *tmp; 41 int ret = 0; 42 int num = mont->N.top; 43 44 #if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD) 45 if (num > 1 && a->top == num && b->top == num) { 46 if (bn_wexpand(r, num) == NULL) 47 return 0; 48 if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) { 49 r->neg = a->neg ^ b->neg; 50 r->top = num; 51 r->flags |= BN_FLG_FIXED_TOP; 52 return 1; 53 } 54 } 55 #endif 56 57 if ((a->top + b->top) > 2 * num) 58 return 0; 59 60 BN_CTX_start(ctx); 61 tmp = BN_CTX_get(ctx); 62 if (tmp == NULL) 63 goto err; 64 65 bn_check_top(tmp); 66 if (a == b) { 67 if (!bn_sqr_fixed_top(tmp, a, ctx)) 68 goto err; 69 } else { 70 if (!bn_mul_fixed_top(tmp, a, b, ctx)) 71 goto err; 72 } 73 /* reduce from aRR to aR */ 74 #ifdef MONT_WORD 75 if (!bn_from_montgomery_word(r, tmp, mont)) 76 goto err; 77 #else 78 if (!BN_from_montgomery(r, tmp, mont, ctx)) 79 goto err; 80 #endif 81 ret = 1; 82 err: 83 BN_CTX_end(ctx); 84 return ret; 85 } 86 87 #ifdef MONT_WORD 88 static int bn_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) 89 { 90 BIGNUM *n; 91 BN_ULONG *ap, *np, *rp, n0, v, carry; 92 int nl, max, i; 93 unsigned int rtop; 94 95 n = &(mont->N); 96 nl = n->top; 97 if (nl == 0) { 98 ret->top = 0; 99 return 1; 100 } 101 102 max = (2 * nl); /* carry is stored separately */ 103 if (bn_wexpand(r, max) == NULL) 104 return 0; 105 106 r->neg ^= n->neg; 107 np = n->d; 108 rp = r->d; 109 110 /* clear the top words of T */ 111 for (rtop = r->top, i = 0; i < max; i++) { 112 v = (BN_ULONG)0 - ((i - rtop) >> (8 * sizeof(rtop) - 1)); 113 rp[i] &= v; 114 } 115 116 r->top = max; 117 r->flags |= BN_FLG_FIXED_TOP; 118 n0 = mont->n0[0]; 119 120 /* 121 * Add multiples of |n| to |r| until R = 2^(nl * BN_BITS2) divides it. On 122 * input, we had |r| < |n| * R, so now |r| < 2 * |n| * R. Note that |r| 123 * includes |carry| which is stored separately. 124 */ 125 for (carry = 0, i = 0; i < nl; i++, rp++) { 126 v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2); 127 v = (v + carry + rp[nl]) & BN_MASK2; 128 carry |= (v != rp[nl]); 129 carry &= (v <= rp[nl]); 130 rp[nl] = v; 131 } 132 133 if (bn_wexpand(ret, nl) == NULL) 134 return 0; 135 ret->top = nl; 136 ret->flags |= BN_FLG_FIXED_TOP; 137 ret->neg = r->neg; 138 139 rp = ret->d; 140 141 /* 142 * Shift |nl| words to divide by R. We have |ap| < 2 * |n|. Note that |ap| 143 * includes |carry| which is stored separately. 144 */ 145 ap = &(r->d[nl]); 146 147 carry -= bn_sub_words(rp, ap, np, nl); 148 /* 149 * |carry| is -1 if |ap| - |np| underflowed or zero if it did not. Note 150 * |carry| cannot be 1. That would imply the subtraction did not fit in 151 * |nl| words, and we know at most one subtraction is needed. 152 */ 153 for (i = 0; i < nl; i++) { 154 rp[i] = (carry & ap[i]) | (~carry & rp[i]); 155 ap[i] = 0; 156 } 157 158 return 1; 159 } 160 #endif /* MONT_WORD */ 161 162 int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, 163 BN_CTX *ctx) 164 { 165 int retn; 166 167 retn = bn_from_mont_fixed_top(ret, a, mont, ctx); 168 bn_correct_top(ret); 169 bn_check_top(ret); 170 171 return retn; 172 } 173 174 int bn_from_mont_fixed_top(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont, 175 BN_CTX *ctx) 176 { 177 int retn = 0; 178 #ifdef MONT_WORD 179 BIGNUM *t; 180 181 BN_CTX_start(ctx); 182 if ((t = BN_CTX_get(ctx)) && BN_copy(t, a)) { 183 retn = bn_from_montgomery_word(ret, t, mont); 184 } 185 BN_CTX_end(ctx); 186 #else /* !MONT_WORD */ 187 BIGNUM *t1, *t2; 188 189 BN_CTX_start(ctx); 190 t1 = BN_CTX_get(ctx); 191 t2 = BN_CTX_get(ctx); 192 if (t2 == NULL) 193 goto err; 194 195 if (!BN_copy(t1, a)) 196 goto err; 197 BN_mask_bits(t1, mont->ri); 198 199 if (!BN_mul(t2, t1, &mont->Ni, ctx)) 200 goto err; 201 BN_mask_bits(t2, mont->ri); 202 203 if (!BN_mul(t1, t2, &mont->N, ctx)) 204 goto err; 205 if (!BN_add(t2, a, t1)) 206 goto err; 207 if (!BN_rshift(ret, t2, mont->ri)) 208 goto err; 209 210 if (BN_ucmp(ret, &(mont->N)) >= 0) { 211 if (!BN_usub(ret, ret, &(mont->N))) 212 goto err; 213 } 214 retn = 1; 215 bn_check_top(ret); 216 err: 217 BN_CTX_end(ctx); 218 #endif /* MONT_WORD */ 219 return retn; 220 } 221 222 int bn_to_mont_fixed_top(BIGNUM *r, const BIGNUM *a, BN_MONT_CTX *mont, 223 BN_CTX *ctx) 224 { 225 return bn_mul_mont_fixed_top(r, a, &(mont->RR), mont, ctx); 226 } 227 228 BN_MONT_CTX *BN_MONT_CTX_new(void) 229 { 230 BN_MONT_CTX *ret; 231 232 if ((ret = OPENSSL_malloc(sizeof(*ret))) == NULL) { 233 BNerr(BN_F_BN_MONT_CTX_NEW, ERR_R_MALLOC_FAILURE); 234 return NULL; 235 } 236 237 BN_MONT_CTX_init(ret); 238 ret->flags = BN_FLG_MALLOCED; 239 return ret; 240 } 241 242 void BN_MONT_CTX_init(BN_MONT_CTX *ctx) 243 { 244 ctx->ri = 0; 245 bn_init(&ctx->RR); 246 bn_init(&ctx->N); 247 bn_init(&ctx->Ni); 248 ctx->n0[0] = ctx->n0[1] = 0; 249 ctx->flags = 0; 250 } 251 252 void BN_MONT_CTX_free(BN_MONT_CTX *mont) 253 { 254 if (mont == NULL) 255 return; 256 BN_clear_free(&mont->RR); 257 BN_clear_free(&mont->N); 258 BN_clear_free(&mont->Ni); 259 if (mont->flags & BN_FLG_MALLOCED) 260 OPENSSL_free(mont); 261 } 262 263 int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) 264 { 265 int i, ret = 0; 266 BIGNUM *Ri, *R; 267 268 if (BN_is_zero(mod)) 269 return 0; 270 271 BN_CTX_start(ctx); 272 if ((Ri = BN_CTX_get(ctx)) == NULL) 273 goto err; 274 R = &(mont->RR); /* grab RR as a temp */ 275 if (!BN_copy(&(mont->N), mod)) 276 goto err; /* Set N */ 277 if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0) 278 BN_set_flags(&(mont->N), BN_FLG_CONSTTIME); 279 mont->N.neg = 0; 280 281 #ifdef MONT_WORD 282 { 283 BIGNUM tmod; 284 BN_ULONG buf[2]; 285 286 bn_init(&tmod); 287 tmod.d = buf; 288 tmod.dmax = 2; 289 tmod.neg = 0; 290 291 if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0) 292 BN_set_flags(&tmod, BN_FLG_CONSTTIME); 293 294 mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2; 295 296 # if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) 297 /* 298 * Only certain BN_BITS2<=32 platforms actually make use of n0[1], 299 * and we could use the #else case (with a shorter R value) for the 300 * others. However, currently only the assembler files do know which 301 * is which. 302 */ 303 304 BN_zero(R); 305 if (!(BN_set_bit(R, 2 * BN_BITS2))) 306 goto err; 307 308 tmod.top = 0; 309 if ((buf[0] = mod->d[0])) 310 tmod.top = 1; 311 if ((buf[1] = mod->top > 1 ? mod->d[1] : 0)) 312 tmod.top = 2; 313 314 if (BN_is_one(&tmod)) 315 BN_zero(Ri); 316 else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL) 317 goto err; 318 if (!BN_lshift(Ri, Ri, 2 * BN_BITS2)) 319 goto err; /* R*Ri */ 320 if (!BN_is_zero(Ri)) { 321 if (!BN_sub_word(Ri, 1)) 322 goto err; 323 } else { /* if N mod word size == 1 */ 324 325 if (bn_expand(Ri, (int)sizeof(BN_ULONG) * 2) == NULL) 326 goto err; 327 /* Ri-- (mod double word size) */ 328 Ri->neg = 0; 329 Ri->d[0] = BN_MASK2; 330 Ri->d[1] = BN_MASK2; 331 Ri->top = 2; 332 } 333 if (!BN_div(Ri, NULL, Ri, &tmod, ctx)) 334 goto err; 335 /* 336 * Ni = (R*Ri-1)/N, keep only couple of least significant words: 337 */ 338 mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; 339 mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0; 340 # else 341 BN_zero(R); 342 if (!(BN_set_bit(R, BN_BITS2))) 343 goto err; /* R */ 344 345 buf[0] = mod->d[0]; /* tmod = N mod word size */ 346 buf[1] = 0; 347 tmod.top = buf[0] != 0 ? 1 : 0; 348 /* Ri = R^-1 mod N */ 349 if (BN_is_one(&tmod)) 350 BN_zero(Ri); 351 else if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL) 352 goto err; 353 if (!BN_lshift(Ri, Ri, BN_BITS2)) 354 goto err; /* R*Ri */ 355 if (!BN_is_zero(Ri)) { 356 if (!BN_sub_word(Ri, 1)) 357 goto err; 358 } else { /* if N mod word size == 1 */ 359 360 if (!BN_set_word(Ri, BN_MASK2)) 361 goto err; /* Ri-- (mod word size) */ 362 } 363 if (!BN_div(Ri, NULL, Ri, &tmod, ctx)) 364 goto err; 365 /* 366 * Ni = (R*Ri-1)/N, keep only least significant word: 367 */ 368 mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; 369 mont->n0[1] = 0; 370 # endif 371 } 372 #else /* !MONT_WORD */ 373 { /* bignum version */ 374 mont->ri = BN_num_bits(&mont->N); 375 BN_zero(R); 376 if (!BN_set_bit(R, mont->ri)) 377 goto err; /* R = 2^ri */ 378 /* Ri = R^-1 mod N */ 379 if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL) 380 goto err; 381 if (!BN_lshift(Ri, Ri, mont->ri)) 382 goto err; /* R*Ri */ 383 if (!BN_sub_word(Ri, 1)) 384 goto err; 385 /* 386 * Ni = (R*Ri-1) / N 387 */ 388 if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx)) 389 goto err; 390 } 391 #endif 392 393 /* setup RR for conversions */ 394 BN_zero(&(mont->RR)); 395 if (!BN_set_bit(&(mont->RR), mont->ri * 2)) 396 goto err; 397 if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx)) 398 goto err; 399 400 for (i = mont->RR.top, ret = mont->N.top; i < ret; i++) 401 mont->RR.d[i] = 0; 402 mont->RR.top = ret; 403 mont->RR.flags |= BN_FLG_FIXED_TOP; 404 405 ret = 1; 406 err: 407 BN_CTX_end(ctx); 408 return ret; 409 } 410 411 BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) 412 { 413 if (to == from) 414 return to; 415 416 if (!BN_copy(&(to->RR), &(from->RR))) 417 return NULL; 418 if (!BN_copy(&(to->N), &(from->N))) 419 return NULL; 420 if (!BN_copy(&(to->Ni), &(from->Ni))) 421 return NULL; 422 to->ri = from->ri; 423 to->n0[0] = from->n0[0]; 424 to->n0[1] = from->n0[1]; 425 return to; 426 } 427 428 BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_RWLOCK *lock, 429 const BIGNUM *mod, BN_CTX *ctx) 430 { 431 BN_MONT_CTX *ret; 432 433 CRYPTO_THREAD_read_lock(lock); 434 ret = *pmont; 435 CRYPTO_THREAD_unlock(lock); 436 if (ret) 437 return ret; 438 439 /* 440 * We don't want to serialise globally while doing our lazy-init math in 441 * BN_MONT_CTX_set. That punishes threads that are doing independent 442 * things. Instead, punish the case where more than one thread tries to 443 * lazy-init the same 'pmont', by having each do the lazy-init math work 444 * independently and only use the one from the thread that wins the race 445 * (the losers throw away the work they've done). 446 */ 447 ret = BN_MONT_CTX_new(); 448 if (ret == NULL) 449 return NULL; 450 if (!BN_MONT_CTX_set(ret, mod, ctx)) { 451 BN_MONT_CTX_free(ret); 452 return NULL; 453 } 454 455 /* The locked compare-and-set, after the local work is done. */ 456 CRYPTO_THREAD_write_lock(lock); 457 if (*pmont) { 458 BN_MONT_CTX_free(ret); 459 ret = *pmont; 460 } else 461 *pmont = ret; 462 CRYPTO_THREAD_unlock(lock); 463 return ret; 464 } 465