/*
 * Copyright 1995-2023 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the OpenSSL license (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#ifndef OSSL_CRYPTO_BN_LOCAL_H
# define OSSL_CRYPTO_BN_LOCAL_H

/*
 * The EDK2 build doesn't use bn_conf.h; it sets THIRTY_TWO_BIT or
 * SIXTY_FOUR_BIT in its own environment since it doesn't re-run our
 * Configure script and needs to support both 32-bit and 64-bit.
 */
# include <openssl/opensslconf.h>

# if !defined(OPENSSL_SYS_UEFI)
#  include "crypto/bn_conf.h"
# endif

# include "crypto/bn.h"

/*
 * These preprocessor symbols control various aspects of the bignum headers
 * and library code. They're not defined by any "normal" configuration, as
 * they are intended for development and testing purposes. NB: defining
 * both can be useful for debugging application code as well as openssl
 * itself.
 *
 * BN_DEBUG      - turn on various debugging alterations to the bignum code
 * BN_DEBUG_RAND - uses random poisoning of unused words to trip up
 *                 mismanagement of bignum internals. You must also define
 *                 BN_DEBUG.
 */
/* #define BN_DEBUG */
/* #define BN_DEBUG_RAND */

/*
 * This should limit the stack usage due to alloca to about 4K.
 * BN_SOFT_LIMIT is a soft limit equivalent to 2*OPENSSL_RSA_MAX_MODULUS_BITS.
 * Beyond that size bn_mul_mont is no longer used, and the constant time
 * assembler code is disabled, due to the blatant alloca and bn_mul_mont usage.
 * Note that bn_mul_mont does an alloca that is hidden away in assembly.
 * It is not recommended to do computations with numbers exceeding this limit,
 * since the result will be highly version dependent:
 * While the current OpenSSL version will use non-optimized, but safe code,
 * previous versions will use optimized code, that may crash due to unexpected
 * stack overflow, and future versions may very well turn this into a hard
 * limit.
 * Note however, that it is possible to override the size limit using
 * "./config -DBN_SOFT_LIMIT=<limit>" if necessary, and the O/S specific
 * stack limit is known and taken into consideration.
 */
# ifndef BN_SOFT_LIMIT
#  define BN_SOFT_LIMIT         (4096 / BN_BYTES)
# endif

# ifndef OPENSSL_SMALL_FOOTPRINT
#  define BN_MUL_COMBA
#  define BN_SQR_COMBA
#  define BN_RECURSION
# endif

/*
 * This next option uses the C libraries (2 word)/(1 word) function. If it is
 * not defined, I use my C version (which is slower). The reason for this
 * flag is that when the particular C compiler library routine is used, and
 * the library is linked with a different compiler, the library is missing.
 * This mostly happens when the library is built with gcc and then linked
 * using normal cc. This would be a common occurrence because gcc normally
 * produces code that is 2 times faster than system compilers for the big
 * number stuff. For machines with only one compiler (or shared libraries),
 * this should be on. Again this is only really a problem on machines using
 * "long long's", are 32bit, and are not using my assembler code.
 */
# if defined(OPENSSL_SYS_MSDOS) || defined(OPENSSL_SYS_WINDOWS) || \
    defined(OPENSSL_SYS_WIN32) || defined(linux)
#  define BN_DIV2W
# endif

/*
 * 64-bit processor with LP64 ABI
 */
# ifdef SIXTY_FOUR_BIT_LONG
#  define BN_ULLONG       unsigned long long
#  define BN_BITS4        32
#  define BN_MASK2        (0xffffffffffffffffL)
#  define BN_MASK2l       (0xffffffffL)
#  define BN_MASK2h       (0xffffffff00000000L)
#  define BN_MASK2h1      (0xffffffff80000000L)
#  define BN_DEC_CONV     (10000000000000000000UL)
#  define BN_DEC_NUM      19
#  define BN_DEC_FMT1     "%lu"
#  define BN_DEC_FMT2     "%019lu"
# endif

/*
 * 64-bit processor other than LP64 ABI
 */
# ifdef SIXTY_FOUR_BIT
#  undef BN_LLONG
#  undef BN_ULLONG
#  define BN_BITS4        32
#  define BN_MASK2        (0xffffffffffffffffLL)
#  define BN_MASK2l       (0xffffffffL)
#  define BN_MASK2h       (0xffffffff00000000LL)
#  define BN_MASK2h1      (0xffffffff80000000LL)
#  define BN_DEC_CONV     (10000000000000000000ULL)
#  define BN_DEC_NUM      19
#  define BN_DEC_FMT1     "%llu"
#  define BN_DEC_FMT2     "%019llu"
# endif

# ifdef THIRTY_TWO_BIT
#  ifdef BN_LLONG
#   if defined(_WIN32) && !defined(__GNUC__)
#    define BN_ULLONG     unsigned __int64
#   else
#    define BN_ULLONG     unsigned long long
#   endif
#  endif
#  define BN_BITS4        16
#  define BN_MASK2        (0xffffffffL)
#  define BN_MASK2l       (0xffff)
#  define BN_MASK2h1      (0xffff8000L)
#  define BN_MASK2h       (0xffff0000L)
#  define BN_DEC_CONV     (1000000000L)
#  define BN_DEC_NUM      9
#  define BN_DEC_FMT1     "%u"
#  define BN_DEC_FMT2     "%09u"
# endif


/*-
 * Bignum consistency macros
 * There is one "API" macro, bn_fix_top(), for stripping leading zeroes from
 * bignum data after direct manipulations on the data. There is also an
 * "internal" macro, bn_check_top(), for verifying that there are no leading
 * zeroes. Unfortunately, some auditing is required due to the fact that
 * bn_fix_top() has become an overabused duct-tape because bignum data is
 * occasionally passed around in an inconsistent state. So the following
 * changes have been made to sort this out;
 * - bn_fix_top()s implementation has been moved to bn_correct_top()
 * - if BN_DEBUG isn't defined, bn_fix_top() maps to bn_correct_top(), and
 *   bn_check_top() is as before.
 * - if BN_DEBUG *is* defined;
 *   - bn_check_top() tries to pollute unused words even if the bignum 'top' is
 *     consistent. (ed: only if BN_DEBUG_RAND is defined)
 *   - bn_fix_top() maps to bn_check_top() rather than "fixing" anything.
 * The idea is to have debug builds flag up inconsistent bignums when they
 * occur. If that occurs in a bn_fix_top(), we examine the code in question; if
 * the use of bn_fix_top() was appropriate (ie. it follows directly after code
 * that manipulates the bignum) it is converted to bn_correct_top(), and if it
 * was not appropriate, we convert it permanently to bn_check_top() and track
 * down the cause of the bug. Eventually, no internal code should be using the
 * bn_fix_top() macro. External applications and libraries should try this with
 * their own code too, both in terms of building against the openssl headers
 * with BN_DEBUG defined *and* linking with a version of OpenSSL built with it
 * defined. This not only improves external code, it provides more test
 * coverage for openssl's own code.
 */

# ifdef BN_DEBUG
/*
 * The new BN_FLG_FIXED_TOP flag marks vectors that were not treated with
 * bn_correct_top, in other words such vectors are permitted to have zeros
 * in most significant limbs. Such vectors are used internally to achieve
 * execution time invariance for critical operations with private keys.
 * It's BN_DEBUG-only flag, because user application is not supposed to
 * observe it anyway. Moreover, optimizing compiler would actually remove
 * all operations manipulating the bit in question in non-BN_DEBUG build.
 */
#  define BN_FLG_FIXED_TOP 0x10000
#  ifdef BN_DEBUG_RAND
/* Poison the unused words above 'top' with a random byte (debug builds). */
#   define bn_pollute(a) \
        do { \
            const BIGNUM *_bnum1 = (a); \
            if (_bnum1->top < _bnum1->dmax) { \
                unsigned char _tmp_char; \
                /* We cast away const without the compiler knowing, any \
                 * *genuinely* constant variables that aren't mutable \
                 * wouldn't be constructed with top!=dmax. */ \
                BN_ULONG *_not_const; \
                memcpy(&_not_const, &_bnum1->d, sizeof(_not_const)); \
                RAND_bytes(&_tmp_char, 1); /* Debug only - safe to ignore error return */\
                memset(_not_const + _bnum1->top, _tmp_char, \
                       sizeof(*_not_const) * (_bnum1->dmax - _bnum1->top)); \
            } \
        } while(0)
#  else
#   define bn_pollute(a)
#  endif
/*
 * Assert the invariant: top==0 implies non-negative zero; otherwise the
 * most significant word is non-zero unless BN_FLG_FIXED_TOP is set.
 */
#  define bn_check_top(a) \
        do { \
            const BIGNUM *_bnum2 = (a); \
            if (_bnum2 != NULL) { \
                int _top = _bnum2->top; \
                (void)ossl_assert((_top == 0 && !_bnum2->neg) || \
                          (_top && ((_bnum2->flags & BN_FLG_FIXED_TOP) \
                                    || _bnum2->d[_top - 1] != 0))); \
                bn_pollute(_bnum2); \
            } \
        } while(0)

#  define bn_fix_top(a)           bn_check_top(a)

#  define bn_check_size(bn, bits) bn_wcheck_size(bn, ((bits+BN_BITS2-1))/BN_BITS2)
#  define bn_wcheck_size(bn, words) \
        do { \
            const BIGNUM *_bnum2 = (bn); \
            assert((words) <= (_bnum2)->dmax && \
                   (words) >= (_bnum2)->top); \
            /* avoid unused variable warning with NDEBUG */ \
            (void)(_bnum2); \
        } while(0)

# else                          /* !BN_DEBUG */

#  define BN_FLG_FIXED_TOP 0
#  define bn_pollute(a)
#  define bn_check_top(a)
#  define bn_fix_top(a)           bn_correct_top(a)
#  define bn_check_size(bn, bits)
#  define bn_wcheck_size(bn, words)

# endif

/* Low-level word-array arithmetic primitives operating on BN_ULONG vectors. */
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
                          BN_ULONG w);
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                      int num);
BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                      int num);

struct bignum_st {
    BN_ULONG *d;                /* Pointer to an array of 'BN_BITS2' bit
                                 * chunks. */
    int top;                    /* Index of last used d +1. */
    /* The next are internal book keeping for bn_expand. */
    int dmax;                   /* Size of the d array. */
    int neg;                    /* one if the number is negative */
    int flags;
};

/* Used for montgomery multiplication */
struct bn_mont_ctx_st {
    int ri;                     /* number of bits in R */
    BIGNUM RR;                  /* used to convert to montgomery form,
                                 * possibly zero-padded */
    BIGNUM N;                   /* The modulus */
    BIGNUM Ni;                  /* R*(1/R mod N) - N*Ni = 1 (Ni is only
                                 * stored for bignum algorithm) */
    BN_ULONG n0[2];             /* least significant word(s) of Ni; (type
                                 * changed with 0.9.9, was "BN_ULONG n0;"
                                 * before) */
    int flags;
};

/*
 * Used for reciprocal division/mod functions It cannot be shared between
 * threads
 */
struct bn_recp_ctx_st {
    BIGNUM N;                   /* the divisor */
    BIGNUM Nr;                  /* the reciprocal */
    int num_bits;
    int shift;
    int flags;
};

/* Used for slow "generation" functions. */
struct bn_gencb_st {
    unsigned int ver;           /* To handle binary (in)compatibility */
    void *arg;                  /* callback-specific data */
    union {
        /* if (ver==1) - handles old style callbacks */
        void (*cb_1) (int, int, void *);
        /* if (ver==2) - new callback style */
        int (*cb_2) (int, int, BN_GENCB *);
    } cb;
};

struct bn_blinding_st {
    BIGNUM *A;
    BIGNUM *Ai;
    BIGNUM *e;
    BIGNUM *mod;                /* just a reference */
    CRYPTO_THREAD_ID tid;
    int counter;
    unsigned long flags;
    BN_MONT_CTX *m_ctx;
    int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
                       const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
    CRYPTO_RWLOCK *lock;
};

/*-
 * BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
 *
 *
 * For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of
 * multiplications is a constant plus on average
 *
 *    2^(w-1) + (b-w)/(w+1);
 *
 * here 2^(w-1) is for precomputing the table (we actually need entries only
 * for windows that have the lowest bit set), and (b-w)/(w+1) is an
 * approximation for the expected number of w-bit windows, not counting the
 * first one.
 *
 * Thus we should use
 *
 *    w >= 6 if        b > 671
 *     w = 5 if  671 > b > 239
 *     w = 4 if  239 > b >  79
 *     w = 3 if   79 > b >  23
 *    w <= 2 if   23 > b
 *
 * (with draws in between). Very small exponents are often selected with low
 * Hamming weight, so we use w = 1 for b <= 23.
 */
# define BN_window_bits_for_exponent_size(b) \
                ((b) > 671 ? 6 : \
                 (b) > 239 ? 5 : \
                 (b) >  79 ? 4 : \
                 (b) >  23 ? 3 : 1)

/*
 * BN_mod_exp_mont_consttime is based on the assumption that the L1 data cache
 * line width of the target processor is at least the following value.
 */
# define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH      ( 64 )
# define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK       (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)

/*
 * Window sizes optimized for fixed window size modular exponentiation
 * algorithm (BN_mod_exp_mont_consttime). To achieve the security goals of
 * BN_mod_exp_mont_consttime, the maximum size of the window must not exceed
 * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). Window size thresholds are
 * defined for cache line sizes of 32 and 64, cache line sizes where
 * log_2(32)=5 and log_2(64)=6 respectively. A window size of 7 should only be
 * used on processors that have a 128 byte or greater cache line size.
 */
# if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64

#  define BN_window_bits_for_ctime_exponent_size(b) \
                ((b) > 937 ? 6 : \
                 (b) > 306 ? 5 : \
                 (b) >  89 ? 4 : \
                 (b) >  22 ? 3 : 1)
#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE    (6)

# elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32

#  define BN_window_bits_for_ctime_exponent_size(b) \
                ((b) > 306 ? 5 : \
                 (b) >  89 ? 4 : \
                 (b) >  22 ? 3 : 1)
#  define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE    (5)

# endif

/* Pentium pro 16,16,16,32,64 */
/* Alpha       16,16,16,16,64 */
# define BN_MULL_SIZE_NORMAL                     (16)/* 32 */
# define BN_MUL_RECURSIVE_SIZE_NORMAL            (16)/* 32 less than */
# define BN_SQR_RECURSIVE_SIZE_NORMAL            (16)/* 32 */
# define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL        (32)/* 32 */
# define BN_MONT_CTX_SET_SIZE_WORD               (64)/* 32 */

/*
 * 2011-02-22 SMS. In various places, a size_t variable or a type cast to
 * size_t was used to perform integer-only operations on pointers. This
 * failed on VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t
 * is still only 32 bits. What's needed in these cases is an integer type
 * with the same size as a pointer, which size_t is not certain to be. The
 * only fix here is VMS-specific.
 */
# if defined(OPENSSL_SYS_VMS)
#  if __INITIAL_POINTER_SIZE == 64
#   define PTR_SIZE_INT long long
#  else                         /* __INITIAL_POINTER_SIZE == 64 */
#   define PTR_SIZE_INT int
#  endif                        /* __INITIAL_POINTER_SIZE == 64 [else] */
# elif !defined(PTR_SIZE_INT)   /* defined(OPENSSL_SYS_VMS) */
#  define PTR_SIZE_INT size_t
# endif                         /* defined(OPENSSL_SYS_VMS) [else] */

# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
/*
 * BN_UMULT_HIGH section.
 * If the compiler doesn't support 2*N integer type, then you have to
 * replace every N*N multiplication with 4 (N/2)*(N/2) accompanied by some
 * shifts and additions which unavoidably results in severe performance
 * penalties. Of course provided that the hardware is capable of producing
 * 2*N result... That's when you normally start considering assembler
 * implementation. However! It should be pointed out that some CPUs (e.g.,
 * PowerPC, Alpha, and IA-64) provide *separate* instruction calculating
 * the upper half of the product placing the result into a general
 * purpose register. Now *if* the compiler supports inline assembler,
 * then it's not impossible to implement the "bignum" routines (and have
 * the compiler optimize 'em) exhibiting "native" performance in C. That's
 * what BN_UMULT_HIGH macro is about:-) Note that more recent compilers do
 * support 2*64 integer type, which is also used here.
 */
#  if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \
      (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
#   define BN_UMULT_HIGH(a,b)          (((__uint128_t)(a)*(b))>>64)
#   define BN_UMULT_LOHI(low,high,a,b) ({ \
        __uint128_t ret=(__uint128_t)(a)*(b); \
        (high)=ret>>64; (low)=ret; })
#  elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
#   if defined(__DECC)
#    include <c_asm.h>
#    define BN_UMULT_HIGH(a,b)   (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
#   elif defined(__GNUC__) && __GNUC__>=2
#    define BN_UMULT_HIGH(a,b)   ({      \
        register BN_ULONG ret;           \
        asm ("umulh %1,%2,%0"            \
             : "=r"(ret)                 \
             : "r"(a), "r"(b));          \
        ret; })
#   endif                       /* compiler */
#  elif defined(_ARCH_PPC64) && defined(SIXTY_FOUR_BIT_LONG)
#   if defined(__GNUC__) && __GNUC__>=2
#    define BN_UMULT_HIGH(a,b)   ({      \
        register BN_ULONG ret;           \
        asm ("mulhdu %0,%1,%2"           \
             : "=r"(ret)                 \
             : "r"(a), "r"(b));          \
        ret; })
#   endif                       /* compiler */
#  elif (defined(__x86_64) || defined(__x86_64__)) && \
       (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
#   if defined(__GNUC__) && __GNUC__>=2
#    define BN_UMULT_HIGH(a,b)   ({      \
        register BN_ULONG ret,discard;   \
        asm ("mulq %3"                   \
             : "=a"(discard),"=d"(ret)   \
             : "a"(a), "g"(b)            \
             : "cc");                    \
        ret; })
#    define BN_UMULT_LOHI(low,high,a,b)  \
        asm ("mulq %3"                   \
             : "=a"(low),"=d"(high)      \
             : "a"(a),"g"(b)             \
             : "cc");
#   endif
#  elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
#   if defined(_MSC_VER) && _MSC_VER>=1400
unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b);
unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
                          unsigned __int64 *h);
#    pragma intrinsic(__umulh,_umul128)
#    define BN_UMULT_HIGH(a,b)           __umulh((a),(b))
#    define BN_UMULT_LOHI(low,high,a,b)  ((low)=_umul128((a),(b),&(high)))
#   endif
#  elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
#   if defined(__GNUC__) && __GNUC__>=2
#    define BN_UMULT_HIGH(a,b)   ({      \
        register BN_ULONG ret;           \
        asm ("dmultu %1,%2"              \
             : "=h"(ret)                 \
             : "r"(a), "r"(b) : "l");    \
        ret; })
#    define BN_UMULT_LOHI(low,high,a,b)  \
        asm ("dmultu %2,%3"              \
             : "=l"(low),"=h"(high)      \
             : "r"(a), "r"(b));
#   endif
#  elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG)
#   if defined(__GNUC__) && __GNUC__>=2
#    define BN_UMULT_HIGH(a,b)   ({      \
        register BN_ULONG ret;           \
        asm ("umulh %0,%1,%2"            \
             : "=r"(ret)                 \
             : "r"(a), "r"(b));          \
        ret; })
#   endif
#  endif                        /* cpu */
# endif                         /* OPENSSL_NO_ASM */

# ifdef BN_DEBUG_RAND
/* Zero out the words between 'top' and 'dmax' (debug builds only). */
#  define bn_clear_top2max(a) \
        { \
        int      ind = (a)->dmax - (a)->top; \
        BN_ULONG *ftl = &(a)->d[(a)->top-1]; \
        for (; ind != 0; ind--) \
                *(++ftl) = 0x0; \
        }
# else
#  define bn_clear_top2max(a)
# endif

# ifdef BN_LLONG
/*******************************************************************
 * Using the long long type, has to be twice as wide as BN_ULONG...
 */
#  define Lw(t)    (((BN_ULONG)(t))&BN_MASK2)
#  define Hw(t)    (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)

#  define mul_add(r,a,w,c) { \
        BN_ULLONG t; \
        t=(BN_ULLONG)w * (a) + (r) + (c); \
        (r)= Lw(t); \
        (c)= Hw(t); \
        }

#  define mul(r,a,w,c) { \
        BN_ULLONG t; \
        t=(BN_ULLONG)w * (a) + (c); \
        (r)= Lw(t); \
        (c)= Hw(t); \
        }

#  define sqr(r0,r1,a) { \
        BN_ULLONG t; \
        t=(BN_ULLONG)(a)*(a); \
        (r0)=Lw(t); \
        (r1)=Hw(t); \
        }

# elif defined(BN_UMULT_LOHI)
#  define mul_add(r,a,w,c) { \
        BN_ULONG high,low,ret,tmp=(a); \
        ret =  (r); \
        BN_UMULT_LOHI(low,high,w,tmp); \
        ret += (c); \
        (c) =  (ret<(c))?1:0; \
        (c) += high; \
        ret += low; \
        (c) += (ret<low)?1:0; \
        (r) =  ret; \
        }

#  define mul(r,a,w,c) { \
        BN_ULONG high,low,ret,ta=(a); \
        BN_UMULT_LOHI(low,high,w,ta); \
        ret =  low + (c); \
        (c) =  high; \
        (c) += (ret<low)?1:0; \
        (r) =  ret; \
        }

#  define sqr(r0,r1,a) { \
        BN_ULONG tmp=(a); \
        BN_UMULT_LOHI(r0,r1,tmp,tmp); \
        }

# elif defined(BN_UMULT_HIGH)
#  define mul_add(r,a,w,c) { \
        BN_ULONG high,low,ret,tmp=(a); \
        ret =  (r); \
        high=  BN_UMULT_HIGH(w,tmp); \
        ret += (c); \
        low =  (w) * tmp; \
        (c) =  (ret<(c))?1:0; \
        (c) += high; \
        ret += low; \
        (c) += (ret<low)?1:0; \
        (r) =  ret; \
        }

#  define mul(r,a,w,c) { \
        BN_ULONG high,low,ret,ta=(a); \
        low =  (w) * ta; \
        high=  BN_UMULT_HIGH(w,ta); \
        ret =  low + (c); \
        (c) =  high; \
        (c) += (ret<low)?1:0; \
        (r) =  ret; \
        }

#  define sqr(r0,r1,a) { \
        BN_ULONG tmp=(a); \
        (r0) = tmp * tmp; \
        (r1) = BN_UMULT_HIGH(tmp,tmp); \
        }

# else
/*************************************************************
 * No long long type
 */

#  define LBITS(a)        ((a)&BN_MASK2l)
#  define HBITS(a)        (((a)>>BN_BITS4)&BN_MASK2l)
#  define L2HBITS(a)      (((a)<<BN_BITS4)&BN_MASK2)

#  define LLBITS(a)       ((a)&BN_MASKl)
#  define LHBITS(a)       (((a)>>BN_BITS2)&BN_MASKl)
#  define LL2HBITS(a)     ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)

#  define mul64(l,h,bl,bh) \
        { \
        BN_ULONG m,m1,lt,ht; \
 \
        lt=l; \
        ht=h; \
        m =(bh)*(lt); \
        lt=(bl)*(lt); \
        m1=(bl)*(ht); \
        ht =(bh)*(ht); \
        m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
        ht+=HBITS(m); \
        m1=L2HBITS(m); \
        lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
        (l)=lt; \
        (h)=ht; \
        }

#  define sqr64(lo,ho,in) \
        { \
        BN_ULONG l,h,m; \
 \
        h=(in); \
        l=LBITS(h); \
        h=HBITS(h); \
        m =(l)*(h); \
        l*=l; \
        h*=h; \
        h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
        m =(m&BN_MASK2l)<<(BN_BITS4+1); \
        l=(l+m)&BN_MASK2; if (l < m) h++; \
        (lo)=l; \
        (ho)=h; \
        }

#  define mul_add(r,a,bl,bh,c) { \
        BN_ULONG l,h; \
 \
        h= (a); \
        l=LBITS(h); \
        h=HBITS(h); \
        mul64(l,h,(bl),(bh)); \
 \
        /* non-multiply part */ \
        l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
        (c)=(r); \
        l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
        (c)=h&BN_MASK2; \
        (r)=l; \
        }

#  define mul(r,a,bl,bh,c) { \
        BN_ULONG l,h; \
 \
        h= (a); \
        l=LBITS(h); \
        h=HBITS(h); \
        mul64(l,h,(bl),(bh)); \
 \
        /* non-multiply part */ \
        l+=(c); if ((l&BN_MASK2) < (c)) h++; \
        (c)=h&BN_MASK2; \
        (r)=l&BN_MASK2; \
        }
# endif                         /* !BN_LLONG */

void BN_RECP_CTX_init(BN_RECP_CTX *recp);
void BN_MONT_CTX_init(BN_MONT_CTX *ctx);

void bn_init(BIGNUM *a);
void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n);
int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl);
void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
                      int dna, int dnb, BN_ULONG *t);
void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
                           int n, int tna, int tnb, BN_ULONG *t);
void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
                          BN_ULONG *t);
BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                           int cl, int dl);
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0, int num);

BIGNUM *int_bn_mod_inverse(BIGNUM *in,
                           const BIGNUM *a, const BIGNUM *n, BN_CTX *ctx,
                           int *noinv);

/*
 * Ensure |a| has room for at least |bits| bits. Returns |a| unchanged when
 * it is already large enough, NULL when |bits| would overflow the word-count
 * computation, and otherwise delegates reallocation to bn_expand2().
 */
static ossl_inline BIGNUM *bn_expand(BIGNUM *a, int bits)
{
    if (bits > (INT_MAX - BN_BITS2 + 1))
        return NULL;

    if (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax)
        return a;

    return bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2);
}

#endif