1 //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains some functions that are useful for math stuff. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef LLVM_SUPPORT_MATHEXTRAS_H 14 #define LLVM_SUPPORT_MATHEXTRAS_H 15 16 #include "llvm/ADT/bit.h" 17 #include "llvm/Support/Compiler.h" 18 #include <cassert> 19 #include <climits> 20 #include <cstdint> 21 #include <cstring> 22 #include <limits> 23 #include <type_traits> 24 25 namespace llvm { 26 27 /// Mathematical constants. 28 namespace numbers { 29 // TODO: Track C++20 std::numbers. 30 // TODO: Favor using the hexadecimal FP constants (requires C++17). 31 constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113 32 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620 33 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162 34 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392 35 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0) 36 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2) 37 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796 38 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541 39 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161 40 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197 41 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219 42 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1) 43 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194 44 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1) 45 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622 46 constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113 47 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620 48 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162 49 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392 50 log2ef = 1.44269504F, // (0x1.715476P+0) 51 log10ef = .434294482F, // (0x1.bcb7b2P-2) 52 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796 53 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541 54 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161 55 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197 56 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193 57 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1) 58 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194 59 inv_sqrt3f = .577350269F, // (0x1.279a74P-1) 60 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622 61 } // namespace numbers 62 63 /// Create a bitmask with the N right-most bits set to 1, and all other 64 /// bits set to 0. Only unsigned types are allowed. 65 template <typename T> T maskTrailingOnes(unsigned N) { 66 static_assert(std::is_unsigned_v<T>, "Invalid type!"); 67 const unsigned Bits = CHAR_BIT * sizeof(T); 68 assert(N <= Bits && "Invalid bit index"); 69 return N == 0 ? 0 : (T(-1) >> (Bits - N)); 70 } 71 72 /// Create a bitmask with the N left-most bits set to 1, and all other 73 /// bits set to 0. Only unsigned types are allowed. 74 template <typename T> T maskLeadingOnes(unsigned N) { 75 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); 76 } 77 78 /// Create a bitmask with the N right-most bits set to 0, and all other 79 /// bits set to 1. Only unsigned types are allowed. 80 template <typename T> T maskTrailingZeros(unsigned N) { 81 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N); 82 } 83 84 /// Create a bitmask with the N left-most bits set to 0, and all other 85 /// bits set to 1. Only unsigned types are allowed. 86 template <typename T> T maskLeadingZeros(unsigned N) { 87 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); 88 } 89 90 /// Macro compressed bit reversal table for 256 bits. 91 /// 92 /// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable 93 static const unsigned char BitReverseTable256[256] = { 94 #define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64 95 #define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16) 96 #define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4) 97 R6(0), R6(2), R6(1), R6(3) 98 #undef R2 99 #undef R4 100 #undef R6 101 }; 102 103 /// Reverse the bits in \p Val. 104 template <typename T> T reverseBits(T Val) { 105 #if __has_builtin(__builtin_bitreverse8) 106 if constexpr (std::is_same_v<T, uint8_t>) 107 return __builtin_bitreverse8(Val); 108 #endif 109 #if __has_builtin(__builtin_bitreverse16) 110 if constexpr (std::is_same_v<T, uint16_t>) 111 return __builtin_bitreverse16(Val); 112 #endif 113 #if __has_builtin(__builtin_bitreverse32) 114 if constexpr (std::is_same_v<T, uint32_t>) 115 return __builtin_bitreverse32(Val); 116 #endif 117 #if __has_builtin(__builtin_bitreverse64) 118 if constexpr (std::is_same_v<T, uint64_t>) 119 return __builtin_bitreverse64(Val); 120 #endif 121 122 unsigned char in[sizeof(Val)]; 123 unsigned char out[sizeof(Val)]; 124 std::memcpy(in, &Val, sizeof(Val)); 125 for (unsigned i = 0; i < sizeof(Val); ++i) 126 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]]; 127 std::memcpy(&Val, out, sizeof(Val)); 128 return Val; 129 } 130 131 // NOTE: The following support functions use the _32/_64 extensions instead of 132 // type overloading so that signed and unsigned integers can be used without 133 // ambiguity. 134 135 /// Return the high 32 bits of a 64 bit value. 136 constexpr inline uint32_t Hi_32(uint64_t Value) { 137 return static_cast<uint32_t>(Value >> 32); 138 } 139 140 /// Return the low 32 bits of a 64 bit value. 141 constexpr inline uint32_t Lo_32(uint64_t Value) { 142 return static_cast<uint32_t>(Value); 143 } 144 145 /// Make a 64-bit integer from a high / low pair of 32-bit integers. 146 constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) { 147 return ((uint64_t)High << 32) | (uint64_t)Low; 148 } 149 150 /// Checks if an integer fits into the given bit width. 151 template <unsigned N> constexpr inline bool isInt(int64_t x) { 152 if constexpr (N == 8) 153 return static_cast<int8_t>(x) == x; 154 if constexpr (N == 16) 155 return static_cast<int16_t>(x) == x; 156 if constexpr (N == 32) 157 return static_cast<int32_t>(x) == x; 158 if constexpr (N < 64) 159 return -(INT64_C(1) << (N - 1)) <= x && x < (INT64_C(1) << (N - 1)); 160 (void)x; // MSVC v19.25 warns that x is unused. 161 return true; 162 } 163 164 /// Checks if a signed integer is an N bit number shifted left by S. 165 template <unsigned N, unsigned S> 166 constexpr inline bool isShiftedInt(int64_t x) { 167 static_assert( 168 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number."); 169 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide."); 170 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); 171 } 172 173 /// Checks if an unsigned integer fits into the given bit width. 174 template <unsigned N> constexpr inline bool isUInt(uint64_t x) { 175 static_assert(N > 0, "isUInt<0> doesn't make sense"); 176 if constexpr (N == 8) 177 return static_cast<uint8_t>(x) == x; 178 if constexpr (N == 16) 179 return static_cast<uint16_t>(x) == x; 180 if constexpr (N == 32) 181 return static_cast<uint32_t>(x) == x; 182 if constexpr (N < 64) 183 return x < (UINT64_C(1) << (N)); 184 (void)x; // MSVC v19.25 warns that x is unused. 185 return true; 186 } 187 188 /// Checks if a unsigned integer is an N bit number shifted left by S. 189 template <unsigned N, unsigned S> 190 constexpr inline bool isShiftedUInt(uint64_t x) { 191 static_assert( 192 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)"); 193 static_assert(N + S <= 64, 194 "isShiftedUInt<N, S> with N + S > 64 is too wide."); 195 // Per the two static_asserts above, S must be strictly less than 64. So 196 // 1 << S is not undefined behavior. 197 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); 198 } 199 200 /// Gets the maximum value for a N-bit unsigned integer. 201 inline uint64_t maxUIntN(uint64_t N) { 202 assert(N > 0 && N <= 64 && "integer width out of range"); 203 204 // uint64_t(1) << 64 is undefined behavior, so we can't do 205 // (uint64_t(1) << N) - 1 206 // without checking first that N != 64. But this works and doesn't have a 207 // branch. 208 return UINT64_MAX >> (64 - N); 209 } 210 211 /// Gets the minimum value for a N-bit signed integer. 212 inline int64_t minIntN(int64_t N) { 213 assert(N > 0 && N <= 64 && "integer width out of range"); 214 215 return UINT64_C(1) + ~(UINT64_C(1) << (N - 1)); 216 } 217 218 /// Gets the maximum value for a N-bit signed integer. 219 inline int64_t maxIntN(int64_t N) { 220 assert(N > 0 && N <= 64 && "integer width out of range"); 221 222 // This relies on two's complement wraparound when N == 64, so we convert to 223 // int64_t only at the very end to avoid UB. 224 return (UINT64_C(1) << (N - 1)) - 1; 225 } 226 227 /// Checks if an unsigned integer fits into the given (dynamic) bit width. 228 inline bool isUIntN(unsigned N, uint64_t x) { 229 return N >= 64 || x <= maxUIntN(N); 230 } 231 232 /// Checks if an signed integer fits into the given (dynamic) bit width. 233 inline bool isIntN(unsigned N, int64_t x) { 234 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); 235 } 236 237 /// Return true if the argument is a non-empty sequence of ones starting at the 238 /// least significant bit with the remainder zero (32 bit version). 239 /// Ex. isMask_32(0x0000FFFFU) == true. 240 constexpr inline bool isMask_32(uint32_t Value) { 241 return Value && ((Value + 1) & Value) == 0; 242 } 243 244 /// Return true if the argument is a non-empty sequence of ones starting at the 245 /// least significant bit with the remainder zero (64 bit version). 246 constexpr inline bool isMask_64(uint64_t Value) { 247 return Value && ((Value + 1) & Value) == 0; 248 } 249 250 /// Return true if the argument contains a non-empty sequence of ones with the 251 /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. 252 constexpr inline bool isShiftedMask_32(uint32_t Value) { 253 return Value && isMask_32((Value - 1) | Value); 254 } 255 256 /// Return true if the argument contains a non-empty sequence of ones with the 257 /// remainder zero (64 bit version.) 258 constexpr inline bool isShiftedMask_64(uint64_t Value) { 259 return Value && isMask_64((Value - 1) | Value); 260 } 261 262 /// Return true if the argument is a power of two > 0. 263 /// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) 264 constexpr inline bool isPowerOf2_32(uint32_t Value) { 265 return llvm::has_single_bit(Value); 266 } 267 268 /// Return true if the argument is a power of two > 0 (64 bit edition.) 269 constexpr inline bool isPowerOf2_64(uint64_t Value) { 270 return llvm::has_single_bit(Value); 271 } 272 273 /// Return true if the argument contains a non-empty sequence of ones with the 274 /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. 275 /// If true, \p MaskIdx will specify the index of the lowest set bit and \p 276 /// MaskLen is updated to specify the length of the mask, else neither are 277 /// updated. 278 inline bool isShiftedMask_32(uint32_t Value, unsigned &MaskIdx, 279 unsigned &MaskLen) { 280 if (!isShiftedMask_32(Value)) 281 return false; 282 MaskIdx = llvm::countr_zero(Value); 283 MaskLen = llvm::popcount(Value); 284 return true; 285 } 286 287 /// Return true if the argument contains a non-empty sequence of ones with the 288 /// remainder zero (64 bit version.) If true, \p MaskIdx will specify the index 289 /// of the lowest set bit and \p MaskLen is updated to specify the length of the 290 /// mask, else neither are updated. 291 inline bool isShiftedMask_64(uint64_t Value, unsigned &MaskIdx, 292 unsigned &MaskLen) { 293 if (!isShiftedMask_64(Value)) 294 return false; 295 MaskIdx = llvm::countr_zero(Value); 296 MaskLen = llvm::popcount(Value); 297 return true; 298 } 299 300 /// Compile time Log2. 301 /// Valid only for positive powers of two. 302 template <size_t kValue> constexpr inline size_t CTLog2() { 303 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), 304 "Value is not a valid power of 2"); 305 return 1 + CTLog2<kValue / 2>(); 306 } 307 308 template <> constexpr inline size_t CTLog2<1>() { return 0; } 309 310 /// Return the floor log base 2 of the specified value, -1 if the value is zero. 311 /// (32 bit edition.) 312 /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 313 inline unsigned Log2_32(uint32_t Value) { 314 return 31 - llvm::countl_zero(Value); 315 } 316 317 /// Return the floor log base 2 of the specified value, -1 if the value is zero. 318 /// (64 bit edition.) 319 inline unsigned Log2_64(uint64_t Value) { 320 return 63 - llvm::countl_zero(Value); 321 } 322 323 /// Return the ceil log base 2 of the specified value, 32 if the value is zero. 324 /// (32 bit edition). 325 /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 326 inline unsigned Log2_32_Ceil(uint32_t Value) { 327 return 32 - llvm::countl_zero(Value - 1); 328 } 329 330 /// Return the ceil log base 2 of the specified value, 64 if the value is zero. 331 /// (64 bit edition.) 332 inline unsigned Log2_64_Ceil(uint64_t Value) { 333 return 64 - llvm::countl_zero(Value - 1); 334 } 335 336 /// A and B are either alignments or offsets. Return the minimum alignment that 337 /// may be assumed after adding the two together. 338 constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { 339 // The largest power of 2 that divides both A and B. 340 // 341 // Replace "-Value" by "1+~Value" in the following commented code to avoid 342 // MSVC warning C4146 343 // return (A | B) & -(A | B); 344 return (A | B) & (1 + ~(A | B)); 345 } 346 347 /// Returns the next power of two (in 64-bits) that is strictly greater than A. 348 /// Returns zero on overflow. 349 constexpr inline uint64_t NextPowerOf2(uint64_t A) { 350 A |= (A >> 1); 351 A |= (A >> 2); 352 A |= (A >> 4); 353 A |= (A >> 8); 354 A |= (A >> 16); 355 A |= (A >> 32); 356 return A + 1; 357 } 358 359 /// Returns the power of two which is greater than or equal to the given value. 360 /// Essentially, it is a ceil operation across the domain of powers of two. 361 inline uint64_t PowerOf2Ceil(uint64_t A) { 362 if (!A) 363 return 0; 364 return NextPowerOf2(A - 1); 365 } 366 367 /// Returns the next integer (mod 2**64) that is greater than or equal to 368 /// \p Value and is a multiple of \p Align. \p Align must be non-zero. 369 /// 370 /// Examples: 371 /// \code 372 /// alignTo(5, 8) = 8 373 /// alignTo(17, 8) = 24 374 /// alignTo(~0LL, 8) = 0 375 /// alignTo(321, 255) = 510 376 /// \endcode 377 inline uint64_t alignTo(uint64_t Value, uint64_t Align) { 378 assert(Align != 0u && "Align can't be 0."); 379 return (Value + Align - 1) / Align * Align; 380 } 381 382 inline uint64_t alignToPowerOf2(uint64_t Value, uint64_t Align) { 383 assert(Align != 0 && (Align & (Align - 1)) == 0 && 384 "Align must be a power of 2"); 385 return (Value + Align - 1) & -Align; 386 } 387 388 /// If non-zero \p Skew is specified, the return value will be a minimal integer 389 /// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for 390 /// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p 391 /// Skew mod \p A'. \p Align must be non-zero. 392 /// 393 /// Examples: 394 /// \code 395 /// alignTo(5, 8, 7) = 7 396 /// alignTo(17, 8, 1) = 17 397 /// alignTo(~0LL, 8, 3) = 3 398 /// alignTo(321, 255, 42) = 552 399 /// \endcode 400 inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew) { 401 assert(Align != 0u && "Align can't be 0."); 402 Skew %= Align; 403 return alignTo(Value - Skew, Align) + Skew; 404 } 405 406 /// Returns the next integer (mod 2**64) that is greater than or equal to 407 /// \p Value and is a multiple of \c Align. \c Align must be non-zero. 408 template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) { 409 static_assert(Align != 0u, "Align must be non-zero"); 410 return (Value + Align - 1) / Align * Align; 411 } 412 413 /// Returns the integer ceil(Numerator / Denominator). 414 inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { 415 return alignTo(Numerator, Denominator) / Denominator; 416 } 417 418 /// Returns the integer nearest(Numerator / Denominator). 419 inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) { 420 return (Numerator + (Denominator / 2)) / Denominator; 421 } 422 423 /// Returns the largest uint64_t less than or equal to \p Value and is 424 /// \p Skew mod \p Align. \p Align must be non-zero 425 inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { 426 assert(Align != 0u && "Align can't be 0."); 427 Skew %= Align; 428 return (Value - Skew) / Align * Align + Skew; 429 } 430 431 /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. 432 /// Requires 0 < B <= 32. 433 template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) { 434 static_assert(B > 0, "Bit width can't be 0."); 435 static_assert(B <= 32, "Bit width out of range."); 436 return int32_t(X << (32 - B)) >> (32 - B); 437 } 438 439 /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. 440 /// Requires 0 < B <= 32. 441 inline int32_t SignExtend32(uint32_t X, unsigned B) { 442 assert(B > 0 && "Bit width can't be 0."); 443 assert(B <= 32 && "Bit width out of range."); 444 return int32_t(X << (32 - B)) >> (32 - B); 445 } 446 447 /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. 448 /// Requires 0 < B <= 64. 449 template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) { 450 static_assert(B > 0, "Bit width can't be 0."); 451 static_assert(B <= 64, "Bit width out of range."); 452 return int64_t(x << (64 - B)) >> (64 - B); 453 } 454 455 /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. 456 /// Requires 0 < B <= 64. 457 inline int64_t SignExtend64(uint64_t X, unsigned B) { 458 assert(B > 0 && "Bit width can't be 0."); 459 assert(B <= 64 && "Bit width out of range."); 460 return int64_t(X << (64 - B)) >> (64 - B); 461 } 462 463 /// Subtract two unsigned integers, X and Y, of type T and return the absolute 464 /// value of the result. 465 template <typename T> 466 std::enable_if_t<std::is_unsigned_v<T>, T> AbsoluteDifference(T X, T Y) { 467 return X > Y ? (X - Y) : (Y - X); 468 } 469 470 /// Add two unsigned integers, X and Y, of type T. Clamp the result to the 471 /// maximum representable value of T on overflow. ResultOverflowed indicates if 472 /// the result is larger than the maximum representable value of type T. 473 template <typename T> 474 std::enable_if_t<std::is_unsigned_v<T>, T> 475 SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) { 476 bool Dummy; 477 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; 478 // Hacker's Delight, p. 29 479 T Z = X + Y; 480 Overflowed = (Z < X || Z < Y); 481 if (Overflowed) 482 return std::numeric_limits<T>::max(); 483 else 484 return Z; 485 } 486 487 /// Add multiple unsigned integers of type T. Clamp the result to the 488 /// maximum representable value of T on overflow. 489 template <class T, class... Ts> 490 std::enable_if_t<std::is_unsigned_v<T>, T> SaturatingAdd(T X, T Y, T Z, 491 Ts... Args) { 492 bool Overflowed = false; 493 T XY = SaturatingAdd(X, Y, &Overflowed); 494 if (Overflowed) 495 return SaturatingAdd(std::numeric_limits<T>::max(), T(1), Args...); 496 return SaturatingAdd(XY, Z, Args...); 497 } 498 499 /// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the 500 /// maximum representable value of T on overflow. ResultOverflowed indicates if 501 /// the result is larger than the maximum representable value of type T. 502 template <typename T> 503 std::enable_if_t<std::is_unsigned_v<T>, T> 504 SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { 505 bool Dummy; 506 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; 507 508 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that 509 // because it fails for uint16_t (where multiplication can have undefined 510 // behavior due to promotion to int), and requires a division in addition 511 // to the multiplication. 512 513 Overflowed = false; 514 515 // Log2(Z) would be either Log2Z or Log2Z + 1. 516 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z 517 // will necessarily be less than Log2Max as desired. 518 int Log2Z = Log2_64(X) + Log2_64(Y); 519 const T Max = std::numeric_limits<T>::max(); 520 int Log2Max = Log2_64(Max); 521 if (Log2Z < Log2Max) { 522 return X * Y; 523 } 524 if (Log2Z > Log2Max) { 525 Overflowed = true; 526 return Max; 527 } 528 529 // We're going to use the top bit, and maybe overflow one 530 // bit past it. Multiply all but the bottom bit then add 531 // that on at the end. 532 T Z = (X >> 1) * Y; 533 if (Z & ~(Max >> 1)) { 534 Overflowed = true; 535 return Max; 536 } 537 Z <<= 1; 538 if (X & 1) 539 return SaturatingAdd(Z, Y, ResultOverflowed); 540 541 return Z; 542 } 543 544 /// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to 545 /// the product. Clamp the result to the maximum representable value of T on 546 /// overflow. ResultOverflowed indicates if the result is larger than the 547 /// maximum representable value of type T. 548 template <typename T> 549 std::enable_if_t<std::is_unsigned_v<T>, T> 550 SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { 551 bool Dummy; 552 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; 553 554 T Product = SaturatingMultiply(X, Y, &Overflowed); 555 if (Overflowed) 556 return Product; 557 558 return SaturatingAdd(A, Product, &Overflowed); 559 } 560 561 /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. 562 extern const float huge_valf; 563 564 565 /// Add two signed integers, computing the two's complement truncated result, 566 /// returning true if overflow occurred. 567 template <typename T> 568 std::enable_if_t<std::is_signed_v<T>, T> AddOverflow(T X, T Y, T &Result) { 569 #if __has_builtin(__builtin_add_overflow) 570 return __builtin_add_overflow(X, Y, &Result); 571 #else 572 // Perform the unsigned addition. 573 using U = std::make_unsigned_t<T>; 574 const U UX = static_cast<U>(X); 575 const U UY = static_cast<U>(Y); 576 const U UResult = UX + UY; 577 578 // Convert to signed. 579 Result = static_cast<T>(UResult); 580 581 // Adding two positive numbers should result in a positive number. 582 if (X > 0 && Y > 0) 583 return Result <= 0; 584 // Adding two negatives should result in a negative number. 585 if (X < 0 && Y < 0) 586 return Result >= 0; 587 return false; 588 #endif 589 } 590 591 /// Subtract two signed integers, computing the two's complement truncated 592 /// result, returning true if an overflow ocurred. 593 template <typename T> 594 std::enable_if_t<std::is_signed_v<T>, T> SubOverflow(T X, T Y, T &Result) { 595 #if __has_builtin(__builtin_sub_overflow) 596 return __builtin_sub_overflow(X, Y, &Result); 597 #else 598 // Perform the unsigned addition. 599 using U = std::make_unsigned_t<T>; 600 const U UX = static_cast<U>(X); 601 const U UY = static_cast<U>(Y); 602 const U UResult = UX - UY; 603 604 // Convert to signed. 605 Result = static_cast<T>(UResult); 606 607 // Subtracting a positive number from a negative results in a negative number. 608 if (X <= 0 && Y > 0) 609 return Result >= 0; 610 // Subtracting a negative number from a positive results in a positive number. 611 if (X >= 0 && Y < 0) 612 return Result <= 0; 613 return false; 614 #endif 615 } 616 617 /// Multiply two signed integers, computing the two's complement truncated 618 /// result, returning true if an overflow ocurred. 619 template <typename T> 620 std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) { 621 // Perform the unsigned multiplication on absolute values. 622 using U = std::make_unsigned_t<T>; 623 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X); 624 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y); 625 const U UResult = UX * UY; 626 627 // Convert to signed. 628 const bool IsNegative = (X < 0) ^ (Y < 0); 629 Result = IsNegative ? (0 - UResult) : UResult; 630 631 // If any of the args was 0, result is 0 and no overflow occurs. 632 if (UX == 0 || UY == 0) 633 return false; 634 635 // UX and UY are in [1, 2^n], where n is the number of digits. 636 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for 637 // positive) divided by an argument compares to the other. 638 if (IsNegative) 639 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY; 640 else 641 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY; 642 } 643 644 } // End llvm namespace 645 646 #endif 647