1 //===-- A class to store a normalized floating point number -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H 10 #define LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H 11 12 #include "FPBits.h" 13 14 #include "utils/CPP/TypeTraits.h" 15 16 #include <stdint.h> 17 18 namespace __llvm_libc { 19 namespace fputil { 20 21 // A class which stores the normalized form of a floating point value. 22 // The special IEEE-754 bits patterns of Zero, infinity and NaNs are 23 // are not handled by this class. 24 // 25 // A normalized floating point number is of this form: 26 // (-1)*sign * 2^exponent * <mantissa> 27 // where <mantissa> is of the form 1.<...>. 28 template <typename T> struct NormalFloat { 29 static_assert( 30 cpp::IsFloatingPointType<T>::Value, 31 "NormalFloat template parameter has to be a floating point type."); 32 33 using UIntType = typename FPBits<T>::UIntType; 34 static constexpr UIntType one = (UIntType(1) << MantissaWidth<T>::value); 35 36 // Unbiased exponent value. 37 int32_t exponent; 38 39 UIntType mantissa; 40 // We want |UIntType| to have atleast one bit more than the actual mantissa 41 // bit width to accommodate the implicit 1 value. 42 static_assert(sizeof(UIntType) * 8 >= MantissaWidth<T>::value + 1, 43 "Bad type for mantissa in NormalFloat."); 44 45 bool sign; 46 47 NormalFloat(int32_t e, UIntType m, bool s) 48 : exponent(e), mantissa(m), sign(s) { 49 if (mantissa >= one) 50 return; 51 52 unsigned normalizationShift = evaluateNormalizationShift(mantissa); 53 mantissa = mantissa << normalizationShift; 54 exponent -= normalizationShift; 55 } 56 57 explicit NormalFloat(T x) { initFromBits(FPBits<T>(x)); } 58 59 explicit NormalFloat(FPBits<T> bits) { initFromBits(bits); } 60 61 // Compares this normalized number with another normalized number. 62 // Returns -1 is this number is less than |other|, 0 if this number is equal 63 // to |other|, and 1 if this number is greater than |other|. 64 int cmp(const NormalFloat<T> &other) const { 65 if (sign != other.sign) 66 return sign ? -1 : 1; 67 68 if (exponent > other.exponent) { 69 return sign ? -1 : 1; 70 } else if (exponent == other.exponent) { 71 if (mantissa > other.mantissa) 72 return sign ? -1 : 1; 73 else if (mantissa == other.mantissa) 74 return 0; 75 else 76 return sign ? 1 : -1; 77 } else { 78 return sign ? 1 : -1; 79 } 80 } 81 82 // Returns a new normalized floating point number which is equal in value 83 // to this number multiplied by 2^e. That is: 84 // new = this * 2^e 85 NormalFloat<T> mul2(int e) const { 86 NormalFloat<T> result = *this; 87 result.exponent += e; 88 return result; 89 } 90 91 operator T() const { 92 int biasedExponent = exponent + FPBits<T>::exponentBias; 93 // Max exponent is of the form 0xFF...E. That is why -2 and not -1. 94 constexpr int maxExponentValue = (1 << ExponentWidth<T>::value) - 2; 95 if (biasedExponent > maxExponentValue) { 96 return sign ? T(FPBits<T>::negInf()) : T(FPBits<T>::inf()); 97 } 98 99 FPBits<T> result(T(0.0)); 100 result.setSign(sign); 101 102 constexpr int subnormalExponent = -FPBits<T>::exponentBias + 1; 103 if (exponent < subnormalExponent) { 104 unsigned shift = subnormalExponent - exponent; 105 // Since exponent > subnormalExponent, shift is strictly greater than 106 // zero. 107 if (shift <= MantissaWidth<T>::value + 1) { 108 // Generate a subnormal number. Might lead to loss of precision. 109 // We round to nearest and round halfway cases to even. 110 const UIntType shiftOutMask = (UIntType(1) << shift) - 1; 111 const UIntType shiftOutValue = mantissa & shiftOutMask; 112 const UIntType halfwayValue = UIntType(1) << (shift - 1); 113 result.setUnbiasedExponent(0); 114 result.setMantissa(mantissa >> shift); 115 UIntType newMantissa = result.getMantissa(); 116 if (shiftOutValue > halfwayValue) { 117 newMantissa += 1; 118 } else if (shiftOutValue == halfwayValue) { 119 // Round to even. 120 if (result.getMantissa() & 0x1) 121 newMantissa += 1; 122 } 123 result.setMantissa(newMantissa); 124 // Adding 1 to mantissa can lead to overflow. This can only happen if 125 // mantissa was all ones (0b111..11). For such a case, we will carry 126 // the overflow into the exponent. 127 if (newMantissa == one) 128 result.setUnbiasedExponent(1); 129 return T(result); 130 } else { 131 return T(result); 132 } 133 } 134 135 result.setUnbiasedExponent(exponent + FPBits<T>::exponentBias); 136 result.setMantissa(mantissa); 137 return T(result); 138 } 139 140 private: 141 void initFromBits(FPBits<T> bits) { 142 sign = bits.getSign(); 143 144 if (bits.isInfOrNaN() || bits.isZero()) { 145 // Ignore special bit patterns. Implementations deal with them separately 146 // anyway so this should not be a problem. 147 exponent = 0; 148 mantissa = 0; 149 return; 150 } 151 152 // Normalize subnormal numbers. 153 if (bits.getUnbiasedExponent() == 0) { 154 unsigned shift = evaluateNormalizationShift(bits.getMantissa()); 155 mantissa = UIntType(bits.getMantissa()) << shift; 156 exponent = 1 - FPBits<T>::exponentBias - shift; 157 } else { 158 exponent = bits.getUnbiasedExponent() - FPBits<T>::exponentBias; 159 mantissa = one | bits.getMantissa(); 160 } 161 } 162 163 unsigned evaluateNormalizationShift(UIntType m) { 164 unsigned shift = 0; 165 for (; (one & m) == 0 && (shift < MantissaWidth<T>::value); 166 m <<= 1, ++shift) 167 ; 168 return shift; 169 } 170 }; 171 172 #ifdef SPECIAL_X86_LONG_DOUBLE 173 template <> 174 inline void NormalFloat<long double>::initFromBits(FPBits<long double> bits) { 175 sign = bits.getSign(); 176 177 if (bits.isInfOrNaN() || bits.isZero()) { 178 // Ignore special bit patterns. Implementations deal with them separately 179 // anyway so this should not be a problem. 180 exponent = 0; 181 mantissa = 0; 182 return; 183 } 184 185 if (bits.getUnbiasedExponent() == 0) { 186 if (bits.getImplicitBit() == 0) { 187 // Since we ignore zero value, the mantissa in this case is non-zero. 188 int normalizationShift = evaluateNormalizationShift(bits.getMantissa()); 189 exponent = -16382 - normalizationShift; 190 mantissa = (bits.getMantissa() << normalizationShift); 191 } else { 192 exponent = -16382; 193 mantissa = one | bits.getMantissa(); 194 } 195 } else { 196 if (bits.getImplicitBit() == 0) { 197 // Invalid number so just store 0 similar to a NaN. 198 exponent = 0; 199 mantissa = 0; 200 } else { 201 exponent = bits.getUnbiasedExponent() - 16383; 202 mantissa = one | bits.getMantissa(); 203 } 204 } 205 } 206 207 template <> inline NormalFloat<long double>::operator long double() const { 208 int biasedExponent = exponent + FPBits<long double>::exponentBias; 209 // Max exponent is of the form 0xFF...E. That is why -2 and not -1. 210 constexpr int maxExponentValue = (1 << ExponentWidth<long double>::value) - 2; 211 if (biasedExponent > maxExponentValue) { 212 return sign ? FPBits<long double>::negInf() : FPBits<long double>::inf(); 213 } 214 215 FPBits<long double> result(0.0l); 216 result.setSign(sign); 217 218 constexpr int subnormalExponent = -FPBits<long double>::exponentBias + 1; 219 if (exponent < subnormalExponent) { 220 unsigned shift = subnormalExponent - exponent; 221 if (shift <= MantissaWidth<long double>::value + 1) { 222 // Generate a subnormal number. Might lead to loss of precision. 223 // We round to nearest and round halfway cases to even. 224 const UIntType shiftOutMask = (UIntType(1) << shift) - 1; 225 const UIntType shiftOutValue = mantissa & shiftOutMask; 226 const UIntType halfwayValue = UIntType(1) << (shift - 1); 227 result.setUnbiasedExponent(0); 228 result.setMantissa(mantissa >> shift); 229 UIntType newMantissa = result.getMantissa(); 230 if (shiftOutValue > halfwayValue) { 231 newMantissa += 1; 232 } else if (shiftOutValue == halfwayValue) { 233 // Round to even. 234 if (result.getMantissa() & 0x1) 235 newMantissa += 1; 236 } 237 result.setMantissa(newMantissa); 238 // Adding 1 to mantissa can lead to overflow. This can only happen if 239 // mantissa was all ones (0b111..11). For such a case, we will carry 240 // the overflow into the exponent and set the implicit bit to 1. 241 if (newMantissa == one) { 242 result.setUnbiasedExponent(1); 243 result.setImplicitBit(1); 244 } else { 245 result.setImplicitBit(0); 246 } 247 return static_cast<long double>(result); 248 } else { 249 return static_cast<long double>(result); 250 } 251 } 252 253 result.setUnbiasedExponent(biasedExponent); 254 result.setMantissa(mantissa); 255 result.setImplicitBit(1); 256 return static_cast<long double>(result); 257 } 258 #endif // SPECIAL_X86_LONG_DOUBLE 259 260 } // namespace fputil 261 } // namespace __llvm_libc 262 263 #endif // LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H 264