1 //===-- A class to store a normalized floating point number -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H
10 #define LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H
11
12 #include "FPBits.h"
13
14 #include "utils/CPP/TypeTraits.h"
15
16 #include <stdint.h>
17
18 namespace __llvm_libc {
19 namespace fputil {
20
21 // A class which stores the normalized form of a floating point value.
22 // The special IEEE-754 bits patterns of Zero, infinity and NaNs are
23 // are not handled by this class.
24 //
25 // A normalized floating point number is of this form:
26 // (-1)*sign * 2^exponent * <mantissa>
27 // where <mantissa> is of the form 1.<...>.
28 template <typename T> struct NormalFloat {
29 static_assert(
30 cpp::IsFloatingPointType<T>::Value,
31 "NormalFloat template parameter has to be a floating point type.");
32
33 using UIntType = typename FPBits<T>::UIntType;
34 static constexpr UIntType one = (UIntType(1) << MantissaWidth<T>::value);
35
36 // Unbiased exponent value.
37 int32_t exponent;
38
39 UIntType mantissa;
40 // We want |UIntType| to have atleast one bit more than the actual mantissa
41 // bit width to accommodate the implicit 1 value.
42 static_assert(sizeof(UIntType) * 8 >= MantissaWidth<T>::value + 1,
43 "Bad type for mantissa in NormalFloat.");
44
45 bool sign;
46
NormalFloatNormalFloat47 NormalFloat(int32_t e, UIntType m, bool s)
48 : exponent(e), mantissa(m), sign(s) {
49 if (mantissa >= one)
50 return;
51
52 unsigned normalizationShift = evaluateNormalizationShift(mantissa);
53 mantissa = mantissa << normalizationShift;
54 exponent -= normalizationShift;
55 }
56
NormalFloatNormalFloat57 explicit NormalFloat(T x) { initFromBits(FPBits<T>(x)); }
58
NormalFloatNormalFloat59 explicit NormalFloat(FPBits<T> bits) { initFromBits(bits); }
60
61 // Compares this normalized number with another normalized number.
62 // Returns -1 is this number is less than |other|, 0 if this number is equal
63 // to |other|, and 1 if this number is greater than |other|.
cmpNormalFloat64 int cmp(const NormalFloat<T> &other) const {
65 if (sign != other.sign)
66 return sign ? -1 : 1;
67
68 if (exponent > other.exponent) {
69 return sign ? -1 : 1;
70 } else if (exponent == other.exponent) {
71 if (mantissa > other.mantissa)
72 return sign ? -1 : 1;
73 else if (mantissa == other.mantissa)
74 return 0;
75 else
76 return sign ? 1 : -1;
77 } else {
78 return sign ? 1 : -1;
79 }
80 }
81
82 // Returns a new normalized floating point number which is equal in value
83 // to this number multiplied by 2^e. That is:
84 // new = this * 2^e
mul2NormalFloat85 NormalFloat<T> mul2(int e) const {
86 NormalFloat<T> result = *this;
87 result.exponent += e;
88 return result;
89 }
90
TNormalFloat91 operator T() const {
92 int biasedExponent = exponent + FPBits<T>::exponentBias;
93 // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
94 constexpr int maxExponentValue = (1 << ExponentWidth<T>::value) - 2;
95 if (biasedExponent > maxExponentValue) {
96 return sign ? FPBits<T>::negInf() : FPBits<T>::inf();
97 }
98
99 FPBits<T> result(T(0.0));
100 result.sign = sign;
101
102 constexpr int subnormalExponent = -FPBits<T>::exponentBias + 1;
103 if (exponent < subnormalExponent) {
104 unsigned shift = subnormalExponent - exponent;
105 // Since exponent > subnormalExponent, shift is strictly greater than
106 // zero.
107 if (shift <= MantissaWidth<T>::value + 1) {
108 // Generate a subnormal number. Might lead to loss of precision.
109 // We round to nearest and round halfway cases to even.
110 const UIntType shiftOutMask = (UIntType(1) << shift) - 1;
111 const UIntType shiftOutValue = mantissa & shiftOutMask;
112 const UIntType halfwayValue = UIntType(1) << (shift - 1);
113 result.exponent = 0;
114 result.mantissa = mantissa >> shift;
115 UIntType newMantissa = result.mantissa;
116 if (shiftOutValue > halfwayValue) {
117 newMantissa += 1;
118 } else if (shiftOutValue == halfwayValue) {
119 // Round to even.
120 if (result.mantissa & 0x1)
121 newMantissa += 1;
122 }
123 result.mantissa = newMantissa;
124 // Adding 1 to mantissa can lead to overflow. This can only happen if
125 // mantissa was all ones (0b111..11). For such a case, we will carry
126 // the overflow into the exponent.
127 if (newMantissa == one)
128 result.exponent = 1;
129 return result;
130 } else {
131 return result;
132 }
133 }
134
135 result.exponent = exponent + FPBits<T>::exponentBias;
136 result.mantissa = mantissa;
137 return result;
138 }
139
140 private:
initFromBitsNormalFloat141 void initFromBits(FPBits<T> bits) {
142 sign = bits.sign;
143
144 if (bits.isInfOrNaN() || bits.isZero()) {
145 // Ignore special bit patterns. Implementations deal with them separately
146 // anyway so this should not be a problem.
147 exponent = 0;
148 mantissa = 0;
149 return;
150 }
151
152 // Normalize subnormal numbers.
153 if (bits.exponent == 0) {
154 unsigned shift = evaluateNormalizationShift(bits.mantissa);
155 mantissa = UIntType(bits.mantissa) << shift;
156 exponent = 1 - FPBits<T>::exponentBias - shift;
157 } else {
158 exponent = bits.exponent - FPBits<T>::exponentBias;
159 mantissa = one | bits.mantissa;
160 }
161 }
162
evaluateNormalizationShiftNormalFloat163 unsigned evaluateNormalizationShift(UIntType m) {
164 unsigned shift = 0;
165 for (; (one & m) == 0 && (shift < MantissaWidth<T>::value);
166 m <<= 1, ++shift)
167 ;
168 return shift;
169 }
170 };
171
172 #if defined(__x86_64__) || defined(__i386__)
173 template <>
initFromBits(FPBits<long double> bits)174 inline void NormalFloat<long double>::initFromBits(FPBits<long double> bits) {
175 sign = bits.sign;
176
177 if (bits.isInfOrNaN() || bits.isZero()) {
178 // Ignore special bit patterns. Implementations deal with them separately
179 // anyway so this should not be a problem.
180 exponent = 0;
181 mantissa = 0;
182 return;
183 }
184
185 if (bits.exponent == 0) {
186 if (bits.implicitBit == 0) {
187 // Since we ignore zero value, the mantissa in this case is non-zero.
188 int normalizationShift = evaluateNormalizationShift(bits.mantissa);
189 exponent = -16382 - normalizationShift;
190 mantissa = (bits.mantissa << normalizationShift);
191 } else {
192 exponent = -16382;
193 mantissa = one | bits.mantissa;
194 }
195 } else {
196 if (bits.implicitBit == 0) {
197 // Invalid number so just store 0 similar to a NaN.
198 exponent = 0;
199 mantissa = 0;
200 } else {
201 exponent = bits.exponent - 16383;
202 mantissa = one | bits.mantissa;
203 }
204 }
205 }
206
207 template <> inline NormalFloat<long double>::operator long double() const {
208 int biasedExponent = exponent + FPBits<long double>::exponentBias;
209 // Max exponent is of the form 0xFF...E. That is why -2 and not -1.
210 constexpr int maxExponentValue = (1 << ExponentWidth<long double>::value) - 2;
211 if (biasedExponent > maxExponentValue) {
212 return sign ? FPBits<long double>::negInf() : FPBits<long double>::inf();
213 }
214
215 FPBits<long double> result(0.0l);
216 result.sign = sign;
217
218 constexpr int subnormalExponent = -FPBits<long double>::exponentBias + 1;
219 if (exponent < subnormalExponent) {
220 unsigned shift = subnormalExponent - exponent;
221 if (shift <= MantissaWidth<long double>::value + 1) {
222 // Generate a subnormal number. Might lead to loss of precision.
223 // We round to nearest and round halfway cases to even.
224 const UIntType shiftOutMask = (UIntType(1) << shift) - 1;
225 const UIntType shiftOutValue = mantissa & shiftOutMask;
226 const UIntType halfwayValue = UIntType(1) << (shift - 1);
227 result.exponent = 0;
228 result.mantissa = mantissa >> shift;
229 UIntType newMantissa = result.mantissa;
230 if (shiftOutValue > halfwayValue) {
231 newMantissa += 1;
232 } else if (shiftOutValue == halfwayValue) {
233 // Round to even.
234 if (result.mantissa & 0x1)
235 newMantissa += 1;
236 }
237 result.mantissa = newMantissa;
238 // Adding 1 to mantissa can lead to overflow. This can only happen if
239 // mantissa was all ones (0b111..11). For such a case, we will carry
240 // the overflow into the exponent and set the implicit bit to 1.
241 if (newMantissa == one) {
242 result.exponent = 1;
243 result.implicitBit = 1;
244 } else {
245 result.implicitBit = 0;
246 }
247 return result;
248 } else {
249 return result;
250 }
251 }
252
253 result.exponent = biasedExponent;
254 result.mantissa = mantissa;
255 result.implicitBit = 1;
256 return result;
257 }
258 #endif
259
260 } // namespace fputil
261 } // namespace __llvm_libc
262
263 #endif // LLVM_LIBC_UTILS_FPUTIL_NORMAL_FLOAT_H
264