1 #pragma once
2 // nonconstexpr754.hpp: manipulation functions for IEEE-754 native types
3 //
4 // Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
5 //
6 // This file is part of the universal numbers project, which is released under an MIT Open Source license.
#include <cmath>    // for frexpf/frexp/frexpl float/double/long double fraction/exponent extraction
#include <cstdint>  // for uint32_t/uint64_t/int32_t/int64_t
#include <cstring>  // for std::memcpy
#include <iomanip>
#include <limits>
#include <sstream>
#include <string>
#include <tuple>
12
13 #include <universal/utility/color_print.hpp>
14
15 namespace sw::universal {
16
17
18 ////////////////////////////////////////////////////////////////////////
19 // numerical helpers
20
// Overlay of a single precision float with its three IEEE-754 bit fields.
// The bit-fields are declared low-order first (fraction, exponent, sign).
// NOTE: this assumes the compiler allocates bit-fields starting at the least
// significant bit of the storage unit; the C++ standard leaves that layout
// implementation-defined.
union float_decoder {
    float f;               // the value being decoded
    struct {
        uint32_t fraction : 23;  // raw fraction field (hidden bit not included)
        uint32_t exponent : 8;   // raw, biased exponent field
        uint32_t sign : 1;       // 1 for negative values
    } parts;

    // default: decode +0.0f
    float_decoder() : f(0.0f) {}
    // decode the given value
    float_decoder(float _f) : f(_f) {}
};
31
// Overlay of a double precision float with its three IEEE-754 bit fields.
// The bit-fields are declared low-order first (fraction, exponent, sign).
// NOTE: this assumes the compiler allocates bit-fields starting at the least
// significant bit of the storage unit; the C++ standard leaves that layout
// implementation-defined.
union double_decoder {
    double d;              // the value being decoded
    struct {
        uint64_t fraction : 52;  // raw fraction field (hidden bit not included)
        uint64_t exponent : 11;  // raw, biased exponent field
        uint64_t sign : 1;       // 1 for negative values
    } parts;

    // default: decode +0.0
    double_decoder() : d(0.0) {}
    // decode the given value
    double_decoder(double _d) : d(_d) {}
};
42
extractFields(float value,bool & s,uint64_t & rawExponentBits,uint64_t & rawFractionBits)43 inline void extractFields(float value, bool& s, uint64_t& rawExponentBits, uint64_t& rawFractionBits) {
44 float_decoder decoder;
45 decoder.f = value;
46 s = decoder.parts.sign ? true : false;
47 rawExponentBits = static_cast<uint64_t>(decoder.parts.exponent);
48 rawFractionBits = static_cast<uint64_t>(decoder.parts.fraction);
49 }
50
extractFields(double value,bool & s,uint64_t & rawExponentBits,uint64_t & rawFractionBits)51 inline void extractFields(double value, bool& s, uint64_t& rawExponentBits, uint64_t& rawFractionBits) {
52 double_decoder decoder;
53 decoder.d = value;
54 s = decoder.parts.sign ? true : false;
55 rawExponentBits = decoder.parts.exponent;
56 rawFractionBits = decoder.parts.fraction;
57 }
58
59 ////////////////// string operators
60
61 /////////////////////////////////////////////////////////////////////////////////////////////////////////
62 // native single precision IEEE floating point
63
64 // generate a binary string for a native single precision IEEE floating point
to_hex(float number)65 inline std::string to_hex(float number) {
66 std::stringstream s;
67 float_decoder decoder;
68 decoder.f = number;
69 s << (decoder.parts.sign ? '1' : '0') << '.' << std::hex << int(decoder.parts.exponent) << '.' << decoder.parts.fraction;
70 return s.str();
71 }
72
73 // generate a binary string for a native single precision IEEE floating point
to_binary(float number,bool bNibbleMarker=false)74 inline std::string to_binary(float number, bool bNibbleMarker = false) {
75 std::stringstream s;
76 float_decoder decoder;
77 decoder.f = number;
78
79 s << "0b";
80 // print sign bit
81 s << (decoder.parts.sign ? '1' : '0') << '.';
82
83 // print exponent bits
84 {
85 uint8_t mask = 0x80;
86 for (int i = 7; i >= 0; --i) {
87 s << ((decoder.parts.exponent & mask) ? '1' : '0');
88 if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
89 mask >>= 1;
90 }
91 }
92
93 s << '.';
94
95 // print fraction bits
96 uint32_t mask = (uint32_t(1) << 22);
97 for (int i = 22; i >= 0; --i) {
98 s << ((decoder.parts.fraction & mask) ? '1' : '0');
99 if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
100 mask >>= 1;
101 }
102
103 return s.str();
104 }
105
106 // return in triple form (sign, scale, fraction)
to_triple(float number,bool nibbleMarker=false)107 inline std::string to_triple(float number, bool nibbleMarker = false) {
108 std::stringstream s;
109
110 float_decoder decoder;
111 decoder.f = number;
112
113 // print sign bit
114 s << '(' << (decoder.parts.sign ? '-' : '+') << ',';
115
116 // exponent
117 // the exponent value used in the arithmetic is the exponent shifted by a bias
118 // for the IEEE 754 binary32 case, an exponent value of 127 represents the actual zero
119 // (i.e. for 2^(e - 127) to be one, e must be 127).
120 // Exponents range from ¿126 to +127 because exponents of ¿127 (all 0s) and +128 (all 1s) are reserved for special numbers.
121 if (decoder.parts.exponent == 0) {
122 s << "exp=0,";
123 }
124 else if (decoder.parts.exponent == 0xFF) {
125 s << "exp=1, ";
126 }
127 int scale = int(decoder.parts.exponent) - 127;
128 s << scale << ',';
129
130 // print fraction bits
131 uint32_t mask = (uint32_t(1) << 22);
132 for (int i = 22; i >= 0; --i) {
133 s << ((decoder.parts.fraction & mask) ? '1' : '0');
134 if (nibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
135 mask >>= 1;
136 }
137
138 s << ')';
139 return s.str();
140 }
141
142 // specialization for IEEE single precision floats
to_base2_scientific(float number)143 inline std::string to_base2_scientific(float number) {
144 std::stringstream s;
145 float_decoder decoder;
146 decoder.f = number;
147 s << (decoder.parts.sign == 1 ? "-" : "+") << "1.";
148 uint32_t mask = (uint32_t(1) << 22);
149 for (int i = 22; i >= 0; --i) {
150 s << ((decoder.parts.fraction & mask) ? '1' : '0');
151 mask >>= 1;
152 }
153 s << "e" << std::showpos << (static_cast<int>(decoder.parts.exponent) - 127);
154 return s.str();
155 }
156
157 // generate a color coded binary string for a native single precision IEEE floating point
color_print(float number)158 inline std::string color_print(float number) {
159 std::stringstream s;
160 float_decoder decoder;
161 decoder.f = number;
162
163 Color red(ColorCode::FG_RED);
164 Color yellow(ColorCode::FG_YELLOW);
165 Color blue(ColorCode::FG_BLUE);
166 Color magenta(ColorCode::FG_MAGENTA);
167 Color cyan(ColorCode::FG_CYAN);
168 Color white(ColorCode::FG_WHITE);
169 Color def(ColorCode::FG_DEFAULT);
170
171 // print prefix
172 s << yellow << "0b";
173
174 // print sign bit
175 s << red << (decoder.parts.sign ? '1' : '0') << '.';
176
177 // print exponent bits
178 {
179 uint8_t mask = 0x80;
180 for (int i = 7; i >= 0; --i) {
181 s << cyan << ((decoder.parts.exponent & mask) ? '1' : '0');
182 if (i > 0 && i % 4 == 0) s << cyan << '\'';
183 mask >>= 1;
184 }
185 }
186
187 s << '.';
188
189 // print fraction bits
190 uint32_t mask = (uint32_t(1) << 22);
191 for (int i = 22; i >= 0; --i) {
192 s << magenta << ((decoder.parts.fraction & mask) ? '1' : '0');
193 if (i > 0 && i % 4 == 0) s << magenta << '\'';
194 mask >>= 1;
195 }
196
197 s << def;
198 return s.str();
199 }
200
201 // generate a color coded binary string for a native double precision IEEE floating point
color_print(double number)202 inline std::string color_print(double number) {
203 std::stringstream s;
204 double_decoder decoder;
205 decoder.d = number;
206
207 Color red(ColorCode::FG_RED);
208 Color yellow(ColorCode::FG_YELLOW);
209 Color blue(ColorCode::FG_BLUE);
210 Color magenta(ColorCode::FG_MAGENTA);
211 Color cyan(ColorCode::FG_CYAN);
212 Color white(ColorCode::FG_WHITE);
213 Color def(ColorCode::FG_DEFAULT);
214
215 // print prefix
216 s << yellow << "0b";
217
218 // print sign bit
219 s << red << (decoder.parts.sign ? '1' : '0') << '.';
220
221 // print exponent bits
222 {
223 uint64_t mask = 0x400;
224 for (int i = 10; i >= 0; --i) {
225 s << cyan << ((decoder.parts.exponent & mask) ? '1' : '0');
226 if (i > 0 && i % 4 == 0) s << cyan << '\'';
227 mask >>= 1;
228 }
229 }
230
231 s << '.';
232
233 // print fraction bits
234 uint64_t mask = (uint64_t(1) << 51);
235 for (int i = 51; i >= 0; --i) {
236 s << magenta << ((decoder.parts.fraction & mask) ? '1' : '0');
237 if (i > 0 && i % 4 == 0) s << magenta << '\'';
238 mask >>= 1;
239 }
240
241 s << def;
242 return s.str();
243 }
244
245 /////////////////////////////////////////////////////////////////////////////////////////////////////////
246 // native double precision IEEE floating point
247
248 // generate a binary string for a native double precision IEEE floating point
to_hex(double number)249 inline std::string to_hex(double number) {
250 std::stringstream s;
251 double_decoder decoder;
252 decoder.d = number;
253 s << (decoder.parts.sign ? '1' : '0') << '.' << std::hex << int(decoder.parts.exponent) << '.' << decoder.parts.fraction;
254 return s.str();
255 }
256
257 // generate a binary string for a native double precision IEEE floating point
to_binary(double number,bool bNibbleMarker=false)258 inline std::string to_binary(double number, bool bNibbleMarker = false) {
259 std::stringstream s;
260 double_decoder decoder;
261 decoder.d = number;
262
263 s << "0b";
264 // print sign bit
265 s << (decoder.parts.sign ? '1' : '0') << '.';
266
267 // print exponent bits
268 {
269 uint64_t mask = 0x400;
270 for (int i = 10; i >= 0; --i) {
271 s << ((decoder.parts.exponent & mask) ? '1' : '0');
272 if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
273 mask >>= 1;
274 }
275 }
276
277 s << '.';
278
279 // print fraction bits
280 uint64_t mask = (uint64_t(1) << 51);
281 for (int i = 51; i >= 0; --i) {
282 s << ((decoder.parts.fraction & mask) ? '1' : '0');
283 if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
284 mask >>= 1;
285 }
286
287 return s.str();
288 }
289
// return in triple form (sign, scale, fraction)
// The scale is the stored exponent field minus the binary64 bias of 1023:
// an exponent field of 1023 represents 2^0.
// Normal numbers have scales from -1022 to +1023; the all-zeros (0x000) and
// all-ones (0x7FF) exponent fields are reserved for special numbers and are
// additionally tagged with "exp=0," and "exp=1, " in front of the scale.
// When nibbleMarker is true a ' separates groups of four fraction bits,
// matching the single precision overload.
inline std::string to_triple(double number, bool nibbleMarker = false) {
    static_assert(std::numeric_limits<double>::is_iec559,
        "This function only works when double complies with IEC 559 (IEEE 754)");
    static_assert(sizeof(double) == sizeof(uint64_t), "This function only works when double is 64 bit.");

    // copy the object representation into an integer: well-defined, unlike reading
    // an inactive union member, and independent of bit-field allocation order
    uint64_t raw = 0;
    std::memcpy(&raw, &number, sizeof(raw));

    const bool negative = ((raw >> 63) & 1u) != 0;
    const int exponentField = static_cast<int>((raw >> 52) & 0x7FFu);
    const uint64_t fractionField = raw & ((uint64_t(1) << 52) - 1);

    std::stringstream s;

    // print sign
    s << '(' << (negative ? '-' : '+') << ',';

    // tag the reserved exponent encodings; binary64 has 11 exponent bits, so
    // all-ones is 0x7FF (0xFF would be an ordinary normal number with scale -768)
    if (exponentField == 0) {
        s << "exp=0,";
    }
    else if (exponentField == 0x7FF) {
        s << "exp=1, ";
    }
    s << (exponentField - 1023) << ',';

    // print fraction bits, most significant first
    for (int i = 51; i >= 0; --i) {
        s << (((fractionField >> i) & 1u) ? '1' : '0');
        if (nibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
    }

    s << ')';
    return s.str();
}
323
324 // specialization for IEEE double precision floats
to_base2_scientific(double number)325 inline std::string to_base2_scientific(double number) {
326 std::stringstream s;
327 double_decoder decoder;
328 decoder.d = number;
329 s << (decoder.parts.sign == 1 ? "-" : "+") << "1.";
330 uint64_t mask = (uint64_t(1) << 51);
331 for (int i = 51; i >= 0; --i) {
332 s << ((decoder.parts.fraction & mask) ? '1' : '0');
333 mask >>= 1;
334 }
335 s << "e" << std::showpos << (static_cast<int>(decoder.parts.exponent) - 1023);
336 return s.str();
337 }
338
339 /// Returns a tuple of sign, exponent, and fraction.
ieee_components(float fp)340 inline std::tuple<bool, int32_t, uint32_t> ieee_components(float fp)
341 {
342 static_assert(std::numeric_limits<float>::is_iec559,
343 "This function only works when float complies with IEC 559 (IEEE 754)");
344 static_assert(sizeof(float) == 4, "This function only works when float is 32 bit.");
345
346 float_decoder fd{ fp }; // initializes the first member of the union
347 // Reading inactive union parts is forbidden in constexpr :-(
348 return std::make_tuple<bool, int32_t, uint32_t>(
349 static_cast<bool>(fd.parts.sign),
350 static_cast<int32_t>(fd.parts.exponent),
351 static_cast<uint32_t>(fd.parts.fraction)
352 );
353
354 #if 0 // reinterpret_cast forbidden in constexpr :-(
355 uint32_t& as_int = reinterpret_cast<uint32_t&>(fp);
356 uint32_t exp = static_cast<int32_t>(as_int >> 23);
357 if (exp & 0x80)
358 exp |= 0xffffff00l; // turn on leading bits for negativ exponent
359 return { fp < 0.0, exp, as_int & uint32_t{0x007FFFFFul} };
360 #endif
361 }
362
363 /// Returns a tuple of sign, exponent, and fraction.
ieee_components(double fp)364 inline std::tuple<bool, int64_t, uint64_t> ieee_components(double fp)
365 {
366 static_assert(std::numeric_limits<double>::is_iec559,
367 "This function only works when double complies with IEC 559 (IEEE 754)");
368 static_assert(sizeof(double) == 8, "This function only works when double is 64 bit.");
369
370 double_decoder dd{ fp }; // initializes the first member of the union
371 // Reading inactive union parts is forbidden in constexpr :-(
372 return std::make_tuple<bool, int64_t, uint64_t>(
373 static_cast<bool>(dd.parts.sign),
374 static_cast<int64_t>(dd.parts.exponent),
375 static_cast<uint64_t>(dd.parts.fraction)
376 );
377 }
378
379 } // namespace sw::universal
380
381 /////////////////////////////////////////////////////////////////////////////////////////////////////////
382 // compiler specific long double IEEE floating point
383
384 /*
385 Long double is not consistently implemented across different compilers.
386 The following section organizes the implementation details of each
387 of the compilers supported.
388
The x86 extended precision format is an 80-bit format first
implemented in the Intel 8087 math coprocessor and is supported
by all processors that are based on the x86 design that incorporate
a floating-point unit (FPU). This 80-bit format uses one bit for
the sign of the significand, 15 bits for the exponent field
(i.e. the same range as the 128-bit quadruple precision IEEE 754 format)
and 64 bits for the significand. The exponent field is biased by 16383,
meaning that 16383 has to be subtracted from the value in the
exponent field to compute the actual power of 2.
An exponent field value of 32767 (all fifteen bits 1) is reserved
so as to enable the representation of special states such as
infinity and Not a Number. If the exponent field is zero, the
value is a denormal number and the exponent of 2 is -16382.
402 */
403 #include <universal/native/nonconstexpr/extract_fp_components.hpp>
404 #include <universal/native/nonconstexpr/msvc_long_double.hpp>
405 #include <universal/native/nonconstexpr/clang_long_double.hpp>
406 #include <universal/native/nonconstexpr/gcc_long_double.hpp>
407 #include <universal/native/nonconstexpr/riscv_long_double.hpp>
408 /*
409 the support for these compilers is not up to date
410 #include <universal/native/nonconstexpr/intelicc_long_double.hpp>
411 #include <universal/native/nonconstexpr/ibmxlc_long_double.hpp>
412 #include <universal/native/nonconstexpr/hpcc_long_double.hpp>
413 #include <universal/native/nonconstexpr/pgi_long_double.hpp>
414 #include <universal/native/nonconstexpr/sunpro_long_double.hpp>
415 */
416