1 #pragma once
2 // nonconstexpr754.hpp: manipulation functions for IEEE-754 native types
3 //
4 // Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
5 //
6 // This file is part of the universal numbers project, which is released under an MIT Open Source license.
#include <cmath>    // for frexpf/frexp/frexpl  float/double/long double fraction/exponent extraction
#include <cstdint>  // for uint32_t/uint64_t/int32_t/int64_t
#include <cstring>  // for std::memcpy
#include <iomanip>
#include <limits>
#include <sstream>
#include <string>
#include <tuple>
12 
13 #include <universal/utility/color_print.hpp>
14 
15 namespace sw::universal {
16 
17 
18 ////////////////////////////////////////////////////////////////////////
19 // numerical helpers
20 
21 union float_decoder {
float_decoder()22   float_decoder() : f{0.0f} {}
float_decoder(float _f)23   float_decoder(float _f) : f{_f} {}
24   float f;
25   struct {
26     uint32_t fraction : 23;
27     uint32_t exponent :  8;
28     uint32_t sign     :  1;
29   } parts;
30 };
31 
32 union double_decoder {
double_decoder()33   double_decoder() : d{0.0} {}
double_decoder(double _d)34   double_decoder(double _d) : d{_d} {}
35   double d;
36   struct {
37     uint64_t fraction : 52;
38     uint64_t exponent : 11;
39     uint64_t sign     :  1;
40   } parts;
41 };
42 
extractFields(float value,bool & s,uint64_t & rawExponentBits,uint64_t & rawFractionBits)43 inline void extractFields(float value, bool& s, uint64_t& rawExponentBits, uint64_t& rawFractionBits) {
44 	float_decoder decoder;
45 	decoder.f = value;
46 	s = decoder.parts.sign ? true : false;
47 	rawExponentBits = static_cast<uint64_t>(decoder.parts.exponent);
48 	rawFractionBits = static_cast<uint64_t>(decoder.parts.fraction);
49 }
50 
extractFields(double value,bool & s,uint64_t & rawExponentBits,uint64_t & rawFractionBits)51 inline void extractFields(double value, bool& s, uint64_t& rawExponentBits, uint64_t& rawFractionBits) {
52 	double_decoder decoder;
53 	decoder.d = value;
54 	s = decoder.parts.sign ? true : false;
55 	rawExponentBits = decoder.parts.exponent;
56 	rawFractionBits = decoder.parts.fraction;
57 }
58 
59 ////////////////// string operators
60 
61 /////////////////////////////////////////////////////////////////////////////////////////////////////////
62 // native single precision IEEE floating point
63 
64 // generate a binary string for a native single precision IEEE floating point
to_hex(float number)65 inline std::string to_hex(float number) {
66 	std::stringstream s;
67 	float_decoder decoder;
68 	decoder.f = number;
69 	s << (decoder.parts.sign ? '1' : '0') << '.' << std::hex << int(decoder.parts.exponent) << '.' << decoder.parts.fraction;
70 	return s.str();
71 }
72 
73 // generate a binary string for a native single precision IEEE floating point
to_binary(float number,bool bNibbleMarker=false)74 inline std::string to_binary(float number, bool bNibbleMarker = false) {
75 	std::stringstream s;
76 	float_decoder decoder;
77 	decoder.f = number;
78 
79 	s << "0b";
80 	// print sign bit
81 	s << (decoder.parts.sign ? '1' : '0') << '.';
82 
83 	// print exponent bits
84 	{
85 		uint8_t mask = 0x80;
86 		for (int i = 7; i >= 0; --i) {
87 			s << ((decoder.parts.exponent & mask) ? '1' : '0');
88 			if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
89 			mask >>= 1;
90 		}
91 	}
92 
93 	s << '.';
94 
95 	// print fraction bits
96 	uint32_t mask = (uint32_t(1) << 22);
97 	for (int i = 22; i >= 0; --i) {
98 		s << ((decoder.parts.fraction & mask) ? '1' : '0');
99 		if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
100 		mask >>= 1;
101 	}
102 
103 	return s.str();
104 }
105 
106 // return in triple form (sign, scale, fraction)
to_triple(float number,bool nibbleMarker=false)107 inline std::string to_triple(float number, bool nibbleMarker = false) {
108 	std::stringstream s;
109 
110 	float_decoder decoder;
111 	decoder.f = number;
112 
113 	// print sign bit
114 	s << '(' << (decoder.parts.sign ? '-' : '+') << ',';
115 
116 	// exponent
117 	// the exponent value used in the arithmetic is the exponent shifted by a bias
118 	// for the IEEE 754 binary32 case, an exponent value of 127 represents the actual zero
119 	// (i.e. for 2^(e - 127) to be one, e must be 127).
120 	// Exponents range from ¿126 to +127 because exponents of ¿127 (all 0s) and +128 (all 1s) are reserved for special numbers.
121 	if (decoder.parts.exponent == 0) {
122 		s << "exp=0,";
123 	}
124 	else if (decoder.parts.exponent == 0xFF) {
125 		s << "exp=1, ";
126 	}
127 	int scale = int(decoder.parts.exponent) - 127;
128 	s << scale << ',';
129 
130 	// print fraction bits
131 	uint32_t mask = (uint32_t(1) << 22);
132 	for (int i = 22; i >= 0; --i) {
133 		s << ((decoder.parts.fraction & mask) ? '1' : '0');
134 		if (nibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
135 		mask >>= 1;
136 	}
137 
138 	s << ')';
139 	return s.str();
140 }
141 
142 // specialization for IEEE single precision floats
to_base2_scientific(float number)143 inline std::string to_base2_scientific(float number) {
144 	std::stringstream s;
145 	float_decoder decoder;
146 	decoder.f = number;
147 	s << (decoder.parts.sign == 1 ? "-" : "+") << "1.";
148 	uint32_t mask = (uint32_t(1) << 22);
149 	for (int i = 22; i >= 0; --i) {
150 		s << ((decoder.parts.fraction & mask) ? '1' : '0');
151 		mask >>= 1;
152 	}
153 	s << "e" << std::showpos << (static_cast<int>(decoder.parts.exponent) - 127);
154 	return s.str();
155 }
156 
157 // generate a color coded binary string for a native single precision IEEE floating point
color_print(float number)158 inline std::string color_print(float number) {
159 	std::stringstream s;
160 	float_decoder decoder;
161 	decoder.f = number;
162 
163 	Color red(ColorCode::FG_RED);
164 	Color yellow(ColorCode::FG_YELLOW);
165 	Color blue(ColorCode::FG_BLUE);
166 	Color magenta(ColorCode::FG_MAGENTA);
167 	Color cyan(ColorCode::FG_CYAN);
168 	Color white(ColorCode::FG_WHITE);
169 	Color def(ColorCode::FG_DEFAULT);
170 
171 	// print prefix
172 	s << yellow << "0b";
173 
174 	// print sign bit
175 	s << red << (decoder.parts.sign ? '1' : '0') << '.';
176 
177 	// print exponent bits
178 	{
179 		uint8_t mask = 0x80;
180 		for (int i = 7; i >= 0; --i) {
181 			s << cyan << ((decoder.parts.exponent & mask) ? '1' : '0');
182 			if (i > 0 && i % 4 == 0) s << cyan << '\'';
183 			mask >>= 1;
184 		}
185 	}
186 
187 	s << '.';
188 
189 	// print fraction bits
190 	uint32_t mask = (uint32_t(1) << 22);
191 	for (int i = 22; i >= 0; --i) {
192 		s << magenta << ((decoder.parts.fraction & mask) ? '1' : '0');
193 		if (i > 0 && i % 4 == 0) s << magenta << '\'';
194 		mask >>= 1;
195 	}
196 
197 	s << def;
198 	return s.str();
199 }
200 
201 // generate a color coded binary string for a native double precision IEEE floating point
color_print(double number)202 inline std::string color_print(double number) {
203 	std::stringstream s;
204 	double_decoder decoder;
205 	decoder.d = number;
206 
207 	Color red(ColorCode::FG_RED);
208 	Color yellow(ColorCode::FG_YELLOW);
209 	Color blue(ColorCode::FG_BLUE);
210 	Color magenta(ColorCode::FG_MAGENTA);
211 	Color cyan(ColorCode::FG_CYAN);
212 	Color white(ColorCode::FG_WHITE);
213 	Color def(ColorCode::FG_DEFAULT);
214 
215 	// print prefix
216 	s << yellow << "0b";
217 
218 	// print sign bit
219 	s << red << (decoder.parts.sign ? '1' : '0') << '.';
220 
221 	// print exponent bits
222 	{
223 		uint64_t mask = 0x400;
224 		for (int i = 10; i >= 0; --i) {
225 			s << cyan << ((decoder.parts.exponent & mask) ? '1' : '0');
226 			if (i > 0 && i % 4 == 0) s << cyan << '\'';
227 			mask >>= 1;
228 		}
229 	}
230 
231 	s << '.';
232 
233 	// print fraction bits
234 	uint64_t mask = (uint64_t(1) << 51);
235 	for (int i = 51; i >= 0; --i) {
236 		s << magenta << ((decoder.parts.fraction & mask) ? '1' : '0');
237 		if (i > 0 && i % 4 == 0) s << magenta << '\'';
238 		mask >>= 1;
239 	}
240 
241 	s << def;
242 	return s.str();
243 }
244 
245 /////////////////////////////////////////////////////////////////////////////////////////////////////////
246 // native double precision IEEE floating point
247 
248 // generate a binary string for a native double precision IEEE floating point
to_hex(double number)249 inline std::string to_hex(double number) {
250 	std::stringstream s;
251 	double_decoder decoder;
252 	decoder.d = number;
253 	s << (decoder.parts.sign ? '1' : '0') << '.' << std::hex << int(decoder.parts.exponent) << '.' << decoder.parts.fraction;
254 	return s.str();
255 }
256 
257 // generate a binary string for a native double precision IEEE floating point
to_binary(double number,bool bNibbleMarker=false)258 inline std::string to_binary(double number, bool bNibbleMarker = false) {
259 	std::stringstream s;
260 	double_decoder decoder;
261 	decoder.d = number;
262 
263 	s << "0b";
264 	// print sign bit
265 	s << (decoder.parts.sign ? '1' : '0') << '.';
266 
267 	// print exponent bits
268 	{
269 		uint64_t mask = 0x400;
270 		for (int i = 10; i >= 0; --i) {
271 			s << ((decoder.parts.exponent & mask) ? '1' : '0');
272 			if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
273 			mask >>= 1;
274 		}
275 	}
276 
277 	s << '.';
278 
279 	// print fraction bits
280 	uint64_t mask = (uint64_t(1) << 51);
281 	for (int i = 51; i >= 0; --i) {
282 		s << ((decoder.parts.fraction & mask) ? '1' : '0');
283 		if (bNibbleMarker && i != 0 && (i % 4) == 0) s << '\'';
284 		mask >>= 1;
285 	}
286 
287 	return s.str();
288 }
289 
290 // return in triple form (+, scale, fraction)
to_triple(double number)291 inline std::string to_triple(double number) {
292 	std::stringstream s;
293 	double_decoder decoder;
294 	decoder.d = number;
295 
296 	// print sign bit
297 	s << '(' << (decoder.parts.sign ? '-' : '+') << ',';
298 
299 	// exponent
300 	// the exponent value used in the arithmetic is the exponent shifted by a bias
301 	// for the IEEE 754 binary32 case, an exponent value of 127 represents the actual zero
302 	// (i.e. for 2^(e - 127) to be one, e must be 127).
303 	// Exponents range from -126 to +127 because exponents of -127 (all 0s) and +128 (all 1s) are reserved for special numbers.
304 	if (decoder.parts.exponent == 0) {
305 		s << "exp=0,";
306 	}
307 	else if (decoder.parts.exponent == 0xFF) {
308 		s << "exp=1, ";
309 	}
310 	int scale = int(decoder.parts.exponent) - 1023;
311 	s << scale << ',';
312 
313 	// print fraction bits
314 	uint64_t mask = (uint64_t(1) << 51);
315 	for (int i = 51; i >= 0; --i) {
316 		s << ((decoder.parts.fraction & mask) ? '1' : '0');
317 		mask >>= 1;
318 	}
319 
320 	s << ')';
321 	return s.str();
322 }
323 
324 // specialization for IEEE double precision floats
to_base2_scientific(double number)325 inline std::string to_base2_scientific(double number) {
326 	std::stringstream s;
327 	double_decoder decoder;
328 	decoder.d = number;
329 	s << (decoder.parts.sign == 1 ? "-" : "+") << "1.";
330 	uint64_t mask = (uint64_t(1) << 51);
331 	for (int i = 51; i >= 0; --i) {
332 		s << ((decoder.parts.fraction & mask) ? '1' : '0');
333 		mask >>= 1;
334 	}
335 	s << "e" << std::showpos << (static_cast<int>(decoder.parts.exponent) - 1023);
336 	return s.str();
337 }
338 
339 /// Returns a tuple of sign, exponent, and fraction.
ieee_components(float fp)340 inline std::tuple<bool, int32_t, uint32_t> ieee_components(float fp)
341 {
342 	static_assert(std::numeric_limits<float>::is_iec559,
343 		"This function only works when float complies with IEC 559 (IEEE 754)");
344 	static_assert(sizeof(float) == 4, "This function only works when float is 32 bit.");
345 
346 	float_decoder fd{ fp }; // initializes the first member of the union
347 	// Reading inactive union parts is forbidden in constexpr :-(
348 	return std::make_tuple<bool, int32_t, uint32_t>(
349 		static_cast<bool>(fd.parts.sign),
350 		static_cast<int32_t>(fd.parts.exponent),
351 		static_cast<uint32_t>(fd.parts.fraction)
352 	);
353 
354 #if 0 // reinterpret_cast forbidden in constexpr :-(
355 	uint32_t& as_int = reinterpret_cast<uint32_t&>(fp);
356 	uint32_t exp = static_cast<int32_t>(as_int >> 23);
357 	if (exp & 0x80)
358 		exp |= 0xffffff00l; // turn on leading bits for negativ exponent
359 	return { fp < 0.0, exp, as_int & uint32_t{0x007FFFFFul} };
360 #endif
361 }
362 
363 /// Returns a tuple of sign, exponent, and fraction.
ieee_components(double fp)364 inline std::tuple<bool, int64_t, uint64_t> ieee_components(double fp)
365 {
366 	static_assert(std::numeric_limits<double>::is_iec559,
367 		"This function only works when double complies with IEC 559 (IEEE 754)");
368 	static_assert(sizeof(double) == 8, "This function only works when double is 64 bit.");
369 
370 	double_decoder dd{ fp }; // initializes the first member of the union
371 	// Reading inactive union parts is forbidden in constexpr :-(
372 	return std::make_tuple<bool, int64_t, uint64_t>(
373 		static_cast<bool>(dd.parts.sign),
374 		static_cast<int64_t>(dd.parts.exponent),
375 		static_cast<uint64_t>(dd.parts.fraction)
376 	);
377 }
378 
379 } // namespace sw::universal
380 
381 /////////////////////////////////////////////////////////////////////////////////////////////////////////
382 // compiler specific long double IEEE floating point
383 
/*
Long double is not consistently implemented across different compilers.
The following section organizes the implementation details of each
of the supported compilers.

The x86 extended precision format is an 80-bit format first
implemented in the Intel 8087 math coprocessor and is supported
by all processors that are based on the x86 design and incorporate
a floating-point unit (FPU). This 80-bit format uses one bit for
the sign of the significand, 15 bits for the exponent field
(i.e. the same range as the 128-bit quadruple precision IEEE 754 format)
and 64 bits for the significand. The exponent field is biased by 16383,
meaning that 16383 has to be subtracted from the value in the
exponent field to compute the actual power of 2.
An exponent field value of 32767 (all fifteen bits 1) is reserved
so as to enable the representation of special states such as
infinity and Not a Number. If the exponent field is zero, the
value is a denormal number and the exponent of 2 is -16382.
*/
403 #include <universal/native/nonconstexpr/extract_fp_components.hpp>
404 #include <universal/native/nonconstexpr/msvc_long_double.hpp>
405 #include <universal/native/nonconstexpr/clang_long_double.hpp>
406 #include <universal/native/nonconstexpr/gcc_long_double.hpp>
407 #include <universal/native/nonconstexpr/riscv_long_double.hpp>
408 /*
409   the support for these compilers is not up to date
410 #include <universal/native/nonconstexpr/intelicc_long_double.hpp>
411 #include <universal/native/nonconstexpr/ibmxlc_long_double.hpp>
412 #include <universal/native/nonconstexpr/hpcc_long_double.hpp>
413 #include <universal/native/nonconstexpr/pgi_long_double.hpp>
414 #include <universal/native/nonconstexpr/sunpro_long_double.hpp>
415 */
416