1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2017-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 #include "Floats.hpp"
10 #include "../strings.hpp"
11
12 #if defined(_MSC_VER)
13 // This only affects defined(_DEBUG) and we could scope this to debug builds
14 // only, but we prefer to keep Debug and Release configs as close as possible,
15 // and formatting/disassembly isn't considered a performance critical path.
16 //
17 // There's a defect in Windows Debug runtime libraries in emitting denorms
18 // when denorm mode is FTZ by the calling library and with the Debug runtime
19 // https://developercommunity.visualstudio.com/content/problem/1187587/printf-assert-failure-unexpected-input-value-log10.html
20 //
21 // To test this place _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);
22 // in before calling into this module (and compiled for Debug on MSVC++).
23 #define IGA_NEEDS_DENORM_WORKAROUND
24 #endif
25
26 // For non-NaN cases this flag enables use of a native AVX instruction
27 // for half float conversion _cvtss_sh.
28 //
29 // #define IGA_USE_FP16C
30
31 #include <cmath>
32 #include <cstdlib>
33 #include <cstdint>
34 #include <iomanip>
35 #include <iostream>
36 #include <limits>
37 #include <sstream>
38 #if defined(IGA_NEEDS_DENORM_WORKAROUND) || defined(IGA_USE_FP16C)
39 #include <immintrin.h>
40 #endif
41
42
43 using namespace iga;
44
45 // Big Theory Statement (BTS): on NaN's in IGA
46 //
47 // NaN is an equivalence class of floating point values. Different
48 // libraries, runtimes, and hardware use different elements of the
49 // class as the characteristic value when they want a NaN value.
50 // For instance, the sign bit can be anything and the mantissa payload
51 // bits can be anything except for the leading mantissa bit (which
52 // distinguishes a qnan from an snan).
53 //
54 // NOTE: we distinguish between 'qnan' and 'snan' instead of just using 'nan'
55 // with the whole payload since we parse as 64b and only narrow once
56 // we see the type; we considered 'nan' '(' HEXLIT ')' and just letting
57 // the user sort the meaning of the mantissa out, but this gets messy
58 // when widening literals during parse or narrowing them during
59 // formatting.
60 //
61 // Without loss of generality (this generalizes to all IEEE sizes),
62 // the 32-bit IEEE 754 (C.f. section 6.2.1 of the spec) we have
63 // s eeeeeeee mmm`mmmm`mmmm`mmmm`mmmm`mmmm
64 // (where the s and the m's can be anything such that at least one m is
65 // non-zero; all 0's would imply infinity)
66 //
67 // SNAN: "snan" (signaling NaN) is:
68 // s 11111111 0xx`xxxx`xxxx`xxxx`xxxx`xxxx
69 // ^ leading bit of mantissa is 0; at least one bit
70 // of the rest of the payload (x's must be non-zero)
71 //
72 // QNAN: "qnan" (quiet NaN) is:
73 // s 11111111 1xx`xxxx`xxxx`xxxx`xxxx`xxxx
// ^ leading bit of mantissa is 1 and the rest can be anything
75 // including all zeros since the top mantissa bit is set
76 // (NaN only needs one mantissa bit set and the quiet bit
77 // counts)
78 //
79 // IGA supports the following syntax (from examples)
80 // NaNLit ::= '-'? ('snan'|'qnan') '(' HEXLIT ')'
81 //
82 // Examples:
83 // * "-snan(0xA):f" parses as
84 // s eeeeeeee mmm`mmmm`mmmm`mmmm`mmmm`mmmm
85 // 1 11111111 000`0000`0000`0000`0000`1010
86 // ^ negative ^ snan ^^^^ 0xA payload
87 // sign bit is respected
88 //
89 // * "qnan(0x1B):f" parses as
90 // s eeeeeeee mmm`mmmm`mmmm`mmmm`mmmm`mmmm
// 0 11111111 100`0000`0000`0000`0001`1011
92 // ^ positive ^ qnan ^^^^^^ 0x1B
93 //
94 // * "snan(0x0):f" would be a parse error since the mantissa must not be 0
95 // s eeeeeeee mmm`mmmm`mmmm`mmmm`mmmm`mmmm
// 0 11111111 000`0000`0000`0000`0000`0000
// ^ positive ^ snan^^^^^^^^^^^^^^^^^^^^^^^ at least one bit must be non-zero
98 //
99 // * "snan(0x00800000):f" illegal: payload value too large for bitfield
100 // s eeeeeeee mmm`mmmm`mmmm`mmmm`mmmm`mmmm
// 0 11111111 000`0000`0000`0000`0000`0000
102 // ^ high bit of payload overflows to here
103 //
// * "snan(0x00800000):df" payload fits since df has 52 mantissa bits
105 // s eeeeeeeeeee m`mmmm`mmmm`mmmm`mmmm`mmmm`mmmm`mmmm`mmmm`mmmm`mmmm`mmmm`mmmm
// 0 11111111111 0`0000`0000`0000`0000`0000`0000`1000`0000`0000`0000`0000`0000
107 // ^ 0x800000
108 //
109 // * "snan(0x400000):f" is legal, but just means qnan(0). This is because
110 // the parser parses the value as 64b and then converts it to 32b when it
111 // sees the :f. The payload fits while parsing, but conversion cannot
112 // tell that we were dealing with qnan
113
// Bit-layout traits for the floating point formats handled in this file.
// Half floats are keyed by their 16b storage type (uint16_t).
template <typename F> int FloatMantissaBits();
template <typename F> int FloatExponentBits();

// width of the stored mantissa field for each format
template <> int FloatMantissaBits<double>()   { return 52; }
template <> int FloatMantissaBits<float>()    { return 23; }
template <> int FloatMantissaBits<uint16_t>() { return 10; }

// width of the exponent field for each format
template <> int FloatExponentBits<double>()   { return 11; }
template <> int FloatExponentBits<float>()    { return 8; }
template <> int FloatExponentBits<uint16_t>() { return 5; }
122
123 // E.g. FloatBias<float>() == 127
124 //
FloatBias()125 template <typename F> static int FloatBias() {
126 return (1 << (FloatExponentBits<F>() - 1)) - 1;
127 }
128
129 // FloatBiasDiff<double,float> => 1023 - 127
130 // FloatBiasDiff<float,half> => 127 - 15 (0x70)
FloatBiasDiff()131 template <typename FBIG,typename FSML> static int FloatBiasDiff() {
132 return FloatBias<FBIG>() - FloatBias<FSML>();
133 }
FloatMantissaBitsDiff()134 template <typename FBIG,typename FSML> static int FloatMantissaBitsDiff() {
135 return FloatMantissaBits<FBIG>() - FloatMantissaBits<FSML>();
136 }
137
138 template <typename F, typename I> static
FormatFloatImplNaN(std::ostream & os,I bits)139 void FormatFloatImplNaN(std::ostream &os, I bits)
140 {
141 const int MANT_LEN = FloatMantissaBits<F>();
142 const int EXPN_LEN = FloatExponentBits<F>();
143 const I SIGN_BIT = ((I)1 << (MANT_LEN + EXPN_LEN));
144 if (bits & SIGN_BIT) {
145 os << '-';
146 }
147 // split the payload into the quiet/signaling bit
148 // (high bit of the mantissa) and the lower bits
149 const I QNAN_BIT = (I)1 << (MANT_LEN - 1);
150 if (bits & QNAN_BIT) {
151 os << "qnan";
152 } else {
153 os << "snan";
154 }
155 os << "(";
156 I lowerPayload = bits & (QNAN_BIT - 1); // lower bits of mantissa
157 fmtHex(os, (uint64_t)lowerPayload);
158 os << ")";
159 }
160
// Checks if a floating point number will reparse exactly as formatted.
//
// We parse floats as 64b literals and cast down.  So we use strtod for all
// floating point types and cast down to T before comparing.
//
//   x   - the value that was formatted
//   str - the candidate text for x (taken by reference; it is only read)
//
// Returns true iff parsing str as a double and narrowing it to T gives a
// value that compares equal to x.
template <typename T>
static bool willReparseExactly(T x, const std::string &str)
{
    // we always parse as a double since the parser will do the same
    const double reparsed = std::strtod(str.c_str(), nullptr);
    return static_cast<T>(reparsed) == x;
}
172
173 #ifdef IGA_NEEDS_DENORM_WORKAROUND
174 // C.f. with the #define for this above for notes on this
175 struct ScopedDenormWorkaround {
176 unsigned int oldDenormMode;
ScopedDenormWorkaroundScopedDenormWorkaround177 ScopedDenormWorkaround()
178 : oldDenormMode(_MM_GET_DENORMALS_ZERO_MODE())
179 {
180 _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_OFF);
181 }
~ScopedDenormWorkaroundScopedDenormWorkaround182 ~ScopedDenormWorkaround() {
183 _MM_SET_DENORMALS_ZERO_MODE(oldDenormMode);
184 }
185 };
186 #endif
187
188 // Formats a float in decimal or scientific format if it will not lose
189 // precision during reparse
190 //
191 // Returns false if we were able to format it as something representable.
192 template <typename F, typename I>
TryFormatFloatImplNonHex(std::ostream & os,F x)193 static bool TryFormatFloatImplNonHex(std::ostream &os, F x)
194 {
195 #ifdef IGA_NEEDS_DENORM_WORKAROUND
196 ScopedDenormWorkaround sdw;
197 #endif
198 auto fpc = std::fpclassify(x);
199 switch (fpc)
200 {
201 case FP_INFINITE:
202 if (x < 0) {
203 os << '-';
204 }
205 os << "inf";
206 return true;
207 case FP_NAN:
208 FormatFloatImplNaN<F,I>(os, iga::FloatToBits(x));
209 return true;
210 ///////////////////////////////////////////////////////////////////////////
211 // case FP_ZERO:
212 // case FP_NORMAL:
213 // case FP_SUBNORMAL:
214 default:
215 // Fall through and go through the pretty-print algorithm
216 break;
217 }
218
219 // we try and render it several different ways until we find something
220 // that we can parse back bit-exact.
221 // We try:
222 // - decimal
223 // - exponential
224 // - and then fall back on hex
225 // static_assert(sizeof(F) == sizeof(I));
226
227 // try as default, this lets STL pick the format.
228 // it sometimes gives nice terse output
229 // e.g. "3" for "3.0" instead of "3.0000000000..." (when possible)
230 std::stringstream ss;
231 ss.unsetf(std::ios_base::floatfield);
232 ss << x;
233 if (willReparseExactly(x, ss.str())) {
234 auto str = ss.str();
235 os << str;
236 if (str.find('.') == std::string::npos &&
237 str.find('e') == std::string::npos &&
238 str.find('E') == std::string::npos)
239 {
240 // floats need a ".0" suffixing them if not in scientific form
241 // STL default float sometimes drops the .
242 //
243 // e.g. given "-0.0f", if we were to format that
244 // as "-0:f" (which MSVCRT does), then it parses as
245 // "0" since this is the negation the S64 value 0 (during parse)
246 //
247 // NOTE: we also have to ensure that we aren't using an
248 // exponential (scientific) form already.
249 //
250 // e.g. 1e-007 should not convert to 1e-007.0
251 os << ".0";
252 }
253 return true;
254 }
255
256 // try as scientific
257 ss.str(std::string()); // reset
258 ss << std::scientific << x;
259 if (willReparseExactly(x, ss.str())) {
260 os << ss.str();
261 return true;
262 }
263
264 // TODO: IFDEF this given a new enough compiler
265 // (must parse too)
266 // e.g. 0x1.47ae147ae147bp-7 (need to parse first)
267 // NOTE: parsing should use >>
268 // float f;
269 // e.g. std::istringstream(""0x1P-1022") >> std::hexfloat >> f;
270
271 // fallback to hex integral
272 // FormatFloatImplHex<F,I>(os, x);
273 return false;
274 }
275
276 template <typename F>
FormatFloatAsHex(std::ostream & os,F f)277 static void FormatFloatAsHex(std::ostream &os, F f)
278 {
279 fmtHex(os, (uint64_t)iga::FloatToBits(f));
280 }
281
FormatFloat(std::ostream & os,float x)282 void iga::FormatFloat(std::ostream &os, float x)
283 {
284 if (!TryFormatFloatImplNonHex<float,uint32_t>(os, x)) {
285 FormatFloatAsHex<float>(os, x);
286 }
287 }
288
FormatFloat(std::ostream & os,double x)289 void iga::FormatFloat(std::ostream &os, double x)
290 {
291 if (!TryFormatFloatImplNonHex<double,uint64_t>(os, x)) {
292 FormatFloatAsHex<double>(os, x);
293 }
294 }
295
FormatFloat(std::ostream & os,uint16_t w16)296 void iga::FormatFloat(std::ostream &os, uint16_t w16)
297 {
298 #if 0
299 // this would turn off all non-hex floats
300 fmtHex(os, (uint64_t)w16);
301 #else
302 // trys to format a half float in a friendly format
303 // falling back to hex if all else fails
304 float f32 = ConvertHalfToFloat(w16);
305 if (IS_NAN(f32)) {
306 // So we get the correct payload size for NaNs
307 FormatFloatImplNaN<uint16_t,uint16_t>(os, w16);
308 } else if (ConvertFloatToHalf(f32) != w16 ||
309 !TryFormatFloatImplNonHex<float,uint32_t>(os, f32))
310 {
311 FormatFloatAsHex<uint16_t>(os, w16);
312 } // else: FormatFloatImplNonHex worked
313 #endif
314 }
315
316
FormatFloat(std::ostream & os,uint8_t x)317 void iga::FormatFloat(std::ostream &os, uint8_t x)
318 {
319 FormatFloat(os, ConvertQuarterToFloatGEN(x));
320 }
321
ConvertDoubleToFloatBits(double f)322 uint32_t iga::ConvertDoubleToFloatBits(double f)
323 {
324 uint64_t f64 = FloatToBits(f);
325
326 uint64_t m64 = f64 & F64_MANT_MASK;
327 uint64_t e64 = (f64 & F64_EXP_MASK) >> FloatMantissaBits<double>();
328 if (e64 == (F64_EXP_MASK >> FloatMantissaBits<double>()) && m64 != 0) {
329 // f64 NaN
330 uint32_t m32 = (uint32_t)m64 & F32_MANT_MASK;
331 m32 |= (uint32_t)((m64 & F64_QNAN_BIT) >>
332 (FloatMantissaBits<double>() - FloatMantissaBits<float>())); // preserve snan
333 if (m32 == 0) {
334 // The payload was only in the high bits which we dropped;
335 // make it non-zero so we retain NaN'ness
336 m32 = 1;
337 }
338 uint32_t s32 = (uint32_t)(f64 >> 32) & F32_SIGN_BIT;
339 return (s32 | F32_EXP_MASK | m32);
340 } else {
341 // regular conversion can deal with all the other special cases
342 return FloatToBits((float)f);
343 }
344 }
345
ConvertDoubleToFloat(double f)346 float iga::ConvertDoubleToFloat(double f)
347 {
348 return FloatFromBits(ConvertDoubleToFloatBits(f));
349 }
350
351
// Converts a 32b float to the bit pattern of a 16b half float.
//
// NaNs keep their sign, quiet bit, and bottom 9 payload bits; infinities map
// to infinities.  All other values are either converted with the FP16C
// intrinsic (when IGA_USE_FP16C is defined) or by the manual path below,
// which classifies the magnitude as overflow / normal / denormal / underflow
// and rounds using a guard and a sticky bit.
uint16_t iga::ConvertFloatToHalf(float f)
{
    // local all-ones f32 exponent mask, built from the format traits
    static const uint32_t F32_EXP_MASK =
        ((1 << FloatExponentBits<float>()) - 1) << FloatMantissaBits<float>();

    const uint32_t w32 = FloatToBits(f);
    const uint32_t w32_u = w32 & 0x7FFFFFFF; // magnitude (sign stripped)
    const uint32_t sign = w32 & 0x80000000;
    const uint16_t sign16 = (uint16_t)(sign >> 16); // sign in the f16 position

    if (w32_u > F32_EXP_MASK) { // NaN
        uint16_t m16 = 0;
        m16 |= (F32_QNAN_BIT & w32_u) >> // preserve qnan bit
            (FloatMantissaBits<float>() - FloatMantissaBits<uint16_t>());
        m16 |= (F16_MANT_MASK >> 1) & w32_u; // and bottom 9b
        //
        // f32:  s eeeeeeee q mmmmmmmmmmmmmmmmmmmmmm
        // f16:  the quiet bit q moves to the top of the 10b mantissa and
        //       the bottom 9 bits of the f32 mantissa fill in below it
        if (m16 == 0x0) {
            // if the nonzero payload is in the high bits and gets
            // dropped and the quiet bit is zero, then m16 is 0;
            // to keep the result a NaN we must set at least one
            // mantissa bit
            m16 = 0x1;
        }
        return sign16 | F16_EXP_MASK | m16;
    } else if (w32_u == F32_EXP_MASK) { // +/-Infinity
        return sign16 | F16_EXP_MASK;
    } else {
        // norm/denorm
#ifdef IGA_USE_FP16C
        // should be on all 3rd generation or newer CPUs (HSW/SNB)
        // _MM_FROUND_NO_EXC
        const auto oldCsr = _mm_getcsr();
        _mm_setcsr(oldCsr | _MM_FROUND_NO_EXC);
        uint16_t w16 = (uint16_t)_mm_cvtsi128_si32(
            _mm_cvtps_ph(_mm_set_ss(f), _MM_FROUND_TO_NEAREST_INT));
        _mm_setcsr(oldCsr); // put the control/status register back
        return w16;
#else // !IGA_USE_FP16C
        static const uint32_t F32_BIAS = FloatBias<float>();
        static const uint32_t F16_BIAS = FloatBias<uint16_t>();
        static const int F32_MNT_BITS = FloatMantissaBits<float>();
        static const int F16_MNT_BITS = FloatMantissaBits<uint16_t>();

        // magnitude thresholds (as f32 bit patterns) separating the cases
        static const uint32_t LOWEST_OVERFLOW = // 0x47800000
            (F32_BIAS + F16_BIAS + 1) << FloatMantissaBits<float>();
        static const uint32_t LOWEST_NORM = // 0x38800000
            (F32_BIAS - F16_BIAS + 1) << FloatMantissaBits<float>();
        static const uint32_t LOWEST_DENORM = // 0x33000000
            (F32_BIAS - F16_BIAS - (uint16_t)F16_MNT_BITS) << FloatMantissaBits<float>();
        // adds one to v when the guard bit g is set and either the sticky
        // bit s or the low bit of v is set (round to nearest, ties to even)
        auto round = [](uint32_t v, uint32_t g, uint32_t s) {
            return v + (g & (s | v));
        };
        if (w32_u >= LOWEST_OVERFLOW) { // overflows to infinity
            return sign16 | F16_EXP_MASK;
        } else if (w32_u >= LOWEST_NORM) { // fits as normalized half
            // rebias the exponent and keep the top 10 mantissa bits
            uint32_t v =
                (((w32_u >> F32_MNT_BITS) - (F32_BIAS - F16_BIAS)) << F16_MNT_BITS) |
                ((w32_u >> (F32_MNT_BITS - F16_MNT_BITS)) & F16_MANT_MASK);
            // guard: the first dropped mantissa bit
            uint32_t g = (w32_u >> (F32_MNT_BITS - F16_MNT_BITS - 1)) & 0x1;
            // sticky: any of the low 12 bits set
            uint32_t s = (w32_u & 0x0FFF) ? 1 : 0;
            return (uint16_t)round(sign16 | v, g, s);
        } else if (w32_u >= LOWEST_DENORM) { // fits as denormalized half
            // i grows as the f32 exponent shrinks; it is the bit position
            // of the guard bit within the 24b significand below
            uint32_t i = (F32_BIAS - 1 - 1) - (w32_u >> F32_MNT_BITS);
            // the f32 mantissa with the hidden leading one made explicit
            uint32_t w32_u2 = (w32_u & F32_MANT_MASK) | (F32_MANT_MASK + 1);
            uint32_t v = sign16 | (w32_u2 >> (i + 1));
            uint32_t g = (w32_u2 >> i) & 1;
            uint32_t s = (w32_u2 & ((1 << i) - 1)) ? 1 : 0;
            return (uint16_t)round(v, g, s);
        } else {
            // underflow to +-0
            return sign16;
        }
#endif // !IGA_USE_FP16C
    }
}
430
431
432
433 // GEN's 8-bit restricted float ("quarter float")
434 // s eee mmmm (bias 2^(3-1) - 1 == 3)
435 // s gfe dcba
436 // mantissa shifted to the top of the float
437 // dcb`a000`0000`0000`0000`0000
// exponent is unpacked by expanding
439 // gfe to the following:
440 //
441 // dgGGGGfe where G = ~g
442 // negative and positive 0 bypass this logic
443 //
444 // => NaN and infinities are illegal in this format
445 // => Denorms are not supported
446 // QUOTE:
447 // Specifically, when the exponent field is zero and the fraction
448 // field is not zero, an implied one is still present instead of
449 // taking a denormalized form (without an implied one). This results
450 // in a simple implementation but with a smaller dynamic range -
451 // the magnitude of the smallest non-zero number is 0.1328125.
ConvertQuarterToFloatGEN(uint8_t u8)452 float iga::ConvertQuarterToFloatGEN(uint8_t u8)
453 {
454 if (u8 == 0x00) {
455 return 0.0f;
456 } else if (u8 == 0x80) {
457 return -0.0f;
458 } else {
459 uint32_t f32;
460 f32 = ((uint32_t)u8 & 0x80) << (32 - 8); // d: sign
461 f32 |= ((0x30 & (uint32_t)u8) << (23 - 4)); // fe: low bits of exp
462 if ((0x40 & u8) == 0) { // g = 0
463 // exp=011111fe
464 f32 |= 0x1F << (23 + 2);
465 } else {
466 // exp=100000fe
467 f32 |= 1ul << (23 + 7); // f32 high exp bit
468 }
469 f32 |= ((uint32_t)u8 & 0xF) << (23 - 4); // dcba: mantissa
470 return FloatFromBits(f32);
471 }
472 }
473
IsNaN(uint16_t u16)474 bool iga::IsNaN(uint16_t u16)
475 {
476 return
477 (F16_EXP_MASK & u16) == F16_EXP_MASK &&
478 (F16_MANT_MASK & u16) != 0;
479 }
480
IsInf(uint16_t u16)481 bool iga::IsInf(uint16_t u16)
482 {
483 return
484 (F16_EXP_MASK & u16) == F16_EXP_MASK &&
485 (F16_MANT_MASK & u16) == 0;
486 }
487
488
ConvertHalfToFloat(uint16_t u16)489 float iga::ConvertHalfToFloat(uint16_t u16)
490 {
491 uint16_t u16_u = u16 & 0x7FFF;
492 uint32_t s32 = ((uint32_t)u16 & F16_SIGN_BIT) << 16;
493 uint32_t m16 = u16 & F16_MANT_MASK;
494 if (u16_u > F16_EXP_MASK) {
495 // NaN
496 uint32_t m32 = (u16 & F16_QNAN_BIT) <<
497 FloatMantissaBitsDiff<float,uint16_t>(); // preserve sNaN bit
498 m32 |= (F16_MANT_MASK >> 1) & m16;
499 if (m32 == 0) {
500 m32 = 1; // ensure still NaN
501 }
502 return FloatFromBits(s32 | F32_EXP_MASK | m32);
503 }
504 #ifdef IGA_USE_FP16C
505 float f = _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(u16)));
506 return f;
507 #else // !IGA_USE_FP16C
508 uint32_t e16 = (u16 & F16_EXP_MASK) >> FloatMantissaBits<uint16_t>();
509 uint32_t e32, m32;
510 if (u16_u == F16_EXP_MASK) {
511 // +-infinity
512 e32 = F32_EXP_MASK >> FloatMantissaBits<float>();
513 m32 = 0;
514 } else if (e16 != 0 && e16 < 0x1F) {
515 // normal number
516 e32 = e16 + FloatBiasDiff<float,uint16_t>(); // (127 - 15); // 0x70
517 m32 = m16 << FloatMantissaBitsDiff<float,uint16_t>(); // (23 - 10);
518 } else if (e16 == 0 && m16 != 0) {
519 // denorm/subnorm number (e16 == 0) => renormalize it
520 // shift the mantissa left until the hidden one gets set
521 for (e32 = FloatBiasDiff<float,uint16_t>() + 1;
522 (m16 & (F16_MANT_MASK + 1)) == 0;
523 m16 <<= 1, e32--)
524 ;
525 m32 = (m16 << FloatMantissaBitsDiff<float,uint16_t>()) & F32_MANT_MASK;
526 } else { // if (e16 == 0) // +/- 0.0
527 e32 = 0;
528 m32 = 0;
529 }
530 return FloatFromBits(s32 | (e32 << FloatMantissaBits<float>()) | m32);
531 #endif // !IGA_USE_FP16C
532 }
533
ConvertFloatToDouble(float f)534 double iga::ConvertFloatToDouble(float f)
535 {
536 if (IS_NAN(f)) {
537 uint32_t f32 = FloatToBits(f);
538
539 uint64_t m64;
540 m64 = (uint64_t)(f32 & F32_QNAN_BIT) <<
541 (FloatMantissaBits<double>() - FloatMantissaBits<float>()); // keep the sNaN bit
542 m64 |= (F32_MANT_MASK >> 1) & f32; // keep the non sNaN part
543 // lower part of the payload
544 uint64_t bits =
545 (((uint64_t)f32 & F32_SIGN_BIT) << 32) | // sign
546 F64_EXP_MASK | // exp
547 m64; // new mantissa
548 return FloatFromBits(bits);
549 } else {
550 // not NaN: use default value
551 return (double)f;
552 }
553 }
554
555
556
ParseFLTLIT(const std::string & syntax,double & value)557 bool iga::ParseFLTLIT(const std::string &syntax, double &value)
558 {
559 char *end = nullptr;
560 value = std::strtod(syntax.c_str(), &end);
561 if (*end) {
562 return false;
563 }
564 return true;
565 }
566