1 #pragma once
2 // value.hpp: definition of a (sign, scale, significant) representation of an approximation to a real value
3 //
4 // Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
5 //
6 // This file is part of the universal numbers project, which is released under an MIT Open Source license.
7 #include <cassert>
8 #include <iostream>
9 #include <iomanip>
10 #include <limits>
11 #include <tuple>
12 #include <algorithm> // std::max
13 
14 #include <universal/common/exceptions.hpp>
15 #include <universal/number/support/decimal.hpp>
16 #include <universal/native/nonconstexpr754.hpp>
17 #include <universal/native/bit_functions.hpp>
18 #include <universal/internal/bitblock/bitblock.hpp>
19 
20 namespace sw::universal::internal {
21 
22 struct value_internal_exception : public universal_internal_exception {
value_internal_exceptionsw::universal::internal::value_internal_exception23 	value_internal_exception(const std::string& error)
24 		: universal_internal_exception(std::string("value internal exception: ") + error) {};
25 };
26 
27 struct value_shift_too_large : public value_internal_exception {
value_shift_too_largesw::universal::internal::value_shift_too_large28 	value_shift_too_large() : value_internal_exception("shift value too large") {}
29 };
30 
31 using namespace sw::universal;
32 
33 // Forward definitions
34 template<size_t fbits> class value;
35 template<size_t fbits> value<fbits> abs(const value<fbits>& v);
36 
37 #ifdef VALUE_TRACE_CONVERSION
38 constexpr bool _trace_value_conversion = true;
39 #else
40 constexpr bool _trace_value_conversion = false;
41 #endif
42 
43 #ifdef VALUE_TRACE_ADD
44 constexpr bool _trace_value_add = true;
45 #else
46 constexpr bool _trace_value_add = false;
47 #endif
48 
49 #ifdef VALUE_TRACE_SUB
50 constexpr bool _trace_value_sub = true;
51 #else
52 constexpr bool _trace_value_sub = false;
53 #endif
54 
55 #ifdef VALUE_TRACE_MUL
56 constexpr bool _trace_value_mul = true;
57 #else
58 constexpr bool _trace_value_mul = false;
59 #endif
60 
61 #ifdef VALUE_TRACE_DIV
62 constexpr bool _trace_value_div = true;
63 #else
64 constexpr bool _trace_value_div = false;
65 #endif
66 
67 // template class representing a value in scientific notation, using a template size for the number of fraction bits
68 template<size_t fbits>
69 class value {
70 public:
71 	static constexpr size_t fhbits = fbits + 1;    // number of fraction bits including the hidden bit
value()72 	constexpr value()
73           : _sign{false}, _scale{0}, _nrOfBits{fbits}, _fraction{}, _inf{false},
74             _zero{true}, _nan{false} {}
value(bool sign,int scale,const internal::bitblock<fbits> & fraction_without_hidden_bit,bool zero=true,bool inf=false)75 	constexpr value(bool sign, int scale, const internal::bitblock<fbits>& fraction_without_hidden_bit,
76                         bool zero = true, bool inf = false)
77           : _sign{sign}, _scale{scale}, _nrOfBits{fbits}, _fraction{fraction_without_hidden_bit},
78             _inf{inf}, _zero{zero}, _nan{false} {}
79 
80 
81 	// decorated constructors
value(const value & initial_value)82 	constexpr value(const value& initial_value)       { *this = initial_value; }
value(signed char initial_value)83 	constexpr value(signed char initial_value)        { *this = initial_value; }
value(short initial_value)84 	constexpr value(short initial_value)              { *this = initial_value; }
value(int initial_value)85 	constexpr value(int initial_value)                { *this = initial_value; }
value(long initial_value)86 	constexpr value(long initial_value)               { *this = initial_value; }
value(long long initial_value)87 	constexpr value(long long initial_value)          { *this = initial_value; }
value(char initial_value)88 	constexpr value(char initial_value)               { *this = initial_value; }
value(unsigned short initial_value)89 	constexpr value(unsigned short initial_value)     { *this = initial_value; }
value(unsigned int initial_value)90 	constexpr value(unsigned int initial_value)       { *this = initial_value; }
value(unsigned long initial_value)91 	constexpr value(unsigned long initial_value)      { *this = initial_value; }
value(unsigned long long initial_value)92 	constexpr value(unsigned long long initial_value) { *this = initial_value; }
value(float initial_value)93 	value(float initial_value)              { *this = initial_value; }
value(double initial_value)94 	value(double initial_value) : value{}   { *this = initial_value; }
value(long double initial_value)95 	value(long double initial_value)        { *this = initial_value; }
96 
97 	// assignment operators
operator =(const value & rhs)98 	constexpr value& operator=(const value& rhs) {
99 		_sign	  = rhs._sign;
100 		_scale	  = rhs._scale;
101 		_fraction = rhs._fraction;
102 		_nrOfBits = rhs._nrOfBits;
103 		_inf      = rhs._inf;
104 		_zero     = rhs._zero;
105 		_nan      = rhs._nan;
106 		return *this;
107 	}
operator =(signed char rhs)108 	constexpr value<fbits>& operator=(signed char rhs) {
109 		*this = static_cast<long long>(rhs);
110 		return *this;
111 	}
operator =(short rhs)112 	constexpr value<fbits>& operator=(short rhs) {
113 		*this = static_cast<long long>(rhs);
114 		return *this;
115 	}
operator =(int rhs)116 	constexpr value<fbits>& operator=(int rhs) {
117 		*this = static_cast<long long>(rhs);
118 		return *this;
119 	}
operator =(long rhs)120 	constexpr value<fbits>& operator=(long rhs) {
121 		*this = static_cast<long long>(rhs);
122 		return *this;
123 	}
operator =(long long rhs)124 	constexpr value<fbits>& operator=(long long rhs) {
125 		if (_trace_value_conversion) std::cout << "---------------------- CONVERT -------------------" << std::endl;
126 		if (rhs == 0) {
127 			setzero();
128 			return *this;
129 		}
130 		reset();
131 		_sign = (0x8000000000000000 & rhs);  // 1 is negative, 0 is positive
132 		if (_sign) {
133 			// process negative number: process 2's complement of the input
134 			_scale = int(sw::universal::findMostSignificantBit(-rhs)) - 1;
135 			uint64_t _fraction_without_hidden_bit = uint64_t(_scale == 0 ? 0 : (-rhs << (64 - _scale)));
136 			_fraction = copy_integer_fraction<fbits>(_fraction_without_hidden_bit);
137 			//take_2s_complement();
138 			_nrOfBits = fbits;
139 			if (_trace_value_conversion) std::cout << "int64 " << rhs << " sign " << _sign << " scale " << _scale << " fraction b" << _fraction << std::dec << std::endl;
140 		}
141 		else {
142 			// process positive number
143 			_scale = int(sw::universal::findMostSignificantBit(rhs)) - 1;
144 			uint64_t _fraction_without_hidden_bit = uint64_t(_scale == 0 ? 0 : (rhs << (64 - _scale)));
145 			_fraction = copy_integer_fraction<fbits>(_fraction_without_hidden_bit);
146 			_nrOfBits = fbits;
147 			if (_trace_value_conversion) std::cout << "int64 " << rhs << " sign " << _sign << " scale " << _scale << " fraction b" << _fraction << std::dec << std::endl;
148 		}
149 		return *this;
150 	}
operator =(char rhs)151 	constexpr value<fbits>& operator=(char rhs) {
152 		*this = (unsigned long long)(rhs);
153 		return *this;
154 	}
operator =(unsigned short rhs)155 	constexpr value<fbits>& operator=(unsigned short rhs) {
156 		*this = static_cast<long long>(rhs);
157 		return *this;
158 	}
operator =(unsigned int rhs)159 	constexpr value<fbits>& operator=(unsigned int rhs) {
160 		*this = static_cast<long long>(rhs);
161 		return *this;
162 	}
operator =(unsigned long rhs)163 	constexpr value<fbits>& operator=(unsigned long rhs) {
164 		*this = static_cast<long long>(rhs);
165 		return *this;
166 	}
operator =(unsigned long long rhs)167 	constexpr value<fbits>& operator=(unsigned long long rhs) {
168 		if (_trace_value_conversion) std::cout << "---------------------- CONVERT -------------------" << std::endl;
169 		if (rhs == 0) {
170 			setzero();
171 		}
172 		else {
173 			reset();
174 			_scale = static_cast<int>(sw::universal::findMostSignificantBit(rhs)) - 1;
175 			uint64_t _fraction_without_hidden_bit = _scale == 0 ? 0ull : (rhs << (64 - _scale)); // the scale == -1 case is handled above
176 			_fraction = copy_integer_fraction<fbits>(_fraction_without_hidden_bit);
177 			_nrOfBits = fbits;
178 		}
179 		if (_trace_value_conversion) std::cout << "uint64 " << rhs << " sign " << _sign << " scale " << _scale << " fraction b" << _fraction << std::dec << std::endl;
180 		return *this;
181 	}
operator =(float rhs)182 	value<fbits>& operator=(float rhs) {
183 		reset();
184 		if (_trace_value_conversion) std::cout << "---------------------- CONVERT -------------------" << std::endl;
185 
186 		switch (std::fpclassify(rhs)) {
187 		case FP_ZERO:
188 			_nrOfBits = fbits;
189 			_zero = true;
190 			break;
191 		case FP_INFINITE:
192 			_inf  = true;
193 			_sign = true;
194 			break;
195 		case FP_NAN:
196 			_nan = true;
197 			_sign = true;
198 			break;
199 		case FP_SUBNORMAL:
200 		case FP_NORMAL:
201 			{
202 				float _fr{0};
203 				unsigned int _23b_fraction_without_hidden_bit{0};
204 				int _exponent{0};
205 				extract_fp_components(rhs, _sign, _exponent, _fr, _23b_fraction_without_hidden_bit);
206 				_scale = _exponent - 1;
207 				_fraction = extract_23b_fraction<fbits>(_23b_fraction_without_hidden_bit);
208 				_nrOfBits = fbits;
209 				if (_trace_value_conversion) std::cout << "float " << rhs << " sign " << _sign << " scale " << _scale << " 23b fraction 0x" << std::hex << _23b_fraction_without_hidden_bit << " _fraction b" << _fraction << std::dec << std::endl;
210 			}
211 			break;
212 		}
213 		return *this;
214 	}
operator =(double rhs)215 	value<fbits>& operator=(double rhs) {
216 		using std::get;
217 		reset();
218 		if (_trace_value_conversion) std::cout << "---------------------- CONVERT -------------------" << std::endl;
219 
220 		switch (std::fpclassify(rhs)) {
221 		case FP_ZERO:
222 			_nrOfBits = fbits;
223 			_zero = true;
224 			break;
225 		case FP_INFINITE:
226 			_inf = true;
227 			_sign = true;
228 			break;
229 		case FP_NAN:
230 			_nan = true;
231 			_sign = true;
232 			break;
233 		case FP_SUBNORMAL:
234 		case FP_NORMAL:
235 			{
236 #if 1
237 				double _fr{0};
238 				unsigned long long _52b_fraction_without_hidden_bit{0};
239 				int _exponent{0};
240 				extract_fp_components(rhs, _sign, _exponent, _fr, _52b_fraction_without_hidden_bit);
241 #endif
242 #if 0
243                                 auto components= ieee_components(rhs);
244                                 _sign= get<0>(components);
245                                 int _exponent= get<1>(components);
246                                 unsigned long long _52b_fraction_without_hidden_bit= get<2>(components);
247 #endif
248 				_scale = _exponent - 1;
249 				_fraction = extract_52b_fraction<fbits>(_52b_fraction_without_hidden_bit);
250 				_nrOfBits = fbits;
251 				if (_trace_value_conversion) std::cout << "double " << rhs << " sign " << _sign << " scale " << _scale << " 52b fraction 0x" << std::hex << _52b_fraction_without_hidden_bit << " _fraction b" << _fraction << std::dec << std::endl;
252 			}
253 			break;
254 		}
255 		return *this;
256 	}
operator =(long double rhs)257 	value<fbits>& operator=(long double rhs) {
258 		reset();
259 		if (_trace_value_conversion) std::cout << "---------------------- CONVERT -------------------" << std::endl;
260 
261 		switch (std::fpclassify(rhs)) {
262 		case FP_ZERO:
263 			_nrOfBits = fbits;
264 			_zero = true;
265 			break;
266 		case FP_INFINITE:
267 			_inf = true;
268 			_sign = true;
269 			break;
270 		case FP_NAN:
271 			_nan = true;
272 			_sign = true;
273 			break;
274 		case FP_SUBNORMAL:
275 		case FP_NORMAL:
276 			{
277 				long double _fr{0};
278 				unsigned long long _63b_fraction_without_hidden_bit{0};
279 				int _exponent{0};
280 				extract_fp_components(rhs, _sign, _exponent, _fr, _63b_fraction_without_hidden_bit);
281 				_scale = _exponent - 1;
282 				// how to interpret the fraction bits: TODO: this should be a static compile-time code block
283 				if (sizeof(long double) == 8) {
284 					// we are just a double and thus only have 52bits of fraction
285 					_fraction = extract_52b_fraction<fbits>(_63b_fraction_without_hidden_bit);
286 					if (_trace_value_conversion) std::cout << "long double " << rhs << " sign " << _sign << " scale " << _scale << " 52b fraction 0x" << std::hex << _63b_fraction_without_hidden_bit << " _fraction b" << _fraction << std::dec << std::endl;
287 
288 				}
289 				else if (sizeof(long double) == 16) {
290 					// how to differentiate between 80bit and 128bit formats?
291 					_fraction = extract_63b_fraction<fbits>(_63b_fraction_without_hidden_bit);
292 					if (_trace_value_conversion) std::cout << "long double " << rhs << " sign " << _sign << " scale " << _scale << " 63b fraction 0x" << std::hex << _63b_fraction_without_hidden_bit << " _fraction b" << _fraction << std::dec << std::endl;
293 
294 				}
295 				_nrOfBits = fbits;
296 			}
297 			break;
298 		}
299 		return *this;
300 	}
301 
302 	// operators
operator -() const303 	constexpr value<fbits> operator-() const {
304 		return value<fbits>(!_sign, _scale, _fraction, _zero, _inf);
305 	}
306 
operator ++()307 	value<fbits>& operator++() {
308 		*this = *this + value<fbits>(1);
309 		return *this;
310 	}
operator ++(int)311 	value<fbits> operator++(int) {
312 		value<fbits> tmp{ *this };
313 		operator++();
314 		return tmp;
315 	}
316 
317 	// modifiers
reset()318 	constexpr void reset() & {
319 		_sign  = false;
320 		_scale = 0;
321 		_nrOfBits = 0;
322 		_inf = false;
323 		_zero = false;
324 		_nan = false;
325 		_fraction.reset(); // not constexpr
326                 // _fraction= bitblock<fbits>{}; // work around
327 	}
set(bool sign,int scale,bitblock<fbits> fraction_without_hidden_bit,bool zero,bool inf,bool nan=false)328 	void set(bool sign, int scale, bitblock<fbits> fraction_without_hidden_bit, bool zero, bool inf, bool nan = false) {
329 		_sign     = sign;
330 		_scale    = scale;
331 		_fraction = fraction_without_hidden_bit;
332 		_zero     = zero;
333 		_inf      = inf;
334 		_nan      = nan;
335 	}
setzero()336 	void setzero() {
337 		_zero     = true;
338 		_sign     = false;
339 		_inf      = false;
340 		_nan      = false;
341 		_scale    = 0;
342 		_nrOfBits = fbits;
343 		_fraction.reset();
344 	}
setinf()345 	void setinf() {      // this maps to NaR on the posit side, and that has a sign = 1
346 		_inf      = true;
347 		_sign     = true;
348 		_zero     = false;
349 		_nan      = false;
350 		_scale    = 0;
351 		_nrOfBits = fbits;
352 		_fraction.reset();
353 	}
setnan()354 	void setnan() {		// this will also map to NaR
355 		_nan      = true;
356 		_sign     = true;
357 		_zero     = false;
358 		_inf      = false;
359 		_scale    = 0;
360 		_nrOfBits = fbits;
361 		_fraction.reset();
362 	}
setExponent(int e)363 	inline void setExponent(int e) { _scale = e; }
isneg() const364 	inline bool isneg() const { return _sign; }
ispos() const365 	inline bool ispos() const { return !_sign; }
iszero() const366 	inline constexpr bool iszero() const { return _zero; }
isinf() const367 	inline constexpr bool isinf() const { return _inf; }
isnan() const368 	inline constexpr bool isnan() const { return _nan; }
sign() const369 	inline bool sign() const { return _sign; }
scale() const370 	inline int scale() const { return _scale; }
fraction() const371 	bitblock<fbits> fraction() const { return _fraction; }
372 
373 	// Normalized shift (e.g., for addition).
374 	template <size_t Size>
nshift(int shift) const375 	bitblock<Size> nshift(int shift) const {
376 	bitblock<Size> number;
377 
378 #if VALUE_THROW_ARITHMETIC_EXCEPTION
379 		// Check range
380 		if (int(fbits) + shift >= int(Size))
381 			throw value_shift_too_large{};
382 #else
383 		// Check range
384 		if (int(fbits) + shift >= int(Size)) {
385 			std::cerr << "nshift: shift is too large\n";
386 			number.reset();
387 			return number;
388 		}
389 #endif // VALUE_THROW_ARITHMETIC_EXCEPTIONS
390 
391 		int hpos = int(fbits) + shift;       // position of hidden bit
392 		if (hpos <= 0) {   // If hidden bit is LSB or beyond just set uncertainty bit and call it a day
393 			number[0] = true;
394 			return number;
395 		}
396 		number[size_t(hpos)] = true; // hidden bit now safely set
397 
398 		// Copy fraction bits into certain part
399 		for (int npos = hpos - 1, fpos = int(fbits) - 1; npos > 0 && fpos >= 0; --npos, --fpos)
400 			number[size_t(npos)] = _fraction[size_t(fpos)];
401 
402 		// Set uncertainty bit
403 		bool uncertainty = false;
404 		for (int fpos = std::min(int(fbits) - 1, -shift); fpos >= 0 && !uncertainty; --fpos)
405 			uncertainty |= _fraction[size_t(fpos)];
406 		number[0] = uncertainty;
407 		return number;
408 	}
409 	// get a fixed point number by making the hidden bit explicit: useful for multiply units
get_fixed_point() const410 	bitblock<fhbits> get_fixed_point() const {
411 		bitblock<fbits + 1> fixed_point_number;
412 		fixed_point_number.set(fbits, true); // make hidden bit explicit
413 		for (size_t i = 0; i < fbits; i++) {
414 			fixed_point_number[i] = _fraction[i];
415 		}
416 		return fixed_point_number;
417 	}
418 	// get the fraction value including the implicit hidden bit (this is at an exponent level 1 smaller)
419 	template<typename Ty = double>
get_implicit_fraction_value() const420 	Ty get_implicit_fraction_value() const {
421 		if (_zero) return (long double)0.0;
422 		Ty v = 1.0;
423 		Ty scale = 0.5;
424 		for (int i = int(fbits) - 1; i >= 0; i--) {
425 			if (_fraction.test(size_t(i))) v += scale;
426 			scale *= 0.5;
427 			if (scale == 0.0) break;
428 		}
429 		return v;
430 	}
431 	template<typename Ty = double>
sign_value() const432 	Ty sign_value() const {
433 		return (_sign ? Ty{ -1 } : Ty{ 1 });
434 	}
435 	template<typename Ty = double>
scale_value() const436 	Ty scale_value() const {
437 		if (_zero) return Ty(0.0);
438 		return std::pow(Ty(2.0), Ty(_scale));
439 	}
440 	template<typename Ty = double>
fraction_value() const441 	Ty fraction_value() const {
442 		if (_zero) return (long double)0.0;
443 		Ty v = 1.0;
444 		Ty scale = 0.5;
445 		for (int i = int(fbits) - 1; i >= 0; i--) {
446 			if (_fraction.test(size_t(i))) v += scale;
447 			scale *= 0.5;
448 			if (scale == 0.0) break;
449 		}
450 		return v;
451 	}
452 
453 	// conversion helpers
to_long_double() const454 	long double to_long_double() const {
455 		return sign_value<long double>() * scale_value<long double>() * fraction_value<long double>();
456 	}
to_double() const457 	double to_double() const {
458 		return sign_value<double>() * scale_value<double>() * fraction_value<double>();
459 	}
to_float() const460 	float to_float() const {
461 		return sign_value<float>() * scale_value<float>() * fraction_value<float>();
462 	}
463 
464 	// explicit conversion operators to native types
operator long double() const465 	explicit operator long double() const { return to_long_double(); }
operator double() const466 	explicit operator double() const { return to_double(); }
operator float() const467 	explicit operator float() const { return to_float(); }
468 
469 	// TODO: this does not implement a 'real' right extend. tgtbits need to be shorter than fbits
470 	template<size_t srcbits, size_t tgtbits>
right_extend(const value<srcbits> & src)471 	void right_extend(const value<srcbits>& src) {
472 		_sign = src.sign();
473 		_scale = src.scale();
474 		_nrOfBits = tgtbits;
475 		_inf = src.isinf();
476 		_zero = src.iszero();
477 		_nan = src.isnan();
478 		bitblock<srcbits> src_fraction = src.fraction();
479 		if (!_inf && !_zero && !_nan) {
480 			for (int s = srcbits - 1, t = tgtbits - 1; s >= 0 && t >= 0; --s, --t)
481 				_fraction[t] = src_fraction[s];
482 		}
483 	}
484 	// round to a target size number of bits using round-to-nearest round-to-even-on-tie
485 	template<size_t tgt_size>
round_to()486 	value<tgt_size> round_to() {
487 		bitblock<tgt_size> rounded_fraction;
488 		if (tgt_size == 0) {
489 			bool round_up = false;
490 			if (fbits >= 2) {
491 				bool blast = _fraction[fbits - 1ull];
492 				bool sb = anyAfter(_fraction, static_cast<int>(fbits) - 2);
493 				if (blast && sb) round_up = true;
494 			}
495 			else if (fbits == 1) {
496 				round_up = _fraction[0];
497 			}
498 			return value<tgt_size>(_sign, (round_up ? _scale + 1 : _scale), rounded_fraction, _zero, _inf);
499 		}
500 		else {
501 			if (!_zero || !_inf) {
502 				if (tgt_size < fbits) {
503 					int rb = int(tgt_size) - 1;
504 					int lb = int(fbits) - int(tgt_size) - 1;
505 					for (int i = int(fbits) - 1; i > lb; i--, rb--) {
506 						rounded_fraction[static_cast<size_t>(rb)] = _fraction[static_cast<size_t>(i)];
507 					}
508 					bool blast = _fraction[static_cast<size_t>(lb)];
509 					bool sb = false;
510 					if (lb > 0) sb = anyAfter(_fraction, lb-1);
511 					if (blast || sb) rounded_fraction[0ull] = true;
512 				}
513 				else {
514 					int rb = int(tgt_size) - 1;
515 					for (int i = int(fbits) - 1; i >= 0; i--, rb--) {
516 						rounded_fraction[static_cast<size_t>(rb)] = _fraction[static_cast<size_t>(i)];
517 					}
518 				}
519 			}
520 		}
521 		return value<tgt_size>(_sign, _scale, rounded_fraction, _zero, _inf);
522 	}
523 
524 private:
525 	bool                _sign;
526 	int                 _scale;
527 	int                 _nrOfBits;  // in case the fraction is smaller than the full fbits
528 	bitblock<fbits>	    _fraction;
529 	bool                _inf;
530 	bool                _zero;
531 	bool                _nan;
532 
533 	// template parameters need names different from class template parameters (for gcc and clang)
534 	template<size_t nfbits>
535 	friend std::ostream& operator<< (std::ostream& ostr, const value<nfbits>& r);
536 	template<size_t nfbits>
537 	friend std::istream& operator>> (std::istream& istr, value<nfbits>& r);
538 
539 	template<size_t nfbits>
540 	friend bool operator==(const value<nfbits>& lhs, const value<nfbits>& rhs);
541 	template<size_t nfbits>
542 	friend bool operator!=(const value<nfbits>& lhs, const value<nfbits>& rhs);
543 	template<size_t nfbits>
544 	friend bool operator< (const value<nfbits>& lhs, const value<nfbits>& rhs);
545 	template<size_t nfbits>
546 	friend bool operator> (const value<nfbits>& lhs, const value<nfbits>& rhs);
547 	template<size_t nfbits>
548 	friend bool operator<=(const value<nfbits>& lhs, const value<nfbits>& rhs);
549 	template<size_t nfbits>
550 	friend bool operator>=(const value<nfbits>& lhs, const value<nfbits>& rhs);
551 };
552 
553 /*
554  n fraction bits represent a sampling of 2^n samples of a 10^n domain.
555 
556  if you need to represent a smaller decimal than n digits, you need to
557  round in the fraction domain, and then convert the rounded bit string.
558  */
559 
560 // convert a value to a decimal representation
561 template<size_t fbits>
convert_to_decimal_string(const value<fbits> & v)562 inline std::string convert_to_decimal_string(const value<fbits>& v) {
563 	std::cout << to_triple(v) << '\n';
564 	auto scale = v.scale();
565 	auto bits = v.fraction();
566 	// construct the value of the hidden bit
567 	support::decimal bitValue;
568 
569 	// construct the value of the fraction
570 	// step 1: calculate the decimal value of the smallest discretization step of the range
571 	support::decimal range, discretizationLevels, step, partial, multiplier, one;
572 	// create the decimal range we are discretizing
573 	range.setdigit(1);
574 	range.shiftLeft(fbits); // == 10^fbits
575 	// calculate the discretization levels of this range
576 	discretizationLevels.setdigit(1);
577 	for (size_t i = 0; i < fbits; ++i) support::add(discretizationLevels, discretizationLevels);
578 	step = support::div(range, discretizationLevels);
579 	// step 2: construct the value of fraction in terms of discretization samples
580 	partial.setzero();
581 	multiplier.setdigit(1);
582 	// convert the fraction part
583 	for (unsigned i = 0; i < fbits; ++i) {
584 		if (bits[i]) support::add(partial, multiplier);
585 		support::add(multiplier, multiplier);
586 	}
587 	support::add(partial, multiplier); // add the hidden bit value
588 
589 	// step 3: calculate the value of fraction = nrOfSamples * discretizationStep
590 	support::mul(partial, step);
591 
592 	// construct a decimal fixed-point
593 	if (scale > 0) {
594 		support::decimal scaleUp;
595 		scaleUp.setdigit(1);
596 		for (auto i = 0; i < scale; ++i) support::add(scaleUp, scaleUp);
597 		support::mul(partial, scaleUp);
598 	}
599 	else if (scale < 0) {
600 		support::decimal scaleDown;
601 		scaleDown.setdigit(1);
602 		for (auto i = 0; i < -scale; ++i) support::add(scaleDown, scaleDown);
603 		partial = support::div(partial, scaleDown);
604 	}
605 	std::stringstream str;
606 	for (support::decimal::const_reverse_iterator rit = partial.rbegin(); rit != partial.rend(); ++rit) {
607 		str << (int)*rit;
608 	}
609 #ifdef REV
610 	// leading 0s will cause the partial to be represented incorrectly
611 	// if we simply convert it to digits.
612 	// The partial represents the parts in the range, so we can deduce
613 	// the number of leading zeros by comparing to the length of range
614 	size_t nrLeadingZeros = range.size() - partial.size() - 1;
615 	for (size_t i = 0; i < nrLeadingZeros; ++i) str << '0';
616 	size_t digitsWritten = nrLeadingZeros;
617 	for (support::decimal::const_reverse_iterator rit = partial.rbegin(); rit != partial.rend(); ++rit) {
618 		str << (int)*rit;
619 		++digitsWritten;
620 	}
621 	if (digitsWritten < fbits) { // deal with trailing 0s
622 		for (size_t i = digitsWritten; i < fbits; ++i) {
623 			str << '0';
624 		}
625 	}
626 #endif
627 	return str.str();
628 }
629 ////////////////////// VALUE operators
630 template<size_t nfbits>
operator <<(std::ostream & ostr,const value<nfbits> & v)631 inline std::ostream& operator<<(std::ostream& ostr, const value<nfbits>& v) {
632 	if (v._inf) {
633 		ostr << FP_INFINITE;
634 	}
635 	else {
636 		ostr << (long double)v;
637 	}
638 	return ostr;
639 }
640 
641 template<size_t nfbits>
operator >>(std::istream & istr,const value<nfbits> & v)642 inline std::istream& operator>> (std::istream& istr, const value<nfbits>& v) {
643 	istr >> v._fraction;
644 	return istr;
645 }
646 
647 template<size_t nfbits>
operator ==(const value<nfbits> & lhs,const value<nfbits> & rhs)648 inline bool operator==(const value<nfbits>& lhs, const value<nfbits>& rhs) { return lhs._sign == rhs._sign && lhs._scale == rhs._scale && lhs._fraction == rhs._fraction && lhs._nrOfBits == rhs._nrOfBits && lhs._zero == rhs._zero && lhs._inf == rhs._inf; }
649 template<size_t nfbits>
operator !=(const value<nfbits> & lhs,const value<nfbits> & rhs)650 inline bool operator!=(const value<nfbits>& lhs, const value<nfbits>& rhs) { return !operator==(lhs, rhs); }
651 template<size_t nfbits>
operator <(const value<nfbits> & lhs,const value<nfbits> & rhs)652 inline bool operator< (const value<nfbits>& lhs, const value<nfbits>& rhs) {
653 	if (lhs._inf) {
654 		if (rhs._inf) return false; else return true; // everything is less than -infinity
655 	}
656 	else {
657 		if (rhs._inf) return false;
658 	}
659 
660 	if (lhs._zero) {
661 		if (rhs._zero) return false; // they are both 0
662 		if (rhs._sign) return false; else return true;
663 	}
664 	if (rhs._zero) {
665 		if (lhs._sign) return true; else return false;
666 	}
667 	if (lhs._sign) {
668 		if (rhs._sign) {	// both operands are negative
669 			if (lhs._scale > rhs._scale) {
670 				return true;	// lhs is more negative
671 			}
672 			else {
673 				if (lhs._scale == rhs._scale) {
674 					// compare the fraction, which is an unsigned value
675 					if (lhs._fraction == rhs._fraction) return false; // they are the same value
676 					if (lhs._fraction > rhs._fraction) {
677 						return true; // lhs is more negative
678 					}
679 					else {
680 						return false; // lhs is less negative
681 					}
682 				}
683 				else {
684 					return false; // lhs is less negative
685 				}
686 			}
687 		}
688 		else {
689 			return true; // lhs is negative, rhs is positive
690 		}
691 	}
692 	else {
693 		if (rhs._sign) {
694 			return false; // lhs is positive, rhs is negative
695 		}
696 		else {
697 			if (lhs._scale > rhs._scale) {
698 				return false; // lhs is more positive
699 			}
700 			else {
701 				if (lhs._scale == rhs._scale) {
702 					// compare the fractions
703 					if (lhs._fraction == rhs._fraction) return false; // they are the same value
704 					if (lhs._fraction > rhs._fraction) {
705 						return false; // lhs is more positive than rhs
706 					}
707 					else {
708 						return true; // lhs is less positive than rhs
709 					}
710 				}
711 				else {
712 					return true; // lhs is less positive
713 				}
714 			}
715 		}
716 	}
717 //	return false; // all paths are taken care of
718 }
719 template<size_t nfbits>
operator >(const value<nfbits> & lhs,const value<nfbits> & rhs)720 inline bool operator> (const value<nfbits>& lhs, const value<nfbits>& rhs) { return  operator< (rhs, lhs); }
721 template<size_t nfbits>
operator <=(const value<nfbits> & lhs,const value<nfbits> & rhs)722 inline bool operator<=(const value<nfbits>& lhs, const value<nfbits>& rhs) { return !operator> (lhs, rhs); }
723 template<size_t nfbits>
operator >=(const value<nfbits> & lhs,const value<nfbits> & rhs)724 inline bool operator>=(const value<nfbits>& lhs, const value<nfbits>& rhs) { return !operator< (lhs, rhs); }
725 
726 template<size_t nbits>
to_binary(const bitblock<nbits> & a,bool nibbleMarker=true)727 inline std::string to_binary(const bitblock<nbits>& a, bool nibbleMarker = true) {
728 	if constexpr (nbits > 1) {
729 		std::stringstream s;
730 		s << "0b";
731 		for (int i = int(nbits - 1); i >= 0; --i) {
732 			s << (a[static_cast<size_t>(i)] ? '1' : '0');
733 			if (i > 0 && (i % 4) == 0 && nibbleMarker) s << '\'';
734 		}
735 		return s.str();
736 	}
737 	else {
738 		return std::string("-");
739 	}
740 }
741 template<size_t fbits>
to_triple(const value<fbits> & v)742 inline std::string to_triple(const value<fbits>& v) {
743 	std::stringstream s;
744 	if (v.iszero()) {
745 		s << "(+, 0," << std::setw(fbits) << v.fraction() << ')';
746 		return s.str();
747 	}
748 	else if (v.isinf()) {
749 		s << "(inf," << std::setw(fbits) << v.fraction() << ')';
750 		return s.str();
751 	}
752 	s << (v.sign() ? "(-, " : "(+, ");
753 	s << v.scale() << ", ";
754 	s << to_binary(v.fraction(), true) << ')';
755 //	s << "(" << (v.sign() ? "-" : "+") << "," << v.scale() << "," << v.fraction() << ')';
756 	return s.str();
757 }
758 
759 /// Magnitude of a scientific notation value (equivalent to turning the sign bit off).
760 template<size_t nfbits>
abs(const value<nfbits> & v)761 value<nfbits> abs(const value<nfbits>& v) {
762 	return value<nfbits>(false, v.scale(), v.fraction(), v.iszero());
763 }
764 
765 // add two values with fbits fraction bits, round them to abits, and return the abits+1 result value
766 template<size_t fbits, size_t abits>
module_add(const value<fbits> & lhs,const value<fbits> & rhs,value<abits+1> & result)767 void module_add(const value<fbits>& lhs, const value<fbits>& rhs, value<abits + 1>& result) {
768 	// with sign/magnitude adders it is customary to organize the computation
769 	// along the four quadrants of sign combinations
770 	//  + + = +
771 	//  + - =   lhs > rhs ? + : -
772 	//  - + =   lhs > rhs ? - : +
773 	//  - - =
774 	// to simplify the result processing assign the biggest
775 	// absolute value to R1, then the sign of the result will be sign of the value in R1.
776 
777 	if (lhs.isinf() || rhs.isinf()) {
778 		result.setinf();
779 		return;
780 	}
781 	int lhs_scale = lhs.scale(), rhs_scale = rhs.scale(), scale_of_result = std::max(lhs_scale, rhs_scale);
782 
783 	// align the fractions
784 	bitblock<abits> r1 = lhs.template nshift<abits>(lhs_scale - scale_of_result + 3);
785 	bitblock<abits> r2 = rhs.template nshift<abits>(rhs_scale - scale_of_result + 3);
786 	bool r1_sign = lhs.sign(), r2_sign = rhs.sign();
787 	bool signs_are_different = r1_sign != r2_sign;
788 
789 	if (signs_are_different && abs(lhs) < abs(rhs)) {
790 		std::swap(r1, r2);
791 		std::swap(r1_sign, r2_sign);
792 	}
793 
794 	if (signs_are_different) r2 = twos_complement(r2);
795 
796 	if (_trace_value_add) {
797 		std::cout << (r1_sign ? "sign -1" : "sign  1") << " scale " << std::setw(3) << scale_of_result << " r1       " << r1 << std::endl;
798 		if (signs_are_different) {
799 			std::cout << (r2_sign ? "sign -1" : "sign  1") << " scale " << std::setw(3) << scale_of_result << " r2 orig  " << twos_complement(r2) << std::endl;
800 		}
801 		std::cout << (r2_sign ? "sign -1" : "sign  1") << " scale " << std::setw(3) << scale_of_result << " r2       " << r2 << std::endl;
802 	}
803 
804 	bitblock<abits + 1> sum;
805 	const bool carry = add_unsigned(r1, r2, sum);
806 
807 	if (_trace_value_add) std::cout << (r1_sign ? "sign -1" : "sign  1") << " carry " << std::setw(3) << (carry ? 1 : 0) << " sum     " << sum << std::endl;
808 
809 	int shift = 0;
810 	if (carry) {
811 		if (r1_sign == r2_sign) {  // the carry && signs== implies that we have a number bigger than r1
812 			shift = -1;
813 		}
814 		else {
815 			// the carry && signs!= implies ||result|| < ||r1||, must find MSB (in the complement)
816 			for (int i = int(abits) - 1; i >= 0 && !sum[size_t(i)]; --i) {
817 				++shift;
818 			}
819 		}
820 	}
821 	assert(shift >= -1);
822 
823 	if (shift >= long(abits)) {            // we have actual 0
824 		sum.reset();
825 		result.set(false, 0, sum, true, false, false);
826 		return;
827 	}
828 
829 	scale_of_result -= shift;
830 	const int hpos = int(abits) - 1 - shift;         // position of the hidden bit
831 	sum <<= abits - hpos + 1;
832 	if (_trace_value_add) std::cout << (r1_sign ? "sign -1" : "sign  1") << " scale " << std::setw(3) << scale_of_result << " sum     " << sum << std::endl;
833 	result.set(r1_sign, scale_of_result, sum, false, false, false);
834 }
835 
836 // subtract module: use ADDER
837 template<size_t fbits, size_t abits>
module_subtract(const value<fbits> & lhs,const value<fbits> & rhs,value<abits+1> & result)838 void module_subtract(const value<fbits>& lhs, const value<fbits>& rhs, value<abits + 1>& result) {
839 	if (lhs.isinf() || rhs.isinf()) {
840 		result.setinf();
841 		return;
842 	}
843 	int lhs_scale = lhs.scale(), rhs_scale = rhs.scale(), scale_of_result = std::max(lhs_scale, rhs_scale);
844 
845 	// align the fractions
846 	bitblock<abits> r1 = lhs.template nshift<abits>(lhs_scale - scale_of_result + 3);
847 	bitblock<abits> r2 = rhs.template nshift<abits>(rhs_scale - scale_of_result + 3);
848 	bool r1_sign = lhs.sign(), r2_sign = !rhs.sign();
849 	bool signs_are_different = r1_sign != r2_sign;
850 
851 	if (abs(lhs) < abs(rhs)) {
852 		std::swap(r1, r2);
853 		std::swap(r1_sign, r2_sign);
854 	}
855 
856 	if (signs_are_different) r2 = twos_complement(r2);
857 
858 	if (_trace_value_sub) {
859 		std::cout << (r1_sign ? "sign -1" : "sign  1") << " scale " << std::setw(3) << scale_of_result << " r1       " << r1 << std::endl;
860 		std::cout << (r2_sign ? "sign -1" : "sign  1") << " scale " << std::setw(3) << scale_of_result << " r2       " << r2 << std::endl;
861 	}
862 
863 	bitblock<abits + 1> sum;
864 	const bool carry = add_unsigned(r1, r2, sum);
865 
866 	if (_trace_value_sub) std::cout << (r1_sign ? "sign -1" : "sign  1") << " carry " << std::setw(3) << (carry ? 1 : 0) << " sum     " << sum << std::endl;
867 
868 	int shift = 0;
869 	if (carry) {
870 		if (r1_sign == r2_sign) {  // the carry && signs== implies that we have a number bigger than r1
871 			shift = -1;
872 		}
873 		else {
874 			// the carry && signs!= implies r2 is complement, result < r1, must find hidden bit (in the complement)
875 			for (int i = static_cast<int>(abits) - 1; i >= 0 && !sum[static_cast<size_t>(i)]; --i) {
876 				shift++;
877 			}
878 		}
879 	}
880 	assert(shift >= -1);
881 
882 	if (shift >= static_cast<int>(abits)) {            // we have actual 0
883 		sum.reset();
884 		result.set(false, 0, sum, true, false, false);
885 		return;
886 	}
887 
888 	scale_of_result -= shift;
889 	const int hpos = static_cast<int>(abits) - 1 - shift;         // position of the hidden bit
890 	sum <<= abits - hpos + 1;
891 	if (_trace_value_sub) std::cout << (r1_sign ? "sign -1" : "sign  1") << " scale " << std::setw(3) << scale_of_result << " sum     " << sum << std::endl;
892 	result.set(r1_sign, scale_of_result, sum, false, false, false);
893 }
894 
895 // subtract module using SUBTRACTOR: CURRENTLY BROKEN FOR UNKNOWN REASON
896 template<size_t fbits, size_t abits>
module_subtract_BROKEN(const value<fbits> & lhs,const value<fbits> & rhs,value<abits+1> & result)897 void module_subtract_BROKEN(const value<fbits>& lhs, const value<fbits>& rhs, value<abits + 1>& result) {
898 
899 	if (lhs.isinf() || rhs.isinf()) {
900 		result.setinf();
901 		return;
902 	}
903 	int lhs_scale = lhs.scale(), rhs_scale = rhs.scale(), scale_of_result = std::max(lhs_scale, rhs_scale);
904 
905 	// align the fractions
906 	bitblock<abits> r1 = lhs.template nshift<abits>(lhs_scale - scale_of_result + 3);
907 	bitblock<abits> r2 = rhs.template nshift<abits>(rhs_scale - scale_of_result + 3);
908 	bool r1_sign = lhs.sign(), r2_sign = rhs.sign();
909 	//bool signs_are_equal = r1_sign == r2_sign;
910 
911 	if (r1_sign) r1 = twos_complement(r1);
912 	if (r1_sign) r2 = twos_complement(r2);
913 
914 	if (_trace_value_sub) {
915 		std::cout << (r1_sign ? "sign -1" : "sign  1") << " scale " << std::setw(3) << scale_of_result << " r1       " << r1 << std::endl;
916 		std::cout << (r2_sign ? "sign -1" : "sign  1") << " scale " << std::setw(3) << scale_of_result << " r2       " << r2 << std::endl;
917 	}
918 
919 	bitblock<abits + 1> difference;
920 	const bool borrow = subtract_unsigned(r1, r2, difference);
921 
922 	if (_trace_value_sub) std::cout << (r1_sign ? "sign -1" : "sign  1") << " borrow" << std::setw(3) << (borrow ? 1 : 0) << " diff    " << difference << std::endl;
923 
924 	long shift = 0;
925 	if (borrow) {   // we have a negative value result
926 		difference = twos_complement(difference);
927 	}
928 	// find hidden bit
929 	for (int i = abits - 1; i >= 0 && difference[i]; i--) {
930 		shift++;
931 	}
932 	assert(shift >= -1);
933 
934 	if (shift >= long(abits)) {            // we have actual 0
935 		difference.reset();
936 		result.set(false, 0, difference, true, false, false);
937 		return;
938 	}
939 
940 	scale_of_result -= shift;
941 	const int hpos = abits - 1 - shift;         // position of the hidden bit
942 	difference <<= abits - hpos + 1;
943 	if (_trace_value_sub) std::cout << (borrow ? "sign -1" : "sign  1") << " scale " << std::setw(3) << scale_of_result << " result  " << difference << std::endl;
944 	result.set(borrow, scale_of_result, difference, false, false, false);
945 }
946 
947 // multiply module
948 template<size_t fbits, size_t mbits>
module_multiply(const value<fbits> & lhs,const value<fbits> & rhs,value<mbits> & result)949 void module_multiply(const value<fbits>& lhs, const value<fbits>& rhs, value<mbits>& result) {
950 	static constexpr size_t fhbits = fbits + 1;  // fraction + hidden bit
951 	if (_trace_value_mul) std::cout << "lhs  " << to_triple(lhs) << std::endl << "rhs  " << to_triple(rhs) << std::endl;
952 
953 	if (lhs.isinf() || rhs.isinf()) {
954 		result.setinf();
955 		return;
956 	}
957 	if (lhs.iszero() || rhs.iszero()) {
958 		result.setzero();
959 		return;
960 	}
961 
962 	bool new_sign = lhs.sign() ^ rhs.sign();
963 	int new_scale = lhs.scale() + rhs.scale();
964 	bitblock<mbits> result_fraction;
965 
966 	if (fbits > 0) {
967 		// fractions are without hidden bit, get_fixed_point adds the hidden bit back in
968 		bitblock<fhbits> r1 = lhs.get_fixed_point();
969 		bitblock<fhbits> r2 = rhs.get_fixed_point();
970 		multiply_unsigned(r1, r2, result_fraction);
971 
972 		if (_trace_value_mul) std::cout << "r1  " << r1 << std::endl << "r2  " << r2 << std::endl << "res " << result_fraction << std::endl;
973 		// check if the radix point needs to shift
974 		int shift = 2;
975 		if (result_fraction.test(mbits - 1)) {
976 			shift = 1;
977 			if (_trace_value_mul) std::cout << " shift " << shift << std::endl;
978 			new_scale += 1;
979 		}
980 		result_fraction <<= static_cast<size_t>(shift);    // shift hidden bit out
981 	}
982 	else {   // posit<3,0>, <4,1>, <5,2>, <6,3>, <7,4> etc are pure sign and scale
983 		// multiply the hidden bits together, i.e. 1*1: we know the answer a priori
984 	}
985 	if (_trace_value_mul) std::cout << "sign " << (new_sign ? "-1 " : " 1 ") << "scale " << new_scale << " fraction " << result_fraction << std::endl;
986 
987 	result.set(new_sign, new_scale, result_fraction, false, false, false);
988 }
989 
990 // divide module
991 template<size_t fbits, size_t divbits>
module_divide(const value<fbits> & lhs,const value<fbits> & rhs,value<divbits> & result)992 void module_divide(const value<fbits>& lhs, const value<fbits>& rhs, value<divbits>& result) {
993 	static constexpr size_t fhbits = fbits + 1;  // fraction + hidden bit
994 	if (_trace_value_div) std::cout << "lhs  " << to_triple(lhs) << std::endl << "rhs  " << to_triple(rhs) << std::endl;
995 
996 	if (lhs.isinf() || rhs.isinf()) {
997 		result.setinf();
998 		return;
999 	}
1000 	if (lhs.iszero() || rhs.iszero()) {
1001 		result.setzero();
1002 		return;
1003 	}
1004 
1005 	bool new_sign = lhs.sign() ^ rhs.sign();
1006 	int new_scale = lhs.scale() - rhs.scale();
1007 	bitblock<divbits> result_fraction;
1008 
1009 	if (fbits > 0) {
1010 		// fractions are without hidden bit, get_fixed_point adds the hidden bit back in
1011 		bitblock<fhbits> r1 = lhs.get_fixed_point();
1012 		bitblock<fhbits> r2 = rhs.get_fixed_point();
1013 		divide_with_fraction(r1, r2, result_fraction);
1014 		if (_trace_value_div) std::cout << "r1     " << r1 << std::endl << "r2     " << r2 << std::endl << "result " << result_fraction << std::endl << "scale  " << new_scale << std::endl;
1015 		// check if the radix point needs to shift
1016 		// radix point is at divbits - fhbits
1017 		int msb = static_cast<int>(divbits - fhbits);
1018 		int shift = fhbits;
1019 		if (!result_fraction.test(static_cast<size_t>(msb))) {
1020 			msb--; shift++;
1021 			while (!result_fraction.test(static_cast<size_t>(msb))) { // search for the first 1
1022 				msb--; shift++;
1023 			}
1024 		}
1025 		result_fraction <<= static_cast<size_t>(shift);    // shift hidden bit out
1026 		new_scale -= (shift - static_cast<int>(fhbits));
1027 		if (_trace_value_div) std::cout << "shift  " << shift << std::endl << "result " << result_fraction << std::endl << "scale  " << new_scale << std::endl;;
1028 	}
1029 	else {   // posit<3,0>, <4,1>, <5,2>, <6,3>, <7,4> etc are pure sign and scale
1030 			 // no need to multiply the hidden bits together, i.e. 1*1: we know the answer a priori
1031 	}
1032 	if (_trace_value_div) std::cout << "sign " << (new_sign ? "-1 " : " 1 ") << "scale " << new_scale << " fraction " << result_fraction << std::endl;
1033 
1034 	result.set(new_sign, new_scale, result_fraction, false, false, false);
1035 }
1036 
1037 template<size_t fbits>
operator +(const value<fbits> & lhs,const value<fbits> & rhs)1038 value<fbits> operator+(const value<fbits>& lhs, const value<fbits>& rhs) {
1039 	constexpr size_t abits = fbits + 5;
1040 	value<abits+1> result;
1041 	module_add<fbits,abits>(lhs, rhs, result);
1042 #if defined(__GNUC__) || defined(__GNUG__)
1043 	return value<fbits>(); // for some reason GCC doesn't want to compile result.round_to<fbits>()
1044 #else
1045 	return result.round_to<fbits>();
1046 #endif
1047 }
1048 template<size_t fbits>
operator -(const value<fbits> & lhs,const value<fbits> & rhs)1049 value<fbits> operator-(const value<fbits>& lhs, const value<fbits>& rhs) {
1050 	constexpr size_t abits = fbits + 5;
1051 	value<abits+1> result;
1052 	module_subtract<fbits,abits>(lhs, rhs, result);
1053 #if defined(__GNUC__) || defined(__GNUG__)
1054 	return value<fbits>(); // for some reason GCC doesn't want to compile result.round_to<fbits>()
1055 #else
1056 	return result.round_to<fbits>();
1057 #endif
1058 }
1059 template<size_t fbits>
operator *(const value<fbits> & lhs,const value<fbits> & rhs)1060 value<fbits> operator*(const value<fbits>& lhs, const value<fbits>& rhs) {
1061 	constexpr size_t mbits = 2*fbits + 2;
1062 	value<mbits> result;
1063 	module_multiply(lhs, rhs, result);
1064 #if defined(__GNUC__) || defined(__GNUG__)
1065 	return value<fbits>(); // for some reason GCC doesn't want to compile result.round_to<fbits>()
1066 #else
1067 	return result.round_to<fbits>();
1068 #endif
1069 }
1070 template<size_t fbits>
operator /(const value<fbits> & lhs,const value<fbits> & rhs)1071 value<fbits> operator/(const value<fbits>& lhs, const value<fbits>& rhs) {
1072 	constexpr size_t divbits = 2 * fbits + 5;
1073 	value<divbits> result;
1074 	module_divide(lhs, rhs, result);
1075 #if defined(__GNUC__) || defined(__GNUG__)
1076 	return value<fbits>(); // for some reason GCC doesn't want to compile result.round_to<fbits>()
1077 #else
1078 	return result.round_to<fbits>();
1079 #endif
1080 }
1081 template<size_t fbits>
sqrt(const value<fbits> & a)1082 value<fbits> sqrt(const value<fbits>& a) {
1083 	return std::sqrt(double(a));
1084 }
1085 
1086 }  // namespace sw::universal::internal
1087