1 #pragma once
2 // value.hpp: definition of a (sign, scale, significant) representation of an approximation to a real value
3 //
4 // Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
5 //
6 // This file is part of the universal numbers project, which is released under an MIT Open Source license.
7 #include <cassert>
8 #include <iostream>
9 #include <iomanip>
10 #include <limits>
11 #include <tuple>
12 #include <algorithm> // std::max
13
14 #include <universal/common/exceptions.hpp>
15 #include <universal/number/support/decimal.hpp>
16 #include <universal/native/nonconstexpr754.hpp>
17 #include <universal/native/bit_functions.hpp>
18 #include <universal/internal/bitblock/bitblock.hpp>
19
20 namespace sw::universal::internal {
21
22 struct value_internal_exception : public universal_internal_exception {
value_internal_exceptionsw::universal::internal::value_internal_exception23 value_internal_exception(const std::string& error)
24 : universal_internal_exception(std::string("value internal exception: ") + error) {};
25 };
26
27 struct value_shift_too_large : public value_internal_exception {
value_shift_too_largesw::universal::internal::value_shift_too_large28 value_shift_too_large() : value_internal_exception("shift value too large") {}
29 };
30
31 using namespace sw::universal;
32
33 // Forward definitions
34 template<size_t fbits> class value;
35 template<size_t fbits> value<fbits> abs(const value<fbits>& v);
36
37 #ifdef VALUE_TRACE_CONVERSION
38 constexpr bool _trace_value_conversion = true;
39 #else
40 constexpr bool _trace_value_conversion = false;
41 #endif
42
43 #ifdef VALUE_TRACE_ADD
44 constexpr bool _trace_value_add = true;
45 #else
46 constexpr bool _trace_value_add = false;
47 #endif
48
49 #ifdef VALUE_TRACE_SUB
50 constexpr bool _trace_value_sub = true;
51 #else
52 constexpr bool _trace_value_sub = false;
53 #endif
54
55 #ifdef VALUE_TRACE_MUL
56 constexpr bool _trace_value_mul = true;
57 #else
58 constexpr bool _trace_value_mul = false;
59 #endif
60
61 #ifdef VALUE_TRACE_DIV
62 constexpr bool _trace_value_div = true;
63 #else
64 constexpr bool _trace_value_div = false;
65 #endif
66
67 // template class representing a value in scientific notation, using a template size for the number of fraction bits
68 template<size_t fbits>
69 class value {
70 public:
71 static constexpr size_t fhbits = fbits + 1; // number of fraction bits including the hidden bit
value()72 constexpr value()
73 : _sign{false}, _scale{0}, _nrOfBits{fbits}, _fraction{}, _inf{false},
74 _zero{true}, _nan{false} {}
value(bool sign,int scale,const internal::bitblock<fbits> & fraction_without_hidden_bit,bool zero=true,bool inf=false)75 constexpr value(bool sign, int scale, const internal::bitblock<fbits>& fraction_without_hidden_bit,
76 bool zero = true, bool inf = false)
77 : _sign{sign}, _scale{scale}, _nrOfBits{fbits}, _fraction{fraction_without_hidden_bit},
78 _inf{inf}, _zero{zero}, _nan{false} {}
79
80
81 // decorated constructors
value(const value & initial_value)82 constexpr value(const value& initial_value) { *this = initial_value; }
value(signed char initial_value)83 constexpr value(signed char initial_value) { *this = initial_value; }
value(short initial_value)84 constexpr value(short initial_value) { *this = initial_value; }
value(int initial_value)85 constexpr value(int initial_value) { *this = initial_value; }
value(long initial_value)86 constexpr value(long initial_value) { *this = initial_value; }
value(long long initial_value)87 constexpr value(long long initial_value) { *this = initial_value; }
value(char initial_value)88 constexpr value(char initial_value) { *this = initial_value; }
value(unsigned short initial_value)89 constexpr value(unsigned short initial_value) { *this = initial_value; }
value(unsigned int initial_value)90 constexpr value(unsigned int initial_value) { *this = initial_value; }
value(unsigned long initial_value)91 constexpr value(unsigned long initial_value) { *this = initial_value; }
value(unsigned long long initial_value)92 constexpr value(unsigned long long initial_value) { *this = initial_value; }
value(float initial_value)93 value(float initial_value) { *this = initial_value; }
value(double initial_value)94 value(double initial_value) : value{} { *this = initial_value; }
value(long double initial_value)95 value(long double initial_value) { *this = initial_value; }
96
97 // assignment operators
operator =(const value & rhs)98 constexpr value& operator=(const value& rhs) {
99 _sign = rhs._sign;
100 _scale = rhs._scale;
101 _fraction = rhs._fraction;
102 _nrOfBits = rhs._nrOfBits;
103 _inf = rhs._inf;
104 _zero = rhs._zero;
105 _nan = rhs._nan;
106 return *this;
107 }
operator =(signed char rhs)108 constexpr value<fbits>& operator=(signed char rhs) {
109 *this = static_cast<long long>(rhs);
110 return *this;
111 }
operator =(short rhs)112 constexpr value<fbits>& operator=(short rhs) {
113 *this = static_cast<long long>(rhs);
114 return *this;
115 }
operator =(int rhs)116 constexpr value<fbits>& operator=(int rhs) {
117 *this = static_cast<long long>(rhs);
118 return *this;
119 }
operator =(long rhs)120 constexpr value<fbits>& operator=(long rhs) {
121 *this = static_cast<long long>(rhs);
122 return *this;
123 }
operator =(long long rhs)124 constexpr value<fbits>& operator=(long long rhs) {
125 if (_trace_value_conversion) std::cout << "---------------------- CONVERT -------------------" << std::endl;
126 if (rhs == 0) {
127 setzero();
128 return *this;
129 }
130 reset();
131 _sign = (0x8000000000000000 & rhs); // 1 is negative, 0 is positive
132 if (_sign) {
133 // process negative number: process 2's complement of the input
134 _scale = int(sw::universal::findMostSignificantBit(-rhs)) - 1;
135 uint64_t _fraction_without_hidden_bit = uint64_t(_scale == 0 ? 0 : (-rhs << (64 - _scale)));
136 _fraction = copy_integer_fraction<fbits>(_fraction_without_hidden_bit);
137 //take_2s_complement();
138 _nrOfBits = fbits;
139 if (_trace_value_conversion) std::cout << "int64 " << rhs << " sign " << _sign << " scale " << _scale << " fraction b" << _fraction << std::dec << std::endl;
140 }
141 else {
142 // process positive number
143 _scale = int(sw::universal::findMostSignificantBit(rhs)) - 1;
144 uint64_t _fraction_without_hidden_bit = uint64_t(_scale == 0 ? 0 : (rhs << (64 - _scale)));
145 _fraction = copy_integer_fraction<fbits>(_fraction_without_hidden_bit);
146 _nrOfBits = fbits;
147 if (_trace_value_conversion) std::cout << "int64 " << rhs << " sign " << _sign << " scale " << _scale << " fraction b" << _fraction << std::dec << std::endl;
148 }
149 return *this;
150 }
operator =(char rhs)151 constexpr value<fbits>& operator=(char rhs) {
152 *this = (unsigned long long)(rhs);
153 return *this;
154 }
operator =(unsigned short rhs)155 constexpr value<fbits>& operator=(unsigned short rhs) {
156 *this = static_cast<long long>(rhs);
157 return *this;
158 }
operator =(unsigned int rhs)159 constexpr value<fbits>& operator=(unsigned int rhs) {
160 *this = static_cast<long long>(rhs);
161 return *this;
162 }
operator =(unsigned long rhs)163 constexpr value<fbits>& operator=(unsigned long rhs) {
164 *this = static_cast<long long>(rhs);
165 return *this;
166 }
operator =(unsigned long long rhs)167 constexpr value<fbits>& operator=(unsigned long long rhs) {
168 if (_trace_value_conversion) std::cout << "---------------------- CONVERT -------------------" << std::endl;
169 if (rhs == 0) {
170 setzero();
171 }
172 else {
173 reset();
174 _scale = static_cast<int>(sw::universal::findMostSignificantBit(rhs)) - 1;
175 uint64_t _fraction_without_hidden_bit = _scale == 0 ? 0ull : (rhs << (64 - _scale)); // the scale == -1 case is handled above
176 _fraction = copy_integer_fraction<fbits>(_fraction_without_hidden_bit);
177 _nrOfBits = fbits;
178 }
179 if (_trace_value_conversion) std::cout << "uint64 " << rhs << " sign " << _sign << " scale " << _scale << " fraction b" << _fraction << std::dec << std::endl;
180 return *this;
181 }
operator =(float rhs)182 value<fbits>& operator=(float rhs) {
183 reset();
184 if (_trace_value_conversion) std::cout << "---------------------- CONVERT -------------------" << std::endl;
185
186 switch (std::fpclassify(rhs)) {
187 case FP_ZERO:
188 _nrOfBits = fbits;
189 _zero = true;
190 break;
191 case FP_INFINITE:
192 _inf = true;
193 _sign = true;
194 break;
195 case FP_NAN:
196 _nan = true;
197 _sign = true;
198 break;
199 case FP_SUBNORMAL:
200 case FP_NORMAL:
201 {
202 float _fr{0};
203 unsigned int _23b_fraction_without_hidden_bit{0};
204 int _exponent{0};
205 extract_fp_components(rhs, _sign, _exponent, _fr, _23b_fraction_without_hidden_bit);
206 _scale = _exponent - 1;
207 _fraction = extract_23b_fraction<fbits>(_23b_fraction_without_hidden_bit);
208 _nrOfBits = fbits;
209 if (_trace_value_conversion) std::cout << "float " << rhs << " sign " << _sign << " scale " << _scale << " 23b fraction 0x" << std::hex << _23b_fraction_without_hidden_bit << " _fraction b" << _fraction << std::dec << std::endl;
210 }
211 break;
212 }
213 return *this;
214 }
operator =(double rhs)215 value<fbits>& operator=(double rhs) {
216 using std::get;
217 reset();
218 if (_trace_value_conversion) std::cout << "---------------------- CONVERT -------------------" << std::endl;
219
220 switch (std::fpclassify(rhs)) {
221 case FP_ZERO:
222 _nrOfBits = fbits;
223 _zero = true;
224 break;
225 case FP_INFINITE:
226 _inf = true;
227 _sign = true;
228 break;
229 case FP_NAN:
230 _nan = true;
231 _sign = true;
232 break;
233 case FP_SUBNORMAL:
234 case FP_NORMAL:
235 {
236 #if 1
237 double _fr{0};
238 unsigned long long _52b_fraction_without_hidden_bit{0};
239 int _exponent{0};
240 extract_fp_components(rhs, _sign, _exponent, _fr, _52b_fraction_without_hidden_bit);
241 #endif
242 #if 0
243 auto components= ieee_components(rhs);
244 _sign= get<0>(components);
245 int _exponent= get<1>(components);
246 unsigned long long _52b_fraction_without_hidden_bit= get<2>(components);
247 #endif
248 _scale = _exponent - 1;
249 _fraction = extract_52b_fraction<fbits>(_52b_fraction_without_hidden_bit);
250 _nrOfBits = fbits;
251 if (_trace_value_conversion) std::cout << "double " << rhs << " sign " << _sign << " scale " << _scale << " 52b fraction 0x" << std::hex << _52b_fraction_without_hidden_bit << " _fraction b" << _fraction << std::dec << std::endl;
252 }
253 break;
254 }
255 return *this;
256 }
operator =(long double rhs)257 value<fbits>& operator=(long double rhs) {
258 reset();
259 if (_trace_value_conversion) std::cout << "---------------------- CONVERT -------------------" << std::endl;
260
261 switch (std::fpclassify(rhs)) {
262 case FP_ZERO:
263 _nrOfBits = fbits;
264 _zero = true;
265 break;
266 case FP_INFINITE:
267 _inf = true;
268 _sign = true;
269 break;
270 case FP_NAN:
271 _nan = true;
272 _sign = true;
273 break;
274 case FP_SUBNORMAL:
275 case FP_NORMAL:
276 {
277 long double _fr{0};
278 unsigned long long _63b_fraction_without_hidden_bit{0};
279 int _exponent{0};
280 extract_fp_components(rhs, _sign, _exponent, _fr, _63b_fraction_without_hidden_bit);
281 _scale = _exponent - 1;
282 // how to interpret the fraction bits: TODO: this should be a static compile-time code block
283 if (sizeof(long double) == 8) {
284 // we are just a double and thus only have 52bits of fraction
285 _fraction = extract_52b_fraction<fbits>(_63b_fraction_without_hidden_bit);
286 if (_trace_value_conversion) std::cout << "long double " << rhs << " sign " << _sign << " scale " << _scale << " 52b fraction 0x" << std::hex << _63b_fraction_without_hidden_bit << " _fraction b" << _fraction << std::dec << std::endl;
287
288 }
289 else if (sizeof(long double) == 16) {
290 // how to differentiate between 80bit and 128bit formats?
291 _fraction = extract_63b_fraction<fbits>(_63b_fraction_without_hidden_bit);
292 if (_trace_value_conversion) std::cout << "long double " << rhs << " sign " << _sign << " scale " << _scale << " 63b fraction 0x" << std::hex << _63b_fraction_without_hidden_bit << " _fraction b" << _fraction << std::dec << std::endl;
293
294 }
295 _nrOfBits = fbits;
296 }
297 break;
298 }
299 return *this;
300 }
301
302 // operators
operator -() const303 constexpr value<fbits> operator-() const {
304 return value<fbits>(!_sign, _scale, _fraction, _zero, _inf);
305 }
306
operator ++()307 value<fbits>& operator++() {
308 *this = *this + value<fbits>(1);
309 return *this;
310 }
operator ++(int)311 value<fbits> operator++(int) {
312 value<fbits> tmp{ *this };
313 operator++();
314 return tmp;
315 }
316
317 // modifiers
reset()318 constexpr void reset() & {
319 _sign = false;
320 _scale = 0;
321 _nrOfBits = 0;
322 _inf = false;
323 _zero = false;
324 _nan = false;
325 _fraction.reset(); // not constexpr
326 // _fraction= bitblock<fbits>{}; // work around
327 }
set(bool sign,int scale,bitblock<fbits> fraction_without_hidden_bit,bool zero,bool inf,bool nan=false)328 void set(bool sign, int scale, bitblock<fbits> fraction_without_hidden_bit, bool zero, bool inf, bool nan = false) {
329 _sign = sign;
330 _scale = scale;
331 _fraction = fraction_without_hidden_bit;
332 _zero = zero;
333 _inf = inf;
334 _nan = nan;
335 }
setzero()336 void setzero() {
337 _zero = true;
338 _sign = false;
339 _inf = false;
340 _nan = false;
341 _scale = 0;
342 _nrOfBits = fbits;
343 _fraction.reset();
344 }
setinf()345 void setinf() { // this maps to NaR on the posit side, and that has a sign = 1
346 _inf = true;
347 _sign = true;
348 _zero = false;
349 _nan = false;
350 _scale = 0;
351 _nrOfBits = fbits;
352 _fraction.reset();
353 }
setnan()354 void setnan() { // this will also map to NaR
355 _nan = true;
356 _sign = true;
357 _zero = false;
358 _inf = false;
359 _scale = 0;
360 _nrOfBits = fbits;
361 _fraction.reset();
362 }
setExponent(int e)363 inline void setExponent(int e) { _scale = e; }
isneg() const364 inline bool isneg() const { return _sign; }
ispos() const365 inline bool ispos() const { return !_sign; }
iszero() const366 inline constexpr bool iszero() const { return _zero; }
isinf() const367 inline constexpr bool isinf() const { return _inf; }
isnan() const368 inline constexpr bool isnan() const { return _nan; }
sign() const369 inline bool sign() const { return _sign; }
scale() const370 inline int scale() const { return _scale; }
fraction() const371 bitblock<fbits> fraction() const { return _fraction; }
372
373 // Normalized shift (e.g., for addition).
374 template <size_t Size>
nshift(int shift) const375 bitblock<Size> nshift(int shift) const {
376 bitblock<Size> number;
377
378 #if VALUE_THROW_ARITHMETIC_EXCEPTION
379 // Check range
380 if (int(fbits) + shift >= int(Size))
381 throw value_shift_too_large{};
382 #else
383 // Check range
384 if (int(fbits) + shift >= int(Size)) {
385 std::cerr << "nshift: shift is too large\n";
386 number.reset();
387 return number;
388 }
389 #endif // VALUE_THROW_ARITHMETIC_EXCEPTIONS
390
391 int hpos = int(fbits) + shift; // position of hidden bit
392 if (hpos <= 0) { // If hidden bit is LSB or beyond just set uncertainty bit and call it a day
393 number[0] = true;
394 return number;
395 }
396 number[size_t(hpos)] = true; // hidden bit now safely set
397
398 // Copy fraction bits into certain part
399 for (int npos = hpos - 1, fpos = int(fbits) - 1; npos > 0 && fpos >= 0; --npos, --fpos)
400 number[size_t(npos)] = _fraction[size_t(fpos)];
401
402 // Set uncertainty bit
403 bool uncertainty = false;
404 for (int fpos = std::min(int(fbits) - 1, -shift); fpos >= 0 && !uncertainty; --fpos)
405 uncertainty |= _fraction[size_t(fpos)];
406 number[0] = uncertainty;
407 return number;
408 }
409 // get a fixed point number by making the hidden bit explicit: useful for multiply units
get_fixed_point() const410 bitblock<fhbits> get_fixed_point() const {
411 bitblock<fbits + 1> fixed_point_number;
412 fixed_point_number.set(fbits, true); // make hidden bit explicit
413 for (size_t i = 0; i < fbits; i++) {
414 fixed_point_number[i] = _fraction[i];
415 }
416 return fixed_point_number;
417 }
418 // get the fraction value including the implicit hidden bit (this is at an exponent level 1 smaller)
419 template<typename Ty = double>
get_implicit_fraction_value() const420 Ty get_implicit_fraction_value() const {
421 if (_zero) return (long double)0.0;
422 Ty v = 1.0;
423 Ty scale = 0.5;
424 for (int i = int(fbits) - 1; i >= 0; i--) {
425 if (_fraction.test(size_t(i))) v += scale;
426 scale *= 0.5;
427 if (scale == 0.0) break;
428 }
429 return v;
430 }
431 template<typename Ty = double>
sign_value() const432 Ty sign_value() const {
433 return (_sign ? Ty{ -1 } : Ty{ 1 });
434 }
435 template<typename Ty = double>
scale_value() const436 Ty scale_value() const {
437 if (_zero) return Ty(0.0);
438 return std::pow(Ty(2.0), Ty(_scale));
439 }
440 template<typename Ty = double>
fraction_value() const441 Ty fraction_value() const {
442 if (_zero) return (long double)0.0;
443 Ty v = 1.0;
444 Ty scale = 0.5;
445 for (int i = int(fbits) - 1; i >= 0; i--) {
446 if (_fraction.test(size_t(i))) v += scale;
447 scale *= 0.5;
448 if (scale == 0.0) break;
449 }
450 return v;
451 }
452
453 // conversion helpers
to_long_double() const454 long double to_long_double() const {
455 return sign_value<long double>() * scale_value<long double>() * fraction_value<long double>();
456 }
to_double() const457 double to_double() const {
458 return sign_value<double>() * scale_value<double>() * fraction_value<double>();
459 }
to_float() const460 float to_float() const {
461 return sign_value<float>() * scale_value<float>() * fraction_value<float>();
462 }
463
464 // explicit conversion operators to native types
operator long double() const465 explicit operator long double() const { return to_long_double(); }
operator double() const466 explicit operator double() const { return to_double(); }
operator float() const467 explicit operator float() const { return to_float(); }
468
469 // TODO: this does not implement a 'real' right extend. tgtbits need to be shorter than fbits
470 template<size_t srcbits, size_t tgtbits>
right_extend(const value<srcbits> & src)471 void right_extend(const value<srcbits>& src) {
472 _sign = src.sign();
473 _scale = src.scale();
474 _nrOfBits = tgtbits;
475 _inf = src.isinf();
476 _zero = src.iszero();
477 _nan = src.isnan();
478 bitblock<srcbits> src_fraction = src.fraction();
479 if (!_inf && !_zero && !_nan) {
480 for (int s = srcbits - 1, t = tgtbits - 1; s >= 0 && t >= 0; --s, --t)
481 _fraction[t] = src_fraction[s];
482 }
483 }
484 // round to a target size number of bits using round-to-nearest round-to-even-on-tie
485 template<size_t tgt_size>
round_to()486 value<tgt_size> round_to() {
487 bitblock<tgt_size> rounded_fraction;
488 if (tgt_size == 0) {
489 bool round_up = false;
490 if (fbits >= 2) {
491 bool blast = _fraction[fbits - 1ull];
492 bool sb = anyAfter(_fraction, static_cast<int>(fbits) - 2);
493 if (blast && sb) round_up = true;
494 }
495 else if (fbits == 1) {
496 round_up = _fraction[0];
497 }
498 return value<tgt_size>(_sign, (round_up ? _scale + 1 : _scale), rounded_fraction, _zero, _inf);
499 }
500 else {
501 if (!_zero || !_inf) {
502 if (tgt_size < fbits) {
503 int rb = int(tgt_size) - 1;
504 int lb = int(fbits) - int(tgt_size) - 1;
505 for (int i = int(fbits) - 1; i > lb; i--, rb--) {
506 rounded_fraction[static_cast<size_t>(rb)] = _fraction[static_cast<size_t>(i)];
507 }
508 bool blast = _fraction[static_cast<size_t>(lb)];
509 bool sb = false;
510 if (lb > 0) sb = anyAfter(_fraction, lb-1);
511 if (blast || sb) rounded_fraction[0ull] = true;
512 }
513 else {
514 int rb = int(tgt_size) - 1;
515 for (int i = int(fbits) - 1; i >= 0; i--, rb--) {
516 rounded_fraction[static_cast<size_t>(rb)] = _fraction[static_cast<size_t>(i)];
517 }
518 }
519 }
520 }
521 return value<tgt_size>(_sign, _scale, rounded_fraction, _zero, _inf);
522 }
523
524 private:
525 bool _sign;
526 int _scale;
527 int _nrOfBits; // in case the fraction is smaller than the full fbits
528 bitblock<fbits> _fraction;
529 bool _inf;
530 bool _zero;
531 bool _nan;
532
533 // template parameters need names different from class template parameters (for gcc and clang)
534 template<size_t nfbits>
535 friend std::ostream& operator<< (std::ostream& ostr, const value<nfbits>& r);
536 template<size_t nfbits>
537 friend std::istream& operator>> (std::istream& istr, value<nfbits>& r);
538
539 template<size_t nfbits>
540 friend bool operator==(const value<nfbits>& lhs, const value<nfbits>& rhs);
541 template<size_t nfbits>
542 friend bool operator!=(const value<nfbits>& lhs, const value<nfbits>& rhs);
543 template<size_t nfbits>
544 friend bool operator< (const value<nfbits>& lhs, const value<nfbits>& rhs);
545 template<size_t nfbits>
546 friend bool operator> (const value<nfbits>& lhs, const value<nfbits>& rhs);
547 template<size_t nfbits>
548 friend bool operator<=(const value<nfbits>& lhs, const value<nfbits>& rhs);
549 template<size_t nfbits>
550 friend bool operator>=(const value<nfbits>& lhs, const value<nfbits>& rhs);
551 };
552
553 /*
554 n fraction bits represent a sampling of 2^n samples of a 10^n domain.
555
556 if you need to represent a smaller decimal than n digits, you need to
557 round in the fraction domain, and then convert the rounded bit string.
558 */
559
560 // convert a value to a decimal representation
561 template<size_t fbits>
convert_to_decimal_string(const value<fbits> & v)562 inline std::string convert_to_decimal_string(const value<fbits>& v) {
563 std::cout << to_triple(v) << '\n';
564 auto scale = v.scale();
565 auto bits = v.fraction();
566 // construct the value of the hidden bit
567 support::decimal bitValue;
568
569 // construct the value of the fraction
570 // step 1: calculate the decimal value of the smallest discretization step of the range
571 support::decimal range, discretizationLevels, step, partial, multiplier, one;
572 // create the decimal range we are discretizing
573 range.setdigit(1);
574 range.shiftLeft(fbits); // == 10^fbits
575 // calculate the discretization levels of this range
576 discretizationLevels.setdigit(1);
577 for (size_t i = 0; i < fbits; ++i) support::add(discretizationLevels, discretizationLevels);
578 step = support::div(range, discretizationLevels);
579 // step 2: construct the value of fraction in terms of discretization samples
580 partial.setzero();
581 multiplier.setdigit(1);
582 // convert the fraction part
583 for (unsigned i = 0; i < fbits; ++i) {
584 if (bits[i]) support::add(partial, multiplier);
585 support::add(multiplier, multiplier);
586 }
587 support::add(partial, multiplier); // add the hidden bit value
588
589 // step 3: calculate the value of fraction = nrOfSamples * discretizationStep
590 support::mul(partial, step);
591
592 // construct a decimal fixed-point
593 if (scale > 0) {
594 support::decimal scaleUp;
595 scaleUp.setdigit(1);
596 for (auto i = 0; i < scale; ++i) support::add(scaleUp, scaleUp);
597 support::mul(partial, scaleUp);
598 }
599 else if (scale < 0) {
600 support::decimal scaleDown;
601 scaleDown.setdigit(1);
602 for (auto i = 0; i < -scale; ++i) support::add(scaleDown, scaleDown);
603 partial = support::div(partial, scaleDown);
604 }
605 std::stringstream str;
606 for (support::decimal::const_reverse_iterator rit = partial.rbegin(); rit != partial.rend(); ++rit) {
607 str << (int)*rit;
608 }
609 #ifdef REV
610 // leading 0s will cause the partial to be represented incorrectly
611 // if we simply convert it to digits.
612 // The partial represents the parts in the range, so we can deduce
613 // the number of leading zeros by comparing to the length of range
614 size_t nrLeadingZeros = range.size() - partial.size() - 1;
615 for (size_t i = 0; i < nrLeadingZeros; ++i) str << '0';
616 size_t digitsWritten = nrLeadingZeros;
617 for (support::decimal::const_reverse_iterator rit = partial.rbegin(); rit != partial.rend(); ++rit) {
618 str << (int)*rit;
619 ++digitsWritten;
620 }
621 if (digitsWritten < fbits) { // deal with trailing 0s
622 for (size_t i = digitsWritten; i < fbits; ++i) {
623 str << '0';
624 }
625 }
626 #endif
627 return str.str();
628 }
629 ////////////////////// VALUE operators
630 template<size_t nfbits>
operator <<(std::ostream & ostr,const value<nfbits> & v)631 inline std::ostream& operator<<(std::ostream& ostr, const value<nfbits>& v) {
632 if (v._inf) {
633 ostr << FP_INFINITE;
634 }
635 else {
636 ostr << (long double)v;
637 }
638 return ostr;
639 }
640
641 template<size_t nfbits>
operator >>(std::istream & istr,const value<nfbits> & v)642 inline std::istream& operator>> (std::istream& istr, const value<nfbits>& v) {
643 istr >> v._fraction;
644 return istr;
645 }
646
647 template<size_t nfbits>
operator ==(const value<nfbits> & lhs,const value<nfbits> & rhs)648 inline bool operator==(const value<nfbits>& lhs, const value<nfbits>& rhs) { return lhs._sign == rhs._sign && lhs._scale == rhs._scale && lhs._fraction == rhs._fraction && lhs._nrOfBits == rhs._nrOfBits && lhs._zero == rhs._zero && lhs._inf == rhs._inf; }
649 template<size_t nfbits>
operator !=(const value<nfbits> & lhs,const value<nfbits> & rhs)650 inline bool operator!=(const value<nfbits>& lhs, const value<nfbits>& rhs) { return !operator==(lhs, rhs); }
651 template<size_t nfbits>
operator <(const value<nfbits> & lhs,const value<nfbits> & rhs)652 inline bool operator< (const value<nfbits>& lhs, const value<nfbits>& rhs) {
653 if (lhs._inf) {
654 if (rhs._inf) return false; else return true; // everything is less than -infinity
655 }
656 else {
657 if (rhs._inf) return false;
658 }
659
660 if (lhs._zero) {
661 if (rhs._zero) return false; // they are both 0
662 if (rhs._sign) return false; else return true;
663 }
664 if (rhs._zero) {
665 if (lhs._sign) return true; else return false;
666 }
667 if (lhs._sign) {
668 if (rhs._sign) { // both operands are negative
669 if (lhs._scale > rhs._scale) {
670 return true; // lhs is more negative
671 }
672 else {
673 if (lhs._scale == rhs._scale) {
674 // compare the fraction, which is an unsigned value
675 if (lhs._fraction == rhs._fraction) return false; // they are the same value
676 if (lhs._fraction > rhs._fraction) {
677 return true; // lhs is more negative
678 }
679 else {
680 return false; // lhs is less negative
681 }
682 }
683 else {
684 return false; // lhs is less negative
685 }
686 }
687 }
688 else {
689 return true; // lhs is negative, rhs is positive
690 }
691 }
692 else {
693 if (rhs._sign) {
694 return false; // lhs is positive, rhs is negative
695 }
696 else {
697 if (lhs._scale > rhs._scale) {
698 return false; // lhs is more positive
699 }
700 else {
701 if (lhs._scale == rhs._scale) {
702 // compare the fractions
703 if (lhs._fraction == rhs._fraction) return false; // they are the same value
704 if (lhs._fraction > rhs._fraction) {
705 return false; // lhs is more positive than rhs
706 }
707 else {
708 return true; // lhs is less positive than rhs
709 }
710 }
711 else {
712 return true; // lhs is less positive
713 }
714 }
715 }
716 }
717 // return false; // all paths are taken care of
718 }
719 template<size_t nfbits>
operator >(const value<nfbits> & lhs,const value<nfbits> & rhs)720 inline bool operator> (const value<nfbits>& lhs, const value<nfbits>& rhs) { return operator< (rhs, lhs); }
721 template<size_t nfbits>
operator <=(const value<nfbits> & lhs,const value<nfbits> & rhs)722 inline bool operator<=(const value<nfbits>& lhs, const value<nfbits>& rhs) { return !operator> (lhs, rhs); }
723 template<size_t nfbits>
operator >=(const value<nfbits> & lhs,const value<nfbits> & rhs)724 inline bool operator>=(const value<nfbits>& lhs, const value<nfbits>& rhs) { return !operator< (lhs, rhs); }
725
726 template<size_t nbits>
to_binary(const bitblock<nbits> & a,bool nibbleMarker=true)727 inline std::string to_binary(const bitblock<nbits>& a, bool nibbleMarker = true) {
728 if constexpr (nbits > 1) {
729 std::stringstream s;
730 s << "0b";
731 for (int i = int(nbits - 1); i >= 0; --i) {
732 s << (a[static_cast<size_t>(i)] ? '1' : '0');
733 if (i > 0 && (i % 4) == 0 && nibbleMarker) s << '\'';
734 }
735 return s.str();
736 }
737 else {
738 return std::string("-");
739 }
740 }
741 template<size_t fbits>
to_triple(const value<fbits> & v)742 inline std::string to_triple(const value<fbits>& v) {
743 std::stringstream s;
744 if (v.iszero()) {
745 s << "(+, 0," << std::setw(fbits) << v.fraction() << ')';
746 return s.str();
747 }
748 else if (v.isinf()) {
749 s << "(inf," << std::setw(fbits) << v.fraction() << ')';
750 return s.str();
751 }
752 s << (v.sign() ? "(-, " : "(+, ");
753 s << v.scale() << ", ";
754 s << to_binary(v.fraction(), true) << ')';
755 // s << "(" << (v.sign() ? "-" : "+") << "," << v.scale() << "," << v.fraction() << ')';
756 return s.str();
757 }
758
759 /// Magnitude of a scientific notation value (equivalent to turning the sign bit off).
760 template<size_t nfbits>
abs(const value<nfbits> & v)761 value<nfbits> abs(const value<nfbits>& v) {
762 return value<nfbits>(false, v.scale(), v.fraction(), v.iszero());
763 }
764
765 // add two values with fbits fraction bits, round them to abits, and return the abits+1 result value
766 template<size_t fbits, size_t abits>
module_add(const value<fbits> & lhs,const value<fbits> & rhs,value<abits+1> & result)767 void module_add(const value<fbits>& lhs, const value<fbits>& rhs, value<abits + 1>& result) {
768 // with sign/magnitude adders it is customary to organize the computation
769 // along the four quadrants of sign combinations
770 // + + = +
771 // + - = lhs > rhs ? + : -
772 // - + = lhs > rhs ? - : +
773 // - - =
774 // to simplify the result processing assign the biggest
775 // absolute value to R1, then the sign of the result will be sign of the value in R1.
776
777 if (lhs.isinf() || rhs.isinf()) {
778 result.setinf();
779 return;
780 }
781 int lhs_scale = lhs.scale(), rhs_scale = rhs.scale(), scale_of_result = std::max(lhs_scale, rhs_scale);
782
783 // align the fractions
784 bitblock<abits> r1 = lhs.template nshift<abits>(lhs_scale - scale_of_result + 3);
785 bitblock<abits> r2 = rhs.template nshift<abits>(rhs_scale - scale_of_result + 3);
786 bool r1_sign = lhs.sign(), r2_sign = rhs.sign();
787 bool signs_are_different = r1_sign != r2_sign;
788
789 if (signs_are_different && abs(lhs) < abs(rhs)) {
790 std::swap(r1, r2);
791 std::swap(r1_sign, r2_sign);
792 }
793
794 if (signs_are_different) r2 = twos_complement(r2);
795
796 if (_trace_value_add) {
797 std::cout << (r1_sign ? "sign -1" : "sign 1") << " scale " << std::setw(3) << scale_of_result << " r1 " << r1 << std::endl;
798 if (signs_are_different) {
799 std::cout << (r2_sign ? "sign -1" : "sign 1") << " scale " << std::setw(3) << scale_of_result << " r2 orig " << twos_complement(r2) << std::endl;
800 }
801 std::cout << (r2_sign ? "sign -1" : "sign 1") << " scale " << std::setw(3) << scale_of_result << " r2 " << r2 << std::endl;
802 }
803
804 bitblock<abits + 1> sum;
805 const bool carry = add_unsigned(r1, r2, sum);
806
807 if (_trace_value_add) std::cout << (r1_sign ? "sign -1" : "sign 1") << " carry " << std::setw(3) << (carry ? 1 : 0) << " sum " << sum << std::endl;
808
809 int shift = 0;
810 if (carry) {
811 if (r1_sign == r2_sign) { // the carry && signs== implies that we have a number bigger than r1
812 shift = -1;
813 }
814 else {
815 // the carry && signs!= implies ||result|| < ||r1||, must find MSB (in the complement)
816 for (int i = int(abits) - 1; i >= 0 && !sum[size_t(i)]; --i) {
817 ++shift;
818 }
819 }
820 }
821 assert(shift >= -1);
822
823 if (shift >= long(abits)) { // we have actual 0
824 sum.reset();
825 result.set(false, 0, sum, true, false, false);
826 return;
827 }
828
829 scale_of_result -= shift;
830 const int hpos = int(abits) - 1 - shift; // position of the hidden bit
831 sum <<= abits - hpos + 1;
832 if (_trace_value_add) std::cout << (r1_sign ? "sign -1" : "sign 1") << " scale " << std::setw(3) << scale_of_result << " sum " << sum << std::endl;
833 result.set(r1_sign, scale_of_result, sum, false, false, false);
834 }
835
836 // subtract module: use ADDER
837 template<size_t fbits, size_t abits>
module_subtract(const value<fbits> & lhs,const value<fbits> & rhs,value<abits+1> & result)838 void module_subtract(const value<fbits>& lhs, const value<fbits>& rhs, value<abits + 1>& result) {
839 if (lhs.isinf() || rhs.isinf()) {
840 result.setinf();
841 return;
842 }
843 int lhs_scale = lhs.scale(), rhs_scale = rhs.scale(), scale_of_result = std::max(lhs_scale, rhs_scale);
844
845 // align the fractions
846 bitblock<abits> r1 = lhs.template nshift<abits>(lhs_scale - scale_of_result + 3);
847 bitblock<abits> r2 = rhs.template nshift<abits>(rhs_scale - scale_of_result + 3);
848 bool r1_sign = lhs.sign(), r2_sign = !rhs.sign();
849 bool signs_are_different = r1_sign != r2_sign;
850
851 if (abs(lhs) < abs(rhs)) {
852 std::swap(r1, r2);
853 std::swap(r1_sign, r2_sign);
854 }
855
856 if (signs_are_different) r2 = twos_complement(r2);
857
858 if (_trace_value_sub) {
859 std::cout << (r1_sign ? "sign -1" : "sign 1") << " scale " << std::setw(3) << scale_of_result << " r1 " << r1 << std::endl;
860 std::cout << (r2_sign ? "sign -1" : "sign 1") << " scale " << std::setw(3) << scale_of_result << " r2 " << r2 << std::endl;
861 }
862
863 bitblock<abits + 1> sum;
864 const bool carry = add_unsigned(r1, r2, sum);
865
866 if (_trace_value_sub) std::cout << (r1_sign ? "sign -1" : "sign 1") << " carry " << std::setw(3) << (carry ? 1 : 0) << " sum " << sum << std::endl;
867
868 int shift = 0;
869 if (carry) {
870 if (r1_sign == r2_sign) { // the carry && signs== implies that we have a number bigger than r1
871 shift = -1;
872 }
873 else {
874 // the carry && signs!= implies r2 is complement, result < r1, must find hidden bit (in the complement)
875 for (int i = static_cast<int>(abits) - 1; i >= 0 && !sum[static_cast<size_t>(i)]; --i) {
876 shift++;
877 }
878 }
879 }
880 assert(shift >= -1);
881
882 if (shift >= static_cast<int>(abits)) { // we have actual 0
883 sum.reset();
884 result.set(false, 0, sum, true, false, false);
885 return;
886 }
887
888 scale_of_result -= shift;
889 const int hpos = static_cast<int>(abits) - 1 - shift; // position of the hidden bit
890 sum <<= abits - hpos + 1;
891 if (_trace_value_sub) std::cout << (r1_sign ? "sign -1" : "sign 1") << " scale " << std::setw(3) << scale_of_result << " sum " << sum << std::endl;
892 result.set(r1_sign, scale_of_result, sum, false, false, false);
893 }
894
895 // subtract module using SUBTRACTOR: CURRENTLY BROKEN FOR UNKNOWN REASON
896 template<size_t fbits, size_t abits>
module_subtract_BROKEN(const value<fbits> & lhs,const value<fbits> & rhs,value<abits+1> & result)897 void module_subtract_BROKEN(const value<fbits>& lhs, const value<fbits>& rhs, value<abits + 1>& result) {
898
899 if (lhs.isinf() || rhs.isinf()) {
900 result.setinf();
901 return;
902 }
903 int lhs_scale = lhs.scale(), rhs_scale = rhs.scale(), scale_of_result = std::max(lhs_scale, rhs_scale);
904
905 // align the fractions
906 bitblock<abits> r1 = lhs.template nshift<abits>(lhs_scale - scale_of_result + 3);
907 bitblock<abits> r2 = rhs.template nshift<abits>(rhs_scale - scale_of_result + 3);
908 bool r1_sign = lhs.sign(), r2_sign = rhs.sign();
909 //bool signs_are_equal = r1_sign == r2_sign;
910
911 if (r1_sign) r1 = twos_complement(r1);
912 if (r1_sign) r2 = twos_complement(r2);
913
914 if (_trace_value_sub) {
915 std::cout << (r1_sign ? "sign -1" : "sign 1") << " scale " << std::setw(3) << scale_of_result << " r1 " << r1 << std::endl;
916 std::cout << (r2_sign ? "sign -1" : "sign 1") << " scale " << std::setw(3) << scale_of_result << " r2 " << r2 << std::endl;
917 }
918
919 bitblock<abits + 1> difference;
920 const bool borrow = subtract_unsigned(r1, r2, difference);
921
922 if (_trace_value_sub) std::cout << (r1_sign ? "sign -1" : "sign 1") << " borrow" << std::setw(3) << (borrow ? 1 : 0) << " diff " << difference << std::endl;
923
924 long shift = 0;
925 if (borrow) { // we have a negative value result
926 difference = twos_complement(difference);
927 }
928 // find hidden bit
929 for (int i = abits - 1; i >= 0 && difference[i]; i--) {
930 shift++;
931 }
932 assert(shift >= -1);
933
934 if (shift >= long(abits)) { // we have actual 0
935 difference.reset();
936 result.set(false, 0, difference, true, false, false);
937 return;
938 }
939
940 scale_of_result -= shift;
941 const int hpos = abits - 1 - shift; // position of the hidden bit
942 difference <<= abits - hpos + 1;
943 if (_trace_value_sub) std::cout << (borrow ? "sign -1" : "sign 1") << " scale " << std::setw(3) << scale_of_result << " result " << difference << std::endl;
944 result.set(borrow, scale_of_result, difference, false, false, false);
945 }
946
947 // multiply module
948 template<size_t fbits, size_t mbits>
module_multiply(const value<fbits> & lhs,const value<fbits> & rhs,value<mbits> & result)949 void module_multiply(const value<fbits>& lhs, const value<fbits>& rhs, value<mbits>& result) {
950 static constexpr size_t fhbits = fbits + 1; // fraction + hidden bit
951 if (_trace_value_mul) std::cout << "lhs " << to_triple(lhs) << std::endl << "rhs " << to_triple(rhs) << std::endl;
952
953 if (lhs.isinf() || rhs.isinf()) {
954 result.setinf();
955 return;
956 }
957 if (lhs.iszero() || rhs.iszero()) {
958 result.setzero();
959 return;
960 }
961
962 bool new_sign = lhs.sign() ^ rhs.sign();
963 int new_scale = lhs.scale() + rhs.scale();
964 bitblock<mbits> result_fraction;
965
966 if (fbits > 0) {
967 // fractions are without hidden bit, get_fixed_point adds the hidden bit back in
968 bitblock<fhbits> r1 = lhs.get_fixed_point();
969 bitblock<fhbits> r2 = rhs.get_fixed_point();
970 multiply_unsigned(r1, r2, result_fraction);
971
972 if (_trace_value_mul) std::cout << "r1 " << r1 << std::endl << "r2 " << r2 << std::endl << "res " << result_fraction << std::endl;
973 // check if the radix point needs to shift
974 int shift = 2;
975 if (result_fraction.test(mbits - 1)) {
976 shift = 1;
977 if (_trace_value_mul) std::cout << " shift " << shift << std::endl;
978 new_scale += 1;
979 }
980 result_fraction <<= static_cast<size_t>(shift); // shift hidden bit out
981 }
982 else { // posit<3,0>, <4,1>, <5,2>, <6,3>, <7,4> etc are pure sign and scale
983 // multiply the hidden bits together, i.e. 1*1: we know the answer a priori
984 }
985 if (_trace_value_mul) std::cout << "sign " << (new_sign ? "-1 " : " 1 ") << "scale " << new_scale << " fraction " << result_fraction << std::endl;
986
987 result.set(new_sign, new_scale, result_fraction, false, false, false);
988 }
989
990 // divide module
991 template<size_t fbits, size_t divbits>
module_divide(const value<fbits> & lhs,const value<fbits> & rhs,value<divbits> & result)992 void module_divide(const value<fbits>& lhs, const value<fbits>& rhs, value<divbits>& result) {
993 static constexpr size_t fhbits = fbits + 1; // fraction + hidden bit
994 if (_trace_value_div) std::cout << "lhs " << to_triple(lhs) << std::endl << "rhs " << to_triple(rhs) << std::endl;
995
996 if (lhs.isinf() || rhs.isinf()) {
997 result.setinf();
998 return;
999 }
1000 if (lhs.iszero() || rhs.iszero()) {
1001 result.setzero();
1002 return;
1003 }
1004
1005 bool new_sign = lhs.sign() ^ rhs.sign();
1006 int new_scale = lhs.scale() - rhs.scale();
1007 bitblock<divbits> result_fraction;
1008
1009 if (fbits > 0) {
1010 // fractions are without hidden bit, get_fixed_point adds the hidden bit back in
1011 bitblock<fhbits> r1 = lhs.get_fixed_point();
1012 bitblock<fhbits> r2 = rhs.get_fixed_point();
1013 divide_with_fraction(r1, r2, result_fraction);
1014 if (_trace_value_div) std::cout << "r1 " << r1 << std::endl << "r2 " << r2 << std::endl << "result " << result_fraction << std::endl << "scale " << new_scale << std::endl;
1015 // check if the radix point needs to shift
1016 // radix point is at divbits - fhbits
1017 int msb = static_cast<int>(divbits - fhbits);
1018 int shift = fhbits;
1019 if (!result_fraction.test(static_cast<size_t>(msb))) {
1020 msb--; shift++;
1021 while (!result_fraction.test(static_cast<size_t>(msb))) { // search for the first 1
1022 msb--; shift++;
1023 }
1024 }
1025 result_fraction <<= static_cast<size_t>(shift); // shift hidden bit out
1026 new_scale -= (shift - static_cast<int>(fhbits));
1027 if (_trace_value_div) std::cout << "shift " << shift << std::endl << "result " << result_fraction << std::endl << "scale " << new_scale << std::endl;;
1028 }
1029 else { // posit<3,0>, <4,1>, <5,2>, <6,3>, <7,4> etc are pure sign and scale
1030 // no need to multiply the hidden bits together, i.e. 1*1: we know the answer a priori
1031 }
1032 if (_trace_value_div) std::cout << "sign " << (new_sign ? "-1 " : " 1 ") << "scale " << new_scale << " fraction " << result_fraction << std::endl;
1033
1034 result.set(new_sign, new_scale, result_fraction, false, false, false);
1035 }
1036
1037 template<size_t fbits>
operator +(const value<fbits> & lhs,const value<fbits> & rhs)1038 value<fbits> operator+(const value<fbits>& lhs, const value<fbits>& rhs) {
1039 constexpr size_t abits = fbits + 5;
1040 value<abits+1> result;
1041 module_add<fbits,abits>(lhs, rhs, result);
1042 #if defined(__GNUC__) || defined(__GNUG__)
1043 return value<fbits>(); // for some reason GCC doesn't want to compile result.round_to<fbits>()
1044 #else
1045 return result.round_to<fbits>();
1046 #endif
1047 }
1048 template<size_t fbits>
operator -(const value<fbits> & lhs,const value<fbits> & rhs)1049 value<fbits> operator-(const value<fbits>& lhs, const value<fbits>& rhs) {
1050 constexpr size_t abits = fbits + 5;
1051 value<abits+1> result;
1052 module_subtract<fbits,abits>(lhs, rhs, result);
1053 #if defined(__GNUC__) || defined(__GNUG__)
1054 return value<fbits>(); // for some reason GCC doesn't want to compile result.round_to<fbits>()
1055 #else
1056 return result.round_to<fbits>();
1057 #endif
1058 }
1059 template<size_t fbits>
operator *(const value<fbits> & lhs,const value<fbits> & rhs)1060 value<fbits> operator*(const value<fbits>& lhs, const value<fbits>& rhs) {
1061 constexpr size_t mbits = 2*fbits + 2;
1062 value<mbits> result;
1063 module_multiply(lhs, rhs, result);
1064 #if defined(__GNUC__) || defined(__GNUG__)
1065 return value<fbits>(); // for some reason GCC doesn't want to compile result.round_to<fbits>()
1066 #else
1067 return result.round_to<fbits>();
1068 #endif
1069 }
1070 template<size_t fbits>
operator /(const value<fbits> & lhs,const value<fbits> & rhs)1071 value<fbits> operator/(const value<fbits>& lhs, const value<fbits>& rhs) {
1072 constexpr size_t divbits = 2 * fbits + 5;
1073 value<divbits> result;
1074 module_divide(lhs, rhs, result);
1075 #if defined(__GNUC__) || defined(__GNUG__)
1076 return value<fbits>(); // for some reason GCC doesn't want to compile result.round_to<fbits>()
1077 #else
1078 return result.round_to<fbits>();
1079 #endif
1080 }
1081 template<size_t fbits>
sqrt(const value<fbits> & a)1082 value<fbits> sqrt(const value<fbits>& a) {
1083 return std::sqrt(double(a));
1084 }
1085
1086 } // namespace sw::universal::internal
1087