1 #pragma once
2 // blocktriple.hpp: definition of a (sign, scale, significant) representation of a generic floating-point value
3 //
4 // Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
5 //
6 // This file is part of the universal numbers project, which is released under an MIT Open Source license.
7 #include <cassert>
8 #include <iostream>
9 #include <iomanip>
10 #include <limits>
11
12 // check on required compilation guards
13 // should be defined by calling environment, just catching it here just in case it is not
14 #ifndef LONG_DOUBLE_SUPPORT
15 #pragma message("LONG_DOUBLE_SUPPORT is not defined")
16 #define LONG_DOUBLE_SUPPORT 0
17 #endif
18 #if !defined(BIT_CAST_SUPPORT)
19 #pragma message("BIT_CAST_SUPPORT is not defined")
20 #define BIT_CAST_SUPPORT 0
21 #endif
22 #if !defined(CONSTEXPRESSION)
23 #define CONSTEXPRESSION
24 #endif
25
26 // dependent types for stand-alone use of this class
27 #include <universal/native/integers.hpp> // to_binary(uint64_t)
28 #include <universal/native/ieee754.hpp>
29 #include <universal/native/subnormal.hpp>
30 #include <universal/native/bit_functions.hpp>
31 #include <universal/internal/blockfraction/blockfraction.hpp>
32 // blocktriple operation trace options
33 #include <universal/internal/blocktriple/trace_constants.hpp>
34
35 namespace sw::universal {
36
37 /*
38 The blocktriple is used as a marshalling class to transform
39 floating-point type number systems into a uniform floating-point
40 arithmetic class that we can validate and reuse.
41
42 The blocktriple design favors performance over encapsulation.
43 During arithmetic operations, the fraction bits of the arguments
44 need to be manipulated and extended, and we wanted to avoid
45 copying these fraction bits into new storage classes.
46
47 However, the size of the fraction bit buffers depends on the
48 arithmetic operator. This implies that at the time of creation
49 we need to know the intended use, and configure the blocktriple
50 accordingly.
51
52 for add and subtract
53 blockfraction = 00h.ffffeee <- three bits before radix point, fraction bits plus 3 rounding bits
54 size_t bfbits = fbits + 3;
55
56 for multiply
57 size_t bfbits = 2*fhbits;
58 */
59
60 // operator specialization tag for blocktriple
61 enum class BlockTripleOperator { ADD, MUL, DIV, SQRT, REPRESENTATION };
62
63 // Forward definitions
64 template<size_t fbits, BlockTripleOperator op, typename bt> class blocktriple;
65 template<size_t fbits, BlockTripleOperator op, typename bt> blocktriple<fbits, op, bt> abs(const blocktriple<fbits, op, bt>& v);
66
67 template<size_t fbits, BlockTripleOperator op, typename bt>
convert(unsigned long long uint,blocktriple<fbits,op,bt> & tgt)68 blocktriple<fbits, op, bt>& convert(unsigned long long uint, blocktriple<fbits, op, bt>& tgt) {
69 return tgt;
70 }
71
72 // Generate a type tag for this type: blocktriple<fbits, operator, unsigned int>
73 template<size_t fbits, BlockTripleOperator op, typename bt>
type_tag(const blocktriple<fbits,op,bt> & v)74 std::string type_tag(const blocktriple<fbits, op, bt>& v) {
75 std::stringstream s;
76 s << "blocktriple<"
77 << fbits << ", ";
78 switch (op) {
79 case BlockTripleOperator::REPRESENTATION:
80 s << "BlockTripleOperator::REPRESENTATION, ";
81 break;
82 case BlockTripleOperator::ADD:
83 s << "BlockTripleOperator::ADD, ";
84 break;
85 case BlockTripleOperator::MUL:
86 s << "BlockTripleOperator::MUL, ";
87 break;
88 case BlockTripleOperator::DIV:
89 s << "BlockTripleOperator::DIV, ";
90 break;
91 case BlockTripleOperator::SQRT:
92 s << "BlockTripleOperator::SQRT, ";
93 break;
94 default:
95 s << "unknown operator, ";
96 }
97 s << typeid(bt).name() << '>';
98 if (v.iszero()) s << ' ';
99 return s.str();
100 }
101
102 /// <summary>
103 /// Generalized blocktriple representing a (sign, scale, significant) with unrounded arithmetic
104 ///
105 /// For addition and subtraction, blocktriple uses a 2's complement representation of the form iii.fffff.
106 /// The 3 integer bits are required to capture the negative overflow condition.
107 ///
108 /// For multiplication, blocktriple uses a 1's complement representation of the form ii.fffff.
109 /// The 2 integer bits are required to capture the overflow condition.
110 ///
111 /// Blocktriple does not normalize the output of ADD/SUB/MUL so that all bits are
112 /// available for the rounding decision. Number systems that use blocktriple as
113 /// their general floating-point engine can use the roundUp(targetFbits) method to
114 /// obtain the rounding direction, and the alignmentShift(targetFbits) method to
115 /// obtain the shift required to normalize the fraction bits.
116 /// </summary>
117 /// <typeparam name="fractionbits">number of fraction bits in the significant</typeparam>
118 /// <typeparam name="bt">block type: one of [uint8_t, uint16_t, uint32_t, uint64_t]</typeparam>
119 template<size_t fractionbits, BlockTripleOperator _op = BlockTripleOperator::ADD, typename bt = uint32_t>
120 class blocktriple {
121 public:
122 static constexpr size_t nbits = fractionbits; // a convenience and consistency alias
123 static constexpr size_t fbits = fractionbits;
124 typedef bt BlockType;
125 static constexpr BlockTripleOperator op = _op;
126
127 static constexpr size_t bitsInByte = 8ull;
128 static constexpr size_t bitsInBlock = sizeof(bt) * bitsInByte;
129 static constexpr size_t nrBlocks = 1ull + ((nbits - 1ull) / bitsInBlock);
130 static constexpr size_t storageMask = (0xFFFF'FFFF'FFFF'FFFFull >> (64ull - bitsInBlock));
131
132 static constexpr size_t MSU = nrBlocks - 1ull; // MSU == Most Significant Unit, as MSB is already taken
133
134 static constexpr size_t fhbits = fbits + 1; // size of all bits
135 static constexpr size_t abits = fbits + 3ull; // size of the addend
136 static constexpr size_t mbits = 2ull * fhbits; // size of the multiplier output
137 static constexpr size_t divbits = 3ull * fbits + 4ull; // size of the divider output
138 static constexpr size_t sqrtbits = 2ull * fhbits; // size of the square root output
139 // we transform input operands into the operation's target output size
140 // so that everything is aligned correctly before the operation starts.
141 static constexpr size_t bfbits =
142 (op == BlockTripleOperator::ADD ? abits :
143 (op == BlockTripleOperator::MUL ? mbits :
144 (op == BlockTripleOperator::DIV ? divbits :
145 (op == BlockTripleOperator::SQRT ? sqrtbits : fhbits)))); // REPRESENTATION is the fall through condition
146 // radix point of the OUTPUT of an operator
147 static constexpr int radix =
148 (op == BlockTripleOperator::ADD ? static_cast<int>(fbits) :
149 (op == BlockTripleOperator::MUL ? static_cast<int>(2*fbits) :
150 (op == BlockTripleOperator::DIV ? static_cast<int>(fbits) :
151 (op == BlockTripleOperator::SQRT ? static_cast<int>(sqrtbits) : static_cast<int>(fbits))))); // REPRESENTATION is the fall through condition
152 static constexpr BitEncoding encoding =
153 (op == BlockTripleOperator::ADD ? BitEncoding::Twos :
154 (op == BlockTripleOperator::MUL ? BitEncoding::Ones :
155 (op == BlockTripleOperator::DIV ? BitEncoding::Ones :
156 (op == BlockTripleOperator::SQRT ? BitEncoding::Ones : BitEncoding::Ones))));
157 static constexpr size_t normalBits = (bfbits < 64 ? bfbits : 64);
158 static constexpr size_t normalFormMask = (normalBits == 64) ? 0xFFFF'FFFF'FFFF'FFFFull : (~(0xFFFF'FFFF'FFFF'FFFFull << (normalBits - 1)));
159 // to maximize performance, can we make the default blocktype a uint64_t?
160 // storage unit for block arithmetic needs to be uin32_t until we can figure out
161 // how to manage carry propagation on uint64_t using intrinsics/assembly code
162 using Frac = sw::universal::blockfraction<bfbits, bt, encoding>;
163
164 static constexpr bt ALL_ONES = bt(~0);
165 // generate the special case overflow pattern mask when representation is nbits + 1 < 64
166 static constexpr size_t maxbits = (nbits + 1) < 63 ? (nbits + 1) : 63;
167 static constexpr size_t overflowPattern = (maxbits < 63) ? (1ull << maxbits) : 0ull; // overflow of 1.11111 to 10.0000
168
169 constexpr blocktriple(const blocktriple&) noexcept = default;
170 constexpr blocktriple(blocktriple&&) noexcept = default;
171
172 constexpr blocktriple& operator=(const blocktriple&) noexcept = default;
173 constexpr blocktriple& operator=(blocktriple&&) noexcept = default;
174
blocktriple()175 constexpr blocktriple() noexcept :
176 _nan{ false }, _inf{ false }, _zero{ true },
177 _sign{ false }, _scale{ 0 } {} // _significant uses default constructor and static constexpr radix computation
178
179 // decorated constructors
blocktriple(signed char iv)180 constexpr blocktriple(signed char iv) noexcept { *this = iv; }
blocktriple(short iv)181 constexpr blocktriple(short iv) noexcept { *this = iv; }
blocktriple(int iv)182 constexpr blocktriple(int iv) noexcept { *this = iv; }
blocktriple(long iv)183 constexpr blocktriple(long iv) noexcept { *this = iv; }
blocktriple(long long iv)184 constexpr blocktriple(long long iv) noexcept { *this = iv; }
blocktriple(char iv)185 constexpr blocktriple(char iv) noexcept { *this = iv; }
blocktriple(unsigned short iv)186 constexpr blocktriple(unsigned short iv) noexcept { *this = iv; }
blocktriple(unsigned int iv)187 constexpr blocktriple(unsigned int iv) noexcept { *this = iv; }
blocktriple(unsigned long iv)188 constexpr blocktriple(unsigned long iv) noexcept { *this = iv; }
blocktriple(unsigned long long iv)189 constexpr blocktriple(unsigned long long iv) noexcept { *this = iv; }
blocktriple(float iv)190 constexpr blocktriple(float iv) noexcept { *this = iv; }
blocktriple(double iv)191 constexpr blocktriple(double iv) noexcept { *this = iv; }
192
193
194 // conversion operators
operator =(signed char rhs)195 constexpr blocktriple& operator=(signed char rhs) noexcept { return convert_signed_integer(rhs); }
operator =(short rhs)196 constexpr blocktriple& operator=(short rhs) noexcept { return convert_signed_integer(rhs); }
operator =(int rhs)197 constexpr blocktriple& operator=(int rhs) noexcept { return convert_signed_integer(rhs); }
operator =(long rhs)198 constexpr blocktriple& operator=(long rhs) noexcept { return convert_signed_integer(rhs); }
operator =(long long rhs)199 constexpr blocktriple& operator=(long long rhs) noexcept { return convert_signed_integer(rhs); }
operator =(char rhs)200 constexpr blocktriple& operator=(char rhs) noexcept { return convert_unsigned_integer(rhs); }
operator =(unsigned short rhs)201 constexpr blocktriple& operator=(unsigned short rhs) noexcept { return convert_unsigned_integer(rhs); }
operator =(unsigned int rhs)202 constexpr blocktriple& operator=(unsigned int rhs) noexcept { return convert_unsigned_integer(rhs); }
operator =(unsigned long rhs)203 constexpr blocktriple& operator=(unsigned long rhs) noexcept { return convert_unsigned_integer(rhs); }
operator =(unsigned long long rhs)204 constexpr blocktriple& operator=(unsigned long long rhs) noexcept { return convert_unsigned_integer(rhs); }
operator =(float rhs)205 constexpr blocktriple& operator=(float rhs) noexcept { return convert_float(rhs); }
operator =(double rhs)206 constexpr blocktriple& operator=(double rhs) noexcept { return convert_double(rhs); }
207
208
209 // guard long double support to enable ARM and RISC-V embedded environments
210 #if LONG_DOUBLE_SUPPORT
blocktriple(long double iv)211 CONSTEXPRESSION blocktriple(long double iv) noexcept { *this = iv; }
operator =(long double rhs)212 CONSTEXPRESSION blocktriple& operator=(long double rhs) noexcept { return *this = (long double)(rhs); };
operator long double() const213 explicit operator long double() const noexcept { return to_native<long double>(); }
214 #endif
215
216 // operators
operator <<=(int leftShift)217 constexpr blocktriple& operator<<=(int leftShift) noexcept {
218 if (leftShift == 0) return *this;
219 if (leftShift < 0) return operator>>=(-leftShift);
220 _scale -= leftShift;
221 _significant <<= leftShift;
222 return *this;
223 }
operator >>=(int rightShift)224 constexpr blocktriple& operator>>=(int rightShift) noexcept {
225 if (rightShift == 0) return *this;
226 if (rightShift < 0) return operator<<=(-rightShift);
227 _scale += rightShift;
228 _significant >>= rightShift;
229 return *this;
230 }
231
232 /// <summary>
233 /// roundingDecision returns a pair<bool, size_t> to direct the rounding and right shift
234 /// </summary>
235 /// <param name="adjustment">adjustment for subnormals </param>
236 /// <returns>std::pair<bool, size_t> of rounding direction (up is true, down is false), and the right shift</returns>
roundingDecision(int adjustment=0) const237 constexpr std::pair<bool, size_t> roundingDecision(int adjustment = 0) const noexcept {
238 // preconditions: blocktriple is in 1's complement form, and not a denorm
239 // this implies that the scale of the significant is 0 or 1
240 size_t significantScale = static_cast<size_t>(significantscale());
241 // find the shift that gets us to the lsb
242 size_t shift = significantScale + static_cast<size_t>(radix) - fbits;
243 #ifdef PERFORMANCE_OPTIMIZATION
244 if constexpr (bfbits < 65) {
245 uint64_t fracbits = _significant.get_ull(); // get all the bits, including the integer bits
246 // ... lsb | guard round sticky round
247 // x 0 x x down
248 // 0 1 0 0 down round to even
249 // 1 1 0 0 up round to even
250 // x 1 0 1 up
251 uint64_t mask = (1ull << (shift + adjustment);
252 bool lsb = fracbits & mask;
253 mask >>= 1;
254 bool guard = fracbits & mask;
255 mask >>= 1;
256 bool round = fracbits & mask;
257 if (shift < 2) {
258 mask = 0xFFFF'FFFF'FFFF'FFFFull;
259 }
260 else {
261 mask = 0xFFFF'FFFF'FFFF'FFFFull << (shift - 2);
262 }
263 mask = ~mask;
264 // std::cout << "fracbits : " << to_binary(fracbits) << std::endl;
265 // std::cout << "sticky mask : " << to_binary(mask) << std::endl;
266 bool sticky = fracbits & mask;
267 roundup = (guard && (lsb || (round || sticky)));
268 }
269 else {
270 roundup = _significant.roundingMode(shift);
271 }
272 #endif
273 bool roundup = _significant.roundingMode(shift + adjustment);
274 return std::pair<bool, size_t>(roundup, shift + adjustment);
275 }
276 // apply a 2's complement recoding of the fraction bits
277 inline constexpr blocktriple& twosComplement() noexcept {
278 _significant.twosComplement();
279 return *this;
280 }
281
282 // modifiers
283 constexpr void clear() noexcept {
284 _nan = false;
285 _inf = false;
286 _zero = true;
287 _sign = false;
288 _scale = 0;
289 _significant.clear();
290 }
291 constexpr void setzero(bool sign = false) noexcept {
292 clear();
293 _sign = sign;
294 }
295 constexpr void setnan(bool sign = true) noexcept {
296 clear();
297 _nan = true;
298 _inf = false;
299 _zero = false;
300 _sign = sign; // if true, signalling NaN, otherwise quiet
301 }
302 constexpr void setinf(bool sign = true) noexcept {
303 clear();
304 _inf = true;
305 _zero = false;
306 _sign = sign;
307 }
308 constexpr void setpos() noexcept { _sign = false; }
309 constexpr void setnormal() noexcept {
310 _nan = false;
311 _inf = false;
312 _zero = false;
313 }
314 constexpr void setsign(bool s) noexcept { _sign = s; }
315 constexpr void setscale(int scale) noexcept { _scale = scale; }
316 constexpr void setradix(int _radix) noexcept { _significant.setradix(_radix); }
317 constexpr void setbit(size_t index, bool v = true) noexcept { _significant.setbit(index, v); }
318 /// <summary>
319 /// set the bits of the significant, given raw fraction bits. only works for bfbits < 64
320 /// </summary>
321 /// <param name="raw">raw bit pattern representing the fraction bits</param>
322 /// <returns></returns>
323 constexpr void setbits(uint64_t raw) noexcept {
324 // the setbits() api cannot be modified as it is shared by all number systems
325 // as a standard mechanism for the test suites to set bits.
326 // However, blocktriple uses extra state to encode the special values,
327 // and the test can't use this interface to set that.
328 // Thus the caller (typically the test suite) must manage this special state.
329 // _scale must be set by caller, so that the same raw bit pattern can
330 // span different scales
331 //
332 // blocktriple non-special values are always in normalized form
333 _nan = false; _inf = false;
334 _significant.setradix(radix);
335 // Here we just check for 0 special case
336 if (raw == 0) {
337 _zero = true;
338 _significant.clear();
339 }
340 else {
341 _zero = false;
342 _significant.setbits(raw);
343 }
344 }
345 constexpr void setblock(size_t i, const bt& block) {
346 _significant.setblock(i, block);
347 }
348
349 // selectors
350 inline constexpr bool isnan() const noexcept { return _nan; }
351 inline constexpr bool isinf() const noexcept { return _inf; }
352 inline constexpr bool iszero() const noexcept { return _zero; }
353 inline constexpr bool ispos() const noexcept { return !_sign; }
354 inline constexpr bool isneg() const noexcept { return _sign; }
355 inline constexpr bool sign() const noexcept { return _sign; }
356 inline constexpr int scale() const noexcept { return _scale; }
357 inline constexpr int significantscale() const noexcept {
358 int sigScale = 0;
359 for (int i = bfbits - 1; i >= radix; --i) {
360 if (_significant.at(static_cast<size_t>(i))) {
361 sigScale = i - radix;
362 break;
363 }
364 }
365 return sigScale;
366 }
367 inline constexpr Frac significant() const noexcept { return _significant; }
368 // specialty function to offer a fast path to get the fraction bits out of the representation
369 // to convert to a target number system: only valid for bfbits <= 64
370 inline constexpr uint64_t fraction_ull() const noexcept { return _significant.fraction_ull(); }
371 inline constexpr uint64_t get_ull() const noexcept { return _significant.get_ull(); }
372 // fraction bit accessors
373 inline constexpr bool at(size_t index) const noexcept { return _significant.at(index); }
374 inline constexpr bool test(size_t index) const noexcept { return _significant.at(index); }
375
376 // conversion operators
377 explicit operator float() const noexcept { return to_native<float>(); }
378 explicit operator double() const noexcept { return to_native<double>(); }
379
380
381 /////////////////////////////////////////////////////////////
382 // ALU operators
383
384 /// <summary>
385 /// add two fixed-point numbers with fbits fraction bits
386 /// yielding an unrounded sum of 3+fbits. (currently we generate a 3+(2*fbits) result as we haven't implemented the sticky bit optimization)
387 /// This sum can overflow, be normal, or denormal.
388 /// Since we are not rounding
389 /// we cannot act on overflow as we would potentially shift
390 /// rounding state out, and thus the output must be processed
391 /// by the calling environment. We can act on denormalized
392 /// encodings, so these are processed in this function.
393 /// To avoid fraction bit copies, the input arguments
394 /// must be prepared by the calling environment, and
395 /// this function only manipulates the bits.
396 /// </summary>
397 /// <param name="lhs">ephemeral blocktriple that may get modified</param>
398 /// <param name="rhs">ephemeral blocktriple that may get modified</param>
399 /// <param name="result">unrounded sum</param>
400 void add(blocktriple& lhs, blocktriple& rhs) {
401 int lhs_scale = lhs.scale();
402 int rhs_scale = rhs.scale();
403 int scale_of_result = std::max(lhs_scale, rhs_scale);
404
405 // avoid copy by directly manipulating the fraction bits of the arguments
406 int expDiff = lhs_scale - rhs_scale;
407 if (expDiff < 0) {
408 lhs >>= -expDiff;
409 }
410 else if (expDiff > 0) {
411 rhs >>= expDiff;
412 }
413 if (lhs.isneg()) lhs._significant.twosComplement();
414 if (rhs.isneg()) rhs._significant.twosComplement();
415
416 _significant.add(lhs._significant, rhs._significant); // do the bit arithmetic manipulation
417 _significant.setradix(radix); // set the radix interpretation of the output
418
419 if constexpr (_trace_btriple_add) {
420 std::cout << "blockfraction unrounded add\n";
421 std::cout << typeid(lhs._significant).name() << '\n';
422 std::cout << "lhs significant : " << to_binary(lhs) << " : " << lhs << '\n';
423 std::cout << "rhs significant : " << to_binary(rhs) << " : " << rhs << '\n';
424 std::cout << typeid(_significant).name() << '\n';
425 std::cout << "sum significant : " << to_binary(*this) << " : " << *this << '\n';
426 }
427 if (_significant.iszero()) {
428 clear();
429 }
430 else {
431 _zero = false;
432 if (_significant.test(bfbits-1)) { // is the result negative?
433 _significant.twosComplement();
434 _sign = true;
435 }
436 _scale = scale_of_result;
437 // leave 01#.ffff to output processing: this is an overflow condition
438 // 001.ffff is a perfect normalized format
439 // fix 000.#### denormalized state to normalized
440 if (!_significant.test(bfbits-2) && !_significant.test(bfbits-3)) {
441 // found a denormalized form to normalize: find MSB
442 int msb = _significant.msb(); // zero case has been taken care off above
443 // std::cout << "sum : " << to_binary(*this) << std::endl;
444 // std::cout << "msb : " << msb << std::endl;
445 int leftShift = static_cast<int>(bfbits) - 3 - msb;
446 _significant <<= leftShift;
447 _scale -= leftShift;
448 }
449 }
450 if constexpr (_trace_btriple_add) {
451 std::cout << "blocktriple normalized add\n";
452 std::cout << typeid(lhs).name() << '\n';
453 std::cout << "lhs : " << to_binary(lhs) << " : " << lhs << '\n';
454 std::cout << "rhs : " << to_binary(rhs) << " : " << rhs << '\n';
455 std::cout << typeid(*this).name() << '\n';
456 std::cout << "sum : " << to_binary(*this) << " : " << *this << '\n';
457 }
458 }
459
460 void sub(blocktriple& lhs, blocktriple& rhs) {
461 add(lhs, rhs.twosComplement());
462 }
463
464 /// <summary>
465 /// multiply two real numbers with fbits fraction bits
466 /// yielding an 2*(1+fbits) unrounded product.
467 ///
468 /// This product can overflow, be normal, or denormal.
469 /// Since we are not rounding
470 /// we cannot act on overflow as we would potentially shift
471 /// rounding state out, and thus the output must be processed
472 /// by the calling environment. We can act on denormalized
473 /// encodings, so these are processed in this function.
474 /// To avoid fraction bit copies, the input arguments
475 /// must be prepared by the calling environment, and
476 /// this function only manipulates the bits.
477 /// /// </summary>
478 /// <param name="lhs">ephemeral blocktriple that may get modified</param>
479 /// <param name="rhs">ephemeral blocktriple that may get modified</param>
480 /// <param name="result">unrounded sum</param>
481 void mul(blocktriple& lhs, blocktriple& rhs) {
482 int lhs_scale = lhs.scale();
483 int rhs_scale = rhs.scale();
484 int scale_of_result = lhs_scale + rhs_scale;
485
486 // avoid copy by directly manipulating the fraction bits of the arguments
487 _significant.mul(lhs._significant, rhs._significant); // do the bit arithmetic manipulation
488 _significant.setradix(2*fbits); // set the radix interpretation of the output
489
490 if constexpr (_trace_btriple_mul) {
491 std::cout << "blockfraction unrounded mul\n";
492 std::cout << typeid(lhs._significant).name() << '\n';
493 std::cout << "lhs significant : " << to_binary(lhs) << " : " << lhs << '\n';
494 std::cout << "rhs significant : " << to_binary(rhs) << " : " << rhs << '\n';
495 std::cout << typeid(_significant).name() << '\n';
496 std::cout << "mul significant : " << to_binary(*this) << " : " << *this << '\n'; // <-- the scale of this representation is not yet set
497 }
498 if (_significant.iszero()) {
499 clear();
500 }
501 else {
502 _zero = false;
503 _scale = scale_of_result;
504 _sign = (lhs.sign() == rhs.sign()) ? false : true;
505 if (_significant.test(bfbits - 1)) { // test for carry
506 bool roundup = _significant.test(1) && _significant.test(0);
507 _scale += 1;
508 _significant >>= 1;
509 if (roundup) _significant.increment();
510 }
511 else if (_significant.test(bfbits - 2)) {
512 // all good, found a normal form
513 }
514 else {
515 // found a denormalized form, thus need to normalize: find MSB
516 int msb = _significant.msb(); // zero case has been taken care off above
517 // std::cout << "mul : " << to_binary(*this) << std::endl;
518 // std::cout << "msb : " << msb << std::endl;
519 int leftShift = static_cast<int>(bfbits) - 3 - msb;
520 _significant <<= leftShift;
521 _scale -= leftShift;
522 }
523 }
524 if constexpr (_trace_btriple_mul) {
525 std::cout << "blocktriple normalized mul\n";
526 std::cout << typeid(lhs).name() << '\n';
527 std::cout << "lhs : " << to_binary(lhs) << " : " << lhs << '\n';
528 std::cout << "rhs : " << to_binary(rhs) << " : " << rhs << '\n';
529 std::cout << typeid(*this).name() << '\n';
530 std::cout << "mul : " << to_binary(*this) << " : " << *this << '\n';
531 }
532 }
533
534 void div(blocktriple& lhs, blocktriple& rhs) {
535 int lhs_scale = lhs.scale();
536 int rhs_scale = rhs.scale();
537 int scale_of_result = lhs_scale + rhs_scale;
538
539 // avoid copy by directly manipulating the fraction bits of the arguments
540 _significant.mul(lhs._significant, rhs._significant);
541
542 if constexpr (_trace_btriple_div) {
543 std::cout << "blockfraction unrounded div\n";
544 std::cout << typeid(lhs._significant).name() << '\n';
545 std::cout << "lhs significant : " << to_binary(lhs) << " : " << lhs << '\n';
546 std::cout << "rhs significant : " << to_binary(rhs) << " : " << rhs << '\n';
547 std::cout << typeid(_significant).name() << '\n';
548 std::cout << "div significant : " << to_binary(*this) << " : " << *this << '\n'; // <-- the scale of this representation is not yet set
549 }
550 if (_significant.iszero()) {
551 clear();
552 }
553 else {
554 _zero = false;
555 _scale = scale_of_result;
556 if (_significant.test(bfbits - 1)) { // test for carry
557 _scale += 1;
558 _significant >>= 2; // TODO: do we need to round on bits shifted away?
559 }
560 else if (_significant.test(bfbits - 2)) { // check for the hidden bit
561 _significant >>= 1;
562 }
563 else {
564 // found a denormalized form, thus need to normalize: find MSB
565 int msb = _significant.msb(); // zero case has been taken care off above
566 // std::cout << "div : " << to_binary(*this) << std::endl;
567 // std::cout << "msb : " << msb << std::endl;
568 int leftShift = static_cast<int>(bfbits) - 3 - msb;
569 _significant <<= leftShift;
570 _scale -= leftShift;
571 }
572 }
573 if constexpr (_trace_btriple_div) {
574 std::cout << "blocktriple normalized div\n";
575 std::cout << typeid(lhs).name() << '\n';
576 std::cout << "lhs : " << to_binary(lhs) << " : " << lhs << '\n';
577 std::cout << "rhs : " << to_binary(rhs) << " : " << rhs << '\n';
578 std::cout << typeid(*this).name() << '\n';
579 std::cout << "div : " << to_binary(*this) << " : " << *this << '\n';
580 }
581 }
582
583 private:
584 // special cases to keep track of
585 bool _nan; // most dominant state
586 bool _inf; // second most dominant state
587 bool _zero;// third most dominant special case
588
589 // the triple (sign, scale, significant)
590 bool _sign;
591 int _scale;
592
593 protected:
594 Frac _significant;
595
596 // helpers
597
598 private:
599 /// <summary>
600 /// round a set of source bits to the present representation.
601 /// srcbits is the number of bits of significant in the source representation
602 /// round<> is intended only for rounding raw IEEE-754 bits
603 /// </summary>
604 /// <typeparam name="StorageType">type of incoming bits</typeparam>
605 /// <param name="raw">the raw unrounded bits</param>
606 /// <returns></returns>
607 template<size_t srcbits, typename StorageType>
608 constexpr StorageType round(StorageType raw) noexcept {
609 if constexpr (nbits < srcbits) {
610 // round to even: lsb guard round sticky
611 // collect guard, round, and sticky bits
612 // this same logic will work for the case where
613 // we only have a guard bit and no round and/or sticky bits
614 // because the mask logic will make round and sticky both 0
615
616 // example: rounding the bits of a float to our nbits
617 // float significant: 24bits : 0bhfff'ffff'ffff'ffff'ffff'ffff; h is hidden, f is fraction bit
618 // blocktriple target: 10bits: 0bhfff'ffff'fff hidden bit is implicit, 10 fraction bits
619 // lg'rs
620 // 0b0000'0000'0001'0000'0000'0000; guard mask == 1 << srcbits - nbits - 2: 24 - 10 - 2 = 12
621 constexpr uint32_t upper = 8 * sizeof(StorageType) + 2;
622 constexpr uint32_t shift = srcbits - nbits - 2ull; // srcbits includes the hidden bit, nbits does not
623 StorageType mask = (StorageType{ 1ull } << shift);
624 // std::cout << "raw : " << to_binary(raw, sizeof(StorageType)*8, true) << '\n';
625 // std::cout << "guard : " << to_binary(mask, sizeof(StorageType) * 8, true) << '\n';
626 bool guard = (mask & raw);
627 mask >>= 1;
628 // std::cout << "round : " << to_binary(mask, sizeof(StorageType) * 8, true) << '\n';
629 bool round = (mask & raw);
630 if constexpr (shift > 1 && shift < upper) { // protect against a negative shift
631 StorageType allones(StorageType(~0));
632 mask = StorageType(allones << (shift - 1));
633 mask = ~mask;
634 }
635 else {
636 mask = 0;
637 }
638 // std::cout << "sticky: " << to_binary(mask, sizeof(StorageType) * 8, true) << '\n';
639 bool sticky = (mask & raw);
640
641 raw >>= (shift + 1); // shift out the bits we are rounding away
642 bool lsb = (raw & 0x1);
643 // std::cout << "raw : " << to_binary(raw, sizeof(StorageType) * 8, true) << '\n';
644
645 // ... lsb | guard round sticky round
646 // x 0 x x down
647 // 0 1 0 0 down round to even
648 // 1 1 0 0 up round to even
649 // x 1 0 1 up
650 // x 1 1 0 up
651 // x 1 1 1 up
652 if (guard) {
653 if (lsb && (!round && !sticky)) ++raw; // round to even
654 if (round || sticky) ++raw;
655 if (raw == overflowPattern) {
656 ++_scale;
657 raw >>= 1;
658 }
659 }
660 }
661 else {
662 constexpr size_t shift = nbits - srcbits;
663 if constexpr (shift < sizeof(StorageType)) {
664 raw <<= shift;
665 }
666 else {
667 #if !BIT_CAST_SUPPORT
668 std::cerr << "round: shift " << shift << " is too large (>= " << sizeof(StorageType) << ")\n";
669 #endif
670 }
671 }
672 // std::cout << "final : " << to_binary(raw, sizeof(StorageType) * 8, true) << '\n';
673 return static_cast<StorageType>(raw);
674 }
675
676 template<typename Ty>
677 constexpr inline blocktriple& convert_unsigned_integer(const Ty& rhs) noexcept {
678 _nan = false;
679 _inf = false;
680 _zero = true;
681 if (0 == rhs) return *this;
682 _zero = false;
683 _sign = false;
684 uint64_t raw = static_cast<uint64_t>(rhs);
685 _scale = static_cast<int>(findMostSignificantBit(raw)) - 1; // precondition that msb > 0 is satisfied by the zero test above
686 constexpr size_t sizeInBits = 8 * sizeof(Ty);
687 uint64_t shift = sizeInBits - int64_t(_scale) - 1;
688 raw <<= shift;
689 uint64_t rounded_bits = round<sizeInBits, uint64_t>(raw);
690 switch (op) {
691 case BlockTripleOperator::ADD:
692 _significant.setradix(fbits);
693 break;
694 case BlockTripleOperator::MUL:
695 _significant.setradix(fbits);
696 break;
697 case BlockTripleOperator::DIV:
698 _significant.setradix(3 * fbits);
699 break;
700 case BlockTripleOperator::SQRT:
701 _significant.setradix(2 * fbits);
702 break;
703 case BlockTripleOperator::REPRESENTATION:
704 _significant.setradix(fbits);
705 break;
706 }
707 _significant.setbits(rounded_bits);
708 return *this;
709 }
710 template<typename Ty>
711 constexpr inline blocktriple& convert_signed_integer(const Ty& rhs) noexcept {
712 _nan = false;
713 _inf = false;
714 _zero = true;
715 if (0 == rhs) return *this;
716 _zero = false;
717 _sign = (rhs < 0);
718 uint64_t raw = static_cast<uint64_t>(_sign ? -rhs : rhs);
719 _scale = static_cast<int>(findMostSignificantBit(raw)) - 1; // precondition that msb > 0 is satisfied by the zero test above
720 constexpr size_t sizeInBits = 8 * sizeof(Ty);
721 uint64_t shift = sizeInBits - int64_t(_scale) - 1;
722 raw <<= shift;
723 uint64_t rounded_bits = round<sizeInBits, uint64_t>(raw);
724 switch (op) {
725 case BlockTripleOperator::ADD:
726 _significant.setradix(fbits);
727 break;
728 case BlockTripleOperator::MUL:
729 _significant.setradix(fbits);
730 break;
731 case BlockTripleOperator::DIV:
732 _significant.setradix(3*fbits);
733 break;
734 case BlockTripleOperator::SQRT:
735 _significant.setradix(2*fbits);
736 break;
737 case BlockTripleOperator::REPRESENTATION:
738 _significant.setradix(fbits);
739 break;
740 }
741 _significant.setbits(rounded_bits);
742 return *this;
743 }
744
745 inline CONSTEXPRESSION blocktriple& convert_float(float rhs) noexcept { // TODO: deal with subnormals and inf
746 #if BIT_CAST_SUPPORT
747 // normal number
748 uint32_t bc = std::bit_cast<uint32_t>(rhs);
749 bool s = (0x8000'0000ul & bc);
750 uint32_t raw_exp = static_cast<uint32_t>((0x7F80'0000ul & bc) >> 23);
751 uint32_t raw = (1ul << 23) | (0x007F'FFFFul & bc);
752 #else // !BIT_CAST_SUPPORT
753 float_decoder decoder;
754 decoder.f = rhs;
755 bool s = decoder.parts.sign ? true : false;
756 uint32_t raw_exp = decoder.parts.exponent;
757 uint32_t raw = (1ul << 23) | decoder.parts.fraction;
758 #endif // !BIT_CAST_SUPPORT
759
760 // special case handling
761 if (raw_exp == 0xFFu) { // special cases
762 if (raw == 1ul || raw == 0x00C0'0001ul) {
763 // 1.11111111.00000000000000000000001 signalling nan
764 // 0.11111111.00000000000000000000001 signalling nan
765 // MSVC
766 // 1.11111111.10000000000000000000001 signalling nan
767 // 0.11111111.10000000000000000000001 signalling nan
768 // NAN_TYPE_SIGNALLING;
769 _nan = true;
770 _inf = false;
771 _sign = true; // this is the encoding of a signalling NaN
772 return *this;
773 }
774 if (raw == 0x00C0'0000ul) {
775 // 1.11111111.10000000000000000000000 quiet nan
776 // 0.11111111.10000000000000000000000 quiet nan
777 // NAN_TYPE_QUIET);
778 _nan = true;
779 _inf = false;
780 _sign = false; // this is the encoding of a quiet NaN
781 return *this;
782 }
783 if (raw == 0ul) {
784 // 1.11111111.00000000000000000000000 -inf
785 // 0.11111111.00000000000000000000000 +inf
786 _nan = false;
787 _inf = true;
788 _sign = s; // + or - infinity
789 return *this;
790 }
791 }
792 if (rhs == 0.0f) { // IEEE rule: this is valid for + and - 0.0
793 _nan = false;
794 _inf = false;
795 _zero = true;
796 _sign = s;
797 return *this;
798 }
799 // normal number, not zero
800 _nan = false;
801 _inf = false;
802 _zero = false;
803 _sign = s;
804 _scale = static_cast<int>(raw_exp) - 127;
805 uint32_t rounded_bits = round<24, uint32_t>(raw);
806 _significant.setradix(fbits); // round maps the fraction bits to the radix at fbits
807 _significant.setbits(rounded_bits);
808 return *this;
809 }
810
811 inline CONSTEXPRESSION blocktriple& convert_double(double rhs) noexcept { // TODO: deal with subnormals and inf
812 #if BIT_CAST_SUPPORT
813 uint64_t bc = std::bit_cast<uint64_t>(rhs);
814 bool s = (0x8000'0000'0000'0000ull & bc);
815 uint32_t raw_exp = static_cast<uint32_t>((0x7FF0'0000'0000'0000ull & bc) >> 52);
816 uint64_t raw = (1ull << 52) | (0x000F'FFFF'FFFF'FFFFull & bc);
817 #else
818 double_decoder decoder;
819 decoder.d = rhs;
820 bool s = decoder.parts.sign ? true : false;
821 uint64_t raw_exp = decoder.parts.exponent;
822 uint64_t raw = (1ull << 52) | decoder.parts.fraction;
823 #endif // !BIT_CAST_SUPPORT
824
825 // special case handling
826 if (raw_exp == 0x7FFu) { // special cases
827 if (raw == 1ul || raw == 0x0040'0001ul) {
828 // 1.111'1111'1111.0000000000...0000000001 signalling nan
829 // 0.111'1111'1111.0000000000...0000000001 signalling nan
830 // MSVC
831 // 1.111'1111'1111.1000000000...0000000001 signalling nan
832 // 0.111'1111'1111.1000000000...0000000001 signalling nan
833 // NAN_TYPE_SIGNALLING;
834 _nan = true;
835 _inf = true; // this is the encoding of a signalling NaN
836 return *this;
837 }
838 if (raw == 0x0008'0000'0000'0000ull) {
839 // 1.111'1111'1111.1000000000...0000000000 quiet nan
840 // 0.111'1111'1111.1000000000...0000000000 quiet nan
841 // NAN_TYPE_QUIET);
842 _nan = true;
843 _inf = false; // this is the encoding of a quiet NaN
844 return *this;
845 }
846 if (raw == 0ul) {
847 // 1.11111111.00000000000000000000000 -inf
848 // 0.11111111.00000000000000000000000 +inf
849 _nan = false;
850 _inf = true;
851 _sign = s; // + or - infinity
852 return *this;
853 }
854 }
855 if (rhs == 0.0f) { // IEEE rule: this is valid for + and - 0.0
856 _nan = false;
857 _inf = false;
858 _zero = true;
859 _sign = s;
860 return *this;
861 }
862 // normal number
863 _nan = false;
864 _inf = false;
865 _zero = false;
866 _sign = s;
867 _scale = static_cast<int>(raw_exp) - 1023;
868 uint64_t rounded_bits = round<53, uint64_t>(raw); // round manipulates _scale if needed
869 _significant.setradix(fbits); // round maps the fraction bits to the radix at fbits
870 _significant.setbits(rounded_bits);
871 return *this;
872 }
873
874 template<typename Real>
875 constexpr Real to_native() const noexcept {
876 if (_nan) { if (_sign) return std::numeric_limits<Real>::signaling_NaN(); else return std::numeric_limits<Real>::quiet_NaN(); }
877 if (_inf) { if (_sign) return -std::numeric_limits<Real>::infinity(); else return std::numeric_limits<Real>::infinity(); }
878 if (_zero) { if (_sign) return -Real(0.0f); else return Real(0.0f); }
879 Real v = Real(_significant);
880 v *= std::pow(Real(2.0f), Real(_scale));
881 return (_sign ? -v : v);
882 }
883
884 // template parameters need names different from class template parameters (for gcc and clang)
885 template<size_t nnbits, BlockTripleOperator oop, typename bbt>
886 friend std::ostream& operator<< (std::ostream& ostr, const blocktriple<nnbits, oop, bbt>& a);
887 template<size_t nnbits, BlockTripleOperator oop, typename bbt>
888 friend std::istream& operator>> (std::istream& istr, blocktriple<nnbits, oop, bbt>& a);
889
890 // declare as friends to avoid needing a marshalling function to get significant bits out
891 template<size_t nnbits, BlockTripleOperator oop, typename bbt>
892 friend std::string to_binary(const blocktriple<nnbits, oop, bbt>&, bool);
893 template<size_t nnbits, BlockTripleOperator oop, typename bbt>
894 friend std::string to_triple(const blocktriple<nnbits, oop, bbt>&, bool);
895
896 // logic operators
897 template<size_t nnbits, BlockTripleOperator oop, typename bbt>
898 friend bool operator==(const blocktriple<nnbits, oop, bbt>& lhs, const blocktriple<nnbits, oop, bbt>& rhs);
899 template<size_t nnbits, BlockTripleOperator oop, typename bbt>
900 friend bool operator!=(const blocktriple<nnbits, oop, bbt>& lhs, const blocktriple<nnbits, oop, bbt>& rhs);
901 template<size_t nnbits, BlockTripleOperator oop, typename bbt>
902 friend bool operator< (const blocktriple<nnbits, oop, bbt>& lhs, const blocktriple<nnbits, oop, bbt>& rhs);
903 template<size_t nnbits, BlockTripleOperator oop, typename bbt>
904 friend bool operator> (const blocktriple<nnbits, oop, bbt>& lhs, const blocktriple<nnbits, oop, bbt>& rhs);
905 template<size_t nnbits, BlockTripleOperator oop, typename bbt>
906 friend bool operator<=(const blocktriple<nnbits, oop, bbt>& lhs, const blocktriple<nnbits, oop, bbt>& rhs);
907 template<size_t nnbits, BlockTripleOperator oop, typename bbt>
908 friend bool operator>=(const blocktriple<nnbits, oop, bbt>& lhs, const blocktriple<nnbits, oop, bbt>& rhs);
909 };
910
911 ////////////////////// operators
912
913 inline std::ostream& operator<<(std::ostream& ostr, const BlockTripleOperator& op) {
914 switch (op) {
915 case BlockTripleOperator::ADD:
916 ostr << "ADD";
917 break;
918 case BlockTripleOperator::MUL:
919 ostr << "MUL";
920 break;
921 case BlockTripleOperator::DIV:
922 ostr << "DIV";
923 break;
924 case BlockTripleOperator::SQRT:
925 ostr << "SQRT";
926 break;
927 case BlockTripleOperator::REPRESENTATION:
928 ostr << "REP";
929 break;
930 default:
931 ostr << "NOP";
932 }
933 return ostr;
934 }
935 template<size_t nbits, BlockTripleOperator op, typename bt>
936 inline std::ostream& operator<<(std::ostream& ostr, const blocktriple<nbits, op, bt>& a) {
937 if (a._inf) {
938 ostr << FP_INFINITE;
939 }
940 else {
941 ostr << double(a);
942 }
943 return ostr;
944 }
945
946 template<size_t nbits, BlockTripleOperator op, typename bt>
947 inline std::istream& operator>> (std::istream& istr, const blocktriple<nbits, op, bt>& a) {
948 istr >> a._fraction;
949 return istr;
950 }
951
952 template<size_t nbits, BlockTripleOperator op, typename bt>
953 inline bool operator==(const blocktriple<nbits, op, bt>& lhs, const blocktriple<nbits, op, bt>& rhs) { return lhs._sign == rhs._sign && lhs._scale == rhs._scale && lhs._significant == rhs._significant && lhs._zero == rhs._zero && lhs._inf == rhs._inf; }
954 template<size_t nbits, BlockTripleOperator op, typename bt>
955 inline bool operator!=(const blocktriple<nbits, op, bt>& lhs, const blocktriple<nbits, op, bt>& rhs) { return !operator==(lhs, rhs); }
956 template<size_t nbits, BlockTripleOperator op, typename bt>
957 inline bool operator< (const blocktriple<nbits, op, bt>& lhs, const blocktriple<nbits, op, bt>& rhs) {
958 if (lhs._inf) {
959 if (rhs._inf) return false; else return true; // everything is less than -infinity
960 }
961 else {
962 if (rhs._inf) return false;
963 }
964
965 if (lhs._zero) {
966 if (rhs._zero) return false; // they are both 0
967 if (rhs._sign) return false; else return true;
968 }
969 if (rhs._zero) {
970 if (lhs._sign) return true; else return false;
971 }
972 if (lhs._sign) {
973 if (rhs._sign) { // both operands are negative
974 if (lhs._scale > rhs._scale) {
975 return true; // lhs is more negative
976 }
977 else {
978 if (lhs._scale == rhs._scale) {
979 // compare the fraction, which is an unsigned value
980 if (lhs._significant == rhs._significant) return false; // they are the same value
981 if (lhs._significant > rhs._significant) {
982 return true; // lhs is more negative
983 }
984 else {
985 return false; // lhs is less negative
986 }
987 }
988 else {
989 return false; // lhs is less negative
990 }
991 }
992 }
993 else {
994 return true; // lhs is negative, rhs is positive
995 }
996 }
997 else {
998 if (rhs._sign) {
999 return false; // lhs is positive, rhs is negative
1000 }
1001 else {
1002 if (lhs._scale > rhs._scale) {
1003 return false; // lhs is more positive
1004 }
1005 else {
1006 if (lhs._scale == rhs._scale) {
1007 // compare the fractions
1008 if (lhs._significant == rhs._significant) return false; // they are the same value
1009 if (lhs._significant > rhs._significant) {
1010 return false; // lhs is more positive than rhs
1011 }
1012 else {
1013 return true; // lhs is less positive than rhs
1014 }
1015 }
1016 else {
1017 return true; // lhs is less positive
1018 }
1019 }
1020 }
1021 }
1022 }
1023 template<size_t nbits, BlockTripleOperator op, typename bt>
1024 inline bool operator> (const blocktriple<nbits, op, bt>& lhs, const blocktriple<nbits, op, bt>& rhs) { return operator< (rhs, lhs); }
1025 template<size_t nbits, BlockTripleOperator op, typename bt>
1026 inline bool operator<=(const blocktriple<nbits, op, bt>& lhs, const blocktriple<nbits, op, bt>& rhs) { return !operator> (lhs, rhs); }
1027 template<size_t nbits, BlockTripleOperator op, typename bt>
1028 inline bool operator>=(const blocktriple<nbits, op, bt>& lhs, const blocktriple<nbits, op, bt>& rhs) { return !operator< (lhs, rhs); }
1029
1030
1031 ////////////////////////////////// string conversion functions //////////////////////////////
1032
1033 template<size_t nbits, BlockTripleOperator op, typename bt>
1034 std::string to_binary(const sw::universal::blocktriple<nbits, op, bt>& a, bool nibbleMarker = true) {
1035 return to_triple(a, nibbleMarker);
1036 }
1037
1038 template<size_t nbits, BlockTripleOperator op, typename bt>
1039 std::string to_triple(const blocktriple<nbits, op, bt>& a, bool nibbleMarker = true) {
1040 std::stringstream s;
1041 s << (a._sign ? "(-, " : "(+, ");
1042 s << std::setw(3) << a._scale << ", ";
1043 s << to_binary(a._significant, nibbleMarker) << ')';
1044 return s.str();
1045 }
1046
1047 template<size_t nbits, BlockTripleOperator op, typename bt>
1048 blocktriple<nbits> abs(const blocktriple<nbits, op, bt>& a) {
1049 blocktriple<nbits> absolute(a);
1050 absolute.setpos();
1051 return absolute;
1052 }
1053
1054
1055 } // namespace sw::universal
1056