1 #pragma once
2 // blockfraction.hpp: parameterized blocked binary number system representing a floating-point fraction including leading 1 bit
3 //
4 // Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
5 //
6 // This file is part of the universal numbers project, which is released under an MIT Open Source license.
7 #include <iostream>
8 #include <string>
9 #include <sstream>
10 #include <cmath> // for std::pow() used in conversions to native IEEE-754 formats values
11
12 // should be defined by calling environment, just catching it here just in case it is not
13 #ifndef LONG_DOUBLE_SUPPORT
14 #pragma message("LONG_DOUBLE_SUPPORT is not defined")
15 #define LONG_DOUBLE_SUPPORT 0
16 #endif
17
18 /*
19 The fraction bits in a floating-point representation benefit from different
20 representations for different operators:
21 - for addition and subtraction, a 2's complement encoding is best,
22 - for multiplication, a simple 1's complement encoding is best
23 - for division
24 - for square root
25 a blockfraction type will be marked by its encoding to enable direct code paths.
26 By encoding it in the type, we won't be able to dynamically go between types,
27 but that is ok as the blockfraction is a composition type that gets used
28 by the ephemeral blocktriple type, which is set up for each floating-point
29 operation, used, and then discarded.
30
31 The last piece of information we need to manage for blockfractions is where
32 the radix point is. For add/sub it is at a fixed location, nbits - 3, and
33 for multiplication and division is transforms from the input values to the
34 output values. The blockfraction operators, add, sub, mul, div, sqrt manage
35 this radix point transformation. Fundamentally, the actual bits of the
36 blockfraction are used as a binary encoded integer. The encoding interpretation
37 and the placement of the radix point, are directed by the aggregating class,
38 such as blocktriple.
39 */
40 namespace sw::universal {
41
42 // Encoding of the BlockFraction
43 enum class BitEncoding {
44 Flex, // placeholder for flexible use cases
45 Ones, // 1's complement encoding
46 Twos // 2's complement encoding
47 };
48
49 // forward references
50 template<size_t nbits, typename bt, BitEncoding encoding> class blockfraction;
51 template<size_t nbits, typename bt, BitEncoding encoding> constexpr blockfraction<nbits, bt, encoding> twosComplement(const blockfraction<nbits, bt, encoding>&);
52 template<size_t nbits, typename bt, BitEncoding encoding> struct bfquorem;
53 template<size_t nbits, typename bt, BitEncoding encoding> bfquorem<nbits, bt, encoding> longdivision(const blockfraction<nbits, bt, encoding>&, const blockfraction<nbits, bt, encoding>&);
54
55 // idiv_t for blockfraction<nbits> to capture quotient and remainder during long division
56 template<size_t nbits, typename bt, BitEncoding encoding>
57 struct bfquorem {
bfquoremsw::universal::bfquorem58 bfquorem() {} // default constructors
59 int exceptionId;
60 blockfraction<nbits, bt, encoding> quo; // quotient
61 blockfraction<nbits, bt, encoding> rem; // remainder
62 };
63
64 /*
65 NOTE 1
66 For block arithmetic, we need to manage a carry bit.
67 This disqualifies using uint64_t as a block type as we can't catch the overflow condition
68 in the same way as the other native types, uint8_t, uint16_t, uint32_t.
69 We could use a sint64_t and then convert to uint64_t and observe the MSB.
70 That requires different logic though.
71 TODO: the highest performance for 32bits and up would be to have a uint64_t base type
72 for which we need asm to get the carry bit logic to work.
73
74 TODO: are there mechanisms where we can use SIMD for vector operations?
75 If there are, then doing something with more fitting and smaller base types might
76 yield more concurrency and thus more throughput for that ISA.
77
78
79 NOTE 2
80 adding two block triples of nbits would yield a result of nbits+1. To implement a
81 fast use of blockfraction storage complicates this relationship.
82
83 Standardizing the blocktriple add to take two arguments of nbits, and product a result
84 of nbits+1, makes sense in the abstract pipeline as the triple would gain one bit of
85 accuracy. Any subsequent use would need to make a decision whether to round or not.
86 If we go to a quire, we wouldn't round, if we reassign it to a source precision, we would.
87
88 What is the required API of blockfraction to support that semantic?
89 */
90
91
92
93 /// <summary>
94 /// a block-based floating-point fraction
95 /// for add/sub in 2's complement of the form ##h.fffff
96 /// for mul in sign-magnitude form expanded to 0'00001.fffff
97 /// for div in sign-magnitude form expanded to 00000'00001'fffff
98 ///
99 /// NOTE: don't set a default blocktype as this makes the integration more brittle
100 /// as blocktriple uses the blockfraction as storage class and needs to interact
101 /// with the client number system, which also is blocked. Using the same blocktype
102 /// simplifies the copying of exponent and fraction bits from and to the client.
103 /// </summary>
104 /// <typeparam name="bt"></typeparam>
105 template<size_t _nbits, typename bt, BitEncoding _encoding>
106 class blockfraction {
107 public:
108 typedef bt BlockType;
109 static constexpr size_t nbits = _nbits;
110 static constexpr BitEncoding encoding = _encoding;
111 static constexpr size_t bitsInByte = 8;
112 static constexpr size_t bitsInBlock = sizeof(bt) * bitsInByte;
113 static_assert(bitsInBlock <= 64, "storage unit for block arithmetic needs to be <= uint64_t");
114
115 static constexpr size_t nrBlocks = 1ull + ((nbits - 1ull) / bitsInBlock);
116 static constexpr uint64_t storageMask = (0xFFFF'FFFF'FFFF'FFFFull >> (64 - bitsInBlock));
117 static constexpr uint64_t maxRightShift = ((64 - nbits + 3) > 62) ? 63 : (64 - nbits + 3);
118 static constexpr uint64_t fmask = (64 - nbits + 3) > 63 ? 0ull : (0xFFFF'FFFF'FFFF'FFFFull >> maxRightShift);
119
120 static constexpr size_t MSU = nrBlocks - 1; // MSU == Most Significant Unit
121 static constexpr bt ALL_ONES = bt(~0);
122 static constexpr bt MSU_MASK = (ALL_ONES >> (nrBlocks * bitsInBlock - nbits));
123 static constexpr bt OVERFLOW_BIT = ~(MSU_MASK >> 1) & MSU_MASK;
124
125 // constructors
blockfraction()126 constexpr blockfraction() noexcept : radixPoint{ nbits }, _block { 0 } {}
blockfraction(uint64_t raw,int radixPoint)127 constexpr blockfraction(uint64_t raw, int radixPoint) noexcept : radixPoint{ radixPoint }, _block { 0 } {
128 if constexpr (1 == nrBlocks) {
129 _block[0] = static_cast<bt>(storageMask & raw);;
130 }
131 else if constexpr (2 == nrBlocks) {
132 _block[0] = static_cast<bt>(storageMask & raw);
133 _block[1] = static_cast<bt>(storageMask & (raw >> bitsInBlock));
134 }
135 else if constexpr (3 == nrBlocks) {
136 _block[0] = static_cast<bt>(storageMask & raw);
137 _block[1] = static_cast<bt>(storageMask & (raw >> bitsInBlock));
138 _block[2] = static_cast<bt>(storageMask & (raw >> 2*bitsInBlock));
139 }
140 else if constexpr (4 == nrBlocks) {
141 _block[0] = static_cast<bt>(storageMask & raw);
142 _block[1] = static_cast<bt>(storageMask & (raw >> bitsInBlock));
143 _block[2] = static_cast<bt>(storageMask & (raw >> 2 * bitsInBlock));
144 _block[3] = static_cast<bt>(storageMask & (raw >> 3 * bitsInBlock));
145 }
146 else {
147 for (size_t i = 0; i < nrBlocks; ++i) {
148 _block[i] = static_cast<bt>(storageMask & (raw >> i * bitsInBlock));
149 }
150 }
151 }
152
153 blockfraction(const blockfraction&) noexcept = default;
154 blockfraction(blockfraction&&) noexcept = default;
155
156 blockfraction& operator=(const blockfraction&) noexcept = default;
157 blockfraction& operator=(blockfraction&&) noexcept = default;
158
159 #ifdef NEVER
160 // disable the ability to copy different blockfractions to catch any
161 // unintended (implicit) copies when working with blockfractions.
162 // For performance, the blockfraction must be used in-place.
163 // Typical design, allocates a blocktriple on the stack, and subsequently
164 // uses in add/sub/mul/div/sqrt will directly access the encapsulated blockfraction.
165
166 /// construct a blockfraction from another: bt must be the same
167 template<size_t nnbits>
blockfraction(const blockfraction<nnbits,bt> & rhs)168 blockfraction(const blockfraction<nnbits, bt>& rhs) {
169 this->assign(rhs);
170 }
171
172 // blockfraction cannot have decorated constructors or assignment
173 // as blockfraction does not have all the information to interpret a value
174 // So by design, the class interface does not interact with values
blockfraction(long long initial_value)175 constexpr blockfraction(long long initial_value) noexcept : _block{ 0 } { *this = initial_value; }
176
operator =(long long rhs)177 constexpr blockfraction& operator=(long long rhs) noexcept {
178 if constexpr (1 < nrBlocks) {
179 for (unsigned i = 0; i < nrBlocks; ++i) {
180 _block[i] = rhs & storageMask;
181 rhs >>= bitsInBlock;
182 }
183 // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
184 _block[MSU] &= MSU_MASK;
185 }
186 else if constexpr (1 == nrBlocks) {
187 _block[0] = rhs & storageMask;
188 // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
189 _block[MSU] &= MSU_MASK;
190 }
191 return *this;
192 }
193 #endif
194
195 /// conversion operators
operator float() const196 explicit operator float() const { return float(to_float()); }
operator double() const197 explicit operator double() const { return double(to_double()); }
198 #if LONG_DOUBLE_SUPPORT
operator long double() const199 explicit operator long double() const { return (long double)to_long_double(); }
200 #endif
201
202 /// prefix operators
203 //
204 //
205 // one's complement
operator ~() const206 blockfraction operator~() const {
207 blockfraction complement(*this);
208 complement.flip();
209 return complement;
210 }
211
212 /// logic operators
213 // none
214
215 /// arithmetic operators
216 // none
217
increment()218 void increment() {
219 bool carry = true;
220 for (unsigned i = 0; i < nrBlocks; ++i) {
221 // cast up so we can test for overflow
222 uint64_t l = uint64_t(_block[i]);
223 uint64_t s = l + (carry ? uint64_t(1) : uint64_t(0));
224 carry = (s > ALL_ONES);
225 _block[i] = bt(s);
226 }
227 // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
228 _block[MSU] &= MSU_MASK;
229 }
230 /// <summary>
231 /// add two fractions of the form 00h.fffff, that is, radix point at nbits-3
232 /// In this encoding, all valid values are encapsulated
233 /// </summary>
234 /// <param name="lhs">nbits of fraction in the form 00h.ffff</param>
235 /// <param name="rhs">nbits of fraction in the form 00h.ffff</param>
add(const blockfraction & lhs,const blockfraction & rhs)236 void add(const blockfraction& lhs, const blockfraction& rhs) {
237 bool carry = false;
238 for (unsigned i = 0; i < nrBlocks; ++i) {
239 // cast up so we can test for overflow
240 uint64_t l = uint64_t(lhs._block[i]);
241 uint64_t r = uint64_t(rhs._block[i]);
242 uint64_t s = l + r + (carry ? uint64_t(1) : uint64_t(0));
243 carry = (s > ALL_ONES);
244 _block[i] = bt(s);
245 }
246 // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
247 _block[MSU] &= MSU_MASK;
248 }
sub(const blockfraction & lhs,blockfraction & rhs)249 void sub(const blockfraction& lhs, blockfraction& rhs) {
250 add(lhs, rhs.twosComplement());
251 }
mul(const blockfraction & lhs,const blockfraction & rhs)252 void mul(const blockfraction& lhs, const blockfraction& rhs) {
253 blockfraction<nbits, bt, encoding> base(lhs);
254 blockfraction<nbits, bt, encoding> multiplicant(rhs);
255 clear();
256 for (size_t i = 0; i < nbits; ++i) {
257 if (base.at(i)) {
258 add(*this, multiplicant);
259 }
260 multiplicant <<= 1;
261 }
262 // since we used operator+=, which enforces the nulling of leading bits
263 // we don't need to null here
264 }
div(const blockfraction & lhs,const blockfraction & rhs)265 void div(const blockfraction& lhs, const blockfraction& rhs) {
266 bfquorem<nbits, bt, encoding> result = longdivision(*this, rhs);
267 *this = result.quo;
268 }
269
270 #ifdef FRACTION_REMAINDER
271 // remainder operator
operator %=(const blockfraction & rhs)272 blockfraction& operator%=(const blockfraction& rhs) {
273 bfquorem<nbits, bt, encoding> result = longdivision(*this, rhs);
274 *this = result.rem;
275 return *this;
276 }
277 #endif
278
279 // shift left operator
operator <<=(int bitsToShift)280 blockfraction& operator<<=(int bitsToShift) {
281 if (bitsToShift == 0) return *this;
282 if (bitsToShift < 0) return operator>>=(-bitsToShift);
283 if (bitsToShift > long(nbits)) bitsToShift = nbits; // clip to max
284 if (bitsToShift >= long(bitsInBlock)) {
285 int blockShift = bitsToShift / static_cast<int>(bitsInBlock);
286 for (int i = static_cast<int>(MSU); i >= blockShift; --i) {
287 _block[i] = _block[i - blockShift];
288 }
289 for (int i = blockShift - 1; i >= 0; --i) {
290 _block[i] = bt(0);
291 }
292 // adjust the shift
293 bitsToShift -= static_cast<int>(blockShift * bitsInBlock);
294 if (bitsToShift == 0) return *this;
295 }
296 if constexpr (MSU > 0) {
297 // construct the mask for the upper bits in the block that need to move to the higher word
298 bt mask = 0xFFFFFFFFFFFFFFFF << (bitsInBlock - bitsToShift);
299 for (size_t i = MSU; i > 0; --i) {
300 _block[i] <<= bitsToShift;
301 // mix in the bits from the right
302 bt bits = bt(mask & _block[i - 1]);
303 _block[i] |= (bits >> (bitsInBlock - bitsToShift));
304 }
305 }
306 _block[0] <<= bitsToShift;
307 return *this;
308 }
309
310 // shift right operator
operator >>=(int bitsToShift)311 blockfraction& operator>>=(int bitsToShift) {
312 if (bitsToShift == 0) return *this;
313 if (bitsToShift < 0) return operator<<=(-bitsToShift);
314 if (bitsToShift >= static_cast<int>(nbits)) {
315 setzero();
316 return *this;
317 }
318
319 size_t blockShift = 0;
320 if (bitsToShift >= static_cast<int>(bitsInBlock)) {
321 blockShift = bitsToShift / bitsInBlock;
322 if (MSU >= blockShift) {
323 // shift by blocks
324 for (size_t i = 0; i <= MSU - blockShift; ++i) {
325 _block[i] = _block[i + blockShift];
326 }
327 }
328 // adjust the shift
329 bitsToShift -= static_cast<int>(blockShift * bitsInBlock);
330 if (bitsToShift == 0) {
331 // clean up the blocks we have shifted clean
332 bitsToShift += static_cast<int>(blockShift * bitsInBlock);
333 for (size_t i = nbits - bitsToShift; i < nbits; ++i) {
334 this->setbit(i, false); // reset
335 }
336
337 return *this;
338 }
339 }
340 if constexpr (MSU > 0) {
341 bt mask = ALL_ONES;
342 mask >>= (bitsInBlock - bitsToShift); // this is a mask for the lower bits in the block that need to move to the lower word
343 for (size_t i = 0; i < MSU; ++i) { // TODO: can this be improved? we should not have to work on the upper blocks in case we block shifted
344 _block[i] >>= bitsToShift;
345 // mix in the bits from the left
346 bt bits = bt(mask & _block[i + 1]);
347 _block[i] |= (bits << (bitsInBlock - bitsToShift));
348 }
349 }
350 _block[MSU] >>= bitsToShift;
351
352 // clean up the blocks we have shifted clean
353 bitsToShift += static_cast<int>(blockShift * bitsInBlock);
354 for (size_t i = nbits - bitsToShift; i < nbits; ++i) {
355 this->setbit(i, false); // reset
356 }
357
358 // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
359 _block[MSU] &= MSU_MASK;
360 return *this;
361 }
362
363 // modifiers
364 // clear a block binary number
clear()365 inline constexpr void clear() noexcept {
366 if constexpr (1 == nrBlocks) {
367 _block[0] = 0;
368 }
369 else if constexpr (2 == nrBlocks) {
370 _block[0] = 0;
371 _block[1] = 0;
372 }
373 else if constexpr (3 == nrBlocks) {
374 _block[0] = 0;
375 _block[1] = 0;
376 _block[2] = 0;
377 }
378 else if constexpr (4 == nrBlocks) {
379 _block[0] = 0;
380 _block[1] = 0;
381 _block[2] = 0;
382 _block[3] = 0;
383 }
384 else if constexpr (5 == nrBlocks) {
385 _block[0] = 0;
386 _block[1] = 0;
387 _block[2] = 0;
388 _block[3] = 0;
389 _block[4] = 0;
390 }
391 else if constexpr (6 == nrBlocks) {
392 _block[0] = 0;
393 _block[1] = 0;
394 _block[2] = 0;
395 _block[3] = 0;
396 _block[4] = 0;
397 _block[5] = 0;
398 }
399 else if constexpr (7 == nrBlocks) {
400 _block[0] = 0;
401 _block[1] = 0;
402 _block[2] = 0;
403 _block[3] = 0;
404 _block[4] = 0;
405 _block[5] = 0;
406 _block[6] = 0;
407 }
408 else if constexpr (8 == nrBlocks) {
409 _block[0] = 0;
410 _block[1] = 0;
411 _block[2] = 0;
412 _block[3] = 0;
413 _block[4] = 0;
414 _block[5] = 0;
415 _block[6] = 0;
416 _block[7] = 0;
417 }
418 else {
419 for (size_t i = 0; i < nrBlocks; ++i) {
420 _block[i] = static_cast<bt>(0ull);
421 }
422 }
423 }
setzero()424 inline constexpr void setzero() noexcept { clear(); }
setradix(int radix)425 inline constexpr void setradix(int radix) { radixPoint = radix; }
setbit(size_t i,bool v=true)426 inline constexpr void setbit(size_t i, bool v = true) noexcept {
427 if (i < nbits) {
428 bt block = _block[i / bitsInBlock];
429 bt null = ~(1ull << (i % bitsInBlock));
430 bt bit = bt(v ? 1 : 0);
431 bt mask = bt(bit << (i % bitsInBlock));
432 _block[i / bitsInBlock] = bt((block & null) | mask);
433 }
434 // when i is out of bounds, fail silently as no-op
435 }
setblock(size_t b,const bt & block)436 inline constexpr void setblock(size_t b, const bt& block) noexcept {
437 if (b < nrBlocks) _block[b] = block;
438 // when b is out of bounds, fail silently as no-op
439 }
setbits(uint64_t value)440 inline constexpr void setbits(uint64_t value) noexcept {
441 // radixPoint needs to be set, either using the constructor or the setradix() function
442 if constexpr (1 == nrBlocks) {
443 _block[0] = value & storageMask;
444 }
445 else if constexpr (1 < nrBlocks) {
446 if constexpr (bitsInBlock == 64) {
447 // just set the highest bits with the value provided
448 _block[MSU] = value;
449 }
450 else {
451 for (size_t i = 0; i < nrBlocks; ++i) {
452 _block[i] = value & storageMask;
453 value >>= bitsInBlock;
454 }
455 }
456 }
457 _block[MSU] &= MSU_MASK; // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
458 }
flip()459 inline constexpr blockfraction& flip() noexcept { // in-place one's complement
460 for (size_t i = 0; i < nrBlocks; ++i) {
461 _block[i] = bt(~_block[i]);
462 }
463 _block[MSU] &= MSU_MASK; // assert precondition of properly nulled leading non-bits
464 return *this;
465 }
466 // in-place 2's complement
twosComplement()467 inline constexpr blockfraction& twosComplement() noexcept {
468 blockfraction<nbits, bt, encoding> plusOne;
469 plusOne.setbit(0);
470 flip();
471 add(*this, plusOne);
472 return *this;
473 }
474
475 // selectors
iszero() const476 inline constexpr bool iszero() const noexcept {
477 for (size_t i = 0; i < nrBlocks; ++i) if (_block[i] != 0) return false;
478 return true;
479 }
radix() const480 inline constexpr int radix() const { return radixPoint; }
isodd() const481 inline constexpr bool isodd() const noexcept { return _block[0] & 0x1; }
iseven() const482 inline constexpr bool iseven() const noexcept { return !isodd(); }
sign() const483 inline constexpr bool sign() const { return false; } // dummy to unify the API with other number systems in Universal
test(size_t bitIndex) const484 inline constexpr bool test(size_t bitIndex) const noexcept { return at(bitIndex); }
at(size_t bitIndex) const485 inline constexpr bool at(size_t bitIndex) const noexcept {
486 if (bitIndex >= nbits) return false;
487 bt word = _block[bitIndex / bitsInBlock];
488 bt mask = bt(1ull << (bitIndex % bitsInBlock));
489 return (word & mask);
490 }
491 // check carry bit in output of the ALU
checkCarry() const492 inline constexpr bool checkCarry() const noexcept { return at(nbits - 2); }
493 // helpers
nibble(size_t n) const494 inline constexpr uint8_t nibble(size_t n) const {
495 if (n < (1 + ((nbits - 1) >> 2))) {
496 bt word = _block[(n * 4) / bitsInBlock];
497 size_t nibbleIndexInWord = n % (bitsInBlock >> 2);
498 bt mask = static_cast<bt>(0x0Fu << (nibbleIndexInWord*4));
499 bt nibblebits = static_cast<bt>(mask & word);
500 return static_cast<uint8_t>(nibblebits >> static_cast<bt>(nibbleIndexInWord*4));
501 }
502 throw "nibble index out of bounds";
503 }
block(size_t b) const504 inline constexpr bt block(size_t b) const noexcept {
505 if (b >= nrBlocks) return bt{ 0 };
506 return _block[b];
507 }
fraction_ull() const508 inline constexpr uint64_t fraction_ull() const noexcept {
509 uint64_t raw = get_ull();
510 // remove the non-fraction bits
511 uint64_t fractionBits = (0xFFFF'FFFF'FFFF'FFFFull >> (64 - radixPoint));
512 raw &= fractionBits;
513 return raw;
514 }
get_ull() const515 inline constexpr uint64_t get_ull() const noexcept {
516 uint64_t raw{ 0 };
517 if constexpr (bitsInBlock < 64) {
518 if constexpr (1 == nrBlocks) {
519 raw = _block[MSU];
520 raw &= MSU_MASK;
521 }
522 else if constexpr (2 == nrBlocks) {
523 raw = _block[MSU];
524 raw &= MSU_MASK;
525 raw <<= bitsInBlock;
526 raw |= _block[0];
527 }
528 else if constexpr (3 == nrBlocks) {
529 raw = _block[MSU];
530 raw &= MSU_MASK;
531 raw <<= bitsInBlock;
532 raw |= _block[1];
533 raw <<= bitsInBlock;
534 raw |= _block[0];
535 }
536 else if constexpr (4 == nrBlocks) {
537 raw = _block[MSU];
538 raw &= MSU_MASK;
539 raw <<= bitsInBlock;
540 raw |= _block[2];
541 raw <<= bitsInBlock;
542 raw |= _block[1];
543 raw <<= bitsInBlock;
544 raw |= _block[0];
545 }
546 else {
547 raw = _block[MSU];
548 raw &= MSU_MASK;
549 for (int i = MSU - 1; i >= 0; --i) {
550 raw <<= bitsInBlock;
551 raw |= _block[i];
552 }
553 }
554 }
555 else { // take top 64bits and ignore the rest
556 raw = _block[MSU];
557 raw &= MSU_MASK;
558 }
559 return raw;
560 }
561 #ifdef DEPRECATED
562 // copy a value over from one blockfraction to this blockfraction
563 // blockfraction is a 2's complement encoding, so we sign-extend by default
564 template<size_t srcbits>
assign(const blockfraction<srcbits,bt> & rhs)565 inline blockfraction<nbits, bt>& assign(const blockfraction<srcbits, bt>& rhs) {
566 clear();
567 // since bt is the same, we can directly copy the blocks in
568 size_t minNrBlocks = (this->nrBlocks < rhs.nrBlocks) ? this->nrBlocks : rhs.nrBlocks;
569 for (size_t i = 0; i < minNrBlocks; ++i) {
570 _block[i] = rhs.block(i);
571 }
572 if constexpr (nbits > srcbits) { // check if we need to sign extend
573 if (rhs.sign()) {
574 for (size_t i = srcbits; i < nbits; ++i) { // TODO: replace bit-oriented sequence with block
575 setbit(i);
576 }
577 }
578 }
579 // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
580 _block[MSU] &= MSU_MASK;
581 return *this;
582 }
583
584 // copy a value over from one blockfraction to this without sign-extending the value
585 // blockfraction is a 2's complement encoding, so we sign-extend by default
586 // for fraction/significent encodings, we need to turn off sign-extending.
587 template<size_t srcbits>
assignWithoutSignExtend(const blockfraction<srcbits,bt> & rhs)588 inline blockfraction<nbits, bt>& assignWithoutSignExtend(const blockfraction<srcbits, bt>& rhs) {
589 clear();
590 // since bt is the same, we can simply copy the blocks in
591 size_t minNrBlocks = (this->nrBlocks < rhs.nrBlocks) ? this->nrBlocks : rhs.nrBlocks;
592 for (size_t i = 0; i < minNrBlocks; ++i) {
593 _block[i] = rhs.block(i);
594 }
595 // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
596 _block[MSU] &= MSU_MASK;
597 return *this;
598 }
599 #endif
600 // return the position of the most significant bit, -1 if v == 0
msb() const601 inline int msb() const noexcept {
602 for (int i = int(MSU); i >= 0; --i) {
603 if (_block[i] != 0) {
604 bt mask = (bt(1u) << (bitsInBlock-1));
605 for (int j = bitsInBlock - 1; j >= 0; --j) {
606 if (_block[i] & mask) {
607 return i * static_cast<int>(bitsInBlock) + j;
608 }
609 mask >>= 1;
610 }
611 }
612 }
613 return -1; // no significant bit found, all bits are zero
614 }
615
616 // conversion to native types
to_float() const617 inline constexpr float to_float() const noexcept {
618 return float(to_double());
619 }
to_double() const620 inline constexpr double to_double() const noexcept {
621 double d{ 0.0 };
622 blockfraction<nbits, bt, encoding> tmp(*this);
623 int bit = static_cast<int>(nbits - 1);
624 int shift = static_cast<int>(nbits - 1 - radixPoint);
625
626 // special case preprocessing for 2's complement encodings
627 if constexpr (encoding == BitEncoding::Twos) {
628 // nbits in the target form 00h.fffff, check msb and if set take 2's complement
629 if (test(static_cast<size_t>(bit--))) tmp.twosComplement();
630 --shift; // and remove the MSB from the value computation
631 }
632
633 // process the value above the radix
634 size_t bitValue = 1ull << shift;
635 for (; bit >= radixPoint; --bit) {
636 if (tmp.test(static_cast<size_t>(bit))) d += static_cast<double>(bitValue);
637 bitValue >>= 1;
638 }
639 // process the value below the radix
640 double v = std::pow(2.0, -double(radixPoint));
641 for (size_t fbit = 0; fbit < static_cast<size_t>(radixPoint); ++fbit) {
642 if (tmp.test(fbit)) d += v;
643 v *= 2.0;
644 }
645
646 // if constexpr (nbits > 49) { // check if we can represent this value with a native normal double with 52 fraction bits => nbits <= (52 - 3)
647 // std::cerr << "to_double() will yield inaccurate result since blockfraction has more precision than native IEEE-754 double\n";
648 // }
649
650 return d;
651 }
to_long_double() const652 inline constexpr long double to_long_double() const noexcept {
653 return (long double)to_double();
654 }
655
656 // determine the rounding mode: up if true, down if false
657 // fraction bits: bbbbbbbbbbb
658 // target LSB : |
659 // guard : |
660 // round : |
661 // sticky : ---
roundingMode(size_t targetLsb) const662 bool roundingMode(size_t targetLsb) const {
663 bool lsb = at(targetLsb);
664 bool guard = (targetLsb == 0 ? false : at(targetLsb - 1));
665 bool round = (targetLsb <= 1 ? false : at(targetLsb - 2));
666 bool sticky = (targetLsb <= 2 ? false : any(targetLsb - 3));
667 bool tie = guard && !round && !sticky;
668 return (lsb && tie) || (guard && !tie);
669 }
any(size_t msb) const670 bool any(size_t msb) const {
671 msb = (msb > nbits - 1 ? nbits - 1 : msb);
672 size_t topBlock = msb / bitsInBlock;
673 bt mask = bt(ALL_ONES >> (bitsInBlock - 1 - (msb % bitsInBlock)));
674 for (size_t i = 0; i < topBlock; ++i) {
675 if (_block[i] > 0) return true;
676 }
677 // process the partial block
678 if (_block[topBlock] & mask) return true;
679 return false;
680 }
681
682 protected:
683 // HELPER methods
684 // none
685
686 public:
687 int radixPoint;
688 bt _block[nrBlocks];
689
690 private:
691 //////////////////////////////////////////////////////////////////////////////
692 // friend functions
693
694 // integer - integer logic comparisons
695 template<size_t N, typename B, BitEncoding C>
696 friend bool operator==(const blockfraction<N,B,C>& lhs, const blockfraction<N, B, C>& rhs);
697 template<size_t N, typename B, BitEncoding C>
698 friend bool operator!=(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs);
699 // the other logic operators are defined in terms of arithmetic terms
700
701 template<size_t N, typename B, BitEncoding C>
702 friend std::ostream& operator<<(std::ostream& ostr, const blockfraction<N, B, C>& v);
703 };
704
705 //////////////////////////////////////////////////////////////////////////////////
706 // stream operators
707
708 // ostream operator
709 template<size_t nbits, typename bt, BitEncoding encoding>
operator <<(std::ostream & ostr,const blockfraction<nbits,bt,encoding> & number)710 std::ostream& operator<<(std::ostream& ostr, const blockfraction<nbits, bt, encoding>& number) {
711 return ostr << double(number);
712 }
713
714 //////////////////////////////////////////////////////////////////////////////
715 // conversions to string representations
716
717 // create a binary representation of the blockfraction: 00h.ffff
718 // by design, the radix point is at nbits-3
719 template<size_t nbits, typename bt, BitEncoding encoding>
to_binary(const blockfraction<nbits,bt,encoding> & number,bool nibbleMarker=false)720 std::string to_binary(const blockfraction<nbits, bt, encoding>& number, bool nibbleMarker = false) {
721 std::stringstream s;
722 s << "0b";
723 #ifdef DEPRECATED
724 int i = nbits - 1;
725 s << (number.at(size_t(i--)) ? '1' : '0'); // sign indicator of 2's complement
726 s << (number.at(size_t(i--)) ? '1' : '0'); // overflow indicator to trigger right shift
727 s << (number.at(size_t(i--)) ? '1' : '0'); // the hidden bit
728 s << '.';
729 for (; i >= 0; --i) {
730 s << (number.at(size_t(i)) ? '1' : '0');
731 if (i > 0 && (i % 4) == 0 && nibbleMarker) s << '\'';
732 }
733 #endif
734 for (int i = nbits - 1; i >= 0; --i) {
735 s << (number.at(size_t(i)) ? '1' : '0');
736 if (i == number.radix()) {
737 s << '.';
738 }
739 else {
740 if (i > 0 && (i % 4) == 0 && nibbleMarker) s << '\'';
741 }
742 }
743 return s.str();
744 }
745
746 // local helper to display the contents of a byte array
747 template<size_t nbits, typename bt, BitEncoding encoding>
to_hex(const blockfraction<nbits,bt,encoding> & number,bool wordMarker=true)748 std::string to_hex(const blockfraction<nbits, bt, encoding>& number, bool wordMarker = true) {
749 static constexpr size_t bitsInByte = 8;
750 static constexpr size_t bitsInBlock = sizeof(bt) * bitsInByte;
751 char hexChar[16] = {
752 '0', '1', '2', '3', '4', '5', '6', '7',
753 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
754 };
755 std::stringstream ss;
756 ss << "0x" << std::hex;
757 int nrNibbles = int(1 + ((nbits - 1) >> 2));
758 for (int n = nrNibbles - 1; n >= 0; --n) {
759 uint8_t nibble = number.nibble(static_cast<size_t>(n));
760 ss << hexChar[nibble];
761 if (wordMarker && n > 0 && ((n * 4ll) % bitsInBlock) == 0) ss << '\'';
762 }
763 return ss.str();
764 }
765
766 //////////////////////////////////////////////////////////////////////////////////
767 // logic operators
768
769 template<size_t N, typename B, BitEncoding C>
operator ==(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)770 inline bool operator==(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
771 for (size_t i = 0; i < lhs.nrBlocks; ++i) {
772 if (lhs._block[i] != rhs._block[i]) {
773 return false;
774 }
775 }
776 return true;
777 }
778 template<size_t N, typename B, BitEncoding C>
operator !=(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)779 inline bool operator!=(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
780 return !operator==(lhs, rhs);
781 }
782 template<size_t N, typename B, BitEncoding C>
operator <(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)783 inline bool operator<(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
784 if (lhs.ispos() && rhs.isneg()) return false; // need to filter out possible overflow conditions
785 if (lhs.isneg() && rhs.ispos()) return true; // need to filter out possible underflow conditions
786 if (lhs == rhs) return false; // so the maxneg logic works
787 blockfraction<N, B, C> mneg; maxneg<N, B>(mneg);
788 if (rhs == mneg) return false; // special case: nothing is smaller than maximum negative
789 blockfraction<N, B, C> diff = lhs - rhs;
790 return diff.isneg();
791 }
792 template<size_t N, typename B, BitEncoding C>
operator <=(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)793 inline bool operator<=(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
794 return (lhs < rhs || lhs == rhs);
795 }
796 template<size_t N, typename B, BitEncoding C>
operator >(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)797 inline bool operator>(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
798 return !(lhs <= rhs);
799 }
800 template<size_t N, typename B, BitEncoding C>
operator >=(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)801 inline bool operator>=(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
802 return !(lhs < rhs);
803 }
804
805 ///////////////////////////////////////////////////////////////////////////////
806 // binary operators
807
808 template<size_t nbits, typename bt, BitEncoding encoding>
operator <<(const blockfraction<nbits,bt,encoding> & a,const long b)809 inline blockfraction<nbits, bt, encoding> operator<<(const blockfraction<nbits, bt, encoding>& a, const long b) {
810 blockfraction<nbits, bt, encoding> c(a);
811 return c <<= b;
812 }
813 template<size_t nbits, typename bt, BitEncoding encoding>
operator >>(const blockfraction<nbits,bt,encoding> & a,const long b)814 inline blockfraction<nbits, bt, encoding> operator>>(const blockfraction<nbits, bt, encoding>& a, const long b) {
815 blockfraction<nbits, bt, encoding> c(a);
816 return c >>= b;
817 }
818
819 // divide a by b and return both quotient and remainder
820 template<size_t nbits, typename bt, BitEncoding encoding>
longdivision(const blockfraction<nbits,bt,encoding> & _a,const blockfraction<nbits,bt,encoding> & _b)821 bfquorem<nbits, bt, encoding> longdivision(const blockfraction<nbits, bt, encoding>& _a, const blockfraction<nbits, bt, encoding>& _b) {
822 bfquorem<nbits, bt, encoding> result;
823 if (_b.iszero()) {
824 result.exceptionId = 1; // division by zero
825 return result;
826 }
827 #ifdef LATER
828 // generate the absolute values to do long division
829 // 2's complement special case -max requires an signed int that is 1 bit bigger to represent abs()
830 bool a_sign = _a.sign();
831 bool b_sign = _b.sign();
832 bool result_negative = (a_sign ^ b_sign);
833 // normalize both arguments to positive, which requires expansion by 1-bit to deal with maxneg
834 blockfraction<nbits + 1, bt, encoding> a(_a);
835 blockfraction<nbits + 1, bt, encoding> b(_b);
836 if (a_sign) a.twosComplement();
837 if (b_sign) b.twosComplement();
838
839 if (a < b) { // optimization for integer numbers
840 result.rem = _a; // a % b = a when a / b = 0
841 return result; // a / b = 0 when b > a
842 }
843 // initialize the long division
844 blockfraction<nbits + 1, bt, encoding> accumulator = a;
845 // prepare the subtractand
846 blockfraction<nbits + 1, bt, encoding> subtractand = b;
847 int msb_b = b.msb();
848 int msb_a = a.msb();
849 int shift = msb_a - msb_b;
850 subtractand <<= shift;
851 // long division
852 for (int i = shift; i >= 0; --i) {
853 if (subtractand <= accumulator) {
854 accumulator -= subtractand;
855 result.quo.set(static_cast<size_t>(i));
856 }
857 else {
858 result.quo.reset(static_cast<size_t>(i));
859 }
860 subtractand >>= 1;
861 }
862 if (result_negative) { // take 2's complement
863 result.quo.flip();
864 result.quo += 1;
865 }
866 if (_a.isneg()) {
867 result.rem = -accumulator;
868 }
869 else {
870 result.rem = accumulator;
871 }
872 #endif
873 return result;
874 }
875
876 ///////////////////////////////////////////////////////////////////////////////
877 // specialty binary operators
878
879
880 #define TRACE_DIV 0
881 // unrounded division, returns a blockfraction that is of size 2*nbits
882 template<size_t nbits, size_t roundingBits, typename bt, BitEncoding encoding>
urdiv(const blockfraction<nbits,bt,encoding> & a,const blockfraction<nbits,bt,encoding> & b,blockfraction<roundingBits,bt,encoding> & r)883 inline blockfraction<2 * nbits + roundingBits, bt, encoding> urdiv(const blockfraction<nbits, bt, encoding>& a, const blockfraction<nbits, bt, encoding>& b, blockfraction<roundingBits, bt, encoding>& r) {
884 if (b.iszero()) {
885 // division by zero
886 throw "urdiv divide by zero";
887 }
888 // generate the absolute values to do long division
889 // 2's complement special case -max requires an signed int that is 1 bit bigger to represent abs()
890 bool a_sign = a.sign();
891 bool b_sign = b.sign();
892 bool result_negative = (a_sign ^ b_sign);
893
894 // normalize both arguments to positive in new size
895 blockfraction<nbits + 1, bt, encoding> a_new(a); // TODO optimize: now create a, create _a.bb, copy, destroy _a.bb_copy
896 blockfraction<nbits + 1, bt, encoding> b_new(b);
897 if (a_sign) a_new.twoscomplement();
898 if (b_sign) b_new.twoscomplement();
899
900 // initialize the long division
901 blockfraction<2 * nbits + roundingBits, bt, encoding> decimator(a_new);
902 blockfraction<2 * nbits + roundingBits, bt, encoding> subtractand(b_new); // prepare the subtractand
903 blockfraction<2 * nbits + roundingBits, bt, encoding> result;
904
905 int msp = nbits + roundingBits - 1; // msp = most significant position
906 decimator <<= msp; // scale the decimator to the largest possible positive value
907
908 int msb_b = subtractand.msb();
909 int msb_a = decimator.msb();
910 int shift = msb_a - msb_b;
911 int scale = shift - msp; // scale of the result quotient
912 subtractand <<= shift;
913
914 #if TRACE_DIV
915 std::cout << " " << to_binary(decimator) << std::endl;
916 std::cout << "- " << to_binary(subtractand) << " shift: " << shift << std::endl;
917 #endif
918 // long division
919 for (int i = msb_a; i >= 0; --i) {
920
921 if (subtractand <= decimator) {
922 decimator -= subtractand;
923 result.set(static_cast<size_t>(i));
924 }
925 else {
926 result.reset(static_cast<size_t>(i));
927 }
928 subtractand >>= 1;
929
930 #if TRACE_DIV
931 std::cout << " " << to_binary(decimator) << ' ' << to_binary(result) << std::endl;
932 std::cout << "- " << to_binary(subtractand) << std::endl;
933 #endif
934 }
935 result <<= scale;
936 if (result_negative) result.twosComplement();
937 r.assign(result); // copy the lowest bits which represent the bits on which we need to apply the rounding test
938 return result;
939 }
940
941 // free function generator of the 2's complement of a blockfraction
942 template<size_t nbits, typename bt, BitEncoding encoding>
twosComplement(const blockfraction<nbits,bt,encoding> & a)943 inline constexpr blockfraction<nbits, bt, encoding> twosComplement(const blockfraction<nbits, bt, encoding>& a) {
944 blockfraction<nbits, bt, encoding> b(a);
945 return b.twosComplement();
946 }
947
948 } // namespace sw::universal
949