1 #pragma once
2 // blockfraction.hpp: parameterized blocked binary number system representing a floating-point fraction including leading 1 bit
3 //
4 // Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
5 //
6 // This file is part of the universal numbers project, which is released under an MIT Open Source license.
7 #include <iostream>
8 #include <string>
9 #include <sstream>
10 #include <cmath> // for std::pow() used in conversions to native IEEE-754 formats values
11 
12 // should be defined by calling environment, just catching it here just in case it is not
13 #ifndef LONG_DOUBLE_SUPPORT
14 #pragma message("LONG_DOUBLE_SUPPORT is not defined")
15 #define LONG_DOUBLE_SUPPORT 0
16 #endif
17 
18 /*
19    The fraction bits in a floating-point representation benefit from different
20    representations for different operators:
21    - for addition and subtraction, a 2's complement encoding is best,
22    - for multiplication, a simple 1's complement encoding is best
23    - for division
24    - for square root
25    a blockfraction type will be marked by its encoding to enable direct code paths.
26    By encoding it in the type, we won't be able to dynamically go between types,
27    but that is ok as the blockfraction is a composition type that gets used
28    by the ephemeral blocktriple type, which is set up for each floating-point
29    operation, used, and then discarded.
30 
31    The last piece of information we need to manage for blockfractions is where
32    the radix point is. For add/sub it is at a fixed location, nbits - 3, and
33    for multiplication and division is transforms from the input values to the
34    output values. The blockfraction operators, add, sub, mul, div, sqrt manage
35    this radix point transformation. Fundamentally, the actual bits of the
36    blockfraction are used as a binary encoded integer. The encoding interpretation
37    and the placement of the radix point, are directed by the aggregating class,
38    such as blocktriple.
39  */
40 namespace sw::universal {
41 
42 	// Encoding of the BlockFraction
43 	enum class BitEncoding {
44 		Flex,        // placeholder for flexible use cases
45 		Ones,        // 1's complement encoding
46 		Twos         // 2's complement encoding
47 	};
48 
49 // forward references
50 template<size_t nbits, typename bt, BitEncoding encoding> class blockfraction;
51 template<size_t nbits, typename bt, BitEncoding encoding> constexpr blockfraction<nbits, bt, encoding> twosComplement(const blockfraction<nbits, bt, encoding>&);
52 template<size_t nbits, typename bt, BitEncoding encoding> struct bfquorem;
53 template<size_t nbits, typename bt, BitEncoding encoding> bfquorem<nbits, bt, encoding> longdivision(const blockfraction<nbits, bt, encoding>&, const blockfraction<nbits, bt, encoding>&);
54 
55 // idiv_t for blockfraction<nbits> to capture quotient and remainder during long division
56 template<size_t nbits, typename bt, BitEncoding encoding>
57 struct bfquorem {
bfquoremsw::universal::bfquorem58 	bfquorem() {} // default constructors
59 	int exceptionId;
60 	blockfraction<nbits, bt, encoding> quo; // quotient
61 	blockfraction<nbits, bt, encoding> rem; // remainder
62 };
63 
64 /*
65 NOTE 1
66    For block arithmetic, we need to manage a carry bit.
67 This disqualifies using uint64_t as a block type as we can't catch the overflow condition
68 in the same way as the other native types, uint8_t, uint16_t, uint32_t.
69    We could use a sint64_t and then convert to uint64_t and observe the MSB.
70 That requires different logic though.
71 TODO: the highest performance for 32bits and up would be to have a uint64_t base type
72 for which we need asm to get the carry bit logic to work.
73 
74 TODO: are there mechanisms where we can use SIMD for vector operations?
75 If there are, then doing something with more fitting and smaller base types might
76 yield more concurrency and thus more throughput for that ISA.
77 
78 
79 NOTE 2
80 adding two block triples of nbits would yield a result of nbits+1. To implement a
81 fast use of blockfraction storage complicates this relationship.
82 
83 Standardizing the blocktriple add to take two arguments of nbits, and product a result
84 of nbits+1, makes sense in the abstract pipeline as the triple would gain one bit of
85 accuracy. Any subsequent use would need to make a decision whether to round or not.
86 If we go to a quire, we wouldn't round, if we reassign it to a source precision, we would.
87 
88 What is the required API of blockfraction to support that semantic?
89 */
90 
91 
92 
93 /// <summary>
94 /// a block-based floating-point fraction
95 /// for add/sub  in 2's complement of the form  ##h.fffff
96 /// for mul      in sign-magnitude form expanded to 0'00001.fffff
97 /// for div      in sign-magnitude form expanded to 00000'00001'fffff
98 ///
99 /// NOTE: don't set a default blocktype as this makes the integration more brittle
100 /// as blocktriple uses the blockfraction as storage class and needs to interact
101 /// with the client number system, which also is blocked. Using the same blocktype
102 /// simplifies the copying of exponent and fraction bits from and to the client.
103 /// </summary>
104 /// <typeparam name="bt"></typeparam>
105 template<size_t _nbits, typename bt, BitEncoding _encoding>
106 class blockfraction {
107 public:
108 	typedef bt BlockType;
109 	static constexpr size_t nbits = _nbits;
110 	static constexpr BitEncoding encoding = _encoding;
111 	static constexpr size_t bitsInByte = 8;
112 	static constexpr size_t bitsInBlock = sizeof(bt) * bitsInByte;
113 	static_assert(bitsInBlock <= 64, "storage unit for block arithmetic needs to be <= uint64_t");
114 
115 	static constexpr size_t nrBlocks = 1ull + ((nbits - 1ull) / bitsInBlock);
116 	static constexpr uint64_t storageMask = (0xFFFF'FFFF'FFFF'FFFFull >> (64 - bitsInBlock));
117 	static constexpr uint64_t maxRightShift = ((64 - nbits + 3) > 62) ? 63 : (64 - nbits + 3);
118 	static constexpr uint64_t fmask = (64 - nbits + 3) > 63 ? 0ull : (0xFFFF'FFFF'FFFF'FFFFull >> maxRightShift);
119 
120 	static constexpr size_t MSU = nrBlocks - 1; // MSU == Most Significant Unit
121 	static constexpr bt ALL_ONES = bt(~0);
122 	static constexpr bt MSU_MASK = (ALL_ONES >> (nrBlocks * bitsInBlock - nbits));
123 	static constexpr bt OVERFLOW_BIT = ~(MSU_MASK >> 1) & MSU_MASK;
124 
125 	// constructors
blockfraction()126 	constexpr blockfraction() noexcept : radixPoint{ nbits }, _block { 0 } {}
blockfraction(uint64_t raw,int radixPoint)127 	constexpr blockfraction(uint64_t raw, int radixPoint) noexcept : radixPoint{ radixPoint }, _block { 0 } {
128 		if constexpr (1 == nrBlocks) {
129 			_block[0] = static_cast<bt>(storageMask & raw);;
130 		}
131 		else if constexpr (2 == nrBlocks) {
132 			_block[0] = static_cast<bt>(storageMask & raw);
133 			_block[1] = static_cast<bt>(storageMask & (raw >> bitsInBlock));
134 		}
135 		else if constexpr (3 == nrBlocks) {
136 			_block[0] = static_cast<bt>(storageMask & raw);
137 			_block[1] = static_cast<bt>(storageMask & (raw >> bitsInBlock));
138 			_block[2] = static_cast<bt>(storageMask & (raw >> 2*bitsInBlock));
139 		}
140 		else if constexpr (4 == nrBlocks) {
141 			_block[0] = static_cast<bt>(storageMask & raw);
142 			_block[1] = static_cast<bt>(storageMask & (raw >> bitsInBlock));
143 			_block[2] = static_cast<bt>(storageMask & (raw >> 2 * bitsInBlock));
144 			_block[3] = static_cast<bt>(storageMask & (raw >> 3 * bitsInBlock));
145 		}
146 		else {
147 			for (size_t i = 0; i < nrBlocks; ++i) {
148 				_block[i] = static_cast<bt>(storageMask & (raw >> i * bitsInBlock));
149 			}
150 		}
151 	}
152 
153 	blockfraction(const blockfraction&) noexcept = default;
154 	blockfraction(blockfraction&&) noexcept = default;
155 
156 	blockfraction& operator=(const blockfraction&) noexcept = default;
157 	blockfraction& operator=(blockfraction&&) noexcept = default;
158 
159 #ifdef NEVER
160 	// disable the ability to copy different blockfractions to catch any
161 	// unintended (implicit) copies when working with blockfractions.
162 	// For performance, the blockfraction must be used in-place.
163 	// Typical design, allocates a blocktriple on the stack, and subsequently
164 	// uses in add/sub/mul/div/sqrt will directly access the encapsulated blockfraction.
165 
166 	/// construct a blockfraction from another: bt must be the same
167 	template<size_t nnbits>
blockfraction(const blockfraction<nnbits,bt> & rhs)168 	blockfraction(const blockfraction<nnbits, bt>& rhs) {
169 		this->assign(rhs);
170 	}
171 
172 	// blockfraction cannot have decorated constructors or assignment
173 	// as blockfraction does not have all the information to interpret a value
174 	// So by design, the class interface does not interact with values
blockfraction(long long initial_value)175 	constexpr blockfraction(long long initial_value) noexcept : _block{ 0 } { *this = initial_value; }
176 
operator =(long long rhs)177 	constexpr blockfraction& operator=(long long rhs) noexcept {
178 		if constexpr (1 < nrBlocks) {
179 			for (unsigned i = 0; i < nrBlocks; ++i) {
180 				_block[i] = rhs & storageMask;
181 				rhs >>= bitsInBlock;
182 			}
183 			// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
184 			_block[MSU] &= MSU_MASK;
185 		}
186 		else if constexpr (1 == nrBlocks) {
187 			_block[0] = rhs & storageMask;
188 			// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
189 			_block[MSU] &= MSU_MASK;
190 		}
191 		return *this;
192 	}
193 #endif
194 
195 	/// conversion operators
operator float() const196 	explicit operator float() const              { return float(to_float()); }
operator double() const197 	explicit operator double() const             { return double(to_double()); }
198 #if LONG_DOUBLE_SUPPORT
operator long double() const199 	explicit operator long double() const        { return (long double)to_long_double(); }
200 #endif
201 
202 	/// prefix operators
203 	//
204 	//
205 	// one's complement
operator ~() const206 	blockfraction operator~() const {
207 		blockfraction complement(*this);
208 		complement.flip();
209 		return complement;
210 	}
211 
212 	/// logic operators
213 	// none
214 
215 	/// arithmetic operators
216 	// none
217 
increment()218 	void increment() {
219 		bool carry = true;
220 		for (unsigned i = 0; i < nrBlocks; ++i) {
221 			// cast up so we can test for overflow
222 			uint64_t l = uint64_t(_block[i]);
223 			uint64_t s = l + (carry ? uint64_t(1) : uint64_t(0));
224 			carry = (s > ALL_ONES);
225 			_block[i] = bt(s);
226 		}
227 		// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
228 		_block[MSU] &= MSU_MASK;
229 	}
230 	/// <summary>
231 	/// add two fractions of the form 00h.fffff, that is, radix point at nbits-3
232 	/// In this encoding, all valid values are encapsulated
233 	/// </summary>
234 	/// <param name="lhs">nbits of fraction in the form 00h.ffff</param>
235 	/// <param name="rhs">nbits of fraction in the form 00h.ffff</param>
add(const blockfraction & lhs,const blockfraction & rhs)236 	void add(const blockfraction& lhs, const blockfraction& rhs) {
237 		bool carry = false;
238 		for (unsigned i = 0; i < nrBlocks; ++i) {
239 			// cast up so we can test for overflow
240 			uint64_t l = uint64_t(lhs._block[i]);
241 			uint64_t r = uint64_t(rhs._block[i]);
242 			uint64_t s = l + r + (carry ? uint64_t(1) : uint64_t(0));
243 			carry = (s > ALL_ONES);
244 			_block[i] = bt(s);
245 		}
246 		// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
247 		_block[MSU] &= MSU_MASK;
248 	}
sub(const blockfraction & lhs,blockfraction & rhs)249 	void sub(const blockfraction& lhs, blockfraction& rhs) {
250 		add(lhs, rhs.twosComplement());
251 	}
mul(const blockfraction & lhs,const blockfraction & rhs)252 	void mul(const blockfraction& lhs, const blockfraction& rhs) {
253 		blockfraction<nbits, bt, encoding> base(lhs);
254 		blockfraction<nbits, bt, encoding> multiplicant(rhs);
255 		clear();
256 		for (size_t i = 0; i < nbits; ++i) {
257 			if (base.at(i)) {
258 				add(*this, multiplicant);
259 			}
260 			multiplicant <<= 1;
261 		}
262 		// since we used operator+=, which enforces the nulling of leading bits
263 		// we don't need to null here
264 	}
div(const blockfraction & lhs,const blockfraction & rhs)265 	void div(const blockfraction& lhs, const blockfraction& rhs) {
266 		bfquorem<nbits, bt, encoding> result = longdivision(*this, rhs);
267 		*this = result.quo;
268 	}
269 
270 #ifdef FRACTION_REMAINDER
271 	// remainder operator
operator %=(const blockfraction & rhs)272 	blockfraction& operator%=(const blockfraction& rhs) {
273 		bfquorem<nbits, bt, encoding> result = longdivision(*this, rhs);
274 		*this = result.rem;
275 		return *this;
276 	}
277 #endif
278 
279 	// shift left operator
operator <<=(int bitsToShift)280 	blockfraction& operator<<=(int bitsToShift) {
281 		if (bitsToShift == 0) return *this;
282 		if (bitsToShift < 0) return operator>>=(-bitsToShift);
283 		if (bitsToShift > long(nbits)) bitsToShift = nbits; // clip to max
284 		if (bitsToShift >= long(bitsInBlock)) {
285 			int blockShift = bitsToShift / static_cast<int>(bitsInBlock);
286 			for (int i = static_cast<int>(MSU); i >= blockShift; --i) {
287 				_block[i] = _block[i - blockShift];
288 			}
289 			for (int i = blockShift - 1; i >= 0; --i) {
290 				_block[i] = bt(0);
291 			}
292 			// adjust the shift
293 			bitsToShift -= static_cast<int>(blockShift * bitsInBlock);
294 			if (bitsToShift == 0) return *this;
295 		}
296 		if constexpr (MSU > 0) {
297 			// construct the mask for the upper bits in the block that need to move to the higher word
298 			bt mask = 0xFFFFFFFFFFFFFFFF << (bitsInBlock - bitsToShift);
299 			for (size_t i = MSU; i > 0; --i) {
300 				_block[i] <<= bitsToShift;
301 				// mix in the bits from the right
302 				bt bits = bt(mask & _block[i - 1]);
303 				_block[i] |= (bits >> (bitsInBlock - bitsToShift));
304 			}
305 		}
306 		_block[0] <<= bitsToShift;
307 		return *this;
308 	}
309 
310 	// shift right operator
operator >>=(int bitsToShift)311 	blockfraction& operator>>=(int bitsToShift) {
312 		if (bitsToShift == 0) return *this;
313 		if (bitsToShift < 0) return operator<<=(-bitsToShift);
314 		if (bitsToShift >= static_cast<int>(nbits)) {
315 			setzero();
316 			return *this;
317 		}
318 
319 		size_t blockShift = 0;
320 		if (bitsToShift >= static_cast<int>(bitsInBlock)) {
321 			blockShift = bitsToShift / bitsInBlock;
322 			if (MSU >= blockShift) {
323 				// shift by blocks
324 				for (size_t i = 0; i <= MSU - blockShift; ++i) {
325 					_block[i] = _block[i + blockShift];
326 				}
327 			}
328 			// adjust the shift
329 			bitsToShift -= static_cast<int>(blockShift * bitsInBlock);
330 			if (bitsToShift == 0) {
331 				// clean up the blocks we have shifted clean
332 				bitsToShift += static_cast<int>(blockShift * bitsInBlock);
333 				for (size_t i = nbits - bitsToShift; i < nbits; ++i) {
334 					this->setbit(i, false); // reset
335 				}
336 
337 				return *this;
338 			}
339 		}
340 		if constexpr (MSU > 0) {
341 			bt mask = ALL_ONES;
342 			mask >>= (bitsInBlock - bitsToShift); // this is a mask for the lower bits in the block that need to move to the lower word
343 			for (size_t i = 0; i < MSU; ++i) {  // TODO: can this be improved? we should not have to work on the upper blocks in case we block shifted
344 				_block[i] >>= bitsToShift;
345 				// mix in the bits from the left
346 				bt bits = bt(mask & _block[i + 1]);
347 				_block[i] |= (bits << (bitsInBlock - bitsToShift));
348 			}
349 		}
350 		_block[MSU] >>= bitsToShift;
351 
352 		// clean up the blocks we have shifted clean
353 		bitsToShift += static_cast<int>(blockShift * bitsInBlock);
354 		for (size_t i = nbits - bitsToShift; i < nbits; ++i) {
355 			this->setbit(i, false); // reset
356 		}
357 
358 		// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
359 		_block[MSU] &= MSU_MASK;
360 		return *this;
361 	}
362 
363 	// modifiers
364 	 // clear a block binary number
clear()365 	inline constexpr void clear() noexcept {
366 		if constexpr (1 == nrBlocks) {
367 			_block[0] = 0;
368 		}
369 		else if constexpr (2 == nrBlocks) {
370 			_block[0] = 0;
371 			_block[1] = 0;
372 		}
373 		else if constexpr (3 == nrBlocks) {
374 			_block[0] = 0;
375 			_block[1] = 0;
376 			_block[2] = 0;
377 		}
378 		else if constexpr (4 == nrBlocks) {
379 			_block[0] = 0;
380 			_block[1] = 0;
381 			_block[2] = 0;
382 			_block[3] = 0;
383 		}
384 		else if constexpr (5 == nrBlocks) {
385 			_block[0] = 0;
386 			_block[1] = 0;
387 			_block[2] = 0;
388 			_block[3] = 0;
389 			_block[4] = 0;
390 		}
391 		else if constexpr (6 == nrBlocks) {
392 			_block[0] = 0;
393 			_block[1] = 0;
394 			_block[2] = 0;
395 			_block[3] = 0;
396 			_block[4] = 0;
397 			_block[5] = 0;
398 		}
399 		else if constexpr (7 == nrBlocks) {
400 			_block[0] = 0;
401 			_block[1] = 0;
402 			_block[2] = 0;
403 			_block[3] = 0;
404 			_block[4] = 0;
405 			_block[5] = 0;
406 			_block[6] = 0;
407 		}
408 		else if constexpr (8 == nrBlocks) {
409 			_block[0] = 0;
410 			_block[1] = 0;
411 			_block[2] = 0;
412 			_block[3] = 0;
413 			_block[4] = 0;
414 			_block[5] = 0;
415 			_block[6] = 0;
416 			_block[7] = 0;
417 		}
418 		else {
419 			for (size_t i = 0; i < nrBlocks; ++i) {
420 				_block[i] = static_cast<bt>(0ull);
421 			}
422 		}
423 	}
setzero()424 	inline constexpr void setzero() noexcept { clear(); }
setradix(int radix)425 	inline constexpr void setradix(int radix) { radixPoint = radix; }
setbit(size_t i,bool v=true)426 	inline constexpr void setbit(size_t i, bool v = true) noexcept {
427 		if (i < nbits) {
428 			bt block = _block[i / bitsInBlock];
429 			bt null = ~(1ull << (i % bitsInBlock));
430 			bt bit = bt(v ? 1 : 0);
431 			bt mask = bt(bit << (i % bitsInBlock));
432 			_block[i / bitsInBlock] = bt((block & null) | mask);
433 		}
434 		// when i is out of bounds, fail silently as no-op
435 	}
setblock(size_t b,const bt & block)436 	inline constexpr void setblock(size_t b, const bt& block) noexcept {
437 		if (b < nrBlocks) _block[b] = block;
438 		// when b is out of bounds, fail silently as no-op
439 	}
setbits(uint64_t value)440 	inline constexpr void setbits(uint64_t value) noexcept {
441 		// radixPoint needs to be set, either using the constructor or the setradix() function
442 		if constexpr (1 == nrBlocks) {
443 			_block[0] = value & storageMask;
444 		}
445 		else if constexpr (1 < nrBlocks) {
446 			if constexpr (bitsInBlock == 64) {
447 				// just set the highest bits with the value provided
448 				_block[MSU] = value;
449 			}
450 			else {
451 				for (size_t i = 0; i < nrBlocks; ++i) {
452 					_block[i] = value & storageMask;
453 					value >>= bitsInBlock;
454 				}
455 			}
456 		}
457 		_block[MSU] &= MSU_MASK; // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
458 	}
flip()459 	inline constexpr blockfraction& flip() noexcept { // in-place one's complement
460 		for (size_t i = 0; i < nrBlocks; ++i) {
461 			_block[i] = bt(~_block[i]);
462 		}
463 		_block[MSU] &= MSU_MASK; // assert precondition of properly nulled leading non-bits
464 		return *this;
465 	}
466 	// in-place 2's complement
twosComplement()467 	inline constexpr blockfraction& twosComplement() noexcept {
468 		blockfraction<nbits, bt, encoding> plusOne;
469 		plusOne.setbit(0);
470 		flip();
471 		add(*this, plusOne);
472 		return *this;
473 	}
474 
475 	// selectors
iszero() const476 	inline constexpr bool iszero() const noexcept {
477 		for (size_t i = 0; i < nrBlocks; ++i) if (_block[i] != 0) return false;
478 		return true;
479 	}
radix() const480 	inline constexpr int  radix() const { return radixPoint; }
isodd() const481 	inline constexpr bool isodd() const noexcept { return _block[0] & 0x1;	}
iseven() const482 	inline constexpr bool iseven() const noexcept { return !isodd(); }
sign() const483 	inline constexpr bool sign() const { return false; } // dummy to unify the API with other number systems in Universal
test(size_t bitIndex) const484 	inline constexpr bool test(size_t bitIndex) const noexcept { return at(bitIndex); }
at(size_t bitIndex) const485 	inline constexpr bool at(size_t bitIndex) const noexcept {
486 		if (bitIndex >= nbits) return false;
487 		bt word = _block[bitIndex / bitsInBlock];
488 		bt mask = bt(1ull << (bitIndex % bitsInBlock));
489 		return (word & mask);
490 	}
491 	// check carry bit in output of the ALU
checkCarry() const492 	inline constexpr bool checkCarry() const noexcept { return at(nbits - 2); }
493 	// helpers
nibble(size_t n) const494 	inline constexpr uint8_t nibble(size_t n) const {
495 		if (n < (1 + ((nbits - 1) >> 2))) {
496 			bt word = _block[(n * 4) / bitsInBlock];
497 			size_t nibbleIndexInWord = n % (bitsInBlock >> 2);
498 			bt mask = static_cast<bt>(0x0Fu << (nibbleIndexInWord*4));
499 			bt nibblebits = static_cast<bt>(mask & word);
500 			return static_cast<uint8_t>(nibblebits >> static_cast<bt>(nibbleIndexInWord*4));
501 		}
502 		throw "nibble index out of bounds";
503 	}
block(size_t b) const504 	inline constexpr bt block(size_t b) const noexcept {
505 		if (b >= nrBlocks) return bt{ 0 };
506 		return _block[b];
507 	}
fraction_ull() const508 	inline constexpr uint64_t fraction_ull() const noexcept {
509 		uint64_t raw = get_ull();
510 		// remove the non-fraction bits
511 		uint64_t fractionBits = (0xFFFF'FFFF'FFFF'FFFFull >> (64 - radixPoint));
512 		raw &= fractionBits;
513 		return raw;
514 	}
get_ull() const515 	inline constexpr uint64_t get_ull() const noexcept {
516 		uint64_t raw{ 0 };
517 		if constexpr (bitsInBlock < 64) {
518 			if constexpr (1 == nrBlocks) {
519 				raw = _block[MSU];
520 				raw &= MSU_MASK;
521 			}
522 			else if constexpr (2 == nrBlocks) {
523 				raw = _block[MSU];
524 				raw &= MSU_MASK;
525 				raw <<= bitsInBlock;
526 				raw |= _block[0];
527 			}
528 			else if constexpr (3 == nrBlocks) {
529 				raw = _block[MSU];
530 				raw &= MSU_MASK;
531 				raw <<= bitsInBlock;
532 				raw |= _block[1];
533 				raw <<= bitsInBlock;
534 				raw |= _block[0];
535 			}
536 			else if constexpr (4 == nrBlocks) {
537 				raw = _block[MSU];
538 				raw &= MSU_MASK;
539 				raw <<= bitsInBlock;
540 				raw |= _block[2];
541 				raw <<= bitsInBlock;
542 				raw |= _block[1];
543 				raw <<= bitsInBlock;
544 				raw |= _block[0];
545 			}
546 			else {
547 				raw = _block[MSU];
548 				raw &= MSU_MASK;
549 				for (int i = MSU - 1; i >= 0; --i) {
550 					raw <<= bitsInBlock;
551 					raw |= _block[i];
552 				}
553 			}
554 		}
555 		else { // take top 64bits and ignore the rest
556 			raw = _block[MSU];
557 			raw &= MSU_MASK;
558 		}
559 		return raw;
560 	}
561 #ifdef DEPRECATED
562 	// copy a value over from one blockfraction to this blockfraction
563 	// blockfraction is a 2's complement encoding, so we sign-extend by default
564 	template<size_t srcbits>
assign(const blockfraction<srcbits,bt> & rhs)565 	inline blockfraction<nbits, bt>& assign(const blockfraction<srcbits, bt>& rhs) {
566 		clear();
567 		// since bt is the same, we can directly copy the blocks in
568 		size_t minNrBlocks = (this->nrBlocks < rhs.nrBlocks) ? this->nrBlocks : rhs.nrBlocks;
569 		for (size_t i = 0; i < minNrBlocks; ++i) {
570 			_block[i] = rhs.block(i);
571 		}
572 		if constexpr (nbits > srcbits) { // check if we need to sign extend
573 			if (rhs.sign()) {
574 				for (size_t i = srcbits; i < nbits; ++i) { // TODO: replace bit-oriented sequence with block
575 					setbit(i);
576 				}
577 			}
578 		}
579 		// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
580 		_block[MSU] &= MSU_MASK;
581 		return *this;
582 	}
583 
584 	// copy a value over from one blockfraction to this without sign-extending the value
585 	// blockfraction is a 2's complement encoding, so we sign-extend by default
586 	// for fraction/significent encodings, we need to turn off sign-extending.
587 	template<size_t srcbits>
assignWithoutSignExtend(const blockfraction<srcbits,bt> & rhs)588 	inline blockfraction<nbits, bt>& assignWithoutSignExtend(const blockfraction<srcbits, bt>& rhs) {
589 		clear();
590 		// since bt is the same, we can simply copy the blocks in
591 		size_t minNrBlocks = (this->nrBlocks < rhs.nrBlocks) ? this->nrBlocks : rhs.nrBlocks;
592 		for (size_t i = 0; i < minNrBlocks; ++i) {
593 			_block[i] = rhs.block(i);
594 		}
595 		// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
596 		_block[MSU] &= MSU_MASK;
597 		return *this;
598 	}
599 #endif
600 	// return the position of the most significant bit, -1 if v == 0
msb() const601 	inline int msb() const noexcept {
602 		for (int i = int(MSU); i >= 0; --i) {
603 			if (_block[i] != 0) {
604 				bt mask = (bt(1u) << (bitsInBlock-1));
605 				for (int j = bitsInBlock - 1; j >= 0; --j) {
606 					if (_block[i] & mask) {
607 						return i * static_cast<int>(bitsInBlock) + j;
608 					}
609 					mask >>= 1;
610 				}
611 			}
612 		}
613 		return -1; // no significant bit found, all bits are zero
614 	}
615 
616 	// conversion to native types
to_float() const617 	inline constexpr float to_float() const noexcept {
618 		return float(to_double());
619 	}
to_double() const620 	inline constexpr double to_double() const noexcept {
621 		double d{ 0.0 };
622 		blockfraction<nbits, bt, encoding> tmp(*this);
623 		int bit = static_cast<int>(nbits - 1);
624 		int shift = static_cast<int>(nbits - 1 - radixPoint);
625 
626 		// special case preprocessing for 2's complement encodings
627 		if constexpr (encoding == BitEncoding::Twos) {
628 			// nbits in the target form 00h.fffff, check msb and if set take 2's complement
629 			if (test(static_cast<size_t>(bit--))) tmp.twosComplement();
630 			--shift; // and remove the MSB from the value computation
631 		}
632 
633 		// process the value above the radix
634 		size_t bitValue = 1ull << shift;
635 		for (; bit >= radixPoint; --bit) {
636 			if (tmp.test(static_cast<size_t>(bit))) d += static_cast<double>(bitValue);
637 			bitValue >>= 1;
638 		}
639 		// process the value below the radix
640 		double v = std::pow(2.0, -double(radixPoint));
641 		for (size_t fbit = 0; fbit < static_cast<size_t>(radixPoint); ++fbit) {
642 			if (tmp.test(fbit)) d += v;
643 			v *= 2.0;
644 		}
645 
646 //		if constexpr (nbits > 49) { // check if we can represent this value with a native normal double with 52 fraction bits => nbits <= (52 - 3)
647 //			std::cerr << "to_double() will yield inaccurate result since blockfraction has more precision than native IEEE-754 double\n";
648 //		}
649 
650 		return d;
651 	}
to_long_double() const652 	inline constexpr long double to_long_double() const noexcept {
653 		return (long double)to_double();
654 	}
655 
656 	// determine the rounding mode: up if true, down if false
657 	// fraction bits:    bbbbbbbbbbb
658 	// target LSB   :         |
659 	// guard        :          |
660 	// round        :           |
661 	// sticky       :            ---
roundingMode(size_t targetLsb) const662 	bool roundingMode(size_t targetLsb) const {
663 		bool lsb    = at(targetLsb);
664 		bool guard  = (targetLsb == 0 ? false : at(targetLsb - 1));
665 		bool round  = (targetLsb <= 1 ? false : at(targetLsb - 2));
666 		bool sticky = (targetLsb <= 2 ? false : any(targetLsb - 3));
667 		bool tie = guard && !round && !sticky;
668 		return (lsb && tie) || (guard && !tie);
669 	}
any(size_t msb) const670 	bool any(size_t msb) const {
671 		msb = (msb > nbits - 1 ? nbits - 1 : msb);
672 		size_t topBlock = msb / bitsInBlock;
673 		bt mask = bt(ALL_ONES >> (bitsInBlock - 1 - (msb % bitsInBlock)));
674 		for (size_t i = 0; i < topBlock; ++i) {
675 			if (_block[i] > 0) return true;
676 		}
677 		// process the partial block
678 		if (_block[topBlock] & mask) return true;
679 		return false;
680 	}
681 
682 protected:
683 	// HELPER methods
684 	// none
685 
686 public:
687 	int radixPoint;
688 	bt _block[nrBlocks];
689 
690 private:
691 	//////////////////////////////////////////////////////////////////////////////
692 	// friend functions
693 
694 	// integer - integer logic comparisons
695 	template<size_t N, typename B, BitEncoding C>
696 	friend bool operator==(const blockfraction<N,B,C>& lhs, const blockfraction<N, B, C>& rhs);
697 	template<size_t N, typename B, BitEncoding C>
698 	friend bool operator!=(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs);
699 	// the other logic operators are defined in terms of arithmetic terms
700 
701 	template<size_t N, typename B, BitEncoding C>
702 	friend std::ostream& operator<<(std::ostream& ostr, const blockfraction<N, B, C>& v);
703 };
704 
705 //////////////////////////////////////////////////////////////////////////////////
706 // stream operators
707 
708 // ostream operator
709 template<size_t nbits, typename bt, BitEncoding encoding>
operator <<(std::ostream & ostr,const blockfraction<nbits,bt,encoding> & number)710 std::ostream& operator<<(std::ostream& ostr, const blockfraction<nbits, bt, encoding>& number) {
711 	return ostr << double(number);
712 }
713 
714 //////////////////////////////////////////////////////////////////////////////
715 // conversions to string representations
716 
717 // create a binary representation of the blockfraction: 00h.ffff
718 // by design, the radix point is at nbits-3
719 template<size_t nbits, typename bt, BitEncoding encoding>
to_binary(const blockfraction<nbits,bt,encoding> & number,bool nibbleMarker=false)720 std::string to_binary(const blockfraction<nbits, bt, encoding>& number, bool nibbleMarker = false) {
721 	std::stringstream s;
722 	s << "0b";
723 #ifdef DEPRECATED
724 	int i = nbits - 1;
725 	s << (number.at(size_t(i--)) ? '1' : '0'); // sign indicator of 2's complement
726 	s << (number.at(size_t(i--)) ? '1' : '0'); // overflow indicator to trigger right shift
727 	s << (number.at(size_t(i--)) ? '1' : '0'); // the hidden bit
728 	s << '.';
729 	for (; i >= 0; --i) {
730 		s << (number.at(size_t(i)) ? '1' : '0');
731 		if (i > 0 && (i % 4) == 0 && nibbleMarker) s << '\'';
732 	}
733 #endif
734 	for (int i = nbits - 1; i >= 0; --i) {
735 		s << (number.at(size_t(i)) ? '1' : '0');
736 		if (i == number.radix()) {
737 			s << '.';
738 		}
739 		else {
740 			if (i > 0 && (i % 4) == 0 && nibbleMarker) s << '\'';
741 		}
742 	}
743 	return s.str();
744 }
745 
746 // local helper to display the contents of a byte array
747 template<size_t nbits, typename bt, BitEncoding encoding>
to_hex(const blockfraction<nbits,bt,encoding> & number,bool wordMarker=true)748 std::string to_hex(const blockfraction<nbits, bt, encoding>& number, bool wordMarker = true) {
749 	static constexpr size_t bitsInByte = 8;
750 	static constexpr size_t bitsInBlock = sizeof(bt) * bitsInByte;
751 	char hexChar[16] = {
752 		'0', '1', '2', '3', '4', '5', '6', '7',
753 		'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
754 	};
755 	std::stringstream ss;
756 	ss << "0x" << std::hex;
757 	int nrNibbles = int(1 + ((nbits - 1) >> 2));
758 	for (int n = nrNibbles - 1; n >= 0; --n) {
759 		uint8_t nibble = number.nibble(static_cast<size_t>(n));
760 		ss << hexChar[nibble];
761 		if (wordMarker && n > 0 && ((n * 4ll) % bitsInBlock) == 0) ss << '\'';
762 	}
763 	return ss.str();
764 }
765 
766 //////////////////////////////////////////////////////////////////////////////////
767 // logic operators
768 
769 template<size_t N, typename B, BitEncoding C>
operator ==(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)770 inline bool operator==(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
771 	for (size_t i = 0; i < lhs.nrBlocks; ++i) {
772 		if (lhs._block[i] != rhs._block[i]) {
773 			return false;
774 		}
775 	}
776 	return true;
777 }
778 template<size_t N, typename B, BitEncoding C>
operator !=(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)779 inline bool operator!=(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
780 	return !operator==(lhs, rhs);
781 }
782 template<size_t N, typename B, BitEncoding C>
operator <(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)783 inline bool operator<(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
784 	if (lhs.ispos() && rhs.isneg()) return false; // need to filter out possible overflow conditions
785 	if (lhs.isneg() && rhs.ispos()) return true;  // need to filter out possible underflow conditions
786 	if (lhs == rhs) return false; // so the maxneg logic works
787 	blockfraction<N, B, C> mneg; maxneg<N, B>(mneg);
788 	if (rhs == mneg) return false; // special case: nothing is smaller than maximum negative
789 	blockfraction<N, B, C> diff = lhs - rhs;
790 	return diff.isneg();
791 }
792 template<size_t N, typename B, BitEncoding C>
operator <=(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)793 inline bool operator<=(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
794 	return (lhs < rhs || lhs == rhs);
795 }
796 template<size_t N, typename B, BitEncoding C>
operator >(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)797 inline bool operator>(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
798 	return !(lhs <= rhs);
799 }
800 template<size_t N, typename B, BitEncoding C>
operator >=(const blockfraction<N,B,C> & lhs,const blockfraction<N,B,C> & rhs)801 inline bool operator>=(const blockfraction<N, B, C>& lhs, const blockfraction<N, B, C>& rhs) {
802 	return !(lhs < rhs);
803 }
804 
805 ///////////////////////////////////////////////////////////////////////////////
806 // binary operators
807 
808 template<size_t nbits, typename bt, BitEncoding encoding>
operator <<(const blockfraction<nbits,bt,encoding> & a,const long b)809 inline blockfraction<nbits, bt, encoding> operator<<(const blockfraction<nbits, bt, encoding>& a, const long b) {
810 	blockfraction<nbits, bt, encoding> c(a);
811 	return c <<= b;
812 }
813 template<size_t nbits, typename bt, BitEncoding encoding>
operator >>(const blockfraction<nbits,bt,encoding> & a,const long b)814 inline blockfraction<nbits, bt, encoding> operator>>(const blockfraction<nbits, bt, encoding>& a, const long b) {
815 	blockfraction<nbits, bt, encoding> c(a);
816 	return c >>= b;
817 }
818 
819 // divide a by b and return both quotient and remainder
820 template<size_t nbits, typename bt, BitEncoding encoding>
longdivision(const blockfraction<nbits,bt,encoding> & _a,const blockfraction<nbits,bt,encoding> & _b)821 bfquorem<nbits, bt, encoding> longdivision(const blockfraction<nbits, bt, encoding>& _a, const blockfraction<nbits, bt, encoding>& _b) {
822 	bfquorem<nbits, bt, encoding> result;
823 	if (_b.iszero()) {
824 		result.exceptionId = 1; // division by zero
825 		return result;
826 	}
827 #ifdef LATER
828 	// generate the absolute values to do long division
829 	// 2's complement special case -max requires an signed int that is 1 bit bigger to represent abs()
830 	bool a_sign = _a.sign();
831 	bool b_sign = _b.sign();
832 	bool result_negative = (a_sign ^ b_sign);
833 	// normalize both arguments to positive, which requires expansion by 1-bit to deal with maxneg
834 	blockfraction<nbits + 1, bt, encoding> a(_a);
835 	blockfraction<nbits + 1, bt, encoding> b(_b);
836 	if (a_sign) a.twosComplement();
837 	if (b_sign) b.twosComplement();
838 
839 	if (a < b) { // optimization for integer numbers
840 		result.rem = _a; // a % b = a when a / b = 0
841 		return result;   // a / b = 0 when b > a
842 	}
843 	// initialize the long division
844 	blockfraction<nbits + 1, bt, encoding> accumulator = a;
845 	// prepare the subtractand
846 	blockfraction<nbits + 1, bt, encoding> subtractand = b;
847 	int msb_b = b.msb();
848 	int msb_a = a.msb();
849 	int shift = msb_a - msb_b;
850 	subtractand <<= shift;
851 	// long division
852 	for (int i = shift; i >= 0; --i) {
853 		if (subtractand <= accumulator) {
854 			accumulator -= subtractand;
855 			result.quo.set(static_cast<size_t>(i));
856 		}
857 		else {
858 			result.quo.reset(static_cast<size_t>(i));
859 		}
860 		subtractand >>= 1;
861 	}
862 	if (result_negative) {  // take 2's complement
863 		result.quo.flip();
864 		result.quo += 1;
865 	}
866 	if (_a.isneg()) {
867 		result.rem = -accumulator;
868 	}
869 	else {
870 		result.rem = accumulator;
871 	}
872 #endif
873 	return result;
874 }
875 
876 ///////////////////////////////////////////////////////////////////////////////
877 // specialty binary operators
878 
879 
880 #define TRACE_DIV 0
881 // unrounded division, returns a blockfraction that is of size 2*nbits
882 template<size_t nbits, size_t roundingBits, typename bt, BitEncoding encoding>
urdiv(const blockfraction<nbits,bt,encoding> & a,const blockfraction<nbits,bt,encoding> & b,blockfraction<roundingBits,bt,encoding> & r)883 inline blockfraction<2 * nbits + roundingBits, bt, encoding> urdiv(const blockfraction<nbits, bt, encoding>& a, const blockfraction<nbits, bt, encoding>& b, blockfraction<roundingBits, bt, encoding>& r) {
884 	if (b.iszero()) {
885 		// division by zero
886 		throw "urdiv divide by zero";
887 	}
888 	// generate the absolute values to do long division
889 	// 2's complement special case -max requires an signed int that is 1 bit bigger to represent abs()
890 	bool a_sign = a.sign();
891 	bool b_sign = b.sign();
892 	bool result_negative = (a_sign ^ b_sign);
893 
894 	// normalize both arguments to positive in new size
895 	blockfraction<nbits + 1, bt, encoding> a_new(a); // TODO optimize: now create a, create _a.bb, copy, destroy _a.bb_copy
896 	blockfraction<nbits + 1, bt, encoding> b_new(b);
897 	if (a_sign) a_new.twoscomplement();
898 	if (b_sign) b_new.twoscomplement();
899 
900 	// initialize the long division
901 	blockfraction<2 * nbits + roundingBits, bt, encoding> decimator(a_new);
902 	blockfraction<2 * nbits + roundingBits, bt, encoding> subtractand(b_new); // prepare the subtractand
903 	blockfraction<2 * nbits + roundingBits, bt, encoding> result;
904 
905 	int msp = nbits + roundingBits - 1; // msp = most significant position
906 	decimator <<= msp; // scale the decimator to the largest possible positive value
907 
908 	int msb_b = subtractand.msb();
909 	int msb_a = decimator.msb();
910 	int shift = msb_a - msb_b;
911 	int scale = shift - msp;   // scale of the result quotient
912 	subtractand <<= shift;
913 
914 #if TRACE_DIV
915 	std::cout << "  " << to_binary(decimator) << std::endl;
916 	std::cout << "- " << to_binary(subtractand) << " shift: " << shift << std::endl;
917 #endif
918 	// long division
919 	for (int i = msb_a; i >= 0; --i) {
920 
921 		if (subtractand <= decimator) {
922 			decimator -= subtractand;
923 			result.set(static_cast<size_t>(i));
924 		}
925 		else {
926 			result.reset(static_cast<size_t>(i));
927 		}
928 		subtractand >>= 1;
929 
930 #if TRACE_DIV
931 		std::cout << "  " << to_binary(decimator) << ' ' << to_binary(result) << std::endl;
932 		std::cout << "- " << to_binary(subtractand) << std::endl;
933 #endif
934 	}
935 	result <<= scale;
936 	if (result_negative) result.twosComplement();
937 	r.assign(result); // copy the lowest bits which represent the bits on which we need to apply the rounding test
938 	return result;
939 }
940 
941 // free function generator of the 2's complement of a blockfraction
942 template<size_t nbits, typename bt, BitEncoding encoding>
twosComplement(const blockfraction<nbits,bt,encoding> & a)943 inline constexpr blockfraction<nbits, bt, encoding> twosComplement(const blockfraction<nbits, bt, encoding>& a) {
944 	blockfraction<nbits, bt, encoding> b(a);
945 	return b.twosComplement();
946 }
947 
948 } // namespace sw::universal
949