1 #pragma once
2 // blockbinary.hpp: parameterized blocked binary number system representing a 2's complement binary number
3 //
4 // Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
5 //
6 // This file is part of the universal numbers project, which is released under an MIT Open Source license.
7 #include <iostream>
8 #include <string>
9 #include <sstream>
10 
11 // should be defined by calling environment, just catching it here just in case it is not
12 #ifndef LONG_DOUBLE_SUPPORT
13 #pragma message("LONG_DOUBLE_SUPPORT is not defined")
14 #define LONG_DOUBLE_SUPPORT 0
15 #endif
16 
17 namespace sw::universal {
18 
19 // forward references
20 template<size_t nbits, typename bt> class blockbinary;
21 template<size_t nbits, typename bt> blockbinary<nbits, bt> twosComplement(const blockbinary<nbits, bt>&);
22 template<size_t nbits, typename bt> struct quorem;
23 template<size_t nbits, typename bt> quorem<nbits, bt> longdivision(const blockbinary<nbits, bt>&, const blockbinary<nbits, bt>&);
24 
25 // idiv_t for blockbinary<nbits> to capture quotient and remainder during long division
26 template<size_t nbits, typename bt>
27 struct quorem {
28 	int exceptionId;
29 	blockbinary<nbits, bt> quo; // quotient
30 	blockbinary<nbits, bt> rem;  // remainder
31 };
32 
33 // maximum positive 2's complement number: b01111...1111
34 template<size_t nbits, typename bt = uint8_t>
maxpos(blockbinary<nbits,bt> & a)35 constexpr blockbinary<nbits, bt>& maxpos(blockbinary<nbits, bt>& a) {
36 	a.clear();
37 	a.flip();
38 	a.setbit(nbits - 1, false);
39 	return a;
40 }
41 
42 // maximum negative 2's complement number: b1000...0000
43 template<size_t nbits, typename bt = uint8_t>
maxneg(blockbinary<nbits,bt> & a)44 constexpr blockbinary<nbits, bt>& maxneg(blockbinary<nbits, bt>& a) {
45 	a.clear();
46 	a.setbit(nbits - 1);
47 	return a;
48 }
49 
50 // generate the 2's complement of the block binary number
51 template<size_t nbits, typename bt>
twosComplement(const blockbinary<nbits,bt> & orig)52 blockbinary<nbits, bt> twosComplement(const blockbinary<nbits, bt>& orig) {
53 	blockbinary<nbits, bt> twosC(orig);
54 	blockbinary<nbits, bt> plusOne(1);
55 	twosC.flip();
56 	twosC += plusOne;
57 	return twosC;
58 }
59 
60 /*
61 NOTES
62 
63 for block arithmetic, we need to manage a carry bit.
64 This disqualifies using uint64_t as a block type as we can't catch the overflow condition
65 in the same way as the other native types, uint8_t, uint16_t, uint32_t.
66 
67 We could use a sint64_t and then convert to uint64_t and observe the MSB. Very different
68 logic though.
69 */
70 
71 // a block-based 2's complement binary number
72 template<size_t _nbits, typename bt = uint8_t>
73 class blockbinary {
74 public:
75 	static constexpr size_t nbits = _nbits;
76 	typedef bt BlockType;
77 
78 	static constexpr size_t bitsInByte = 8;
79 	static constexpr size_t bitsInBlock = sizeof(bt) * bitsInByte;
80 	static_assert(bitsInBlock <= 64, "storage unit for block arithmetic needs to be <= uint64_t");
81 
82 	static constexpr size_t nrBlocks = 1ull + ((nbits - 1ull) / bitsInBlock);
83 	static constexpr uint64_t storageMask = (0xFFFFFFFFFFFFFFFFul >> (64 - bitsInBlock));
84 	static constexpr bt maxBlockValue = bt(-1);
85 
86 	static constexpr size_t MSU = nrBlocks - 1; // MSU == Most Significant Unit
87 	static constexpr bt ALL_ONES = bt(~0);
88 	static constexpr bt MSU_MASK = (ALL_ONES >> (nrBlocks * bitsInBlock - nbits));
89 	static constexpr bt SIGN_BIT_MASK = bt(bt(1) << ((nbits - 1ull) % bitsInBlock));
90 
91 	// constructors
blockbinary()92 	constexpr blockbinary() noexcept : _block{ 0 } {}
93 
94 	blockbinary(const blockbinary&) noexcept = default;
95 	blockbinary(blockbinary&&) noexcept = default;
96 
97 	blockbinary& operator=(const blockbinary&) noexcept = default;
98 	blockbinary& operator=(blockbinary&&) noexcept = default;
99 
100 	/// construct a blockbinary from another: bt must be the same
101 	template<size_t nnbits>
blockbinary(const blockbinary<nnbits,bt> & rhs)102 	blockbinary(const blockbinary<nnbits, bt>& rhs) { this->assign(rhs); }
103 
104 	// initializer for long long
blockbinary(long long initial_value)105 	constexpr blockbinary(long long initial_value) noexcept : _block{ 0 } { *this = initial_value; }
106 
operator =(long long rhs)107 	constexpr blockbinary& operator=(long long rhs) noexcept {
108 		if constexpr (1 < nrBlocks) {
109 			for (unsigned i = 0; i < nrBlocks; ++i) {
110 				_block[i] = rhs & storageMask;
111 				rhs >>= bitsInBlock;
112 			}
113 			// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
114 			_block[MSU] &= MSU_MASK;
115 		}
116 		else if constexpr (1 == nrBlocks) {
117 			_block[0] = rhs & storageMask;
118 			// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
119 			_block[MSU] &= MSU_MASK;
120 		}
121 		return *this;
122 	}
123 
124 	// conversion operators
operator int() const125 	explicit operator int() const                { return int(to_long_long()); }
operator long() const126 	explicit operator long() const               { return long(to_long_long()); }
operator long long() const127 	explicit operator long long() const          { return to_long_long(); }
operator unsigned int() const128 	explicit operator unsigned int() const       { return unsigned(to_ull()); }
operator unsigned long() const129 	explicit operator unsigned long() const      { return (unsigned long)to_ull(); }
operator unsigned long long() const130 	explicit operator unsigned long long() const { return to_ull(); }
131 	// TODO: these need proper implementations that can convert very large integers to the proper scale afforded by the floating-point formats
operator float() const132 	explicit operator float() const              { return float(to_long_long()); }
operator double() const133 	explicit operator double() const             { return double(to_long_long()); }
134 
135 #if LONG_DOUBLE_SUPPORT
operator long double() const136 	explicit operator long double() const        { return (long double)to_long_long(); }
137 #endif
138 
139 	// prefix operators
operator -() const140 	blockbinary operator-() const {
141 		blockbinary negated(*this);
142 		blockbinary plusOne(1);
143 		negated.flip();
144 		negated += plusOne;
145 		return negated;
146 	}
147 	// one's complement
operator ~() const148 	blockbinary operator~() const {
149 		blockbinary complement(*this);
150 		complement.flip();
151 		return complement;
152 	}
153 	// increment/decrement
operator ++(int)154 	blockbinary operator++(int) {
155 		blockbinary tmp(*this);
156 		operator++();
157 		return tmp;
158 	}
operator ++()159 	blockbinary& operator++() {
160 		blockbinary increment;
161 		increment.setbits(0x1);
162 		*this += increment;
163 		return *this;
164 	}
operator --(int)165 	blockbinary operator--(int) {
166 		blockbinary tmp(*this);
167 		operator--();
168 		return tmp;
169 	}
operator --()170 	blockbinary& operator--() {
171 		blockbinary decrement;
172 		decrement.setbits(0x1);
173 		return *this -= decrement;
174 	}
175 	// logic operators
operator ~()176 	blockbinary  operator~() {
177 		blockbinary<nbits, bt> complement(*this);
178 		complement.flip();
179 		return complement;
180 	}
181 	// arithmetic operators
operator +=(const blockbinary & rhs)182 	blockbinary& operator+=(const blockbinary& rhs) {
183 		bool carry = false;
184 		for (unsigned i = 0; i < nrBlocks; ++i) {
185 			// cast up so we can test for overflow
186 			uint64_t l = uint64_t(_block[i]);
187 			uint64_t r = uint64_t(rhs._block[i]);
188 			uint64_t s = l + r + (carry ? uint64_t(1) : uint64_t(0));
189 			carry = (s > maxBlockValue ? true : false);
190 			_block[i] = bt(s);
191 		}
192 		// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
193 		_block[MSU] &= MSU_MASK;
194 		return *this;
195 	}
operator -=(const blockbinary & rhs)196 	blockbinary& operator-=(const blockbinary& rhs) {
197 		return operator+=(sw::universal::twosComplement(rhs));
198 	}
operator *=(const blockbinary & rhs)199 	blockbinary& operator*=(const blockbinary& rhs) { // modulo in-place
200 		blockbinary base(*this);
201 		blockbinary multiplicant(rhs);
202 		clear();
203 		for (size_t i = 0; i < nbits; ++i) {
204 			if (base.at(i)) {
205 				operator+=(multiplicant);
206 			}
207 			multiplicant <<= 1;
208 		}
209 		// since we used operator+=, which enforces the nulling of leading bits
210 		// we don't need to null here
211 		return *this;
212 	}
operator /=(const blockbinary & rhs)213 	blockbinary& operator/=(const blockbinary& rhs) {
214 		quorem<nbits, bt> result = longdivision(*this, rhs);
215 		*this = result.quo;
216 		return *this;
217 	}
operator %=(const blockbinary & rhs)218 	blockbinary& operator%=(const blockbinary& rhs) {
219 		quorem<nbits, bt> result = longdivision(*this, rhs);
220 		*this = result.rem;
221 		return *this;
222 	}
223 	// shift left operator
operator <<=(int bitsToShift)224 	blockbinary& operator<<=(int bitsToShift) {
225 		if (bitsToShift == 0) return *this;
226 		if (bitsToShift < 0) return operator>>=(-bitsToShift);
227 		if (bitsToShift > long(nbits)) bitsToShift = nbits; // clip to max
228 		if (bitsToShift >= long(bitsInBlock)) {
229 			int blockShift = bitsToShift / static_cast<int>(bitsInBlock);
230 			for (int i = static_cast<int>(MSU); i >= blockShift; --i) {
231 				_block[i] = _block[i - blockShift];
232 			}
233 			for (int i = blockShift - 1; i >= 0; --i) {
234 				_block[i] = bt(0);
235 			}
236 			// adjust the shift
237 			bitsToShift -= static_cast<int>(blockShift * bitsInBlock);
238 			if (bitsToShift == 0) return *this;
239 		}
240 		if constexpr (MSU > 0) {
241 			// construct the mask for the upper bits in the block that need to move to the higher word
242 			bt mask = 0xFFFFFFFFFFFFFFFF << (bitsInBlock - bitsToShift);
243 			for (size_t i = MSU; i > 0; --i) {
244 				_block[i] <<= bitsToShift;
245 				// mix in the bits from the right
246 				bt bits = bt(mask & _block[i - 1]);
247 				_block[i] |= (bits >> (bitsInBlock - bitsToShift));
248 			}
249 		}
250 		_block[0] <<= bitsToShift;
251 		return *this;
252 	}
253 	// shift right operator
operator >>=(int bitsToShift)254 	blockbinary& operator>>=(int bitsToShift) {
255 		if (bitsToShift == 0) return *this;
256 		if (bitsToShift < 0) return operator<<=(-bitsToShift);
257 		if (bitsToShift >= static_cast<int>(nbits)) {
258 			setzero();
259 			return *this;
260 		}
261 		bool signext = sign();
262 		size_t blockShift = 0;
263 		if (bitsToShift >= static_cast<int>(bitsInBlock)) {
264 			blockShift = bitsToShift / bitsInBlock;
265 			if (MSU >= blockShift) {
266 				// shift by blocks
267 				for (size_t i = 0; i <= MSU - blockShift; ++i) {
268 					_block[i] = _block[i + blockShift];
269 				}
270 			}
271 			// adjust the shift
272 			bitsToShift -= static_cast<int>(blockShift * bitsInBlock);
273 			if (bitsToShift == 0) {
274 				// fix up the leading zeros if we have a negative number
275 				if (signext) {
276 					// bitsToShift is guaranteed to be less than nbits
277 					bitsToShift += static_cast<int>(blockShift * bitsInBlock);
278 					for (size_t i = nbits - bitsToShift; i < nbits; ++i) {
279 						this->setbit(i);
280 					}
281 				}
282 				else {
283 					// clean up the blocks we have shifted clean
284 					bitsToShift += static_cast<int>(blockShift * bitsInBlock);
285 					for (size_t i = nbits - bitsToShift; i < nbits; ++i) {
286 						this->setbit(i, false);
287 					}
288 				}
289 				return *this;
290 			}
291 		}
292 		if constexpr (MSU > 0) {
293 			bt mask = ALL_ONES;
294 			mask >>= (bitsInBlock - bitsToShift); // this is a mask for the lower bits in the block that need to move to the lower word
295 			for (size_t i = 0; i < MSU; ++i) {  // TODO: can this be improved? we should not have to work on the upper blocks in case we block shifted
296 				_block[i] >>= bitsToShift;
297 				// mix in the bits from the left
298 				bt bits = bt(mask & _block[i + 1]);
299 				_block[i] |= (bits << (bitsInBlock - bitsToShift));
300 			}
301 		}
302 		_block[MSU] >>= bitsToShift;
303 
304 		// fix up the leading zeros if we have a negative number
305 		if (signext) {
306 			// bitsToShift is guaranteed to be less than nbits
307 			bitsToShift += static_cast<int>(blockShift * bitsInBlock);
308 			for (size_t i = nbits - bitsToShift; i < nbits; ++i) {
309 				this->setbit(i);
310 			}
311 		}
312 		else {
313 			// clean up the blocks we have shifted clean
314 			bitsToShift += static_cast<int>(blockShift * bitsInBlock);
315 			for (size_t i = nbits - bitsToShift; i < nbits; ++i) {
316 				this->setbit(i, false);
317 			}
318 		}
319 
320 		// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
321 		_block[MSU] &= MSU_MASK;
322 		return *this;
323 	}
324 
325 	// modifiers
326 	 // clear a block binary number
clear()327 	inline constexpr void clear() noexcept {
328 		for (size_t i = 0; i < nrBlocks; ++i) {
329 			_block[i] = bt(0ull);
330 		}
331 	}
setzero()332 	inline constexpr void setzero() noexcept { clear(); }
setbit(size_t i,bool v=true)333 	inline constexpr void setbit(size_t i, bool v = true) noexcept {
334 		if (i < nbits) {
335 			bt block = _block[i / bitsInBlock];
336 			bt null = ~(1ull << (i % bitsInBlock));
337 			bt bit = bt(v ? 1 : 0);
338 			bt mask = bt(bit << (i % bitsInBlock));
339 			_block[i / bitsInBlock] = bt((block & null) | mask);
340 		}
341 		// nop if i is out of range
342 	}
setbits(uint64_t value)343 	inline constexpr void setbits(uint64_t value) noexcept {
344 		if constexpr (1 == nrBlocks) {
345 			_block[0] = value & storageMask;
346 		}
347 		else if constexpr (1 < nrBlocks) {
348 			for (size_t i = 0; i < nrBlocks; ++i) {
349 				_block[i] = value & storageMask;
350 				value >>= bitsInBlock;
351 			}
352 		}
353 		_block[MSU] &= MSU_MASK; // enforce precondition for fast comparison by properly nulling bits that are outside of nbits
354 	}
setblock(size_t b,const bt & block)355 	inline constexpr void setblock(size_t b, const bt& block) noexcept {
356 		if (b < nrBlocks) _block[b] = block; // nop if b is out of range
357 	}
flip()358 	inline constexpr blockbinary& flip() noexcept { // in-place one's complement
359 		for (size_t i = 0; i < nrBlocks; ++i) {
360 			_block[i] = bt(~_block[i]);
361 		}
362 		_block[MSU] &= MSU_MASK; // assert precondition of properly nulled leading non-bits
363 		return *this;
364 	}
twosComplement()365 	inline constexpr blockbinary& twosComplement() noexcept { // in-place 2's complement
366 		blockbinary<nbits, bt> plusOne(1);
367 		flip();
368 		return *this += plusOne;
369 	}
370 
371 	// selectors
sign() const372 	inline constexpr bool sign() const noexcept { return _block[MSU] & SIGN_BIT_MASK; }
ispos() const373 	inline constexpr bool ispos() const noexcept { return !sign(); }
isneg() const374 	inline constexpr bool isneg() const noexcept { return sign(); }
iszero() const375 	inline constexpr bool iszero() const noexcept {
376 		for (size_t i = 0; i < nrBlocks; ++i) if (_block[i] != 0) return false;
377 		return true;
378 	}
isallones() const379 	inline constexpr bool isallones() const noexcept {
380 		if constexpr (nrBlocks > 1) for (size_t i = 0; i < nrBlocks-1; ++i) if (_block[i] != ALL_ONES) return false;
381 		if (_block[MSU] != MSU_MASK) return false;
382 		return true;
383 	}
isodd() const384 	inline constexpr bool isodd() const noexcept { return _block[0] & 0x1;	}
iseven() const385 	inline constexpr bool iseven() const noexcept { return !isodd(); }
test(size_t bitIndex) const386 	inline constexpr bool test(size_t bitIndex) const noexcept { return at(bitIndex); }
at(size_t bitIndex) const387 	inline constexpr bool at(size_t bitIndex) const noexcept {
388 		if (bitIndex >= nbits) return false; // fail silently as no-op
389 		bt word = _block[bitIndex / bitsInBlock];
390 		bt mask = bt(1ull << (bitIndex % bitsInBlock));
391 		return (word & mask);
392 	}
nibble(size_t n) const393 	inline constexpr uint8_t nibble(size_t n) const noexcept {
394 		uint8_t retval{ 0 };
395 		if (n < (1 + ((nbits - 1) >> 2))) {
396 			bt word = _block[(n * 4) / bitsInBlock];
397 			size_t nibbleIndexInWord = n % (bitsInBlock >> 2);
398 			bt mask = static_cast<bt>(0x0Fu << (nibbleIndexInWord*4));
399 			bt nibblebits = static_cast<bt>(mask & word);
400 			retval = static_cast<uint8_t>(nibblebits >> static_cast<bt>(nibbleIndexInWord*4));
401 		}
402 		else { // nop when nibble index out of bounds
403 			retval = 0;
404 		}
405 		return retval;
406 	}
block(size_t b) const407 	inline constexpr bt block(size_t b) const noexcept { // TODO: convert to noexcept function?
408 		if (b < nrBlocks) return _block[b];
409 		return bt(0); // return 0 when block index out of bounds
410 	}
411 
412 	// copy a value over from one blockbinary to this blockbinary
413 	// blockbinary is a 2's complement encoding, so we sign-extend by default
414 	template<size_t srcbits>
assign(const blockbinary<srcbits,bt> & rhs)415 	inline blockbinary<nbits, bt>& assign(const blockbinary<srcbits, bt>& rhs) {
416 		clear();
417 		// since bt is the same, we can simply copy the blocks in
418 		size_t minNrBlocks = (this->nrBlocks < rhs.nrBlocks) ? this->nrBlocks : rhs.nrBlocks;
419 		for (size_t i = 0; i < minNrBlocks; ++i) {
420 			_block[i] = rhs.block(i);
421 		}
422 		if constexpr (nbits > srcbits) { // check if we need to sign extend
423 			if (rhs.sign()) {
424 				for (size_t i = srcbits; i < nbits; ++i) { // TODO: replace bit-oriented sequence with block
425 					setbit(i);
426 				}
427 			}
428 		}
429 		// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
430 		_block[MSU] &= MSU_MASK;
431 		return *this;
432 	}
433 
434 	// copy a value over from one blockbinary to this without sign-extending the value
435 	// blockbinary is a 2's complement encoding, so we sign-extend by default
436 	// for fraction/significent encodings, we need to turn off sign-extending.
437 	template<size_t srcbits>
assignWithoutSignExtend(const blockbinary<srcbits,bt> & rhs)438 	inline blockbinary<nbits, bt>& assignWithoutSignExtend(const blockbinary<srcbits, bt>& rhs) {
439 		clear();
440 		// since bt is the same, we can simply copy the blocks in
441 		size_t minNrBlocks = (this->nrBlocks < rhs.nrBlocks) ? this->nrBlocks : rhs.nrBlocks;
442 		for (size_t i = 0; i < minNrBlocks; ++i) {
443 			_block[i] = rhs.block(i);
444 		}
445 		// enforce precondition for fast comparison by properly nulling bits that are outside of nbits
446 		_block[MSU] &= MSU_MASK;
447 		return *this;
448 	}
449 
450 	// return the position of the most significant bit, -1 if v == 0
msb() const451 	inline int msb() const noexcept {
452 		for (int i = int(MSU); i >= 0; --i) {
453 			if (_block[i] != 0) {
454 				bt mask = (bt(1u) << (bitsInBlock-1));
455 				for (int j = bitsInBlock - 1; j >= 0; --j) {
456 					if (_block[i] & mask) {
457 						return i * static_cast<int>(bitsInBlock) + j;
458 					}
459 					mask >>= 1;
460 				}
461 			}
462 		}
463 		return -1; // no significant bit found, all bits are zero
464 	}
465 	// conversion to native types
to_long_long() const466 	int64_t to_long_long() const {
467 		constexpr unsigned sizeoflonglong = 8 * sizeof(long long);
468 		int64_t ll{ 0 };
469 		int64_t mask{ 1 };
470 		unsigned upper = (nbits < sizeoflonglong ? nbits : sizeoflonglong);
471 		for (unsigned i = 0; i < upper; ++i) {
472 			ll |= at(i) ? mask : 0;
473 			mask <<= 1;
474 		}
475 		if (sign() && upper < sizeoflonglong) { // sign extend
476 			for (unsigned i = upper; i < sizeoflonglong; ++i) {
477 				ll |= mask;
478 				mask <<= 1;
479 			}
480 		}
481 		return ll;
482 	}
to_ull() const483 	uint64_t to_ull() const {
484 		uint64_t ull{ 0 };
485 		uint64_t mask{ 1 };
486 		uint32_t msb = nbits < 64 ? nbits : 64;
487 		for (uint32_t i = 0; i < msb; ++i) {
488 			ull |= at(i) ? mask : 0;
489 			mask <<= 1;
490 		}
491 		return ull;
492 	}
493 
494 	// determine the rounding mode: result needs to be rounded up if true
roundingMode(size_t targetLsb) const495 	bool roundingMode(size_t targetLsb) const {
496 		bool lsb = at(targetLsb);
497 		bool guard = (targetLsb == 0 ? false : at(targetLsb - 1));
498 		bool round = (targetLsb > 1 ? at(targetLsb - 2) : false);
499 		bool sticky =(targetLsb < 3 ? false : any(targetLsb - 3));
500 		bool tie = guard && !round && !sticky;
501 		return (lsb && tie) || (guard && !tie);
502 	}
any(size_t msb) const503 	bool any(size_t msb) const {
504 		msb = (msb > nbits - 1 ? nbits - 1 : msb);
505 		size_t topBlock = msb / bitsInBlock;
506 		bt mask = bt(ALL_ONES >> (bitsInBlock - 1 - (msb % bitsInBlock)));
507 		for (size_t i = 0; i < topBlock; ++i) {
508 			if (_block[i] > 0) return true;
509 		}
510 		// process the partial block
511 		if (_block[topBlock] & mask) return true;
512 		return false;
513 	}
514 
515 protected:
516 	// HELPER methods
517 	// none
518 
519 private:
520 	bt _block[nrBlocks];
521 
522 	//////////////////////////////////////////////////////////////////////////////
523 	// friend functions
524 
525 	// integer - integer logic comparisons
526 	template<size_t N, typename B>
527 	friend bool operator==(const blockbinary<N, B>& lhs, const blockbinary<N, B>& rhs);
528 	template<size_t N, typename B>
529 	friend bool operator!=(const blockbinary<N, B>& lhs, const blockbinary<N, B>& rhs);
530 	// the other logic operators are defined in terms of arithmetic terms
531 
532 	template<size_t nnbits, typename Bbt>
533 	friend std::ostream& operator<<(std::ostream& ostr, const blockbinary<nnbits, Bbt>& v);
534 };
535 
536 //////////////////////////////////////////////////////////////////////////////////
537 // logic operators
538 
539 template<size_t N, typename B>
operator ==(const blockbinary<N,B> & lhs,const blockbinary<N,B> & rhs)540 inline bool operator==(const blockbinary<N, B>& lhs, const blockbinary<N, B>& rhs) {
541 	for (size_t i = 0; i < lhs.nrBlocks; ++i) {
542 		if (lhs._block[i] != rhs._block[i]) {
543 			return false;
544 		}
545 	}
546 	return true;
547 }
548 template<size_t N, typename B>
operator !=(const blockbinary<N,B> & lhs,const blockbinary<N,B> & rhs)549 inline bool operator!=(const blockbinary<N, B>& lhs, const blockbinary<N, B>& rhs) {
550 	return !operator==(lhs, rhs);
551 }
552 template<size_t N, typename B>
operator <(const blockbinary<N,B> & lhs,const blockbinary<N,B> & rhs)553 inline bool operator<(const blockbinary<N, B>& lhs, const blockbinary<N, B>& rhs) {
554 	if (lhs.ispos() && rhs.isneg()) return false; // need to filter out possible overflow conditions
555 	if (lhs.isneg() && rhs.ispos()) return true;  // need to filter out possible underflow conditions
556 	if (lhs == rhs) return false; // so the maxneg logic works
557 	blockbinary<N, B> mneg; maxneg<N, B>(mneg);
558 	if (rhs == mneg) return false; // special case: nothing is smaller than maximum negative
559 	blockbinary<N, B> diff = lhs - rhs;
560 	return diff.isneg();
561 }
562 template<size_t N, typename B>
operator <=(const blockbinary<N,B> & lhs,const blockbinary<N,B> & rhs)563 inline bool operator<=(const blockbinary<N, B>& lhs, const blockbinary<N, B>& rhs) {
564 	return (lhs < rhs || lhs == rhs);
565 }
566 template<size_t N, typename B>
operator >(const blockbinary<N,B> & lhs,const blockbinary<N,B> & rhs)567 inline bool operator>(const blockbinary<N, B>& lhs, const blockbinary<N, B>& rhs) {
568 	return !(lhs <= rhs);
569 }
570 template<size_t N, typename B>
operator >=(const blockbinary<N,B> & lhs,const blockbinary<N,B> & rhs)571 inline bool operator>=(const blockbinary<N, B>& lhs, const blockbinary<N, B>& rhs) {
572 	return !(lhs < rhs);
573 }
574 ///////////////////////////////////////////////////////////////////////////////
575 // binary operators
576 
577 template<size_t nbits, typename bt>
operator +(const blockbinary<nbits,bt> & a,const blockbinary<nbits,bt> & b)578 inline blockbinary<nbits, bt> operator+(const blockbinary<nbits, bt>& a, const blockbinary<nbits, bt>& b) {
579 	blockbinary<nbits, bt> c(a);
580 	return c += b;
581 }
582 template<size_t nbits, typename bt>
operator -(const blockbinary<nbits,bt> & a,const blockbinary<nbits,bt> & b)583 inline blockbinary<nbits, bt> operator-(const blockbinary<nbits, bt>& a, const blockbinary<nbits, bt>& b) {
584 	blockbinary<nbits, bt> c(a);
585 	return c -= b;
586 }
587 template<size_t nbits, typename bt>
operator *(const blockbinary<nbits,bt> & a,const blockbinary<nbits,bt> & b)588 inline blockbinary<nbits, bt> operator*(const blockbinary<nbits, bt>& a, const blockbinary<nbits, bt>& b) {
589 	blockbinary<nbits, bt> c(a);
590 	return c *= b;
591 }
592 template<size_t nbits, typename bt>
operator /(const blockbinary<nbits,bt> & a,const blockbinary<nbits,bt> & b)593 inline blockbinary<nbits, bt> operator/(const blockbinary<nbits, bt>& a, const blockbinary<nbits, bt>& b) {
594 	blockbinary<nbits, bt> c(a);
595 	return c /= b;
596 }
597 template<size_t nbits, typename bt>
operator %(const blockbinary<nbits,bt> & a,const blockbinary<nbits,bt> & b)598 inline blockbinary<nbits, bt> operator%(const blockbinary<nbits, bt>& a, const blockbinary<nbits, bt>& b) {
599 	blockbinary<nbits, bt> c(a);
600 	return c %= b;
601 }
602 
603 template<size_t nbits, typename bt>
operator <<(const blockbinary<nbits,bt> & a,const long b)604 inline blockbinary<nbits, bt> operator<<(const blockbinary<nbits, bt>& a, const long b) {
605 	blockbinary<nbits, bt> c(a);
606 	return c <<= b;
607 }
608 template<size_t nbits, typename bt>
operator >>(const blockbinary<nbits,bt> & a,const long b)609 inline blockbinary<nbits, bt> operator>>(const blockbinary<nbits, bt>& a, const long b) {
610 	blockbinary<nbits, bt> c(a);
611 	return c >>= b;
612 }
613 
614 // divide a by b and return both quotient and remainder
615 template<size_t nbits, typename bt>
longdivision(const blockbinary<nbits,bt> & _a,const blockbinary<nbits,bt> & _b)616 quorem<nbits, bt> longdivision(const blockbinary<nbits, bt>& _a, const blockbinary<nbits, bt>& _b) {
617 	quorem<nbits, bt> result = { 0, 0, 0 };
618 	if (_b.iszero()) {
619 		result.exceptionId = 1; // division by zero
620 		return result;
621 	}
622 	// generate the absolute values to do long division
623 	// 2's complement special case -max requires an signed int that is 1 bit bigger to represent abs()
624 	bool a_sign = _a.sign();
625 	bool b_sign = _b.sign();
626 	bool result_negative = (a_sign ^ b_sign);
627 	// normalize both arguments to positive, which requires expansion by 1-bit to deal with maxneg
628 	blockbinary<nbits + 1, bt> a(_a);
629 	blockbinary<nbits + 1, bt> b(_b);
630 	if (a_sign) a.twosComplement();
631 	if (b_sign) b.twosComplement();
632 
633 	if (a < b) { // optimization for integer numbers
634 		result.rem = _a; // a % b = a when a / b = 0
635 		return result;   // a / b = 0 when b > a
636 	}
637 	// initialize the long division
638 	blockbinary<nbits + 1, bt> accumulator = a;
639 	// prepare the subtractand
640 	blockbinary<nbits + 1, bt> subtractand = b;
641 	int msb_b = b.msb();
642 	int msb_a = a.msb();
643 	int shift = msb_a - msb_b;
644 	subtractand <<= shift;
645 	// long division
646 	for (int i = shift; i >= 0; --i) {
647 		if (subtractand <= accumulator) {
648 			accumulator -= subtractand;
649 			result.quo.setbit(static_cast<size_t>(i));
650 		}
651 		else {
652 			result.quo.setbit(static_cast<size_t>(i), false);
653 		}
654 		subtractand >>= 1;
655 	}
656 	if (result_negative) {  // take 2's complement
657 		result.quo.flip();
658 		result.quo += 1;
659 	}
660 	if (_a.isneg()) {
661 		result.rem = -accumulator;
662 	}
663 	else {
664 		result.rem = accumulator;
665 	}
666 	return result;
667 }
668 
669 ///////////////////////////////////////////////////////////////////////////////
670 // specialty binary operators
671 
672 // unrounded addition, returns a blockbinary that is of size nbits+1
673 template<size_t nbits, typename bt>
uradd(const blockbinary<nbits,bt> & a,const blockbinary<nbits,bt> & b)674 inline blockbinary<nbits + 1, bt> uradd(const blockbinary<nbits, bt>& a, const blockbinary<nbits, bt>& b) {
675 	blockbinary<nbits + 1, bt> result(a);
676 	return result += blockbinary<nbits + 1, bt>(b);
677 }
678 
679 // unrounded subtraction, returns a blockbinary that is of size nbits+1
680 template<size_t nbits, typename bt>
ursub(const blockbinary<nbits,bt> & a,const blockbinary<nbits,bt> & b)681 inline blockbinary<nbits + 1, bt> ursub(const blockbinary<nbits, bt>& a, const blockbinary<nbits, bt>& b) {
682 	blockbinary<nbits + 1, bt> result(a);
683 	return result -= blockbinary<nbits + 1, bt>(b);
684 }
685 
686 #define TRACE_URMUL 0
687 // unrounded multiplication, returns a blockbinary that is of size 2*nbits
688 // using brute-force sign-extending of operands to yield correct sign-extended result for 2*nbits 2's complement.
689 template<size_t nbits, typename bt>
urmul(const blockbinary<nbits,bt> & a,const blockbinary<nbits,bt> & b)690 inline blockbinary<2*nbits, bt> urmul(const blockbinary<nbits, bt>& a, const blockbinary<nbits, bt>& b) {
691 	blockbinary<2 * nbits, bt> result;
692 	if (a.iszero() || b.iszero()) return result;
693 
694 	// compute the result
695 	blockbinary<2 * nbits, bt> signextended_a(a);
696 	blockbinary<2 * nbits, bt> multiplicant(b);
697 #if TRACE_URMUL
698 	std::cout << "    " << to_binary(a) << " * " << to_binary(b) << std::endl;
699 	std::cout << std::setw(3) << 0 << ' ' << to_binary(multiplicant) << ' ' << to_binary(result) << std::endl;
700 #endif
701 	for (size_t i = 0; i < 2* nbits; ++i) {
702 		if (signextended_a.at(i)) {
703 			result += multiplicant;
704 		}
705 		multiplicant <<= 1;
706 #if TRACE_URMUL
707 		std::cout << std::setw(3) << i << ' ' << to_binary(multiplicant) << ' ' << to_binary(result) << std::endl;
708 #endif
709 
710 	}
711 #if TRACE_URMUL
712 	std::cout << "fnl " << to_binary(result) << std::endl;
713 #endif
714 	//blockbinary<2 * nbits, bt> clipped(result);
715 	// since we used operator+=, which enforces the nulling of leading bits
716 	// we don't need to null here
717 	return result;
718 }
719 
720 // unrounded multiplication, returns a blockbinary that is of size 2*nbits
721 // using nbits modulo arithmetic with final sign
722 template<size_t nbits, typename bt>
urmul2(const blockbinary<nbits,bt> & a,const blockbinary<nbits,bt> & b)723 inline blockbinary<2 * nbits, bt> urmul2(const blockbinary<nbits, bt>& a, const blockbinary<nbits, bt>& b) {
724 	blockbinary<2 * nbits, bt> result;
725 	if (a.iszero() || b.iszero()) return result;
726 
727 	// compute the result
728 	bool result_sign = a.sign() ^ b.sign();
729 	// normalize both arguments to positive in new size
730 	blockbinary<nbits + 1, bt> a_new(a); // TODO optimize: now create a, create _a.bb, copy, destroy _a.bb_copy
731 	blockbinary<nbits + 1, bt> b_new(b);
732 	if (a.sign()) a_new.twosComplement();
733 	if (b.sign()) b_new.twosComplement();
734 	blockbinary<2*nbits, bt> multiplicant(b_new);
735 
736 #if TRACE_URMUL
737 	std::cout << "    " << a_new << " * " << b_new << std::endl;
738 	std::cout << std::setw(3) << 0 << ' ' << multiplicant << ' ' << result << std::endl;
739 #endif
740 	for (size_t i = 0; i < (nbits+1); ++i) {
741 		if (a_new.at(i)) {
742 			result += multiplicant;  // if multiplicant is not the same size as result, the assignment will get sign-extended if the MSB is true, this is not correct because we are assuming unsigned binaries in this loop
743 		}
744 		multiplicant <<= 1;
745 #if TRACE_URMUL
746 		std::cout << std::setw(3) << i << ' ' << multiplicant << ' ' << result << std::endl;
747 #endif
748 	}
749 	if (result_sign) result.twosComplement();
750 #if TRACE_URMUL
751 	std::cout << "fnl " << result << std::endl;
752 #endif
753 	return result;
754 }
755 
756 #define TRACE_DIV 0
757 // unrounded division, returns a blockbinary that is of size 2*nbits
758 template<size_t nbits, size_t roundingBits, typename bt>
urdiv(const blockbinary<nbits,bt> & a,const blockbinary<nbits,bt> & b)759 inline blockbinary<2 * nbits + roundingBits, bt> urdiv(const blockbinary<nbits, bt>& a, const blockbinary<nbits, bt>& b) {
760 	if (b.iszero()) {
761 		// division by zero
762 		throw "urdiv divide by zero";
763 	}
764 	// generate the absolute values to do long division
765 	// 2's complement special case -max requires an signed int that is 1 bit bigger to represent abs()
766 	bool a_sign = a.sign();
767 	bool b_sign = b.sign();
768 	bool result_negative = (a_sign ^ b_sign);
769 
770 	// normalize both arguments to positive, which requires expansion by 1-bit to deal with maxneg
771 	blockbinary<nbits + 1, bt> a_new(a); // TODO optimize: now create a, create _a.bb, copy, destroy _a.bb_copy
772 	blockbinary<nbits + 1, bt> b_new(b);
773 #if TRACE_DIV
774 	std::cout << "a " << to_binary(a_new) << '\n';
775 	std::cout << "b " << to_binary(b_new) << '\n';
776 #endif
777 	if (a_sign) a_new.twosComplement();
778 	if (b_sign) b_new.twosComplement();
779 #if TRACE_DIV
780 	std::cout << "a " << to_binary(a_new) << '\n';
781 	std::cout << "b " << to_binary(b_new) << '\n';
782 #endif
783 
784 	// initialize the long division
785 	blockbinary<2 * nbits + roundingBits + 1, bt> decimator(a_new);
786 	blockbinary<2 * nbits + roundingBits + 1, bt> subtractand(b_new); // prepare the subtractand
787 	blockbinary<2 * nbits + roundingBits + 1, bt> result;
788 
789 	constexpr size_t msp = nbits + roundingBits; // msp = most significant position
790 	decimator <<= msp; // scale the decimator to the largest possible positive value
791 
792 	int msb_b = subtractand.msb();
793 	int msb_a = decimator.msb();
794 	int shift = msb_a - msb_b;
795 	subtractand <<= shift;
796 	int offset = msb_a - static_cast<int>(msp);  // msb of the result
797 	int scale  = shift - static_cast<int>(msp);  // scale of the result quotient
798 
799 #if TRACE_DIV
800 	std::cout << "  " << to_binary(decimator, true)   << " msp  : " << msp << '\n';
801 	std::cout << "- " << to_binary(subtractand, true) << " shift: " << shift << '\n';
802 #endif
803 	// long division
804 	for (int i = msb_a; i >= 0; --i) {
805 
806 		if (subtractand <= decimator) {
807 			decimator -= subtractand;
808 			result.setbit(static_cast<size_t>(i));
809 		}
810 		else {
811 			result.setbit(static_cast<size_t>(i), false);
812 		}
813 		subtractand >>= 1;
814 
815 #if TRACE_DIV
816 		std::cout << "  " << to_binary(decimator, true) << "  current quotient: " << to_binary(result, true) << '\n';
817 		std::cout << "- " << to_binary(subtractand, true) << '\n';
818 #endif
819 	}
820 	result <<= (scale - offset);
821 #if TRACE_DIV
822 	std::cout << "  " << "scaled result: " << to_binary(result, true) << " scale : " << scale << " offset : " << offset << '\n';
823 #endif
824 	if (result_negative) result.twosComplement();
825 	return result;
826 }
827 
828 //////////////////////////////////////////////////////////////////////////////
829 // conversions to string representations
830 
831 // create a binary representation of the storage
832 template<size_t nbits, typename bt>
to_binary(const blockbinary<nbits,bt> & number,bool nibbleMarker=false)833 std::string to_binary(const blockbinary<nbits, bt>& number, bool nibbleMarker = false) {
834 	std::stringstream s;
835 	s << "0b";
836 	for (int i = int(nbits - 1); i >= 0; --i) {
837 		s << (number.at(size_t(i)) ? '1' : '0');
838 		if (i > 0 && (i % 4) == 0 && nibbleMarker) s << '\'';
839 	}
840 	return s.str();
841 }
842 
843 // local helper to display the contents of a byte array
844 template<size_t nbits, typename bt>
to_hex(const blockbinary<nbits,bt> & number,bool wordMarker=true)845 std::string to_hex(const blockbinary<nbits, bt>& number, bool wordMarker = true) {
846 	static constexpr size_t bitsInByte = 8;
847 	static constexpr size_t bitsInBlock = sizeof(bt) * bitsInByte;
848 	char hexChar[16] = {
849 		'0', '1', '2', '3', '4', '5', '6', '7',
850 		'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
851 	};
852 	std::stringstream ss;
853 	ss << "0x" << std::hex;
854 	int nrNibbles = int(1 + ((nbits - 1) >> 2));
855 	for (int n = nrNibbles - 1; n >= 0; --n) {
856 		uint8_t nibble = number.nibble(static_cast<size_t>(n));
857 		ss << hexChar[nibble];
858 		if (wordMarker && n > 0 && ((n * 4ll) % bitsInBlock) == 0) ss << '\'';
859 	}
860 	return ss.str();
861 }
862 
863 // ostream operator
864 template<size_t nbits, typename bt>
operator <<(std::ostream & ostr,const blockbinary<nbits,bt> & number)865 std::ostream& operator<<(std::ostream& ostr, const blockbinary<nbits, bt>& number) {
866 	return ostr << number.to_long_long(); // TODO: add an decimal converter
867 }
868 
869 
870 } // namespace sw::universal
871