1 #pragma once
2 // posit_3_1.hpp: specialized 3-bit posit using lookup table arithmetic
3 //
4 // Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
5 //
6 // This file is part of the universal numbers project, which is released under an MIT Open Source license.
7 
8 // DO NOT USE DIRECTLY!
9 // the compile guards in this file are only valid in the context of the specialization logic
10 // configured in the main <universal/posit/posit>
11 
12 #ifndef POSIT_FAST_POSIT_3_1
13 #define POSIT_FAST_POSIT_3_1 0
14 #endif
15 
16 namespace sw::universal {
17 
18 		// set the fast specialization variable to indicate that we are running a special template specialization
19 #if POSIT_FAST_POSIT_3_1
20 #ifdef _MSC_VER
21 #pragma message("Fast specialization of posit<3,1>")
22 //#else
23 //#warning("Fast specialization of posit<3,1>")
24 #endif
25 
26 			constexpr uint8_t posit_3_1_addition_lookup[64] = {
27 				0,1,0,3,1,1,0,3,2,0,2,3,3,3,3,3,
28 				0,1,0,3,1,1,0,3,2,0,2,3,3,3,3,3,
29 				0,1,0,3,1,1,0,3,2,0,2,3,3,3,3,3,
30 				0,1,0,3,1,1,0,3,2,0,2,3,3,3,3,3,
31 			};
32 
33 			constexpr uint8_t posit_3_1_subtraction_lookup[64] = {
34 				0,2,1,3,1,0,1,3,2,2,0,3,3,3,3,3,
35 				0,2,1,3,1,0,1,3,2,2,0,3,3,3,3,3,
36 				0,2,1,3,1,0,1,3,2,2,0,3,3,3,3,3,
37 				0,2,1,3,1,0,1,3,2,2,0,3,3,3,3,3,
38 			};
39 
40 			constexpr uint8_t posit_3_1_multiplication_lookup[64] = {
41 				0,0,0,3,1,1,2,3,0,2,1,3,3,3,3,3,
42 				0,0,0,3,1,1,2,3,0,2,1,3,3,3,3,3,
43 				0,0,0,3,1,1,2,3,0,2,1,3,3,3,3,3,
44 				0,0,0,3,1,1,2,3,0,2,1,3,3,3,3,3,
45 			};
46 
47 			constexpr uint8_t posit_3_1_division_lookup[64] = {
48 				3,0,0,3,3,1,2,3,3,2,1,3,3,3,3,3,
49 				3,0,0,3,3,1,2,3,3,2,1,3,3,3,3,3,
50 				3,0,0,3,3,1,2,3,3,2,1,3,3,3,3,3,
51 				3,0,0,3,3,1,2,3,3,2,1,3,3,3,3,3,
52 			};
53 
54 			constexpr uint8_t posit_3_1_reciprocal_lookup[8] = {
55 				3,1,2,3,3,1,2,3,
56 			};
57 
58 			template<>
59 			class posit<NBITS_IS_3, ES_IS_1> {
60 			public:
61 				static constexpr size_t nbits = NBITS_IS_2;
62 				static constexpr size_t es = ES_IS_1;
63 				static constexpr size_t sbits = 1;
64 				static constexpr size_t rbits = nbits - sbits;
65 				static constexpr size_t ebits = 0;			// <--- special case that needed this specialization
66 				static constexpr size_t fbits = 0;
67 				static constexpr size_t fhbits = fbits + 1;
68 				static constexpr uint8_t index_shift = 3;
69 
posit()70 				posit() { _bits = 0; }
71 				posit(const posit&) = default;
72 				posit(posit&&) = default;
73 				posit& operator=(const posit&) = default;
74 				posit& operator=(posit&&) = default;
75 
posit(int initial_value)76 				posit(int initial_value) { _bits = uint8_t(initial_value & 0x07); }
77 				// assignment operators for native types
operator =(int rhs)78 				posit& operator=(int rhs) {
79 					return operator=((long long)(rhs));
80 				}
operator =(long int rhs)81 				posit& operator=(long int rhs) {
82 					return operator=((long long)(rhs));
83 				}
operator =(long long rhs)84 				posit& operator=(long long rhs) {
85 					// only valid integers are -1, 0, 1
86 					_bits = 0x0;
87 					if (rhs <= -1) {
88 						_bits = 0x2;   // value is -1, or -maxpos
89 					}
90 					else if (rhs == 0) {
91 						_bits = 0x0;   // value is 0
92 					}
93 					else if (1 <= rhs) {
94 						_bits = 0x1;   // value is 1, or maxpos
95 					}
96 					return *this;
97 				}
operator =(const float rhs)98 				posit& operator=(const float rhs) {
99 					return float_assign(rhs);
100 				}
operator =(const double rhs)101 				posit& operator=(const double rhs) {
102 					return float_assign(rhs);
103 				}
operator =(const long double rhs)104 				posit& operator=(const long double rhs) {
105 					return float_assign(rhs);
106 				}
107 
operator long double() const108 				explicit operator long double() const { return to_long_double(); }
operator double() const109 				explicit operator double() const { return to_double(); }
operator float() const110 				explicit operator float() const { return to_float(); }
operator long long() const111 				explicit operator long long() const { return to_long_long(); }
operator long() const112 				explicit operator long() const { return to_long(); }
operator int() const113 				explicit operator int() const { return to_int(); }
operator unsigned long long() const114 				explicit operator unsigned long long() const { return to_long_long(); }
operator unsigned long() const115 				explicit operator unsigned long() const { return to_long(); }
operator unsigned int() const116 				explicit operator unsigned int() const { return to_int(); }
117 
setBitblock(sw::universal::bitblock<NBITS_IS_3> & raw)118 				posit& setBitblock(sw::universal::bitblock<NBITS_IS_3>& raw) {
119 					_bits = uint8_t(raw.to_ulong());
120 					return *this;
121 				}
setbits(uint64_t value)122 				posit& setbits(uint64_t value) {
123 					_bits = uint8_t(value & 0x07);
124 					return *this;
125 				}
operator -() const126 				posit operator-() const {
127 					if (iszero()) {
128 						return *this;
129 					}
130 					if (isnar()) {
131 						return *this;
132 					}
133 					posit p;
134 					return p.setbits((~_bits) + 1);
135 				}
operator +=(const posit & b)136 				posit& operator+=(const posit& b) {
137 					uint16_t index = (_bits << index_shift) | b._bits;
138 					_bits = posit_3_1_addition_lookup[index];
139 					return *this;
140 				}
operator -=(const posit & b)141 				posit& operator-=(const posit& b) {
142 					uint16_t index = (_bits << index_shift) | b._bits;
143 					_bits = posit_3_1_subtraction_lookup[index];
144 					return *this;
145 				}
operator *=(const posit & b)146 				posit& operator*=(const posit& b) {
147 					uint16_t index = (_bits << index_shift) | b._bits;
148 					_bits = posit_3_1_multiplication_lookup[index];
149 					return *this;
150 				}
operator /=(const posit & b)151 				posit& operator/=(const posit& b) {
152 					uint16_t index = (_bits << index_shift) | b._bits;
153 					_bits = posit_3_1_division_lookup[index];
154 					return *this;
155 				}
operator ++()156 				posit& operator++() {
157 					_bits = (_bits + 1) & 0x07;
158 					return *this;
159 				}
operator ++(int)160 				posit operator++(int) {
161 					posit tmp(*this);
162 					operator++();
163 					return tmp;
164 				}
operator --()165 				posit& operator--() {
166 					_bits = (_bits - 1) & 0x07;
167 					return *this;
168 				}
operator --(int)169 				posit operator--(int) {
170 					posit tmp(*this);
171 					operator--();
172 					return tmp;
173 				}
reciprocate() const174 				posit reciprocate() const {
175 					posit p;
176 					p.setbits(posit_3_1_reciprocal_lookup[_bits & 0x07]);
177 					return p;
178 				}
179 				// SELECTORS
sign() const180 				inline bool sign() const { return (_bits & 0x4u); }
isnar() const181 				inline bool isnar() const { return (_bits == 0x4u); }
iszero() const182 				inline bool iszero() const { return (_bits == 0); }
isone() const183 				inline bool isone() const { // pattern 010....
184 					return (_bits == 0x2u);
185 				}
isminusone() const186 				inline bool isminusone() const { // pattern 110...
187 					return (_bits == 0x6u);
188 				}
isneg() const189 				inline bool isneg() const { return (_bits & 0x4u); }
ispos() const190 				inline bool ispos() const { return !isneg(); }
ispowerof2() const191 				inline bool ispowerof2() const { return !(_bits & 0x1u); }
192 
sign_value() const193 				inline int sign_value() const { return (_bits & 0x4u ? -1 : 1); }
194 
get() const195 				bitblock<NBITS_IS_3> get() const { bitblock<NBITS_IS_3> bb; bb = int(_bits); return bb; }
encoding() const196 				unsigned int encoding() const { return (unsigned int)(_bits & 0x7u); }
197 
clear()198 				inline void clear() { _bits = 0; }
setzero()199 				inline void setzero() { clear(); }
setnar()200 				inline void setnar() { _bits = 0x4u; }
201 
202 			private:
203 				uint8_t _bits;
204 
205 				// Conversion functions
206 #if POSIT_THROW_ARITHMETIC_EXCEPTION
to_int() const207 				int         to_int() const {
208 					if (iszero()) return 0;
209 					if (isnar()) throw posit_nar{};
210 					return int(to_float());
211 				}
to_long() const212 				long        to_long() const {
213 					if (iszero()) return 0;
214 					if (isnar()) throw posit_nar{};
215 					return long(to_double());
216 				}
to_long_long() const217 				long long   to_long_long() const {
218 					if (iszero()) return 0;
219 					if (isnar()) throw posit_nar{};
220 					return long(to_long_double());
221 				}
222 #else
to_int() const223 				int         to_int() const {
224 					if (iszero()) return 0;
225 					if (isnar())  return int(INFINITY);
226 					return int(to_float());
227 				}
to_long() const228 				long        to_long() const {
229 					if (iszero()) return 0;
230 					if (isnar())  return long(INFINITY);
231 					return long(to_double());
232 				}
to_long_long() const233 				long long   to_long_long() const {
234 					if (iszero()) return 0;
235 					if (isnar())  return (long long)(INFINITY);
236 					return long(to_long_double());
237 				}
238 #endif
to_float() const239 				float       to_float() const {
240 					return (float)to_double();
241 				}
to_double() const242 				double      to_double() const {
243 					if (iszero())	return 0.0;
244 					if (isnar())	return NAN;
245 					bool		     	 _sign;
246 					regime<nbits, es>    _regime;
247 					exponent<nbits, es>  _exponent;
248 					fraction<fbits>      _fraction;
249 					bitblock<nbits>		 _raw_bits;
250 					_raw_bits.reset();
251 					uint64_t mask = 1;
252 					for (size_t i = 0; i < nbits; i++) {
253 						_raw_bits.set(i, (_bits & mask));
254 						mask <<= 1;
255 					}
256 					decode(_raw_bits, _sign, _regime, _exponent, _fraction);
257 					double s = (_sign ? -1.0 : 1.0);
258 					double r = _regime.value();
259 					double e = _exponent.value();
260 					double f = (1.0 + _fraction.value());
261 					return s * r * e * f;
262 				}
to_long_double() const263 				long double to_long_double() const {
264 					if (iszero())  return 0.0;
265 					if (isnar())   return NAN;
266 					bool		     	 _sign;
267 					regime<nbits, es>    _regime;
268 					exponent<nbits, es>  _exponent;
269 					fraction<fbits>      _fraction;
270 					bitblock<nbits>		 _raw_bits;
271 					_raw_bits.reset();
272 					uint64_t mask = 1;
273 					for (size_t i = 0; i < nbits; i++) {
274 						_raw_bits.set(i, (_bits & mask));
275 						mask <<= 1;
276 					}
277 					decode(_raw_bits, _sign, _regime, _exponent, _fraction);
278 					long double s = (_sign ? -1.0 : 1.0);
279 					long double r = _regime.value();
280 					long double e = _exponent.value();
281 					long double f = (1.0 + _fraction.value());
282 					return s * r * e * f;
283 				}
284 
285 				template <typename T>
float_assign(const T & rhs)286 				posit& float_assign(const T& rhs) {
287 					constexpr int dfbits = std::numeric_limits<T>::digits - 1;
288 					internal::value<dfbits> v((T)rhs);
289 
290 					// special case processing
291 					if (v.iszero()) {
292 						setzero();
293 						return *this;
294 					}
295 					if (v.isinf() || v.isnan()) {  // posit encode for FP_INFINITE and NaN as NaR (Not a Real)
296 						setnar();
297 						return *this;
298 					}
299 
300 					if (rhs <= -0.5) {
301 						_bits = 0x2;   // value is -1, or -maxpos
302 					}
303 					else if (-0.5 < rhs && rhs < 0.5) {
304 						_bits = 0x0;   // value is 0
305 					}
306 					else if (rhs >= 0.5) {
307 						_bits = 0x1;   // value is 1, or maxpos
308 					}
309 					return *this;
310 				}
311 
312 				// I/O operators
313 				friend std::ostream& operator<< (std::ostream& ostr, const posit<NBITS_IS_3, 0>& p);
314 				friend std::istream& operator>> (std::istream& istr, posit<NBITS_IS_3, 0>& p);
315 
316 				// posit - posit logic functions
317 				friend bool operator==(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
318 				friend bool operator!=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
319 				friend bool operator< (const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
320 				friend bool operator> (const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
321 				friend bool operator<=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
322 				friend bool operator>=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
323 
324 			};
325 
326 			// posit I/O operators
operator <<(std::ostream & ostr,const posit<NBITS_IS_3,ES_IS_1> & p)327 			inline std::ostream& operator<<(std::ostream& ostr, const posit<NBITS_IS_3, ES_IS_1>& p) {
328 				return ostr << NBITS_IS_3 << '.' << ES_IS_1 << 'x' << to_hex(p.get()) << 'p';
329 			}
330 
331 			// convert a posit value to a string using "nar" as designation of NaR
to_string(const posit<NBITS_IS_3,ES_IS_1> & p,std::streamsize precision)332 			inline std::string to_string(const posit<NBITS_IS_3, ES_IS_1>& p, std::streamsize precision) {
333 				if (p.isnar()) {
334 					return std::string("nar");
335 				}
336 				std::stringstream ss;
337 				ss << std::setprecision(precision) << float(p);
338 				return ss.str();
339 			}
340 
341 			// posit - posit binary logic operators
operator ==(const posit<NBITS_IS_3,ES_IS_1> & lhs,const posit<NBITS_IS_3,ES_IS_1> & rhs)342 			inline bool operator==(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
343 				return lhs._bits == rhs._bits;
344 			}
operator !=(const posit<NBITS_IS_3,ES_IS_1> & lhs,const posit<NBITS_IS_3,ES_IS_1> & rhs)345 			inline bool operator!=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
346 				return !operator==(lhs, rhs);
347 			}
operator <(const posit<NBITS_IS_3,ES_IS_1> & lhs,const posit<NBITS_IS_3,ES_IS_1> & rhs)348 			inline bool operator< (const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
349 				return lhs._bits < rhs._bits;
350 			}
operator >(const posit<NBITS_IS_3,ES_IS_1> & lhs,const posit<NBITS_IS_3,ES_IS_1> & rhs)351 			inline bool operator> (const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
352 				return operator< (rhs, lhs);
353 			}
operator <=(const posit<NBITS_IS_3,ES_IS_1> & lhs,const posit<NBITS_IS_3,ES_IS_1> & rhs)354 			inline bool operator<=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
355 				return operator< (lhs, rhs) || operator==(lhs, rhs);
356 			}
operator >=(const posit<NBITS_IS_3,ES_IS_1> & lhs,const posit<NBITS_IS_3,ES_IS_1> & rhs)357 			inline bool operator>=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
358 				return !operator< (lhs, rhs);
359 			}
360 
operator +(const posit<NBITS_IS_3,ES_IS_1> & lhs,const posit<NBITS_IS_3,ES_IS_1> & rhs)361 			inline posit<NBITS_IS_3, ES_IS_1> operator+(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
362 				posit<NBITS_IS_3, ES_IS_1> sum = lhs;
363 				sum += rhs;
364 				return sum;
365 			}
366 #else  // POSIT_FAST_POSIT_3_1
367 // too verbose #pragma message("Standard posit<3,1>")
368 #	define POSIT_FAST_POSIT_3_1 0
369 #endif // POSIT_FAST_POSIT_3_1
370 
371 } // namespace sw::universal
372