#pragma once
// posit_3_1.hpp: specialized 3-bit posit using lookup table arithmetic
//
// Copyright (C) 2017-2021 Stillwater Supercomputing, Inc.
//
// This file is part of the universal numbers project, which is released under an MIT Open Source license.

// DO NOT USE DIRECTLY!
// the compile guards in this file are only valid in the context of the specialization logic
// configured in the main <universal/posit/posit>

#ifndef POSIT_FAST_POSIT_3_1
#define POSIT_FAST_POSIT_3_1 0
#endif

namespace sw::universal {

// set the fast specialization variable to indicate that we are running a special template specialization
#if POSIT_FAST_POSIT_3_1
#ifdef _MSC_VER
#pragma message("Fast specialization of posit<3,1>")
//#else
//#warning("Fast specialization of posit<3,1>")
#endif

// Encoding used by every table below (consistent with isnar()/isone()/isminusone() in the class):
//
//   bits   000  001  010  011  100  101  110  111
//   value    0  1/4    1    4  NaR   -4   -1  -1/4
//
// Binary tables are indexed by (lhs << 3) | rhs, i.e. one row of 8 entries per lhs encoding.
// Rounding rule used to generate the entries:
//   - NaR in any operand, and division by zero, yield NaR
//   - an exact zero result yields 0; a non-zero result never rounds to 0 or NaR
//   - |x| <  1/2        -> +-1/4 (minpos)
//   - 1/2 <= |x| <= 2   -> +-1    (the ties at 1/2 and 2 go to the even encoding 010/110)
//   - |x| >  2          -> +-4   (maxpos)
// NOTE(review): the tables were derived by hand from the rule above; cross-check them against
// the generic (non-fast) posit<3,1> before enabling POSIT_FAST_POSIT_3_1 in production.

constexpr uint8_t posit_3_1_addition_lookup[64] = {
    0,1,2,3,4,5,6,7,   // 0    + rhs
    1,2,2,3,4,5,6,0,   // 1/4  + rhs
    2,2,2,3,4,5,0,2,   // 1    + rhs
    3,3,3,3,4,0,3,3,   // 4    + rhs
    4,4,4,4,4,4,4,4,   // NaR  + rhs
    5,5,5,0,4,5,5,5,   // -4   + rhs
    6,6,0,3,4,5,6,6,   // -1   + rhs
    7,0,2,3,4,5,6,6,   // -1/4 + rhs
};

constexpr uint8_t posit_3_1_subtraction_lookup[64] = {
    0,7,6,5,4,3,2,1,   // 0    - rhs
    1,0,6,5,4,3,2,2,   // 1/4  - rhs
    2,2,0,5,4,3,2,2,   // 1    - rhs
    3,3,3,0,4,3,3,3,   // 4    - rhs
    4,4,4,4,4,4,4,4,   // NaR  - rhs
    5,5,5,5,4,0,5,5,   // -4   - rhs
    6,6,6,5,4,3,0,6,   // -1   - rhs
    7,6,6,5,4,3,2,0,   // -1/4 - rhs
};

constexpr uint8_t posit_3_1_multiplication_lookup[64] = {
    0,0,0,0,4,0,0,0,   // 0    * rhs
    0,1,1,2,4,6,7,7,   // 1/4  * rhs
    0,1,2,3,4,5,6,7,   // 1    * rhs
    0,2,3,3,4,5,5,6,   // 4    * rhs
    4,4,4,4,4,4,4,4,   // NaR  * rhs
    0,6,5,5,4,3,3,2,   // -4   * rhs
    0,7,6,5,4,3,2,1,   // -1   * rhs
    0,7,7,6,4,2,1,1,   // -1/4 * rhs
};

constexpr uint8_t posit_3_1_division_lookup[64] = {
    4,0,0,0,4,0,0,0,   // 0    / rhs
    4,2,1,1,4,7,7,6,   // 1/4  / rhs
    4,3,2,1,4,7,6,5,   // 1    / rhs
    4,3,3,2,4,6,5,5,   // 4    / rhs
    4,4,4,4,4,4,4,4,   // NaR  / rhs
    4,5,5,6,4,2,3,3,   // -4   / rhs
    4,5,6,7,4,1,2,3,   // -1   / rhs
    4,6,7,7,4,1,1,2,   // -1/4 / rhs
};

// 1/x for each encoding; identical to the "1 / rhs" row of the division table
constexpr uint8_t posit_3_1_reciprocal_lookup[8] = {
    4,3,2,1,4,7,6,5,
};

// Fast specialization of posit<3,1>: the 3-bit encoding is stored in the low bits of a
// single byte and all arithmetic is done through the lookup tables above.
template<>
class posit<NBITS_IS_3, ES_IS_1> {
public:
    static constexpr size_t nbits = NBITS_IS_3;   // was NBITS_IS_2, which made the decoders drop the sign bit
    static constexpr size_t es = ES_IS_1;
    static constexpr size_t sbits = 1;
    static constexpr size_t rbits = nbits - sbits;
    static constexpr size_t ebits = 0;            // <--- special case that needed this specialization
    static constexpr size_t fbits = 0;
    static constexpr size_t fhbits = fbits + 1;
    static constexpr uint8_t index_shift = 3;     // lhs is shifted by this much to form the table index

    posit() { _bits = 0; }
    posit(const posit&) = default;
    posit(posit&&) = default;
    posit& operator=(const posit&) = default;
    posit& operator=(posit&&) = default;

    // NOTE(review): this constructor stores the low three bits of the argument as a raw
    // encoding, whereas operator=(int) converts the integer VALUE. Left as-is because
    // callers may depend on it; confirm this asymmetry is intended.
    posit(int initial_value) { _bits = uint8_t(initial_value & 0x07); }

    // assignment operators for native types
    posit& operator=(int rhs) {
        return operator=((long long)(rhs));
    }
    posit& operator=(long int rhs) {
        return operator=((long long)(rhs));
    }
    // Convert an integer value to the nearest encoding: 0 -> 0, |rhs| in {1, 2} -> +-1,
    // |rhs| >= 3 -> +-4 (maxpos). Encodings match isone() (010) and isminusone() (110).
    posit& operator=(long long rhs) {
        if (rhs == 0) {
            _bits = 0x0;                          // value is 0
        }
        else if (rhs < 0) {
            _bits = (rhs >= -2) ? 0x6 : 0x5;      // -1, or -maxpos
        }
        else {
            _bits = (rhs <= 2) ? 0x2 : 0x3;       // 1, or maxpos
        }
        return *this;
    }
    posit& operator=(const float rhs) {
        return float_assign(rhs);
    }
    posit& operator=(const double rhs) {
        return float_assign(rhs);
    }
    posit& operator=(const long double rhs) {
        return float_assign(rhs);
    }

    // explicit conversions to native types
    explicit operator long double() const { return to_long_double(); }
    explicit operator double() const { return to_double(); }
    explicit operator float() const { return to_float(); }
    explicit operator long long() const { return to_long_long(); }
    explicit operator long() const { return to_long(); }
    explicit operator int() const { return to_int(); }
    explicit operator unsigned long long() const { return to_long_long(); }
    explicit operator unsigned long() const { return to_long(); }
    explicit operator unsigned int() const { return to_int(); }

    // set the encoding from a raw bit block
    posit& setBitblock(sw::universal::bitblock<NBITS_IS_3>& raw) {
        _bits = uint8_t(raw.to_ulong());
        return *this;
    }
    // set the encoding from the low three bits of value
    posit& setbits(uint64_t value) {
        _bits = uint8_t(value & 0x07);
        return *this;
    }
    // negation: zero and NaR are their own negation, everything else is the
    // 3-bit two's complement of the encoding (setbits masks the result)
    posit operator-() const {
        if (iszero()) {
            return *this;
        }
        if (isnar()) {
            return *this;
        }
        posit p;
        return p.setbits((~_bits) + 1);
    }
    // table-driven arithmetic: index = (this << 3) | b
    posit& operator+=(const posit& b) {
        uint16_t index = (_bits << index_shift) | b._bits;
        _bits = posit_3_1_addition_lookup[index];
        return *this;
    }
    posit& operator-=(const posit& b) {
        uint16_t index = (_bits << index_shift) | b._bits;
        _bits = posit_3_1_subtraction_lookup[index];
        return *this;
    }
    posit& operator*=(const posit& b) {
        uint16_t index = (_bits << index_shift) | b._bits;
        _bits = posit_3_1_multiplication_lookup[index];
        return *this;
    }
    posit& operator/=(const posit& b) {
        uint16_t index = (_bits << index_shift) | b._bits;
        _bits = posit_3_1_division_lookup[index];
        return *this;
    }
    // increment/decrement step to the adjacent encoding, wrapping modulo 8
    posit& operator++() {
        _bits = (_bits + 1) & 0x07;
        return *this;
    }
    posit operator++(int) {
        posit tmp(*this);
        operator++();
        return tmp;
    }
    posit& operator--() {
        _bits = (_bits - 1) & 0x07;
        return *this;
    }
    posit operator--(int) {
        posit tmp(*this);
        operator--();
        return tmp;
    }
    posit reciprocate() const {
        posit p;
        p.setbits(posit_3_1_reciprocal_lookup[_bits & 0x07]);
        return p;
    }

    // SELECTORS
    inline bool sign() const { return (_bits & 0x4u); }
    inline bool isnar() const { return (_bits == 0x4u); }
    inline bool iszero() const { return (_bits == 0); }
    inline bool isone() const { // pattern 010....
        return (_bits == 0x2u);
    }
    inline bool isminusone() const { // pattern 110...
        return (_bits == 0x6u);
    }
    inline bool isneg() const { return (_bits & 0x4u); }
    inline bool ispos() const { return !isneg(); }
    // NOTE(review): this only tests the least significant bit, so it also reports true
    // for zero and NaR and false for the odd encodings; confirm that is the intended contract.
    inline bool ispowerof2() const { return !(_bits & 0x1u); }

    inline int sign_value() const { return (_bits & 0x4u ? -1 : 1); }

    bitblock<NBITS_IS_3> get() const { bitblock<NBITS_IS_3> bb; bb = int(_bits); return bb; }
    unsigned int encoding() const { return (unsigned int)(_bits & 0x7u); }

    inline void clear() { _bits = 0; }
    inline void setzero() { clear(); }
    inline void setnar() { _bits = 0x4u; }

private:
    uint8_t _bits;   // only the low three bits are significant

    // Conversion functions
#if POSIT_THROW_ARITHMETIC_EXCEPTION
    int to_int() const {
        if (iszero()) return 0;
        if (isnar()) throw posit_nar{};
        return int(to_float());
    }
    long to_long() const {
        if (iszero()) return 0;
        if (isnar()) throw posit_nar{};
        return long(to_double());
    }
    long long to_long_long() const {
        if (iszero()) return 0;
        if (isnar()) throw posit_nar{};
        return (long long)(to_long_double());
    }
#else
    // NOTE(review): converting INFINITY to an integer type is undefined behavior in C++;
    // kept unchanged because the value callers expect for NaR is not visible here.
    int to_int() const {
        if (iszero()) return 0;
        if (isnar()) return int(INFINITY);
        return int(to_float());
    }
    long to_long() const {
        if (iszero()) return 0;
        if (isnar()) return long(INFINITY);
        return long(to_double());
    }
    long long to_long_long() const {
        if (iszero()) return 0;
        if (isnar()) return (long long)(INFINITY);
        return (long long)(to_long_double());
    }
#endif
    float to_float() const {
        return (float)to_double();
    }
    // decode the encoding with the generic regime/exponent/fraction machinery
    double to_double() const {
        if (iszero()) return 0.0;
        if (isnar()) return NAN;
        bool _sign;
        regime<nbits, es> _regime;
        exponent<nbits, es> _exponent;
        fraction<fbits> _fraction;
        bitblock<nbits> _raw_bits;
        _raw_bits.reset();
        uint64_t mask = 1;
        for (size_t i = 0; i < nbits; i++) {   // copy _bits into the bit block, LSB first
            _raw_bits.set(i, (_bits & mask));
            mask <<= 1;
        }
        decode(_raw_bits, _sign, _regime, _exponent, _fraction);
        double s = (_sign ? -1.0 : 1.0);
        double r = _regime.value();
        double e = _exponent.value();
        double f = (1.0 + _fraction.value());
        return s * r * e * f;
    }
    long double to_long_double() const {
        if (iszero()) return 0.0;
        if (isnar()) return NAN;
        bool _sign;
        regime<nbits, es> _regime;
        exponent<nbits, es> _exponent;
        fraction<fbits> _fraction;
        bitblock<nbits> _raw_bits;
        _raw_bits.reset();
        uint64_t mask = 1;
        for (size_t i = 0; i < nbits; i++) {   // copy _bits into the bit block, LSB first
            _raw_bits.set(i, (_bits & mask));
            mask <<= 1;
        }
        decode(_raw_bits, _sign, _regime, _exponent, _fraction);
        long double s = (_sign ? -1.0 : 1.0);
        long double r = _regime.value();
        long double e = _exponent.value();
        long double f = (1.0 + _fraction.value());
        return s * r * e * f;
    }

    // Convert a native floating-point value using the same rounding rule as the lookup
    // tables: zero -> 0, inf/NaN -> NaR, |rhs| < 1/2 -> +-minpos, |rhs| <= 2 -> +-1,
    // otherwise +-maxpos.
    template <typename T>
    posit& float_assign(const T& rhs) {
        constexpr int dfbits = std::numeric_limits<T>::digits - 1;
        internal::value<dfbits> v((T)rhs);

        // special case processing
        if (v.iszero()) {
            setzero();
            return *this;
        }
        if (v.isinf() || v.isnan()) { // posit encode for FP_INFINITE and NaN as NaR (Not a Real)
            setnar();
            return *this;
        }

        const bool negative = (rhs < 0);
        const T magnitude = negative ? -rhs : rhs;
        uint8_t pattern;
        if (magnitude < T(0.5)) {
            pattern = 0x1;   // minpos, 1/4: a non-zero value never rounds to zero
        }
        else if (magnitude <= T(2)) {
            pattern = 0x2;   // 1
        }
        else {
            pattern = 0x3;   // maxpos, 4
        }
        // a negative value is the 3-bit two's complement of the positive encoding
        _bits = negative ? uint8_t((8 - pattern) & 0x07) : pattern;
        return *this;
    }

    // I/O operators
    friend std::ostream& operator<< (std::ostream& ostr, const posit<NBITS_IS_3, ES_IS_1>& p);
    friend std::istream& operator>> (std::istream& istr, posit<NBITS_IS_3, ES_IS_1>& p);

    // posit - posit logic functions
    friend bool operator==(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
    friend bool operator!=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
    friend bool operator< (const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
    friend bool operator> (const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
    friend bool operator<=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);
    friend bool operator>=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs);

};

// posit I/O operators
inline std::ostream& operator<<(std::ostream& ostr, const posit<NBITS_IS_3, ES_IS_1>& p) {
    return ostr << NBITS_IS_3 << '.' << ES_IS_1 << 'x' << to_hex(p.get()) << 'p';
}

// convert a posit value to a string using "nar" as designation of NaR
inline std::string to_string(const posit<NBITS_IS_3, ES_IS_1>& p, std::streamsize precision) {
    if (p.isnar()) {
        return std::string("nar");
    }
    std::stringstream ss;
    ss << std::setprecision(precision) << float(p);
    return ss.str();
}

// posit - posit binary logic operators
inline bool operator==(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
    return lhs._bits == rhs._bits;
}
inline bool operator!=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
    return !operator==(lhs, rhs);
}
// Encodings order like 3-bit two's complement integers. Flipping the sign bit (0x4) maps
// that order onto plain unsigned order, so negative values (and NaR, 100) sort below zero
// and the positives instead of above them.
inline bool operator< (const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
    return (lhs._bits ^ 0x4u) < (rhs._bits ^ 0x4u);
}
inline bool operator> (const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
    return operator< (rhs, lhs);
}
inline bool operator<=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
    return operator< (lhs, rhs) || operator==(lhs, rhs);
}
inline bool operator>=(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
    return !operator< (lhs, rhs);
}

// posit - posit binary arithmetic operators
inline posit<NBITS_IS_3, ES_IS_1> operator+(const posit<NBITS_IS_3, ES_IS_1>& lhs, const posit<NBITS_IS_3, ES_IS_1>& rhs) {
    posit<NBITS_IS_3, ES_IS_1> sum = lhs;
    sum += rhs;
    return sum;
}
#else // POSIT_FAST_POSIT_3_1
// too verbose #pragma message("Standard posit<3,1>")
# define POSIT_FAST_POSIT_3_1 0
#endif // POSIT_FAST_POSIT_3_1

} // namespace sw::universal