1*f0fbc68bSmrg // A relatively minimal unsigned 128-bit integer class type, used by the 2*f0fbc68bSmrg // floating-point std::to_chars implementation on targets that lack __int128. 3*f0fbc68bSmrg 4*f0fbc68bSmrg // Copyright (C) 2021-2022 Free Software Foundation, Inc. 5*f0fbc68bSmrg // 6*f0fbc68bSmrg // This file is part of the GNU ISO C++ Library. This library is free 7*f0fbc68bSmrg // software; you can redistribute it and/or modify it under the 8*f0fbc68bSmrg // terms of the GNU General Public License as published by the 9*f0fbc68bSmrg // Free Software Foundation; either version 3, or (at your option) 10*f0fbc68bSmrg // any later version. 11*f0fbc68bSmrg 12*f0fbc68bSmrg // This library is distributed in the hope that it will be useful, 13*f0fbc68bSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of 14*f0fbc68bSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15*f0fbc68bSmrg // GNU General Public License for more details. 16*f0fbc68bSmrg 17*f0fbc68bSmrg // Under Section 7 of GPL version 3, you are granted additional 18*f0fbc68bSmrg // permissions described in the GCC Runtime Library Exception, version 19*f0fbc68bSmrg // 3.1, as published by the Free Software Foundation. 20*f0fbc68bSmrg 21*f0fbc68bSmrg // You should have received a copy of the GNU General Public License and 22*f0fbc68bSmrg // a copy of the GCC Runtime Library Exception along with this program; 23*f0fbc68bSmrg // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 24*f0fbc68bSmrg // <http://www.gnu.org/licenses/>. 25*f0fbc68bSmrg 26*f0fbc68bSmrg struct uint128_t 27*f0fbc68bSmrg { 28*f0fbc68bSmrg #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 29*f0fbc68bSmrg uint64_t lo, hi; 30*f0fbc68bSmrg #else 31*f0fbc68bSmrg uint64_t hi, lo; 32*f0fbc68bSmrg #endif 33*f0fbc68bSmrg 34*f0fbc68bSmrg uint128_t() = default; 35*f0fbc68bSmrg 36*f0fbc68bSmrg constexpr 37*f0fbc68bSmrg uint128_t(uint64_t lo, uint64_t hi = 0) 38*f0fbc68bSmrg #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ louint128_t39*f0fbc68bSmrg : lo(lo), hi(hi) 40*f0fbc68bSmrg #else 41*f0fbc68bSmrg : hi(hi), lo(lo) 42*f0fbc68bSmrg #endif 43*f0fbc68bSmrg { } 44*f0fbc68bSmrg 45*f0fbc68bSmrg constexpr explicit 46*f0fbc68bSmrg operator bool() const 47*f0fbc68bSmrg { return *this != 0; } 48*f0fbc68bSmrg 49*f0fbc68bSmrg template<typename T, typename = std::enable_if_t<std::is_integral_v<T>>> 50*f0fbc68bSmrg constexpr explicit Tuint128_t51*f0fbc68bSmrg operator T() const 52*f0fbc68bSmrg { 53*f0fbc68bSmrg static_assert(sizeof(T) <= sizeof(uint64_t)); 54*f0fbc68bSmrg return static_cast<T>(lo); 55*f0fbc68bSmrg } 56*f0fbc68bSmrg 57*f0fbc68bSmrg friend constexpr uint128_t 58*f0fbc68bSmrg operator&(uint128_t x, const uint128_t y) 59*f0fbc68bSmrg { 60*f0fbc68bSmrg x.lo &= y.lo; 61*f0fbc68bSmrg x.hi &= y.hi; 62*f0fbc68bSmrg return x; 63*f0fbc68bSmrg } 64*f0fbc68bSmrg 65*f0fbc68bSmrg friend constexpr uint128_t 66*f0fbc68bSmrg operator|(uint128_t x, const uint128_t y) 67*f0fbc68bSmrg { 68*f0fbc68bSmrg x.lo |= y.lo; 69*f0fbc68bSmrg x.hi |= y.hi; 70*f0fbc68bSmrg return x; 71*f0fbc68bSmrg } 72*f0fbc68bSmrg 73*f0fbc68bSmrg friend constexpr uint128_t 74*f0fbc68bSmrg operator<<(uint128_t x, const uint128_t y) 75*f0fbc68bSmrg { 76*f0fbc68bSmrg __glibcxx_assert(y < 128); 77*f0fbc68bSmrg // TODO: Convince GCC to use shldq on x86 here. 78*f0fbc68bSmrg if (y.lo >= 64) 79*f0fbc68bSmrg { 80*f0fbc68bSmrg x.hi = x.lo << (y.lo - 64); 81*f0fbc68bSmrg x.lo = 0; 82*f0fbc68bSmrg } 83*f0fbc68bSmrg else if (y.lo != 0) 84*f0fbc68bSmrg { 85*f0fbc68bSmrg x.hi <<= y.lo; 86*f0fbc68bSmrg x.hi |= x.lo >> (64 - y.lo); 87*f0fbc68bSmrg x.lo <<= y.lo; 88*f0fbc68bSmrg } 89*f0fbc68bSmrg return x; 90*f0fbc68bSmrg } 91*f0fbc68bSmrg 92*f0fbc68bSmrg friend constexpr uint128_t 93*f0fbc68bSmrg operator>>(uint128_t x, const uint128_t y) 94*f0fbc68bSmrg { 95*f0fbc68bSmrg __glibcxx_assert(y < 128); 96*f0fbc68bSmrg // TODO: Convince GCC to use shrdq on x86 here. 97*f0fbc68bSmrg if (y.lo >= 64) 98*f0fbc68bSmrg { 99*f0fbc68bSmrg x.lo = x.hi >> (y.lo - 64); 100*f0fbc68bSmrg x.hi = 0; 101*f0fbc68bSmrg } 102*f0fbc68bSmrg else if (y.lo != 0) 103*f0fbc68bSmrg { 104*f0fbc68bSmrg x.lo >>= y.lo; 105*f0fbc68bSmrg x.lo |= x.hi << (64 - y.lo); 106*f0fbc68bSmrg x.hi >>= y.lo; 107*f0fbc68bSmrg } 108*f0fbc68bSmrg return x; 109*f0fbc68bSmrg } 110*f0fbc68bSmrg 111*f0fbc68bSmrg constexpr uint128_t 112*f0fbc68bSmrg operator~() const 113*f0fbc68bSmrg { return {~lo, ~hi}; } 114*f0fbc68bSmrg 115*f0fbc68bSmrg constexpr uint128_t 116*f0fbc68bSmrg operator-() const 117*f0fbc68bSmrg { return operator~() + 1; } 118*f0fbc68bSmrg 119*f0fbc68bSmrg friend constexpr uint128_t 120*f0fbc68bSmrg operator+(uint128_t x, const uint128_t y) 121*f0fbc68bSmrg { 122*f0fbc68bSmrg x.hi += __builtin_add_overflow(x.lo, y.lo, &x.lo); 123*f0fbc68bSmrg x.hi += y.hi; 124*f0fbc68bSmrg return x; 125*f0fbc68bSmrg } 126*f0fbc68bSmrg 127*f0fbc68bSmrg friend constexpr uint128_t 128*f0fbc68bSmrg operator-(uint128_t x, const uint128_t y) 129*f0fbc68bSmrg { 130*f0fbc68bSmrg x.hi -= __builtin_sub_overflow(x.lo, y.lo, &x.lo); 131*f0fbc68bSmrg x.hi -= y.hi; 132*f0fbc68bSmrg return x; 133*f0fbc68bSmrg } 134*f0fbc68bSmrg 135*f0fbc68bSmrg static constexpr uint128_t umul64_64_128uint128_t136*f0fbc68bSmrg umul64_64_128(const uint64_t x, const uint64_t y) 137*f0fbc68bSmrg { 138*f0fbc68bSmrg const uint64_t xl = x & 0xffffffff; 139*f0fbc68bSmrg const uint64_t xh = x >> 32; 140*f0fbc68bSmrg const uint64_t yl = y & 0xffffffff; 141*f0fbc68bSmrg const uint64_t yh = y >> 32; 142*f0fbc68bSmrg const uint64_t ll = xl * yl; 143*f0fbc68bSmrg const uint64_t lh = xl * yh; 144*f0fbc68bSmrg const uint64_t hl = xh * yl; 145*f0fbc68bSmrg const uint64_t hh = xh * yh; 146*f0fbc68bSmrg const uint64_t m = (ll >> 32) + lh + (hl & 0xffffffff); 147*f0fbc68bSmrg const uint64_t l = (ll & 0xffffffff ) | (m << 32); 148*f0fbc68bSmrg const uint64_t h = (m >> 32) + (hl >> 32) + hh; 149*f0fbc68bSmrg return {l, h}; 150*f0fbc68bSmrg } 151*f0fbc68bSmrg 152*f0fbc68bSmrg friend constexpr uint128_t 153*f0fbc68bSmrg operator*(const uint128_t x, const uint128_t y) 154*f0fbc68bSmrg { 155*f0fbc68bSmrg uint128_t z = umul64_64_128(x.lo, y.lo); 156*f0fbc68bSmrg z.hi += x.lo * y.hi + x.hi * y.lo; 157*f0fbc68bSmrg return z; 158*f0fbc68bSmrg } 159*f0fbc68bSmrg 160*f0fbc68bSmrg friend constexpr uint128_t 161*f0fbc68bSmrg operator/(const uint128_t x, const uint128_t y) 162*f0fbc68bSmrg { 163*f0fbc68bSmrg // Ryu performs 128-bit division only by 5 and 10, so that's what we 164*f0fbc68bSmrg // implement. The strategy here is to relate division of x with that of 165*f0fbc68bSmrg // x.hi and x.lo separately. 166*f0fbc68bSmrg __glibcxx_assert(y == 5 || y == 10); 167*f0fbc68bSmrg // The following implements division by 5 and 10. In either case, we 168*f0fbc68bSmrg // first compute division by 5: 169*f0fbc68bSmrg // x/5 = (x.hi*2^64 + x.lo)/5 170*f0fbc68bSmrg // = (x.hi*(2^64-1) + x.hi + x.lo)/5 171*f0fbc68bSmrg // = x.hi*((2^64-1)/5) + (x.hi + x.lo)/5 since CST=(2^64-1)/5 is exact 172*f0fbc68bSmrg // = x.hi*CST + x.hi/5 + x.lo/5 + ((x.lo%5) + (x.hi%5) >= 5) 173*f0fbc68bSmrg // We go a step further and replace the last adjustment term with a 174*f0fbc68bSmrg // lookup table, which we encode as a binary literal. This seems to 175*f0fbc68bSmrg // yield smaller code on x86 at least. 176*f0fbc68bSmrg constexpr auto cst = ~uint64_t(0) / 5; 177*f0fbc68bSmrg uint128_t q = uint128_t{x.hi}*cst + uint128_t{x.hi/5 + x.lo/5}; 178*f0fbc68bSmrg constexpr auto lookup = 0b111100000u; 179*f0fbc68bSmrg q += (lookup >> ((x.hi % 5) + (x.lo % 5))) & 1; 180*f0fbc68bSmrg if (y == 10) 181*f0fbc68bSmrg q >>= 1; 182*f0fbc68bSmrg return q; 183*f0fbc68bSmrg } 184*f0fbc68bSmrg 185*f0fbc68bSmrg friend constexpr uint128_t 186*f0fbc68bSmrg operator%(const uint128_t x, const uint128_t y) 187*f0fbc68bSmrg { 188*f0fbc68bSmrg // Ryu performs 128-bit modulus only by 2, 5 and 10, so that's what we 189*f0fbc68bSmrg // implement. The strategy here is to relate modulus of x with that of 190*f0fbc68bSmrg // x.hi and x.lo separately. 191*f0fbc68bSmrg if (y == 2) 192*f0fbc68bSmrg return x & 1; 193*f0fbc68bSmrg __glibcxx_assert(y == 5 || y == 10); 194*f0fbc68bSmrg // The following implements modulus by 5 and 10. In either case, 195*f0fbc68bSmrg // we first compute modulus by 5: 196*f0fbc68bSmrg // x (mod 5) = x.hi*2^64 + x.lo (mod 5) 197*f0fbc68bSmrg // = x.hi + x.lo (mod 5) since 2^64 ≡ 1 (mod 5) 198*f0fbc68bSmrg // So the straightforward implementation would be 199*f0fbc68bSmrg // ((x.hi % 5) + (x.lo % 5)) % 5 200*f0fbc68bSmrg // But we go a step further and replace the outermost % with a 201*f0fbc68bSmrg // lookup table: 202*f0fbc68bSmrg // = {0,1,2,3,4,0,1,2,3}[(x.hi % 5) + (x.lo % 5)] (mod 5) 203*f0fbc68bSmrg // which we encode as an octal literal. 204*f0fbc68bSmrg constexpr auto lookup = 0321043210u; 205*f0fbc68bSmrg auto r = (lookup >> 3*((x.hi % 5) + (x.lo % 5))) & 7; 206*f0fbc68bSmrg if (y == 10) 207*f0fbc68bSmrg // x % 10 = (x % 5) if x / 5 is even 208*f0fbc68bSmrg // (x % 5) + 5 if x / 5 is odd 209*f0fbc68bSmrg // The compiler should be able to CSE the below computation of x/5 and 210*f0fbc68bSmrg // the above modulus operations with a nearby inlined computation of x/10. 211*f0fbc68bSmrg r += 5 * ((x/5).lo & 1); 212*f0fbc68bSmrg return r; 213*f0fbc68bSmrg } 214*f0fbc68bSmrg 215*f0fbc68bSmrg friend constexpr bool 216*f0fbc68bSmrg operator==(const uint128_t x, const uint128_t y) 217*f0fbc68bSmrg { return x.hi == y.hi && x.lo == y.lo; } 218*f0fbc68bSmrg 219*f0fbc68bSmrg friend constexpr bool 220*f0fbc68bSmrg operator<(const uint128_t x, const uint128_t y) 221*f0fbc68bSmrg { return x.hi < y.hi || (x.hi == y.hi && x.lo < y.lo); } 222*f0fbc68bSmrg 223*f0fbc68bSmrg friend constexpr auto __bit_widthuint128_t224*f0fbc68bSmrg __bit_width(const uint128_t x) 225*f0fbc68bSmrg { 226*f0fbc68bSmrg if (auto w = std::__bit_width(x.hi)) 227*f0fbc68bSmrg return w + 64; 228*f0fbc68bSmrg else 229*f0fbc68bSmrg return std::__bit_width(x.lo); 230*f0fbc68bSmrg } 231*f0fbc68bSmrg 232*f0fbc68bSmrg friend constexpr auto __countr_zerouint128_t233*f0fbc68bSmrg __countr_zero(const uint128_t x) 234*f0fbc68bSmrg { 235*f0fbc68bSmrg auto c = std::__countr_zero(x.lo); 236*f0fbc68bSmrg if (c == 64) 237*f0fbc68bSmrg return 64 + std::__countr_zero(x.hi); 238*f0fbc68bSmrg else 239*f0fbc68bSmrg return c; 240*f0fbc68bSmrg } 241*f0fbc68bSmrg 242*f0fbc68bSmrg constexpr uint128_t& 243*f0fbc68bSmrg operator--() 244*f0fbc68bSmrg { return *this -= 1; } 245*f0fbc68bSmrg 246*f0fbc68bSmrg constexpr uint128_t& 247*f0fbc68bSmrg operator++() 248*f0fbc68bSmrg { return *this += 1; } 249*f0fbc68bSmrg 250*f0fbc68bSmrg constexpr uint128_t& 251*f0fbc68bSmrg operator+=(const uint128_t y) 252*f0fbc68bSmrg { return *this = *this + y; } 253*f0fbc68bSmrg 254*f0fbc68bSmrg constexpr uint128_t& 255*f0fbc68bSmrg operator-=(const uint128_t y) 256*f0fbc68bSmrg { return *this = *this - y; } 257*f0fbc68bSmrg 258*f0fbc68bSmrg constexpr uint128_t& 259*f0fbc68bSmrg operator*=(const uint128_t y) 260*f0fbc68bSmrg { return *this = *this * y; } 261*f0fbc68bSmrg 262*f0fbc68bSmrg constexpr uint128_t& 263*f0fbc68bSmrg operator<<=(const uint128_t y) 264*f0fbc68bSmrg { return *this = *this << y; } 265*f0fbc68bSmrg 266*f0fbc68bSmrg constexpr uint128_t& 267*f0fbc68bSmrg operator>>=(const uint128_t y) 268*f0fbc68bSmrg { return *this = *this >> y; } 269*f0fbc68bSmrg 270*f0fbc68bSmrg constexpr uint128_t& 271*f0fbc68bSmrg operator|=(const uint128_t y) 272*f0fbc68bSmrg { return *this = *this | y; } 273*f0fbc68bSmrg 274*f0fbc68bSmrg constexpr uint128_t& 275*f0fbc68bSmrg operator&=(const uint128_t y) 276*f0fbc68bSmrg { return *this = *this & y; } 277*f0fbc68bSmrg 278*f0fbc68bSmrg constexpr uint128_t& 279*f0fbc68bSmrg operator%=(const uint128_t y) 280*f0fbc68bSmrg { return *this = *this % y; } 281*f0fbc68bSmrg 282*f0fbc68bSmrg constexpr uint128_t& 283*f0fbc68bSmrg operator/=(const uint128_t y) 284*f0fbc68bSmrg { return *this = *this / y; } 285*f0fbc68bSmrg 286*f0fbc68bSmrg friend constexpr bool 287*f0fbc68bSmrg operator!=(const uint128_t x, const uint128_t y) 288*f0fbc68bSmrg { return !(x == y); } 289*f0fbc68bSmrg 290*f0fbc68bSmrg friend constexpr bool 291*f0fbc68bSmrg operator>(const uint128_t x, const uint128_t y) 292*f0fbc68bSmrg { return y < x; } 293*f0fbc68bSmrg 294*f0fbc68bSmrg friend constexpr bool 295*f0fbc68bSmrg operator>=(const uint128_t x, const uint128_t y) 296*f0fbc68bSmrg { return !(x < y); } 297*f0fbc68bSmrg }; 298