// src/c++17/uint128_t.h

// A relatively minimal unsigned 128-bit integer class type, used by the
// floating-point std::to_chars implementation on targets that lack __int128.

// Copyright (C) 2021-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

// An unsigned 128-bit integer represented as two 64-bit halves, `lo` (bits
// 0..63) and `hi` (bits 64..127).  All arithmetic wraps modulo 2^128.
//
// Only a subset of the usual integer operations is provided; notably,
// operator/ supports the divisors 5 and 10 and operator% supports the
// divisors 2, 5 and 10, and nothing else.
//
// This code includes nothing itself: uint64_t, std::enable_if_t,
// std::is_integral_v, std::__bit_width, std::__countr_zero and
// __glibcxx_assert must already be visible where it is compiled.
struct uint128_t
{
  // The member order depends on the target's byte order: the low half comes
  // first on little-endian targets, the high half first otherwise
  // (presumably so that the object representation matches that of a native
  // 128-bit integer -- nothing in this struct relies on it).
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  uint64_t lo, hi;
#else
  uint64_t hi, lo;
#endif

  // Leaves both halves uninitialized unless the object is value-initialized.
  uint128_t() = default;

  // Constructs the value hi*2^64 + lo.  Deliberately non-explicit so that
  // 64-bit (and narrower) integers convert implicitly, which is what lets
  // expressions such as `x + 1` or `y == 5` work.  The mem-initializers are
  // listed in member declaration order for either byte order.
  constexpr
  uint128_t(uint64_t lo, uint64_t hi = 0)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    : lo(lo), hi(hi)
#else
    : hi(hi), lo(lo)
#endif
  { }

  // True iff the value is nonzero.
  constexpr explicit
  operator bool() const
  { return *this != 0; }

  // Explicit truncating conversion to any integral type no wider than 64
  // bits: the result is static_cast<T>(lo); the high half is discarded.
  template<typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
    constexpr explicit
    operator T() const
    {
      static_assert(sizeof(T) <= sizeof(uint64_t));
      return static_cast<T>(lo);
    }

  // Bitwise AND, applied to each half independently.
  friend constexpr uint128_t
  operator&(uint128_t x, const uint128_t y)
  {
    x.lo &= y.lo;
    x.hi &= y.hi;
    return x;
  }

  // Bitwise OR, applied to each half independently.
  friend constexpr uint128_t
  operator|(uint128_t x, const uint128_t y)
  {
    x.lo |= y.lo;
    x.hi |= y.hi;
    return x;
  }

  // Left shift of x by y bits.  Precondition (asserted): y < 128.  After the
  // assertion only y.lo is consulted.
  friend constexpr uint128_t
  operator<<(uint128_t x, const uint128_t y)
  {
    __glibcxx_assert(y < 128);
    // TODO: Convince GCC to use shldq on x86 here.
    if (y.lo >= 64)
      {
        // The whole low half moves into the high half.
        x.hi = x.lo << (y.lo - 64);
        x.lo = 0;
      }
    else if (y.lo != 0)
      {
        // A zero shift is excluded above because `x.lo >> 64` would be an
        // out-of-range shift of a 64-bit operand.
        x.hi <<= y.lo;
        x.hi |= x.lo >> (64 - y.lo);
        x.lo <<= y.lo;
      }
    return x;
  }

  // Logical right shift of x by y bits.  Precondition (asserted): y < 128.
  // After the assertion only y.lo is consulted.
  friend constexpr uint128_t
  operator>>(uint128_t x, const uint128_t y)
  {
    __glibcxx_assert(y < 128);
    // TODO: Convince GCC to use shrdq on x86 here.
    if (y.lo >= 64)
      {
        // The whole high half moves into the low half.
        x.lo = x.hi >> (y.lo - 64);
        x.hi = 0;
      }
    else if (y.lo != 0)
      {
        // A zero shift is excluded above because `x.hi << 64` would be an
        // out-of-range shift of a 64-bit operand.
        x.lo >>= y.lo;
        x.lo |= x.hi << (64 - y.lo);
        x.hi >>= y.lo;
      }
    return x;
  }

  // Bitwise complement of both halves.
  constexpr uint128_t
  operator~() const
  { return {~lo, ~hi}; }

  // Two's complement negation: ~x + 1 (mod 2^128).
  constexpr uint128_t
  operator-() const
  { return operator~() + 1; }

  // Addition modulo 2^128.
  friend constexpr uint128_t
  operator+(uint128_t x, const uint128_t y)
  {
    // __builtin_add_overflow stores the wrapped low-half sum in x.lo and
    // returns true on carry, which is propagated into the high half.
    x.hi += __builtin_add_overflow(x.lo, y.lo, &x.lo);
    x.hi += y.hi;
    return x;
  }

  // Subtraction modulo 2^128.
  friend constexpr uint128_t
  operator-(uint128_t x, const uint128_t y)
  {
    // __builtin_sub_overflow stores the wrapped low-half difference in x.lo
    // and returns true on borrow, which is taken from the high half.
    x.hi -= __builtin_sub_overflow(x.lo, y.lo, &x.lo);
    x.hi -= y.hi;
    return x;
  }

  // Full 64x64 -> 128-bit unsigned product of x and y, computed from the
  // four 32x32 -> 64-bit partial products of their 32-bit halves.
  static constexpr uint128_t
  umul64_64_128(const uint64_t x, const uint64_t y)
  {
    const uint64_t xl = x & 0xffffffff;
    const uint64_t xh = x >> 32;
    const uint64_t yl = y & 0xffffffff;
    const uint64_t yh = y >> 32;
    const uint64_t ll = xl * yl;
    const uint64_t lh = xl * yh;
    const uint64_t hl = xh * yl;
    const uint64_t hh = xh * yh;
    // Middle column (bits 32..95 of the product).  This cannot overflow:
    // (2^32-1) + (2^32-1)^2 + (2^32-1) == 2^64 - 1.
    const uint64_t m = (ll >> 32) + lh + (hl & 0xffffffff);
    const uint64_t l = (ll & 0xffffffff ) | (m << 32);
    const uint64_t h = (m >> 32) + (hl >> 32) + hh;
    return {l, h};
  }

  // Multiplication modulo 2^128.
  friend constexpr uint128_t
  operator*(const uint128_t x, const uint128_t y)
  {
    uint128_t z = umul64_64_128(x.lo, y.lo);
    // The cross terms contribute only to the high half; their own upper
    // bits and the x.hi*y.hi term lie at or above 2^128 and are dropped.
    z.hi += x.lo * y.hi + x.hi * y.lo;
    return z;
  }

  // Division by 5 or 10 only.  Precondition (asserted): y == 5 || y == 10.
  // If the assertion is compiled out, any y other than 10 yields x/5.
  friend constexpr uint128_t
  operator/(const uint128_t x, const uint128_t y)
  {
    // Ryu performs 128-bit division only by 5 and 10, so that's what we
    // implement.  The strategy here is to relate division of x with that of
    // x.hi and x.lo separately.
    __glibcxx_assert(y == 5 || y == 10);
    // The following implements division by 5 and 10.  In either case, we
    // first compute division by 5:
    //   x/5 = (x.hi*2^64 + x.lo)/5
    //       = (x.hi*(2^64-1) + x.hi + x.lo)/5
    //       = x.hi*((2^64-1)/5) + (x.hi + x.lo)/5 since CST=(2^64-1)/5 is exact
    //       = x.hi*CST + x.hi/5 + x.lo/5 + ((x.lo%5) + (x.hi%5) >= 5)
    // We go a step further and replace the last adjustment term with a
    // lookup table, which we encode as a binary literal.  This seems to
    // yield smaller code on x86 at least.
    constexpr auto cst = ~uint64_t(0) / 5;
    uint128_t q = uint128_t{x.hi}*cst + uint128_t{x.hi/5 + x.lo/5};
    // Bit s of the table is set iff s >= 5, for s = (x.hi%5)+(x.lo%5) in 0..8.
    constexpr auto lookup = 0b111100000u;
    q += (lookup >> ((x.hi % 5) + (x.lo % 5))) & 1;
    if (y == 10)
      // floor(floor(x/5) / 2) == floor(x/10).
      q >>= 1;
    return q;
  }

  // Modulus by 2, 5 or 10 only.  Precondition (asserted): y is one of those.
  // If the assertion is compiled out, any y other than 2 and 10 yields x%5.
  friend constexpr uint128_t
  operator%(const uint128_t x, const uint128_t y)
  {
    // Ryu performs 128-bit modulus only by 2, 5 and 10, so that's what we
    // implement.  The strategy here is to relate modulus of x with that of
    // x.hi and x.lo separately.
    if (y == 2)
      return x & 1;
    __glibcxx_assert(y == 5 || y == 10);
    // The following implements modulus by 5 and 10.  In either case,
    // we first compute modulus by 5:
    //   x (mod 5) = x.hi*2^64 + x.lo (mod 5)
    //             = x.hi + x.lo (mod 5) since 2^64 == 1 (mod 5)
    // So the straightforward implementation would be
    //   ((x.hi % 5) + (x.lo % 5)) % 5
    // But we go a step further and replace the outermost % with a
    // lookup table:
    //             = {0,1,2,3,4,0,1,2,3}[(x.hi % 5) + (x.lo % 5)] (mod 5)
    // which we encode as an octal literal (one 3-bit digit per entry, entry 0
    // being the least significant digit).
    constexpr auto lookup = 0321043210u;
    auto r = (lookup >> 3*((x.hi % 5) + (x.lo % 5))) & 7;
    if (y == 10)
      // x % 10 = (x % 5)      if x / 5 is even
      //          (x % 5) + 5  if x / 5 is odd
      // The compiler should be able to CSE the below computation of x/5 and
      // the above modulus operations with a nearby inlined computation of x/10.
      r += 5 * ((x/5).lo & 1);
    return r;
  }

  friend constexpr bool
  operator==(const uint128_t x, const uint128_t y)
  { return x.hi == y.hi && x.lo == y.lo; }

  // Unsigned comparison: the high halves decide, the low halves break ties.
  friend constexpr bool
  operator<(const uint128_t x, const uint128_t y)
  { return x.hi < y.hi || (x.hi == y.hi && x.lo < y.lo); }

  // Number of bits needed to represent x (0 for x == 0), built from
  // std::__bit_width on the halves.  Found by argument-dependent lookup.
  friend constexpr auto
  __bit_width(const uint128_t x)
  {
    if (auto w = std::__bit_width(x.hi))
      return w + 64;
    else
      return std::__bit_width(x.lo);
  }

  // Number of trailing zero bits in x, built from std::__countr_zero on the
  // halves; the high half is examined only when the low half is all zeros.
  // Found by argument-dependent lookup.
  friend constexpr auto
  __countr_zero(const uint128_t x)
  {
    auto c = std::__countr_zero(x.lo);
    if (c == 64)
      return 64 + std::__countr_zero(x.hi);
    else
      return c;
  }

  // Prefix decrement/increment and the compound assignments below are all
  // defined in terms of the corresponding binary operators, and therefore
  // share their preconditions.
  constexpr uint128_t&
  operator--()
  { return *this -= 1; }

  constexpr uint128_t&
  operator++()
  { return *this += 1; }

  constexpr uint128_t&
  operator+=(const uint128_t y)
  { return *this = *this + y; }

  constexpr uint128_t&
  operator-=(const uint128_t y)
  { return *this = *this - y; }

  constexpr uint128_t&
  operator*=(const uint128_t y)
  { return *this = *this * y; }

  constexpr uint128_t&
  operator<<=(const uint128_t y)
  { return *this = *this << y; }

  constexpr uint128_t&
  operator>>=(const uint128_t y)
  { return *this = *this >> y; }

  constexpr uint128_t&
  operator|=(const uint128_t y)
  { return *this = *this | y; }

  constexpr uint128_t&
  operator&=(const uint128_t y)
  { return *this = *this & y; }

  constexpr uint128_t&
  operator%=(const uint128_t y)
  { return *this = *this % y; }

  constexpr uint128_t&
  operator/=(const uint128_t y)
  { return *this = *this / y; }

  // The remaining comparisons are derived from operator== and operator<.
  friend constexpr bool
  operator!=(const uint128_t x, const uint128_t y)
  { return !(x == y); }

  friend constexpr bool
  operator>(const uint128_t x, const uint128_t y)
  { return y < x; }

  friend constexpr bool
  operator>=(const uint128_t x, const uint128_t y)
  { return !(x < y); }
};