1*f0fbc68bSmrg // A relatively minimal unsigned 128-bit integer class type, used by the
2*f0fbc68bSmrg // floating-point std::to_chars implementation on targets that lack __int128.
3*f0fbc68bSmrg 
4*f0fbc68bSmrg // Copyright (C) 2021-2022 Free Software Foundation, Inc.
5*f0fbc68bSmrg //
6*f0fbc68bSmrg // This file is part of the GNU ISO C++ Library.  This library is free
7*f0fbc68bSmrg // software; you can redistribute it and/or modify it under the
8*f0fbc68bSmrg // terms of the GNU General Public License as published by the
9*f0fbc68bSmrg // Free Software Foundation; either version 3, or (at your option)
10*f0fbc68bSmrg // any later version.
11*f0fbc68bSmrg 
12*f0fbc68bSmrg // This library is distributed in the hope that it will be useful,
13*f0fbc68bSmrg // but WITHOUT ANY WARRANTY; without even the implied warranty of
14*f0fbc68bSmrg // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15*f0fbc68bSmrg // GNU General Public License for more details.
16*f0fbc68bSmrg 
17*f0fbc68bSmrg // Under Section 7 of GPL version 3, you are granted additional
18*f0fbc68bSmrg // permissions described in the GCC Runtime Library Exception, version
19*f0fbc68bSmrg // 3.1, as published by the Free Software Foundation.
20*f0fbc68bSmrg 
21*f0fbc68bSmrg // You should have received a copy of the GNU General Public License and
22*f0fbc68bSmrg // a copy of the GCC Runtime Library Exception along with this program;
23*f0fbc68bSmrg // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24*f0fbc68bSmrg // <http://www.gnu.org/licenses/>.
25*f0fbc68bSmrg 
26*f0fbc68bSmrg struct uint128_t
27*f0fbc68bSmrg {
28*f0fbc68bSmrg #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
29*f0fbc68bSmrg   uint64_t lo, hi;
30*f0fbc68bSmrg #else
31*f0fbc68bSmrg   uint64_t hi, lo;
32*f0fbc68bSmrg #endif
33*f0fbc68bSmrg 
34*f0fbc68bSmrg   uint128_t() = default;
35*f0fbc68bSmrg 
36*f0fbc68bSmrg   constexpr
37*f0fbc68bSmrg   uint128_t(uint64_t lo, uint64_t hi = 0)
38*f0fbc68bSmrg #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
louint128_t39*f0fbc68bSmrg     : lo(lo), hi(hi)
40*f0fbc68bSmrg #else
41*f0fbc68bSmrg     : hi(hi), lo(lo)
42*f0fbc68bSmrg #endif
43*f0fbc68bSmrg   { }
44*f0fbc68bSmrg 
45*f0fbc68bSmrg   constexpr explicit
46*f0fbc68bSmrg   operator bool() const
47*f0fbc68bSmrg   { return *this != 0; }
48*f0fbc68bSmrg 
49*f0fbc68bSmrg   template<typename T, typename = std::enable_if_t<std::is_integral_v<T>>>
50*f0fbc68bSmrg     constexpr explicit
Tuint128_t51*f0fbc68bSmrg     operator T() const
52*f0fbc68bSmrg     {
53*f0fbc68bSmrg       static_assert(sizeof(T) <= sizeof(uint64_t));
54*f0fbc68bSmrg       return static_cast<T>(lo);
55*f0fbc68bSmrg     }
56*f0fbc68bSmrg 
57*f0fbc68bSmrg   friend constexpr uint128_t
58*f0fbc68bSmrg   operator&(uint128_t x, const uint128_t y)
59*f0fbc68bSmrg   {
60*f0fbc68bSmrg     x.lo &= y.lo;
61*f0fbc68bSmrg     x.hi &= y.hi;
62*f0fbc68bSmrg     return x;
63*f0fbc68bSmrg   }
64*f0fbc68bSmrg 
65*f0fbc68bSmrg   friend constexpr uint128_t
66*f0fbc68bSmrg   operator|(uint128_t x, const uint128_t y)
67*f0fbc68bSmrg   {
68*f0fbc68bSmrg     x.lo |= y.lo;
69*f0fbc68bSmrg     x.hi |= y.hi;
70*f0fbc68bSmrg     return x;
71*f0fbc68bSmrg   }
72*f0fbc68bSmrg 
73*f0fbc68bSmrg   friend constexpr uint128_t
74*f0fbc68bSmrg   operator<<(uint128_t x, const uint128_t y)
75*f0fbc68bSmrg   {
76*f0fbc68bSmrg     __glibcxx_assert(y < 128);
77*f0fbc68bSmrg     // TODO: Convince GCC to use shldq on x86 here.
78*f0fbc68bSmrg     if (y.lo >= 64)
79*f0fbc68bSmrg       {
80*f0fbc68bSmrg 	x.hi = x.lo << (y.lo - 64);
81*f0fbc68bSmrg 	x.lo = 0;
82*f0fbc68bSmrg       }
83*f0fbc68bSmrg     else if (y.lo != 0)
84*f0fbc68bSmrg       {
85*f0fbc68bSmrg 	x.hi <<= y.lo;
86*f0fbc68bSmrg 	x.hi |= x.lo >> (64 - y.lo);
87*f0fbc68bSmrg 	x.lo <<= y.lo;
88*f0fbc68bSmrg       }
89*f0fbc68bSmrg     return x;
90*f0fbc68bSmrg   }
91*f0fbc68bSmrg 
92*f0fbc68bSmrg   friend constexpr uint128_t
93*f0fbc68bSmrg   operator>>(uint128_t x, const uint128_t y)
94*f0fbc68bSmrg   {
95*f0fbc68bSmrg     __glibcxx_assert(y < 128);
96*f0fbc68bSmrg     // TODO: Convince GCC to use shrdq on x86 here.
97*f0fbc68bSmrg     if (y.lo >= 64)
98*f0fbc68bSmrg       {
99*f0fbc68bSmrg 	x.lo = x.hi >> (y.lo - 64);
100*f0fbc68bSmrg 	x.hi = 0;
101*f0fbc68bSmrg       }
102*f0fbc68bSmrg     else if (y.lo != 0)
103*f0fbc68bSmrg       {
104*f0fbc68bSmrg 	x.lo >>= y.lo;
105*f0fbc68bSmrg 	x.lo |= x.hi << (64 - y.lo);
106*f0fbc68bSmrg 	x.hi >>= y.lo;
107*f0fbc68bSmrg       }
108*f0fbc68bSmrg     return x;
109*f0fbc68bSmrg   }
110*f0fbc68bSmrg 
111*f0fbc68bSmrg   constexpr uint128_t
112*f0fbc68bSmrg   operator~() const
113*f0fbc68bSmrg   { return {~lo, ~hi}; }
114*f0fbc68bSmrg 
115*f0fbc68bSmrg   constexpr uint128_t
116*f0fbc68bSmrg   operator-() const
117*f0fbc68bSmrg   { return operator~() + 1; }
118*f0fbc68bSmrg 
119*f0fbc68bSmrg   friend constexpr uint128_t
120*f0fbc68bSmrg   operator+(uint128_t x, const uint128_t y)
121*f0fbc68bSmrg   {
122*f0fbc68bSmrg     x.hi += __builtin_add_overflow(x.lo, y.lo, &x.lo);
123*f0fbc68bSmrg     x.hi += y.hi;
124*f0fbc68bSmrg     return x;
125*f0fbc68bSmrg   }
126*f0fbc68bSmrg 
127*f0fbc68bSmrg   friend constexpr uint128_t
128*f0fbc68bSmrg   operator-(uint128_t x, const uint128_t y)
129*f0fbc68bSmrg   {
130*f0fbc68bSmrg     x.hi -= __builtin_sub_overflow(x.lo, y.lo, &x.lo);
131*f0fbc68bSmrg     x.hi -= y.hi;
132*f0fbc68bSmrg     return x;
133*f0fbc68bSmrg   }
134*f0fbc68bSmrg 
135*f0fbc68bSmrg   static constexpr uint128_t
umul64_64_128uint128_t136*f0fbc68bSmrg   umul64_64_128(const uint64_t x, const uint64_t y)
137*f0fbc68bSmrg   {
138*f0fbc68bSmrg     const uint64_t xl = x & 0xffffffff;
139*f0fbc68bSmrg     const uint64_t xh = x >> 32;
140*f0fbc68bSmrg     const uint64_t yl = y & 0xffffffff;
141*f0fbc68bSmrg     const uint64_t yh = y >> 32;
142*f0fbc68bSmrg     const uint64_t ll = xl * yl;
143*f0fbc68bSmrg     const uint64_t lh = xl * yh;
144*f0fbc68bSmrg     const uint64_t hl = xh * yl;
145*f0fbc68bSmrg     const uint64_t hh = xh * yh;
146*f0fbc68bSmrg     const uint64_t m = (ll >> 32) + lh + (hl & 0xffffffff);
147*f0fbc68bSmrg     const uint64_t l = (ll & 0xffffffff ) | (m << 32);
148*f0fbc68bSmrg     const uint64_t h = (m >> 32) + (hl >> 32) + hh;
149*f0fbc68bSmrg     return {l, h};
150*f0fbc68bSmrg   }
151*f0fbc68bSmrg 
152*f0fbc68bSmrg   friend constexpr uint128_t
153*f0fbc68bSmrg   operator*(const uint128_t x, const uint128_t y)
154*f0fbc68bSmrg   {
155*f0fbc68bSmrg     uint128_t z = umul64_64_128(x.lo, y.lo);
156*f0fbc68bSmrg     z.hi += x.lo * y.hi + x.hi * y.lo;
157*f0fbc68bSmrg     return z;
158*f0fbc68bSmrg   }
159*f0fbc68bSmrg 
160*f0fbc68bSmrg   friend constexpr uint128_t
161*f0fbc68bSmrg   operator/(const uint128_t x, const uint128_t y)
162*f0fbc68bSmrg   {
163*f0fbc68bSmrg     // Ryu performs 128-bit division only by 5 and 10, so that's what we
164*f0fbc68bSmrg     // implement.  The strategy here is to relate division of x with that of
165*f0fbc68bSmrg     // x.hi and x.lo separately.
166*f0fbc68bSmrg     __glibcxx_assert(y == 5 || y == 10);
167*f0fbc68bSmrg     // The following implements division by 5 and 10.  In either case, we
168*f0fbc68bSmrg     // first compute division by 5:
169*f0fbc68bSmrg     //   x/5 = (x.hi*2^64 + x.lo)/5
170*f0fbc68bSmrg     //       = (x.hi*(2^64-1) + x.hi + x.lo)/5
171*f0fbc68bSmrg     //       = x.hi*((2^64-1)/5) + (x.hi + x.lo)/5 since CST=(2^64-1)/5 is exact
172*f0fbc68bSmrg     //       = x.hi*CST + x.hi/5 + x.lo/5 + ((x.lo%5) + (x.hi%5) >= 5)
173*f0fbc68bSmrg     // We go a step further and replace the last adjustment term with a
174*f0fbc68bSmrg     // lookup table, which we encode as a binary literal.  This seems to
175*f0fbc68bSmrg     // yield smaller code on x86 at least.
176*f0fbc68bSmrg     constexpr auto cst = ~uint64_t(0) / 5;
177*f0fbc68bSmrg     uint128_t q = uint128_t{x.hi}*cst + uint128_t{x.hi/5 + x.lo/5};
178*f0fbc68bSmrg     constexpr auto lookup = 0b111100000u;
179*f0fbc68bSmrg     q += (lookup >> ((x.hi % 5) + (x.lo % 5))) & 1;
180*f0fbc68bSmrg     if (y == 10)
181*f0fbc68bSmrg       q >>= 1;
182*f0fbc68bSmrg     return q;
183*f0fbc68bSmrg   }
184*f0fbc68bSmrg 
185*f0fbc68bSmrg   friend constexpr uint128_t
186*f0fbc68bSmrg   operator%(const uint128_t x, const uint128_t y)
187*f0fbc68bSmrg   {
188*f0fbc68bSmrg     // Ryu performs 128-bit modulus only by 2, 5 and 10, so that's what we
189*f0fbc68bSmrg     // implement.  The strategy here is to relate modulus of x with that of
190*f0fbc68bSmrg     // x.hi and x.lo separately.
191*f0fbc68bSmrg     if (y == 2)
192*f0fbc68bSmrg       return x & 1;
193*f0fbc68bSmrg     __glibcxx_assert(y == 5 || y == 10);
194*f0fbc68bSmrg     // The following implements modulus by 5 and 10.  In either case,
195*f0fbc68bSmrg     // we first compute modulus by 5:
196*f0fbc68bSmrg     //   x (mod 5) = x.hi*2^64 + x.lo (mod 5)
197*f0fbc68bSmrg     //             = x.hi + x.lo (mod 5) since 2^64 ≡ 1 (mod 5)
198*f0fbc68bSmrg     // So the straightforward implementation would be
199*f0fbc68bSmrg     //   ((x.hi % 5) + (x.lo % 5)) % 5
200*f0fbc68bSmrg     // But we go a step further and replace the outermost % with a
201*f0fbc68bSmrg     // lookup table:
202*f0fbc68bSmrg     //             = {0,1,2,3,4,0,1,2,3}[(x.hi % 5) + (x.lo % 5)] (mod 5)
203*f0fbc68bSmrg     // which we encode as an octal literal.
204*f0fbc68bSmrg     constexpr auto lookup = 0321043210u;
205*f0fbc68bSmrg     auto r = (lookup >> 3*((x.hi % 5) + (x.lo % 5))) & 7;
206*f0fbc68bSmrg     if (y == 10)
207*f0fbc68bSmrg       // x % 10 = (x % 5)      if x / 5 is even
208*f0fbc68bSmrg       //          (x % 5) + 5  if x / 5 is odd
209*f0fbc68bSmrg       // The compiler should be able to CSE the below computation of x/5 and
210*f0fbc68bSmrg       // the above modulus operations with a nearby inlined computation of x/10.
211*f0fbc68bSmrg       r += 5 * ((x/5).lo & 1);
212*f0fbc68bSmrg     return r;
213*f0fbc68bSmrg   }
214*f0fbc68bSmrg 
215*f0fbc68bSmrg   friend constexpr bool
216*f0fbc68bSmrg   operator==(const uint128_t x, const uint128_t y)
217*f0fbc68bSmrg   { return x.hi == y.hi && x.lo == y.lo; }
218*f0fbc68bSmrg 
219*f0fbc68bSmrg   friend constexpr bool
220*f0fbc68bSmrg   operator<(const uint128_t x, const uint128_t y)
221*f0fbc68bSmrg   { return x.hi < y.hi || (x.hi == y.hi && x.lo < y.lo); }
222*f0fbc68bSmrg 
223*f0fbc68bSmrg   friend constexpr auto
__bit_widthuint128_t224*f0fbc68bSmrg   __bit_width(const uint128_t x)
225*f0fbc68bSmrg   {
226*f0fbc68bSmrg     if (auto w = std::__bit_width(x.hi))
227*f0fbc68bSmrg       return w + 64;
228*f0fbc68bSmrg     else
229*f0fbc68bSmrg       return std::__bit_width(x.lo);
230*f0fbc68bSmrg   }
231*f0fbc68bSmrg 
232*f0fbc68bSmrg   friend constexpr auto
__countr_zerouint128_t233*f0fbc68bSmrg   __countr_zero(const uint128_t x)
234*f0fbc68bSmrg   {
235*f0fbc68bSmrg     auto c = std::__countr_zero(x.lo);
236*f0fbc68bSmrg     if (c == 64)
237*f0fbc68bSmrg       return 64 + std::__countr_zero(x.hi);
238*f0fbc68bSmrg     else
239*f0fbc68bSmrg       return c;
240*f0fbc68bSmrg   }
241*f0fbc68bSmrg 
242*f0fbc68bSmrg   constexpr uint128_t&
243*f0fbc68bSmrg   operator--()
244*f0fbc68bSmrg   { return *this -= 1; }
245*f0fbc68bSmrg 
246*f0fbc68bSmrg   constexpr uint128_t&
247*f0fbc68bSmrg   operator++()
248*f0fbc68bSmrg   { return *this += 1; }
249*f0fbc68bSmrg 
250*f0fbc68bSmrg   constexpr uint128_t&
251*f0fbc68bSmrg   operator+=(const uint128_t y)
252*f0fbc68bSmrg   { return *this = *this + y; }
253*f0fbc68bSmrg 
254*f0fbc68bSmrg   constexpr uint128_t&
255*f0fbc68bSmrg   operator-=(const uint128_t y)
256*f0fbc68bSmrg   { return *this = *this - y; }
257*f0fbc68bSmrg 
258*f0fbc68bSmrg   constexpr uint128_t&
259*f0fbc68bSmrg   operator*=(const uint128_t y)
260*f0fbc68bSmrg   { return *this = *this * y; }
261*f0fbc68bSmrg 
262*f0fbc68bSmrg   constexpr uint128_t&
263*f0fbc68bSmrg   operator<<=(const uint128_t y)
264*f0fbc68bSmrg   { return *this = *this << y; }
265*f0fbc68bSmrg 
266*f0fbc68bSmrg   constexpr uint128_t&
267*f0fbc68bSmrg   operator>>=(const uint128_t y)
268*f0fbc68bSmrg   { return *this = *this >> y; }
269*f0fbc68bSmrg 
270*f0fbc68bSmrg   constexpr uint128_t&
271*f0fbc68bSmrg   operator|=(const uint128_t y)
272*f0fbc68bSmrg   { return *this = *this | y; }
273*f0fbc68bSmrg 
274*f0fbc68bSmrg   constexpr uint128_t&
275*f0fbc68bSmrg   operator&=(const uint128_t y)
276*f0fbc68bSmrg   { return *this = *this & y; }
277*f0fbc68bSmrg 
278*f0fbc68bSmrg   constexpr uint128_t&
279*f0fbc68bSmrg   operator%=(const uint128_t y)
280*f0fbc68bSmrg   { return *this = *this % y; }
281*f0fbc68bSmrg 
282*f0fbc68bSmrg   constexpr uint128_t&
283*f0fbc68bSmrg   operator/=(const uint128_t y)
284*f0fbc68bSmrg   { return *this = *this / y; }
285*f0fbc68bSmrg 
286*f0fbc68bSmrg   friend constexpr bool
287*f0fbc68bSmrg   operator!=(const uint128_t x, const uint128_t y)
288*f0fbc68bSmrg   { return !(x == y); }
289*f0fbc68bSmrg 
290*f0fbc68bSmrg   friend constexpr bool
291*f0fbc68bSmrg   operator>(const uint128_t x, const uint128_t y)
292*f0fbc68bSmrg   { return y < x; }
293*f0fbc68bSmrg 
294*f0fbc68bSmrg   friend constexpr bool
295*f0fbc68bSmrg   operator>=(const uint128_t x, const uint128_t y)
296*f0fbc68bSmrg   { return !(x < y); }
297*f0fbc68bSmrg };
298