xref: /openbsd/gnu/llvm/libcxx/src/ryu/d2fixed.cpp (revision 4bdff4be)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // Copyright (c) Microsoft Corporation.
10 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
11 
12 // Copyright 2018 Ulf Adams
13 // Copyright (c) Microsoft Corporation. All rights reserved.
14 
15 // Boost Software License - Version 1.0 - August 17th, 2003
16 
17 // Permission is hereby granted, free of charge, to any person or organization
18 // obtaining a copy of the software and accompanying documentation covered by
19 // this license (the "Software") to use, reproduce, display, distribute,
20 // execute, and transmit the Software, and to prepare derivative works of the
21 // Software, and to permit third-parties to whom the Software is furnished to
22 // do so, all subject to the following:
23 
24 // The copyright notices in the Software and this entire statement, including
25 // the above license grant, this restriction and the following disclaimer,
26 // must be included in all copies of the Software, in whole or in part, and
27 // all derivative works of the Software, unless such copies or derivative
28 // works are solely in the form of machine-executable object code generated by
29 // a source language processor.
30 
31 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33 // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
34 // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
35 // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
36 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
37 // DEALINGS IN THE SOFTWARE.
38 
39 // Avoid formatting to keep the changes with the original code minimal.
40 // clang-format off
41 
42 #include <__assert>
43 #include <__config>
44 #include <charconv>
45 #include <cstring>
46 #include <system_error>
47 
48 #include "include/ryu/common.h"
49 #include "include/ryu/d2fixed.h"
50 #include "include/ryu/d2fixed_full_table.h"
51 #include "include/ryu/d2s.h"
52 #include "include/ryu/d2s_intrinsics.h"
53 #include "include/ryu/digit_table.h"
54 
55 _LIBCPP_BEGIN_NAMESPACE_STD
56 
// Extra bits of precision carried in the precomputed power-of-10 tables so
// that __mulShift_mod1e9 produces exact 9-digit blocks.
inline constexpr int __POW10_ADDITIONAL_BITS = 120;
58 
59 #ifdef _LIBCPP_INTRINSIC128
60 // Returns the low 64 bits of the high 128 bits of the 256-bit product of a and b.
__umul256_hi128_lo64(const uint64_t __aHi,const uint64_t __aLo,const uint64_t __bHi,const uint64_t __bLo)61 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint64_t __umul256_hi128_lo64(
62   const uint64_t __aHi, const uint64_t __aLo, const uint64_t __bHi, const uint64_t __bLo) {
63   uint64_t __b00Hi;
64   const uint64_t __b00Lo = __ryu_umul128(__aLo, __bLo, &__b00Hi);
65   uint64_t __b01Hi;
66   const uint64_t __b01Lo = __ryu_umul128(__aLo, __bHi, &__b01Hi);
67   uint64_t __b10Hi;
68   const uint64_t __b10Lo = __ryu_umul128(__aHi, __bLo, &__b10Hi);
69   uint64_t __b11Hi;
70   const uint64_t __b11Lo = __ryu_umul128(__aHi, __bHi, &__b11Hi);
71   (void) __b00Lo; // unused
72   (void) __b11Hi; // unused
73   const uint64_t __temp1Lo = __b10Lo + __b00Hi;
74   const uint64_t __temp1Hi = __b10Hi + (__temp1Lo < __b10Lo);
75   const uint64_t __temp2Lo = __b01Lo + __temp1Lo;
76   const uint64_t __temp2Hi = __b01Hi + (__temp2Lo < __b01Lo);
77   return __b11Lo + __temp1Hi + __temp2Hi;
78 }
79 
__uint128_mod1e9(const uint64_t __vHi,const uint64_t __vLo)80 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __uint128_mod1e9(const uint64_t __vHi, const uint64_t __vLo) {
81   // After multiplying, we're going to shift right by 29, then truncate to uint32_t.
82   // This means that we need only 29 + 32 = 61 bits, so we can truncate to uint64_t before shifting.
83   const uint64_t __multiplied = __umul256_hi128_lo64(__vHi, __vLo, 0x89705F4136B4A597u, 0x31680A88F8953031u);
84 
85   // For uint32_t truncation, see the __mod1e9() comment in d2s_intrinsics.h.
86   const uint32_t __shifted = static_cast<uint32_t>(__multiplied >> 29);
87 
88   return static_cast<uint32_t>(__vLo) - 1000000000 * __shifted;
89 }
90 #endif // ^^^ intrinsics available ^^^
91 
__mulShift_mod1e9(const uint64_t __m,const uint64_t * const __mul,const int32_t __j)92 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __mulShift_mod1e9(const uint64_t __m, const uint64_t* const __mul, const int32_t __j) {
93   uint64_t __high0;                                               // 64
94   const uint64_t __low0 = __ryu_umul128(__m, __mul[0], &__high0); // 0
95   uint64_t __high1;                                               // 128
96   const uint64_t __low1 = __ryu_umul128(__m, __mul[1], &__high1); // 64
97   uint64_t __high2;                                               // 192
98   const uint64_t __low2 = __ryu_umul128(__m, __mul[2], &__high2); // 128
99   const uint64_t __s0low = __low0;                  // 0
100   (void) __s0low; // unused
101   const uint64_t __s0high = __low1 + __high0;       // 64
102   const uint32_t __c1 = __s0high < __low1;
103   const uint64_t __s1low = __low2 + __high1 + __c1; // 128
104   const uint32_t __c2 = __s1low < __low2; // __high1 + __c1 can't overflow, so compare against __low2
105   const uint64_t __s1high = __high2 + __c2;         // 192
106   _LIBCPP_ASSERT(__j >= 128, "");
107   _LIBCPP_ASSERT(__j <= 180, "");
108 #ifdef _LIBCPP_INTRINSIC128
109   const uint32_t __dist = static_cast<uint32_t>(__j - 128); // __dist: [0, 52]
110   const uint64_t __shiftedhigh = __s1high >> __dist;
111   const uint64_t __shiftedlow = __ryu_shiftright128(__s1low, __s1high, __dist);
112   return __uint128_mod1e9(__shiftedhigh, __shiftedlow);
113 #else // ^^^ intrinsics available ^^^ / vvv intrinsics unavailable vvv
114   if (__j < 160) { // __j: [128, 160)
115     const uint64_t __r0 = __mod1e9(__s1high);
116     const uint64_t __r1 = __mod1e9((__r0 << 32) | (__s1low >> 32));
117     const uint64_t __r2 = ((__r1 << 32) | (__s1low & 0xffffffff));
118     return __mod1e9(__r2 >> (__j - 128));
119   } else { // __j: [160, 192)
120     const uint64_t __r0 = __mod1e9(__s1high);
121     const uint64_t __r1 = ((__r0 << 32) | (__s1low >> 32));
122     return __mod1e9(__r1 >> (__j - 160));
123   }
124 #endif // ^^^ intrinsics unavailable ^^^
125 }
126 
__append_n_digits(const uint32_t __olength,uint32_t __digits,char * const __result)127 void __append_n_digits(const uint32_t __olength, uint32_t __digits, char* const __result) {
128   uint32_t __i = 0;
129   while (__digits >= 10000) {
130 #ifdef __clang__ // TRANSITION, LLVM-38217
131     const uint32_t __c = __digits - 10000 * (__digits / 10000);
132 #else
133     const uint32_t __c = __digits % 10000;
134 #endif
135     __digits /= 10000;
136     const uint32_t __c0 = (__c % 100) << 1;
137     const uint32_t __c1 = (__c / 100) << 1;
138     _VSTD::memcpy(__result + __olength - __i - 2, __DIGIT_TABLE + __c0, 2);
139     _VSTD::memcpy(__result + __olength - __i - 4, __DIGIT_TABLE + __c1, 2);
140     __i += 4;
141   }
142   if (__digits >= 100) {
143     const uint32_t __c = (__digits % 100) << 1;
144     __digits /= 100;
145     _VSTD::memcpy(__result + __olength - __i - 2, __DIGIT_TABLE + __c, 2);
146     __i += 2;
147   }
148   if (__digits >= 10) {
149     const uint32_t __c = __digits << 1;
150     _VSTD::memcpy(__result + __olength - __i - 2, __DIGIT_TABLE + __c, 2);
151   } else {
152     __result[0] = static_cast<char>('0' + __digits);
153   }
154 }
155 
__append_d_digits(const uint32_t __olength,uint32_t __digits,char * const __result)156 _LIBCPP_HIDE_FROM_ABI inline void __append_d_digits(const uint32_t __olength, uint32_t __digits, char* const __result) {
157   uint32_t __i = 0;
158   while (__digits >= 10000) {
159 #ifdef __clang__ // TRANSITION, LLVM-38217
160     const uint32_t __c = __digits - 10000 * (__digits / 10000);
161 #else
162     const uint32_t __c = __digits % 10000;
163 #endif
164     __digits /= 10000;
165     const uint32_t __c0 = (__c % 100) << 1;
166     const uint32_t __c1 = (__c / 100) << 1;
167     _VSTD::memcpy(__result + __olength + 1 - __i - 2, __DIGIT_TABLE + __c0, 2);
168     _VSTD::memcpy(__result + __olength + 1 - __i - 4, __DIGIT_TABLE + __c1, 2);
169     __i += 4;
170   }
171   if (__digits >= 100) {
172     const uint32_t __c = (__digits % 100) << 1;
173     __digits /= 100;
174     _VSTD::memcpy(__result + __olength + 1 - __i - 2, __DIGIT_TABLE + __c, 2);
175     __i += 2;
176   }
177   if (__digits >= 10) {
178     const uint32_t __c = __digits << 1;
179     __result[2] = __DIGIT_TABLE[__c + 1];
180     __result[1] = '.';
181     __result[0] = __DIGIT_TABLE[__c];
182   } else {
183     __result[1] = '.';
184     __result[0] = static_cast<char>('0' + __digits);
185   }
186 }
187 
__append_c_digits(const uint32_t __count,uint32_t __digits,char * const __result)188 _LIBCPP_HIDE_FROM_ABI inline void __append_c_digits(const uint32_t __count, uint32_t __digits, char* const __result) {
189   uint32_t __i = 0;
190   for (; __i < __count - 1; __i += 2) {
191     const uint32_t __c = (__digits % 100) << 1;
192     __digits /= 100;
193     _VSTD::memcpy(__result + __count - __i - 2, __DIGIT_TABLE + __c, 2);
194   }
195   if (__i < __count) {
196     const char __c = static_cast<char>('0' + (__digits % 10));
197     __result[__count - __i - 1] = __c;
198   }
199 }
200 
__append_nine_digits(uint32_t __digits,char * const __result)201 void __append_nine_digits(uint32_t __digits, char* const __result) {
202   if (__digits == 0) {
203     _VSTD::memset(__result, '0', 9);
204     return;
205   }
206 
207   for (uint32_t __i = 0; __i < 5; __i += 4) {
208 #ifdef __clang__ // TRANSITION, LLVM-38217
209     const uint32_t __c = __digits - 10000 * (__digits / 10000);
210 #else
211     const uint32_t __c = __digits % 10000;
212 #endif
213     __digits /= 10000;
214     const uint32_t __c0 = (__c % 100) << 1;
215     const uint32_t __c1 = (__c / 100) << 1;
216     _VSTD::memcpy(__result + 7 - __i, __DIGIT_TABLE + __c0, 2);
217     _VSTD::memcpy(__result + 5 - __i, __DIGIT_TABLE + __c1, 2);
218   }
219   __result[0] = static_cast<char>('0' + __digits);
220 }
221 
__indexForExponent(const uint32_t __e)222 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __indexForExponent(const uint32_t __e) {
223   return (__e + 15) / 16;
224 }
225 
__pow10BitsForIndex(const uint32_t __idx)226 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __pow10BitsForIndex(const uint32_t __idx) {
227   return 16 * __idx + __POW10_ADDITIONAL_BITS;
228 }
229 
__lengthForIndex(const uint32_t __idx)230 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline uint32_t __lengthForIndex(const uint32_t __idx) {
231   // +1 for ceil, +16 for mantissa, +8 to round up when dividing by 9
232   return (__log10Pow2(16 * static_cast<int32_t>(__idx)) + 1 + 16 + 8) / 9;
233 }
234 
__d2fixed_buffered_n(char * _First,char * const _Last,const double __d,const uint32_t __precision)235 [[nodiscard]] to_chars_result __d2fixed_buffered_n(char* _First, char* const _Last, const double __d,
236   const uint32_t __precision) {
237   char* const _Original_first = _First;
238 
239   const uint64_t __bits = __double_to_bits(__d);
240 
241   // Case distinction; exit early for the easy cases.
242   if (__bits == 0) {
243     const int32_t _Total_zero_length = 1 // leading zero
244       + static_cast<int32_t>(__precision != 0) // possible decimal point
245       + static_cast<int32_t>(__precision); // zeroes after decimal point
246 
247     if (_Last - _First < _Total_zero_length) {
248       return { _Last, errc::value_too_large };
249     }
250 
251     *_First++ = '0';
252     if (__precision > 0) {
253       *_First++ = '.';
254       _VSTD::memset(_First, '0', __precision);
255       _First += __precision;
256     }
257     return { _First, errc{} };
258   }
259 
260   // Decode __bits into mantissa and exponent.
261   const uint64_t __ieeeMantissa = __bits & ((1ull << __DOUBLE_MANTISSA_BITS) - 1);
262   const uint32_t __ieeeExponent = static_cast<uint32_t>(__bits >> __DOUBLE_MANTISSA_BITS);
263 
264   int32_t __e2;
265   uint64_t __m2;
266   if (__ieeeExponent == 0) {
267     __e2 = 1 - __DOUBLE_BIAS - __DOUBLE_MANTISSA_BITS;
268     __m2 = __ieeeMantissa;
269   } else {
270     __e2 = static_cast<int32_t>(__ieeeExponent) - __DOUBLE_BIAS - __DOUBLE_MANTISSA_BITS;
271     __m2 = (1ull << __DOUBLE_MANTISSA_BITS) | __ieeeMantissa;
272   }
273 
274   bool __nonzero = false;
275   if (__e2 >= -52) {
276     const uint32_t __idx = __e2 < 0 ? 0 : __indexForExponent(static_cast<uint32_t>(__e2));
277     const uint32_t __p10bits = __pow10BitsForIndex(__idx);
278     const int32_t __len = static_cast<int32_t>(__lengthForIndex(__idx));
279     for (int32_t __i = __len - 1; __i >= 0; --__i) {
280       const uint32_t __j = __p10bits - __e2;
281       // Temporary: __j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
282       // a slightly faster code path in __mulShift_mod1e9. Instead, we can just increase the multipliers.
283       const uint32_t __digits = __mulShift_mod1e9(__m2 << 8, __POW10_SPLIT[__POW10_OFFSET[__idx] + __i],
284         static_cast<int32_t>(__j + 8));
285       if (__nonzero) {
286         if (_Last - _First < 9) {
287           return { _Last, errc::value_too_large };
288         }
289         __append_nine_digits(__digits, _First);
290         _First += 9;
291       } else if (__digits != 0) {
292         const uint32_t __olength = __decimalLength9(__digits);
293         if (_Last - _First < static_cast<ptrdiff_t>(__olength)) {
294           return { _Last, errc::value_too_large };
295         }
296         __append_n_digits(__olength, __digits, _First);
297         _First += __olength;
298         __nonzero = true;
299       }
300     }
301   }
302   if (!__nonzero) {
303     if (_First == _Last) {
304       return { _Last, errc::value_too_large };
305     }
306     *_First++ = '0';
307   }
308   if (__precision > 0) {
309     if (_First == _Last) {
310       return { _Last, errc::value_too_large };
311     }
312     *_First++ = '.';
313   }
314   if (__e2 < 0) {
315     const int32_t __idx = -__e2 / 16;
316     const uint32_t __blocks = __precision / 9 + 1;
317     // 0 = don't round up; 1 = round up unconditionally; 2 = round up if odd.
318     int __roundUp = 0;
319     uint32_t __i = 0;
320     if (__blocks <= __MIN_BLOCK_2[__idx]) {
321       __i = __blocks;
322       if (_Last - _First < static_cast<ptrdiff_t>(__precision)) {
323         return { _Last, errc::value_too_large };
324       }
325       _VSTD::memset(_First, '0', __precision);
326       _First += __precision;
327     } else if (__i < __MIN_BLOCK_2[__idx]) {
328       __i = __MIN_BLOCK_2[__idx];
329       if (_Last - _First < static_cast<ptrdiff_t>(9 * __i)) {
330         return { _Last, errc::value_too_large };
331       }
332       _VSTD::memset(_First, '0', 9 * __i);
333       _First += 9 * __i;
334     }
335     for (; __i < __blocks; ++__i) {
336       const int32_t __j = __ADDITIONAL_BITS_2 + (-__e2 - 16 * __idx);
337       const uint32_t __p = __POW10_OFFSET_2[__idx] + __i - __MIN_BLOCK_2[__idx];
338       if (__p >= __POW10_OFFSET_2[__idx + 1]) {
339         // If the remaining digits are all 0, then we might as well use memset.
340         // No rounding required in this case.
341         const uint32_t __fill = __precision - 9 * __i;
342         if (_Last - _First < static_cast<ptrdiff_t>(__fill)) {
343           return { _Last, errc::value_too_large };
344         }
345         _VSTD::memset(_First, '0', __fill);
346         _First += __fill;
347         break;
348       }
349       // Temporary: __j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
350       // a slightly faster code path in __mulShift_mod1e9. Instead, we can just increase the multipliers.
351       uint32_t __digits = __mulShift_mod1e9(__m2 << 8, __POW10_SPLIT_2[__p], __j + 8);
352       if (__i < __blocks - 1) {
353         if (_Last - _First < 9) {
354           return { _Last, errc::value_too_large };
355         }
356         __append_nine_digits(__digits, _First);
357         _First += 9;
358       } else {
359         const uint32_t __maximum = __precision - 9 * __i;
360         uint32_t __lastDigit = 0;
361         for (uint32_t __k = 0; __k < 9 - __maximum; ++__k) {
362           __lastDigit = __digits % 10;
363           __digits /= 10;
364         }
365         if (__lastDigit != 5) {
366           __roundUp = __lastDigit > 5;
367         } else {
368           // Is m * 10^(additionalDigits + 1) / 2^(-__e2) integer?
369           const int32_t __requiredTwos = -__e2 - static_cast<int32_t>(__precision) - 1;
370           const bool __trailingZeros = __requiredTwos <= 0
371             || (__requiredTwos < 60 && __multipleOfPowerOf2(__m2, static_cast<uint32_t>(__requiredTwos)));
372           __roundUp = __trailingZeros ? 2 : 1;
373         }
374         if (__maximum > 0) {
375           if (_Last - _First < static_cast<ptrdiff_t>(__maximum)) {
376             return { _Last, errc::value_too_large };
377           }
378           __append_c_digits(__maximum, __digits, _First);
379           _First += __maximum;
380         }
381         break;
382       }
383     }
384     if (__roundUp != 0) {
385       char* _Round = _First;
386       char* _Dot = _Last;
387       while (true) {
388         if (_Round == _Original_first) {
389           _Round[0] = '1';
390           if (_Dot != _Last) {
391             _Dot[0] = '0';
392             _Dot[1] = '.';
393           }
394           if (_First == _Last) {
395             return { _Last, errc::value_too_large };
396           }
397           *_First++ = '0';
398           break;
399         }
400         --_Round;
401         const char __c = _Round[0];
402         if (__c == '.') {
403           _Dot = _Round;
404         } else if (__c == '9') {
405           _Round[0] = '0';
406           __roundUp = 1;
407         } else {
408           if (__roundUp == 1 || __c % 2 != 0) {
409             _Round[0] = __c + 1;
410           }
411           break;
412         }
413       }
414     }
415   } else {
416     if (_Last - _First < static_cast<ptrdiff_t>(__precision)) {
417       return { _Last, errc::value_too_large };
418     }
419     _VSTD::memset(_First, '0', __precision);
420     _First += __precision;
421   }
422   return { _First, errc{} };
423 }
424 
__d2exp_buffered_n(char * _First,char * const _Last,const double __d,uint32_t __precision)425 [[nodiscard]] to_chars_result __d2exp_buffered_n(char* _First, char* const _Last, const double __d,
426   uint32_t __precision) {
427   char* const _Original_first = _First;
428 
429   const uint64_t __bits = __double_to_bits(__d);
430 
431   // Case distinction; exit early for the easy cases.
432   if (__bits == 0) {
433     const int32_t _Total_zero_length = 1 // leading zero
434       + static_cast<int32_t>(__precision != 0) // possible decimal point
435       + static_cast<int32_t>(__precision) // zeroes after decimal point
436       + 4; // "e+00"
437     if (_Last - _First < _Total_zero_length) {
438       return { _Last, errc::value_too_large };
439     }
440     *_First++ = '0';
441     if (__precision > 0) {
442       *_First++ = '.';
443       _VSTD::memset(_First, '0', __precision);
444       _First += __precision;
445     }
446     _VSTD::memcpy(_First, "e+00", 4);
447     _First += 4;
448     return { _First, errc{} };
449   }
450 
451   // Decode __bits into mantissa and exponent.
452   const uint64_t __ieeeMantissa = __bits & ((1ull << __DOUBLE_MANTISSA_BITS) - 1);
453   const uint32_t __ieeeExponent = static_cast<uint32_t>(__bits >> __DOUBLE_MANTISSA_BITS);
454 
455   int32_t __e2;
456   uint64_t __m2;
457   if (__ieeeExponent == 0) {
458     __e2 = 1 - __DOUBLE_BIAS - __DOUBLE_MANTISSA_BITS;
459     __m2 = __ieeeMantissa;
460   } else {
461     __e2 = static_cast<int32_t>(__ieeeExponent) - __DOUBLE_BIAS - __DOUBLE_MANTISSA_BITS;
462     __m2 = (1ull << __DOUBLE_MANTISSA_BITS) | __ieeeMantissa;
463   }
464 
465   const bool __printDecimalPoint = __precision > 0;
466   ++__precision;
467   uint32_t __digits = 0;
468   uint32_t __printedDigits = 0;
469   uint32_t __availableDigits = 0;
470   int32_t __exp = 0;
471   if (__e2 >= -52) {
472     const uint32_t __idx = __e2 < 0 ? 0 : __indexForExponent(static_cast<uint32_t>(__e2));
473     const uint32_t __p10bits = __pow10BitsForIndex(__idx);
474     const int32_t __len = static_cast<int32_t>(__lengthForIndex(__idx));
475     for (int32_t __i = __len - 1; __i >= 0; --__i) {
476       const uint32_t __j = __p10bits - __e2;
477       // Temporary: __j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
478       // a slightly faster code path in __mulShift_mod1e9. Instead, we can just increase the multipliers.
479       __digits = __mulShift_mod1e9(__m2 << 8, __POW10_SPLIT[__POW10_OFFSET[__idx] + __i],
480         static_cast<int32_t>(__j + 8));
481       if (__printedDigits != 0) {
482         if (__printedDigits + 9 > __precision) {
483           __availableDigits = 9;
484           break;
485         }
486         if (_Last - _First < 9) {
487           return { _Last, errc::value_too_large };
488         }
489         __append_nine_digits(__digits, _First);
490         _First += 9;
491         __printedDigits += 9;
492       } else if (__digits != 0) {
493         __availableDigits = __decimalLength9(__digits);
494         __exp = __i * 9 + static_cast<int32_t>(__availableDigits) - 1;
495         if (__availableDigits > __precision) {
496           break;
497         }
498         if (__printDecimalPoint) {
499           if (_Last - _First < static_cast<ptrdiff_t>(__availableDigits + 1)) {
500             return { _Last, errc::value_too_large };
501           }
502           __append_d_digits(__availableDigits, __digits, _First);
503           _First += __availableDigits + 1; // +1 for decimal point
504         } else {
505           if (_First == _Last) {
506             return { _Last, errc::value_too_large };
507           }
508           *_First++ = static_cast<char>('0' + __digits);
509         }
510         __printedDigits = __availableDigits;
511         __availableDigits = 0;
512       }
513     }
514   }
515 
516   if (__e2 < 0 && __availableDigits == 0) {
517     const int32_t __idx = -__e2 / 16;
518     for (int32_t __i = __MIN_BLOCK_2[__idx]; __i < 200; ++__i) {
519       const int32_t __j = __ADDITIONAL_BITS_2 + (-__e2 - 16 * __idx);
520       const uint32_t __p = __POW10_OFFSET_2[__idx] + static_cast<uint32_t>(__i) - __MIN_BLOCK_2[__idx];
521       // Temporary: __j is usually around 128, and by shifting a bit, we push it to 128 or above, which is
522       // a slightly faster code path in __mulShift_mod1e9. Instead, we can just increase the multipliers.
523       __digits = (__p >= __POW10_OFFSET_2[__idx + 1]) ? 0 : __mulShift_mod1e9(__m2 << 8, __POW10_SPLIT_2[__p], __j + 8);
524       if (__printedDigits != 0) {
525         if (__printedDigits + 9 > __precision) {
526           __availableDigits = 9;
527           break;
528         }
529         if (_Last - _First < 9) {
530           return { _Last, errc::value_too_large };
531         }
532         __append_nine_digits(__digits, _First);
533         _First += 9;
534         __printedDigits += 9;
535       } else if (__digits != 0) {
536         __availableDigits = __decimalLength9(__digits);
537         __exp = -(__i + 1) * 9 + static_cast<int32_t>(__availableDigits) - 1;
538         if (__availableDigits > __precision) {
539           break;
540         }
541         if (__printDecimalPoint) {
542           if (_Last - _First < static_cast<ptrdiff_t>(__availableDigits + 1)) {
543             return { _Last, errc::value_too_large };
544           }
545           __append_d_digits(__availableDigits, __digits, _First);
546           _First += __availableDigits + 1; // +1 for decimal point
547         } else {
548           if (_First == _Last) {
549             return { _Last, errc::value_too_large };
550           }
551           *_First++ = static_cast<char>('0' + __digits);
552         }
553         __printedDigits = __availableDigits;
554         __availableDigits = 0;
555       }
556     }
557   }
558 
559   const uint32_t __maximum = __precision - __printedDigits;
560   if (__availableDigits == 0) {
561     __digits = 0;
562   }
563   uint32_t __lastDigit = 0;
564   if (__availableDigits > __maximum) {
565     for (uint32_t __k = 0; __k < __availableDigits - __maximum; ++__k) {
566       __lastDigit = __digits % 10;
567       __digits /= 10;
568     }
569   }
570   // 0 = don't round up; 1 = round up unconditionally; 2 = round up if odd.
571   int __roundUp = 0;
572   if (__lastDigit != 5) {
573     __roundUp = __lastDigit > 5;
574   } else {
575     // Is m * 2^__e2 * 10^(__precision + 1 - __exp) integer?
576     // __precision was already increased by 1, so we don't need to write + 1 here.
577     const int32_t __rexp = static_cast<int32_t>(__precision) - __exp;
578     const int32_t __requiredTwos = -__e2 - __rexp;
579     bool __trailingZeros = __requiredTwos <= 0
580       || (__requiredTwos < 60 && __multipleOfPowerOf2(__m2, static_cast<uint32_t>(__requiredTwos)));
581     if (__rexp < 0) {
582       const int32_t __requiredFives = -__rexp;
583       __trailingZeros = __trailingZeros && __multipleOfPowerOf5(__m2, static_cast<uint32_t>(__requiredFives));
584     }
585     __roundUp = __trailingZeros ? 2 : 1;
586   }
587   if (__printedDigits != 0) {
588     if (_Last - _First < static_cast<ptrdiff_t>(__maximum)) {
589       return { _Last, errc::value_too_large };
590     }
591     if (__digits == 0) {
592       _VSTD::memset(_First, '0', __maximum);
593     } else {
594       __append_c_digits(__maximum, __digits, _First);
595     }
596     _First += __maximum;
597   } else {
598     if (__printDecimalPoint) {
599       if (_Last - _First < static_cast<ptrdiff_t>(__maximum + 1)) {
600         return { _Last, errc::value_too_large };
601       }
602       __append_d_digits(__maximum, __digits, _First);
603       _First += __maximum + 1; // +1 for decimal point
604     } else {
605       if (_First == _Last) {
606         return { _Last, errc::value_too_large };
607       }
608       *_First++ = static_cast<char>('0' + __digits);
609     }
610   }
611   if (__roundUp != 0) {
612     char* _Round = _First;
613     while (true) {
614       if (_Round == _Original_first) {
615         _Round[0] = '1';
616         ++__exp;
617         break;
618       }
619       --_Round;
620       const char __c = _Round[0];
621       if (__c == '.') {
622         // Keep going.
623       } else if (__c == '9') {
624         _Round[0] = '0';
625         __roundUp = 1;
626       } else {
627         if (__roundUp == 1 || __c % 2 != 0) {
628           _Round[0] = __c + 1;
629         }
630         break;
631       }
632     }
633   }
634 
635   char _Sign_character;
636 
637   if (__exp < 0) {
638     _Sign_character = '-';
639     __exp = -__exp;
640   } else {
641     _Sign_character = '+';
642   }
643 
644   const int _Exponent_part_length = __exp >= 100
645     ? 5 // "e+NNN"
646     : 4; // "e+NN"
647 
648   if (_Last - _First < _Exponent_part_length) {
649     return { _Last, errc::value_too_large };
650   }
651 
652   *_First++ = 'e';
653   *_First++ = _Sign_character;
654 
655   if (__exp >= 100) {
656     const int32_t __c = __exp % 10;
657     _VSTD::memcpy(_First, __DIGIT_TABLE + 2 * (__exp / 10), 2);
658     _First[2] = static_cast<char>('0' + __c);
659     _First += 3;
660   } else {
661     _VSTD::memcpy(_First, __DIGIT_TABLE + 2 * __exp, 2);
662     _First += 2;
663   }
664 
665   return { _First, errc{} };
666 }
667 
668 _LIBCPP_END_NAMESPACE_STD
669 
670 // clang-format on
671