1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12 
13 /// \file Contains the std-format-spec parser.
14 ///
15 /// Most of the code can be reused in the chrono-format-spec.
16 /// This header has some support for the chrono-format-spec since it doesn't
17 /// affect the std-format-spec.
18 
19 #include <__algorithm/find_if.h>
20 #include <__algorithm/min.h>
21 #include <__assert>
22 #include <__config>
23 #include <__debug>
24 #include <__format/format_arg.h>
25 #include <__format/format_error.h>
26 #include <__format/format_parse_context.h>
27 #include <__format/format_string.h>
28 #include <__format/unicode.h>
29 #include <__variant/monostate.h>
30 #include <bit>
31 #include <concepts>
32 #include <cstdint>
33 #include <string_view>
34 #include <type_traits>
35 
36 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
37 #  pragma GCC system_header
38 #endif
39 
40 _LIBCPP_PUSH_MACROS
41 #include <__undef_macros>
42 
43 _LIBCPP_BEGIN_NAMESPACE_STD
44 
45 #if _LIBCPP_STD_VER > 17
46 
47 namespace __format_spec {
48 
49 template <class _CharT>
50 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT>
51 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
52   // This function is a wrapper to call the real parser. But it does the
53   // validation for the pre-conditions and post-conditions.
54   if (__begin == __end)
55     __throw_format_error("End of input while parsing format-spec arg-id");
56 
57   __format::__parse_number_result __r =
58       __format::__parse_arg_id(__begin, __end, __parse_ctx);
59 
60   if (__r.__ptr == __end || *__r.__ptr != _CharT('}'))
61     __throw_format_error("Invalid arg-id");
62 
63   ++__r.__ptr;
64   return __r;
65 }
66 
67 template <class _Context>
68 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t
69 __substitute_arg_id(basic_format_arg<_Context> __format_arg) {
70   return visit_format_arg(
71       [](auto __arg) -> uint32_t {
72         using _Type = decltype(__arg);
73         if constexpr (integral<_Type>) {
74           if constexpr (signed_integral<_Type>) {
75             if (__arg < 0)
76               __throw_format_error("A format-spec arg-id replacement shouldn't "
77                                    "have a negative value");
78           }
79 
80           using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
81           if (static_cast<_CT>(__arg) >
82               static_cast<_CT>(__format::__number_max))
83             __throw_format_error("A format-spec arg-id replacement exceeds "
84                                  "the maximum supported value");
85 
86           return __arg;
87         } else if constexpr (same_as<_Type, monostate>)
88           __throw_format_error("Argument index out of bounds");
89         else
90           __throw_format_error("A format-spec arg-id replacement argument "
91                                "isn't an integral type");
92       },
93       __format_arg);
94 }
95 
/// Selects which optional elements of the std-format-spec are parsed.
///
/// Every flag is off by default, so a field that gets added later is ignored
/// by existing users until they explicitly opt in.
struct __fields {
  uint8_t __sign_ : 1                 = false;
  uint8_t __alternate_form_ : 1       = false;
  uint8_t __zero_padding_ : 1         = false;
  uint8_t __precision_ : 1            = false;
  uint8_t __locale_specific_form_ : 1 = false;
  uint8_t __type_ : 1                 = false;
};
108 
109 // By not placing this constant in the formatter class it's not duplicated for
110 // char and wchar_t.
111 inline constexpr __fields __fields_integral{
112     .__sign_                 = true,
113     .__alternate_form_       = true,
114     .__zero_padding_         = true,
115     .__locale_specific_form_ = true,
116     .__type_                 = true};
117 inline constexpr __fields __fields_floating_point{
118     .__sign_                 = true,
119     .__alternate_form_       = true,
120     .__zero_padding_         = true,
121     .__precision_            = true,
122     .__locale_specific_form_ = true,
123     .__type_                 = true};
124 inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true};
125 inline constexpr __fields __fields_pointer{.__type_ = true};
126 
127 enum class _LIBCPP_ENUM_VIS __alignment : uint8_t {
128   /// No alignment is set in the format string.
129   __default,
130   __left,
131   __center,
132   __right,
133   __zero_padding
134 };
135 
136 enum class _LIBCPP_ENUM_VIS __sign : uint8_t {
137   /// No sign is set in the format string.
138   ///
139   /// The sign isn't allowed for certain format-types. By using this value
140   /// it's possible to detect whether or not the user explicitly set the sign
141   /// flag. For formatting purposes it behaves the same as \ref __minus.
142   __default,
143   __minus,
144   __plus,
145   __space
146 };
147 
148 enum class _LIBCPP_ENUM_VIS __type : uint8_t {
149   __default,
150   __string,
151   __binary_lower_case,
152   __binary_upper_case,
153   __octal,
154   __decimal,
155   __hexadecimal_lower_case,
156   __hexadecimal_upper_case,
157   __pointer,
158   __char,
159   __hexfloat_lower_case,
160   __hexfloat_upper_case,
161   __scientific_lower_case,
162   __scientific_upper_case,
163   __fixed_lower_case,
164   __fixed_upper_case,
165   __general_lower_case,
166   __general_upper_case
167 };
168 
169 struct __std {
170   __alignment __alignment_ : 3;
171   __sign __sign_ : 2;
172   bool __alternate_form_ : 1;
173   bool __locale_specific_form_ : 1;
174   __type __type_;
175 };
176 
177 struct __chrono {
178   __alignment __alignment_ : 3;
179   bool __weekday_name_ : 1;
180   bool __month_name_ : 1;
181 };
182 
183 /// Contains the parsed formatting specifications.
184 ///
185 /// This contains information for both the std-format-spec and the
186 /// chrono-format-spec. This results in some unused members for both
187 /// specifications. However these unused members don't increase the size
188 /// of the structure.
189 ///
190 /// This struct doesn't cross ABI boundaries so its layout doesn't need to be
191 /// kept stable.
192 template <class _CharT>
193 struct __parsed_specifications {
194   union {
195     // The field __alignment_ is the first element in __std_ and __chrono_.
196     // This allows the code to always inspect this value regards which member
197     // of the union is the active member [class.union.general]/2.
198     //
199     // This is needed since the generic output routines handle the alignment of
200     // the output.
201     __alignment __alignment_ : 3;
202     __std __std_;
203     __chrono __chrono_;
204   };
205 
206   /// The requested width.
207   ///
208   /// When the format-spec used an arg-id for this field it has already been
209   /// replaced with the value of that arg-id.
210   int32_t __width_;
211 
212   /// The requested precision.
213   ///
214   /// When the format-spec used an arg-id for this field it has already been
215   /// replaced with the value of that arg-id.
216   int32_t __precision_;
217 
218   _CharT __fill_;
219 
220   _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }
221 
222   _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
223 };
224 
225 // Validate the struct is small and cheap to copy since the struct is passed by
226 // value in formatting functions.
227 static_assert(sizeof(__parsed_specifications<char>) == 16);
228 static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
229 #  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
230 static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
231 static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
232 #  endif
233 
234 /// The parser for the std-format-spec.
235 ///
236 /// Note this class is a member of std::formatter specializations. It's
237 /// expected developers will create their own formatter specializations that
238 /// inherit from the std::formatter specializations. This means this class
239 /// must be ABI stable. To aid the stability the unused bits in the class are
240 /// set to zero. That way they can be repurposed if a future revision of the
241 /// Standards adds new fields to std-format-spec.
242 template <class _CharT>
243 class _LIBCPP_TEMPLATE_VIS __parser {
244 public:
245   _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields)
246       -> decltype(__parse_ctx.begin()) {
247 
248     const _CharT* __begin = __parse_ctx.begin();
249     const _CharT* __end = __parse_ctx.end();
250     if (__begin == __end)
251       return __begin;
252 
253     if (__parse_fill_align(__begin, __end) && __begin == __end)
254       return __begin;
255 
256     if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end)
257       return __begin;
258 
259     if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end)
260       return __begin;
261 
262     if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end)
263       return __begin;
264 
265     if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end)
266       return __begin;
267 
268     if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end)
269       return __begin;
270 
271     if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end)
272       return __begin;
273 
274     if (__fields.__type_) {
275       __parse_type(__begin);
276 
277       // When __type_ is false the calling parser is expected to do additional
278       // parsing. In that case that parser should do the end of format string
279       // validation.
280       if (__begin != __end && *__begin != _CharT('}'))
281         __throw_format_error("The format-spec should consume the input or end with a '}'");
282     }
283 
284     return __begin;
285   }
286 
287   /// \returns the `__parsed_specifications` with the resolved dynamic sizes..
288   _LIBCPP_HIDE_FROM_ABI
289   __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
290     return __parsed_specifications<_CharT>{
291         .__std_ =
292             __std{.__alignment_            = __alignment_,
293                   .__sign_                 = __sign_,
294                   .__alternate_form_       = __alternate_form_,
295                   .__locale_specific_form_ = __locale_specific_form_,
296                   .__type_                 = __type_},
297         .__width_{__get_width(__ctx)},
298         .__precision_{__get_precision(__ctx)},
299         .__fill_{__fill_}};
300   }
301 
302   __alignment __alignment_ : 3 {__alignment::__default};
303   __sign __sign_ : 2 {__sign::__default};
304   bool __alternate_form_ : 1 {false};
305   bool __locale_specific_form_ : 1 {false};
306   bool __reserved_0_ : 1 {false};
307   __type __type_{__type::__default};
308 
309   // These two flags are used for formatting chrono. Since the struct has
310   // padding space left it's added to this structure.
311   bool __weekday_name_ : 1 {false};
312   bool __month_name_ : 1 {false};
313 
314   uint8_t __reserved_1_ : 6 {0};
315   uint8_t __reserved_2_ : 6 {0};
316   // These two flags are only used internally and not part of the
317   // __parsed_specifications. Therefore put them at the end.
318   bool __width_as_arg_ : 1 {false};
319   bool __precision_as_arg_ : 1 {false};
320 
321   /// The requested width, either the value or the arg-id.
322   int32_t __width_{0};
323 
324   /// The requested precision, either the value or the arg-id.
325   int32_t __precision_{-1};
326 
327   // LWG 3576 will probably change this to always accept a Unicode code point
328   // To avoid changing the size with that change align the field so when it
329   // becomes 32-bit its alignment will remain the same. That also means the
330   // size will remain the same. (D2572 addresses the solution for LWG 3576.)
331   _CharT __fill_{_CharT(' ')};
332 
333 private:
334   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
335     switch (__c) {
336     case _CharT('<'):
337       __alignment_ = __alignment::__left;
338       return true;
339 
340     case _CharT('^'):
341       __alignment_ = __alignment::__center;
342       return true;
343 
344     case _CharT('>'):
345       __alignment_ = __alignment::__right;
346       return true;
347     }
348     return false;
349   }
350 
351   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(const _CharT*& __begin, const _CharT* __end) {
352     _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause "
353                                      "undefined behavior by evaluating data not in the input");
354     if (__begin + 1 != __end) {
355       if (__parse_alignment(*(__begin + 1))) {
356         if (*__begin == _CharT('{') || *__begin == _CharT('}'))
357           __throw_format_error("The format-spec fill field contains an invalid character");
358 
359         __fill_ = *__begin;
360         __begin += 2;
361         return true;
362       }
363     }
364 
365     if (!__parse_alignment(*__begin))
366       return false;
367 
368     ++__begin;
369     return true;
370   }
371 
372   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(const _CharT*& __begin) {
373     switch (*__begin) {
374     case _CharT('-'):
375       __sign_ = __sign::__minus;
376       break;
377     case _CharT('+'):
378       __sign_ = __sign::__plus;
379       break;
380     case _CharT(' '):
381       __sign_ = __sign::__space;
382       break;
383     default:
384       return false;
385     }
386     ++__begin;
387     return true;
388   }
389 
390   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(const _CharT*& __begin) {
391     if (*__begin != _CharT('#'))
392       return false;
393 
394     __alternate_form_ = true;
395     ++__begin;
396     return true;
397   }
398 
399   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(const _CharT*& __begin) {
400     if (*__begin != _CharT('0'))
401       return false;
402 
403     if (__alignment_ == __alignment::__default)
404       __alignment_ = __alignment::__zero_padding;
405     ++__begin;
406     return true;
407   }
408 
409   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(const _CharT*& __begin, const _CharT* __end, auto& __parse_ctx) {
410     if (*__begin == _CharT('0'))
411       __throw_format_error("A format-spec width field shouldn't have a leading zero");
412 
413     if (*__begin == _CharT('{')) {
414       __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
415       __width_as_arg_ = true;
416       __width_ = __r.__value;
417       __begin = __r.__ptr;
418       return true;
419     }
420 
421     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
422       return false;
423 
424     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
425     __width_ = __r.__value;
426     _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, "
427                                   "due to validations in this function");
428     __begin = __r.__ptr;
429     return true;
430   }
431 
432   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(const _CharT*& __begin, const _CharT* __end,
433                                                          auto& __parse_ctx) {
434     if (*__begin != _CharT('.'))
435       return false;
436 
437     ++__begin;
438     if (__begin == __end)
439       __throw_format_error("End of input while parsing format-spec precision");
440 
441     if (*__begin == _CharT('{')) {
442       __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
443       __precision_as_arg_ = true;
444       __precision_ = __arg_id.__value;
445       __begin = __arg_id.__ptr;
446       return true;
447     }
448 
449     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
450       __throw_format_error("The format-spec precision field doesn't contain a value or arg-id");
451 
452     __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
453     __precision_ = __r.__value;
454     __precision_as_arg_ = false;
455     __begin = __r.__ptr;
456     return true;
457   }
458 
459   _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(const _CharT*& __begin) {
460     if (*__begin != _CharT('L'))
461       return false;
462 
463     __locale_specific_form_ = true;
464     ++__begin;
465     return true;
466   }
467 
468   _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(const _CharT*& __begin) {
469     // Determines the type. It does not validate whether the selected type is
470     // valid. Most formatters have optional fields that are only allowed for
471     // certain types. These parsers need to do validation after the type has
472     // been parsed. So its easier to implement the validation for all types in
473     // the specific parse function.
474     switch (*__begin) {
475     case 'A':
476       __type_ = __type::__hexfloat_upper_case;
477       break;
478     case 'B':
479       __type_ = __type::__binary_upper_case;
480       break;
481     case 'E':
482       __type_ = __type::__scientific_upper_case;
483       break;
484     case 'F':
485       __type_ = __type::__fixed_upper_case;
486       break;
487     case 'G':
488       __type_ = __type::__general_upper_case;
489       break;
490     case 'X':
491       __type_ = __type::__hexadecimal_upper_case;
492       break;
493     case 'a':
494       __type_ = __type::__hexfloat_lower_case;
495       break;
496     case 'b':
497       __type_ = __type::__binary_lower_case;
498       break;
499     case 'c':
500       __type_ = __type::__char;
501       break;
502     case 'd':
503       __type_ = __type::__decimal;
504       break;
505     case 'e':
506       __type_ = __type::__scientific_lower_case;
507       break;
508     case 'f':
509       __type_ = __type::__fixed_lower_case;
510       break;
511     case 'g':
512       __type_ = __type::__general_lower_case;
513       break;
514     case 'o':
515       __type_ = __type::__octal;
516       break;
517     case 'p':
518       __type_ = __type::__pointer;
519       break;
520     case 's':
521       __type_ = __type::__string;
522       break;
523     case 'x':
524       __type_ = __type::__hexadecimal_lower_case;
525       break;
526     default:
527       return;
528     }
529     ++__begin;
530   }
531 
532   _LIBCPP_HIDE_FROM_ABI
533   int32_t __get_width(auto& __ctx) const {
534     if (!__width_as_arg_)
535       return __width_;
536 
537     int32_t __result = __format_spec::__substitute_arg_id(__ctx.arg(__width_));
538     if (__result == 0)
539       __throw_format_error("A format-spec width field replacement should have a positive value");
540     return __result;
541   }
542 
543   _LIBCPP_HIDE_FROM_ABI
544   int32_t __get_precision(auto& __ctx) const {
545     if (!__precision_as_arg_)
546       return __precision_;
547 
548     return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
549   }
550 };
551 
552 // Validates whether the reserved bitfields don't change the size.
553 static_assert(sizeof(__parser<char>) == 16);
554 #  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
555 static_assert(sizeof(__parser<wchar_t>) == 16);
556 #  endif
557 
558 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
559   switch (__type) {
560   case __format_spec::__type::__default:
561   case __format_spec::__type::__string:
562     break;
563 
564   default:
565     std::__throw_format_error("The format-spec type has a type not supported for a string argument");
566   }
567 }
568 
569 template <class _CharT>
570 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser) {
571   if (__parser.__sign_ != __sign::__default)
572     std::__throw_format_error("A sign field isn't allowed in this format-spec");
573 
574   if (__parser.__alternate_form_)
575     std::__throw_format_error("An alternate form field isn't allowed in this format-spec");
576 
577   if (__parser.__alignment_ == __alignment::__zero_padding)
578     std::__throw_format_error("A zero-padding field isn't allowed in this format-spec");
579 
580   if (__parser.__alignment_ == __alignment::__default)
581     __parser.__alignment_ = __alignment::__left;
582 }
583 
584 template <class _CharT>
585 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser) {
586   __format_spec::__process_display_type_bool_string(__parser);
587 }
588 
589 template <class _CharT>
590 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) {
591   switch (__parser.__type_) {
592   case __format_spec::__type::__default:
593   case __format_spec::__type::__string:
594     __format_spec::__process_display_type_bool_string(__parser);
595     break;
596 
597   case __format_spec::__type::__binary_lower_case:
598   case __format_spec::__type::__binary_upper_case:
599   case __format_spec::__type::__octal:
600   case __format_spec::__type::__decimal:
601   case __format_spec::__type::__hexadecimal_lower_case:
602   case __format_spec::__type::__hexadecimal_upper_case:
603     break;
604 
605   default:
606     std::__throw_format_error("The format-spec type has a type not supported for a bool argument");
607   }
608 }
609 
610 template <class _CharT>
611 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) {
612   switch (__parser.__type_) {
613   case __format_spec::__type::__default:
614   case __format_spec::__type::__char:
615     __format_spec::__process_display_type_char(__parser);
616     break;
617 
618   case __format_spec::__type::__binary_lower_case:
619   case __format_spec::__type::__binary_upper_case:
620   case __format_spec::__type::__octal:
621   case __format_spec::__type::__decimal:
622   case __format_spec::__type::__hexadecimal_lower_case:
623   case __format_spec::__type::__hexadecimal_upper_case:
624     break;
625 
626   default:
627     std::__throw_format_error("The format-spec type has a type not supported for a char argument");
628   }
629 }
630 
631 template <class _CharT>
632 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) {
633   switch (__parser.__type_) {
634   case __format_spec::__type::__default:
635   case __format_spec::__type::__binary_lower_case:
636   case __format_spec::__type::__binary_upper_case:
637   case __format_spec::__type::__octal:
638   case __format_spec::__type::__decimal:
639   case __format_spec::__type::__hexadecimal_lower_case:
640   case __format_spec::__type::__hexadecimal_upper_case:
641     break;
642 
643   case __format_spec::__type::__char:
644     __format_spec::__process_display_type_char(__parser);
645     break;
646 
647   default:
648     std::__throw_format_error("The format-spec type has a type not supported for an integer argument");
649   }
650 }
651 
652 template <class _CharT>
653 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser) {
654   switch (__parser.__type_) {
655   case __format_spec::__type::__default:
656     // When no precision specified then it keeps default since that
657     // formatting differs from the other types.
658     if (__parser.__precision_as_arg_ || __parser.__precision_ != -1)
659       __parser.__type_ = __format_spec::__type::__general_lower_case;
660     break;
661   case __format_spec::__type::__hexfloat_lower_case:
662   case __format_spec::__type::__hexfloat_upper_case:
663     // Precision specific behavior will be handled later.
664     break;
665   case __format_spec::__type::__scientific_lower_case:
666   case __format_spec::__type::__scientific_upper_case:
667   case __format_spec::__type::__fixed_lower_case:
668   case __format_spec::__type::__fixed_upper_case:
669   case __format_spec::__type::__general_lower_case:
670   case __format_spec::__type::__general_upper_case:
671     if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1)
672       // Set the default precision for the call to to_chars.
673       __parser.__precision_ = 6;
674     break;
675 
676   default:
677     std::__throw_format_error("The format-spec type has a type not supported for a floating-point argument");
678   }
679 }
680 
681 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type) {
682   switch (__type) {
683   case __format_spec::__type::__default:
684   case __format_spec::__type::__pointer:
685     break;
686 
687   default:
688     std::__throw_format_error("The format-spec type has a type not supported for a pointer argument");
689   }
690 }
691 
/// The result of estimating the column width of a range of code units.
template <class _CharT>
struct __column_width_result {
  /// The estimated number of output columns.
  size_t __width_;
  /// Points one beyond the last code unit used in the estimation.
  ///
  /// The original output should be limited to this position to make it fit
  /// in the wanted number of columns.
  const _CharT* __last_;
};
701 
/// The policy used when the requested column width can't be achieved exactly.
///
/// A single element can have a column width of two, so the estimation can't
/// always stop at exactly the requested width. The best policy depends on the
/// intended usage:
/// - \c __down is used for a precision. The maximum width may not be exceeded,
///   thus the result is "rounded down" to the previous boundary.
/// - \c __up is used for a width. The estimation is done once the minimum is
///   reached and exceeding it is not an issue. Rounding down would be an
///   issue, since that would result in writing fill characters. Thus the
///   result is "rounded up".
enum class __column_width_rounding { __down, __up };
712 
713 #  ifndef _LIBCPP_HAS_NO_UNICODE
714 
715 namespace __detail {
716 
717 /// Converts a code point to the column width.
718 ///
719 /// The estimations are conforming to [format.string.general]/11
720 ///
721 /// This version expects a value less than 0x1'0000, which is a 3-byte UTF-8
722 /// character.
723 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width_3(uint32_t __c) noexcept {
724   _LIBCPP_ASSERT(__c < 0x10000, "Use __column_width_4 or __column_width for larger values");
725 
726   // clang-format off
727   return 1 + (__c >= 0x1100 && (__c <= 0x115f ||
728              (__c >= 0x2329 && (__c <= 0x232a ||
729              (__c >= 0x2e80 && (__c <= 0x303e ||
730              (__c >= 0x3040 && (__c <= 0xa4cf ||
731              (__c >= 0xac00 && (__c <= 0xd7a3 ||
732              (__c >= 0xf900 && (__c <= 0xfaff ||
733              (__c >= 0xfe10 && (__c <= 0xfe19 ||
734              (__c >= 0xfe30 && (__c <= 0xfe6f ||
735              (__c >= 0xff00 && (__c <= 0xff60 ||
736              (__c >= 0xffe0 && (__c <= 0xffe6
737              ))))))))))))))))))));
738   // clang-format on
739 }
740 
741 /// @overload
742 ///
743 /// This version expects a value greater than or equal to 0x1'0000, which is a
744 /// 4-byte UTF-8 character.
745 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width_4(uint32_t __c) noexcept {
746   _LIBCPP_ASSERT(__c >= 0x10000, "Use __column_width_3 or __column_width for smaller values");
747 
748   // clang-format off
749   return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f ||
750              (__c >= 0x1'f900 && (__c <= 0x1'f9ff ||
751              (__c >= 0x2'0000 && (__c <= 0x2'fffd ||
752              (__c >= 0x3'0000 && (__c <= 0x3'fffd
753              ))))))));
754   // clang-format on
755 }
756 
757 /// @overload
758 ///
759 /// The general case, accepting all values.
760 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width(uint32_t __c) noexcept {
761   if (__c < 0x10000)
762     return __detail::__column_width_3(__c);
763 
764   return __detail::__column_width_4(__c);
765 }
766 
767 template <class _CharT>
768 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width_grapheme_clustering(
769     const _CharT* __first, const _CharT* __last, size_t __maximum, __column_width_rounding __rounding) noexcept {
770   __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last};
771 
772   __column_width_result<_CharT> __result{0, __first};
773   while (__result.__last_ != __last && __result.__width_ <= __maximum) {
774     typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume();
775     int __width = __detail::__column_width(__cluster.__code_point_);
776 
777     // When the next entry would exceed the maximum width the previous width
778     // might be returned. For example when a width of 100 is requested the
779     // returned width might be 99, since the next code point has an estimated
780     // column width of 2. This depends on the rounding flag.
781     // When the maximum is exceeded the loop will abort the next iteration.
782     if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum)
783       return __result;
784 
785     __result.__width_ += __width;
786     __result.__last_ = __cluster.__last_;
787   }
788 
789   return __result;
790 }
791 
792 } // namespace __detail
793 
794 // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
795 // Depending on format the relation between the number of code units stored and
796 // the number of output columns differs. The first relation is the number of
797 // code units forming a code point. (The text assumes the code units are
798 // unsigned.)
// - UTF-8: The number of code units is between one and four. The first 128
//   Unicode code points match the ASCII character set. When the highest bit is
//   set it means the code point has more than one code unit.
// - UTF-16: The number of code units is between one and two. When the first
//   code unit is in the range [0xd800,0xdc00) it means the code point uses two
//   code units.
805 // - UTF-32: The number of code units is always one.
806 //
807 // The code point to the number of columns is specified in
808 // [format.string.std]/11. This list might change in the future.
809 //
// Another thing to be taken into account is Grapheme clustering. This means
// that in some cases multiple code points are combined into one element in
// the output. For example:
813 // - an ASCII character with a combined diacritical mark
814 // - an emoji with a skin tone modifier
815 // - a group of combined people emoji to create a family
816 // - a combination of flag emoji
817 //
818 // See also:
819 // - [format.string.general]/11
820 // - https://en.wikipedia.org/wiki/UTF-8#Encoding
821 // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
822 
823 _LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; }
824 
825 /// Determines the number of output columns needed to render the input.
826 ///
827 /// \note When the scanner encounters malformed Unicode it acts as-if every
828 /// code unit is a one column code point. Typically a terminal uses the same
829 /// strategy and replaces every malformed code unit with a one column
830 /// replacement character.
831 ///
832 /// \param __first    Points to the first element of the input range.
833 /// \param __last     Points beyond the last element of the input range.
834 /// \param __maximum  The maximum number of output columns. The returned number
835 ///                   of estimated output columns will not exceed this value.
836 /// \param __rounding Selects the rounding method.
837 ///                   \c __down result.__width_ <= __maximum
838 ///                   \c __up result.__width_ <= __maximum + 1
839 template <class _CharT>
840 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width(
841     basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept {
842   // The width estimation is done in two steps:
843   // - Quickly process for the ASCII part. ASCII has the following properties
844   //   - One code unit is one code point
845   //   - Every code point has an estimated width of one
846   // - When needed it will a Unicode Grapheme clustering algorithm to find
847   //   the proper place for truncation.
848 
849   if (__str.empty() || __maximum == 0)
850     return {0, __str.begin()};
851 
852   // ASCII has one caveat; when an ASCII character is followed by a non-ASCII
853   // character they might be part of an extended grapheme cluster. For example:
854   //   an ASCII letter and a COMBINING ACUTE ACCENT
855   // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we
856   // need to scan one code unit beyond the requested precision. When this code
857   // unit is non-ASCII we omit the current code unit and let the Grapheme
858   // clustering algorithm do its work.
859   const _CharT* __it = __str.begin();
860   if (__is_ascii(*__it)) {
861     do {
862       --__maximum;
863       ++__it;
864       if (__it == __str.end())
865         return {__str.size(), __str.end()};
866 
867       if (__maximum == 0) {
868         if (__is_ascii(*__it))
869           return {static_cast<size_t>(__it - __str.begin()), __it};
870 
871         break;
872       }
873     } while (__is_ascii(*__it));
874     --__it;
875     ++__maximum;
876   }
877 
878   ptrdiff_t __ascii_size = __it - __str.begin();
879   __column_width_result __result =
880       __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding);
881 
882   __result.__width_ += __ascii_size;
883   return __result;
884 }
885 #  else // !defined(_LIBCPP_HAS_NO_UNICODE)
886 template <class _CharT>
887 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
888 __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept {
889   // When Unicode isn't supported assume ASCII and every code unit is one code
890   // point. In ASCII the estimated column width is always one. Thus there's no
891   // need for rounding.
892   size_t __width_ = _VSTD::min(__str.size(), __maximum);
893   return {__width_, __str.begin() + __width_};
894 }
895 
896 #  endif // !defined(_LIBCPP_HAS_NO_UNICODE)
897 
898 } // namespace __format_spec
899 
900 #endif //_LIBCPP_STD_VER > 17
901 
902 _LIBCPP_END_NAMESPACE_STD
903 
904 _LIBCPP_POP_MACROS
905 
906 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
907