1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12 
13 #include <__algorithm/find_if.h>
14 #include <__algorithm/min.h>
15 #include <__config>
16 #include <__debug>
17 #include <__format/format_arg.h>
18 #include <__format/format_error.h>
19 #include <__format/format_string.h>
20 #include <__variant/monostate.h>
21 #include <bit>
22 #include <concepts>
23 #include <cstdint>
24 #include <type_traits>
25 
26 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
27 # pragma GCC system_header
28 #endif
29 
30 _LIBCPP_PUSH_MACROS
31 #include <__undef_macros>
32 
33 _LIBCPP_BEGIN_NAMESPACE_STD
34 
35 #if _LIBCPP_STD_VER > 17
36 
37 // TODO FMT Remove this once we require compilers with proper C++20 support.
38 // If the compiler has no concepts support, the format header will be disabled.
// Without concepts support enable_if needs to be used and that is too much
// effort to support compilers with partial C++20 support.
41 # if !defined(_LIBCPP_HAS_NO_CONCEPTS)
42 
43 namespace __format_spec {
44 
45 /**
46  * Contains the flags for the std-format-spec.
47  *
48  * Some format-options can only be used for specific C++types and may depend on
49  * the selected format-type.
50  * * The C++type filtering can be done using the proper policies for
51  *   @ref __parser_std.
52  * * The format-type filtering needs to be done post parsing in the parser
53  *   derived from @ref __parser_std.
54  */
55 class _LIBCPP_TYPE_VIS _Flags {
56 public:
57   enum class _LIBCPP_ENUM_VIS _Alignment : uint8_t {
58     /**
59      * No alignment is set in the format string.
60      *
61      * Zero-padding is ignored when an alignment is selected.
62      * The default alignment depends on the selected format-type.
63      */
64     __default,
65     __left,
66     __center,
67     __right
68   };
69   enum class _LIBCPP_ENUM_VIS _Sign : uint8_t {
70     /**
71      * No sign is set in the format string.
72      *
73      * The sign isn't allowed for certain format-types. By using this value
74      * it's possible to detect whether or not the user explicitly set the sign
75      * flag. For formatting purposes it behaves the same as @ref __minus.
76      */
77     __default,
78     __minus,
79     __plus,
80     __space
81   };
82 
83   _Alignment __alignment : 2 {_Alignment::__default};
84   _Sign __sign : 2 {_Sign::__default};
85   uint8_t __alternate_form : 1 {false};
86   uint8_t __zero_padding : 1 {false};
87   uint8_t __locale_specific_form : 1 {false};
88 
89   enum class _LIBCPP_ENUM_VIS _Type : uint8_t {
90     __default,
91     __string,
92     __binary_lower_case,
93     __binary_upper_case,
94     __octal,
95     __decimal,
96     __hexadecimal_lower_case,
97     __hexadecimal_upper_case,
98     __pointer,
99     __char,
100     __float_hexadecimal_lower_case,
101     __float_hexadecimal_upper_case,
102     __scientific_lower_case,
103     __scientific_upper_case,
104     __fixed_lower_case,
105     __fixed_upper_case,
106     __general_lower_case,
107     __general_upper_case
108   };
109 
110   _Type __type{_Type::__default};
111 };
112 
113 namespace __detail {
114 template <class _CharT>
115 _LIBCPP_HIDE_FROM_ABI constexpr bool
__parse_alignment(_CharT __c,_Flags & __flags)116 __parse_alignment(_CharT __c, _Flags& __flags) noexcept {
117   switch (__c) {
118   case _CharT('<'):
119     __flags.__alignment = _Flags::_Alignment::__left;
120     return true;
121 
122   case _CharT('^'):
123     __flags.__alignment = _Flags::_Alignment::__center;
124     return true;
125 
126   case _CharT('>'):
127     __flags.__alignment = _Flags::_Alignment::__right;
128     return true;
129   }
130   return false;
131 }
132 } // namespace __detail
133 
134 template <class _CharT>
135 class _LIBCPP_TEMPLATE_VIS __parser_fill_align {
136 public:
137   // TODO FMT The standard doesn't specify this character is a Unicode
138   // character. Validate what fmt and MSVC have implemented.
139   _CharT __fill{_CharT(' ')};
140 
141 protected:
142   _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse(const _CharT * __begin,const _CharT * __end,_Flags & __flags)143   __parse(const _CharT* __begin, const _CharT* __end, _Flags& __flags) {
144     _LIBCPP_ASSERT(__begin != __end,
145                    "When called with an empty input the function will cause "
146                    "undefined behavior by evaluating data not in the input");
147     if (__begin + 1 != __end) {
148       if (__detail::__parse_alignment(*(__begin + 1), __flags)) {
149         if (*__begin == _CharT('{') || *__begin == _CharT('}'))
150           __throw_format_error(
151               "The format-spec fill field contains an invalid character");
152         __fill = *__begin;
153         return __begin + 2;
154       }
155     }
156 
157     if (__detail::__parse_alignment(*__begin, __flags))
158       return __begin + 1;
159 
160     return __begin;
161   }
162 };
163 
164 template <class _CharT>
165 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse_sign(const _CharT * __begin,_Flags & __flags)166 __parse_sign(const _CharT* __begin, _Flags& __flags) noexcept {
167   switch (*__begin) {
168   case _CharT('-'):
169     __flags.__sign = _Flags::_Sign::__minus;
170     break;
171   case _CharT('+'):
172     __flags.__sign = _Flags::_Sign::__plus;
173     break;
174   case _CharT(' '):
175     __flags.__sign = _Flags::_Sign::__space;
176     break;
177   default:
178     return __begin;
179   }
180   return __begin + 1;
181 }
182 
183 template <class _CharT>
184 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse_alternate_form(const _CharT * __begin,_Flags & __flags)185 __parse_alternate_form(const _CharT* __begin, _Flags& __flags) noexcept {
186   if (*__begin == _CharT('#')) {
187     __flags.__alternate_form = true;
188     ++__begin;
189   }
190 
191   return __begin;
192 }
193 
194 template <class _CharT>
195 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse_zero_padding(const _CharT * __begin,_Flags & __flags)196 __parse_zero_padding(const _CharT* __begin, _Flags& __flags) noexcept {
197   if (*__begin == _CharT('0')) {
198     __flags.__zero_padding = true;
199     ++__begin;
200   }
201 
202   return __begin;
203 }
204 
205 template <class _CharT>
206 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT>
__parse_arg_id(const _CharT * __begin,const _CharT * __end,auto & __parse_ctx)207 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
208   // This function is a wrapper to call the real parser. But it does the
209   // validation for the pre-conditions and post-conditions.
210   if (__begin == __end)
211     __throw_format_error("End of input while parsing format-spec arg-id");
212 
213   __format::__parse_number_result __r =
214       __format::__parse_arg_id(__begin, __end, __parse_ctx);
215 
216   if (__r.__ptr == __end || *__r.__ptr != _CharT('}'))
217     __throw_format_error("A format-spec arg-id should terminate at a '}'");
218 
219   ++__r.__ptr;
220   return __r;
221 }
222 
223 template <class _Context>
224 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t
__substitute_arg_id(basic_format_arg<_Context> __arg)225 __substitute_arg_id(basic_format_arg<_Context> __arg) {
226   return visit_format_arg(
227       [](auto __arg) -> uint32_t {
228         using _Type = decltype(__arg);
229         if constexpr (integral<_Type>) {
230           if constexpr (signed_integral<_Type>) {
231             if (__arg < 0)
232               __throw_format_error("A format-spec arg-id replacement shouldn't "
233                                    "have a negative value");
234           }
235 
236           using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
237           if (static_cast<_CT>(__arg) >
238               static_cast<_CT>(__format::__number_max))
239             __throw_format_error("A format-spec arg-id replacement exceeds "
240                                  "the maximum supported value");
241 
242           return __arg;
243         } else if constexpr (same_as<_Type, monostate>)
244           __throw_format_error("Argument index out of bounds");
245         else
246           __throw_format_error("A format-spec arg-id replacement argument "
247                                "isn't an integral type");
248       },
249       __arg);
250 }
251 
252 class _LIBCPP_TYPE_VIS __parser_width {
253 public:
254   /** Contains a width or an arg-id. */
255   uint32_t __width : 31 {0};
256   /** Determines whether the value stored is a width or an arg-id. */
257   uint32_t __width_as_arg : 1 {0};
258 
259 protected:
260   /**
261    * Does the supplied std-format-spec contain a width field?
262    *
263    * When the field isn't present there's no padding required. This can be used
264    * to optimize the formatting.
265    */
__has_width_field()266   constexpr bool __has_width_field() const noexcept {
267     return __width_as_arg || __width;
268   }
269 
270   /**
271    * Does the supplied width field contain an arg-id?
272    *
273    * If @c true the formatter needs to call @ref __substitute_width_arg_id.
274    */
__width_needs_substitution()275   constexpr bool __width_needs_substitution() const noexcept {
276     return __width_as_arg;
277   }
278 
279   template <class _CharT>
280   _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse(const _CharT * __begin,const _CharT * __end,auto & __parse_ctx)281   __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
282     if (*__begin == _CharT('0'))
283       __throw_format_error(
284           "A format-spec width field shouldn't have a leading zero");
285 
286     if (*__begin == _CharT('{')) {
287       __format::__parse_number_result __r =
288           __parse_arg_id(++__begin, __end, __parse_ctx);
289       __width = __r.__value;
290       __width_as_arg = 1;
291       return __r.__ptr;
292     }
293 
294     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
295       return __begin;
296 
297     __format::__parse_number_result __r =
298         __format::__parse_number(__begin, __end);
299     __width = __r.__value;
300     _LIBCPP_ASSERT(__width != 0,
301                    "A zero value isn't allowed and should be impossible, "
302                    "due to validations in this function");
303     return __r.__ptr;
304   }
305 
__substitute_width_arg_id(auto __arg)306   void _LIBCPP_HIDE_FROM_ABI constexpr __substitute_width_arg_id(auto __arg) {
307     _LIBCPP_ASSERT(__width_as_arg == 1,
308                    "Substitute width called when no substitution is required");
309 
310     // The clearing of the flag isn't required but looks better when debugging
311     // the code.
312     __width_as_arg = 0;
313     __width = __substitute_arg_id(__arg);
314     if (__width == 0)
315       __throw_format_error(
316           "A format-spec width field replacement should have a positive value");
317   }
318 };
319 
320 class _LIBCPP_TYPE_VIS __parser_precision {
321 public:
322   /** Contains a precision or an arg-id. */
323   uint32_t __precision : 31 {__format::__number_max};
324   /**
325    * Determines whether the value stored is a precision or an arg-id.
326    *
327    * @note Since @ref __precision == @ref __format::__number_max is a valid
328    * value, the default value contains an arg-id of INT32_MAX. (This number of
329    * arguments isn't supported by compilers.)  This is used to detect whether
330    * the std-format-spec contains a precision field.
331    */
332   uint32_t __precision_as_arg : 1 {1};
333 
334 protected:
335   /**
336    * Does the supplied std-format-spec contain a precision field?
337    *
338    * When the field isn't present there's no truncating required. This can be
339    * used to optimize the formatting.
340    */
__has_precision_field()341   constexpr bool __has_precision_field() const noexcept {
342 
343     return __precision_as_arg == 0 ||             // Contains a value?
344            __precision != __format::__number_max; // The arg-id is valid?
345   }
346 
347   /**
348    * Does the supplied precision field contain an arg-id?
349    *
350    * If @c true the formatter needs to call @ref __substitute_precision_arg_id.
351    */
__precision_needs_substitution()352   constexpr bool __precision_needs_substitution() const noexcept {
353     return __precision_as_arg && __precision != __format::__number_max;
354   }
355 
356   template <class _CharT>
357   _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse(const _CharT * __begin,const _CharT * __end,auto & __parse_ctx)358   __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
359     if (*__begin != _CharT('.'))
360       return __begin;
361 
362     ++__begin;
363     if (__begin == __end)
364       __throw_format_error("End of input while parsing format-spec precision");
365 
366     if (*__begin == _CharT('0')) {
367       ++__begin;
368       if (__begin != __end && *__begin >= '0' && *__begin <= '9')
369         __throw_format_error(
370             "A format-spec precision field shouldn't have a leading zero");
371 
372       __precision = 0;
373       __precision_as_arg = 0;
374       return __begin;
375     }
376 
377     if (*__begin == _CharT('{')) {
378       __format::__parse_number_result __arg_id =
379           __parse_arg_id(++__begin, __end, __parse_ctx);
380       _LIBCPP_ASSERT(__arg_id.__value != __format::__number_max,
381                      "Unsupported number of arguments, since this number of "
382                      "arguments is used a special value");
383       __precision = __arg_id.__value;
384       return __arg_id.__ptr;
385     }
386 
387     if (*__begin < _CharT('0') || *__begin > _CharT('9'))
388       __throw_format_error(
389           "The format-spec precision field doesn't contain a value or arg-id");
390 
391     __format::__parse_number_result __r =
392         __format::__parse_number(__begin, __end);
393     __precision = __r.__value;
394     __precision_as_arg = 0;
395     return __r.__ptr;
396   }
397 
__substitute_precision_arg_id(auto __arg)398   void _LIBCPP_HIDE_FROM_ABI constexpr __substitute_precision_arg_id(
399       auto __arg) {
400     _LIBCPP_ASSERT(
401         __precision_as_arg == 1 && __precision != __format::__number_max,
402         "Substitute precision called when no substitution is required");
403 
404     // The clearing of the flag isn't required but looks better when debugging
405     // the code.
406     __precision_as_arg = 0;
407     __precision = __substitute_arg_id(__arg);
408   }
409 };
410 
411 template <class _CharT>
412 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse_locale_specific_form(const _CharT * __begin,_Flags & __flags)413 __parse_locale_specific_form(const _CharT* __begin, _Flags& __flags) noexcept {
414   if (*__begin == _CharT('L')) {
415     __flags.__locale_specific_form = true;
416     ++__begin;
417   }
418 
419   return __begin;
420 }
421 
422 template <class _CharT>
423 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse_type(const _CharT * __begin,_Flags & __flags)424 __parse_type(const _CharT* __begin, _Flags& __flags) {
425 
426   // Determines the type. It does not validate whether the selected type is
427   // valid. Most formatters have optional fields that are only allowed for
428   // certain types. These parsers need to do validation after the type has
429   // been parsed. So its easier to implement the validation for all types in
430   // the specific parse function.
431   switch (*__begin) {
432   case 'A':
433     __flags.__type = _Flags::_Type::__float_hexadecimal_upper_case;
434     break;
435   case 'B':
436     __flags.__type = _Flags::_Type::__binary_upper_case;
437     break;
438   case 'E':
439     __flags.__type = _Flags::_Type::__scientific_upper_case;
440     break;
441   case 'F':
442     __flags.__type = _Flags::_Type::__fixed_upper_case;
443     break;
444   case 'G':
445     __flags.__type = _Flags::_Type::__general_upper_case;
446     break;
447   case 'X':
448     __flags.__type = _Flags::_Type::__hexadecimal_upper_case;
449     break;
450   case 'a':
451     __flags.__type = _Flags::_Type::__float_hexadecimal_lower_case;
452     break;
453   case 'b':
454     __flags.__type = _Flags::_Type::__binary_lower_case;
455     break;
456   case 'c':
457     __flags.__type = _Flags::_Type::__char;
458     break;
459   case 'd':
460     __flags.__type = _Flags::_Type::__decimal;
461     break;
462   case 'e':
463     __flags.__type = _Flags::_Type::__scientific_lower_case;
464     break;
465   case 'f':
466     __flags.__type = _Flags::_Type::__fixed_lower_case;
467     break;
468   case 'g':
469     __flags.__type = _Flags::_Type::__general_lower_case;
470     break;
471   case 'o':
472     __flags.__type = _Flags::_Type::__octal;
473     break;
474   case 'p':
475     __flags.__type = _Flags::_Type::__pointer;
476     break;
477   case 's':
478     __flags.__type = _Flags::_Type::__string;
479     break;
480   case 'x':
481     __flags.__type = _Flags::_Type::__hexadecimal_lower_case;
482     break;
483   default:
484     return __begin;
485   }
486   return ++__begin;
487 }
488 
489 /**
490  * The parser for the std-format-spec.
491  *
492  * [format.string.std]/1 specifies the std-format-spec:
493  *   fill-and-align sign # 0 width precision L type
494  *
495  * All these fields are optional. Whether these fields can be used depend on:
496  * - The type supplied to the format string.
497  *   E.g. A string never uses the sign field so the field may not be set.
498  *   This constrain is validated by the parsers in this file.
499  * - The supplied value for the optional type field.
500  *   E.g. A int formatted as decimal uses the sign field.
501  *   When formatted as a char the sign field may no longer be set.
502  *   This constrain isn't validated by the parsers in this file.
503  *
504  * The base classes are ordered to minimize the amount of padding.
505  *
506  * This implements the parser for the string types.
507  */
508 template <class _CharT>
509 class _LIBCPP_TEMPLATE_VIS __parser_string
510     : public __parser_width,              // provides __width(|as_arg)
511       public __parser_precision,          // provides __precision(|as_arg)
512       public __parser_fill_align<_CharT>, // provides __fill and uses __flags
513       public _Flags                       // provides __flags
514 {
515 public:
516   using char_type = _CharT;
517 
__parser_string()518   _LIBCPP_HIDE_FROM_ABI constexpr __parser_string() {
519     this->__alignment = _Flags::_Alignment::__left;
520   }
521 
522   /**
523    * The low-level std-format-spec parse function.
524    *
525    * @pre __begin points at the beginning of the std-format-spec. This means
526    * directly after the ':'.
527    * @pre The std-format-spec parses the entire input, or the first unmatched
528    * character is a '}'.
529    *
530    * @returns The iterator pointing at the last parsed character.
531    */
532   _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx)
533       -> decltype(__parse_ctx.begin()) {
534     auto __it = __parse(__parse_ctx);
535     __process_display_type();
536     return __it;
537   }
538 
539 private:
540   /**
541    * Parses the std-format-spec.
542    *
543    * @throws __throw_format_error When @a __parse_ctx contains an ill-formed
544    *                               std-format-spec.
545    *
546    * @returns An iterator to the end of input or point at the closing '}'.
547    */
548   _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
549       -> decltype(__parse_ctx.begin()) {
550 
551     auto __begin = __parse_ctx.begin();
552     auto __end = __parse_ctx.end();
553     if (__begin == __end)
554       return __begin;
555 
556     __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
557                                                    static_cast<_Flags&>(*this));
558     if (__begin == __end)
559       return __begin;
560 
561     __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
562     if (__begin == __end)
563       return __begin;
564 
565     __begin = __parser_precision::__parse(__begin, __end, __parse_ctx);
566     if (__begin == __end)
567       return __begin;
568 
569     __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
570 
571     if (__begin != __end && *__begin != _CharT('}'))
572       __throw_format_error(
573           "The format-spec should consume the input or end with a '}'");
574 
575     return __begin;
576   }
577 
578   /** Processes the parsed std-format-spec based on the parsed display type. */
__process_display_type()579   void _LIBCPP_HIDE_FROM_ABI constexpr __process_display_type() {
580     switch (this->__type) {
581     case _Flags::_Type::__default:
582     case _Flags::_Type::__string:
583       break;
584 
585     default:
586       __throw_format_error("The format-spec type has a type not supported for "
587                            "a string argument");
588     }
589   }
590 };
591 
592 /**
593  * The parser for the std-format-spec.
594  *
595  * This implements the parser for the integral types. This includes the
596  * character type and boolean type.
597  *
598  * See @ref __parser_string.
599  */
600 template <class _CharT>
601 class _LIBCPP_TEMPLATE_VIS __parser_integral
602     : public __parser_width,              // provides __width(|as_arg)
603       public __parser_fill_align<_CharT>, // provides __fill and uses __flags
604       public _Flags                       // provides __flags
605 {
606 public:
607   using char_type = _CharT;
608 
609 protected:
610   /**
611    * The low-level std-format-spec parse function.
612    *
613    * @pre __begin points at the beginning of the std-format-spec. This means
614    * directly after the ':'.
615    * @pre The std-format-spec parses the entire input, or the first unmatched
616    * character is a '}'.
617    *
618    * @returns The iterator pointing at the last parsed character.
619    */
620   _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
621       -> decltype(__parse_ctx.begin()) {
622     auto __begin = __parse_ctx.begin();
623     auto __end = __parse_ctx.end();
624     if (__begin == __end)
625       return __begin;
626 
627     __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
628                                                    static_cast<_Flags&>(*this));
629     if (__begin == __end)
630       return __begin;
631 
632     __begin = __parse_sign(__begin, static_cast<_Flags&>(*this));
633     if (__begin == __end)
634       return __begin;
635 
636     __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this));
637     if (__begin == __end)
638       return __begin;
639 
640     __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this));
641     if (__begin == __end)
642       return __begin;
643 
644     __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
645     if (__begin == __end)
646       return __begin;
647 
648     __begin =
649         __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this));
650     if (__begin == __end)
651       return __begin;
652 
653     __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
654 
655     if (__begin != __end && *__begin != _CharT('}'))
656       __throw_format_error(
657           "The format-spec should consume the input or end with a '}'");
658 
659     return __begin;
660   }
661 
662   /**
663    * Handles the post-parsing updates for the integer types.
664    *
665    * Updates the zero-padding and alignment for integer types.
666    *
667    * [format.string.std]/13
668    *   If the 0 character and an align option both appear, the 0 character is
669    *   ignored.
670    *
671    * For the formatter a @ref __default alignment means zero-padding. Update
672    * the alignment based on parsed format string.
673    */
__handle_integer()674   _LIBCPP_HIDE_FROM_ABI constexpr void __handle_integer() noexcept {
675     this->__zero_padding &= this->__alignment == _Flags::_Alignment::__default;
676     if (!this->__zero_padding &&
677         this->__alignment == _Flags::_Alignment::__default)
678       this->__alignment = _Flags::_Alignment::__right;
679   }
680 
681   /**
682    * Handles the post-parsing updates for the character types.
683    *
684    * Sets the alignment and validates the format flags set for a character type.
685    *
686    * At the moment the validation for a character and a Boolean behave the
687    * same, but this may change in the future.
688    * Specifically at the moment the locale-specific form is allowed for the
689    * char output type, but it has no effect on the output.
690    */
__handle_char()691   _LIBCPP_HIDE_FROM_ABI constexpr void __handle_char() { __handle_bool(); }
692 
693   /**
694    * Handles the post-parsing updates for the Boolean types.
695    *
696    * Sets the alignment and validates the format flags set for a Boolean type.
697    */
__handle_bool()698   _LIBCPP_HIDE_FROM_ABI constexpr void __handle_bool() {
699     if (this->__sign != _Flags::_Sign::__default)
700       __throw_format_error("A sign field isn't allowed in this format-spec");
701 
702     if (this->__alternate_form)
703       __throw_format_error(
704           "An alternate form field isn't allowed in this format-spec");
705 
706     if (this->__zero_padding)
707       __throw_format_error(
708           "A zero-padding field isn't allowed in this format-spec");
709 
710     if (this->__alignment == _Flags::_Alignment::__default)
711       this->__alignment = _Flags::_Alignment::__left;
712   }
713 };
714 
715 // TODO FMT Add a parser for floating-point values.
716 // TODO FMT Add a parser for pointer values.
717 
718 /** Helper struct returned from @ref __get_string_alignment. */
719 template <class _CharT>
720 struct _LIBCPP_TEMPLATE_VIS __string_alignment {
721   /** Points beyond the last character to write to the output. */
722   const _CharT* __last;
723   /**
724    * The estimated number of columns in the output or 0.
725    *
726    * Only when the output needs to be aligned it's required to know the exact
727    * number of columns in the output. So if the formatted output has only a
728    * minimum width the exact size isn't important. It's only important to know
729    * the minimum has been reached. The minimum width is the width specified in
730    * the format-spec.
731    *
732    * For example in this code @code std::format("{:10}", MyString); @endcode
733    * the width estimation can stop once the algorithm has determined the output
734    * width is 10 columns.
735    *
736    * So if:
737    * * @ref __align == @c true the @ref __size is the estimated number of
738    *   columns required.
739    * * @ref __align == @c false the @ref __size is the estimated number of
740    *   columns required or 0 when the estimation algorithm stopped prematurely.
741    */
742   ptrdiff_t __size;
743   /**
744    * Does the output need to be aligned.
745    *
746    * When alignment is needed the output algorithm needs to add the proper
747    * padding. Else the output algorithm just needs to copy the input up to
748    * @ref __last.
749    */
750   bool __align;
751 };
752 
753 #ifndef _LIBCPP_HAS_NO_UNICODE
754 namespace __detail {
755 
756 /**
757  * Unicode column width estimates.
758  *
759  * Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
760  * Depending on format the relation between the number of code units stored and
761  * the number of output columns differs. The first relation is the number of
762  * code units forming a code point. (The text assumes the code units are
763  * unsigned.)
 * - UTF-8: The number of code units is between one and four. The first 128
 *   Unicode code points match the ASCII character set. When the highest bit is
 *   set it means the code point has more than one code unit.
767  * - UTF-16: The number of code units is between 1 and 2. When the first
768  *   code unit is in the range [0xd800,0xdfff) it means the code point uses two
769  *   code units.
770  * - UTF-32: The number of code units is always one.
771  *
 * The mapping from a code point to the number of columns isn't well defined.
 * The code uses the estimations defined in [format.string.std]/11. This list
 * might change in the future.
775  *
776  * The algorithm of @ref __get_string_alignment uses two different scanners:
777  * - The simple scanner @ref __estimate_column_width_fast. This scanner assumes
778  *   1 code unit is 1 column. This scanner stops when it can't be sure the
779  *   assumption is valid:
780  *   - UTF-8 when the code point is encoded in more than 1 code unit.
781  *   - UTF-16 and UTF-32 when the first multi-column code point is encountered.
782  *     (The code unit's value is lower than 0xd800 so the 2 code unit encoding
783  *     is irrelevant for this scanner.)
784  *   Due to these assumptions the scanner is faster than the full scanner. It
785  *   can process all text only containing ASCII. For UTF-16/32 it can process
786  *   most (all?) European languages. (Note the set it can process might be
787  *   reduced in the future, due to updates in the scanning rules.)
788  * - The full scanner @ref __estimate_column_width. This scanner, if needed,
789  *   converts multiple code units into one code point then converts the code
790  *   point to a column width.
791  *
792  * See also:
 * - [format.string.std]/11
794  * - https://en.wikipedia.org/wiki/UTF-8#Encoding
795  * - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
796  */
797 
/**
 * The lowest code point with an estimated width of two columns.
 *
 * This is the point where the fast UTF-16/32 scanner needs to stop processing.
 */
inline constexpr uint32_t __two_column_code_point{0x1100};
804 
805 /** Helper concept for an UTF-8 character type. */
806 template <class _CharT>
807 concept __utf8_character = same_as<_CharT, char> || same_as<_CharT, char8_t>;
808 
809 /** Helper concept for an UTF-16 character type. */
810 template <class _CharT>
811 concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) || same_as<_CharT, char16_t>;
812 
813 /** Helper concept for an UTF-32 character type. */
814 template <class _CharT>
815 concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) || same_as<_CharT, char32_t>;
816 
817 /** Helper concept for an UTF-16 or UTF-32 character type. */
818 template <class _CharT>
819 concept __utf16_or_32_character = __utf16_character<_CharT> || __utf32_character<_CharT>;
820 
821 /**
822  * Converts a code point to the column width.
823  *
824  * The estimations are conforming to [format.string.general]/11
825  *
826  * This version expects a value less than 0x1'0000, which is a 3-byte UTF-8
827  * character.
828  */
__column_width_3(uint32_t __c)829 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_3(uint32_t __c) noexcept {
830   _LIBCPP_ASSERT(__c < 0x1'0000,
831                  "Use __column_width_4 or __column_width for larger values");
832 
833   // clang-format off
834   return 1 + (__c >= 0x1100 && (__c <= 0x115f ||
835              (__c >= 0x2329 && (__c <= 0x232a ||
836              (__c >= 0x2e80 && (__c <= 0x303e ||
837              (__c >= 0x3040 && (__c <= 0xa4cf ||
838              (__c >= 0xac00 && (__c <= 0xd7a3 ||
839              (__c >= 0xf900 && (__c <= 0xfaff ||
840              (__c >= 0xfe10 && (__c <= 0xfe19 ||
841              (__c >= 0xfe30 && (__c <= 0xfe6f ||
842              (__c >= 0xff00 && (__c <= 0xff60 ||
843              (__c >= 0xffe0 && (__c <= 0xffe6
844              ))))))))))))))))))));
845   // clang-format on
846 }
847 
848 /**
849  * @overload
850  *
851  * This version expects a value greater than or equal to 0x1'0000, which is a
852  * 4-byte UTF-8 character.
853  */
854 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_4(uint32_t __c) noexcept {
855   _LIBCPP_ASSERT(__c >= 0x1'0000,
856                  "Use __column_width_3 or __column_width for smaller values");
857 
858   // clang-format off
859   return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f ||
860              (__c >= 0x1'f900 && (__c <= 0x1'f9ff ||
861              (__c >= 0x2'0000 && (__c <= 0x2'fffd ||
862              (__c >= 0x3'0000 && (__c <= 0x3'fffd
863              ))))))));
864   // clang-format on
865 }
866 
867 /**
868  * @overload
869  *
870  * The general case, accepting all values.
871  */
872 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width(uint32_t __c) noexcept {
873   if (__c < 0x1'0000)
874     return __column_width_3(__c);
875 
876   return __column_width_4(__c);
877 }
878 
879 /**
880  * Estimate the column width for the UTF-8 sequence using the fast algorithm.
881  */
882 template <__utf8_character _CharT>
883 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
884 __estimate_column_width_fast(const _CharT* __first,
885                              const _CharT* __last) noexcept {
886   return _VSTD::find_if(__first, __last,
887                         [](unsigned char __c) { return __c & 0x80; });
888 }
889 
890 /**
891  * @overload
892  *
893  * The implementation for UTF-16/32.
894  */
895 template <__utf16_or_32_character _CharT>
896 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
897 __estimate_column_width_fast(const _CharT* __first,
898                              const _CharT* __last) noexcept {
899   return _VSTD::find_if(__first, __last,
900                         [](uint32_t __c) { return __c >= 0x1100; });
901 }
902 
/**
 * The result of a column width estimation.
 *
 * This is the return type of the @ref __estimate_column_width overloads and
 * of @ref __estimate_column_width_malformed.
 */
template <class _CharT>
struct _LIBCPP_TEMPLATE_VIS __column_width_result {
  /** The number of output columns. */
  size_t __width;
  /**
   * The last parsed element.
   *
   * This points directly behind the last code unit that is accounted for in
   * @ref __width. It limits the original output to fit in the wanted number
   * of columns.
   */
  const _CharT* __ptr;
};
914 
915 /**
916  * Small helper to determine the width of malformed Unicode.
917  *
918  * @note This function's only needed for UTF-8. During scanning UTF-8 there
919  * are multiple place where it can be detected that the Unicode is malformed.
920  * UTF-16 only requires 1 test and UTF-32 requires no testing.
921  */
922 template <__utf8_character _CharT>
923 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
924 __estimate_column_width_malformed(const _CharT* __first, const _CharT* __last,
925                                   size_t __maximum, size_t __result) noexcept {
926   size_t __size = __last - __first;
927   size_t __n = _VSTD::min(__size, __maximum);
928   return {__result + __n, __first + __n};
929 }
930 
931 /**
932  * Determines the number of output columns needed to render the input.
933  *
934  * @note When the scanner encounters malformed Unicode it acts as-if every code
935  * unit at the end of the input is one output column. It's expected the output
936  * terminal will replace these malformed code units with a one column
937  * replacement characters.
938  *
939  * @param __first   Points to the first element of the input range.
940  * @param __last    Points beyond the last element of the input range.
941  * @param __maximum The maximum number of output columns. The returned number
942  *                  of estimated output columns will not exceed this value.
943  */
944 template <__utf8_character _CharT>
945 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
946 __estimate_column_width(const _CharT* __first, const _CharT* __last,
947                         size_t __maximum) noexcept {
948   size_t __result = 0;
949 
950   while (__first != __last) {
951     // Based on the number of leading 1 bits the number of code units in the
952     // code point can be determined. See
953     // https://en.wikipedia.org/wiki/UTF-8#Encoding
954     switch (_VSTD::countl_one(static_cast<unsigned char>(*__first))) {
955     case 0: // 1-code unit encoding: all 1 column
956       ++__result;
957       ++__first;
958       break;
959 
960     case 2: // 2-code unit encoding: all 1 column
961       // Malformed Unicode.
962       if (__last - __first < 2) [[unlikely]]
963         return __estimate_column_width_malformed(__first, __last, __maximum,
964                                                  __result);
965       __first += 2;
966       ++__result;
967       break;
968 
969     case 3: // 3-code unit encoding: either 1 or 2 columns
970       // Malformed Unicode.
971       if (__last - __first < 3) [[unlikely]]
972         return __estimate_column_width_malformed(__first, __last, __maximum,
973                                                  __result);
974       {
975         uint32_t __c = static_cast<unsigned char>(*__first++) & 0x0f;
976         __c <<= 6;
977         __c |= static_cast<unsigned char>(*__first++) & 0x3f;
978         __c <<= 6;
979         __c |= static_cast<unsigned char>(*__first++) & 0x3f;
980         __result += __column_width_3(__c);
981         if (__result > __maximum)
982           return {__result - 2, __first - 3};
983       }
984       break;
985     case 4: // 4-code unit encoding: either 1 or 2 columns
986       // Malformed Unicode.
987       if (__last - __first < 4) [[unlikely]]
988         return __estimate_column_width_malformed(__first, __last, __maximum,
989                                                  __result);
990       {
991         uint32_t __c = static_cast<unsigned char>(*__first++) & 0x07;
992         __c <<= 6;
993         __c |= static_cast<unsigned char>(*__first++) & 0x3f;
994         __c <<= 6;
995         __c |= static_cast<unsigned char>(*__first++) & 0x3f;
996         __c <<= 6;
997         __c |= static_cast<unsigned char>(*__first++) & 0x3f;
998         __result += __column_width_4(__c);
999         if (__result > __maximum)
1000           return {__result - 2, __first - 4};
1001       }
1002       break;
1003     default:
1004       // Malformed Unicode.
1005       return __estimate_column_width_malformed(__first, __last, __maximum,
1006                                                __result);
1007     }
1008 
1009     if (__result >= __maximum)
1010       return {__result, __first};
1011   }
1012   return {__result, __first};
1013 }
1014 
1015 template <__utf16_character _CharT>
1016 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
1017 __estimate_column_width(const _CharT* __first, const _CharT* __last,
1018                         size_t __maximum) noexcept {
1019   size_t __result = 0;
1020 
1021   while (__first != __last) {
1022     uint32_t __c = *__first;
1023     // Is the code unit part of a surrogate pair? See
1024     // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
1025     if (__c >= 0xd800 && __c <= 0xDfff) {
1026       // Malformed Unicode.
1027       if (__last - __first < 2) [[unlikely]]
1028         return {__result + 1, __first + 1};
1029 
1030       __c -= 0xd800;
1031       __c <<= 10;
1032       __c += (*(__first + 1) - 0xdc00);
1033       __c += 0x10'000;
1034 
1035       __result += __column_width_4(__c);
1036       if (__result > __maximum)
1037         return {__result - 2, __first};
1038       __first += 2;
1039     } else {
1040       __result += __column_width_3(__c);
1041       if (__result > __maximum)
1042         return {__result - 2, __first};
1043       ++__first;
1044     }
1045 
1046     if (__result >= __maximum)
1047       return {__result, __first};
1048   }
1049 
1050   return {__result, __first};
1051 }
1052 
1053 template <__utf32_character _CharT>
1054 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
1055 __estimate_column_width(const _CharT* __first, const _CharT* __last,
1056                         size_t __maximum) noexcept {
1057   size_t __result = 0;
1058 
1059   while (__first != __last) {
1060     wchar_t __c = *__first;
1061     __result += __column_width(__c);
1062 
1063     if (__result > __maximum)
1064       return {__result - 2, __first};
1065 
1066     ++__first;
1067     if (__result >= __maximum)
1068       return {__result, __first};
1069   }
1070 
1071   return {__result, __first};
1072 }
1073 
1074 } // namespace __detail
1075 
1076 template <class _CharT>
1077 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
1078 __get_string_alignment(const _CharT* __first, const _CharT* __last,
1079                        ptrdiff_t __width, ptrdiff_t __precision) noexcept {
1080   _LIBCPP_ASSERT(__width != 0 || __precision != -1,
1081                  "The function has no effect and shouldn't be used");
1082 
1083   // TODO FMT There might be more optimizations possible:
1084   // If __precision == __format::__number_max and the encoding is:
1085   // * UTF-8  : 4 * (__last - __first) >= __width
1086   // * UTF-16 : 2 * (__last - __first) >= __width
1087   // * UTF-32 : (__last - __first) >= __width
1088   // In these cases it's certain the output is at least the requested width.
1089   // It's unknown how often this happens in practice. For now the improvement
1090   // isn't implemented.
1091 
1092   /*
1093    * First assume there are no special Unicode code units in the input.
1094    * - Apply the precision (this may reduce the size of the input). When
1095    *   __precison == -1 this step is omitted.
1096    * - Scan for special code units in the input.
1097    * If our assumption was correct the __pos will be at the end of the input.
1098    */
1099   const ptrdiff_t __length = __last - __first;
1100   const _CharT* __limit =
1101       __first +
1102       (__precision == -1 ? __length : _VSTD::min(__length, __precision));
1103   ptrdiff_t __size = __limit - __first;
1104   const _CharT* __pos =
1105       __detail::__estimate_column_width_fast(__first, __limit);
1106 
1107   if (__pos == __limit)
1108     return {__limit, __size, __size < __width};
1109 
1110   /*
1111    * Our assumption was wrong, there are special Unicode code units.
1112    * The range [__first, __pos) contains a set of code units with the
1113    * following property:
1114    *      Every _CharT in the range will be rendered in 1 column.
1115    *
1116    * If there's no maximum width and the parsed size already exceeds the
1117    *   minimum required width. The real size isn't important. So bail out.
1118    */
1119   if (__precision == -1 && (__pos - __first) >= __width)
1120     return {__last, 0, false};
1121 
1122   /* If there's a __precision, truncate the output to that width. */
1123   ptrdiff_t __prefix = __pos - __first;
1124   if (__precision != -1) {
1125     _LIBCPP_ASSERT(__precision > __prefix, "Logic error.");
1126     auto __lengh_info = __detail::__estimate_column_width(
1127         __pos, __last, __precision - __prefix);
1128     __size = __lengh_info.__width + __prefix;
1129     return {__lengh_info.__ptr, __size, __size < __width};
1130   }
1131 
1132   /* Else use __width to determine the number of required padding characters. */
1133   _LIBCPP_ASSERT(__width > __prefix, "Logic error.");
1134   /*
1135    * The column width is always one or two columns. For the precision the wanted
1136    * column width is the maximum, for the width it's the minimum. Using the
1137    * width estimation with its truncating behavior will result in the wrong
1138    * result in the following case:
1139    * - The last code unit processed requires two columns and exceeds the
1140    *   maximum column width.
1141    * By increasing the __maximum by one avoids this issue. (It means it may
1142    * pass one code point more than required to determine the proper result;
1143    * that however isn't a problem for the algorithm.)
1144    */
1145   size_t __maximum = 1 + __width - __prefix;
1146   auto __lengh_info =
1147       __detail::__estimate_column_width(__pos, __last, __maximum);
1148   if (__lengh_info.__ptr != __last) {
1149     // Consumed the width number of code units. The exact size of the string
1150     // is unknown. We only know we don't need to align the output.
1151     _LIBCPP_ASSERT(static_cast<ptrdiff_t>(__lengh_info.__width + __prefix) >=
1152                        __width,
1153                    "Logic error");
1154     return {__last, 0, false};
1155   }
1156 
1157   __size = __lengh_info.__width + __prefix;
1158   return {__last, __size, __size < __width};
1159 }
1160 #else  // _LIBCPP_HAS_NO_UNICODE
1161 template <class _CharT>
1162 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
1163 __get_string_alignment(const _CharT* __first, const _CharT* __last,
1164                        ptrdiff_t __width, ptrdiff_t __precision) noexcept {
1165   const ptrdiff_t __length = __last - __first;
1166   const _CharT* __limit =
1167       __first +
1168       (__precision == -1 ? __length : _VSTD::min(__length, __precision));
1169   ptrdiff_t __size = __limit - __first;
1170   return {__limit, __size, __size < __width};
1171 }
1172 #endif // _LIBCPP_HAS_NO_UNICODE
1173 
1174 } // namespace __format_spec
1175 
1176 # endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
1177 
1178 #endif //_LIBCPP_STD_VER > 17
1179 
1180 _LIBCPP_END_NAMESPACE_STD
1181 
1182 _LIBCPP_POP_MACROS
1183 
1184 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
1185