1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9
10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12
13 #include <__algorithm/find_if.h>
14 #include <__algorithm/min.h>
15 #include <__config>
16 #include <__debug>
17 #include <__format/format_arg.h>
18 #include <__format/format_error.h>
19 #include <__format/format_string.h>
20 #include <__variant/monostate.h>
21 #include <bit>
22 #include <concepts>
23 #include <cstdint>
24 #include <type_traits>
25
26 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
27 # pragma GCC system_header
28 #endif
29
30 _LIBCPP_PUSH_MACROS
31 #include <__undef_macros>
32
33 _LIBCPP_BEGIN_NAMESPACE_STD
34
35 #if _LIBCPP_STD_VER > 17
36
37 // TODO FMT Remove this once we require compilers with proper C++20 support.
38 // If the compiler has no concepts support, the format header will be disabled.
// Without concepts support enable_if would need to be used, and that is too
// much effort just to support compilers with partial C++20 support.
41 # if !defined(_LIBCPP_HAS_NO_CONCEPTS)
42
43 namespace __format_spec {
44
45 /**
46 * Contains the flags for the std-format-spec.
47 *
48 * Some format-options can only be used for specific C++types and may depend on
49 * the selected format-type.
50 * * The C++type filtering can be done using the proper policies for
51 * @ref __parser_std.
52 * * The format-type filtering needs to be done post parsing in the parser
53 * derived from @ref __parser_std.
54 */
55 class _LIBCPP_TYPE_VIS _Flags {
56 public:
57 enum class _LIBCPP_ENUM_VIS _Alignment : uint8_t {
58 /**
59 * No alignment is set in the format string.
60 *
61 * Zero-padding is ignored when an alignment is selected.
62 * The default alignment depends on the selected format-type.
63 */
64 __default,
65 __left,
66 __center,
67 __right
68 };
69 enum class _LIBCPP_ENUM_VIS _Sign : uint8_t {
70 /**
71 * No sign is set in the format string.
72 *
73 * The sign isn't allowed for certain format-types. By using this value
74 * it's possible to detect whether or not the user explicitly set the sign
75 * flag. For formatting purposes it behaves the same as @ref __minus.
76 */
77 __default,
78 __minus,
79 __plus,
80 __space
81 };
82
83 _Alignment __alignment : 2 {_Alignment::__default};
84 _Sign __sign : 2 {_Sign::__default};
85 uint8_t __alternate_form : 1 {false};
86 uint8_t __zero_padding : 1 {false};
87 uint8_t __locale_specific_form : 1 {false};
88
89 enum class _LIBCPP_ENUM_VIS _Type : uint8_t {
90 __default,
91 __string,
92 __binary_lower_case,
93 __binary_upper_case,
94 __octal,
95 __decimal,
96 __hexadecimal_lower_case,
97 __hexadecimal_upper_case,
98 __pointer,
99 __char,
100 __float_hexadecimal_lower_case,
101 __float_hexadecimal_upper_case,
102 __scientific_lower_case,
103 __scientific_upper_case,
104 __fixed_lower_case,
105 __fixed_upper_case,
106 __general_lower_case,
107 __general_upper_case
108 };
109
110 _Type __type{_Type::__default};
111 };
112
113 namespace __detail {
114 template <class _CharT>
115 _LIBCPP_HIDE_FROM_ABI constexpr bool
__parse_alignment(_CharT __c,_Flags & __flags)116 __parse_alignment(_CharT __c, _Flags& __flags) noexcept {
117 switch (__c) {
118 case _CharT('<'):
119 __flags.__alignment = _Flags::_Alignment::__left;
120 return true;
121
122 case _CharT('^'):
123 __flags.__alignment = _Flags::_Alignment::__center;
124 return true;
125
126 case _CharT('>'):
127 __flags.__alignment = _Flags::_Alignment::__right;
128 return true;
129 }
130 return false;
131 }
132 } // namespace __detail
133
134 template <class _CharT>
135 class _LIBCPP_TEMPLATE_VIS __parser_fill_align {
136 public:
137 // TODO FMT The standard doesn't specify this character is a Unicode
138 // character. Validate what fmt and MSVC have implemented.
139 _CharT __fill{_CharT(' ')};
140
141 protected:
142 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse(const _CharT * __begin,const _CharT * __end,_Flags & __flags)143 __parse(const _CharT* __begin, const _CharT* __end, _Flags& __flags) {
144 _LIBCPP_ASSERT(__begin != __end,
145 "When called with an empty input the function will cause "
146 "undefined behavior by evaluating data not in the input");
147 if (__begin + 1 != __end) {
148 if (__detail::__parse_alignment(*(__begin + 1), __flags)) {
149 if (*__begin == _CharT('{') || *__begin == _CharT('}'))
150 __throw_format_error(
151 "The format-spec fill field contains an invalid character");
152 __fill = *__begin;
153 return __begin + 2;
154 }
155 }
156
157 if (__detail::__parse_alignment(*__begin, __flags))
158 return __begin + 1;
159
160 return __begin;
161 }
162 };
163
164 template <class _CharT>
165 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse_sign(const _CharT * __begin,_Flags & __flags)166 __parse_sign(const _CharT* __begin, _Flags& __flags) noexcept {
167 switch (*__begin) {
168 case _CharT('-'):
169 __flags.__sign = _Flags::_Sign::__minus;
170 break;
171 case _CharT('+'):
172 __flags.__sign = _Flags::_Sign::__plus;
173 break;
174 case _CharT(' '):
175 __flags.__sign = _Flags::_Sign::__space;
176 break;
177 default:
178 return __begin;
179 }
180 return __begin + 1;
181 }
182
183 template <class _CharT>
184 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse_alternate_form(const _CharT * __begin,_Flags & __flags)185 __parse_alternate_form(const _CharT* __begin, _Flags& __flags) noexcept {
186 if (*__begin == _CharT('#')) {
187 __flags.__alternate_form = true;
188 ++__begin;
189 }
190
191 return __begin;
192 }
193
194 template <class _CharT>
195 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse_zero_padding(const _CharT * __begin,_Flags & __flags)196 __parse_zero_padding(const _CharT* __begin, _Flags& __flags) noexcept {
197 if (*__begin == _CharT('0')) {
198 __flags.__zero_padding = true;
199 ++__begin;
200 }
201
202 return __begin;
203 }
204
205 template <class _CharT>
206 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT>
__parse_arg_id(const _CharT * __begin,const _CharT * __end,auto & __parse_ctx)207 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
208 // This function is a wrapper to call the real parser. But it does the
209 // validation for the pre-conditions and post-conditions.
210 if (__begin == __end)
211 __throw_format_error("End of input while parsing format-spec arg-id");
212
213 __format::__parse_number_result __r =
214 __format::__parse_arg_id(__begin, __end, __parse_ctx);
215
216 if (__r.__ptr == __end || *__r.__ptr != _CharT('}'))
217 __throw_format_error("A format-spec arg-id should terminate at a '}'");
218
219 ++__r.__ptr;
220 return __r;
221 }
222
223 template <class _Context>
224 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t
__substitute_arg_id(basic_format_arg<_Context> __arg)225 __substitute_arg_id(basic_format_arg<_Context> __arg) {
226 return visit_format_arg(
227 [](auto __arg) -> uint32_t {
228 using _Type = decltype(__arg);
229 if constexpr (integral<_Type>) {
230 if constexpr (signed_integral<_Type>) {
231 if (__arg < 0)
232 __throw_format_error("A format-spec arg-id replacement shouldn't "
233 "have a negative value");
234 }
235
236 using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
237 if (static_cast<_CT>(__arg) >
238 static_cast<_CT>(__format::__number_max))
239 __throw_format_error("A format-spec arg-id replacement exceeds "
240 "the maximum supported value");
241
242 return __arg;
243 } else if constexpr (same_as<_Type, monostate>)
244 __throw_format_error("Argument index out of bounds");
245 else
246 __throw_format_error("A format-spec arg-id replacement argument "
247 "isn't an integral type");
248 },
249 __arg);
250 }
251
252 class _LIBCPP_TYPE_VIS __parser_width {
253 public:
254 /** Contains a width or an arg-id. */
255 uint32_t __width : 31 {0};
256 /** Determines whether the value stored is a width or an arg-id. */
257 uint32_t __width_as_arg : 1 {0};
258
259 protected:
260 /**
261 * Does the supplied std-format-spec contain a width field?
262 *
263 * When the field isn't present there's no padding required. This can be used
264 * to optimize the formatting.
265 */
__has_width_field()266 constexpr bool __has_width_field() const noexcept {
267 return __width_as_arg || __width;
268 }
269
270 /**
271 * Does the supplied width field contain an arg-id?
272 *
273 * If @c true the formatter needs to call @ref __substitute_width_arg_id.
274 */
__width_needs_substitution()275 constexpr bool __width_needs_substitution() const noexcept {
276 return __width_as_arg;
277 }
278
279 template <class _CharT>
280 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse(const _CharT * __begin,const _CharT * __end,auto & __parse_ctx)281 __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
282 if (*__begin == _CharT('0'))
283 __throw_format_error(
284 "A format-spec width field shouldn't have a leading zero");
285
286 if (*__begin == _CharT('{')) {
287 __format::__parse_number_result __r =
288 __parse_arg_id(++__begin, __end, __parse_ctx);
289 __width = __r.__value;
290 __width_as_arg = 1;
291 return __r.__ptr;
292 }
293
294 if (*__begin < _CharT('0') || *__begin > _CharT('9'))
295 return __begin;
296
297 __format::__parse_number_result __r =
298 __format::__parse_number(__begin, __end);
299 __width = __r.__value;
300 _LIBCPP_ASSERT(__width != 0,
301 "A zero value isn't allowed and should be impossible, "
302 "due to validations in this function");
303 return __r.__ptr;
304 }
305
__substitute_width_arg_id(auto __arg)306 void _LIBCPP_HIDE_FROM_ABI constexpr __substitute_width_arg_id(auto __arg) {
307 _LIBCPP_ASSERT(__width_as_arg == 1,
308 "Substitute width called when no substitution is required");
309
310 // The clearing of the flag isn't required but looks better when debugging
311 // the code.
312 __width_as_arg = 0;
313 __width = __substitute_arg_id(__arg);
314 if (__width == 0)
315 __throw_format_error(
316 "A format-spec width field replacement should have a positive value");
317 }
318 };
319
320 class _LIBCPP_TYPE_VIS __parser_precision {
321 public:
322 /** Contains a precision or an arg-id. */
323 uint32_t __precision : 31 {__format::__number_max};
324 /**
325 * Determines whether the value stored is a precision or an arg-id.
326 *
327 * @note Since @ref __precision == @ref __format::__number_max is a valid
328 * value, the default value contains an arg-id of INT32_MAX. (This number of
329 * arguments isn't supported by compilers.) This is used to detect whether
330 * the std-format-spec contains a precision field.
331 */
332 uint32_t __precision_as_arg : 1 {1};
333
334 protected:
335 /**
336 * Does the supplied std-format-spec contain a precision field?
337 *
338 * When the field isn't present there's no truncating required. This can be
339 * used to optimize the formatting.
340 */
__has_precision_field()341 constexpr bool __has_precision_field() const noexcept {
342
343 return __precision_as_arg == 0 || // Contains a value?
344 __precision != __format::__number_max; // The arg-id is valid?
345 }
346
347 /**
348 * Does the supplied precision field contain an arg-id?
349 *
350 * If @c true the formatter needs to call @ref __substitute_precision_arg_id.
351 */
__precision_needs_substitution()352 constexpr bool __precision_needs_substitution() const noexcept {
353 return __precision_as_arg && __precision != __format::__number_max;
354 }
355
356 template <class _CharT>
357 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse(const _CharT * __begin,const _CharT * __end,auto & __parse_ctx)358 __parse(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
359 if (*__begin != _CharT('.'))
360 return __begin;
361
362 ++__begin;
363 if (__begin == __end)
364 __throw_format_error("End of input while parsing format-spec precision");
365
366 if (*__begin == _CharT('0')) {
367 ++__begin;
368 if (__begin != __end && *__begin >= '0' && *__begin <= '9')
369 __throw_format_error(
370 "A format-spec precision field shouldn't have a leading zero");
371
372 __precision = 0;
373 __precision_as_arg = 0;
374 return __begin;
375 }
376
377 if (*__begin == _CharT('{')) {
378 __format::__parse_number_result __arg_id =
379 __parse_arg_id(++__begin, __end, __parse_ctx);
380 _LIBCPP_ASSERT(__arg_id.__value != __format::__number_max,
381 "Unsupported number of arguments, since this number of "
382 "arguments is used a special value");
383 __precision = __arg_id.__value;
384 return __arg_id.__ptr;
385 }
386
387 if (*__begin < _CharT('0') || *__begin > _CharT('9'))
388 __throw_format_error(
389 "The format-spec precision field doesn't contain a value or arg-id");
390
391 __format::__parse_number_result __r =
392 __format::__parse_number(__begin, __end);
393 __precision = __r.__value;
394 __precision_as_arg = 0;
395 return __r.__ptr;
396 }
397
__substitute_precision_arg_id(auto __arg)398 void _LIBCPP_HIDE_FROM_ABI constexpr __substitute_precision_arg_id(
399 auto __arg) {
400 _LIBCPP_ASSERT(
401 __precision_as_arg == 1 && __precision != __format::__number_max,
402 "Substitute precision called when no substitution is required");
403
404 // The clearing of the flag isn't required but looks better when debugging
405 // the code.
406 __precision_as_arg = 0;
407 __precision = __substitute_arg_id(__arg);
408 }
409 };
410
411 template <class _CharT>
412 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse_locale_specific_form(const _CharT * __begin,_Flags & __flags)413 __parse_locale_specific_form(const _CharT* __begin, _Flags& __flags) noexcept {
414 if (*__begin == _CharT('L')) {
415 __flags.__locale_specific_form = true;
416 ++__begin;
417 }
418
419 return __begin;
420 }
421
422 template <class _CharT>
423 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
__parse_type(const _CharT * __begin,_Flags & __flags)424 __parse_type(const _CharT* __begin, _Flags& __flags) {
425
426 // Determines the type. It does not validate whether the selected type is
427 // valid. Most formatters have optional fields that are only allowed for
428 // certain types. These parsers need to do validation after the type has
429 // been parsed. So its easier to implement the validation for all types in
430 // the specific parse function.
431 switch (*__begin) {
432 case 'A':
433 __flags.__type = _Flags::_Type::__float_hexadecimal_upper_case;
434 break;
435 case 'B':
436 __flags.__type = _Flags::_Type::__binary_upper_case;
437 break;
438 case 'E':
439 __flags.__type = _Flags::_Type::__scientific_upper_case;
440 break;
441 case 'F':
442 __flags.__type = _Flags::_Type::__fixed_upper_case;
443 break;
444 case 'G':
445 __flags.__type = _Flags::_Type::__general_upper_case;
446 break;
447 case 'X':
448 __flags.__type = _Flags::_Type::__hexadecimal_upper_case;
449 break;
450 case 'a':
451 __flags.__type = _Flags::_Type::__float_hexadecimal_lower_case;
452 break;
453 case 'b':
454 __flags.__type = _Flags::_Type::__binary_lower_case;
455 break;
456 case 'c':
457 __flags.__type = _Flags::_Type::__char;
458 break;
459 case 'd':
460 __flags.__type = _Flags::_Type::__decimal;
461 break;
462 case 'e':
463 __flags.__type = _Flags::_Type::__scientific_lower_case;
464 break;
465 case 'f':
466 __flags.__type = _Flags::_Type::__fixed_lower_case;
467 break;
468 case 'g':
469 __flags.__type = _Flags::_Type::__general_lower_case;
470 break;
471 case 'o':
472 __flags.__type = _Flags::_Type::__octal;
473 break;
474 case 'p':
475 __flags.__type = _Flags::_Type::__pointer;
476 break;
477 case 's':
478 __flags.__type = _Flags::_Type::__string;
479 break;
480 case 'x':
481 __flags.__type = _Flags::_Type::__hexadecimal_lower_case;
482 break;
483 default:
484 return __begin;
485 }
486 return ++__begin;
487 }
488
489 /**
490 * The parser for the std-format-spec.
491 *
492 * [format.string.std]/1 specifies the std-format-spec:
493 * fill-and-align sign # 0 width precision L type
494 *
495 * All these fields are optional. Whether these fields can be used depend on:
496 * - The type supplied to the format string.
497 * E.g. A string never uses the sign field so the field may not be set.
498 * This constrain is validated by the parsers in this file.
499 * - The supplied value for the optional type field.
500 * E.g. A int formatted as decimal uses the sign field.
501 * When formatted as a char the sign field may no longer be set.
502 * This constrain isn't validated by the parsers in this file.
503 *
504 * The base classes are ordered to minimize the amount of padding.
505 *
506 * This implements the parser for the string types.
507 */
508 template <class _CharT>
509 class _LIBCPP_TEMPLATE_VIS __parser_string
510 : public __parser_width, // provides __width(|as_arg)
511 public __parser_precision, // provides __precision(|as_arg)
512 public __parser_fill_align<_CharT>, // provides __fill and uses __flags
513 public _Flags // provides __flags
514 {
515 public:
516 using char_type = _CharT;
517
__parser_string()518 _LIBCPP_HIDE_FROM_ABI constexpr __parser_string() {
519 this->__alignment = _Flags::_Alignment::__left;
520 }
521
522 /**
523 * The low-level std-format-spec parse function.
524 *
525 * @pre __begin points at the beginning of the std-format-spec. This means
526 * directly after the ':'.
527 * @pre The std-format-spec parses the entire input, or the first unmatched
528 * character is a '}'.
529 *
530 * @returns The iterator pointing at the last parsed character.
531 */
532 _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx)
533 -> decltype(__parse_ctx.begin()) {
534 auto __it = __parse(__parse_ctx);
535 __process_display_type();
536 return __it;
537 }
538
539 private:
540 /**
541 * Parses the std-format-spec.
542 *
543 * @throws __throw_format_error When @a __parse_ctx contains an ill-formed
544 * std-format-spec.
545 *
546 * @returns An iterator to the end of input or point at the closing '}'.
547 */
548 _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
549 -> decltype(__parse_ctx.begin()) {
550
551 auto __begin = __parse_ctx.begin();
552 auto __end = __parse_ctx.end();
553 if (__begin == __end)
554 return __begin;
555
556 __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
557 static_cast<_Flags&>(*this));
558 if (__begin == __end)
559 return __begin;
560
561 __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
562 if (__begin == __end)
563 return __begin;
564
565 __begin = __parser_precision::__parse(__begin, __end, __parse_ctx);
566 if (__begin == __end)
567 return __begin;
568
569 __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
570
571 if (__begin != __end && *__begin != _CharT('}'))
572 __throw_format_error(
573 "The format-spec should consume the input or end with a '}'");
574
575 return __begin;
576 }
577
578 /** Processes the parsed std-format-spec based on the parsed display type. */
__process_display_type()579 void _LIBCPP_HIDE_FROM_ABI constexpr __process_display_type() {
580 switch (this->__type) {
581 case _Flags::_Type::__default:
582 case _Flags::_Type::__string:
583 break;
584
585 default:
586 __throw_format_error("The format-spec type has a type not supported for "
587 "a string argument");
588 }
589 }
590 };
591
592 /**
593 * The parser for the std-format-spec.
594 *
595 * This implements the parser for the integral types. This includes the
596 * character type and boolean type.
597 *
598 * See @ref __parser_string.
599 */
600 template <class _CharT>
601 class _LIBCPP_TEMPLATE_VIS __parser_integral
602 : public __parser_width, // provides __width(|as_arg)
603 public __parser_fill_align<_CharT>, // provides __fill and uses __flags
604 public _Flags // provides __flags
605 {
606 public:
607 using char_type = _CharT;
608
609 protected:
610 /**
611 * The low-level std-format-spec parse function.
612 *
613 * @pre __begin points at the beginning of the std-format-spec. This means
614 * directly after the ':'.
615 * @pre The std-format-spec parses the entire input, or the first unmatched
616 * character is a '}'.
617 *
618 * @returns The iterator pointing at the last parsed character.
619 */
620 _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
621 -> decltype(__parse_ctx.begin()) {
622 auto __begin = __parse_ctx.begin();
623 auto __end = __parse_ctx.end();
624 if (__begin == __end)
625 return __begin;
626
627 __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
628 static_cast<_Flags&>(*this));
629 if (__begin == __end)
630 return __begin;
631
632 __begin = __parse_sign(__begin, static_cast<_Flags&>(*this));
633 if (__begin == __end)
634 return __begin;
635
636 __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this));
637 if (__begin == __end)
638 return __begin;
639
640 __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this));
641 if (__begin == __end)
642 return __begin;
643
644 __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
645 if (__begin == __end)
646 return __begin;
647
648 __begin =
649 __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this));
650 if (__begin == __end)
651 return __begin;
652
653 __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
654
655 if (__begin != __end && *__begin != _CharT('}'))
656 __throw_format_error(
657 "The format-spec should consume the input or end with a '}'");
658
659 return __begin;
660 }
661
662 /**
663 * Handles the post-parsing updates for the integer types.
664 *
665 * Updates the zero-padding and alignment for integer types.
666 *
667 * [format.string.std]/13
668 * If the 0 character and an align option both appear, the 0 character is
669 * ignored.
670 *
671 * For the formatter a @ref __default alignment means zero-padding. Update
672 * the alignment based on parsed format string.
673 */
__handle_integer()674 _LIBCPP_HIDE_FROM_ABI constexpr void __handle_integer() noexcept {
675 this->__zero_padding &= this->__alignment == _Flags::_Alignment::__default;
676 if (!this->__zero_padding &&
677 this->__alignment == _Flags::_Alignment::__default)
678 this->__alignment = _Flags::_Alignment::__right;
679 }
680
681 /**
682 * Handles the post-parsing updates for the character types.
683 *
684 * Sets the alignment and validates the format flags set for a character type.
685 *
686 * At the moment the validation for a character and a Boolean behave the
687 * same, but this may change in the future.
688 * Specifically at the moment the locale-specific form is allowed for the
689 * char output type, but it has no effect on the output.
690 */
__handle_char()691 _LIBCPP_HIDE_FROM_ABI constexpr void __handle_char() { __handle_bool(); }
692
693 /**
694 * Handles the post-parsing updates for the Boolean types.
695 *
696 * Sets the alignment and validates the format flags set for a Boolean type.
697 */
__handle_bool()698 _LIBCPP_HIDE_FROM_ABI constexpr void __handle_bool() {
699 if (this->__sign != _Flags::_Sign::__default)
700 __throw_format_error("A sign field isn't allowed in this format-spec");
701
702 if (this->__alternate_form)
703 __throw_format_error(
704 "An alternate form field isn't allowed in this format-spec");
705
706 if (this->__zero_padding)
707 __throw_format_error(
708 "A zero-padding field isn't allowed in this format-spec");
709
710 if (this->__alignment == _Flags::_Alignment::__default)
711 this->__alignment = _Flags::_Alignment::__left;
712 }
713 };
714
715 // TODO FMT Add a parser for floating-point values.
716 // TODO FMT Add a parser for pointer values.
717
718 /** Helper struct returned from @ref __get_string_alignment. */
719 template <class _CharT>
720 struct _LIBCPP_TEMPLATE_VIS __string_alignment {
721 /** Points beyond the last character to write to the output. */
722 const _CharT* __last;
723 /**
724 * The estimated number of columns in the output or 0.
725 *
726 * Only when the output needs to be aligned it's required to know the exact
727 * number of columns in the output. So if the formatted output has only a
728 * minimum width the exact size isn't important. It's only important to know
729 * the minimum has been reached. The minimum width is the width specified in
730 * the format-spec.
731 *
732 * For example in this code @code std::format("{:10}", MyString); @endcode
733 * the width estimation can stop once the algorithm has determined the output
734 * width is 10 columns.
735 *
736 * So if:
737 * * @ref __align == @c true the @ref __size is the estimated number of
738 * columns required.
739 * * @ref __align == @c false the @ref __size is the estimated number of
740 * columns required or 0 when the estimation algorithm stopped prematurely.
741 */
742 ptrdiff_t __size;
743 /**
744 * Does the output need to be aligned.
745 *
746 * When alignment is needed the output algorithm needs to add the proper
747 * padding. Else the output algorithm just needs to copy the input up to
748 * @ref __last.
749 */
750 bool __align;
751 };
752
753 #ifndef _LIBCPP_HAS_NO_UNICODE
754 namespace __detail {
755
756 /**
757 * Unicode column width estimates.
758 *
759 * Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
760 * Depending on format the relation between the number of code units stored and
761 * the number of output columns differs. The first relation is the number of
762 * code units forming a code point. (The text assumes the code units are
763 * unsigned.)
 * - UTF-8: The number of code units is between one and four. The first 128
 *   Unicode code points match the ASCII character set. When the highest bit is
 *   set it means the code point has more than one code unit.
 * - UTF-16: The number of code units is between 1 and 2. When the first
 *   code unit is in the range [0xd800,0xdbff] (a high surrogate) it means the
 *   code point uses two code units.
770 * - UTF-32: The number of code units is always one.
771 *
772 * The code point to the number of columns isn't well defined. The code uses the
773 * estimations defined in [format.string.std]/11. This list might change in the
774 * future.
775 *
776 * The algorithm of @ref __get_string_alignment uses two different scanners:
777 * - The simple scanner @ref __estimate_column_width_fast. This scanner assumes
778 * 1 code unit is 1 column. This scanner stops when it can't be sure the
779 * assumption is valid:
780 * - UTF-8 when the code point is encoded in more than 1 code unit.
781 * - UTF-16 and UTF-32 when the first multi-column code point is encountered.
782 * (The code unit's value is lower than 0xd800 so the 2 code unit encoding
783 * is irrelevant for this scanner.)
784 * Due to these assumptions the scanner is faster than the full scanner. It
785 * can process all text only containing ASCII. For UTF-16/32 it can process
786 * most (all?) European languages. (Note the set it can process might be
787 * reduced in the future, due to updates in the scanning rules.)
788 * - The full scanner @ref __estimate_column_width. This scanner, if needed,
789 * converts multiple code units into one code point then converts the code
790 * point to a column width.
791 *
792 * See also:
 * - [format.string.std]/11
794 * - https://en.wikipedia.org/wiki/UTF-8#Encoding
795 * - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
796 */
797
/**
 * The lowest code point with an estimated width of two columns.
 *
 * This is the point where the fast UTF-16/32 scanner needs to stop processing.
 */
inline constexpr uint32_t __two_column_code_point{0x1100};
804
805 /** Helper concept for an UTF-8 character type. */
806 template <class _CharT>
807 concept __utf8_character = same_as<_CharT, char> || same_as<_CharT, char8_t>;
808
809 /** Helper concept for an UTF-16 character type. */
810 template <class _CharT>
811 concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) || same_as<_CharT, char16_t>;
812
813 /** Helper concept for an UTF-32 character type. */
814 template <class _CharT>
815 concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) || same_as<_CharT, char32_t>;
816
817 /** Helper concept for an UTF-16 or UTF-32 character type. */
818 template <class _CharT>
819 concept __utf16_or_32_character = __utf16_character<_CharT> || __utf32_character<_CharT>;
820
821 /**
822 * Converts a code point to the column width.
823 *
824 * The estimations are conforming to [format.string.general]/11
825 *
826 * This version expects a value less than 0x1'0000, which is a 3-byte UTF-8
827 * character.
828 */
__column_width_3(uint32_t __c)829 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_3(uint32_t __c) noexcept {
830 _LIBCPP_ASSERT(__c < 0x1'0000,
831 "Use __column_width_4 or __column_width for larger values");
832
833 // clang-format off
834 return 1 + (__c >= 0x1100 && (__c <= 0x115f ||
835 (__c >= 0x2329 && (__c <= 0x232a ||
836 (__c >= 0x2e80 && (__c <= 0x303e ||
837 (__c >= 0x3040 && (__c <= 0xa4cf ||
838 (__c >= 0xac00 && (__c <= 0xd7a3 ||
839 (__c >= 0xf900 && (__c <= 0xfaff ||
840 (__c >= 0xfe10 && (__c <= 0xfe19 ||
841 (__c >= 0xfe30 && (__c <= 0xfe6f ||
842 (__c >= 0xff00 && (__c <= 0xff60 ||
843 (__c >= 0xffe0 && (__c <= 0xffe6
844 ))))))))))))))))))));
845 // clang-format on
846 }
847
848 /**
849 * @overload
850 *
851 * This version expects a value greater than or equal to 0x1'0000, which is a
852 * 4-byte UTF-8 character.
853 */
854 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_4(uint32_t __c) noexcept {
855 _LIBCPP_ASSERT(__c >= 0x1'0000,
856 "Use __column_width_3 or __column_width for smaller values");
857
858 // clang-format off
859 return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f ||
860 (__c >= 0x1'f900 && (__c <= 0x1'f9ff ||
861 (__c >= 0x2'0000 && (__c <= 0x2'fffd ||
862 (__c >= 0x3'0000 && (__c <= 0x3'fffd
863 ))))))));
864 // clang-format on
865 }
866
867 /**
868 * @overload
869 *
870 * The general case, accepting all values.
871 */
872 _LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width(uint32_t __c) noexcept {
873 if (__c < 0x1'0000)
874 return __column_width_3(__c);
875
876 return __column_width_4(__c);
877 }
878
879 /**
880 * Estimate the column width for the UTF-8 sequence using the fast algorithm.
881 */
882 template <__utf8_character _CharT>
883 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
884 __estimate_column_width_fast(const _CharT* __first,
885 const _CharT* __last) noexcept {
886 return _VSTD::find_if(__first, __last,
887 [](unsigned char __c) { return __c & 0x80; });
888 }
889
890 /**
891 * @overload
892 *
893 * The implementation for UTF-16/32.
894 */
895 template <__utf16_or_32_character _CharT>
896 _LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
897 __estimate_column_width_fast(const _CharT* __first,
898 const _CharT* __last) noexcept {
899 return _VSTD::find_if(__first, __last,
900 [](uint32_t __c) { return __c >= 0x1100; });
901 }
902
903 template <class _CharT>
904 struct _LIBCPP_TEMPLATE_VIS __column_width_result {
905 /** The number of output columns. */
906 size_t __width;
907 /**
908 * The last parsed element.
909 *
910 * This limits the original output to fit in the wanted number of columns.
911 */
912 const _CharT* __ptr;
913 };
914
915 /**
916 * Small helper to determine the width of malformed Unicode.
917 *
918 * @note This function's only needed for UTF-8. During scanning UTF-8 there
919 * are multiple place where it can be detected that the Unicode is malformed.
920 * UTF-16 only requires 1 test and UTF-32 requires no testing.
921 */
922 template <__utf8_character _CharT>
923 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
924 __estimate_column_width_malformed(const _CharT* __first, const _CharT* __last,
925 size_t __maximum, size_t __result) noexcept {
926 size_t __size = __last - __first;
927 size_t __n = _VSTD::min(__size, __maximum);
928 return {__result + __n, __first + __n};
929 }
930
931 /**
932 * Determines the number of output columns needed to render the input.
933 *
934 * @note When the scanner encounters malformed Unicode it acts as-if every code
935 * unit at the end of the input is one output column. It's expected the output
936 * terminal will replace these malformed code units with a one column
937 * replacement characters.
938 *
939 * @param __first Points to the first element of the input range.
940 * @param __last Points beyond the last element of the input range.
941 * @param __maximum The maximum number of output columns. The returned number
942 * of estimated output columns will not exceed this value.
943 */
944 template <__utf8_character _CharT>
945 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
946 __estimate_column_width(const _CharT* __first, const _CharT* __last,
947 size_t __maximum) noexcept {
948 size_t __result = 0;
949
950 while (__first != __last) {
951 // Based on the number of leading 1 bits the number of code units in the
952 // code point can be determined. See
953 // https://en.wikipedia.org/wiki/UTF-8#Encoding
954 switch (_VSTD::countl_one(static_cast<unsigned char>(*__first))) {
955 case 0: // 1-code unit encoding: all 1 column
956 ++__result;
957 ++__first;
958 break;
959
960 case 2: // 2-code unit encoding: all 1 column
961 // Malformed Unicode.
962 if (__last - __first < 2) [[unlikely]]
963 return __estimate_column_width_malformed(__first, __last, __maximum,
964 __result);
965 __first += 2;
966 ++__result;
967 break;
968
969 case 3: // 3-code unit encoding: either 1 or 2 columns
970 // Malformed Unicode.
971 if (__last - __first < 3) [[unlikely]]
972 return __estimate_column_width_malformed(__first, __last, __maximum,
973 __result);
974 {
975 uint32_t __c = static_cast<unsigned char>(*__first++) & 0x0f;
976 __c <<= 6;
977 __c |= static_cast<unsigned char>(*__first++) & 0x3f;
978 __c <<= 6;
979 __c |= static_cast<unsigned char>(*__first++) & 0x3f;
980 __result += __column_width_3(__c);
981 if (__result > __maximum)
982 return {__result - 2, __first - 3};
983 }
984 break;
985 case 4: // 4-code unit encoding: either 1 or 2 columns
986 // Malformed Unicode.
987 if (__last - __first < 4) [[unlikely]]
988 return __estimate_column_width_malformed(__first, __last, __maximum,
989 __result);
990 {
991 uint32_t __c = static_cast<unsigned char>(*__first++) & 0x07;
992 __c <<= 6;
993 __c |= static_cast<unsigned char>(*__first++) & 0x3f;
994 __c <<= 6;
995 __c |= static_cast<unsigned char>(*__first++) & 0x3f;
996 __c <<= 6;
997 __c |= static_cast<unsigned char>(*__first++) & 0x3f;
998 __result += __column_width_4(__c);
999 if (__result > __maximum)
1000 return {__result - 2, __first - 4};
1001 }
1002 break;
1003 default:
1004 // Malformed Unicode.
1005 return __estimate_column_width_malformed(__first, __last, __maximum,
1006 __result);
1007 }
1008
1009 if (__result >= __maximum)
1010 return {__result, __first};
1011 }
1012 return {__result, __first};
1013 }
1014
1015 template <__utf16_character _CharT>
1016 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
1017 __estimate_column_width(const _CharT* __first, const _CharT* __last,
1018 size_t __maximum) noexcept {
1019 size_t __result = 0;
1020
1021 while (__first != __last) {
1022 uint32_t __c = *__first;
1023 // Is the code unit part of a surrogate pair? See
1024 // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
1025 if (__c >= 0xd800 && __c <= 0xDfff) {
1026 // Malformed Unicode.
1027 if (__last - __first < 2) [[unlikely]]
1028 return {__result + 1, __first + 1};
1029
1030 __c -= 0xd800;
1031 __c <<= 10;
1032 __c += (*(__first + 1) - 0xdc00);
1033 __c += 0x10'000;
1034
1035 __result += __column_width_4(__c);
1036 if (__result > __maximum)
1037 return {__result - 2, __first};
1038 __first += 2;
1039 } else {
1040 __result += __column_width_3(__c);
1041 if (__result > __maximum)
1042 return {__result - 2, __first};
1043 ++__first;
1044 }
1045
1046 if (__result >= __maximum)
1047 return {__result, __first};
1048 }
1049
1050 return {__result, __first};
1051 }
1052
1053 template <__utf32_character _CharT>
1054 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
1055 __estimate_column_width(const _CharT* __first, const _CharT* __last,
1056 size_t __maximum) noexcept {
1057 size_t __result = 0;
1058
1059 while (__first != __last) {
1060 wchar_t __c = *__first;
1061 __result += __column_width(__c);
1062
1063 if (__result > __maximum)
1064 return {__result - 2, __first};
1065
1066 ++__first;
1067 if (__result >= __maximum)
1068 return {__result, __first};
1069 }
1070
1071 return {__result, __first};
1072 }
1073
1074 } // namespace __detail
1075
1076 template <class _CharT>
1077 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
1078 __get_string_alignment(const _CharT* __first, const _CharT* __last,
1079 ptrdiff_t __width, ptrdiff_t __precision) noexcept {
1080 _LIBCPP_ASSERT(__width != 0 || __precision != -1,
1081 "The function has no effect and shouldn't be used");
1082
1083 // TODO FMT There might be more optimizations possible:
1084 // If __precision == __format::__number_max and the encoding is:
1085 // * UTF-8 : 4 * (__last - __first) >= __width
1086 // * UTF-16 : 2 * (__last - __first) >= __width
1087 // * UTF-32 : (__last - __first) >= __width
1088 // In these cases it's certain the output is at least the requested width.
1089 // It's unknown how often this happens in practice. For now the improvement
1090 // isn't implemented.
1091
1092 /*
1093 * First assume there are no special Unicode code units in the input.
1094 * - Apply the precision (this may reduce the size of the input). When
1095 * __precison == -1 this step is omitted.
1096 * - Scan for special code units in the input.
1097 * If our assumption was correct the __pos will be at the end of the input.
1098 */
1099 const ptrdiff_t __length = __last - __first;
1100 const _CharT* __limit =
1101 __first +
1102 (__precision == -1 ? __length : _VSTD::min(__length, __precision));
1103 ptrdiff_t __size = __limit - __first;
1104 const _CharT* __pos =
1105 __detail::__estimate_column_width_fast(__first, __limit);
1106
1107 if (__pos == __limit)
1108 return {__limit, __size, __size < __width};
1109
1110 /*
1111 * Our assumption was wrong, there are special Unicode code units.
1112 * The range [__first, __pos) contains a set of code units with the
1113 * following property:
1114 * Every _CharT in the range will be rendered in 1 column.
1115 *
1116 * If there's no maximum width and the parsed size already exceeds the
1117 * minimum required width. The real size isn't important. So bail out.
1118 */
1119 if (__precision == -1 && (__pos - __first) >= __width)
1120 return {__last, 0, false};
1121
1122 /* If there's a __precision, truncate the output to that width. */
1123 ptrdiff_t __prefix = __pos - __first;
1124 if (__precision != -1) {
1125 _LIBCPP_ASSERT(__precision > __prefix, "Logic error.");
1126 auto __lengh_info = __detail::__estimate_column_width(
1127 __pos, __last, __precision - __prefix);
1128 __size = __lengh_info.__width + __prefix;
1129 return {__lengh_info.__ptr, __size, __size < __width};
1130 }
1131
1132 /* Else use __width to determine the number of required padding characters. */
1133 _LIBCPP_ASSERT(__width > __prefix, "Logic error.");
1134 /*
1135 * The column width is always one or two columns. For the precision the wanted
1136 * column width is the maximum, for the width it's the minimum. Using the
1137 * width estimation with its truncating behavior will result in the wrong
1138 * result in the following case:
1139 * - The last code unit processed requires two columns and exceeds the
1140 * maximum column width.
1141 * By increasing the __maximum by one avoids this issue. (It means it may
1142 * pass one code point more than required to determine the proper result;
1143 * that however isn't a problem for the algorithm.)
1144 */
1145 size_t __maximum = 1 + __width - __prefix;
1146 auto __lengh_info =
1147 __detail::__estimate_column_width(__pos, __last, __maximum);
1148 if (__lengh_info.__ptr != __last) {
1149 // Consumed the width number of code units. The exact size of the string
1150 // is unknown. We only know we don't need to align the output.
1151 _LIBCPP_ASSERT(static_cast<ptrdiff_t>(__lengh_info.__width + __prefix) >=
1152 __width,
1153 "Logic error");
1154 return {__last, 0, false};
1155 }
1156
1157 __size = __lengh_info.__width + __prefix;
1158 return {__last, __size, __size < __width};
1159 }
1160 #else // _LIBCPP_HAS_NO_UNICODE
1161 template <class _CharT>
1162 _LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
1163 __get_string_alignment(const _CharT* __first, const _CharT* __last,
1164 ptrdiff_t __width, ptrdiff_t __precision) noexcept {
1165 const ptrdiff_t __length = __last - __first;
1166 const _CharT* __limit =
1167 __first +
1168 (__precision == -1 ? __length : _VSTD::min(__length, __precision));
1169 ptrdiff_t __size = __limit - __first;
1170 return {__limit, __size, __size < __width};
1171 }
1172 #endif // _LIBCPP_HAS_NO_UNICODE
1173
1174 } // namespace __format_spec
1175
1176 # endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
1177
1178 #endif //_LIBCPP_STD_VER > 17
1179
1180 _LIBCPP_END_NAMESPACE_STD
1181
1182 _LIBCPP_POP_MACROS
1183
1184 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
1185