1 // -*- C++ -*-
2 //===----------------------------------------------------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9
10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
12
13 /// \file Contains the std-format-spec parser.
14 ///
15 /// Most of the code can be reused in the chrono-format-spec.
16 /// This header has some support for the chrono-format-spec since it doesn't
17 /// affect the std-format-spec.
18
19 #include <__algorithm/find_if.h>
20 #include <__algorithm/min.h>
21 #include <__assert>
22 #include <__concepts/arithmetic.h>
23 #include <__concepts/same_as.h>
24 #include <__config>
25 #include <__debug>
26 #include <__format/format_arg.h>
27 #include <__format/format_error.h>
28 #include <__format/format_parse_context.h>
29 #include <__format/format_string.h>
30 #include <__format/unicode.h>
31 #include <__variant/monostate.h>
32 #include <bit>
33 #include <cstdint>
34 #include <string_view>
35 #include <type_traits>
36
37 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
38 # pragma GCC system_header
39 #endif
40
41 _LIBCPP_PUSH_MACROS
42 #include <__undef_macros>
43
44 _LIBCPP_BEGIN_NAMESPACE_STD
45
46 #if _LIBCPP_STD_VER > 17
47
48 namespace __format_spec {
49
50 template <class _CharT>
51 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT>
__parse_arg_id(const _CharT * __begin,const _CharT * __end,auto & __parse_ctx)52 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) {
53 // This function is a wrapper to call the real parser. But it does the
54 // validation for the pre-conditions and post-conditions.
55 if (__begin == __end)
56 std::__throw_format_error("End of input while parsing format-spec arg-id");
57
58 __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __parse_ctx);
59
60 if (__r.__ptr == __end || *__r.__ptr != _CharT('}'))
61 std::__throw_format_error("Invalid arg-id");
62
63 ++__r.__ptr;
64 return __r;
65 }
66
67 template <class _Context>
68 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t
__substitute_arg_id(basic_format_arg<_Context> __format_arg)69 __substitute_arg_id(basic_format_arg<_Context> __format_arg) {
70 // [format.string.std]/8
71 // If the corresponding formatting argument is not of integral type...
72 // This wording allows char and bool too. LWG-3720 changes the wording to
73 // If the corresponding formatting argument is not of standard signed or
74 // unsigned integer type,
75 // This means the 128-bit will not be valid anymore.
76 // TODO FMT Verify this resolution is accepted and add a test to verify
77 // 128-bit integrals fail and switch to visit_format_arg.
78 return _VSTD::__visit_format_arg(
79 [](auto __arg) -> uint32_t {
80 using _Type = decltype(__arg);
81 if constexpr (integral<_Type>) {
82 if constexpr (signed_integral<_Type>) {
83 if (__arg < 0)
84 std::__throw_format_error("A format-spec arg-id replacement shouldn't have a negative value");
85 }
86
87 using _CT = common_type_t<_Type, decltype(__format::__number_max)>;
88 if (static_cast<_CT>(__arg) >
89 static_cast<_CT>(__format::__number_max))
90 std::__throw_format_error("A format-spec arg-id replacement exceeds the maximum supported value");
91
92 return __arg;
93 } else if constexpr (same_as<_Type, monostate>)
94 std::__throw_format_error("Argument index out of bounds");
95 else
96 std::__throw_format_error("A format-spec arg-id replacement argument isn't an integral type");
97 },
98 __format_arg);
99 }
100
/// Selects which elements of the std-format-spec a parser accepts.
///
/// Every flag is disabled by default. This means a newly added flag has to
/// be enabled explicitly by the formatters that want to use it.
// TODO FMT Use an ABI tag for this struct.
struct __fields {
  uint8_t __sign_                 : 1 = false;
  uint8_t __alternate_form_       : 1 = false;
  uint8_t __zero_padding_         : 1 = false;
  uint8_t __precision_            : 1 = false;
  uint8_t __locale_specific_form_ : 1 = false;
  uint8_t __type_                 : 1 = false;
  // Selects the set of valid fill characters.
  //
  // Originally every character except { and } was a valid fill character.
  // Range-based formatters use the colon to mark the beginning of the
  // underlying-format-spec. To avoid parsing ambiguities these formatter
  // specializations prohibit the use of the colon as a fill character.
  //
  // Note: the parser forwards this flag as the "use range-fill" argument of
  // its fill-and-align parser. So, despite its name, enabling this flag
  // makes the parser reject the colon as fill character.
  uint8_t __allow_colon_in_fill_  : 1 = false;
};
121
122 // By not placing this constant in the formatter class it's not duplicated for
123 // char and wchar_t.
124 inline constexpr __fields __fields_integral{
125 .__sign_ = true,
126 .__alternate_form_ = true,
127 .__zero_padding_ = true,
128 .__locale_specific_form_ = true,
129 .__type_ = true};
130 inline constexpr __fields __fields_floating_point{
131 .__sign_ = true,
132 .__alternate_form_ = true,
133 .__zero_padding_ = true,
134 .__precision_ = true,
135 .__locale_specific_form_ = true,
136 .__type_ = true};
137 inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true};
138 inline constexpr __fields __fields_pointer{.__type_ = true};
139
140 # if _LIBCPP_STD_VER > 20
141 inline constexpr __fields __fields_tuple{.__type_ = false, .__allow_colon_in_fill_ = true};
142 inline constexpr __fields __fields_range{.__type_ = false, .__allow_colon_in_fill_ = true};
143 # endif
144
145 enum class _LIBCPP_ENUM_VIS __alignment : uint8_t {
146 /// No alignment is set in the format string.
147 __default,
148 __left,
149 __center,
150 __right,
151 __zero_padding
152 };
153
154 enum class _LIBCPP_ENUM_VIS __sign : uint8_t {
155 /// No sign is set in the format string.
156 ///
157 /// The sign isn't allowed for certain format-types. By using this value
158 /// it's possible to detect whether or not the user explicitly set the sign
159 /// flag. For formatting purposes it behaves the same as \ref __minus.
160 __default,
161 __minus,
162 __plus,
163 __space
164 };
165
166 enum class _LIBCPP_ENUM_VIS __type : uint8_t {
167 __default,
168 __string,
169 __binary_lower_case,
170 __binary_upper_case,
171 __octal,
172 __decimal,
173 __hexadecimal_lower_case,
174 __hexadecimal_upper_case,
175 __pointer,
176 __char,
177 __hexfloat_lower_case,
178 __hexfloat_upper_case,
179 __scientific_lower_case,
180 __scientific_upper_case,
181 __fixed_lower_case,
182 __fixed_upper_case,
183 __general_lower_case,
184 __general_upper_case,
185 __debug
186 };
187
188 struct __std {
189 __alignment __alignment_ : 3;
190 __sign __sign_ : 2;
191 bool __alternate_form_ : 1;
192 bool __locale_specific_form_ : 1;
193 __type __type_;
194 };
195
196 struct __chrono {
197 __alignment __alignment_ : 3;
198 bool __locale_specific_form_ : 1;
199 bool __weekday_name_ : 1;
200 bool __weekday_ : 1;
201 bool __day_of_year_ : 1;
202 bool __week_of_year_ : 1;
203 bool __month_name_ : 1;
204 };
205
206 /// Contains the parsed formatting specifications.
207 ///
208 /// This contains information for both the std-format-spec and the
209 /// chrono-format-spec. This results in some unused members for both
210 /// specifications. However these unused members don't increase the size
211 /// of the structure.
212 ///
213 /// This struct doesn't cross ABI boundaries so its layout doesn't need to be
214 /// kept stable.
215 template <class _CharT>
216 struct __parsed_specifications {
217 union {
218 // The field __alignment_ is the first element in __std_ and __chrono_.
219 // This allows the code to always inspect this value regards which member
220 // of the union is the active member [class.union.general]/2.
221 //
222 // This is needed since the generic output routines handle the alignment of
223 // the output.
224 __alignment __alignment_ : 3;
225 __std __std_;
226 __chrono __chrono_;
227 };
228
229 /// The requested width.
230 ///
231 /// When the format-spec used an arg-id for this field it has already been
232 /// replaced with the value of that arg-id.
233 int32_t __width_;
234
235 /// The requested precision.
236 ///
237 /// When the format-spec used an arg-id for this field it has already been
238 /// replaced with the value of that arg-id.
239 int32_t __precision_;
240
241 _CharT __fill_;
242
__has_width__parsed_specifications243 _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }
244
__has_precision__parsed_specifications245 _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
246 };
247
248 // Validate the struct is small and cheap to copy since the struct is passed by
249 // value in formatting functions.
250 static_assert(sizeof(__parsed_specifications<char>) == 16);
251 static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
252 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
253 static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
254 static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
255 # endif
256
257 /// The parser for the std-format-spec.
258 ///
259 /// Note this class is a member of std::formatter specializations. It's
260 /// expected developers will create their own formatter specializations that
261 /// inherit from the std::formatter specializations. This means this class
262 /// must be ABI stable. To aid the stability the unused bits in the class are
263 /// set to zero. That way they can be repurposed if a future revision of the
264 /// Standards adds new fields to std-format-spec.
265 template <class _CharT>
266 class _LIBCPP_TEMPLATE_VIS __parser {
267 public:
268 _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields)
269 -> decltype(__parse_ctx.begin()) {
270
271 const _CharT* __begin = __parse_ctx.begin();
272 const _CharT* __end = __parse_ctx.end();
273 if (__begin == __end)
274 return __begin;
275
276 if (__parse_fill_align(__begin, __end, __fields.__allow_colon_in_fill_) && __begin == __end)
277 return __begin;
278
279 if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end)
280 return __begin;
281
282 if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end)
283 return __begin;
284
285 if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end)
286 return __begin;
287
288 if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end)
289 return __begin;
290
291 if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end)
292 return __begin;
293
294 if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end)
295 return __begin;
296
297 if (__fields.__type_) {
298 __parse_type(__begin);
299
300 // When __type_ is false the calling parser is expected to do additional
301 // parsing. In that case that parser should do the end of format string
302 // validation.
303 if (__begin != __end && *__begin != _CharT('}'))
304 std::__throw_format_error("The format-spec should consume the input or end with a '}'");
305 }
306
307 return __begin;
308 }
309
310 /// \returns the `__parsed_specifications` with the resolved dynamic sizes..
311 _LIBCPP_HIDE_FROM_ABI
__get_parsed_std_specifications(auto & __ctx)312 __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
313 return __parsed_specifications<_CharT>{
314 .__std_ = __std{.__alignment_ = __alignment_,
315 .__sign_ = __sign_,
316 .__alternate_form_ = __alternate_form_,
317 .__locale_specific_form_ = __locale_specific_form_,
318 .__type_ = __type_},
319 .__width_{__get_width(__ctx)},
320 .__precision_{__get_precision(__ctx)},
321 .__fill_{__fill_}};
322 }
323
__get_parsed_chrono_specifications(auto & __ctx)324 _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const {
325 return __parsed_specifications<_CharT>{
326 .__chrono_ =
327 __chrono{.__alignment_ = __alignment_,
328 .__locale_specific_form_ = __locale_specific_form_,
329 .__weekday_name_ = __weekday_name_,
330 .__weekday_ = __weekday_,
331 .__day_of_year_ = __day_of_year_,
332 .__week_of_year_ = __week_of_year_,
333 .__month_name_ = __month_name_},
334 .__width_{__get_width(__ctx)},
335 .__precision_{__get_precision(__ctx)},
336 .__fill_{__fill_}};
337 }
338
339 __alignment __alignment_ : 3 {__alignment::__default};
340 __sign __sign_ : 2 {__sign::__default};
341 bool __alternate_form_ : 1 {false};
342 bool __locale_specific_form_ : 1 {false};
343 bool __reserved_0_ : 1 {false};
344 __type __type_{__type::__default};
345
346 // These flags are only used for formatting chrono. Since the struct has
347 // padding space left it's added to this structure.
348 bool __weekday_name_ : 1 {false};
349 bool __weekday_ : 1 {false};
350
351 bool __day_of_year_ : 1 {false};
352 bool __week_of_year_ : 1 {false};
353
354 bool __month_name_ : 1 {false};
355
356 uint8_t __reserved_1_ : 3 {0};
357 uint8_t __reserved_2_ : 6 {0};
358 // These two flags are only used internally and not part of the
359 // __parsed_specifications. Therefore put them at the end.
360 bool __width_as_arg_ : 1 {false};
361 bool __precision_as_arg_ : 1 {false};
362
363 /// The requested width, either the value or the arg-id.
364 int32_t __width_{0};
365
366 /// The requested precision, either the value or the arg-id.
367 int32_t __precision_{-1};
368
369 // LWG 3576 will probably change this to always accept a Unicode code point
370 // To avoid changing the size with that change align the field so when it
371 // becomes 32-bit its alignment will remain the same. That also means the
372 // size will remain the same. (D2572 addresses the solution for LWG 3576.)
373 _CharT __fill_{_CharT(' ')};
374
375 private:
__parse_alignment(_CharT __c)376 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
377 switch (__c) {
378 case _CharT('<'):
379 __alignment_ = __alignment::__left;
380 return true;
381
382 case _CharT('^'):
383 __alignment_ = __alignment::__center;
384 return true;
385
386 case _CharT('>'):
387 __alignment_ = __alignment::__right;
388 return true;
389 }
390 return false;
391 }
392
393 // range-fill and tuple-fill are identical
394 _LIBCPP_HIDE_FROM_ABI constexpr bool
__parse_fill_align(const _CharT * & __begin,const _CharT * __end,bool __use_range_fill)395 __parse_fill_align(const _CharT*& __begin, const _CharT* __end, bool __use_range_fill) {
396 _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause "
397 "undefined behavior by evaluating data not in the input");
398 if (__begin + 1 != __end) {
399 if (__parse_alignment(*(__begin + 1))) {
400 if (__use_range_fill && (*__begin == _CharT('{') || *__begin == _CharT('}') || *__begin == _CharT(':')))
401 std::__throw_format_error("The format-spec range-fill field contains an invalid character");
402 else if (*__begin == _CharT('{') || *__begin == _CharT('}'))
403 std::__throw_format_error("The format-spec fill field contains an invalid character");
404
405 __fill_ = *__begin;
406 __begin += 2;
407 return true;
408 }
409 }
410
411 if (!__parse_alignment(*__begin))
412 return false;
413
414 ++__begin;
415 return true;
416 }
417
__parse_sign(const _CharT * & __begin)418 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(const _CharT*& __begin) {
419 switch (*__begin) {
420 case _CharT('-'):
421 __sign_ = __sign::__minus;
422 break;
423 case _CharT('+'):
424 __sign_ = __sign::__plus;
425 break;
426 case _CharT(' '):
427 __sign_ = __sign::__space;
428 break;
429 default:
430 return false;
431 }
432 ++__begin;
433 return true;
434 }
435
__parse_alternate_form(const _CharT * & __begin)436 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(const _CharT*& __begin) {
437 if (*__begin != _CharT('#'))
438 return false;
439
440 __alternate_form_ = true;
441 ++__begin;
442 return true;
443 }
444
__parse_zero_padding(const _CharT * & __begin)445 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(const _CharT*& __begin) {
446 if (*__begin != _CharT('0'))
447 return false;
448
449 if (__alignment_ == __alignment::__default)
450 __alignment_ = __alignment::__zero_padding;
451 ++__begin;
452 return true;
453 }
454
__parse_width(const _CharT * & __begin,const _CharT * __end,auto & __parse_ctx)455 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(const _CharT*& __begin, const _CharT* __end, auto& __parse_ctx) {
456 if (*__begin == _CharT('0'))
457 std::__throw_format_error("A format-spec width field shouldn't have a leading zero");
458
459 if (*__begin == _CharT('{')) {
460 __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
461 __width_as_arg_ = true;
462 __width_ = __r.__value;
463 __begin = __r.__ptr;
464 return true;
465 }
466
467 if (*__begin < _CharT('0') || *__begin > _CharT('9'))
468 return false;
469
470 __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
471 __width_ = __r.__value;
472 _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, "
473 "due to validations in this function");
474 __begin = __r.__ptr;
475 return true;
476 }
477
__parse_precision(const _CharT * & __begin,const _CharT * __end,auto & __parse_ctx)478 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(const _CharT*& __begin, const _CharT* __end,
479 auto& __parse_ctx) {
480 if (*__begin != _CharT('.'))
481 return false;
482
483 ++__begin;
484 if (__begin == __end)
485 std::__throw_format_error("End of input while parsing format-spec precision");
486
487 if (*__begin == _CharT('{')) {
488 __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
489 __precision_as_arg_ = true;
490 __precision_ = __arg_id.__value;
491 __begin = __arg_id.__ptr;
492 return true;
493 }
494
495 if (*__begin < _CharT('0') || *__begin > _CharT('9'))
496 std::__throw_format_error("The format-spec precision field doesn't contain a value or arg-id");
497
498 __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
499 __precision_ = __r.__value;
500 __precision_as_arg_ = false;
501 __begin = __r.__ptr;
502 return true;
503 }
504
__parse_locale_specific_form(const _CharT * & __begin)505 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(const _CharT*& __begin) {
506 if (*__begin != _CharT('L'))
507 return false;
508
509 __locale_specific_form_ = true;
510 ++__begin;
511 return true;
512 }
513
__parse_type(const _CharT * & __begin)514 _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(const _CharT*& __begin) {
515 // Determines the type. It does not validate whether the selected type is
516 // valid. Most formatters have optional fields that are only allowed for
517 // certain types. These parsers need to do validation after the type has
518 // been parsed. So its easier to implement the validation for all types in
519 // the specific parse function.
520 switch (*__begin) {
521 case 'A':
522 __type_ = __type::__hexfloat_upper_case;
523 break;
524 case 'B':
525 __type_ = __type::__binary_upper_case;
526 break;
527 case 'E':
528 __type_ = __type::__scientific_upper_case;
529 break;
530 case 'F':
531 __type_ = __type::__fixed_upper_case;
532 break;
533 case 'G':
534 __type_ = __type::__general_upper_case;
535 break;
536 case 'X':
537 __type_ = __type::__hexadecimal_upper_case;
538 break;
539 case 'a':
540 __type_ = __type::__hexfloat_lower_case;
541 break;
542 case 'b':
543 __type_ = __type::__binary_lower_case;
544 break;
545 case 'c':
546 __type_ = __type::__char;
547 break;
548 case 'd':
549 __type_ = __type::__decimal;
550 break;
551 case 'e':
552 __type_ = __type::__scientific_lower_case;
553 break;
554 case 'f':
555 __type_ = __type::__fixed_lower_case;
556 break;
557 case 'g':
558 __type_ = __type::__general_lower_case;
559 break;
560 case 'o':
561 __type_ = __type::__octal;
562 break;
563 case 'p':
564 __type_ = __type::__pointer;
565 break;
566 case 's':
567 __type_ = __type::__string;
568 break;
569 case 'x':
570 __type_ = __type::__hexadecimal_lower_case;
571 break;
572 # if _LIBCPP_STD_VER > 20
573 case '?':
574 __type_ = __type::__debug;
575 break;
576 # endif
577 default:
578 return;
579 }
580 ++__begin;
581 }
582
583 _LIBCPP_HIDE_FROM_ABI
__get_width(auto & __ctx)584 int32_t __get_width(auto& __ctx) const {
585 if (!__width_as_arg_)
586 return __width_;
587
588 return __format_spec::__substitute_arg_id(__ctx.arg(__width_));
589 }
590
591 _LIBCPP_HIDE_FROM_ABI
__get_precision(auto & __ctx)592 int32_t __get_precision(auto& __ctx) const {
593 if (!__precision_as_arg_)
594 return __precision_;
595
596 return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
597 }
598 };
599
600 // Validates whether the reserved bitfields don't change the size.
601 static_assert(sizeof(__parser<char>) == 16);
602 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
603 static_assert(sizeof(__parser<wchar_t>) == 16);
604 # endif
605
__process_display_type_string(__format_spec::__type __type)606 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) {
607 switch (__type) {
608 case __format_spec::__type::__default:
609 case __format_spec::__type::__string:
610 case __format_spec::__type::__debug:
611 break;
612
613 default:
614 std::__throw_format_error("The format-spec type has a type not supported for a string argument");
615 }
616 }
617
618 template <class _CharT>
__process_display_type_bool_string(__parser<_CharT> & __parser)619 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser) {
620 if (__parser.__sign_ != __sign::__default)
621 std::__throw_format_error("A sign field isn't allowed in this format-spec");
622
623 if (__parser.__alternate_form_)
624 std::__throw_format_error("An alternate form field isn't allowed in this format-spec");
625
626 if (__parser.__alignment_ == __alignment::__zero_padding)
627 std::__throw_format_error("A zero-padding field isn't allowed in this format-spec");
628
629 if (__parser.__alignment_ == __alignment::__default)
630 __parser.__alignment_ = __alignment::__left;
631 }
632
633 template <class _CharT>
__process_display_type_char(__parser<_CharT> & __parser)634 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser) {
635 __format_spec::__process_display_type_bool_string(__parser);
636 }
637
638 template <class _CharT>
__process_parsed_bool(__parser<_CharT> & __parser)639 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) {
640 switch (__parser.__type_) {
641 case __format_spec::__type::__default:
642 case __format_spec::__type::__string:
643 __format_spec::__process_display_type_bool_string(__parser);
644 break;
645
646 case __format_spec::__type::__binary_lower_case:
647 case __format_spec::__type::__binary_upper_case:
648 case __format_spec::__type::__octal:
649 case __format_spec::__type::__decimal:
650 case __format_spec::__type::__hexadecimal_lower_case:
651 case __format_spec::__type::__hexadecimal_upper_case:
652 break;
653
654 default:
655 std::__throw_format_error("The format-spec type has a type not supported for a bool argument");
656 }
657 }
658
659 template <class _CharT>
__process_parsed_char(__parser<_CharT> & __parser)660 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) {
661 switch (__parser.__type_) {
662 case __format_spec::__type::__default:
663 case __format_spec::__type::__char:
664 case __format_spec::__type::__debug:
665 __format_spec::__process_display_type_char(__parser);
666 break;
667
668 case __format_spec::__type::__binary_lower_case:
669 case __format_spec::__type::__binary_upper_case:
670 case __format_spec::__type::__octal:
671 case __format_spec::__type::__decimal:
672 case __format_spec::__type::__hexadecimal_lower_case:
673 case __format_spec::__type::__hexadecimal_upper_case:
674 break;
675
676 default:
677 std::__throw_format_error("The format-spec type has a type not supported for a char argument");
678 }
679 }
680
681 template <class _CharT>
__process_parsed_integer(__parser<_CharT> & __parser)682 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) {
683 switch (__parser.__type_) {
684 case __format_spec::__type::__default:
685 case __format_spec::__type::__binary_lower_case:
686 case __format_spec::__type::__binary_upper_case:
687 case __format_spec::__type::__octal:
688 case __format_spec::__type::__decimal:
689 case __format_spec::__type::__hexadecimal_lower_case:
690 case __format_spec::__type::__hexadecimal_upper_case:
691 break;
692
693 case __format_spec::__type::__char:
694 __format_spec::__process_display_type_char(__parser);
695 break;
696
697 default:
698 std::__throw_format_error("The format-spec type has a type not supported for an integer argument");
699 }
700 }
701
702 template <class _CharT>
__process_parsed_floating_point(__parser<_CharT> & __parser)703 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser) {
704 switch (__parser.__type_) {
705 case __format_spec::__type::__default:
706 case __format_spec::__type::__hexfloat_lower_case:
707 case __format_spec::__type::__hexfloat_upper_case:
708 // Precision specific behavior will be handled later.
709 break;
710 case __format_spec::__type::__scientific_lower_case:
711 case __format_spec::__type::__scientific_upper_case:
712 case __format_spec::__type::__fixed_lower_case:
713 case __format_spec::__type::__fixed_upper_case:
714 case __format_spec::__type::__general_lower_case:
715 case __format_spec::__type::__general_upper_case:
716 if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1)
717 // Set the default precision for the call to to_chars.
718 __parser.__precision_ = 6;
719 break;
720
721 default:
722 std::__throw_format_error("The format-spec type has a type not supported for a floating-point argument");
723 }
724 }
725
__process_display_type_pointer(__format_spec::__type __type)726 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type) {
727 switch (__type) {
728 case __format_spec::__type::__default:
729 case __format_spec::__type::__pointer:
730 break;
731
732 default:
733 std::__throw_format_error("The format-spec type has a type not supported for a pointer argument");
734 }
735 }
736
/// The result of a column width estimation.
template <class _CharT>
struct __column_width_result {
  /// The estimated number of output columns.
  size_t __width_;

  /// One beyond the last code unit used in the estimation.
  ///
  /// This limits the original output to fit in the wanted number of columns.
  const _CharT* __last_;
};

// Deduces the character type from the pointer to the last code unit.
template <class _CharT>
__column_width_result(size_t, const _CharT*) -> __column_width_result<_CharT>;
749
/// Selects how the column width estimation handles a requested column width
/// that can't be achieved exactly.
///
/// Since a code point can have a column width of two, it's possible the
/// requested column width falls in the middle of a code point. The policy to
/// use depends on the intended usage.
enum class __column_width_rounding {
  /// Used for a precision: the maximum width may not be exceeded, so the
  /// result is "rounded down" to the previous boundary.
  __down,
  /// Used for a width: the estimation is done once the minimum is reached
  /// and exceeding it is not an issue. Rounding down would be an issue since
  /// that results in writing fill characters. Therefore the result is
  /// "rounded up".
  __up
};
760
761 # ifndef _LIBCPP_HAS_NO_UNICODE
762
763 namespace __detail {
764
765 /// Converts a code point to the column width.
766 ///
767 /// The estimations are conforming to [format.string.general]/11
768 ///
769 /// This version expects a value less than 0x1'0000, which is a 3-byte UTF-8
770 /// character.
__column_width_3(uint32_t __c)771 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width_3(uint32_t __c) noexcept {
772 _LIBCPP_ASSERT(__c < 0x10000, "Use __column_width_4 or __column_width for larger values");
773
774 // clang-format off
775 return 1 + (__c >= 0x1100 && (__c <= 0x115f ||
776 (__c >= 0x2329 && (__c <= 0x232a ||
777 (__c >= 0x2e80 && (__c <= 0x303e ||
778 (__c >= 0x3040 && (__c <= 0xa4cf ||
779 (__c >= 0xac00 && (__c <= 0xd7a3 ||
780 (__c >= 0xf900 && (__c <= 0xfaff ||
781 (__c >= 0xfe10 && (__c <= 0xfe19 ||
782 (__c >= 0xfe30 && (__c <= 0xfe6f ||
783 (__c >= 0xff00 && (__c <= 0xff60 ||
784 (__c >= 0xffe0 && (__c <= 0xffe6
785 ))))))))))))))))))));
786 // clang-format on
787 }
788
789 /// @overload
790 ///
791 /// This version expects a value greater than or equal to 0x1'0000, which is a
792 /// 4-byte UTF-8 character.
__column_width_4(uint32_t __c)793 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width_4(uint32_t __c) noexcept {
794 _LIBCPP_ASSERT(__c >= 0x10000, "Use __column_width_3 or __column_width for smaller values");
795
796 // clang-format off
797 return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f ||
798 (__c >= 0x1'f900 && (__c <= 0x1'f9ff ||
799 (__c >= 0x2'0000 && (__c <= 0x2'fffd ||
800 (__c >= 0x3'0000 && (__c <= 0x3'fffd
801 ))))))));
802 // clang-format on
803 }
804
805 /// @overload
806 ///
807 /// The general case, accepting all values.
__column_width(uint32_t __c)808 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width(uint32_t __c) noexcept {
809 if (__c < 0x10000)
810 return __detail::__column_width_3(__c);
811
812 return __detail::__column_width_4(__c);
813 }
814
815 template <class _CharT>
__estimate_column_width_grapheme_clustering(const _CharT * __first,const _CharT * __last,size_t __maximum,__column_width_rounding __rounding)816 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width_grapheme_clustering(
817 const _CharT* __first, const _CharT* __last, size_t __maximum, __column_width_rounding __rounding) noexcept {
818 __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last};
819
820 __column_width_result<_CharT> __result{0, __first};
821 while (__result.__last_ != __last && __result.__width_ <= __maximum) {
822 typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume();
823 int __width = __detail::__column_width(__cluster.__code_point_);
824
825 // When the next entry would exceed the maximum width the previous width
826 // might be returned. For example when a width of 100 is requested the
827 // returned width might be 99, since the next code point has an estimated
828 // column width of 2. This depends on the rounding flag.
829 // When the maximum is exceeded the loop will abort the next iteration.
830 if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum)
831 return __result;
832
833 __result.__width_ += __width;
834 __result.__last_ = __cluster.__last_;
835 }
836
837 return __result;
838 }
839
840 } // namespace __detail
841
842 // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
843 // Depending on format the relation between the number of code units stored and
844 // the number of output columns differs. The first relation is the number of
845 // code units forming a code point. (The text assumes the code units are
846 // unsigned.)
// - UTF-8: The number of code units is between one and four. The first 128
//   Unicode code points match the ASCII character set. When the highest bit is
//   set it means the code point has more than one code unit.
// - UTF-16: The number of code units is between 1 and 2. When the first
//   code unit is in the range [0xd800,0xdc00) it means the code point uses two
//   code units.
853 // - UTF-32: The number of code units is always one.
854 //
855 // The code point to the number of columns is specified in
856 // [format.string.std]/11. This list might change in the future.
857 //
// Another thing to be taken into account is Grapheme clustering. This means
// that in some cases multiple code points are combined into one element in the
// output. For example:
861 // - an ASCII character with a combined diacritical mark
862 // - an emoji with a skin tone modifier
863 // - a group of combined people emoji to create a family
864 // - a combination of flag emoji
865 //
866 // See also:
867 // - [format.string.general]/11
868 // - https://en.wikipedia.org/wiki/UTF-8#Encoding
869 // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
870
__is_ascii(char32_t __c)871 _LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; }
872
873 /// Determines the number of output columns needed to render the input.
874 ///
875 /// \note When the scanner encounters malformed Unicode it acts as-if every
876 /// code unit is a one column code point. Typically a terminal uses the same
877 /// strategy and replaces every malformed code unit with a one column
878 /// replacement character.
879 ///
880 /// \param __first Points to the first element of the input range.
881 /// \param __last Points beyond the last element of the input range.
882 /// \param __maximum The maximum number of output columns. The returned number
883 /// of estimated output columns will not exceed this value.
884 /// \param __rounding Selects the rounding method.
885 /// \c __down result.__width_ <= __maximum
886 /// \c __up result.__width_ <= __maximum + 1
887 template <class _CharT>
__estimate_column_width(basic_string_view<_CharT> __str,size_t __maximum,__column_width_rounding __rounding)888 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width(
889 basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept {
890 // The width estimation is done in two steps:
891 // - Quickly process for the ASCII part. ASCII has the following properties
892 // - One code unit is one code point
893 // - Every code point has an estimated width of one
894 // - When needed it will a Unicode Grapheme clustering algorithm to find
895 // the proper place for truncation.
896
897 if (__str.empty() || __maximum == 0)
898 return {0, __str.begin()};
899
900 // ASCII has one caveat; when an ASCII character is followed by a non-ASCII
901 // character they might be part of an extended grapheme cluster. For example:
902 // an ASCII letter and a COMBINING ACUTE ACCENT
903 // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we
904 // need to scan one code unit beyond the requested precision. When this code
905 // unit is non-ASCII we omit the current code unit and let the Grapheme
906 // clustering algorithm do its work.
907 const _CharT* __it = __str.begin();
908 if (__format_spec::__is_ascii(*__it)) {
909 do {
910 --__maximum;
911 ++__it;
912 if (__it == __str.end())
913 return {__str.size(), __str.end()};
914
915 if (__maximum == 0) {
916 if (__format_spec::__is_ascii(*__it))
917 return {static_cast<size_t>(__it - __str.begin()), __it};
918
919 break;
920 }
921 } while (__format_spec::__is_ascii(*__it));
922 --__it;
923 ++__maximum;
924 }
925
926 ptrdiff_t __ascii_size = __it - __str.begin();
927 __column_width_result __result =
928 __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding);
929
930 __result.__width_ += __ascii_size;
931 return __result;
932 }
933 # else // !defined(_LIBCPP_HAS_NO_UNICODE)
934 template <class _CharT>
935 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
__estimate_column_width(basic_string_view<_CharT> __str,size_t __maximum,__column_width_rounding)936 __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept {
937 // When Unicode isn't supported assume ASCII and every code unit is one code
938 // point. In ASCII the estimated column width is always one. Thus there's no
939 // need for rounding.
940 size_t __width_ = _VSTD::min(__str.size(), __maximum);
941 return {__width_, __str.begin() + __width_};
942 }
943
944 # endif // !defined(_LIBCPP_HAS_NO_UNICODE)
945
946 } // namespace __format_spec
947
948 #endif //_LIBCPP_STD_VER > 17
949
950 _LIBCPP_END_NAMESPACE_STD
951
952 _LIBCPP_POP_MACROS
953
954 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
955