1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 12 13 /// \file Contains the std-format-spec parser. 14 /// 15 /// Most of the code can be reused in the chrono-format-spec. 16 /// This header has some support for the chrono-format-spec since it doesn't 17 /// affect the std-format-spec. 18 19 #include <__algorithm/find_if.h> 20 #include <__algorithm/min.h> 21 #include <__assert> 22 #include <__concepts/arithmetic.h> 23 #include <__concepts/same_as.h> 24 #include <__config> 25 #include <__debug> 26 #include <__format/format_arg.h> 27 #include <__format/format_error.h> 28 #include <__format/format_parse_context.h> 29 #include <__format/format_string.h> 30 #include <__format/unicode.h> 31 #include <__variant/monostate.h> 32 #include <bit> 33 #include <cstdint> 34 #include <string_view> 35 #include <type_traits> 36 37 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 38 # pragma GCC system_header 39 #endif 40 41 _LIBCPP_PUSH_MACROS 42 #include <__undef_macros> 43 44 _LIBCPP_BEGIN_NAMESPACE_STD 45 46 #if _LIBCPP_STD_VER > 17 47 48 namespace __format_spec { 49 50 template <class _CharT> 51 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT> 52 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) { 53 // This function is a wrapper to call the real parser. But it does the 54 // validation for the pre-conditions and post-conditions. 
55 if (__begin == __end) 56 std::__throw_format_error("End of input while parsing format-spec arg-id"); 57 58 __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __parse_ctx); 59 60 if (__r.__ptr == __end || *__r.__ptr != _CharT('}')) 61 std::__throw_format_error("Invalid arg-id"); 62 63 ++__r.__ptr; 64 return __r; 65 } 66 67 template <class _Context> 68 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t 69 __substitute_arg_id(basic_format_arg<_Context> __format_arg) { 70 // [format.string.std]/8 71 // If the corresponding formatting argument is not of integral type... 72 // This wording allows char and bool too. LWG-3720 changes the wording to 73 // If the corresponding formatting argument is not of standard signed or 74 // unsigned integer type, 75 // This means the 128-bit will not be valid anymore. 76 // TODO FMT Verify this resolution is accepted and add a test to verify 77 // 128-bit integrals fail and switch to visit_format_arg. 78 return _VSTD::__visit_format_arg( 79 [](auto __arg) -> uint32_t { 80 using _Type = decltype(__arg); 81 if constexpr (integral<_Type>) { 82 if constexpr (signed_integral<_Type>) { 83 if (__arg < 0) 84 std::__throw_format_error("A format-spec arg-id replacement shouldn't have a negative value"); 85 } 86 87 using _CT = common_type_t<_Type, decltype(__format::__number_max)>; 88 if (static_cast<_CT>(__arg) > 89 static_cast<_CT>(__format::__number_max)) 90 std::__throw_format_error("A format-spec arg-id replacement exceeds the maximum supported value"); 91 92 return __arg; 93 } else if constexpr (same_as<_Type, monostate>) 94 std::__throw_format_error("Argument index out of bounds"); 95 else 96 std::__throw_format_error("A format-spec arg-id replacement argument isn't an integral type"); 97 }, 98 __format_arg); 99 } 100 101 /// These fields are a filter for which elements to parse. 102 /// 103 /// They default to false so when a new field is added it needs to be opted in 104 /// explicitly. 
// TODO FMT Use an ABI tag for this struct.
struct __fields {
  // Every flag below enables parsing of the element with the same name in
  // __parser::__parse.
  uint8_t __sign_ : 1 {false};
  uint8_t __alternate_form_ : 1 {false};
  uint8_t __zero_padding_ : 1 {false};
  uint8_t __precision_ : 1 {false};
  uint8_t __locale_specific_form_ : 1 {false};
  uint8_t __type_ : 1 {false};
  // Determines the valid values for fill.
  //
  // Originally the fill could be any character except { and }. Range-based
  // formatters use the colon to mark the beginning of the
  // underlying-format-spec. To avoid parsing ambiguities these formatter
  // specializations prohibit the use of the colon as a fill character.
  //
  // NOTE(review): the name reads inverted. The flag is set to true for the
  // tuple and range formatters below and __parser::__parse forwards it as the
  // __use_range_fill argument of __parse_fill_align, where a true value makes
  // the colon an *invalid* fill character.
  uint8_t __allow_colon_in_fill_ : 1 {false};
};

// By not placing this constant in the formatter class it's not duplicated for
// char and wchar_t.
inline constexpr __fields __fields_integral{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__locale_specific_form_ = true,
    .__type_                 = true};
inline constexpr __fields __fields_floating_point{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__precision_            = true,
    .__locale_specific_form_ = true,
    .__type_                 = true};
inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true};
inline constexpr __fields __fields_pointer{.__type_ = true};

#  if _LIBCPP_STD_VER > 20
// The tuple and range formatters don't let this parser handle the type and
// use the restricted fill, see __fields::__allow_colon_in_fill_.
inline constexpr __fields __fields_tuple{.__type_ = false, .__allow_colon_in_fill_ = true};
inline constexpr __fields __fields_range{.__type_ = false, .__allow_colon_in_fill_ = true};
#  endif

/// The alignment of the output.
///
/// The values need to fit in the 3-bit bit-fields that store them.
enum class _LIBCPP_ENUM_VIS __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default,
  __left,
  __center,
  __right,
  /// Set by the parser when the zero-padding flag is used without an explicit
  /// alignment.
  __zero_padding
};

/// The sign option of the format-spec.
///
/// The values need to fit in the 2-bit bit-fields that store them.
enum class _LIBCPP_ENUM_VIS __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default,
  __minus,
  __plus,
  __space
};

/// The display type selected by the type character of the format-spec.
///
/// The mapping from character to enumerator is done in
/// __parser::__parse_type.
enum class _LIBCPP_ENUM_VIS __type : uint8_t {
  /// No type is set in the format string.
  __default,
  __string,
  __binary_lower_case,
  __binary_upper_case,
  __octal,
  __decimal,
  __hexadecimal_lower_case,
  __hexadecimal_upper_case,
  __pointer,
  __char,
  __hexfloat_lower_case,
  __hexfloat_upper_case,
  __scientific_lower_case,
  __scientific_upper_case,
  __fixed_lower_case,
  __fixed_upper_case,
  __general_lower_case,
  __general_upper_case,
  __debug
};

/// The std-format-spec specific part of \ref __parsed_specifications.
///
/// \c __alignment_ needs to remain the first member, see the union in
/// \ref __parsed_specifications.
struct __std {
  __alignment __alignment_ : 3;
  __sign __sign_ : 2;
  bool __alternate_form_ : 1;
  bool __locale_specific_form_ : 1;
  __type __type_;
};

/// The chrono-format-spec specific part of \ref __parsed_specifications.
///
/// \c __alignment_ needs to remain the first member, see the union in
/// \ref __parsed_specifications.
struct __chrono {
  __alignment __alignment_ : 3;
  bool __locale_specific_form_ : 1;
  bool __weekday_name_ : 1;
  bool __weekday_ : 1;
  bool __day_of_year_ : 1;
  bool __week_of_year_ : 1;
  bool __month_name_ : 1;
};

/// Contains the parsed formatting specifications.
///
/// This contains information for both the std-format-spec and the
/// chrono-format-spec. This results in some unused members for both
/// specifications. However these unused members don't increase the size
/// of the structure.
///
/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
/// kept stable.
template <class _CharT>
struct __parsed_specifications {
  union {
    // The field __alignment_ is the first element in __std_ and __chrono_.
    // This allows the code to always inspect this value regardless of which
    // member of the union is the active member [class.union.general]/2.
    //
    // This is needed since the generic output routines handle the alignment of
    // the output.
    __alignment __alignment_ : 3;
    __std __std_;
    __chrono __chrono_;
  };

  /// The requested width.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __width_;

  /// The requested precision.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __precision_;

  /// The fill character used for the padding.
  _CharT __fill_;

  /// \returns whether a width greater than zero is requested.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }

  /// \returns whether a precision is requested; a negative value means no
  /// precision.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
};

// Validate the struct is small and cheap to copy since the struct is passed by
// value in formatting functions.
static_assert(sizeof(__parsed_specifications<char>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
#  endif

/// The parser for the std-format-spec.
///
/// Note this class is a member of std::formatter specializations. It's
/// expected developers will create their own formatter specializations that
/// inherit from the std::formatter specializations. This means this class
/// must be ABI stable. To aid the stability the unused bits in the class are
/// set to zero. That way they can be repurposed if a future revision of the
/// Standards adds new fields to std-format-spec.
template <class _CharT>
class _LIBCPP_TEMPLATE_VIS __parser {
public:
  /// Parses the format-spec stored in \a __parse_ctx.
  ///
  /// The elements are tried in a fixed order: fill-and-align, sign, alternate
  /// form, zero-padding, width, precision, locale-specific form, and type.
  /// Fill-and-align and width are always tried; the other elements are only
  /// tried when the matching flag in \a __fields is set. Parsing stops as soon
  /// as an element consumed the last character of the input.
  ///
  /// Errors are reported by calling std::__throw_format_error.
  ///
  /// \param __parse_ctx The context holding the input and used to resolve
  ///                    nested arg-ids.
  /// \param __fields    The filter selecting the optional elements to parse.
  ///
  /// \returns A pointer to the position where the parsing stopped.
  _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields)
      -> decltype(__parse_ctx.begin()) {

    const _CharT* __begin = __parse_ctx.begin();
    const _CharT* __end = __parse_ctx.end();
    if (__begin == __end)
      return __begin;

    // Every step below returns early when it consumed the last character.
    // This guarantees the next step can safely dereference __begin.
    if (__parse_fill_align(__begin, __end, __fields.__allow_colon_in_fill_) && __begin == __end)
      return __begin;

    if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end)
      return __begin;

    if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end)
      return __begin;

    if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end)
      return __begin;

    if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end)
      return __begin;

    if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end)
      return __begin;

    if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end)
      return __begin;

    if (__fields.__type_) {
      __parse_type(__begin);

      // When __type_ is false the calling parser is expected to do additional
      // parsing. In that case that parser should do the end of format string
      // validation.
      if (__begin != __end && *__begin != _CharT('}'))
        std::__throw_format_error("The format-spec should consume the input or end with a '}'");
    }

    return __begin;
  }

  /// \returns the `__parsed_specifications` with the resolved dynamic sizes.
  ///
  /// The std-format-spec member of the union is initialized. The width and
  /// precision are obtained from \ref __get_width and \ref __get_precision,
  /// which look up the argument in \a __ctx when an arg-id was used.
  _LIBCPP_HIDE_FROM_ABI
  __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const {
    return __parsed_specifications<_CharT>{
        .__std_ = __std{.__alignment_            = __alignment_,
                        .__sign_                 = __sign_,
                        .__alternate_form_       = __alternate_form_,
                        .__locale_specific_form_ = __locale_specific_form_,
                        .__type_                 = __type_},
        .__width_{__get_width(__ctx)},
        .__precision_{__get_precision(__ctx)},
        .__fill_{__fill_}};
  }

  /// \returns the `__parsed_specifications` with the resolved dynamic sizes.
  ///
  /// Like \ref __get_parsed_std_specifications, but the chrono-format-spec
  /// member of the union is initialized.
  _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const {
    return __parsed_specifications<_CharT>{
        .__chrono_ =
            __chrono{.__alignment_            = __alignment_,
                     .__locale_specific_form_ = __locale_specific_form_,
                     .__weekday_name_         = __weekday_name_,
                     .__weekday_              = __weekday_,
                     .__day_of_year_          = __day_of_year_,
                     .__week_of_year_         = __week_of_year_,
                     .__month_name_           = __month_name_},
        .__width_{__get_width(__ctx)},
        .__precision_{__get_precision(__ctx)},
        .__fill_{__fill_}};
  }

  // The data members. Their order and bit-field widths determine the layout
  // of the class; the size is validated by the static_asserts after the
  // class.
  __alignment __alignment_ : 3 {__alignment::__default};
  __sign __sign_ : 2 {__sign::__default};
  bool __alternate_form_ : 1 {false};
  bool __locale_specific_form_ : 1 {false};
  bool __reserved_0_ : 1 {false};
  __type __type_{__type::__default};

  // These flags are only used for formatting chrono. Since the struct has
  // padding space left it's added to this structure.
  bool __weekday_name_ : 1 {false};
  bool __weekday_ : 1 {false};

  bool __day_of_year_ : 1 {false};
  bool __week_of_year_ : 1 {false};

  bool __month_name_ : 1 {false};

  uint8_t __reserved_1_ : 3 {0};
  uint8_t __reserved_2_ : 6 {0};
  // These two flags are only used internally and not part of the
  // __parsed_specifications. Therefore put them at the end.
  bool __width_as_arg_ : 1 {false};
  bool __precision_as_arg_ : 1 {false};

  /// The requested width, either the value or the arg-id.
  int32_t __width_{0};

  /// The requested precision, either the value or the arg-id.
  int32_t __precision_{-1};

  // LWG 3576 will probably change this to always accept a Unicode code point
  // To avoid changing the size with that change align the field so when it
  // becomes 32-bit its alignment will remain the same. That also means the
  // size will remain the same. (D2572 addresses the solution for LWG 3576.)
  _CharT __fill_{_CharT(' ')};

private:
  /// Stores the alignment when \a __c is one of '<', '^', or '>'.
  ///
  /// \returns whether \a __c is an alignment character.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) {
    switch (__c) {
    case _CharT('<'):
      __alignment_ = __alignment::__left;
      return true;

    case _CharT('^'):
      __alignment_ = __alignment::__center;
      return true;

    case _CharT('>'):
      __alignment_ = __alignment::__right;
      return true;
    }
    return false;
  }

  /// Parses the optional fill followed by the alignment.
  ///
  /// A fill is only recognized when the second character of the input is an
  /// alignment character. The characters '{' and '}' are never valid as fill;
  /// when \a __use_range_fill is true ':' is invalid too.
  ///
  /// \pre \a __begin != \a __end
  ///
  /// \returns whether an alignment was parsed; \a __begin is advanced beyond
  /// the consumed characters.
  // range-fill and tuple-fill are identical
  _LIBCPP_HIDE_FROM_ABI constexpr bool
  __parse_fill_align(const _CharT*& __begin, const _CharT* __end, bool __use_range_fill) {
    _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause "
                                     "undefined behavior by evaluating data not in the input");
    if (__begin + 1 != __end) {
      // Look ahead one character: "<fill><align>" takes precedence over a
      // sole "<align>".
      if (__parse_alignment(*(__begin + 1))) {
        if (__use_range_fill && (*__begin == _CharT('{') || *__begin == _CharT('}') || *__begin == _CharT(':')))
          std::__throw_format_error("The format-spec range-fill field contains an invalid character");
        else if (*__begin == _CharT('{') || *__begin == _CharT('}'))
          std::__throw_format_error("The format-spec fill field contains an invalid character");

        __fill_ = *__begin;
        __begin += 2;
        return true;
      }
    }

    if (!__parse_alignment(*__begin))
      return false;

    ++__begin;
    return true;
  }

  /// Parses the optional sign: '-', '+', or ' '.
  ///
  /// \returns whether a sign was parsed and consumed.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(const _CharT*& __begin) {
    switch (*__begin) {
    case _CharT('-'):
      __sign_ = __sign::__minus;
      break;
    case _CharT('+'):
      __sign_ = __sign::__plus;
      break;
    case _CharT(' '):
      __sign_ = __sign::__space;
      break;
    default:
      return false;
    }
    ++__begin;
    return true;
  }

  /// Parses the optional alternate form flag '#'.
  ///
  /// \returns whether the flag was parsed and consumed.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(const _CharT*& __begin) {
    if (*__begin != _CharT('#'))
      return false;

    __alternate_form_ = true;
    ++__begin;
    return true;
  }

  /// Parses the optional zero-padding flag '0'.
  ///
  /// The flag is consumed but only has an effect when no alignment has been
  /// set; an explicit alignment is kept.
  ///
  /// \returns whether the flag was parsed and consumed.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(const _CharT*& __begin) {
    if (*__begin != _CharT('0'))
      return false;

    if (__alignment_ == __alignment::__default)
      __alignment_ = __alignment::__zero_padding;
    ++__begin;
    return true;
  }

  /// Parses the optional width, either a positive number or a nested arg-id.
  ///
  /// For an arg-id the index is stored in \c __width_ and \c __width_as_arg_
  /// is set; the value is resolved later by \ref __get_width.
  ///
  /// \returns whether a width was parsed and consumed.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(const _CharT*& __begin, const _CharT* __end, auto& __parse_ctx) {
    if (*__begin == _CharT('0'))
      std::__throw_format_error("A format-spec width field shouldn't have a leading zero");

    if (*__begin == _CharT('{')) {
      __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
      __width_as_arg_ = true;
      __width_ = __r.__value;
      __begin = __r.__ptr;
      return true;
    }

    if (*__begin < _CharT('0') || *__begin > _CharT('9'))
      return false;

    __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
    __width_ = __r.__value;
    _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, "
                                  "due to validations in this function");
    __begin = __r.__ptr;
    return true;
  }

  /// Parses the optional precision: a '.' followed by a number or a nested
  /// arg-id.
  ///
  /// For an arg-id the index is stored in \c __precision_ and
  /// \c __precision_as_arg_ is set; the value is resolved later by
  /// \ref __get_precision. A '.' not followed by a number or an arg-id is an
  /// error.
  ///
  /// \returns whether a precision was parsed and consumed.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(const _CharT*& __begin, const _CharT* __end,
                                                         auto& __parse_ctx) {
    if (*__begin != _CharT('.'))
      return false;

    ++__begin;
    if (__begin == __end)
      std::__throw_format_error("End of input while parsing format-spec precision");

    if (*__begin == _CharT('{')) {
      __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx);
      __precision_as_arg_ = true;
      __precision_ = __arg_id.__value;
      __begin = __arg_id.__ptr;
      return true;
    }

    if (*__begin < _CharT('0') || *__begin > _CharT('9'))
      std::__throw_format_error("The format-spec precision field doesn't contain a value or arg-id");

    __format::__parse_number_result __r = __format::__parse_number(__begin, __end);
    __precision_ = __r.__value;
    __precision_as_arg_ = false;
    __begin = __r.__ptr;
    return true;
  }

  /// Parses the optional locale-specific form flag 'L'.
  ///
  /// \returns whether the flag was parsed and consumed.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(const _CharT*& __begin) {
    if (*__begin != _CharT('L'))
      return false;

    __locale_specific_form_ = true;
    ++__begin;
    return true;
  }

  /// Parses the optional type character.
  ///
  /// When the character isn't a known type nothing is stored and \a __begin
  /// isn't advanced.
  _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(const _CharT*& __begin) {
    // Determines the type. It does not validate whether the selected type is
    // valid. Most formatters have optional fields that are only allowed for
    // certain types. These parsers need to do validation after the type has
    // been parsed. So its easier to implement the validation for all types in
    // the specific parse function.
    switch (*__begin) {
    case 'A':
      __type_ = __type::__hexfloat_upper_case;
      break;
    case 'B':
      __type_ = __type::__binary_upper_case;
      break;
    case 'E':
      __type_ = __type::__scientific_upper_case;
      break;
    case 'F':
      __type_ = __type::__fixed_upper_case;
      break;
    case 'G':
      __type_ = __type::__general_upper_case;
      break;
    case 'X':
      __type_ = __type::__hexadecimal_upper_case;
      break;
    case 'a':
      __type_ = __type::__hexfloat_lower_case;
      break;
    case 'b':
      __type_ = __type::__binary_lower_case;
      break;
    case 'c':
      __type_ = __type::__char;
      break;
    case 'd':
      __type_ = __type::__decimal;
      break;
    case 'e':
      __type_ = __type::__scientific_lower_case;
      break;
    case 'f':
      __type_ = __type::__fixed_lower_case;
      break;
    case 'g':
      __type_ = __type::__general_lower_case;
      break;
    case 'o':
      __type_ = __type::__octal;
      break;
    case 'p':
      __type_ = __type::__pointer;
      break;
    case 's':
      __type_ = __type::__string;
      break;
    case 'x':
      __type_ = __type::__hexadecimal_lower_case;
      break;
#  if _LIBCPP_STD_VER > 20
    case '?':
      __type_ = __type::__debug;
      break;
#  endif
    default:
      return;
    }
    ++__begin;
  }

  /// \returns the width; when the width was given as an arg-id its value is
  /// looked up in \a __ctx and validated by \ref __substitute_arg_id.
  _LIBCPP_HIDE_FROM_ABI
  int32_t __get_width(auto& __ctx) const {
    if (!__width_as_arg_)
      return __width_;

    return __format_spec::__substitute_arg_id(__ctx.arg(__width_));
  }

  /// \returns the precision; when the precision was given as an arg-id its
  /// value is looked up in \a __ctx and validated by
  /// \ref __substitute_arg_id.
  _LIBCPP_HIDE_FROM_ABI
  int32_t __get_precision(auto& __ctx) const {
    if (!__precision_as_arg_)
      return __precision_;

    return __format_spec::__substitute_arg_id(__ctx.arg(__precision_));
  }
};

// Validates whether the reserved bitfields don't change the size.
601 static_assert(sizeof(__parser<char>) == 16); 602 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS 603 static_assert(sizeof(__parser<wchar_t>) == 16); 604 # endif 605 606 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) { 607 switch (__type) { 608 case __format_spec::__type::__default: 609 case __format_spec::__type::__string: 610 case __format_spec::__type::__debug: 611 break; 612 613 default: 614 std::__throw_format_error("The format-spec type has a type not supported for a string argument"); 615 } 616 } 617 618 template <class _CharT> 619 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser) { 620 if (__parser.__sign_ != __sign::__default) 621 std::__throw_format_error("A sign field isn't allowed in this format-spec"); 622 623 if (__parser.__alternate_form_) 624 std::__throw_format_error("An alternate form field isn't allowed in this format-spec"); 625 626 if (__parser.__alignment_ == __alignment::__zero_padding) 627 std::__throw_format_error("A zero-padding field isn't allowed in this format-spec"); 628 629 if (__parser.__alignment_ == __alignment::__default) 630 __parser.__alignment_ = __alignment::__left; 631 } 632 633 template <class _CharT> 634 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser) { 635 __format_spec::__process_display_type_bool_string(__parser); 636 } 637 638 template <class _CharT> 639 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) { 640 switch (__parser.__type_) { 641 case __format_spec::__type::__default: 642 case __format_spec::__type::__string: 643 __format_spec::__process_display_type_bool_string(__parser); 644 break; 645 646 case __format_spec::__type::__binary_lower_case: 647 case __format_spec::__type::__binary_upper_case: 648 case __format_spec::__type::__octal: 649 case __format_spec::__type::__decimal: 650 case __format_spec::__type::__hexadecimal_lower_case: 651 
case __format_spec::__type::__hexadecimal_upper_case: 652 break; 653 654 default: 655 std::__throw_format_error("The format-spec type has a type not supported for a bool argument"); 656 } 657 } 658 659 template <class _CharT> 660 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) { 661 switch (__parser.__type_) { 662 case __format_spec::__type::__default: 663 case __format_spec::__type::__char: 664 case __format_spec::__type::__debug: 665 __format_spec::__process_display_type_char(__parser); 666 break; 667 668 case __format_spec::__type::__binary_lower_case: 669 case __format_spec::__type::__binary_upper_case: 670 case __format_spec::__type::__octal: 671 case __format_spec::__type::__decimal: 672 case __format_spec::__type::__hexadecimal_lower_case: 673 case __format_spec::__type::__hexadecimal_upper_case: 674 break; 675 676 default: 677 std::__throw_format_error("The format-spec type has a type not supported for a char argument"); 678 } 679 } 680 681 template <class _CharT> 682 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) { 683 switch (__parser.__type_) { 684 case __format_spec::__type::__default: 685 case __format_spec::__type::__binary_lower_case: 686 case __format_spec::__type::__binary_upper_case: 687 case __format_spec::__type::__octal: 688 case __format_spec::__type::__decimal: 689 case __format_spec::__type::__hexadecimal_lower_case: 690 case __format_spec::__type::__hexadecimal_upper_case: 691 break; 692 693 case __format_spec::__type::__char: 694 __format_spec::__process_display_type_char(__parser); 695 break; 696 697 default: 698 std::__throw_format_error("The format-spec type has a type not supported for an integer argument"); 699 } 700 } 701 702 template <class _CharT> 703 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser) { 704 switch (__parser.__type_) { 705 case __format_spec::__type::__default: 706 case 
__format_spec::__type::__hexfloat_lower_case: 707 case __format_spec::__type::__hexfloat_upper_case: 708 // Precision specific behavior will be handled later. 709 break; 710 case __format_spec::__type::__scientific_lower_case: 711 case __format_spec::__type::__scientific_upper_case: 712 case __format_spec::__type::__fixed_lower_case: 713 case __format_spec::__type::__fixed_upper_case: 714 case __format_spec::__type::__general_lower_case: 715 case __format_spec::__type::__general_upper_case: 716 if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1) 717 // Set the default precision for the call to to_chars. 718 __parser.__precision_ = 6; 719 break; 720 721 default: 722 std::__throw_format_error("The format-spec type has a type not supported for a floating-point argument"); 723 } 724 } 725 726 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type) { 727 switch (__type) { 728 case __format_spec::__type::__default: 729 case __format_spec::__type::__pointer: 730 break; 731 732 default: 733 std::__throw_format_error("The format-spec type has a type not supported for a pointer argument"); 734 } 735 } 736 737 template <class _CharT> 738 struct __column_width_result { 739 /// The number of output columns. 740 size_t __width_; 741 /// One beyond the last code unit used in the estimation. 742 /// 743 /// This limits the original output to fit in the wanted number of columns. 744 const _CharT* __last_; 745 }; 746 747 template <class _CharT> 748 __column_width_result(size_t, const _CharT*) -> __column_width_result<_CharT>; 749 750 /// Since a column width can be two it's possible that the requested column 751 /// width can't be achieved. Depending on the intended usage the policy can be 752 /// selected. 753 /// - When used as precision the maximum width may not be exceeded and the 754 /// result should be "rounded down" to the previous boundary. 
755 /// - When used as a width we're done once the minimum is reached, but 756 /// exceeding is not an issue. Rounding down is an issue since that will 757 /// result in writing fill characters. Therefore the result needs to be 758 /// "rounded up". 759 enum class __column_width_rounding { __down, __up }; 760 761 # ifndef _LIBCPP_HAS_NO_UNICODE 762 763 namespace __detail { 764 765 /// Converts a code point to the column width. 766 /// 767 /// The estimations are conforming to [format.string.general]/11 768 /// 769 /// This version expects a value less than 0x1'0000, which is a 3-byte UTF-8 770 /// character. 771 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width_3(uint32_t __c) noexcept { 772 _LIBCPP_ASSERT(__c < 0x10000, "Use __column_width_4 or __column_width for larger values"); 773 774 // clang-format off 775 return 1 + (__c >= 0x1100 && (__c <= 0x115f || 776 (__c >= 0x2329 && (__c <= 0x232a || 777 (__c >= 0x2e80 && (__c <= 0x303e || 778 (__c >= 0x3040 && (__c <= 0xa4cf || 779 (__c >= 0xac00 && (__c <= 0xd7a3 || 780 (__c >= 0xf900 && (__c <= 0xfaff || 781 (__c >= 0xfe10 && (__c <= 0xfe19 || 782 (__c >= 0xfe30 && (__c <= 0xfe6f || 783 (__c >= 0xff00 && (__c <= 0xff60 || 784 (__c >= 0xffe0 && (__c <= 0xffe6 785 )))))))))))))))))))); 786 // clang-format on 787 } 788 789 /// @overload 790 /// 791 /// This version expects a value greater than or equal to 0x1'0000, which is a 792 /// 4-byte UTF-8 character. 793 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width_4(uint32_t __c) noexcept { 794 _LIBCPP_ASSERT(__c >= 0x10000, "Use __column_width_3 or __column_width for smaller values"); 795 796 // clang-format off 797 return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f || 798 (__c >= 0x1'f900 && (__c <= 0x1'f9ff || 799 (__c >= 0x2'0000 && (__c <= 0x2'fffd || 800 (__c >= 0x3'0000 && (__c <= 0x3'fffd 801 )))))))); 802 // clang-format on 803 } 804 805 /// @overload 806 /// 807 /// The general case, accepting all values. 
808 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width(uint32_t __c) noexcept { 809 if (__c < 0x10000) 810 return __detail::__column_width_3(__c); 811 812 return __detail::__column_width_4(__c); 813 } 814 815 template <class _CharT> 816 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width_grapheme_clustering( 817 const _CharT* __first, const _CharT* __last, size_t __maximum, __column_width_rounding __rounding) noexcept { 818 __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last}; 819 820 __column_width_result<_CharT> __result{0, __first}; 821 while (__result.__last_ != __last && __result.__width_ <= __maximum) { 822 typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume(); 823 int __width = __detail::__column_width(__cluster.__code_point_); 824 825 // When the next entry would exceed the maximum width the previous width 826 // might be returned. For example when a width of 100 is requested the 827 // returned width might be 99, since the next code point has an estimated 828 // column width of 2. This depends on the rounding flag. 829 // When the maximum is exceeded the loop will abort the next iteration. 830 if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum) 831 return __result; 832 833 __result.__width_ += __width; 834 __result.__last_ = __cluster.__last_; 835 } 836 837 return __result; 838 } 839 840 } // namespace __detail 841 842 // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32. 843 // Depending on format the relation between the number of code units stored and 844 // the number of output columns differs. The first relation is the number of 845 // code units forming a code point. (The text assumes the code units are 846 // unsigned.) 847 // - UTF-8 The number of code units is between one and four. The first 127 848 // Unicode code points match the ASCII character set. 
When the highest bit is 849 // set it means the code point has more than one code unit. 850 // - UTF-16: The number of code units is between 1 and 2. When the first 851 // code unit is in the range [0xd800,0xdfff) it means the code point uses two 852 // code units. 853 // - UTF-32: The number of code units is always one. 854 // 855 // The code point to the number of columns is specified in 856 // [format.string.std]/11. This list might change in the future. 857 // 858 // Another thing to be taken into account is Grapheme clustering. This means 859 // that in some cases multiple code points are combined one element in the 860 // output. For example: 861 // - an ASCII character with a combined diacritical mark 862 // - an emoji with a skin tone modifier 863 // - a group of combined people emoji to create a family 864 // - a combination of flag emoji 865 // 866 // See also: 867 // - [format.string.general]/11 868 // - https://en.wikipedia.org/wiki/UTF-8#Encoding 869 // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF 870 871 _LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; } 872 873 /// Determines the number of output columns needed to render the input. 874 /// 875 /// \note When the scanner encounters malformed Unicode it acts as-if every 876 /// code unit is a one column code point. Typically a terminal uses the same 877 /// strategy and replaces every malformed code unit with a one column 878 /// replacement character. 879 /// 880 /// \param __first Points to the first element of the input range. 881 /// \param __last Points beyond the last element of the input range. 882 /// \param __maximum The maximum number of output columns. The returned number 883 /// of estimated output columns will not exceed this value. 884 /// \param __rounding Selects the rounding method. 
885 /// \c __down result.__width_ <= __maximum 886 /// \c __up result.__width_ <= __maximum + 1 887 template <class _CharT> 888 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width( 889 basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept { 890 // The width estimation is done in two steps: 891 // - Quickly process for the ASCII part. ASCII has the following properties 892 // - One code unit is one code point 893 // - Every code point has an estimated width of one 894 // - When needed it will a Unicode Grapheme clustering algorithm to find 895 // the proper place for truncation. 896 897 if (__str.empty() || __maximum == 0) 898 return {0, __str.begin()}; 899 900 // ASCII has one caveat; when an ASCII character is followed by a non-ASCII 901 // character they might be part of an extended grapheme cluster. For example: 902 // an ASCII letter and a COMBINING ACUTE ACCENT 903 // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we 904 // need to scan one code unit beyond the requested precision. When this code 905 // unit is non-ASCII we omit the current code unit and let the Grapheme 906 // clustering algorithm do its work. 
907 const _CharT* __it = __str.begin(); 908 if (__format_spec::__is_ascii(*__it)) { 909 do { 910 --__maximum; 911 ++__it; 912 if (__it == __str.end()) 913 return {__str.size(), __str.end()}; 914 915 if (__maximum == 0) { 916 if (__format_spec::__is_ascii(*__it)) 917 return {static_cast<size_t>(__it - __str.begin()), __it}; 918 919 break; 920 } 921 } while (__format_spec::__is_ascii(*__it)); 922 --__it; 923 ++__maximum; 924 } 925 926 ptrdiff_t __ascii_size = __it - __str.begin(); 927 __column_width_result __result = 928 __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding); 929 930 __result.__width_ += __ascii_size; 931 return __result; 932 } 933 # else // !defined(_LIBCPP_HAS_NO_UNICODE) 934 template <class _CharT> 935 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> 936 __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept { 937 // When Unicode isn't supported assume ASCII and every code unit is one code 938 // point. In ASCII the estimated column width is always one. Thus there's no 939 // need for rounding. 940 size_t __width_ = _VSTD::min(__str.size(), __maximum); 941 return {__width_, __str.begin() + __width_}; 942 } 943 944 # endif // !defined(_LIBCPP_HAS_NO_UNICODE) 945 946 } // namespace __format_spec 947 948 #endif //_LIBCPP_STD_VER > 17 949 950 _LIBCPP_END_NAMESPACE_STD 951 952 _LIBCPP_POP_MACROS 953 954 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 955