1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 12 13 /// \file Contains the std-format-spec parser. 14 /// 15 /// Most of the code can be reused in the chrono-format-spec. 16 /// This header has some support for the chrono-format-spec since it doesn't 17 /// affect the std-format-spec. 18 19 #include <__algorithm/find_if.h> 20 #include <__algorithm/min.h> 21 #include <__assert> 22 #include <__config> 23 #include <__debug> 24 #include <__format/format_arg.h> 25 #include <__format/format_error.h> 26 #include <__format/format_parse_context.h> 27 #include <__format/format_string.h> 28 #include <__format/unicode.h> 29 #include <__variant/monostate.h> 30 #include <bit> 31 #include <concepts> 32 #include <cstdint> 33 #include <string_view> 34 #include <type_traits> 35 36 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 37 # pragma GCC system_header 38 #endif 39 40 _LIBCPP_PUSH_MACROS 41 #include <__undef_macros> 42 43 _LIBCPP_BEGIN_NAMESPACE_STD 44 45 #if _LIBCPP_STD_VER > 17 46 47 namespace __format_spec { 48 49 template <class _CharT> 50 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result< _CharT> 51 __parse_arg_id(const _CharT* __begin, const _CharT* __end, auto& __parse_ctx) { 52 // This function is a wrapper to call the real parser. But it does the 53 // validation for the pre-conditions and post-conditions. 
54 if (__begin == __end) 55 __throw_format_error("End of input while parsing format-spec arg-id"); 56 57 __format::__parse_number_result __r = 58 __format::__parse_arg_id(__begin, __end, __parse_ctx); 59 60 if (__r.__ptr == __end || *__r.__ptr != _CharT('}')) 61 __throw_format_error("Invalid arg-id"); 62 63 ++__r.__ptr; 64 return __r; 65 } 66 67 template <class _Context> 68 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t 69 __substitute_arg_id(basic_format_arg<_Context> __format_arg) { 70 return visit_format_arg( 71 [](auto __arg) -> uint32_t { 72 using _Type = decltype(__arg); 73 if constexpr (integral<_Type>) { 74 if constexpr (signed_integral<_Type>) { 75 if (__arg < 0) 76 __throw_format_error("A format-spec arg-id replacement shouldn't " 77 "have a negative value"); 78 } 79 80 using _CT = common_type_t<_Type, decltype(__format::__number_max)>; 81 if (static_cast<_CT>(__arg) > 82 static_cast<_CT>(__format::__number_max)) 83 __throw_format_error("A format-spec arg-id replacement exceeds " 84 "the maximum supported value"); 85 86 return __arg; 87 } else if constexpr (same_as<_Type, monostate>) 88 __throw_format_error("Argument index out of bounds"); 89 else 90 __throw_format_error("A format-spec arg-id replacement argument " 91 "isn't an integral type"); 92 }, 93 __format_arg); 94 } 95 96 /// These fields are a filter for which elements to parse. 97 /// 98 /// They default to false so when a new field is added it needs to be opted in 99 /// explicitly. 100 struct __fields { 101 uint8_t __sign_ : 1 {false}; 102 uint8_t __alternate_form_ : 1 {false}; 103 uint8_t __zero_padding_ : 1 {false}; 104 uint8_t __precision_ : 1 {false}; 105 uint8_t __locale_specific_form_ : 1 {false}; 106 uint8_t __type_ : 1 {false}; 107 }; 108 109 // By not placing this constant in the formatter class it's not duplicated for 110 // char and wchar_t. 
// Fields parsed for integral arguments: everything except the precision.
inline constexpr __fields __fields_integral{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__locale_specific_form_ = true,
    .__type_                 = true};
// Fields parsed for floating-point arguments: all optional fields.
inline constexpr __fields __fields_floating_point{
    .__sign_                 = true,
    .__alternate_form_       = true,
    .__zero_padding_         = true,
    .__precision_            = true,
    .__locale_specific_form_ = true,
    .__type_                 = true};
// Fields parsed for string arguments: only the precision and the type.
inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true};
// Fields parsed for pointer arguments: only the type.
inline constexpr __fields __fields_pointer{.__type_ = true};

/// The alignment of the formatted output.
enum class _LIBCPP_ENUM_VIS __alignment : uint8_t {
  /// No alignment is set in the format string.
  __default,
  __left,
  __center,
  __right,
  /// Set by the parser when the zero-padding flag is used while no explicit
  /// alignment has been set.
  __zero_padding
};

/// The sign flag of the format-spec.
enum class _LIBCPP_ENUM_VIS __sign : uint8_t {
  /// No sign is set in the format string.
  ///
  /// The sign isn't allowed for certain format-types. By using this value
  /// it's possible to detect whether or not the user explicitly set the sign
  /// flag. For formatting purposes it behaves the same as \ref __minus.
  __default,
  __minus,
  __plus,
  __space
};

/// The display type of the format-spec.
///
/// \c __default means no type was present in the format string.
enum class _LIBCPP_ENUM_VIS __type : uint8_t {
  __default,
  __string,
  __binary_lower_case,
  __binary_upper_case,
  __octal,
  __decimal,
  __hexadecimal_lower_case,
  __hexadecimal_upper_case,
  __pointer,
  __char,
  __hexfloat_lower_case,
  __hexfloat_upper_case,
  __scientific_lower_case,
  __scientific_upper_case,
  __fixed_lower_case,
  __fixed_upper_case,
  __general_lower_case,
  __general_upper_case
};

/// The std-format-spec specific part of the parsed specifications.
///
/// \c __alignment_ is intentionally the first member, the same as in
/// \ref __chrono.
struct __std {
  __alignment __alignment_ : 3;
  __sign __sign_ : 2;
  bool __alternate_form_ : 1;
  bool __locale_specific_form_ : 1;
  __type __type_;
};

/// The chrono-format-spec specific part of the parsed specifications.
///
/// \c __alignment_ is intentionally the first member, the same as in
/// \ref __std.
struct __chrono {
  __alignment __alignment_ : 3;
  bool __weekday_name_ : 1;
  bool __month_name_ : 1;
};

/// Contains the parsed formatting specifications.
///
/// This contains information for both the std-format-spec and the
/// chrono-format-spec. This results in some unused members for both
/// specifications. However these unused members don't increase the size
/// of the structure.
///
/// This struct doesn't cross ABI boundaries so its layout doesn't need to be
/// kept stable.
template <class _CharT>
struct __parsed_specifications {
  union {
    // The field __alignment_ is the first element in __std_ and __chrono_.
    // This allows the code to always inspect this value regardless of which
    // member of the union is the active member [class.union.general]/2.
    //
    // This is needed since the generic output routines handle the alignment of
    // the output.
    __alignment __alignment_ : 3;
    __std __std_;
    __chrono __chrono_;
  };

  /// The requested width.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __width_;

  /// The requested precision.
  ///
  /// When the format-spec used an arg-id for this field it has already been
  /// replaced with the value of that arg-id.
  int32_t __precision_;

  /// The fill character.
  _CharT __fill_;

  /// \returns whether \c __width_ holds a positive value.
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; }

  /// \returns whether \c __precision_ holds a non-negative value. (The parser
  /// in this file uses -1 as the initial value of its precision.)
  _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; }
};

// Validate the struct is small and cheap to copy since the struct is passed by
// value in formatting functions.
static_assert(sizeof(__parsed_specifications<char>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<char>>);
#  ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
static_assert(sizeof(__parsed_specifications<wchar_t>) == 16);
static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>);
#  endif

/// The parser for the std-format-spec.
235 /// 236 /// Note this class is a member of std::formatter specializations. It's 237 /// expected developers will create their own formatter specializations that 238 /// inherit from the std::formatter specializations. This means this class 239 /// must be ABI stable. To aid the stability the unused bits in the class are 240 /// set to zero. That way they can be repurposed if a future revision of the 241 /// Standards adds new fields to std-format-spec. 242 template <class _CharT> 243 class _LIBCPP_TEMPLATE_VIS __parser { 244 public: 245 _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(basic_format_parse_context<_CharT>& __parse_ctx, __fields __fields) 246 -> decltype(__parse_ctx.begin()) { 247 248 const _CharT* __begin = __parse_ctx.begin(); 249 const _CharT* __end = __parse_ctx.end(); 250 if (__begin == __end) 251 return __begin; 252 253 if (__parse_fill_align(__begin, __end) && __begin == __end) 254 return __begin; 255 256 if (__fields.__sign_ && __parse_sign(__begin) && __begin == __end) 257 return __begin; 258 259 if (__fields.__alternate_form_ && __parse_alternate_form(__begin) && __begin == __end) 260 return __begin; 261 262 if (__fields.__zero_padding_ && __parse_zero_padding(__begin) && __begin == __end) 263 return __begin; 264 265 if (__parse_width(__begin, __end, __parse_ctx) && __begin == __end) 266 return __begin; 267 268 if (__fields.__precision_ && __parse_precision(__begin, __end, __parse_ctx) && __begin == __end) 269 return __begin; 270 271 if (__fields.__locale_specific_form_ && __parse_locale_specific_form(__begin) && __begin == __end) 272 return __begin; 273 274 if (__fields.__type_) { 275 __parse_type(__begin); 276 277 // When __type_ is false the calling parser is expected to do additional 278 // parsing. In that case that parser should do the end of format string 279 // validation. 
280 if (__begin != __end && *__begin != _CharT('}')) 281 __throw_format_error("The format-spec should consume the input or end with a '}'"); 282 } 283 284 return __begin; 285 } 286 287 /// \returns the `__parsed_specifications` with the resolved dynamic sizes.. 288 _LIBCPP_HIDE_FROM_ABI 289 __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const { 290 return __parsed_specifications<_CharT>{ 291 .__std_ = 292 __std{.__alignment_ = __alignment_, 293 .__sign_ = __sign_, 294 .__alternate_form_ = __alternate_form_, 295 .__locale_specific_form_ = __locale_specific_form_, 296 .__type_ = __type_}, 297 .__width_{__get_width(__ctx)}, 298 .__precision_{__get_precision(__ctx)}, 299 .__fill_{__fill_}}; 300 } 301 302 __alignment __alignment_ : 3 {__alignment::__default}; 303 __sign __sign_ : 2 {__sign::__default}; 304 bool __alternate_form_ : 1 {false}; 305 bool __locale_specific_form_ : 1 {false}; 306 bool __reserved_0_ : 1 {false}; 307 __type __type_{__type::__default}; 308 309 // These two flags are used for formatting chrono. Since the struct has 310 // padding space left it's added to this structure. 311 bool __weekday_name_ : 1 {false}; 312 bool __month_name_ : 1 {false}; 313 314 uint8_t __reserved_1_ : 6 {0}; 315 uint8_t __reserved_2_ : 6 {0}; 316 // These two flags are only used internally and not part of the 317 // __parsed_specifications. Therefore put them at the end. 318 bool __width_as_arg_ : 1 {false}; 319 bool __precision_as_arg_ : 1 {false}; 320 321 /// The requested width, either the value or the arg-id. 322 int32_t __width_{0}; 323 324 /// The requested precision, either the value or the arg-id. 325 int32_t __precision_{-1}; 326 327 // LWG 3576 will probably change this to always accept a Unicode code point 328 // To avoid changing the size with that change align the field so when it 329 // becomes 32-bit its alignment will remain the same. That also means the 330 // size will remain the same. 
(D2572 addresses the solution for LWG 3576.) 331 _CharT __fill_{_CharT(' ')}; 332 333 private: 334 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) { 335 switch (__c) { 336 case _CharT('<'): 337 __alignment_ = __alignment::__left; 338 return true; 339 340 case _CharT('^'): 341 __alignment_ = __alignment::__center; 342 return true; 343 344 case _CharT('>'): 345 __alignment_ = __alignment::__right; 346 return true; 347 } 348 return false; 349 } 350 351 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(const _CharT*& __begin, const _CharT* __end) { 352 _LIBCPP_ASSERT(__begin != __end, "when called with an empty input the function will cause " 353 "undefined behavior by evaluating data not in the input"); 354 if (__begin + 1 != __end) { 355 if (__parse_alignment(*(__begin + 1))) { 356 if (*__begin == _CharT('{') || *__begin == _CharT('}')) 357 __throw_format_error("The format-spec fill field contains an invalid character"); 358 359 __fill_ = *__begin; 360 __begin += 2; 361 return true; 362 } 363 } 364 365 if (!__parse_alignment(*__begin)) 366 return false; 367 368 ++__begin; 369 return true; 370 } 371 372 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(const _CharT*& __begin) { 373 switch (*__begin) { 374 case _CharT('-'): 375 __sign_ = __sign::__minus; 376 break; 377 case _CharT('+'): 378 __sign_ = __sign::__plus; 379 break; 380 case _CharT(' '): 381 __sign_ = __sign::__space; 382 break; 383 default: 384 return false; 385 } 386 ++__begin; 387 return true; 388 } 389 390 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(const _CharT*& __begin) { 391 if (*__begin != _CharT('#')) 392 return false; 393 394 __alternate_form_ = true; 395 ++__begin; 396 return true; 397 } 398 399 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(const _CharT*& __begin) { 400 if (*__begin != _CharT('0')) 401 return false; 402 403 if (__alignment_ == __alignment::__default) 404 __alignment_ = __alignment::__zero_padding; 405 ++__begin; 406 return 
true; 407 } 408 409 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(const _CharT*& __begin, const _CharT* __end, auto& __parse_ctx) { 410 if (*__begin == _CharT('0')) 411 __throw_format_error("A format-spec width field shouldn't have a leading zero"); 412 413 if (*__begin == _CharT('{')) { 414 __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx); 415 __width_as_arg_ = true; 416 __width_ = __r.__value; 417 __begin = __r.__ptr; 418 return true; 419 } 420 421 if (*__begin < _CharT('0') || *__begin > _CharT('9')) 422 return false; 423 424 __format::__parse_number_result __r = __format::__parse_number(__begin, __end); 425 __width_ = __r.__value; 426 _LIBCPP_ASSERT(__width_ != 0, "A zero value isn't allowed and should be impossible, " 427 "due to validations in this function"); 428 __begin = __r.__ptr; 429 return true; 430 } 431 432 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(const _CharT*& __begin, const _CharT* __end, 433 auto& __parse_ctx) { 434 if (*__begin != _CharT('.')) 435 return false; 436 437 ++__begin; 438 if (__begin == __end) 439 __throw_format_error("End of input while parsing format-spec precision"); 440 441 if (*__begin == _CharT('{')) { 442 __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __parse_ctx); 443 __precision_as_arg_ = true; 444 __precision_ = __arg_id.__value; 445 __begin = __arg_id.__ptr; 446 return true; 447 } 448 449 if (*__begin < _CharT('0') || *__begin > _CharT('9')) 450 __throw_format_error("The format-spec precision field doesn't contain a value or arg-id"); 451 452 __format::__parse_number_result __r = __format::__parse_number(__begin, __end); 453 __precision_ = __r.__value; 454 __precision_as_arg_ = false; 455 __begin = __r.__ptr; 456 return true; 457 } 458 459 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(const _CharT*& __begin) { 460 if (*__begin != _CharT('L')) 461 return false; 462 463 __locale_specific_form_ = 
true; 464 ++__begin; 465 return true; 466 } 467 468 _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(const _CharT*& __begin) { 469 // Determines the type. It does not validate whether the selected type is 470 // valid. Most formatters have optional fields that are only allowed for 471 // certain types. These parsers need to do validation after the type has 472 // been parsed. So its easier to implement the validation for all types in 473 // the specific parse function. 474 switch (*__begin) { 475 case 'A': 476 __type_ = __type::__hexfloat_upper_case; 477 break; 478 case 'B': 479 __type_ = __type::__binary_upper_case; 480 break; 481 case 'E': 482 __type_ = __type::__scientific_upper_case; 483 break; 484 case 'F': 485 __type_ = __type::__fixed_upper_case; 486 break; 487 case 'G': 488 __type_ = __type::__general_upper_case; 489 break; 490 case 'X': 491 __type_ = __type::__hexadecimal_upper_case; 492 break; 493 case 'a': 494 __type_ = __type::__hexfloat_lower_case; 495 break; 496 case 'b': 497 __type_ = __type::__binary_lower_case; 498 break; 499 case 'c': 500 __type_ = __type::__char; 501 break; 502 case 'd': 503 __type_ = __type::__decimal; 504 break; 505 case 'e': 506 __type_ = __type::__scientific_lower_case; 507 break; 508 case 'f': 509 __type_ = __type::__fixed_lower_case; 510 break; 511 case 'g': 512 __type_ = __type::__general_lower_case; 513 break; 514 case 'o': 515 __type_ = __type::__octal; 516 break; 517 case 'p': 518 __type_ = __type::__pointer; 519 break; 520 case 's': 521 __type_ = __type::__string; 522 break; 523 case 'x': 524 __type_ = __type::__hexadecimal_lower_case; 525 break; 526 default: 527 return; 528 } 529 ++__begin; 530 } 531 532 _LIBCPP_HIDE_FROM_ABI 533 int32_t __get_width(auto& __ctx) const { 534 if (!__width_as_arg_) 535 return __width_; 536 537 int32_t __result = __format_spec::__substitute_arg_id(__ctx.arg(__width_)); 538 if (__result == 0) 539 __throw_format_error("A format-spec width field replacement should have a positive value"); 
540 return __result; 541 } 542 543 _LIBCPP_HIDE_FROM_ABI 544 int32_t __get_precision(auto& __ctx) const { 545 if (!__precision_as_arg_) 546 return __precision_; 547 548 return __format_spec::__substitute_arg_id(__ctx.arg(__precision_)); 549 } 550 }; 551 552 // Validates whether the reserved bitfields don't change the size. 553 static_assert(sizeof(__parser<char>) == 16); 554 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS 555 static_assert(sizeof(__parser<wchar_t>) == 16); 556 # endif 557 558 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) { 559 switch (__type) { 560 case __format_spec::__type::__default: 561 case __format_spec::__type::__string: 562 break; 563 564 default: 565 std::__throw_format_error("The format-spec type has a type not supported for a string argument"); 566 } 567 } 568 569 template <class _CharT> 570 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser) { 571 if (__parser.__sign_ != __sign::__default) 572 std::__throw_format_error("A sign field isn't allowed in this format-spec"); 573 574 if (__parser.__alternate_form_) 575 std::__throw_format_error("An alternate form field isn't allowed in this format-spec"); 576 577 if (__parser.__alignment_ == __alignment::__zero_padding) 578 std::__throw_format_error("A zero-padding field isn't allowed in this format-spec"); 579 580 if (__parser.__alignment_ == __alignment::__default) 581 __parser.__alignment_ = __alignment::__left; 582 } 583 584 template <class _CharT> 585 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser) { 586 __format_spec::__process_display_type_bool_string(__parser); 587 } 588 589 template <class _CharT> 590 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser) { 591 switch (__parser.__type_) { 592 case __format_spec::__type::__default: 593 case __format_spec::__type::__string: 594 
__format_spec::__process_display_type_bool_string(__parser); 595 break; 596 597 case __format_spec::__type::__binary_lower_case: 598 case __format_spec::__type::__binary_upper_case: 599 case __format_spec::__type::__octal: 600 case __format_spec::__type::__decimal: 601 case __format_spec::__type::__hexadecimal_lower_case: 602 case __format_spec::__type::__hexadecimal_upper_case: 603 break; 604 605 default: 606 std::__throw_format_error("The format-spec type has a type not supported for a bool argument"); 607 } 608 } 609 610 template <class _CharT> 611 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser) { 612 switch (__parser.__type_) { 613 case __format_spec::__type::__default: 614 case __format_spec::__type::__char: 615 __format_spec::__process_display_type_char(__parser); 616 break; 617 618 case __format_spec::__type::__binary_lower_case: 619 case __format_spec::__type::__binary_upper_case: 620 case __format_spec::__type::__octal: 621 case __format_spec::__type::__decimal: 622 case __format_spec::__type::__hexadecimal_lower_case: 623 case __format_spec::__type::__hexadecimal_upper_case: 624 break; 625 626 default: 627 std::__throw_format_error("The format-spec type has a type not supported for a char argument"); 628 } 629 } 630 631 template <class _CharT> 632 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser) { 633 switch (__parser.__type_) { 634 case __format_spec::__type::__default: 635 case __format_spec::__type::__binary_lower_case: 636 case __format_spec::__type::__binary_upper_case: 637 case __format_spec::__type::__octal: 638 case __format_spec::__type::__decimal: 639 case __format_spec::__type::__hexadecimal_lower_case: 640 case __format_spec::__type::__hexadecimal_upper_case: 641 break; 642 643 case __format_spec::__type::__char: 644 __format_spec::__process_display_type_char(__parser); 645 break; 646 647 default: 648 std::__throw_format_error("The format-spec type has a type not 
supported for an integer argument"); 649 } 650 } 651 652 template <class _CharT> 653 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser) { 654 switch (__parser.__type_) { 655 case __format_spec::__type::__default: 656 // When no precision specified then it keeps default since that 657 // formatting differs from the other types. 658 if (__parser.__precision_as_arg_ || __parser.__precision_ != -1) 659 __parser.__type_ = __format_spec::__type::__general_lower_case; 660 break; 661 case __format_spec::__type::__hexfloat_lower_case: 662 case __format_spec::__type::__hexfloat_upper_case: 663 // Precision specific behavior will be handled later. 664 break; 665 case __format_spec::__type::__scientific_lower_case: 666 case __format_spec::__type::__scientific_upper_case: 667 case __format_spec::__type::__fixed_lower_case: 668 case __format_spec::__type::__fixed_upper_case: 669 case __format_spec::__type::__general_lower_case: 670 case __format_spec::__type::__general_upper_case: 671 if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1) 672 // Set the default precision for the call to to_chars. 673 __parser.__precision_ = 6; 674 break; 675 676 default: 677 std::__throw_format_error("The format-spec type has a type not supported for a floating-point argument"); 678 } 679 } 680 681 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type) { 682 switch (__type) { 683 case __format_spec::__type::__default: 684 case __format_spec::__type::__pointer: 685 break; 686 687 default: 688 std::__throw_format_error("The format-spec type has a type not supported for a pointer argument"); 689 } 690 } 691 692 template <class _CharT> 693 struct __column_width_result { 694 /// The number of output columns. 695 size_t __width_; 696 /// One beyond the last code unit used in the estimation. 697 /// 698 /// This limits the original output to fit in the wanted number of columns. 
699 const _CharT* __last_; 700 }; 701 702 /// Since a column width can be two it's possible that the requested column 703 /// width can't be achieved. Depending on the intended usage the policy can be 704 /// selected. 705 /// - When used as precision the maximum width may not be exceeded and the 706 /// result should be "rounded down" to the previous boundary. 707 /// - When used as a width we're done once the minimum is reached, but 708 /// exceeding is not an issue. Rounding down is an issue since that will 709 /// result in writing fill characters. Therefore the result needs to be 710 /// "rounded up". 711 enum class __column_width_rounding { __down, __up }; 712 713 # ifndef _LIBCPP_HAS_NO_UNICODE 714 715 namespace __detail { 716 717 /// Converts a code point to the column width. 718 /// 719 /// The estimations are conforming to [format.string.general]/11 720 /// 721 /// This version expects a value less than 0x1'0000, which is a 3-byte UTF-8 722 /// character. 723 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width_3(uint32_t __c) noexcept { 724 _LIBCPP_ASSERT(__c < 0x10000, "Use __column_width_4 or __column_width for larger values"); 725 726 // clang-format off 727 return 1 + (__c >= 0x1100 && (__c <= 0x115f || 728 (__c >= 0x2329 && (__c <= 0x232a || 729 (__c >= 0x2e80 && (__c <= 0x303e || 730 (__c >= 0x3040 && (__c <= 0xa4cf || 731 (__c >= 0xac00 && (__c <= 0xd7a3 || 732 (__c >= 0xf900 && (__c <= 0xfaff || 733 (__c >= 0xfe10 && (__c <= 0xfe19 || 734 (__c >= 0xfe30 && (__c <= 0xfe6f || 735 (__c >= 0xff00 && (__c <= 0xff60 || 736 (__c >= 0xffe0 && (__c <= 0xffe6 737 )))))))))))))))))))); 738 // clang-format on 739 } 740 741 /// @overload 742 /// 743 /// This version expects a value greater than or equal to 0x1'0000, which is a 744 /// 4-byte UTF-8 character. 
745 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width_4(uint32_t __c) noexcept { 746 _LIBCPP_ASSERT(__c >= 0x10000, "Use __column_width_3 or __column_width for smaller values"); 747 748 // clang-format off 749 return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f || 750 (__c >= 0x1'f900 && (__c <= 0x1'f9ff || 751 (__c >= 0x2'0000 && (__c <= 0x2'fffd || 752 (__c >= 0x3'0000 && (__c <= 0x3'fffd 753 )))))))); 754 // clang-format on 755 } 756 757 /// @overload 758 /// 759 /// The general case, accepting all values. 760 _LIBCPP_HIDE_FROM_ABI constexpr int __column_width(uint32_t __c) noexcept { 761 if (__c < 0x10000) 762 return __detail::__column_width_3(__c); 763 764 return __detail::__column_width_4(__c); 765 } 766 767 template <class _CharT> 768 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width_grapheme_clustering( 769 const _CharT* __first, const _CharT* __last, size_t __maximum, __column_width_rounding __rounding) noexcept { 770 __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last}; 771 772 __column_width_result<_CharT> __result{0, __first}; 773 while (__result.__last_ != __last && __result.__width_ <= __maximum) { 774 typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume(); 775 int __width = __detail::__column_width(__cluster.__code_point_); 776 777 // When the next entry would exceed the maximum width the previous width 778 // might be returned. For example when a width of 100 is requested the 779 // returned width might be 99, since the next code point has an estimated 780 // column width of 2. This depends on the rounding flag. 781 // When the maximum is exceeded the loop will abort the next iteration. 
782 if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum) 783 return __result; 784 785 __result.__width_ += __width; 786 __result.__last_ = __cluster.__last_; 787 } 788 789 return __result; 790 } 791 792 } // namespace __detail 793 794 // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32. 795 // Depending on format the relation between the number of code units stored and 796 // the number of output columns differs. The first relation is the number of 797 // code units forming a code point. (The text assumes the code units are 798 // unsigned.) 799 // - UTF-8 The number of code units is between one and four. The first 127 800 // Unicode code points match the ASCII character set. When the highest bit is 801 // set it means the code point has more than one code unit. 802 // - UTF-16: The number of code units is between 1 and 2. When the first 803 // code unit is in the range [0xd800,0xdfff) it means the code point uses two 804 // code units. 805 // - UTF-32: The number of code units is always one. 806 // 807 // The code point to the number of columns is specified in 808 // [format.string.std]/11. This list might change in the future. 809 // 810 // Another thing to be taken into account is Grapheme clustering. This means 811 // that in some cases multiple code points are combined one element in the 812 // output. For example: 813 // - an ASCII character with a combined diacritical mark 814 // - an emoji with a skin tone modifier 815 // - a group of combined people emoji to create a family 816 // - a combination of flag emoji 817 // 818 // See also: 819 // - [format.string.general]/11 820 // - https://en.wikipedia.org/wiki/UTF-8#Encoding 821 // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF 822 823 _LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; } 824 825 /// Determines the number of output columns needed to render the input. 
826 /// 827 /// \note When the scanner encounters malformed Unicode it acts as-if every 828 /// code unit is a one column code point. Typically a terminal uses the same 829 /// strategy and replaces every malformed code unit with a one column 830 /// replacement character. 831 /// 832 /// \param __first Points to the first element of the input range. 833 /// \param __last Points beyond the last element of the input range. 834 /// \param __maximum The maximum number of output columns. The returned number 835 /// of estimated output columns will not exceed this value. 836 /// \param __rounding Selects the rounding method. 837 /// \c __down result.__width_ <= __maximum 838 /// \c __up result.__width_ <= __maximum + 1 839 template <class _CharT> 840 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> __estimate_column_width( 841 basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept { 842 // The width estimation is done in two steps: 843 // - Quickly process for the ASCII part. ASCII has the following properties 844 // - One code unit is one code point 845 // - Every code point has an estimated width of one 846 // - When needed it will a Unicode Grapheme clustering algorithm to find 847 // the proper place for truncation. 848 849 if (__str.empty() || __maximum == 0) 850 return {0, __str.begin()}; 851 852 // ASCII has one caveat; when an ASCII character is followed by a non-ASCII 853 // character they might be part of an extended grapheme cluster. For example: 854 // an ASCII letter and a COMBINING ACUTE ACCENT 855 // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we 856 // need to scan one code unit beyond the requested precision. When this code 857 // unit is non-ASCII we omit the current code unit and let the Grapheme 858 // clustering algorithm do its work. 
859 const _CharT* __it = __str.begin(); 860 if (__is_ascii(*__it)) { 861 do { 862 --__maximum; 863 ++__it; 864 if (__it == __str.end()) 865 return {__str.size(), __str.end()}; 866 867 if (__maximum == 0) { 868 if (__is_ascii(*__it)) 869 return {static_cast<size_t>(__it - __str.begin()), __it}; 870 871 break; 872 } 873 } while (__is_ascii(*__it)); 874 --__it; 875 ++__maximum; 876 } 877 878 ptrdiff_t __ascii_size = __it - __str.begin(); 879 __column_width_result __result = 880 __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding); 881 882 __result.__width_ += __ascii_size; 883 return __result; 884 } 885 # else // !defined(_LIBCPP_HAS_NO_UNICODE) 886 template <class _CharT> 887 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT> 888 __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept { 889 // When Unicode isn't supported assume ASCII and every code unit is one code 890 // point. In ASCII the estimated column width is always one. Thus there's no 891 // need for rounding. 892 size_t __width_ = _VSTD::min(__str.size(), __maximum); 893 return {__width_, __str.begin() + __width_}; 894 } 895 896 # endif // !defined(_LIBCPP_HAS_NO_UNICODE) 897 898 } // namespace __format_spec 899 900 #endif //_LIBCPP_STD_VER > 17 901 902 _LIBCPP_END_NAMESPACE_STD 903 904 _LIBCPP_POP_MACROS 905 906 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 907