1 // -*- C++ -*- 2 //===----------------------------------------------------------------------===// 3 // 4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 // See https://llvm.org/LICENSE.txt for license information. 6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 11 #define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 12 13 /// \file Contains the std-format-spec parser. 14 /// 15 /// Most of the code can be reused in the chrono-format-spec. 16 /// This header has some support for the chrono-format-spec since it doesn't 17 /// affect the std-format-spec. 18 19 #include <__algorithm/copy_n.h> 20 #include <__algorithm/min.h> 21 #include <__assert> 22 #include <__concepts/arithmetic.h> 23 #include <__concepts/same_as.h> 24 #include <__config> 25 #include <__format/format_arg.h> 26 #include <__format/format_error.h> 27 #include <__format/format_parse_context.h> 28 #include <__format/format_string.h> 29 #include <__format/unicode.h> 30 #include <__format/width_estimation_table.h> 31 #include <__iterator/concepts.h> 32 #include <__iterator/iterator_traits.h> // iter_value_t 33 #include <__memory/addressof.h> 34 #include <__type_traits/common_type.h> 35 #include <__type_traits/is_constant_evaluated.h> 36 #include <__type_traits/is_trivially_copyable.h> 37 #include <__variant/monostate.h> 38 #include <cstdint> 39 #include <string> 40 #include <string_view> 41 42 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 43 # pragma GCC system_header 44 #endif 45 46 _LIBCPP_PUSH_MACROS 47 #include <__undef_macros> 48 49 _LIBCPP_BEGIN_NAMESPACE_STD 50 51 #if _LIBCPP_STD_VER >= 20 52 53 namespace __format_spec { 54 55 _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void 56 __throw_invalid_option_format_error(const char* __id, const char* __option) { 57 std::__throw_format_error( 58 (string("The format specifier for ") + __id + " does not allow the " + __option + " option").c_str()); 59 } 60 61 _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) { 62 std::__throw_format_error( 63 (string("The type option contains an invalid value for ") + __id + " formatting argument").c_str()); 64 } 65 66 template <contiguous_iterator _Iterator, class _ParseContext> 67 _LIBCPP_HIDE_FROM_ABI constexpr __format::__parse_number_result<_Iterator> 68 __parse_arg_id(_Iterator __begin, _Iterator __end, _ParseContext& __ctx) { 69 using _CharT = iter_value_t<_Iterator>; 70 // This function is a wrapper to call the real parser. But it does the 71 // validation for the pre-conditions and post-conditions. 72 if (__begin == __end) 73 std::__throw_format_error("End of input while parsing an argument index"); 74 75 __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __ctx); 76 77 if (__r.__last == __end || *__r.__last != _CharT('}')) 78 std::__throw_format_error("The argument index is invalid"); 79 80 ++__r.__last; 81 return __r; 82 } 83 84 template <class _Context> 85 _LIBCPP_HIDE_FROM_ABI constexpr uint32_t 86 __substitute_arg_id(basic_format_arg<_Context> __format_arg) { 87 // [format.string.std]/8 88 // If the corresponding formatting argument is not of integral type... 89 // This wording allows char and bool too. LWG-3720 changes the wording to 90 // If the corresponding formatting argument is not of standard signed or 91 // unsigned integer type, 92 // This means the 128-bit will not be valid anymore. 93 // TODO FMT Verify this resolution is accepted and add a test to verify 94 // 128-bit integrals fail and switch to visit_format_arg. 95 return _VSTD::__visit_format_arg( 96 [](auto __arg) -> uint32_t { 97 using _Type = decltype(__arg); 98 if constexpr (same_as<_Type, monostate>) 99 std::__throw_format_error("The argument index value is too large for the number of arguments supplied"); 100 101 // [format.string.std]/8 102 // If { arg-idopt } is used in a width or precision, the value of the 103 // corresponding formatting argument is used in its place. If the 104 // corresponding formatting argument is not of standard signed or unsigned 105 // integer type, or its value is negative for precision or non-positive for 106 // width, an exception of type format_error is thrown. 107 // 108 // When an integral is used in a format function, it is stored as one of 109 // the types checked below. Other integral types are promoted. For example, 110 // a signed char is stored as an int. 111 if constexpr (same_as<_Type, int> || same_as<_Type, unsigned int> || // 112 same_as<_Type, long long> || same_as<_Type, unsigned long long>) { 113 if constexpr (signed_integral<_Type>) { 114 if (__arg < 0) 115 std::__throw_format_error("An argument index may not have a negative value"); 116 } 117 118 using _CT = common_type_t<_Type, decltype(__format::__number_max)>; 119 if (static_cast<_CT>(__arg) > static_cast<_CT>(__format::__number_max)) 120 std::__throw_format_error("The value of the argument index exceeds its maximum value"); 121 122 return __arg; 123 } else 124 std::__throw_format_error("Replacement argument isn't a standard signed or unsigned integer type"); 125 }, 126 __format_arg); 127 } 128 129 /// These fields are a filter for which elements to parse. 130 /// 131 /// They default to false so when a new field is added it needs to be opted in 132 /// explicitly. 133 // TODO FMT Use an ABI tag for this struct. 134 struct __fields { 135 uint16_t __sign_ : 1 {false}; 136 uint16_t __alternate_form_ : 1 {false}; 137 uint16_t __zero_padding_ : 1 {false}; 138 uint16_t __precision_ : 1 {false}; 139 uint16_t __locale_specific_form_ : 1 {false}; 140 uint16_t __type_ : 1 {false}; 141 // Determines the valid values for fill. 142 // 143 // Originally the fill could be any character except { and }. Range-based 144 // formatters use the colon to mark the beginning of the 145 // underlying-format-spec. To avoid parsing ambiguities these formatter 146 // specializations prohibit the use of the colon as a fill character. 147 uint16_t __use_range_fill_ : 1 {false}; 148 uint16_t __clear_brackets_ : 1 {false}; 149 uint16_t __consume_all_ : 1 {false}; 150 }; 151 152 // By not placing this constant in the formatter class it's not duplicated for 153 // char and wchar_t. 154 inline constexpr __fields __fields_bool{.__locale_specific_form_ = true, .__type_ = true, .__consume_all_ = true}; 155 inline constexpr __fields __fields_integral{ 156 .__sign_ = true, 157 .__alternate_form_ = true, 158 .__zero_padding_ = true, 159 .__locale_specific_form_ = true, 160 .__type_ = true, 161 .__consume_all_ = true}; 162 inline constexpr __fields __fields_floating_point{ 163 .__sign_ = true, 164 .__alternate_form_ = true, 165 .__zero_padding_ = true, 166 .__precision_ = true, 167 .__locale_specific_form_ = true, 168 .__type_ = true, 169 .__consume_all_ = true}; 170 inline constexpr __fields __fields_string{.__precision_ = true, .__type_ = true, .__consume_all_ = true}; 171 inline constexpr __fields __fields_pointer{.__zero_padding_ = true, .__type_ = true, .__consume_all_ = true}; 172 173 # if _LIBCPP_STD_VER >= 23 174 inline constexpr __fields __fields_tuple{.__use_range_fill_ = true, .__clear_brackets_ = true}; 175 inline constexpr __fields __fields_range{.__use_range_fill_ = true, .__clear_brackets_ = true}; 176 inline constexpr __fields __fields_fill_align_width{}; 177 # endif 178 179 enum class _LIBCPP_ENUM_VIS __alignment : uint8_t { 180 /// No alignment is set in the format string. 181 __default, 182 __left, 183 __center, 184 __right, 185 __zero_padding 186 }; 187 188 enum class _LIBCPP_ENUM_VIS __sign : uint8_t { 189 /// No sign is set in the format string. 190 /// 191 /// The sign isn't allowed for certain format-types. By using this value 192 /// it's possible to detect whether or not the user explicitly set the sign 193 /// flag. For formatting purposes it behaves the same as \ref __minus. 194 __default, 195 __minus, 196 __plus, 197 __space 198 }; 199 200 enum class _LIBCPP_ENUM_VIS __type : uint8_t { 201 __default = 0, 202 __string, 203 __binary_lower_case, 204 __binary_upper_case, 205 __octal, 206 __decimal, 207 __hexadecimal_lower_case, 208 __hexadecimal_upper_case, 209 __pointer_lower_case, 210 __pointer_upper_case, 211 __char, 212 __hexfloat_lower_case, 213 __hexfloat_upper_case, 214 __scientific_lower_case, 215 __scientific_upper_case, 216 __fixed_lower_case, 217 __fixed_upper_case, 218 __general_lower_case, 219 __general_upper_case, 220 __debug 221 }; 222 223 _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __create_type_mask(__type __t) { 224 uint32_t __shift = static_cast<uint32_t>(__t); 225 if (__shift == 0) 226 return 1; 227 228 if (__shift > 31) 229 std::__throw_format_error("The type does not fit in the mask"); 230 231 return 1 << __shift; 232 } 233 234 inline constexpr uint32_t __type_mask_integer = 235 __create_type_mask(__type::__binary_lower_case) | // 236 __create_type_mask(__type::__binary_upper_case) | // 237 __create_type_mask(__type::__decimal) | // 238 __create_type_mask(__type::__octal) | // 239 __create_type_mask(__type::__hexadecimal_lower_case) | // 240 __create_type_mask(__type::__hexadecimal_upper_case); 241 242 struct __std { 243 __alignment __alignment_ : 3; 244 __sign __sign_ : 2; 245 bool __alternate_form_ : 1; 246 bool __locale_specific_form_ : 1; 247 __type __type_; 248 }; 249 250 struct __chrono { 251 __alignment __alignment_ : 3; 252 bool __locale_specific_form_ : 1; 253 bool __hour_ : 1; 254 bool __weekday_name_ : 1; 255 bool __weekday_ : 1; 256 bool __day_of_year_ : 1; 257 bool __week_of_year_ : 1; 258 bool __month_name_ : 1; 259 }; 260 261 // The fill UCS scalar value. 262 // 263 // This is always an array, with 1, 2, or 4 elements. 264 // The size of the data structure is always 32-bits. 265 template <class _CharT> 266 struct __code_point; 267 268 template <> 269 struct __code_point<char> { 270 char __data[4] = {' '}; 271 }; 272 273 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS 274 template <> 275 struct __code_point<wchar_t> { 276 wchar_t __data[4 / sizeof(wchar_t)] = {L' '}; 277 }; 278 # endif 279 280 /// Contains the parsed formatting specifications. 281 /// 282 /// This contains information for both the std-format-spec and the 283 /// chrono-format-spec. This results in some unused members for both 284 /// specifications. However these unused members don't increase the size 285 /// of the structure. 286 /// 287 /// This struct doesn't cross ABI boundaries so its layout doesn't need to be 288 /// kept stable. 289 template <class _CharT> 290 struct __parsed_specifications { 291 union { 292 // The field __alignment_ is the first element in __std_ and __chrono_. 293 // This allows the code to always inspect this value regards which member 294 // of the union is the active member [class.union.general]/2. 295 // 296 // This is needed since the generic output routines handle the alignment of 297 // the output. 298 __alignment __alignment_ : 3; 299 __std __std_; 300 __chrono __chrono_; 301 }; 302 303 /// The requested width. 304 /// 305 /// When the format-spec used an arg-id for this field it has already been 306 /// replaced with the value of that arg-id. 307 int32_t __width_; 308 309 /// The requested precision. 310 /// 311 /// When the format-spec used an arg-id for this field it has already been 312 /// replaced with the value of that arg-id. 313 int32_t __precision_; 314 315 __code_point<_CharT> __fill_; 316 317 _LIBCPP_HIDE_FROM_ABI constexpr bool __has_width() const { return __width_ > 0; } 318 319 _LIBCPP_HIDE_FROM_ABI constexpr bool __has_precision() const { return __precision_ >= 0; } 320 }; 321 322 // Validate the struct is small and cheap to copy since the struct is passed by 323 // value in formatting functions. 324 static_assert(sizeof(__parsed_specifications<char>) == 16); 325 static_assert(is_trivially_copyable_v<__parsed_specifications<char>>); 326 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS 327 static_assert(sizeof(__parsed_specifications<wchar_t>) == 16); 328 static_assert(is_trivially_copyable_v<__parsed_specifications<wchar_t>>); 329 # endif 330 331 /// The parser for the std-format-spec. 332 /// 333 /// Note this class is a member of std::formatter specializations. It's 334 /// expected developers will create their own formatter specializations that 335 /// inherit from the std::formatter specializations. This means this class 336 /// must be ABI stable. To aid the stability the unused bits in the class are 337 /// set to zero. That way they can be repurposed if a future revision of the 338 /// Standards adds new fields to std-format-spec. 339 template <class _CharT> 340 class _LIBCPP_TEMPLATE_VIS __parser { 341 public: 342 // Parses the format specification. 343 // 344 // Depending on whether the parsing is done compile-time or run-time 345 // the method slightly differs. 346 // - Only parses a field when it is in the __fields. Accepting all 347 // fields and then validating the valid ones has a performance impact. 348 // This is faster but gives slighly worse error messages. 349 // - At compile-time when a field is not accepted the parser will still 350 // parse it and give an error when it's present. This gives a more 351 // accurate error. 352 // The idea is that most times the format instead of the vformat 353 // functions are used. In that case the error will be detected during 354 // compilation and there is no need to pay for the run-time overhead. 355 template <class _ParseContext> 356 _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator __parse(_ParseContext& __ctx, __fields __fields) { 357 auto __begin = __ctx.begin(); 358 auto __end = __ctx.end(); 359 if (__begin == __end) 360 return __begin; 361 362 if (__parse_fill_align(__begin, __end, __fields.__use_range_fill_) && __begin == __end) 363 return __begin; 364 365 if (__fields.__sign_) { 366 if (__parse_sign(__begin) && __begin == __end) 367 return __begin; 368 } else if (std::is_constant_evaluated() && __parse_sign(__begin)) { 369 std::__throw_format_error("The format specification does not allow the sign option"); 370 } 371 372 if (__fields.__alternate_form_) { 373 if (__parse_alternate_form(__begin) && __begin == __end) 374 return __begin; 375 } else if (std::is_constant_evaluated() && __parse_alternate_form(__begin)) { 376 std::__throw_format_error("The format specifier does not allow the alternate form option"); 377 } 378 379 if (__fields.__zero_padding_) { 380 if (__parse_zero_padding(__begin) && __begin == __end) 381 return __begin; 382 } else if (std::is_constant_evaluated() && __parse_zero_padding(__begin)) { 383 std::__throw_format_error("The format specifier does not allow the zero-padding option"); 384 } 385 386 if (__parse_width(__begin, __end, __ctx) && __begin == __end) 387 return __begin; 388 389 if (__fields.__precision_) { 390 if (__parse_precision(__begin, __end, __ctx) && __begin == __end) 391 return __begin; 392 } else if (std::is_constant_evaluated() && __parse_precision(__begin, __end, __ctx)) { 393 std::__throw_format_error("The format specifier does not allow the precision option"); 394 } 395 396 if (__fields.__locale_specific_form_) { 397 if (__parse_locale_specific_form(__begin) && __begin == __end) 398 return __begin; 399 } else if (std::is_constant_evaluated() && __parse_locale_specific_form(__begin)) { 400 std::__throw_format_error("The format specifier does not allow the locale-specific form option"); 401 } 402 403 if (__fields.__clear_brackets_) { 404 if (__parse_clear_brackets(__begin) && __begin == __end) 405 return __begin; 406 } else if (std::is_constant_evaluated() && __parse_clear_brackets(__begin)) { 407 std::__throw_format_error("The format specifier does not allow the n option"); 408 } 409 410 if (__fields.__type_) 411 __parse_type(__begin); 412 413 if (!__fields.__consume_all_) 414 return __begin; 415 416 if (__begin != __end && *__begin != _CharT('}')) 417 std::__throw_format_error("The format specifier should consume the input or end with a '}'"); 418 419 return __begin; 420 } 421 422 // Validates the selected the parsed data. 423 // 424 // The valid fields in the parser may depend on the display type 425 // selected. But the type is the last optional field, so by the time 426 // it's known an option can't be used, it already has been parsed. 427 // This does the validation again. 428 // 429 // For example an integral may have a sign, zero-padding, or alternate 430 // form when the type option is not 'c'. So the generic approach is: 431 // 432 // typename _ParseContext::iterator __result = __parser_.__parse(__ctx, __format_spec::__fields_integral); 433 // if (__parser.__type_ == __format_spec::__type::__char) { 434 // __parser.__validate((__format_spec::__fields_bool, "an integer"); 435 // ... // more char adjustments 436 // } else { 437 // ... // validate an integral type. 438 // } 439 // 440 // For some types all valid options need a second validation run, like 441 // boolean types. 442 // 443 // Depending on whether the validation is done at compile-time or 444 // run-time the error differs 445 // - run-time the exception is thrown and contains the type of field 446 // being validated. 447 // - at compile-time the line with `std::__throw_format_error` is shown 448 // in the output. In that case it's important for the error to be on one 449 // line. 450 // Note future versions of C++ may allow better compile-time error 451 // reporting. 452 _LIBCPP_HIDE_FROM_ABI constexpr void 453 __validate(__fields __fields, const char* __id, uint32_t __type_mask = -1) const { 454 if (!__fields.__sign_ && __sign_ != __sign::__default) { 455 if (std::is_constant_evaluated()) 456 std::__throw_format_error("The format specifier does not allow the sign option"); 457 else 458 __format_spec::__throw_invalid_option_format_error(__id, "sign"); 459 } 460 461 if (!__fields.__alternate_form_ && __alternate_form_) { 462 if (std::is_constant_evaluated()) 463 std::__throw_format_error("The format specifier does not allow the alternate form option"); 464 else 465 __format_spec::__throw_invalid_option_format_error(__id, "alternate form"); 466 } 467 468 if (!__fields.__zero_padding_ && __alignment_ == __alignment::__zero_padding) { 469 if (std::is_constant_evaluated()) 470 std::__throw_format_error("The format specifier does not allow the zero-padding option"); 471 else 472 __format_spec::__throw_invalid_option_format_error(__id, "zero-padding"); 473 } 474 475 if (!__fields.__precision_ && __precision_ != -1) { // Works both when the precision has a value or an arg-id. 476 if (std::is_constant_evaluated()) 477 std::__throw_format_error("The format specifier does not allow the precision option"); 478 else 479 __format_spec::__throw_invalid_option_format_error(__id, "precision"); 480 } 481 482 if (!__fields.__locale_specific_form_ && __locale_specific_form_) { 483 if (std::is_constant_evaluated()) 484 std::__throw_format_error("The format specifier does not allow the locale-specific form option"); 485 else 486 __format_spec::__throw_invalid_option_format_error(__id, "locale-specific form"); 487 } 488 489 if ((__create_type_mask(__type_) & __type_mask) == 0) { 490 if (std::is_constant_evaluated()) 491 std::__throw_format_error("The format specifier uses an invalid value for the type option"); 492 else 493 __format_spec::__throw_invalid_type_format_error(__id); 494 } 495 } 496 497 /// \returns the `__parsed_specifications` with the resolved dynamic sizes.. 498 _LIBCPP_HIDE_FROM_ABI 499 __parsed_specifications<_CharT> __get_parsed_std_specifications(auto& __ctx) const { 500 return __parsed_specifications<_CharT>{ 501 .__std_ = __std{.__alignment_ = __alignment_, 502 .__sign_ = __sign_, 503 .__alternate_form_ = __alternate_form_, 504 .__locale_specific_form_ = __locale_specific_form_, 505 .__type_ = __type_}, 506 .__width_{__get_width(__ctx)}, 507 .__precision_{__get_precision(__ctx)}, 508 .__fill_{__fill_}}; 509 } 510 511 _LIBCPP_HIDE_FROM_ABI __parsed_specifications<_CharT> __get_parsed_chrono_specifications(auto& __ctx) const { 512 return __parsed_specifications<_CharT>{ 513 .__chrono_ = 514 __chrono{.__alignment_ = __alignment_, 515 .__locale_specific_form_ = __locale_specific_form_, 516 .__hour_ = __hour_, 517 .__weekday_name_ = __weekday_name_, 518 .__weekday_ = __weekday_, 519 .__day_of_year_ = __day_of_year_, 520 .__week_of_year_ = __week_of_year_, 521 .__month_name_ = __month_name_}, 522 .__width_{__get_width(__ctx)}, 523 .__precision_{__get_precision(__ctx)}, 524 .__fill_{__fill_}}; 525 } 526 527 __alignment __alignment_ : 3 {__alignment::__default}; 528 __sign __sign_ : 2 {__sign::__default}; 529 bool __alternate_form_ : 1 {false}; 530 bool __locale_specific_form_ : 1 {false}; 531 bool __clear_brackets_ : 1 {false}; 532 __type __type_{__type::__default}; 533 534 // These flags are only used for formatting chrono. Since the struct has 535 // padding space left it's added to this structure. 536 bool __hour_ : 1 {false}; 537 538 bool __weekday_name_ : 1 {false}; 539 bool __weekday_ : 1 {false}; 540 541 bool __day_of_year_ : 1 {false}; 542 bool __week_of_year_ : 1 {false}; 543 544 bool __month_name_ : 1 {false}; 545 546 uint8_t __reserved_0_ : 2 {0}; 547 uint8_t __reserved_1_ : 6 {0}; 548 // These two flags are only used internally and not part of the 549 // __parsed_specifications. Therefore put them at the end. 550 bool __width_as_arg_ : 1 {false}; 551 bool __precision_as_arg_ : 1 {false}; 552 553 /// The requested width, either the value or the arg-id. 554 int32_t __width_{0}; 555 556 /// The requested precision, either the value or the arg-id. 557 int32_t __precision_{-1}; 558 559 __code_point<_CharT> __fill_{}; 560 561 private: 562 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alignment(_CharT __c) { 563 switch (__c) { 564 case _CharT('<'): 565 __alignment_ = __alignment::__left; 566 return true; 567 568 case _CharT('^'): 569 __alignment_ = __alignment::__center; 570 return true; 571 572 case _CharT('>'): 573 __alignment_ = __alignment::__right; 574 return true; 575 } 576 return false; 577 } 578 579 _LIBCPP_HIDE_FROM_ABI constexpr void __validate_fill_character(_CharT __fill, bool __use_range_fill) { 580 // The forbidden fill characters all code points formed from a single code unit, thus the 581 // check can be omitted when more code units are used. 582 if (__use_range_fill && (__fill == _CharT('{') || __fill == _CharT('}') || __fill == _CharT(':'))) 583 std::__throw_format_error("The fill option contains an invalid value"); 584 else if (__fill == _CharT('{') || __fill == _CharT('}')) 585 std::__throw_format_error("The fill option contains an invalid value"); 586 } 587 588 # ifndef _LIBCPP_HAS_NO_UNICODE 589 // range-fill and tuple-fill are identical 590 template <contiguous_iterator _Iterator> 591 requires same_as<_CharT, char> 592 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS 593 || (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) 594 # endif 595 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) { 596 _LIBCPP_ASSERT_UNCATEGORIZED(__begin != __end, 597 "when called with an empty input the function will cause " 598 "undefined behavior by evaluating data not in the input"); 599 __unicode::__code_point_view<_CharT> __view{__begin, __end}; 600 __unicode::__consume_result __consumed = __view.__consume(); 601 if (__consumed.__status != __unicode::__consume_result::__ok) 602 std::__throw_format_error("The format specifier contains malformed Unicode characters"); 603 604 if (__view.__position() < __end && __parse_alignment(*__view.__position())) { 605 ptrdiff_t __code_units = __view.__position() - __begin; 606 if (__code_units == 1) 607 // The forbidden fill characters all are code points encoded 608 // in one code unit, thus the check can be omitted when more 609 // code units are used. 610 __validate_fill_character(*__begin, __use_range_fill); 611 612 std::copy_n(__begin, __code_units, std::addressof(__fill_.__data[0])); 613 __begin += __code_units + 1; 614 return true; 615 } 616 617 if (!__parse_alignment(*__begin)) 618 return false; 619 620 ++__begin; 621 return true; 622 } 623 624 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS 625 template <contiguous_iterator _Iterator> 626 requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) 627 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) { 628 _LIBCPP_ASSERT_UNCATEGORIZED(__begin != __end, 629 "when called with an empty input the function will cause " 630 "undefined behavior by evaluating data not in the input"); 631 if (__begin + 1 != __end && __parse_alignment(*(__begin + 1))) { 632 if (!__unicode::__is_scalar_value(*__begin)) 633 std::__throw_format_error("The fill option contains an invalid value"); 634 635 __validate_fill_character(*__begin, __use_range_fill); 636 637 __fill_.__data[0] = *__begin; 638 __begin += 2; 639 return true; 640 } 641 642 if (!__parse_alignment(*__begin)) 643 return false; 644 645 ++__begin; 646 return true; 647 } 648 649 # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS 650 651 # else // _LIBCPP_HAS_NO_UNICODE 652 // range-fill and tuple-fill are identical 653 template <contiguous_iterator _Iterator> 654 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end, bool __use_range_fill) { 655 _LIBCPP_ASSERT_UNCATEGORIZED(__begin != __end, 656 "when called with an empty input the function will cause " 657 "undefined behavior by evaluating data not in the input"); 658 if (__begin + 1 != __end) { 659 if (__parse_alignment(*(__begin + 1))) { 660 __validate_fill_character(*__begin, __use_range_fill); 661 662 __fill_.__data[0] = *__begin; 663 __begin += 2; 664 return true; 665 } 666 } 667 668 if (!__parse_alignment(*__begin)) 669 return false; 670 671 ++__begin; 672 return true; 673 } 674 675 # endif // _LIBCPP_HAS_NO_UNICODE 676 677 template <contiguous_iterator _Iterator> 678 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) { 679 switch (*__begin) { 680 case _CharT('-'): 681 __sign_ = __sign::__minus; 682 break; 683 case _CharT('+'): 684 __sign_ = __sign::__plus; 685 break; 686 case _CharT(' '): 687 __sign_ = __sign::__space; 688 break; 689 default: 690 return false; 691 } 692 ++__begin; 693 return true; 694 } 695 696 template <contiguous_iterator _Iterator> 697 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_alternate_form(_Iterator& __begin) { 698 if (*__begin != _CharT('#')) 699 return false; 700 701 __alternate_form_ = true; 702 ++__begin; 703 return true; 704 } 705 706 template <contiguous_iterator _Iterator> 707 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_zero_padding(_Iterator& __begin) { 708 if (*__begin != _CharT('0')) 709 return false; 710 711 if (__alignment_ == __alignment::__default) 712 __alignment_ = __alignment::__zero_padding; 713 ++__begin; 714 return true; 715 } 716 717 template <contiguous_iterator _Iterator> 718 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_width(_Iterator& __begin, _Iterator __end, auto& __ctx) { 719 if (*__begin == _CharT('0')) 720 std::__throw_format_error("The width option should not have a leading zero"); 721 722 if (*__begin == _CharT('{')) { 723 __format::__parse_number_result __r = __format_spec::__parse_arg_id(++__begin, __end, __ctx); 724 __width_as_arg_ = true; 725 __width_ = __r.__value; 726 __begin = __r.__last; 727 return true; 728 } 729 730 if (*__begin < _CharT('0') || *__begin > _CharT('9')) 731 return false; 732 733 __format::__parse_number_result __r = __format::__parse_number(__begin, __end); 734 __width_ = __r.__value; 735 _LIBCPP_ASSERT_UNCATEGORIZED(__width_ != 0, "A zero value isn't allowed and should be impossible, " 736 "due to validations in this function"); 737 __begin = __r.__last; 738 return true; 739 } 740 741 template <contiguous_iterator _Iterator> 742 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_precision(_Iterator& __begin, _Iterator __end, auto& __ctx) { 743 if (*__begin != _CharT('.')) 744 return false; 745 746 ++__begin; 747 if (__begin == __end) 748 std::__throw_format_error("End of input while parsing format specifier precision"); 749 750 if (*__begin == _CharT('{')) { 751 __format::__parse_number_result __arg_id = __format_spec::__parse_arg_id(++__begin, __end, __ctx); 752 __precision_as_arg_ = true; 753 __precision_ = __arg_id.__value; 754 __begin = __arg_id.__last; 755 return true; 756 } 757 758 if (*__begin < _CharT('0') || *__begin > _CharT('9')) 759 std::__throw_format_error("The precision option does not contain a value or an argument index"); 760 761 __format::__parse_number_result __r = __format::__parse_number(__begin, __end); 762 __precision_ = __r.__value; 763 __precision_as_arg_ = false; 764 __begin = __r.__last; 765 return true; 766 } 767 768 template <contiguous_iterator _Iterator> 769 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_locale_specific_form(_Iterator& __begin) { 770 if (*__begin != _CharT('L')) 771 return false; 772 773 __locale_specific_form_ = true; 774 ++__begin; 775 return true; 776 } 777 778 template <contiguous_iterator _Iterator> 779 _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_clear_brackets(_Iterator& __begin) { 780 if (*__begin != _CharT('n')) 781 return false; 782 783 __clear_brackets_ = true; 784 ++__begin; 785 return true; 786 } 787 788 template <contiguous_iterator _Iterator> 789 _LIBCPP_HIDE_FROM_ABI constexpr void __parse_type(_Iterator& __begin) { 790 // Determines the type. It does not validate whether the selected type is 791 // valid. Most formatters have optional fields that are only allowed for 792 // certain types. These parsers need to do validation after the type has 793 // been parsed. So its easier to implement the validation for all types in 794 // the specific parse function. 795 switch (*__begin) { 796 case 'A': 797 __type_ = __type::__hexfloat_upper_case; 798 break; 799 case 'B': 800 __type_ = __type::__binary_upper_case; 801 break; 802 case 'E': 803 __type_ = __type::__scientific_upper_case; 804 break; 805 case 'F': 806 __type_ = __type::__fixed_upper_case; 807 break; 808 case 'G': 809 __type_ = __type::__general_upper_case; 810 break; 811 case 'X': 812 __type_ = __type::__hexadecimal_upper_case; 813 break; 814 case 'a': 815 __type_ = __type::__hexfloat_lower_case; 816 break; 817 case 'b': 818 __type_ = __type::__binary_lower_case; 819 break; 820 case 'c': 821 __type_ = __type::__char; 822 break; 823 case 'd': 824 __type_ = __type::__decimal; 825 break; 826 case 'e': 827 __type_ = __type::__scientific_lower_case; 828 break; 829 case 'f': 830 __type_ = __type::__fixed_lower_case; 831 break; 832 case 'g': 833 __type_ = __type::__general_lower_case; 834 break; 835 case 'o': 836 __type_ = __type::__octal; 837 break; 838 case 'p': 839 __type_ = __type::__pointer_lower_case; 840 break; 841 case 'P': 842 __type_ = __type::__pointer_upper_case; 843 break; 844 case 's': 845 __type_ = __type::__string; 846 break; 847 case 'x': 848 __type_ = __type::__hexadecimal_lower_case; 849 break; 850 # if _LIBCPP_STD_VER >= 23 851 case '?': 852 __type_ = __type::__debug; 853 break; 854 # endif 855 default: 856 return; 857 } 858 ++__begin; 859 } 860 861 _LIBCPP_HIDE_FROM_ABI 862 int32_t __get_width(auto& __ctx) const { 863 if (!__width_as_arg_) 864 return __width_; 865 866 return __format_spec::__substitute_arg_id(__ctx.arg(__width_)); 867 } 868 869 _LIBCPP_HIDE_FROM_ABI 870 int32_t __get_precision(auto& __ctx) const { 871 if (!__precision_as_arg_) 872 return __precision_; 873 874 return __format_spec::__substitute_arg_id(__ctx.arg(__precision_)); 875 } 876 }; 877 878 // Validates whether the reserved bitfields don't change the size. 879 static_assert(sizeof(__parser<char>) == 16); 880 # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS 881 static_assert(sizeof(__parser<wchar_t>) == 16); 882 # endif 883 884 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_string(__format_spec::__type __type) { 885 switch (__type) { 886 case __format_spec::__type::__default: 887 case __format_spec::__type::__string: 888 case __format_spec::__type::__debug: 889 break; 890 891 default: 892 std::__throw_format_error("The type option contains an invalid value for a string formatting argument"); 893 } 894 } 895 896 template <class _CharT> 897 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_bool_string(__parser<_CharT>& __parser, const char* __id) { 898 __parser.__validate(__format_spec::__fields_bool, __id); 899 if (__parser.__alignment_ == __alignment::__default) 900 __parser.__alignment_ = __alignment::__left; 901 } 902 903 template <class _CharT> 904 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_char(__parser<_CharT>& __parser, const char* __id) { 905 __format_spec::__process_display_type_bool_string(__parser, __id); 906 } 907 908 template <class _CharT> 909 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_bool(__parser<_CharT>& __parser, const char* __id) { 910 switch (__parser.__type_) { 911 case __format_spec::__type::__default: 912 case __format_spec::__type::__string: 913 __format_spec::__process_display_type_bool_string(__parser, __id); 914 break; 915 916 case __format_spec::__type::__binary_lower_case: 917 case __format_spec::__type::__binary_upper_case: 918 case __format_spec::__type::__octal: 919 case __format_spec::__type::__decimal: 920 case __format_spec::__type::__hexadecimal_lower_case: 921 case __format_spec::__type::__hexadecimal_upper_case: 922 break; 923 924 default: 925 __format_spec::__throw_invalid_type_format_error(__id); 926 } 927 } 928 929 template <class _CharT> 930 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_char(__parser<_CharT>& __parser, const char* __id) { 931 switch (__parser.__type_) { 932 case __format_spec::__type::__default: 933 case __format_spec::__type::__char: 934 case __format_spec::__type::__debug: 935 __format_spec::__process_display_type_char(__parser, __id); 936 break; 937 938 case __format_spec::__type::__binary_lower_case: 939 case __format_spec::__type::__binary_upper_case: 940 case __format_spec::__type::__octal: 941 case __format_spec::__type::__decimal: 942 case __format_spec::__type::__hexadecimal_lower_case: 943 case __format_spec::__type::__hexadecimal_upper_case: 944 break; 945 946 default: 947 __format_spec::__throw_invalid_type_format_error(__id); 948 } 949 } 950 951 template <class _CharT> 952 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_integer(__parser<_CharT>& __parser, const char* __id) { 953 switch (__parser.__type_) { 954 case __format_spec::__type::__default: 955 case __format_spec::__type::__binary_lower_case: 956 case __format_spec::__type::__binary_upper_case: 957 case __format_spec::__type::__octal: 958 case __format_spec::__type::__decimal: 959 case __format_spec::__type::__hexadecimal_lower_case: 960 case __format_spec::__type::__hexadecimal_upper_case: 961 break; 962 963 case __format_spec::__type::__char: 964 __format_spec::__process_display_type_char(__parser, __id); 965 break; 966 967 default: 968 __format_spec::__throw_invalid_type_format_error(__id); 969 } 970 } 971 972 template <class _CharT> 973 _LIBCPP_HIDE_FROM_ABI constexpr void __process_parsed_floating_point(__parser<_CharT>& __parser, const char* __id) { 974 switch (__parser.__type_) { 975 case __format_spec::__type::__default: 976 case __format_spec::__type::__hexfloat_lower_case: 977 case __format_spec::__type::__hexfloat_upper_case: 978 // Precision specific behavior will be handled later. 979 break; 980 case __format_spec::__type::__scientific_lower_case: 981 case __format_spec::__type::__scientific_upper_case: 982 case __format_spec::__type::__fixed_lower_case: 983 case __format_spec::__type::__fixed_upper_case: 984 case __format_spec::__type::__general_lower_case: 985 case __format_spec::__type::__general_upper_case: 986 if (!__parser.__precision_as_arg_ && __parser.__precision_ == -1) 987 // Set the default precision for the call to to_chars. 988 __parser.__precision_ = 6; 989 break; 990 991 default: 992 __format_spec::__throw_invalid_type_format_error(__id); 993 } 994 } 995 996 _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type_pointer(__format_spec::__type __type, const char* __id) { 997 switch (__type) { 998 case __format_spec::__type::__default: 999 case __format_spec::__type::__pointer_lower_case: 1000 case __format_spec::__type::__pointer_upper_case: 1001 break; 1002 1003 default: 1004 __format_spec::__throw_invalid_type_format_error(__id); 1005 } 1006 } 1007 1008 template <contiguous_iterator _Iterator> 1009 struct __column_width_result { 1010 /// The number of output columns. 1011 size_t __width_; 1012 /// One beyond the last code unit used in the estimation. 1013 /// 1014 /// This limits the original output to fit in the wanted number of columns. 1015 _Iterator __last_; 1016 }; 1017 1018 template <contiguous_iterator _Iterator> 1019 __column_width_result(size_t, _Iterator) -> __column_width_result<_Iterator>; 1020 1021 /// Since a column width can be two it's possible that the requested column 1022 /// width can't be achieved. Depending on the intended usage the policy can be 1023 /// selected. 1024 /// - When used as precision the maximum width may not be exceeded and the 1025 /// result should be "rounded down" to the previous boundary. 1026 /// - When used as a width we're done once the minimum is reached, but 1027 /// exceeding is not an issue. Rounding down is an issue since that will 1028 /// result in writing fill characters. Therefore the result needs to be 1029 /// "rounded up". 1030 enum class __column_width_rounding { __down, __up }; 1031 1032 # ifndef _LIBCPP_HAS_NO_UNICODE 1033 1034 namespace __detail { 1035 template <contiguous_iterator _Iterator> 1036 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width_grapheme_clustering( 1037 _Iterator __first, _Iterator __last, size_t __maximum, __column_width_rounding __rounding) noexcept { 1038 using _CharT = iter_value_t<_Iterator>; 1039 __unicode::__extended_grapheme_cluster_view<_CharT> __view{__first, __last}; 1040 1041 __column_width_result<_Iterator> __result{0, __first}; 1042 while (__result.__last_ != __last && __result.__width_ <= __maximum) { 1043 typename __unicode::__extended_grapheme_cluster_view<_CharT>::__cluster __cluster = __view.__consume(); 1044 int __width = __width_estimation_table::__estimated_width(__cluster.__code_point_); 1045 1046 // When the next entry would exceed the maximum width the previous width 1047 // might be returned. For example when a width of 100 is requested the 1048 // returned width might be 99, since the next code point has an estimated 1049 // column width of 2. This depends on the rounding flag. 1050 // When the maximum is exceeded the loop will abort the next iteration. 1051 if (__rounding == __column_width_rounding::__down && __result.__width_ + __width > __maximum) 1052 return __result; 1053 1054 __result.__width_ += __width; 1055 __result.__last_ = __cluster.__last_; 1056 } 1057 1058 return __result; 1059 } 1060 1061 } // namespace __detail 1062 1063 // Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32. 1064 // Depending on format the relation between the number of code units stored and 1065 // the number of output columns differs. The first relation is the number of 1066 // code units forming a code point. (The text assumes the code units are 1067 // unsigned.) 1068 // - UTF-8 The number of code units is between one and four. The first 127 1069 // Unicode code points match the ASCII character set. When the highest bit is 1070 // set it means the code point has more than one code unit. 1071 // - UTF-16: The number of code units is between 1 and 2. When the first 1072 // code unit is in the range [0xd800,0xdfff) it means the code point uses two 1073 // code units. 1074 // - UTF-32: The number of code units is always one. 1075 // 1076 // The code point to the number of columns is specified in 1077 // [format.string.std]/11. This list might change in the future. 1078 // 1079 // Another thing to be taken into account is Grapheme clustering. This means 1080 // that in some cases multiple code points are combined one element in the 1081 // output. For example: 1082 // - an ASCII character with a combined diacritical mark 1083 // - an emoji with a skin tone modifier 1084 // - a group of combined people emoji to create a family 1085 // - a combination of flag emoji 1086 // 1087 // See also: 1088 // - [format.string.general]/11 1089 // - https://en.wikipedia.org/wiki/UTF-8#Encoding 1090 // - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF 1091 1092 _LIBCPP_HIDE_FROM_ABI constexpr bool __is_ascii(char32_t __c) { return __c < 0x80; } 1093 1094 /// Determines the number of output columns needed to render the input. 1095 /// 1096 /// \note When the scanner encounters malformed Unicode it acts as-if every 1097 /// code unit is a one column code point. Typically a terminal uses the same 1098 /// strategy and replaces every malformed code unit with a one column 1099 /// replacement character. 1100 /// 1101 /// \param __first Points to the first element of the input range. 1102 /// \param __last Points beyond the last element of the input range. 1103 /// \param __maximum The maximum number of output columns. The returned number 1104 /// of estimated output columns will not exceed this value. 1105 /// \param __rounding Selects the rounding method. 1106 /// \c __down result.__width_ <= __maximum 1107 /// \c __up result.__width_ <= __maximum + 1 1108 template <class _CharT, class _Iterator = typename basic_string_view<_CharT>::const_iterator> 1109 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_column_width( 1110 basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding __rounding) noexcept { 1111 // The width estimation is done in two steps: 1112 // - Quickly process for the ASCII part. ASCII has the following properties 1113 // - One code unit is one code point 1114 // - Every code point has an estimated width of one 1115 // - When needed it will a Unicode Grapheme clustering algorithm to find 1116 // the proper place for truncation. 1117 1118 if (__str.empty() || __maximum == 0) 1119 return {0, __str.begin()}; 1120 1121 // ASCII has one caveat; when an ASCII character is followed by a non-ASCII 1122 // character they might be part of an extended grapheme cluster. For example: 1123 // an ASCII letter and a COMBINING ACUTE ACCENT 1124 // The truncate should happen after the COMBINING ACUTE ACCENT. Therefore we 1125 // need to scan one code unit beyond the requested precision. When this code 1126 // unit is non-ASCII we omit the current code unit and let the Grapheme 1127 // clustering algorithm do its work. 1128 auto __it = __str.begin(); 1129 if (__format_spec::__is_ascii(*__it)) { 1130 do { 1131 --__maximum; 1132 ++__it; 1133 if (__it == __str.end()) 1134 return {__str.size(), __str.end()}; 1135 1136 if (__maximum == 0) { 1137 if (__format_spec::__is_ascii(*__it)) 1138 return {static_cast<size_t>(__it - __str.begin()), __it}; 1139 1140 break; 1141 } 1142 } while (__format_spec::__is_ascii(*__it)); 1143 --__it; 1144 ++__maximum; 1145 } 1146 1147 ptrdiff_t __ascii_size = __it - __str.begin(); 1148 __column_width_result __result = 1149 __detail::__estimate_column_width_grapheme_clustering(__it, __str.end(), __maximum, __rounding); 1150 1151 __result.__width_ += __ascii_size; 1152 return __result; 1153 } 1154 # else // !defined(_LIBCPP_HAS_NO_UNICODE) 1155 template <class _CharT> 1156 _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<typename basic_string_view<_CharT>::const_iterator> 1157 __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept { 1158 // When Unicode isn't supported assume ASCII and every code unit is one code 1159 // point. In ASCII the estimated column width is always one. Thus there's no 1160 // need for rounding. 1161 size_t __width_ = _VSTD::min(__str.size(), __maximum); 1162 return {__width_, __str.begin() + __width_}; 1163 } 1164 1165 # endif // !defined(_LIBCPP_HAS_NO_UNICODE) 1166 1167 } // namespace __format_spec 1168 1169 #endif //_LIBCPP_STD_VER >= 20 1170 1171 _LIBCPP_END_NAMESPACE_STD 1172 1173 _LIBCPP_POP_MACROS 1174 1175 #endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H 1176