// Locale support (codecvt) -*- C++ -*-

// Copyright (C) 2000-2018 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

/** @file bits/codecvt.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{locale}
 */

//
// ISO C++ 14882: 22.2.1.5 Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@redhat.com>

#ifndef _CODECVT_H
#define _CODECVT_H 1

#pragma GCC system_header

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

/// Empty base class for codecvt facet [22.2.1.5].
///
/// It carries no data and no member functions; its only content is the
/// @c result enumeration shared by every codecvt instantiation.
class codecvt_base
{
public:
  /// Status reported by the codecvt conversion functions.
  ///
  /// The enumerators rely on their implicit values (declaration order),
  /// so the order below must not be changed.
  enum result
  {
    ok,       ///< All of the input was converted.
    partial,  ///< Input ended early or the output buffer was too small.
    error,    ///< The conversion failed.
    noconv    ///< No conversion was necessary.
  };
};

/**
 *  @brief  Common base for codecvt functions.
 *
 *  This template class provides implementations of the public functions
 *  that forward to the protected virtual functions.
*
 *  This template also provides abstract stubs for the protected virtual
 *  functions.
 */
template<typename _InternT, typename _ExternT, typename _StateT>
  class __codecvt_abstract_base
  : public locale::facet, public codecvt_base
  {
  public:
    // Types:
    typedef codecvt_base::result result;
    typedef _InternT intern_type;
    typedef _ExternT extern_type;
    typedef _StateT state_type;

    // 22.2.1.5.1 codecvt members
    /**
     *  @brief  Convert from internal to external character set.
     *
     *  Converts input string of intern_type to output string of
     *  extern_type.  This is analogous to wcsrtombs.  It does this by
     *  calling codecvt::do_out.
     *
     *  The source and destination character sets are determined by the
     *  facet's locale, internal and external types.
     *
     *  The characters in [from,from_end) are converted and written to
     *  [to,to_end).  from_next and to_next are set to point to the
     *  character following the last successfully converted character,
     *  respectively.  If the result needed no conversion, from_next and
     *  to_next are not affected.
     *
     *  The @a state argument should be initialized if the input is at the
     *  beginning and carried from a previous call if continuing
     *  conversion.  There are no guarantees about how @a state is used.
     *
     *  The result returned is a member of codecvt_base::result.  If
     *  all the input is converted, returns codecvt_base::ok.  If no
     *  conversion is necessary, returns codecvt_base::noconv.  If
     *  the input ends early or there is insufficient space in the
     *  output, returns codecvt_base::partial.  Otherwise the
     *  conversion failed and codecvt_base::error is returned.
     *
     *  @param  __state  Persistent conversion state data.
     *  @param  __from  Start of input.
     *  @param  __from_end  End of input.
     *  @param  __from_next  Returns start of unconverted data.
     *  @param  __to  Start of output buffer.
     *  @param  __to_end  End of output buffer.
     *  @param  __to_next  Returns start of unused output area.
     *  @return  codecvt_base::result.
     */
    result
    out(state_type& __state, const intern_type* __from,
        const intern_type* __from_end, const intern_type*& __from_next,
        extern_type* __to, extern_type* __to_end,
        extern_type*& __to_next) const
    {
      return this->do_out(__state, __from, __from_end, __from_next,
                          __to, __to_end, __to_next);
    }

    /**
     *  @brief  Reset conversion state.
     *
     *  Writes characters to output that would restore @a state to initial
     *  conditions.  The idea is that if a partial conversion occurs, then
     *  converting the characters written by this function would leave
     *  the state in initial conditions, rather than partial conversion
     *  state.  It does this by calling codecvt::do_unshift().
     *
     *  For example, if 4 external characters always converted to 1 internal
     *  character, and input to in() had 6 external characters with state
     *  saved, this function would write two characters to the output and
     *  set the state to initialized conditions.
     *
     *  The source and destination character sets are determined by the
     *  facet's locale, internal and external types.
     *
     *  The result returned is a member of codecvt_base::result.  If the
     *  state could be reset and data written, returns codecvt_base::ok.  If
     *  no conversion is necessary, returns codecvt_base::noconv.  If the
     *  output has insufficient space, returns codecvt_base::partial.
     *  Otherwise the reset failed and codecvt_base::error is returned.
     *
     *  @param  __state  Persistent conversion state data.
     *  @param  __to  Start of output buffer.
     *  @param  __to_end  End of output buffer.
     *  @param  __to_next  Returns start of unused output area.
     *  @return  codecvt_base::result.
     */
    result
    unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
            extern_type*& __to_next) const
    { return this->do_unshift(__state, __to,__to_end,__to_next); }

    /**
     *  @brief  Convert from external to internal character set.
     *
     *  Converts input string of extern_type to output string of
     *  intern_type.  This is analogous to mbsrtowcs.  It does this by
     *  calling codecvt::do_in.
     *
     *  The source and destination character sets are determined by the
     *  facet's locale, internal and external types.
     *
     *  The characters in [from,from_end) are converted and written to
     *  [to,to_end).  from_next and to_next are set to point to the
     *  character following the last successfully converted character,
     *  respectively.  If the result needed no conversion, from_next and
     *  to_next are not affected.
     *
     *  The @a state argument should be initialized if the input is at the
     *  beginning and carried from a previous call if continuing
     *  conversion.  There are no guarantees about how @a state is used.
     *
     *  The result returned is a member of codecvt_base::result.  If
     *  all the input is converted, returns codecvt_base::ok.  If no
     *  conversion is necessary, returns codecvt_base::noconv.  If
     *  the input ends early or there is insufficient space in the
     *  output, returns codecvt_base::partial.  Otherwise the
     *  conversion failed and codecvt_base::error is returned.
     *
     *  @param  __state  Persistent conversion state data.
     *  @param  __from  Start of input.
     *  @param  __from_end  End of input.
     *  @param  __from_next  Returns start of unconverted data.
     *  @param  __to  Start of output buffer.
     *  @param  __to_end  End of output buffer.
     *  @param  __to_next  Returns start of unused output area.
     *  @return  codecvt_base::result.
     */
    result
    in(state_type& __state, const extern_type* __from,
       const extern_type* __from_end, const extern_type*& __from_next,
       intern_type* __to, intern_type* __to_end,
       intern_type*& __to_next) const
    {
      return this->do_in(__state, __from, __from_end, __from_next,
                         __to, __to_end, __to_next);
    }

    /**
     *  @brief  Query the facet's encoding property.
     *
     *  Forwards to the virtual do_encoding().  The meaning of the value
     *  is defined by the overriding facet (see ISO C++ 22.2.1.5); this
     *  wrapper adds nothing to it.
     *
     *  @return  Whatever do_encoding() returns.
     */
    int
    encoding() const throw()
    { return this->do_encoding(); }

    /**
     *  @brief  Query whether the facet never needs to convert.
     *
     *  Forwards to the virtual do_always_noconv().
     *
     *  @return  Whatever do_always_noconv() returns.
     */
    bool
    always_noconv() const throw()
    { return this->do_always_noconv(); }

    /**
     *  @brief  Measure external input.
     *
     *  Forwards all four arguments unchanged to the virtual do_length().
     *  NOTE(review): ISO C++ describes the result as the number of
     *  external characters in [from,end) that would be consumed to
     *  produce at most @a __max internal characters; nothing in this
     *  header enforces that, so confirm against the concrete facet.
     *
     *  @param  __state  Persistent conversion state data.
     *  @param  __from  Start of input.
     *  @param  __end  End of input.
     *  @param  __max  Limit passed through to do_length().
     *  @return  Whatever do_length() returns.
     */
    int
    length(state_type& __state, const extern_type* __from,
           const extern_type* __end, size_t __max) const
    { return this->do_length(__state, __from, __end, __max); }

    /**
     *  @brief  Query the facet's maximum length property.
     *
     *  Forwards to the virtual do_max_length().
     *
     *  @return  Whatever do_max_length() returns.
     */
    int
    max_length() const throw()
    { return this->do_max_length(); }

  protected:
    /**
     *  @brief  Constructor, reachable only from derived classes.
     *  @param  __refs  Passed unchanged to the locale::facet constructor.
     */
    explicit
    __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }

    /// Protected virtual destructor with an empty body.
    virtual
    ~__codecvt_abstract_base() { }

    // The seven hooks below are pure virtual: every concrete facet must
    // override all of them.  Their declaration order determines the
    // virtual table layout, so it must be left as is.

    /**
     *  @brief  Convert from internal to external character set.
     *
     *  Converts input string of intern_type to output string of
     *  extern_type.  This function is a hook for derived classes to change
     *  the value returned.  @see out for more information.
     */
    virtual result
    do_out(state_type& __state, const intern_type* __from,
           const intern_type* __from_end, const intern_type*& __from_next,
           extern_type* __to, extern_type* __to_end,
           extern_type*& __to_next) const = 0;

    /// Hook called by unshift().  @see unshift for the contract.
    virtual result
    do_unshift(state_type& __state, extern_type* __to,
               extern_type* __to_end, extern_type*& __to_next) const = 0;

    /// Hook called by in().  @see in for the contract.
    virtual result
    do_in(state_type& __state, const extern_type* __from,
          const extern_type* __from_end, const extern_type*& __from_next,
          intern_type* __to, intern_type* __to_end,
          intern_type*& __to_next) const = 0;

    /// Hook called by encoding().  Declared throw().
    virtual int
    do_encoding() const throw() = 0;

    /// Hook called by always_noconv().  Declared throw().
    virtual bool
    do_always_noconv() const throw() = 0;

    /// Hook called by length() with the same four arguments.
    virtual int
    do_length(state_type&, const extern_type* __from,
              const extern_type* __end, size_t __max) const = 0;

    /// Hook called by max_length().  Declared throw().
    virtual int
    do_max_length() const throw() = 0;
  };

/**
 *  @brief  Primary class template codecvt.
 *  @ingroup locales
 *
 *  NB: Generic, mostly useless implementation.
*
 */
template<typename _InternT, typename _ExternT, typename _StateT>
  class codecvt
  : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
  {
  public:
    // Types:
    // NOTE(review): presumably repeated here because names from a
    // dependent base class are not found by unqualified lookup -- confirm.
    typedef codecvt_base::result result;
    typedef _InternT intern_type;
    typedef _ExternT extern_type;
    typedef _StateT state_type;

  protected:
    // C locale handle.  The constructor defined below initializes it to 0;
    // codecvt_byname replaces it through _S_destroy_c_locale and
    // _S_create_c_locale.
    __c_locale _M_c_locale_codecvt;

  public:
    /// Facet identifier; defined immediately after the class.
    static locale::id id;

    /**
     *  @brief  Construct the facet with a null C locale handle.
     *  @param  __refs  Passed unchanged to the base class constructor.
     */
    explicit
    codecvt(size_t __refs = 0)
    : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
      _M_c_locale_codecvt(0)
    { }

    /**
     *  @brief  Construct the facet from an existing C locale handle.
     *
     *  Declared only; the definition is not part of this header.
     *
     *  @param  __cloc  C locale handle.
     *  @param  __refs  Reference-count argument, as for the other
     *                  constructor.
     */
    explicit
    codecvt(__c_locale __cloc, size_t __refs = 0);

  protected:
    /// Protected virtual destructor with an empty body.
    virtual
    ~codecvt() { }

    // Overrides of the seven pure virtual hooks of
    // __codecvt_abstract_base.  They are declared here without inline
    // bodies; the declaration order must match the base class.

    /// Hook for out().
    virtual result
    do_out(state_type& __state, const intern_type* __from,
           const intern_type* __from_end, const intern_type*& __from_next,
           extern_type* __to, extern_type* __to_end,
           extern_type*& __to_next) const;

    /// Hook for unshift().
    virtual result
    do_unshift(state_type& __state, extern_type* __to,
               extern_type* __to_end, extern_type*& __to_next) const;

    /// Hook for in().
    virtual result
    do_in(state_type& __state, const extern_type* __from,
          const extern_type* __from_end, const extern_type*& __from_next,
          intern_type* __to, intern_type* __to_end,
          intern_type*& __to_next) const;

    /// Hook for encoding().
    virtual int
    do_encoding() const throw();

    /// Hook for always_noconv().
    virtual bool
    do_always_noconv() const throw();

    /// Hook for length().
    virtual int
    do_length(state_type&, const extern_type* __from,
              const extern_type* __end, size_t __max) const;

    /// Hook for max_length().
    virtual int
    do_max_length() const throw();
  };

// Definition of the static facet id for every instantiation of the
// primary template.
template<typename _InternT, typename _ExternT, typename _StateT>
  locale::id codecvt<_InternT, _ExternT, _StateT>::id;

/// class codecvt<char, char, mbstate_t> specialization.
template<>
  class codecvt<char, char, mbstate_t>
  : public __codecvt_abstract_base<char, char, mbstate_t>
  {
    // NOTE(review): grants messages<char> access to the protected
    // members; the reason is not visible in this header -- confirm.
    friend class messages<char>;

  public:
    // Types:
    typedef char intern_type;
    typedef char extern_type;
    typedef mbstate_t state_type;

  protected:
    // C locale handle used by this facet.
    __c_locale _M_c_locale_codecvt;

  public:
    /// Facet identifier (declared here, defined out of line).
    static locale::id id;

    // Unlike the primary template, both constructors and the destructor
    // are only declared; their definitions are not part of this header.

    /// @param  __refs  Reference-count argument for the facet.
    explicit
    codecvt(size_t __refs = 0);

    /// @param  __cloc  C locale handle.
    /// @param  __refs  Reference-count argument for the facet.
    explicit
    codecvt(__c_locale __cloc, size_t __refs = 0);

  protected:
    virtual
    ~codecvt();

    // Overrides of the seven pure virtual hooks of
    // __codecvt_abstract_base, in the same order as the base class.

    /// Hook for out().
    virtual result
    do_out(state_type& __state, const intern_type* __from,
           const intern_type* __from_end, const intern_type*& __from_next,
           extern_type* __to, extern_type* __to_end,
           extern_type*& __to_next) const;

    /// Hook for unshift().
    virtual result
    do_unshift(state_type& __state, extern_type* __to,
               extern_type* __to_end, extern_type*& __to_next) const;

    /// Hook for in().
    virtual result
    do_in(state_type& __state, const extern_type* __from,
          const extern_type* __from_end, const extern_type*& __from_next,
          intern_type* __to, intern_type* __to_end,
          intern_type*& __to_next) const;

    /// Hook for encoding().
    virtual int
    do_encoding() const throw();

    /// Hook for always_noconv().
    virtual bool
    do_always_noconv() const throw();

    /// Hook for length().
    virtual int
    do_length(state_type&, const extern_type* __from,
              const extern_type* __end, size_t __max) const;

    /// Hook for max_length().
    virtual int
    do_max_length() const throw();
  };

#ifdef _GLIBCXX_USE_WCHAR_T
/** @brief  Class codecvt<wchar_t, char, mbstate_t> specialization.
*
 *  Converts between narrow and wide characters in the native character set
 */
template<>
  class codecvt<wchar_t, char, mbstate_t>
  : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
  {
    // NOTE(review): grants messages<wchar_t> access to the protected
    // members; the reason is not visible in this header -- confirm.
    friend class messages<wchar_t>;

  public:
    // Types:
    typedef wchar_t intern_type;
    typedef char extern_type;
    typedef mbstate_t state_type;

  protected:
    // C locale handle used by this facet.
    __c_locale _M_c_locale_codecvt;

  public:
    /// Facet identifier (declared here, defined out of line).
    static locale::id id;

    // Both constructors and the destructor are only declared; their
    // definitions are not part of this header.

    /// @param  __refs  Reference-count argument for the facet.
    explicit
    codecvt(size_t __refs = 0);

    /// @param  __cloc  C locale handle.
    /// @param  __refs  Reference-count argument for the facet.
    explicit
    codecvt(__c_locale __cloc, size_t __refs = 0);

  protected:
    virtual
    ~codecvt();

    // Overrides of the seven pure virtual hooks of
    // __codecvt_abstract_base, in the same order as the base class.

    /// Hook for out(): wchar_t input, char output.
    virtual result
    do_out(state_type& __state, const intern_type* __from,
           const intern_type* __from_end, const intern_type*& __from_next,
           extern_type* __to, extern_type* __to_end,
           extern_type*& __to_next) const;

    /// Hook for unshift().
    virtual result
    do_unshift(state_type& __state,
               extern_type* __to, extern_type* __to_end,
               extern_type*& __to_next) const;

    /// Hook for in(): char input, wchar_t output.
    virtual result
    do_in(state_type& __state,
          const extern_type* __from, const extern_type* __from_end,
          const extern_type*& __from_next,
          intern_type* __to, intern_type* __to_end,
          intern_type*& __to_next) const;

    /// Hook for encoding().
    virtual
    int do_encoding() const throw();

    /// Hook for always_noconv().
    virtual
    bool do_always_noconv() const throw();

    /// Hook for length().
    virtual
    int do_length(state_type&, const extern_type* __from,
                  const extern_type* __end, size_t __max) const;

    /// Hook for max_length().
    virtual int
    do_max_length() const throw();
  };
#endif //_GLIBCXX_USE_WCHAR_T

#if __cplusplus >= 201103L
#ifdef _GLIBCXX_USE_C99_STDINT_TR1
/** @brief  Class codecvt<char16_t, char, mbstate_t> specialization.
 *
 *  Converts between UTF-16 and UTF-8.
*/
template<>
  class codecvt<char16_t, char, mbstate_t>
  : public __codecvt_abstract_base<char16_t, char, mbstate_t>
  {
  public:
    // Types:
    typedef char16_t intern_type;
    typedef char extern_type;
    typedef mbstate_t state_type;

  public:
    /// Facet identifier (declared here, defined out of line).
    static locale::id id;

    /**
     *  @brief  Construct the facet.
     *
     *  This specialization declares no C locale handle member and no
     *  constructor taking one; the only work done is forwarding
     *  @a __refs to the base class.
     *
     *  @param  __refs  Passed unchanged to the base class constructor.
     */
    explicit
    codecvt(size_t __refs = 0)
    : __codecvt_abstract_base<char16_t, char, mbstate_t>(__refs) { }

  protected:
    // Declared only; the definition is not part of this header.
    virtual
    ~codecvt();

    // Overrides of the seven pure virtual hooks of
    // __codecvt_abstract_base, in the same order as the base class.

    /// Hook for out(): char16_t input, char output.
    virtual result
    do_out(state_type& __state, const intern_type* __from,
           const intern_type* __from_end, const intern_type*& __from_next,
           extern_type* __to, extern_type* __to_end,
           extern_type*& __to_next) const;

    /// Hook for unshift().
    virtual result
    do_unshift(state_type& __state,
               extern_type* __to, extern_type* __to_end,
               extern_type*& __to_next) const;

    /// Hook for in(): char input, char16_t output.
    virtual result
    do_in(state_type& __state,
          const extern_type* __from, const extern_type* __from_end,
          const extern_type*& __from_next,
          intern_type* __to, intern_type* __to_end,
          intern_type*& __to_next) const;

    /// Hook for encoding().
    virtual
    int do_encoding() const throw();

    /// Hook for always_noconv().
    virtual
    bool do_always_noconv() const throw();

    /// Hook for length().
    virtual
    int do_length(state_type&, const extern_type* __from,
                  const extern_type* __end, size_t __max) const;

    /// Hook for max_length().
    virtual int
    do_max_length() const throw();
  };

/** @brief  Class codecvt<char32_t, char, mbstate_t> specialization.
 *
 *  Converts between UTF-32 and UTF-8.
*/
template<>
  class codecvt<char32_t, char, mbstate_t>
  : public __codecvt_abstract_base<char32_t, char, mbstate_t>
  {
  public:
    // Types:
    typedef char32_t intern_type;
    typedef char extern_type;
    typedef mbstate_t state_type;

  public:
    /// Facet identifier (declared here, defined out of line).
    static locale::id id;

    /**
     *  @brief  Construct the facet.
     *
     *  This specialization declares no C locale handle member and no
     *  constructor taking one; the only work done is forwarding
     *  @a __refs to the base class.
     *
     *  @param  __refs  Passed unchanged to the base class constructor.
     */
    explicit
    codecvt(size_t __refs = 0)
    : __codecvt_abstract_base<char32_t, char, mbstate_t>(__refs) { }

  protected:
    // Declared only; the definition is not part of this header.
    virtual
    ~codecvt();

    // Overrides of the seven pure virtual hooks of
    // __codecvt_abstract_base, in the same order as the base class.

    /// Hook for out(): char32_t input, char output.
    virtual result
    do_out(state_type& __state, const intern_type* __from,
           const intern_type* __from_end, const intern_type*& __from_next,
           extern_type* __to, extern_type* __to_end,
           extern_type*& __to_next) const;

    /// Hook for unshift().
    virtual result
    do_unshift(state_type& __state,
               extern_type* __to, extern_type* __to_end,
               extern_type*& __to_next) const;

    /// Hook for in(): char input, char32_t output.
    virtual result
    do_in(state_type& __state,
          const extern_type* __from, const extern_type* __from_end,
          const extern_type*& __from_next,
          intern_type* __to, intern_type* __to_end,
          intern_type*& __to_next) const;

    /// Hook for encoding().
    virtual
    int do_encoding() const throw();

    /// Hook for always_noconv().
    virtual
    bool do_always_noconv() const throw();

    /// Hook for length().
    virtual
    int do_length(state_type&, const extern_type* __from,
                  const extern_type* __end, size_t __max) const;

    /// Hook for max_length().
    virtual int
    do_max_length() const throw();
  };

#endif // _GLIBCXX_USE_C99_STDINT_TR1
#endif // C++11

/// class codecvt_byname [22.2.1.6].
581 template<typename _InternT, typename _ExternT, typename _StateT> 582 class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT> 583 { 584 public: 585 explicit 586 codecvt_byname(const char* __s, size_t __refs = 0) 587 : codecvt<_InternT, _ExternT, _StateT>(__refs) 588 { 589 if (__builtin_strcmp(__s, "C") != 0 590 && __builtin_strcmp(__s, "POSIX") != 0) 591 { 592 this->_S_destroy_c_locale(this->_M_c_locale_codecvt); 593 this->_S_create_c_locale(this->_M_c_locale_codecvt, __s); 594 } 595 } 596 597 #if __cplusplus >= 201103L 598 explicit 599 codecvt_byname(const string& __s, size_t __refs = 0) 600 : codecvt_byname(__s.c_str(), __refs) { } 601 #endif 602 603 protected: 604 virtual 605 ~codecvt_byname() { } 606 }; 607 608 #if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1) 609 template<> 610 class codecvt_byname<char16_t, char, mbstate_t> 611 : public codecvt<char16_t, char, mbstate_t> 612 { 613 public: 614 explicit 615 codecvt_byname(const char*, size_t __refs = 0) 616 : codecvt<char16_t, char, mbstate_t>(__refs) { } 617 618 explicit 619 codecvt_byname(const string& __s, size_t __refs = 0) 620 : codecvt_byname(__s.c_str(), __refs) { } 621 622 protected: 623 virtual 624 ~codecvt_byname() { } 625 }; 626 627 template<> 628 class codecvt_byname<char32_t, char, mbstate_t> 629 : public codecvt<char32_t, char, mbstate_t> 630 { 631 public: 632 explicit 633 codecvt_byname(const char*, size_t __refs = 0) 634 : codecvt<char32_t, char, mbstate_t>(__refs) { } 635 636 explicit 637 codecvt_byname(const string& __s, size_t __refs = 0) 638 : codecvt_byname(__s.c_str(), __refs) { } 639 640 protected: 641 virtual 642 ~codecvt_byname() { } 643 }; 644 #endif 645 646 // Inhibit implicit instantiations for required instantiations, 647 // which are defined via explicit instantiations elsewhere. 
#if _GLIBCXX_EXTERN_TEMPLATE
// Each 'extern template' below is an explicit instantiation declaration:
// it names an instantiation without defining it in this translation unit.

// char facets.
extern template class codecvt_byname<char, char, mbstate_t>;

extern template
  const codecvt<char, char, mbstate_t>&
  use_facet<codecvt<char, char, mbstate_t> >(const locale&);

extern template
  bool
  has_facet<codecvt<char, char, mbstate_t> >(const locale&);

#ifdef _GLIBCXX_USE_WCHAR_T
// wchar_t facets, only when wide character support is configured.
extern template class codecvt_byname<wchar_t, char, mbstate_t>;

extern template
  const codecvt<wchar_t, char, mbstate_t>&
  use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);

extern template
  bool
  has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
#endif

#if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
// char16_t / char32_t facets: only the byname classes are listed here.
extern template class codecvt_byname<char16_t, char, mbstate_t>;
extern template class codecvt_byname<char32_t, char, mbstate_t>;
#endif

#endif // _GLIBCXX_EXTERN_TEMPLATE

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace std

#endif // _CODECVT_H