1 // wstring_convert implementation -*- C++ -*- 2 3 // Copyright (C) 2015-2018 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** @file bits/locale_conv.h 26 * This is an internal header file, included by other library headers. 27 * Do not attempt to use it directly. @headername{locale} 28 */ 29 30 #ifndef _LOCALE_CONV_H 31 #define _LOCALE_CONV_H 1 32 33 #if __cplusplus < 201103L 34 # include <bits/c++0x_warning.h> 35 #else 36 37 #include <streambuf> 38 #include <bits/stringfwd.h> 39 #include <bits/allocator.h> 40 #include <bits/codecvt.h> 41 #include <bits/unique_ptr.h> 42 43 namespace std _GLIBCXX_VISIBILITY(default) 44 { 45 _GLIBCXX_BEGIN_NAMESPACE_VERSION 46 47 /** 48 * @addtogroup locales 49 * @{ 50 */ 51 52 template<typename _OutStr, typename _InChar, typename _Codecvt, 53 typename _State, typename _Fn> 54 bool 55 __do_str_codecvt(const _InChar* __first, const _InChar* __last, 56 _OutStr& __outstr, const _Codecvt& __cvt, _State& __state, 57 size_t& __count, _Fn __fn) 58 { 59 if (__first == __last) 60 { 61 __outstr.clear(); 62 __count = 0; 63 return true; 64 } 65 66 size_t __outchars = 0; 67 auto __next = __first; 68 const auto __maxlen = __cvt.max_length() + 1; 69 70 codecvt_base::result __result; 71 do 72 { 73 __outstr.resize(__outstr.size() + (__last - __next) * __maxlen); 74 auto __outnext = &__outstr.front() + __outchars; 75 auto const __outlast = &__outstr.back() + 1; 76 __result = (__cvt.*__fn)(__state, __next, __last, __next, 77 __outnext, __outlast, __outnext); 78 __outchars = __outnext - &__outstr.front(); 79 } 80 while (__result == codecvt_base::partial && __next != __last 81 && (__outstr.size() - __outchars) < __maxlen); 82 83 if (__result == codecvt_base::error) 84 { 85 __count = __next - __first; 86 return false; 87 } 88 89 if (__result == codecvt_base::noconv) 90 { 91 __outstr.assign(__first, __last); 92 __count = __last - __first; 93 } 94 else 95 { 96 __outstr.resize(__outchars); 97 __count = __next - __first; 98 } 99 100 return true; 101 } 102 103 // Convert narrow character string to wide. 104 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 105 inline bool 106 __str_codecvt_in(const char* __first, const char* __last, 107 basic_string<_CharT, _Traits, _Alloc>& __outstr, 108 const codecvt<_CharT, char, _State>& __cvt, 109 _State& __state, size_t& __count) 110 { 111 using _Codecvt = codecvt<_CharT, char, _State>; 112 using _ConvFn 113 = codecvt_base::result 114 (_Codecvt::*)(_State&, const char*, const char*, const char*&, 115 _CharT*, _CharT*, _CharT*&) const; 116 _ConvFn __fn = &codecvt<_CharT, char, _State>::in; 117 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 118 __count, __fn); 119 } 120 121 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 122 inline bool 123 __str_codecvt_in(const char* __first, const char* __last, 124 basic_string<_CharT, _Traits, _Alloc>& __outstr, 125 const codecvt<_CharT, char, _State>& __cvt) 126 { 127 _State __state = {}; 128 size_t __n; 129 return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n); 130 } 131 132 // Convert wide character string to narrow. 133 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 134 inline bool 135 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 136 basic_string<char, _Traits, _Alloc>& __outstr, 137 const codecvt<_CharT, char, _State>& __cvt, 138 _State& __state, size_t& __count) 139 { 140 using _Codecvt = codecvt<_CharT, char, _State>; 141 using _ConvFn 142 = codecvt_base::result 143 (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&, 144 char*, char*, char*&) const; 145 _ConvFn __fn = &codecvt<_CharT, char, _State>::out; 146 return __do_str_codecvt(__first, __last, __outstr, __cvt, __state, 147 __count, __fn); 148 } 149 150 template<typename _CharT, typename _Traits, typename _Alloc, typename _State> 151 inline bool 152 __str_codecvt_out(const _CharT* __first, const _CharT* __last, 153 basic_string<char, _Traits, _Alloc>& __outstr, 154 const codecvt<_CharT, char, _State>& __cvt) 155 { 156 _State __state = {}; 157 size_t __n; 158 return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n); 159 } 160 161 #ifdef _GLIBCXX_USE_WCHAR_T 162 163 _GLIBCXX_BEGIN_NAMESPACE_CXX11 164 165 /// String conversions 166 template<typename _Codecvt, typename _Elem = wchar_t, 167 typename _Wide_alloc = allocator<_Elem>, 168 typename _Byte_alloc = allocator<char>> 169 class wstring_convert 170 { 171 public: 172 typedef basic_string<char, char_traits<char>, _Byte_alloc> byte_string; 173 typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string; 174 typedef typename _Codecvt::state_type state_type; 175 typedef typename wide_string::traits_type::int_type int_type; 176 177 /** Default constructor. 178 * 179 * @param __pcvt The facet to use for conversions. 180 * 181 * Takes ownership of @p __pcvt and will delete it in the destructor. 182 */ 183 explicit 184 wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt) 185 { 186 if (!_M_cvt) 187 __throw_logic_error("wstring_convert"); 188 } 189 190 /** Construct with an initial converstion state. 191 * 192 * @param __pcvt The facet to use for conversions. 193 * @param __state Initial conversion state. 194 * 195 * Takes ownership of @p __pcvt and will delete it in the destructor. 196 * The object's conversion state will persist between conversions. 197 */ 198 wstring_convert(_Codecvt* __pcvt, state_type __state) 199 : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true) 200 { 201 if (!_M_cvt) 202 __throw_logic_error("wstring_convert"); 203 } 204 205 /** Construct with error strings. 206 * 207 * @param __byte_err A string to return on failed conversions. 208 * @param __wide_err A wide string to return on failed conversions. 209 */ 210 explicit 211 wstring_convert(const byte_string& __byte_err, 212 const wide_string& __wide_err = wide_string()) 213 : _M_cvt(new _Codecvt), 214 _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err), 215 _M_with_strings(true) 216 { 217 if (!_M_cvt) 218 __throw_logic_error("wstring_convert"); 219 } 220 221 ~wstring_convert() = default; 222 223 // _GLIBCXX_RESOLVE_LIB_DEFECTS 224 // 2176. Special members for wstring_convert and wbuffer_convert 225 wstring_convert(const wstring_convert&) = delete; 226 wstring_convert& operator=(const wstring_convert&) = delete; 227 228 /// @{ Convert from bytes. 229 wide_string 230 from_bytes(char __byte) 231 { 232 char __bytes[2] = { __byte }; 233 return from_bytes(__bytes, __bytes+1); 234 } 235 236 wide_string 237 from_bytes(const char* __ptr) 238 { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); } 239 240 wide_string 241 from_bytes(const byte_string& __str) 242 { 243 auto __ptr = __str.data(); 244 return from_bytes(__ptr, __ptr + __str.size()); 245 } 246 247 wide_string 248 from_bytes(const char* __first, const char* __last) 249 { 250 if (!_M_with_cvtstate) 251 _M_state = state_type(); 252 wide_string __out{ _M_wide_err_string.get_allocator() }; 253 if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state, 254 _M_count)) 255 return __out; 256 if (_M_with_strings) 257 return _M_wide_err_string; 258 __throw_range_error("wstring_convert::from_bytes"); 259 } 260 /// @} 261 262 /// @{ Convert to bytes. 263 byte_string 264 to_bytes(_Elem __wchar) 265 { 266 _Elem __wchars[2] = { __wchar }; 267 return to_bytes(__wchars, __wchars+1); 268 } 269 270 byte_string 271 to_bytes(const _Elem* __ptr) 272 { 273 return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr)); 274 } 275 276 byte_string 277 to_bytes(const wide_string& __wstr) 278 { 279 auto __ptr = __wstr.data(); 280 return to_bytes(__ptr, __ptr + __wstr.size()); 281 } 282 283 byte_string 284 to_bytes(const _Elem* __first, const _Elem* __last) 285 { 286 if (!_M_with_cvtstate) 287 _M_state = state_type(); 288 byte_string __out{ _M_byte_err_string.get_allocator() }; 289 if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state, 290 _M_count)) 291 return __out; 292 if (_M_with_strings) 293 return _M_byte_err_string; 294 __throw_range_error("wstring_convert::to_bytes"); 295 } 296 /// @} 297 298 // _GLIBCXX_RESOLVE_LIB_DEFECTS 299 // 2174. wstring_convert::converted() should be noexcept 300 /// The number of elements successfully converted in the last conversion. 301 size_t converted() const noexcept { return _M_count; } 302 303 /// The final conversion state of the last conversion. 304 state_type state() const { return _M_state; } 305 306 private: 307 unique_ptr<_Codecvt> _M_cvt; 308 byte_string _M_byte_err_string; 309 wide_string _M_wide_err_string; 310 state_type _M_state = state_type(); 311 size_t _M_count = 0; 312 bool _M_with_cvtstate = false; 313 bool _M_with_strings = false; 314 }; 315 316 _GLIBCXX_END_NAMESPACE_CXX11 317 318 /// Buffer conversions 319 template<typename _Codecvt, typename _Elem = wchar_t, 320 typename _Tr = char_traits<_Elem>> 321 class wbuffer_convert : public basic_streambuf<_Elem, _Tr> 322 { 323 typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf; 324 325 public: 326 typedef typename _Codecvt::state_type state_type; 327 328 /** Default constructor. 329 * 330 * @param __bytebuf The underlying byte stream buffer. 331 * @param __pcvt The facet to use for conversions. 332 * @param __state Initial conversion state. 333 * 334 * Takes ownership of @p __pcvt and will delete it in the destructor. 335 */ 336 explicit 337 wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt, 338 state_type __state = state_type()) 339 : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state) 340 { 341 if (!_M_cvt) 342 __throw_logic_error("wbuffer_convert"); 343 344 _M_always_noconv = _M_cvt->always_noconv(); 345 346 if (_M_buf) 347 { 348 this->setp(_M_put_area, _M_put_area + _S_buffer_length); 349 this->setg(_M_get_area + _S_putback_length, 350 _M_get_area + _S_putback_length, 351 _M_get_area + _S_putback_length); 352 } 353 } 354 355 ~wbuffer_convert() = default; 356 357 // _GLIBCXX_RESOLVE_LIB_DEFECTS 358 // 2176. Special members for wstring_convert and wbuffer_convert 359 wbuffer_convert(const wbuffer_convert&) = delete; 360 wbuffer_convert& operator=(const wbuffer_convert&) = delete; 361 362 streambuf* rdbuf() const noexcept { return _M_buf; } 363 364 streambuf* 365 rdbuf(streambuf *__bytebuf) noexcept 366 { 367 auto __prev = _M_buf; 368 _M_buf = __bytebuf; 369 return __prev; 370 } 371 372 /// The conversion state following the last conversion. 373 state_type state() const noexcept { return _M_state; } 374 375 protected: 376 int 377 sync() 378 { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; } 379 380 typename _Wide_streambuf::int_type 381 overflow(typename _Wide_streambuf::int_type __out) 382 { 383 if (!_M_buf || !_M_conv_put()) 384 return _Tr::eof(); 385 else if (!_Tr::eq_int_type(__out, _Tr::eof())) 386 return this->sputc(__out); 387 return _Tr::not_eof(__out); 388 } 389 390 typename _Wide_streambuf::int_type 391 underflow() 392 { 393 if (!_M_buf) 394 return _Tr::eof(); 395 396 if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get())) 397 return _Tr::to_int_type(*this->gptr()); 398 else 399 return _Tr::eof(); 400 } 401 402 streamsize 403 xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n) 404 { 405 if (!_M_buf || __n == 0) 406 return 0; 407 streamsize __done = 0; 408 do 409 { 410 auto __nn = std::min<streamsize>(this->epptr() - this->pptr(), 411 __n - __done); 412 _Tr::copy(this->pptr(), __s + __done, __nn); 413 this->pbump(__nn); 414 __done += __nn; 415 } while (__done < __n && _M_conv_put()); 416 return __done; 417 } 418 419 private: 420 // fill the get area from converted contents of the byte stream buffer 421 bool 422 _M_conv_get() 423 { 424 const streamsize __pb1 = this->gptr() - this->eback(); 425 const streamsize __pb2 = _S_putback_length; 426 const streamsize __npb = std::min(__pb1, __pb2); 427 428 _Tr::move(_M_get_area + _S_putback_length - __npb, 429 this->gptr() - __npb, __npb); 430 431 streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv; 432 __nbytes = std::min(__nbytes, _M_buf->in_avail()); 433 if (__nbytes < 1) 434 __nbytes = 1; 435 __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes); 436 if (__nbytes < 1) 437 return false; 438 __nbytes += _M_unconv; 439 440 // convert _M_get_buf into _M_get_area 441 442 _Elem* __outbuf = _M_get_area + _S_putback_length; 443 _Elem* __outnext = __outbuf; 444 const char* __bnext = _M_get_buf; 445 446 codecvt_base::result __result; 447 if (_M_always_noconv) 448 __result = codecvt_base::noconv; 449 else 450 { 451 _Elem* __outend = _M_get_area + _S_buffer_length; 452 453 __result = _M_cvt->in(_M_state, 454 __bnext, __bnext + __nbytes, __bnext, 455 __outbuf, __outend, __outnext); 456 } 457 458 if (__result == codecvt_base::noconv) 459 { 460 // cast is safe because noconv means _Elem is same type as char 461 auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf); 462 _Tr::copy(__outbuf, __get_buf, __nbytes); 463 _M_unconv = 0; 464 return true; 465 } 466 467 if ((_M_unconv = _M_get_buf + __nbytes - __bnext)) 468 char_traits<char>::move(_M_get_buf, __bnext, _M_unconv); 469 470 this->setg(__outbuf, __outbuf, __outnext); 471 472 return __result != codecvt_base::error; 473 } 474 475 // unused 476 bool 477 _M_put(...) 478 { return false; } 479 480 bool 481 _M_put(const char* __p, streamsize __n) 482 { 483 if (_M_buf->sputn(__p, __n) < __n) 484 return false; 485 return true; 486 } 487 488 // convert the put area and write to the byte stream buffer 489 bool 490 _M_conv_put() 491 { 492 _Elem* const __first = this->pbase(); 493 const _Elem* const __last = this->pptr(); 494 const streamsize __pending = __last - __first; 495 496 if (_M_always_noconv) 497 return _M_put(__first, __pending); 498 499 char __outbuf[2 * _S_buffer_length]; 500 501 const _Elem* __next = __first; 502 const _Elem* __start; 503 do 504 { 505 __start = __next; 506 char* __outnext = __outbuf; 507 char* const __outlast = __outbuf + sizeof(__outbuf); 508 auto __result = _M_cvt->out(_M_state, __next, __last, __next, 509 __outnext, __outlast, __outnext); 510 if (__result == codecvt_base::error) 511 return false; 512 else if (__result == codecvt_base::noconv) 513 return _M_put(__next, __pending); 514 515 if (!_M_put(__outbuf, __outnext - __outbuf)) 516 return false; 517 } 518 while (__next != __last && __next != __start); 519 520 if (__next != __last) 521 _Tr::move(__first, __next, __last - __next); 522 523 this->pbump(__first - __next); 524 return __next != __first; 525 } 526 527 streambuf* _M_buf; 528 unique_ptr<_Codecvt> _M_cvt; 529 state_type _M_state; 530 531 static const streamsize _S_buffer_length = 32; 532 static const streamsize _S_putback_length = 3; 533 _Elem _M_put_area[_S_buffer_length]; 534 _Elem _M_get_area[_S_buffer_length]; 535 streamsize _M_unconv = 0; 536 char _M_get_buf[_S_buffer_length-_S_putback_length]; 537 bool _M_always_noconv; 538 }; 539 540 #endif // _GLIBCXX_USE_WCHAR_T 541 542 /// @} group locales 543 544 _GLIBCXX_END_NAMESPACE_VERSION 545 } // namespace 546 547 #endif // __cplusplus 548 549 #endif /* _LOCALE_CONV_H */ 550