1 // wstring_convert implementation -*- C++ -*-
2 
3 // Copyright (C) 2015-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file bits/locale_conv.h
26  *  This is an internal header file, included by other library headers.
27  *  Do not attempt to use it directly. @headername{locale}
28  */
29 
30 #ifndef _LOCALE_CONV_H
31 #define _LOCALE_CONV_H 1
32 
33 #if __cplusplus < 201103L
34 # include <bits/c++0x_warning.h>
35 #else
36 
37 #include <streambuf>
38 #include <bits/stringfwd.h>
39 #include <bits/allocator.h>
40 #include <bits/codecvt.h>
41 #include <bits/unique_ptr.h>
42 
_GLIBCXX_VISIBILITY(default)43 namespace std _GLIBCXX_VISIBILITY(default)
44 {
45 _GLIBCXX_BEGIN_NAMESPACE_VERSION
46 
47   /**
48    * @addtogroup locales
49    * @{
50    */
51 
52   template<typename _OutStr, typename _InChar, typename _Codecvt,
53 	   typename _State, typename _Fn>
54     bool
55     __do_str_codecvt(const _InChar* __first, const _InChar* __last,
56 		     _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
57 		     size_t& __count, _Fn __fn)
58     {
59       if (__first == __last)
60 	{
61 	  __outstr.clear();
62 	  __count = 0;
63 	  return true;
64 	}
65 
66       size_t __outchars = 0;
67       auto __next = __first;
68       const auto __maxlen = __cvt.max_length() + 1;
69 
70       codecvt_base::result __result;
71       do
72 	{
73 	  __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
74 	  auto __outnext = &__outstr.front() + __outchars;
75 	  auto const __outlast = &__outstr.back() + 1;
76 	  __result = (__cvt.*__fn)(__state, __next, __last, __next,
77 					__outnext, __outlast, __outnext);
78 	  __outchars = __outnext - &__outstr.front();
79 	}
80       while (__result == codecvt_base::partial && __next != __last
81 	     && ptrdiff_t(__outstr.size() - __outchars) < __maxlen);
82 
83       if (__result == codecvt_base::error)
84 	{
85 	  __count = __next - __first;
86 	  return false;
87 	}
88 
89       // The codecvt facet will only return noconv when the types are
90       // the same, so avoid instantiating basic_string::assign otherwise
91       if _GLIBCXX17_CONSTEXPR (is_same<typename _Codecvt::intern_type,
92 				       typename _Codecvt::extern_type>())
93 	if (__result == codecvt_base::noconv)
94 	  {
95 	    __outstr.assign(__first, __last);
96 	    __count = __last - __first;
97 	    return true;
98 	  }
99 
100       __outstr.resize(__outchars);
101       __count = __next - __first;
102       return true;
103     }
104 
105   // Convert narrow character string to wide.
106   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
107     inline bool
108     __str_codecvt_in(const char* __first, const char* __last,
109 		     basic_string<_CharT, _Traits, _Alloc>& __outstr,
110 		     const codecvt<_CharT, char, _State>& __cvt,
111 		     _State& __state, size_t& __count)
112     {
113       using _Codecvt = codecvt<_CharT, char, _State>;
114       using _ConvFn
115 	= codecvt_base::result
116 	  (_Codecvt::*)(_State&, const char*, const char*, const char*&,
117 			_CharT*, _CharT*, _CharT*&) const;
118       _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
119       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
120 			      __count, __fn);
121     }
122 
123   // As above, but with no __count parameter
124   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
125     inline bool
126     __str_codecvt_in(const char* __first, const char* __last,
127 		     basic_string<_CharT, _Traits, _Alloc>& __outstr,
128 		     const codecvt<_CharT, char, _State>& __cvt)
129     {
130       _State __state = {};
131       size_t __n;
132       return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
133     }
134 
135   // As above, but returns false for partial conversion
136   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
137     inline bool
138     __str_codecvt_in_all(const char* __first, const char* __last,
139 			 basic_string<_CharT, _Traits, _Alloc>& __outstr,
140 			 const codecvt<_CharT, char, _State>& __cvt)
141     {
142       _State __state = {};
143       size_t __n;
144       return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n)
145 	&& (__n == size_t(__last - __first));
146     }
147 
148   // Convert wide character string to narrow.
149   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
150     inline bool
151     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
152 		      basic_string<char, _Traits, _Alloc>& __outstr,
153 		      const codecvt<_CharT, char, _State>& __cvt,
154 		      _State& __state, size_t& __count)
155     {
156       using _Codecvt = codecvt<_CharT, char, _State>;
157       using _ConvFn
158 	= codecvt_base::result
159 	  (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
160 			char*, char*, char*&) const;
161       _ConvFn __fn = &codecvt<_CharT, char, _State>::out;
162       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
163 			      __count, __fn);
164     }
165 
166   // As above, but with no __count parameter
167   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
168     inline bool
169     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
170 		      basic_string<char, _Traits, _Alloc>& __outstr,
171 		      const codecvt<_CharT, char, _State>& __cvt)
172     {
173       _State __state = {};
174       size_t __n;
175       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
176     }
177 
178   // As above, but returns false for partial conversions
179   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
180     inline bool
181     __str_codecvt_out_all(const _CharT* __first, const _CharT* __last,
182 			  basic_string<char, _Traits, _Alloc>& __outstr,
183 			  const codecvt<_CharT, char, _State>& __cvt)
184     {
185       _State __state = {};
186       size_t __n;
187       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n)
188 	&& (__n == size_t(__last - __first));
189     }
190 
191 #ifdef _GLIBCXX_USE_CHAR8_T
192 
193   // Convert wide character string to narrow.
194   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
195     inline bool
196     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
197 		      basic_string<char8_t, _Traits, _Alloc>& __outstr,
198 		      const codecvt<_CharT, char8_t, _State>& __cvt,
199 		      _State& __state, size_t& __count)
200     {
201       using _Codecvt = codecvt<_CharT, char8_t, _State>;
202       using _ConvFn
203 	= codecvt_base::result
204 	  (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
205 			char8_t*, char8_t*, char8_t*&) const;
206       _ConvFn __fn = &codecvt<_CharT, char8_t, _State>::out;
207       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
208 			      __count, __fn);
209     }
210 
211   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
212     inline bool
213     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
214 		      basic_string<char8_t, _Traits, _Alloc>& __outstr,
215 		      const codecvt<_CharT, char8_t, _State>& __cvt)
216     {
217       _State __state = {};
218       size_t __n;
219       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
220     }
221 
222 #endif  // _GLIBCXX_USE_CHAR8_T
223 
224 #ifdef _GLIBCXX_USE_WCHAR_T
225 
226 _GLIBCXX_BEGIN_NAMESPACE_CXX11
227 
228   /// String conversions
229   template<typename _Codecvt, typename _Elem = wchar_t,
230 	   typename _Wide_alloc = allocator<_Elem>,
231 	   typename _Byte_alloc = allocator<char>>
232     class wstring_convert
233     {
234     public:
235       typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
236       typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
237       typedef typename _Codecvt::state_type 			   state_type;
238       typedef typename wide_string::traits_type::int_type	   int_type;
239 
240       /// Default constructor.
241       wstring_convert() : _M_cvt(new _Codecvt()) { }
242 
243       /** Constructor.
244        *
245        * @param  __pcvt The facet to use for conversions.
246        *
247        * Takes ownership of @p __pcvt and will delete it in the destructor.
248        */
249       explicit
250       wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt)
251       {
252 	if (!_M_cvt)
253 	  __throw_logic_error("wstring_convert");
254       }
255 
256       /** Construct with an initial converstion state.
257        *
258        * @param  __pcvt The facet to use for conversions.
259        * @param  __state Initial conversion state.
260        *
261        * Takes ownership of @p __pcvt and will delete it in the destructor.
262        * The object's conversion state will persist between conversions.
263        */
264       wstring_convert(_Codecvt* __pcvt, state_type __state)
265       : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
266       {
267 	if (!_M_cvt)
268 	  __throw_logic_error("wstring_convert");
269       }
270 
271       /** Construct with error strings.
272        *
273        * @param  __byte_err A string to return on failed conversions.
274        * @param  __wide_err A wide string to return on failed conversions.
275        */
276       explicit
277       wstring_convert(const byte_string& __byte_err,
278 		      const wide_string& __wide_err = wide_string())
279       : _M_cvt(new _Codecvt),
280 	_M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
281 	_M_with_strings(true)
282       {
283 	if (!_M_cvt)
284 	  __throw_logic_error("wstring_convert");
285       }
286 
287       ~wstring_convert() = default;
288 
289       // _GLIBCXX_RESOLVE_LIB_DEFECTS
290       // 2176. Special members for wstring_convert and wbuffer_convert
291       wstring_convert(const wstring_convert&) = delete;
292       wstring_convert& operator=(const wstring_convert&) = delete;
293 
294       /// @{ Convert from bytes.
295       wide_string
296       from_bytes(char __byte)
297       {
298 	char __bytes[2] = { __byte };
299 	return from_bytes(__bytes, __bytes+1);
300       }
301 
302       wide_string
303       from_bytes(const char* __ptr)
304       { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
305 
306       wide_string
307       from_bytes(const byte_string& __str)
308       {
309 	auto __ptr = __str.data();
310 	return from_bytes(__ptr, __ptr + __str.size());
311       }
312 
313       wide_string
314       from_bytes(const char* __first, const char* __last)
315       {
316 	if (!_M_with_cvtstate)
317 	  _M_state = state_type();
318 	wide_string __out{ _M_wide_err_string.get_allocator() };
319 	if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
320 			     _M_count))
321 	  return __out;
322 	if (_M_with_strings)
323 	  return _M_wide_err_string;
324 	__throw_range_error("wstring_convert::from_bytes");
325       }
326       /// @}
327 
328       /// @{ Convert to bytes.
329       byte_string
330       to_bytes(_Elem __wchar)
331       {
332 	_Elem __wchars[2] = { __wchar };
333 	return to_bytes(__wchars, __wchars+1);
334       }
335 
336       byte_string
337       to_bytes(const _Elem* __ptr)
338       {
339 	return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
340       }
341 
342       byte_string
343       to_bytes(const wide_string& __wstr)
344       {
345 	auto __ptr = __wstr.data();
346 	return to_bytes(__ptr, __ptr + __wstr.size());
347       }
348 
349       byte_string
350       to_bytes(const _Elem* __first, const _Elem* __last)
351       {
352 	if (!_M_with_cvtstate)
353 	  _M_state = state_type();
354 	byte_string __out{ _M_byte_err_string.get_allocator() };
355 	if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
356 			      _M_count))
357 	  return __out;
358 	if (_M_with_strings)
359 	  return _M_byte_err_string;
360 	__throw_range_error("wstring_convert::to_bytes");
361       }
362       /// @}
363 
364       // _GLIBCXX_RESOLVE_LIB_DEFECTS
365       // 2174. wstring_convert::converted() should be noexcept
366       /// The number of elements successfully converted in the last conversion.
367       size_t converted() const noexcept { return _M_count; }
368 
369       /// The final conversion state of the last conversion.
370       state_type state() const { return _M_state; }
371 
372     private:
373       unique_ptr<_Codecvt>	_M_cvt;
374       byte_string		_M_byte_err_string;
375       wide_string		_M_wide_err_string;
376       state_type		_M_state = state_type();
377       size_t			_M_count = 0;
378       bool			_M_with_cvtstate = false;
379       bool			_M_with_strings = false;
380     };
381 
382 _GLIBCXX_END_NAMESPACE_CXX11
383 
384   /// Buffer conversions
385   template<typename _Codecvt, typename _Elem = wchar_t,
386 	   typename _Tr = char_traits<_Elem>>
387     class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
388     {
389       typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
390 
391     public:
392       typedef typename _Codecvt::state_type state_type;
393 
394       /// Default constructor.
395       wbuffer_convert() : wbuffer_convert(nullptr) { }
396 
397       /** Constructor.
398        *
399        * @param  __bytebuf The underlying byte stream buffer.
400        * @param  __pcvt    The facet to use for conversions.
401        * @param  __state   Initial conversion state.
402        *
403        * Takes ownership of @p __pcvt and will delete it in the destructor.
404        */
405       explicit
406       wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
407 		      state_type __state = state_type())
408       : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
409       {
410 	if (!_M_cvt)
411 	  __throw_logic_error("wbuffer_convert");
412 
413 	_M_always_noconv = _M_cvt->always_noconv();
414 
415 	if (_M_buf)
416 	  {
417 	    this->setp(_M_put_area, _M_put_area + _S_buffer_length);
418 	    this->setg(_M_get_area + _S_putback_length,
419 		       _M_get_area + _S_putback_length,
420 		       _M_get_area + _S_putback_length);
421 	  }
422       }
423 
424       ~wbuffer_convert() = default;
425 
426       // _GLIBCXX_RESOLVE_LIB_DEFECTS
427       // 2176. Special members for wstring_convert and wbuffer_convert
428       wbuffer_convert(const wbuffer_convert&) = delete;
429       wbuffer_convert& operator=(const wbuffer_convert&) = delete;
430 
431       streambuf* rdbuf() const noexcept { return _M_buf; }
432 
433       streambuf*
434       rdbuf(streambuf *__bytebuf) noexcept
435       {
436 	auto __prev = _M_buf;
437 	_M_buf = __bytebuf;
438 	return __prev;
439       }
440 
441       /// The conversion state following the last conversion.
442       state_type state() const noexcept { return _M_state; }
443 
444     protected:
445       int
446       sync()
447       { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }
448 
449       typename _Wide_streambuf::int_type
450       overflow(typename _Wide_streambuf::int_type __out)
451       {
452 	if (!_M_buf || !_M_conv_put())
453 	  return _Tr::eof();
454 	else if (!_Tr::eq_int_type(__out, _Tr::eof()))
455 	  return this->sputc(__out);
456 	return _Tr::not_eof(__out);
457       }
458 
459       typename _Wide_streambuf::int_type
460       underflow()
461       {
462 	if (!_M_buf)
463 	  return _Tr::eof();
464 
465 	if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
466 	  return _Tr::to_int_type(*this->gptr());
467 	else
468 	  return _Tr::eof();
469       }
470 
471       streamsize
472       xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
473       {
474 	if (!_M_buf || __n == 0)
475 	  return 0;
476 	streamsize __done = 0;
477 	do
478 	{
479 	  auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
480 					   __n - __done);
481 	  _Tr::copy(this->pptr(), __s + __done, __nn);
482 	  this->pbump(__nn);
483 	  __done += __nn;
484 	} while (__done < __n && _M_conv_put());
485 	return __done;
486       }
487 
488     private:
489       // fill the get area from converted contents of the byte stream buffer
490       bool
491       _M_conv_get()
492       {
493 	const streamsize __pb1 = this->gptr() - this->eback();
494 	const streamsize __pb2 = _S_putback_length;
495 	const streamsize __npb = std::min(__pb1, __pb2);
496 
497 	_Tr::move(_M_get_area + _S_putback_length - __npb,
498 		  this->gptr() - __npb, __npb);
499 
500 	streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
501 	__nbytes = std::min(__nbytes, _M_buf->in_avail());
502 	if (__nbytes < 1)
503 	  __nbytes = 1;
504 	__nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
505 	if (__nbytes < 1)
506 	  return false;
507 	__nbytes += _M_unconv;
508 
509 	// convert _M_get_buf into _M_get_area
510 
511 	_Elem* __outbuf = _M_get_area + _S_putback_length;
512 	_Elem* __outnext = __outbuf;
513 	const char* __bnext = _M_get_buf;
514 
515 	codecvt_base::result __result;
516 	if (_M_always_noconv)
517 	  __result = codecvt_base::noconv;
518 	else
519 	  {
520 	    _Elem* __outend = _M_get_area + _S_buffer_length;
521 
522 	    __result = _M_cvt->in(_M_state,
523 				  __bnext, __bnext + __nbytes, __bnext,
524 				  __outbuf, __outend, __outnext);
525 	  }
526 
527 	if (__result == codecvt_base::noconv)
528 	  {
529 	    // cast is safe because noconv means _Elem is same type as char
530 	    auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
531 	    _Tr::copy(__outbuf, __get_buf, __nbytes);
532 	    _M_unconv = 0;
533 	    return true;
534 	  }
535 
536 	if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
537 	  char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
538 
539 	this->setg(__outbuf, __outbuf, __outnext);
540 
541 	return __result != codecvt_base::error;
542       }
543 
544       // unused
545       bool
546       _M_put(...)
547       { return false; }
548 
549       bool
550       _M_put(const char* __p, streamsize __n)
551       {
552 	if (_M_buf->sputn(__p, __n) < __n)
553 	  return false;
554 	return true;
555       }
556 
557       // convert the put area and write to the byte stream buffer
558       bool
559       _M_conv_put()
560       {
561 	_Elem* const __first = this->pbase();
562 	const _Elem* const __last = this->pptr();
563 	const streamsize __pending = __last - __first;
564 
565 	if (_M_always_noconv)
566 	  return _M_put(__first, __pending);
567 
568 	char __outbuf[2 * _S_buffer_length];
569 
570 	const _Elem* __next = __first;
571 	const _Elem* __start;
572 	do
573 	  {
574 	    __start = __next;
575 	    char* __outnext = __outbuf;
576 	    char* const __outlast = __outbuf + sizeof(__outbuf);
577 	    auto __result = _M_cvt->out(_M_state, __next, __last, __next,
578 					__outnext, __outlast, __outnext);
579 	    if (__result == codecvt_base::error)
580 	      return false;
581 	    else if (__result == codecvt_base::noconv)
582 	      return _M_put(__next, __pending);
583 
584 	    if (!_M_put(__outbuf, __outnext - __outbuf))
585 	      return false;
586 	  }
587 	while (__next != __last && __next != __start);
588 
589 	if (__next != __last)
590 	  _Tr::move(__first, __next, __last - __next);
591 
592 	this->pbump(__first - __next);
593 	return __next != __first;
594       }
595 
596       streambuf*		_M_buf;
597       unique_ptr<_Codecvt>	_M_cvt;
598       state_type		_M_state;
599 
600       static const streamsize	_S_buffer_length = 32;
601       static const streamsize	_S_putback_length = 3;
602       _Elem                     _M_put_area[_S_buffer_length];
603       _Elem                     _M_get_area[_S_buffer_length];
604       streamsize		_M_unconv = 0;
605       char			_M_get_buf[_S_buffer_length-_S_putback_length];
606       bool			_M_always_noconv;
607     };
608 
609 #endif  // _GLIBCXX_USE_WCHAR_T
610 
611   /// @} group locales
612 
613 _GLIBCXX_END_NAMESPACE_VERSION
614 } // namespace
615 
616 #endif // __cplusplus
617 
618 #endif /* _LOCALE_CONV_H */
619