1 // wstring_convert implementation -*- C++ -*-
2 
3 // Copyright (C) 2015-2018 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file bits/locale_conv.h
26  *  This is an internal header file, included by other library headers.
27  *  Do not attempt to use it directly. @headername{locale}
28  */
29 
30 #ifndef _LOCALE_CONV_H
31 #define _LOCALE_CONV_H 1
32 
33 #if __cplusplus < 201103L
34 # include <bits/c++0x_warning.h>
35 #else
36 
37 #include <streambuf>
38 #include <bits/stringfwd.h>
39 #include <bits/allocator.h>
40 #include <bits/codecvt.h>
41 #include <bits/unique_ptr.h>
42 
43 namespace std _GLIBCXX_VISIBILITY(default)
44 {
45 _GLIBCXX_BEGIN_NAMESPACE_VERSION
46 
47   /**
48    * @addtogroup locales
49    * @{
50    */
51 
52   template<typename _OutStr, typename _InChar, typename _Codecvt,
53 	   typename _State, typename _Fn>
54     bool
55     __do_str_codecvt(const _InChar* __first, const _InChar* __last,
56 		     _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
57 		     size_t& __count, _Fn __fn)
58     {
59       if (__first == __last)
60 	{
61 	  __outstr.clear();
62 	  __count = 0;
63 	  return true;
64 	}
65 
66       size_t __outchars = 0;
67       auto __next = __first;
68       const auto __maxlen = __cvt.max_length() + 1;
69 
70       codecvt_base::result __result;
71       do
72 	{
73 	  __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
74 	  auto __outnext = &__outstr.front() + __outchars;
75 	  auto const __outlast = &__outstr.back() + 1;
76 	  __result = (__cvt.*__fn)(__state, __next, __last, __next,
77 					__outnext, __outlast, __outnext);
78 	  __outchars = __outnext - &__outstr.front();
79 	}
80       while (__result == codecvt_base::partial && __next != __last
81 	     && (__outstr.size() - __outchars) < __maxlen);
82 
83       if (__result == codecvt_base::error)
84 	{
85 	  __count = __next - __first;
86 	  return false;
87 	}
88 
89       if (__result == codecvt_base::noconv)
90 	{
91 	  __outstr.assign(__first, __last);
92 	  __count = __last - __first;
93 	}
94       else
95 	{
96 	  __outstr.resize(__outchars);
97 	  __count = __next - __first;
98 	}
99 
100       return true;
101     }
102 
103   // Convert narrow character string to wide.
104   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
105     inline bool
106     __str_codecvt_in(const char* __first, const char* __last,
107 		     basic_string<_CharT, _Traits, _Alloc>& __outstr,
108 		     const codecvt<_CharT, char, _State>& __cvt,
109 		     _State& __state, size_t& __count)
110     {
111       using _Codecvt = codecvt<_CharT, char, _State>;
112       using _ConvFn
113 	= codecvt_base::result
114 	  (_Codecvt::*)(_State&, const char*, const char*, const char*&,
115 			_CharT*, _CharT*, _CharT*&) const;
116       _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
117       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
118 			      __count, __fn);
119     }
120 
121   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
122     inline bool
123     __str_codecvt_in(const char* __first, const char* __last,
124 		     basic_string<_CharT, _Traits, _Alloc>& __outstr,
125 		     const codecvt<_CharT, char, _State>& __cvt)
126     {
127       _State __state = {};
128       size_t __n;
129       return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
130     }
131 
132   // Convert wide character string to narrow.
133   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
134     inline bool
135     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
136 		      basic_string<char, _Traits, _Alloc>& __outstr,
137 		      const codecvt<_CharT, char, _State>& __cvt,
138 		      _State& __state, size_t& __count)
139     {
140       using _Codecvt = codecvt<_CharT, char, _State>;
141       using _ConvFn
142 	= codecvt_base::result
143 	  (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
144 			char*, char*, char*&) const;
145       _ConvFn __fn = &codecvt<_CharT, char, _State>::out;
146       return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
147 			      __count, __fn);
148     }
149 
150   template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
151     inline bool
152     __str_codecvt_out(const _CharT* __first, const _CharT* __last,
153 		      basic_string<char, _Traits, _Alloc>& __outstr,
154 		      const codecvt<_CharT, char, _State>& __cvt)
155     {
156       _State __state = {};
157       size_t __n;
158       return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
159     }
160 
161 #ifdef _GLIBCXX_USE_WCHAR_T
162 
163 _GLIBCXX_BEGIN_NAMESPACE_CXX11
164 
165   /// String conversions
166   template<typename _Codecvt, typename _Elem = wchar_t,
167 	   typename _Wide_alloc = allocator<_Elem>,
168 	   typename _Byte_alloc = allocator<char>>
169     class wstring_convert
170     {
171     public:
172       typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
173       typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
174       typedef typename _Codecvt::state_type 			   state_type;
175       typedef typename wide_string::traits_type::int_type	   int_type;
176 
177       /** Default constructor.
178        *
179        * @param  __pcvt The facet to use for conversions.
180        *
181        * Takes ownership of @p __pcvt and will delete it in the destructor.
182        */
183       explicit
184       wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt)
185       {
186 	if (!_M_cvt)
187 	  __throw_logic_error("wstring_convert");
188       }
189 
190       /** Construct with an initial converstion state.
191        *
192        * @param  __pcvt The facet to use for conversions.
193        * @param  __state Initial conversion state.
194        *
195        * Takes ownership of @p __pcvt and will delete it in the destructor.
196        * The object's conversion state will persist between conversions.
197        */
198       wstring_convert(_Codecvt* __pcvt, state_type __state)
199       : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
200       {
201 	if (!_M_cvt)
202 	  __throw_logic_error("wstring_convert");
203       }
204 
205       /** Construct with error strings.
206        *
207        * @param  __byte_err A string to return on failed conversions.
208        * @param  __wide_err A wide string to return on failed conversions.
209        */
210       explicit
211       wstring_convert(const byte_string& __byte_err,
212 		      const wide_string& __wide_err = wide_string())
213       : _M_cvt(new _Codecvt),
214 	_M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
215 	_M_with_strings(true)
216       {
217 	if (!_M_cvt)
218 	  __throw_logic_error("wstring_convert");
219       }
220 
221       ~wstring_convert() = default;
222 
223       // _GLIBCXX_RESOLVE_LIB_DEFECTS
224       // 2176. Special members for wstring_convert and wbuffer_convert
225       wstring_convert(const wstring_convert&) = delete;
226       wstring_convert& operator=(const wstring_convert&) = delete;
227 
228       /// @{ Convert from bytes.
229       wide_string
230       from_bytes(char __byte)
231       {
232 	char __bytes[2] = { __byte };
233 	return from_bytes(__bytes, __bytes+1);
234       }
235 
236       wide_string
237       from_bytes(const char* __ptr)
238       { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
239 
240       wide_string
241       from_bytes(const byte_string& __str)
242       {
243 	auto __ptr = __str.data();
244 	return from_bytes(__ptr, __ptr + __str.size());
245       }
246 
247       wide_string
248       from_bytes(const char* __first, const char* __last)
249       {
250 	if (!_M_with_cvtstate)
251 	  _M_state = state_type();
252 	wide_string __out{ _M_wide_err_string.get_allocator() };
253 	if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
254 			     _M_count))
255 	  return __out;
256 	if (_M_with_strings)
257 	  return _M_wide_err_string;
258 	__throw_range_error("wstring_convert::from_bytes");
259       }
260       /// @}
261 
262       /// @{ Convert to bytes.
263       byte_string
264       to_bytes(_Elem __wchar)
265       {
266 	_Elem __wchars[2] = { __wchar };
267 	return to_bytes(__wchars, __wchars+1);
268       }
269 
270       byte_string
271       to_bytes(const _Elem* __ptr)
272       {
273 	return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
274       }
275 
276       byte_string
277       to_bytes(const wide_string& __wstr)
278       {
279 	auto __ptr = __wstr.data();
280 	return to_bytes(__ptr, __ptr + __wstr.size());
281       }
282 
283       byte_string
284       to_bytes(const _Elem* __first, const _Elem* __last)
285       {
286 	if (!_M_with_cvtstate)
287 	  _M_state = state_type();
288 	byte_string __out{ _M_byte_err_string.get_allocator() };
289 	if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
290 			      _M_count))
291 	  return __out;
292 	if (_M_with_strings)
293 	  return _M_byte_err_string;
294 	__throw_range_error("wstring_convert::to_bytes");
295       }
296       /// @}
297 
298       // _GLIBCXX_RESOLVE_LIB_DEFECTS
299       // 2174. wstring_convert::converted() should be noexcept
300       /// The number of elements successfully converted in the last conversion.
301       size_t converted() const noexcept { return _M_count; }
302 
303       /// The final conversion state of the last conversion.
304       state_type state() const { return _M_state; }
305 
306     private:
307       unique_ptr<_Codecvt>	_M_cvt;
308       byte_string		_M_byte_err_string;
309       wide_string		_M_wide_err_string;
310       state_type		_M_state = state_type();
311       size_t			_M_count = 0;
312       bool			_M_with_cvtstate = false;
313       bool			_M_with_strings = false;
314     };
315 
316 _GLIBCXX_END_NAMESPACE_CXX11
317 
318   /// Buffer conversions
319   template<typename _Codecvt, typename _Elem = wchar_t,
320 	   typename _Tr = char_traits<_Elem>>
321     class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
322     {
323       typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;
324 
325     public:
326       typedef typename _Codecvt::state_type state_type;
327 
328       /** Default constructor.
329        *
330        * @param  __bytebuf The underlying byte stream buffer.
331        * @param  __pcvt    The facet to use for conversions.
332        * @param  __state   Initial conversion state.
333        *
334        * Takes ownership of @p __pcvt and will delete it in the destructor.
335        */
336       explicit
337       wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt,
338 		      state_type __state = state_type())
339       : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
340       {
341 	if (!_M_cvt)
342 	  __throw_logic_error("wbuffer_convert");
343 
344 	_M_always_noconv = _M_cvt->always_noconv();
345 
346 	if (_M_buf)
347 	  {
348 	    this->setp(_M_put_area, _M_put_area + _S_buffer_length);
349 	    this->setg(_M_get_area + _S_putback_length,
350 		       _M_get_area + _S_putback_length,
351 		       _M_get_area + _S_putback_length);
352 	  }
353       }
354 
355       ~wbuffer_convert() = default;
356 
357       // _GLIBCXX_RESOLVE_LIB_DEFECTS
358       // 2176. Special members for wstring_convert and wbuffer_convert
359       wbuffer_convert(const wbuffer_convert&) = delete;
360       wbuffer_convert& operator=(const wbuffer_convert&) = delete;
361 
362       streambuf* rdbuf() const noexcept { return _M_buf; }
363 
364       streambuf*
365       rdbuf(streambuf *__bytebuf) noexcept
366       {
367 	auto __prev = _M_buf;
368 	_M_buf = __bytebuf;
369 	return __prev;
370       }
371 
372       /// The conversion state following the last conversion.
373       state_type state() const noexcept { return _M_state; }
374 
375     protected:
376       int
377       sync()
378       { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }
379 
380       typename _Wide_streambuf::int_type
381       overflow(typename _Wide_streambuf::int_type __out)
382       {
383 	if (!_M_buf || !_M_conv_put())
384 	  return _Tr::eof();
385 	else if (!_Tr::eq_int_type(__out, _Tr::eof()))
386 	  return this->sputc(__out);
387 	return _Tr::not_eof(__out);
388       }
389 
390       typename _Wide_streambuf::int_type
391       underflow()
392       {
393 	if (!_M_buf)
394 	  return _Tr::eof();
395 
396 	if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
397 	  return _Tr::to_int_type(*this->gptr());
398 	else
399 	  return _Tr::eof();
400       }
401 
402       streamsize
403       xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
404       {
405 	if (!_M_buf || __n == 0)
406 	  return 0;
407 	streamsize __done = 0;
408 	do
409 	{
410 	  auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
411 					   __n - __done);
412 	  _Tr::copy(this->pptr(), __s + __done, __nn);
413 	  this->pbump(__nn);
414 	  __done += __nn;
415 	} while (__done < __n && _M_conv_put());
416 	return __done;
417       }
418 
419     private:
420       // fill the get area from converted contents of the byte stream buffer
421       bool
422       _M_conv_get()
423       {
424 	const streamsize __pb1 = this->gptr() - this->eback();
425 	const streamsize __pb2 = _S_putback_length;
426 	const streamsize __npb = std::min(__pb1, __pb2);
427 
428 	_Tr::move(_M_get_area + _S_putback_length - __npb,
429 		  this->gptr() - __npb, __npb);
430 
431 	streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
432 	__nbytes = std::min(__nbytes, _M_buf->in_avail());
433 	if (__nbytes < 1)
434 	  __nbytes = 1;
435 	__nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
436 	if (__nbytes < 1)
437 	  return false;
438 	__nbytes += _M_unconv;
439 
440 	// convert _M_get_buf into _M_get_area
441 
442 	_Elem* __outbuf = _M_get_area + _S_putback_length;
443 	_Elem* __outnext = __outbuf;
444 	const char* __bnext = _M_get_buf;
445 
446 	codecvt_base::result __result;
447 	if (_M_always_noconv)
448 	  __result = codecvt_base::noconv;
449 	else
450 	  {
451 	    _Elem* __outend = _M_get_area + _S_buffer_length;
452 
453 	    __result = _M_cvt->in(_M_state,
454 				  __bnext, __bnext + __nbytes, __bnext,
455 				  __outbuf, __outend, __outnext);
456 	  }
457 
458 	if (__result == codecvt_base::noconv)
459 	  {
460 	    // cast is safe because noconv means _Elem is same type as char
461 	    auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
462 	    _Tr::copy(__outbuf, __get_buf, __nbytes);
463 	    _M_unconv = 0;
464 	    return true;
465 	  }
466 
467 	if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
468 	  char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);
469 
470 	this->setg(__outbuf, __outbuf, __outnext);
471 
472 	return __result != codecvt_base::error;
473       }
474 
475       // unused
476       bool
477       _M_put(...)
478       { return false; }
479 
480       bool
481       _M_put(const char* __p, streamsize __n)
482       {
483 	if (_M_buf->sputn(__p, __n) < __n)
484 	  return false;
485 	return true;
486       }
487 
488       // convert the put area and write to the byte stream buffer
489       bool
490       _M_conv_put()
491       {
492 	_Elem* const __first = this->pbase();
493 	const _Elem* const __last = this->pptr();
494 	const streamsize __pending = __last - __first;
495 
496 	if (_M_always_noconv)
497 	  return _M_put(__first, __pending);
498 
499 	char __outbuf[2 * _S_buffer_length];
500 
501 	const _Elem* __next = __first;
502 	const _Elem* __start;
503 	do
504 	  {
505 	    __start = __next;
506 	    char* __outnext = __outbuf;
507 	    char* const __outlast = __outbuf + sizeof(__outbuf);
508 	    auto __result = _M_cvt->out(_M_state, __next, __last, __next,
509 					__outnext, __outlast, __outnext);
510 	    if (__result == codecvt_base::error)
511 	      return false;
512 	    else if (__result == codecvt_base::noconv)
513 	      return _M_put(__next, __pending);
514 
515 	    if (!_M_put(__outbuf, __outnext - __outbuf))
516 	      return false;
517 	  }
518 	while (__next != __last && __next != __start);
519 
520 	if (__next != __last)
521 	  _Tr::move(__first, __next, __last - __next);
522 
523 	this->pbump(__first - __next);
524 	return __next != __first;
525       }
526 
527       streambuf*		_M_buf;
528       unique_ptr<_Codecvt>	_M_cvt;
529       state_type		_M_state;
530 
531       static const streamsize	_S_buffer_length = 32;
532       static const streamsize	_S_putback_length = 3;
533       _Elem                     _M_put_area[_S_buffer_length];
534       _Elem                     _M_get_area[_S_buffer_length];
535       streamsize		_M_unconv = 0;
536       char			_M_get_buf[_S_buffer_length-_S_putback_length];
537       bool			_M_always_noconv;
538     };
539 
540 #endif  // _GLIBCXX_USE_WCHAR_T
541 
542   /// @} group locales
543 
544 _GLIBCXX_END_NAMESPACE_VERSION
545 } // namespace
546 
547 #endif // __cplusplus
548 
549 #endif /* _LOCALE_CONV_H */
550