1 // std::codecvt implementation details, DragonFly version -*- C++ -*-
2 
3 // Copyright (C) 2015-2020 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 //
26 // ISO C++ 14882: 22.2.1.5 - Template class codecvt
27 //
28 
29 // Written by Benjamin Kosnik <bkoz@redhat.com>
30 // Modified for DragonFly by John Marino <gnugcc@marino.st>
31 
32 #include <locale>
33 #include <cstring>
34 #include <cstdlib>  // For MB_CUR_MAX
35 #include <climits>  // For MB_LEN_MAX
36 
37 #include "xlocale_port.h"
38 
39 namespace std _GLIBCXX_VISIBILITY(default)
40 {
41 _GLIBCXX_BEGIN_NAMESPACE_VERSION
42 
43   // Specializations.
44 #ifdef _GLIBCXX_USE_WCHAR_T
45   codecvt_base::result
46   codecvt<wchar_t, char, mbstate_t>::
do_out(state_type & __state,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const47   do_out(state_type& __state, const intern_type* __from,
48 	 const intern_type* __from_end, const intern_type*& __from_next,
49 	 extern_type* __to, extern_type* __to_end,
50 	 extern_type*& __to_next) const
51   {
52     result __ret = ok;
53     state_type __tmp_state(__state);
54 
55     // wcsnrtombs is *very* fast but stops if encounters NUL characters:
56     // in case we fall back to wcrtomb and then continue, in a loop.
57     // NB: wcsnrtombs is a GNU extension
58     for (__from_next = __from, __to_next = __to;
59 	 __from_next < __from_end && __to_next < __to_end
60 	 && __ret == ok;)
61       {
62 	const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
63 						      __from_end - __from_next);
64 	if (!__from_chunk_end)
65 	  __from_chunk_end = __from_end;
66 
67 	__from = __from_next;
68 	const size_t __conv = wcsnrtombs_l(__to_next, &__from_next,
69 					 __from_chunk_end - __from_next,
70 					 __to_end - __to_next, &__state,
71 					 (locale_t)_M_c_locale_codecvt);
72 	if (__conv == static_cast<size_t>(-1))
73 	  {
74 	    // In case of error, in order to stop at the exact place we
75 	    // have to start again from the beginning with a series of
76 	    // wcrtomb.
77 	    for (; __from < __from_next; ++__from)
78 	      __to_next += wcrtomb_l(__to_next, *__from, &__tmp_state,
79 		(locale_t)_M_c_locale_codecvt);
80 	    __state = __tmp_state;
81 	    __ret = error;
82 	  }
83 	else if (__from_next && __from_next < __from_chunk_end)
84 	  {
85 	    __to_next += __conv;
86 	    __ret = partial;
87 	  }
88 	else
89 	  {
90 	    __from_next = __from_chunk_end;
91 	    __to_next += __conv;
92 	  }
93 
94 	if (__from_next < __from_end && __ret == ok)
95 	  {
96 	    extern_type __buf[MB_LEN_MAX];
97 	    __tmp_state = __state;
98 	    const size_t __conv2 = wcrtomb_l(__buf, *__from_next, &__tmp_state,
99 		(locale_t)_M_c_locale_codecvt);
100 	    if (__conv2 > static_cast<size_t>(__to_end - __to_next))
101 	      __ret = partial;
102 	    else
103 	      {
104 		memcpy(__to_next, __buf, __conv2);
105 		__state = __tmp_state;
106 		__to_next += __conv2;
107 		++__from_next;
108 	      }
109 	  }
110       }
111 
112     return __ret;
113   }
114 
115   codecvt_base::result
116   codecvt<wchar_t, char, mbstate_t>::
do_in(state_type & __state,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const117   do_in(state_type& __state, const extern_type* __from,
118 	const extern_type* __from_end, const extern_type*& __from_next,
119 	intern_type* __to, intern_type* __to_end,
120 	intern_type*& __to_next) const
121   {
122     result __ret = ok;
123     state_type __tmp_state(__state);
124 
125     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
126     // in case we store a L'\0' and then continue, in a loop.
127     // NB: mbsnrtowcs is a GNU extension
128     for (__from_next = __from, __to_next = __to;
129 	 __from_next < __from_end && __to_next < __to_end
130 	 && __ret == ok;)
131       {
132 	const extern_type* __from_chunk_end;
133 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
134 								  __from_end
135 								  - __from_next));
136 	if (!__from_chunk_end)
137 	  __from_chunk_end = __from_end;
138 
139 	__from = __from_next;
140 	size_t __conv = mbsnrtowcs_l(__to_next, &__from_next,
141 				   __from_chunk_end - __from_next,
142 				   __to_end - __to_next, &__state,
143 				   (locale_t)_M_c_locale_codecvt);
144 	if (__conv == static_cast<size_t>(-1))
145 	  {
146 	    // In case of error, in order to stop at the exact place we
147 	    // have to start again from the beginning with a series of
148 	    // mbrtowc.
149 	    for (;; ++__to_next, __from += __conv)
150 	      {
151 		__conv = mbrtowc_l(__to_next, __from, __from_end - __from,
152 				 &__tmp_state, (locale_t)_M_c_locale_codecvt);
153 		if (__conv == static_cast<size_t>(-1)
154 		    || __conv == static_cast<size_t>(-2))
155 		  break;
156 	      }
157 	    __from_next = __from;
158 	    __state = __tmp_state;
159 	    __ret = error;
160 	  }
161 	else if (__from_next && __from_next < __from_chunk_end)
162 	  {
163 	    // It is unclear what to return in this case (see DR 382).
164 	    __to_next += __conv;
165 	    __ret = partial;
166 	  }
167 	else
168 	  {
169 	    __from_next = __from_chunk_end;
170 	    __to_next += __conv;
171 	  }
172 
173 	if (__from_next < __from_end && __ret == ok)
174 	  {
175 	    if (__to_next < __to_end)
176 	      {
177 		// XXX Probably wrong for stateful encodings
178 		__tmp_state = __state;
179 		++__from_next;
180 		*__to_next++ = L'\0';
181 	      }
182 	    else
183 	      __ret = partial;
184 	  }
185       }
186 
187     return __ret;
188   }
189 
190   int
191   codecvt<wchar_t, char, mbstate_t>::
do_encoding() const192   do_encoding() const throw()
193   {
194     // XXX This implementation assumes that the encoding is
195     // stateless and is either single-byte or variable-width.
196     return MB_CUR_MAX_L((locale_t)_M_c_locale_codecvt) == 1 ? 1 : 0;
197   }
198 
199   int
200   codecvt<wchar_t, char, mbstate_t>::
do_max_length() const201   do_max_length() const throw()
202   {
203     // XXX Probably wrong for stateful encodings.
204     return MB_CUR_MAX_L((locale_t)_M_c_locale_codecvt);
205   }
206 
207   int
208   codecvt<wchar_t, char, mbstate_t>::
do_length(state_type & __state,const extern_type * __from,const extern_type * __end,size_t __max) const209   do_length(state_type& __state, const extern_type* __from,
210 	    const extern_type* __end, size_t __max) const
211   {
212     int __ret = 0;
213     state_type __tmp_state(__state);
214 
215     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
216     // in case we advance past it and then continue, in a loop.
217     // NB: mbsnrtowcs is a GNU extension
218 
219     // A dummy internal buffer is needed in order for mbsnrtocws to consider
220     // its fourth parameter (it wouldn't with NULL as first parameter).
221     wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
222 							   * __max));
223     while (__from < __end && __max)
224       {
225 	const extern_type* __from_chunk_end;
226 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0',
227 								  __end
228 								  - __from));
229 	if (!__from_chunk_end)
230 	  __from_chunk_end = __end;
231 
232 	const extern_type* __tmp_from = __from;
233 	size_t __conv = mbsnrtowcs_l(__to, &__from,
234 				   __from_chunk_end - __from,
235 				   __max, &__state,
236 				   (locale_t)_M_c_locale_codecvt);
237 	if (__conv == static_cast<size_t>(-1))
238 	  {
239 	    // In case of error, in order to stop at the exact place we
240 	    // have to start again from the beginning with a series of
241 	    // mbrtowc.
242 	    for (__from = __tmp_from;; __from += __conv)
243 	      {
244 		__conv = mbrtowc_l(0, __from, __end - __from,
245 				 &__tmp_state, (locale_t)_M_c_locale_codecvt);
246 		if (__conv == static_cast<size_t>(-1)
247 		    || __conv == static_cast<size_t>(-2))
248 		  break;
249 	      }
250 	    __state = __tmp_state;
251 	    __ret += __from - __tmp_from;
252 	    break;
253 	  }
254 	if (!__from)
255 	  __from = __from_chunk_end;
256 
257 	__ret += __from - __tmp_from;
258 	__max -= __conv;
259 
260 	if (__from < __end && __max)
261 	  {
262 	    // XXX Probably wrong for stateful encodings
263 	    __tmp_state = __state;
264 	    ++__from;
265 	    ++__ret;
266 	    --__max;
267 	  }
268       }
269 
270     return __ret;
271   }
272 #endif
273 
274 _GLIBCXX_END_NAMESPACE_VERSION
275 } // namespace
276