1 // std::codecvt implementation details, DragonFly version -*- C++ -*-
2 
3 // Copyright (C) 2015-2018 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 //
26 // ISO C++ 14882: 22.2.1.5 - Template class codecvt
27 //
28 
29 // Written by Benjamin Kosnik <bkoz@redhat.com>
30 // Modified for DragonFly by John Marino <gnugcc@marino.st>
31 
32 #include <locale>
33 #include <cstring>
34 #include <cstdlib>  // For MB_CUR_MAX
35 #include <climits>  // For MB_LEN_MAX
36 
37 namespace std _GLIBCXX_VISIBILITY(default)
38 {
39 _GLIBCXX_BEGIN_NAMESPACE_VERSION
40 
41   // Specializations.
42 #ifdef _GLIBCXX_USE_WCHAR_T
43   codecvt_base::result
44   codecvt<wchar_t, char, mbstate_t>::
45   do_out(state_type& __state, const intern_type* __from,
46 	 const intern_type* __from_end, const intern_type*& __from_next,
47 	 extern_type* __to, extern_type* __to_end,
48 	 extern_type*& __to_next) const
49   {
50     result __ret = ok;
51     state_type __tmp_state(__state);
52 
53     __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
54 
55     // wcsnrtombs is *very* fast but stops if encounters NUL characters:
56     // in case we fall back to wcrtomb and then continue, in a loop.
57     // NB: wcsnrtombs is a GNU extension
58     for (__from_next = __from, __to_next = __to;
59 	 __from_next < __from_end && __to_next < __to_end
60 	 && __ret == ok;)
61       {
62 	const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0',
63 						      __from_end - __from_next);
64 	if (!__from_chunk_end)
65 	  __from_chunk_end = __from_end;
66 
67 	__from = __from_next;
68 	const size_t __conv = wcsnrtombs(__to_next, &__from_next,
69 					 __from_chunk_end - __from_next,
70 					 __to_end - __to_next, &__state);
71 	if (__conv == static_cast<size_t>(-1))
72 	  {
73 	    // In case of error, in order to stop at the exact place we
74 	    // have to start again from the beginning with a series of
75 	    // wcrtomb.
76 	    for (; __from < __from_next; ++__from)
77 	      __to_next += wcrtomb(__to_next, *__from, &__tmp_state);
78 	    __state = __tmp_state;
79 	    __ret = error;
80 	  }
81 	else if (__from_next && __from_next < __from_chunk_end)
82 	  {
83 	    __to_next += __conv;
84 	    __ret = partial;
85 	  }
86 	else
87 	  {
88 	    __from_next = __from_chunk_end;
89 	    __to_next += __conv;
90 	  }
91 
92 	if (__from_next < __from_end && __ret == ok)
93 	  {
94 	    extern_type __buf[MB_LEN_MAX];
95 	    __tmp_state = __state;
96 	    const size_t __conv2 = wcrtomb(__buf, *__from_next, &__tmp_state);
97 	    if (__conv2 > static_cast<size_t>(__to_end - __to_next))
98 	      __ret = partial;
99 	    else
100 	      {
101 		memcpy(__to_next, __buf, __conv2);
102 		__state = __tmp_state;
103 		__to_next += __conv2;
104 		++__from_next;
105 	      }
106 	  }
107       }
108 
109     uselocale((locale_t)__old);
110 
111     return __ret;
112   }
113 
114   codecvt_base::result
115   codecvt<wchar_t, char, mbstate_t>::
116   do_in(state_type& __state, const extern_type* __from,
117 	const extern_type* __from_end, const extern_type*& __from_next,
118 	intern_type* __to, intern_type* __to_end,
119 	intern_type*& __to_next) const
120   {
121     result __ret = ok;
122     state_type __tmp_state(__state);
123 
124     __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
125 
126     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
127     // in case we store a L'\0' and then continue, in a loop.
128     // NB: mbsnrtowcs is a GNU extension
129     for (__from_next = __from, __to_next = __to;
130 	 __from_next < __from_end && __to_next < __to_end
131 	 && __ret == ok;)
132       {
133 	const extern_type* __from_chunk_end;
134 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0',
135 								  __from_end
136 								  - __from_next));
137 	if (!__from_chunk_end)
138 	  __from_chunk_end = __from_end;
139 
140 	__from = __from_next;
141 	size_t __conv = mbsnrtowcs(__to_next, &__from_next,
142 				   __from_chunk_end - __from_next,
143 				   __to_end - __to_next, &__state);
144 	if (__conv == static_cast<size_t>(-1))
145 	  {
146 	    // In case of error, in order to stop at the exact place we
147 	    // have to start again from the beginning with a series of
148 	    // mbrtowc.
149 	    for (;; ++__to_next, __from += __conv)
150 	      {
151 		__conv = mbrtowc(__to_next, __from, __from_end - __from,
152 				 &__tmp_state);
153 		if (__conv == static_cast<size_t>(-1)
154 		    || __conv == static_cast<size_t>(-2))
155 		  break;
156 	      }
157 	    __from_next = __from;
158 	    __state = __tmp_state;
159 	    __ret = error;
160 	  }
161 	else if (__from_next && __from_next < __from_chunk_end)
162 	  {
163 	    // It is unclear what to return in this case (see DR 382).
164 	    __to_next += __conv;
165 	    __ret = partial;
166 	  }
167 	else
168 	  {
169 	    __from_next = __from_chunk_end;
170 	    __to_next += __conv;
171 	  }
172 
173 	if (__from_next < __from_end && __ret == ok)
174 	  {
175 	    if (__to_next < __to_end)
176 	      {
177 		// XXX Probably wrong for stateful encodings
178 		__tmp_state = __state;
179 		++__from_next;
180 		*__to_next++ = L'\0';
181 	      }
182 	    else
183 	      __ret = partial;
184 	  }
185       }
186 
187     uselocale((locale_t)__old);
188 
189     return __ret;
190   }
191 
192   int
193   codecvt<wchar_t, char, mbstate_t>::
194   do_encoding() const throw()
195   {
196     // XXX This implementation assumes that the encoding is
197     // stateless and is either single-byte or variable-width.
198     int __ret = 0;
199     __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
200     if (MB_CUR_MAX == 1)
201       __ret = 1;
202     uselocale((locale_t)__old);
203     return __ret;
204   }
205 
206   int
207   codecvt<wchar_t, char, mbstate_t>::
208   do_max_length() const throw()
209   {
210     __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
211     // XXX Probably wrong for stateful encodings.
212     int __ret = MB_CUR_MAX;
213     uselocale((locale_t)__old);
214     return __ret;
215   }
216 
217   int
218   codecvt<wchar_t, char, mbstate_t>::
219   do_length(state_type& __state, const extern_type* __from,
220 	    const extern_type* __end, size_t __max) const
221   {
222     int __ret = 0;
223     state_type __tmp_state(__state);
224 
225     __c_locale __old = (__c_locale)uselocale((locale_t)_M_c_locale_codecvt);
226 
227     // mbsnrtowcs is *very* fast but stops if encounters NUL characters:
228     // in case we advance past it and then continue, in a loop.
229     // NB: mbsnrtowcs is a GNU extension
230 
231     // A dummy internal buffer is needed in order for mbsnrtocws to consider
232     // its fourth parameter (it wouldn't with NULL as first parameter).
233     wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t)
234 							   * __max));
235     while (__from < __end && __max)
236       {
237 	const extern_type* __from_chunk_end;
238 	__from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0',
239 								  __end
240 								  - __from));
241 	if (!__from_chunk_end)
242 	  __from_chunk_end = __end;
243 
244 	const extern_type* __tmp_from = __from;
245 	size_t __conv = mbsnrtowcs(__to, &__from,
246 				   __from_chunk_end - __from,
247 				   __max, &__state);
248 	if (__conv == static_cast<size_t>(-1))
249 	  {
250 	    // In case of error, in order to stop at the exact place we
251 	    // have to start again from the beginning with a series of
252 	    // mbrtowc.
253 	    for (__from = __tmp_from;; __from += __conv)
254 	      {
255 		__conv = mbrtowc(0, __from, __end - __from,
256 				 &__tmp_state);
257 		if (__conv == static_cast<size_t>(-1)
258 		    || __conv == static_cast<size_t>(-2))
259 		  break;
260 	      }
261 	    __state = __tmp_state;
262 	    __ret += __from - __tmp_from;
263 	    break;
264 	  }
265 	if (!__from)
266 	  __from = __from_chunk_end;
267 
268 	__ret += __from - __tmp_from;
269 	__max -= __conv;
270 
271 	if (__from < __end && __max)
272 	  {
273 	    // XXX Probably wrong for stateful encodings
274 	    __tmp_state = __state;
275 	    ++__from;
276 	    ++__ret;
277 	    --__max;
278 	  }
279       }
280 
281     uselocale((locale_t)__old);
282 
283     return __ret;
284   }
285 #endif
286 
287 _GLIBCXX_END_NAMESPACE_VERSION
288 } // namespace
289