1 // std::codecvt implementation details, generic version -*- C++ -*-
2 
3 // Copyright (C) 2002-2018 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 //
26 // ISO C++ 14882: 22.2.1.5 - Template class codecvt
27 //
28 
29 // Written by Benjamin Kosnik <bkoz@redhat.com>
30 
31 #include <locale>
32 #include <cstdlib>  // For MB_CUR_MAX
33 #include <climits>  // For MB_LEN_MAX
34 #include <cstring>
35 
36 namespace std _GLIBCXX_VISIBILITY(default)
37 {
38 _GLIBCXX_BEGIN_NAMESPACE_VERSION
39 
40   // Specializations.
41 #ifdef _GLIBCXX_USE_WCHAR_T
// Convert the wide characters in [__from, __from_end) to multibyte
// characters, one at a time with wcrtomb, storing the bytes in
// [__to, __to_end).
//
// On return __from_next points at the first wide character that was not
// converted and __to_next points one past the last byte stored.  __state
// is only advanced past characters that were converted and stored in
// full.
//
// Returns ok if all of the input was converted, partial if input remains
// because the output range is too small, and error if wcrtomb rejected a
// wide character.
codecvt_base::result
codecvt<wchar_t, char, mbstate_t>::
do_out(state_type& __state, const intern_type* __from,
       const intern_type* __from_end, const intern_type*& __from_next,
       extern_type* __to, extern_type* __to_end,
       extern_type*& __to_next) const
{
  result __ret = ok;
  // Work on a copy of the conversion state so that __state never reflects
  // a character that could not be converted or did not fit.
  state_type __tmp_state(__state);

  // The conversion must be done by calling wcrtomb in a loop rather
  // than using wcsrtombs because wcsrtombs assumes that the input is
  // zero-terminated.

  // Either we can upper bound the total number of external characters to
  // something no larger than __to_end - __to, or the conversion must be
  // done using a temporary destination buffer since it is not possible to
  // pass the size of the buffer to wcrtomb.
  //
  // MB_CUR_MAX has type size_t, so an expression of the form
  // "MB_CUR_MAX * count - room <= 0" is evaluated in unsigned arithmetic
  // and only holds when the difference is exactly zero.  Compare the
  // sizes explicitly instead; dividing rather than multiplying keeps the
  // test free of overflow, and the pointer guards keep both differences
  // non-negative before they are converted to size_t.
  const size_t __mb_max = MB_CUR_MAX;
  bool __fits = false;
  if (__from < __from_end && __to < __to_end)
    __fits = (static_cast<size_t>(__from_end - __from)
              <= static_cast<size_t>(__to_end - __to) / __mb_max);

  if (__fits)
    {
      // Every character is guaranteed to fit: write straight into the
      // destination.
      while (__from < __from_end)
        {
          const size_t __conv = wcrtomb(__to, *__from, &__tmp_state);
          if (__conv == static_cast<size_t>(-1))
            {
              __ret = error;
              break;
            }
          __state = __tmp_state;
          __to += __conv;
          __from++;
        }
    }
  else
    {
      // Convert into a scratch buffer first and copy only if the whole
      // multibyte character fits into what is left of the destination.
      extern_type __buf[MB_LEN_MAX];
      while (__from < __from_end && __to < __to_end)
        {
          const size_t __conv = wcrtomb(__buf, *__from, &__tmp_state);
          if (__conv == static_cast<size_t>(-1))
            {
              __ret = error;
              break;
            }
          else if (__conv > static_cast<size_t>(__to_end - __to))
            {
              __ret = partial;
              break;
            }

          memcpy(__to, __buf, __conv);
          __state = __tmp_state;
          __to += __conv;
          __from++;
        }
    }

  // Running out of destination space with input left over is a partial
  // conversion.
  if (__ret == ok && __from < __from_end)
    __ret = partial;

  __from_next = __from;
  __to_next = __to;
  return __ret;
}
106 
// Convert the multibyte characters in [__from, __from_end) to wide
// characters, one at a time with mbrtowc, storing them in
// [__to, __to_end).
//
// On return __from_next points at the first byte that was not consumed
// and __to_next points one past the last wide character stored.
// Returns ok, partial or error.
codecvt_base::result
codecvt<wchar_t, char, mbstate_t>::
do_in(state_type& __state, const extern_type* __from,
      const extern_type* __from_end, const extern_type*& __from_next,
      intern_type* __to, intern_type* __to_end,
      intern_type*& __to_next) const
{
  // Special return values of mbrtowc.
  const size_t __invalid = static_cast<size_t>(-1);
  const size_t __incomplete = static_cast<size_t>(-2);

  // The scratch state keeps __state untouched when [__from, __from_end)
  // ends in the middle of a multibyte character.
  state_type __scratch(__state);
  result __status = ok;

  // mbrtowc is called once per character instead of a single call to
  // mbsrtowcs, because mbsrtowcs expects a zero-terminated input
  // sequence.
  while (__status == ok && __from < __from_end && __to < __to_end)
    {
      const size_t __used = mbrtowc(__to, __from, __from_end - __from,
                                    &__scratch);
      if (__used == __invalid)
        __status = error;
      else if (__used == __incomplete)
        {
          // It is unclear what to return in this case (see DR 382).
          __status = partial;
        }
      else
        {
          // A return of zero means a null wide character was decoded;
          // it is treated as having consumed a single byte.
          // XXX Probably wrong for stateful encodings
          if (__used == 0)
            *__to = L'\0';

          __state = __scratch;
          __from += (__used == 0 ? 1 : __used);
          ++__to;
        }
    }

  // It is not clear that __from < __from_end implies __ret != ok
  // (see DR 382).
  if (__status == ok && __from < __from_end)
    __status = partial;

  __from_next = __from;
  __to_next = __to;
  return __status;
}
158 
// Report the width of the external encoding: 1 when MB_CUR_MAX says every
// character occupies a single byte, 0 (variable width) otherwise.
int
codecvt<wchar_t, char, mbstate_t>::
do_encoding() const throw()
{
  // XXX This implementation assumes that the encoding is
  // stateless and is either single-byte or variable-width.
  return (MB_CUR_MAX == 1) ? 1 : 0;
}
170 
// Report the largest number of external bytes a single wide character can
// need, taken directly from MB_CUR_MAX.
int
codecvt<wchar_t, char, mbstate_t>::
do_max_length() const throw()
{
  // XXX Probably wrong for stateful encodings.
  return static_cast<int>(MB_CUR_MAX);
}
179 
// Count how many bytes of [__from, __end) would be consumed by decoding
// at most __max multibyte characters with mbrtowc.  __state is advanced
// past every character that decodes completely.
int
codecvt<wchar_t, char, mbstate_t>::
do_length(state_type& __state, const extern_type* __from,
          const extern_type* __end, size_t __max) const
{
  // Special return values of mbrtowc.
  const size_t __invalid = static_cast<size_t>(-1);
  const size_t __incomplete = static_cast<size_t>(-2);

  state_type __scratch(__state);
  int __consumed = 0;

  for (; __from < __end && __max; --__max)
    {
      size_t __used = mbrtowc(0, __from, __end - __from, &__scratch);

      // Stop at an invalid source character, or when the remaining
      // input does not form a complete destination character.
      if (__used == __invalid || __used == __incomplete)
        break;

      // A decoded null character is counted as a single byte.
      // XXX Probably wrong for stateful encodings
      if (__used == 0)
        __used = 1;

      __state = __scratch;
      __from += __used;
      __consumed += __used;
    }

  return __consumed;
}
216 #endif
217 
218 _GLIBCXX_END_NAMESPACE_VERSION
219 } // namespace
220