1 // std::codecvt implementation details, generic version -*- C++ -*-
2 
3 // Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010
4 // Free Software Foundation, Inc.
5 //
6 // This file is part of the GNU ISO C++ Library.  This library is free
7 // software; you can redistribute it and/or modify it under the
8 // terms of the GNU General Public License as published by the
9 // Free Software Foundation; either version 3, or (at your option)
10 // any later version.
11 
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 
17 // Under Section 7 of GPL version 3, you are granted additional
18 // permissions described in the GCC Runtime Library Exception, version
19 // 3.1, as published by the Free Software Foundation.
20 
21 // You should have received a copy of the GNU General Public License and
22 // a copy of the GCC Runtime Library Exception along with this program;
23 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 // <http://www.gnu.org/licenses/>.
25 
26 //
27 // ISO C++ 14882: 22.2.1.5 - Template class codecvt
28 //
29 
30 // Written by Benjamin Kosnik <bkoz@redhat.com>
31 
32 #include <locale>
33 #include <cstdlib>  // For MB_CUR_MAX
34 #include <climits>  // For MB_LEN_MAX
35 #include <cstring>
36 
37 namespace std _GLIBCXX_VISIBILITY(default)
38 {
39 _GLIBCXX_BEGIN_NAMESPACE_VERSION
40 
41   // Specializations.
42 #ifdef _GLIBCXX_USE_WCHAR_T
43   codecvt_base::result
44   codecvt<wchar_t, char, mbstate_t>::
45   do_out(state_type& __state, const intern_type* __from,
46 	 const intern_type* __from_end, const intern_type*& __from_next,
47 	 extern_type* __to, extern_type* __to_end,
48 	 extern_type*& __to_next) const
49   {
50     result __ret = ok;
51     // The conversion must be done using a temporary destination buffer
52     // since it is not possible to pass the size of the buffer to wcrtomb
53     state_type __tmp_state(__state);
54 
55     // The conversion must be done by calling wcrtomb in a loop rather
56     // than using wcsrtombs because wcsrtombs assumes that the input is
57     // zero-terminated.
58 
59     // Either we can upper bound the total number of external characters to
60     // something smaller than __to_end - __to or the conversion must be done
61     // using a temporary destination buffer since it is not possible to
62     // pass the size of the buffer to wcrtomb
63     if (MB_CUR_MAX * (__from_end - __from) - (__to_end - __to) <= 0)
64       while (__from < __from_end)
65 	{
66 	  const size_t __conv = wcrtomb(__to, *__from, &__tmp_state);
67 	  if (__conv == static_cast<size_t>(-1))
68 	    {
69 	      __ret = error;
70 	      break;
71 	    }
72 	  __state = __tmp_state;
73 	  __to += __conv;
74 	  __from++;
75 	}
76     else
77       {
78 	extern_type __buf[MB_LEN_MAX];
79 	while (__from < __from_end && __to < __to_end)
80 	  {
81 	    const size_t __conv = wcrtomb(__buf, *__from, &__tmp_state);
82 	    if (__conv == static_cast<size_t>(-1))
83 	      {
84 		__ret = error;
85 		break;
86 	      }
87 	    else if (__conv > static_cast<size_t>(__to_end - __to))
88 	      {
89 		__ret = partial;
90 		break;
91 	      }
92 
93 	    memcpy(__to, __buf, __conv);
94 	    __state = __tmp_state;
95 	    __to += __conv;
96 	    __from++;
97 	  }
98       }
99 
100     if (__ret == ok && __from < __from_end)
101       __ret = partial;
102 
103     __from_next = __from;
104     __to_next = __to;
105     return __ret;
106   }
107 
108   codecvt_base::result
109   codecvt<wchar_t, char, mbstate_t>::
110   do_in(state_type& __state, const extern_type* __from,
111 	const extern_type* __from_end, const extern_type*& __from_next,
112 	intern_type* __to, intern_type* __to_end,
113 	intern_type*& __to_next) const
114   {
115     result __ret = ok;
116     // This temporary state object is neccessary so __state won't be modified
117     // if [__from, __from_end) is a partial multibyte character.
118     state_type __tmp_state(__state);
119 
120     // Conversion must be done by calling mbrtowc in a loop rather than
121     // by calling mbsrtowcs because mbsrtowcs assumes that the input
122     // sequence is zero-terminated.
123     while (__from < __from_end && __to < __to_end)
124       {
125 	size_t __conv = mbrtowc(__to, __from, __from_end - __from,
126 				&__tmp_state);
127 	if (__conv == static_cast<size_t>(-1))
128 	  {
129 	    __ret = error;
130 	    break;
131 	  }
132 	else if (__conv == static_cast<size_t>(-2))
133 	  {
134 	    // It is unclear what to return in this case (see DR 382).
135 	    __ret = partial;
136 	    break;
137 	  }
138 	else if (__conv == 0)
139 	  {
140 	    // XXX Probably wrong for stateful encodings
141 	    __conv = 1;
142 	    *__to = L'\0';
143 	  }
144 
145 	__state = __tmp_state;
146 	__to++;
147 	__from += __conv;
148       }
149 
150     // It is not clear that __from < __from_end implies __ret != ok
151     // (see DR 382).
152     if (__ret == ok && __from < __from_end)
153       __ret = partial;
154 
155     __from_next = __from;
156     __to_next = __to;
157     return __ret;
158   }
159 
160   int
161   codecvt<wchar_t, char, mbstate_t>::
162   do_encoding() const throw()
163   {
164     // XXX This implementation assumes that the encoding is
165     // stateless and is either single-byte or variable-width.
166     int __ret = 0;
167     if (MB_CUR_MAX == 1)
168       __ret = 1;
169     return __ret;
170   }
171 
172   int
173   codecvt<wchar_t, char, mbstate_t>::
174   do_max_length() const throw()
175   {
176     // XXX Probably wrong for stateful encodings.
177     int __ret = MB_CUR_MAX;
178     return __ret;
179   }
180 
181   int
182   codecvt<wchar_t, char, mbstate_t>::
183   do_length(state_type& __state, const extern_type* __from,
184 	    const extern_type* __end, size_t __max) const
185   {
186     int __ret = 0;
187     state_type __tmp_state(__state);
188 
189     while (__from < __end && __max)
190       {
191 	size_t __conv = mbrtowc(0, __from, __end - __from, &__tmp_state);
192 	if (__conv == static_cast<size_t>(-1))
193 	  {
194 	    // Invalid source character
195 	    break;
196 	  }
197 	else if (__conv == static_cast<size_t>(-2))
198 	  {
199 	    // Remainder of input does not form a complete destination
200 	    // character.
201 	    break;
202 	  }
203 	else if (__conv == 0)
204 	  {
205 	    // XXX Probably wrong for stateful encodings
206 	    __conv = 1;
207 	  }
208 
209 	__state = __tmp_state;
210 	__from += __conv;
211 	__ret += __conv;
212 	__max--;
213       }
214 
215     return __ret;
216   }
217 #endif
218 
219 _GLIBCXX_END_NAMESPACE_VERSION
220 } // namespace
221