// std::codecvt implementation details, generic version -*- C++ -*-

// Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010
// Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

//
// ISO C++ 14882: 22.2.1.5 - Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@redhat.com>

#include <locale>
#include <cstdlib>  // For MB_CUR_MAX
#include <climits>  // For MB_LEN_MAX
#include <cstring>

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  // Specializations.
#ifdef _GLIBCXX_USE_WCHAR_T
  // Convert the wide characters in [__from, __from_end) to multibyte
  // characters stored in [__to, __to_end), one character at a time,
  // using wcrtomb.
  //
  // On return __from_next and __to_next point just past the last wide
  // character consumed and the last byte produced.  __state is updated
  // only after each character that has been completely converted and
  // stored.
  //
  // Returns error if wcrtomb rejects a character, partial if input is
  // left over when the loop stops, and ok otherwise.
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_out(state_type& __state, const intern_type* __from,
	 const intern_type* __from_end, const intern_type*& __from_next,
	 extern_type* __to, extern_type* __to_end,
	 extern_type*& __to_next) const
  {
    result __ret = ok;
    // Work on a copy so that __state is only advanced for characters
    // that were actually written to the destination.
    state_type __tmp_state(__state);

    // The conversion must be done by calling wcrtomb in a loop rather
    // than using wcsrtombs because wcsrtombs assumes that the input is
    // zero-terminated.

    // Either we can upper bound the total number of external characters
    // by something no larger than __to_end - __to, or the conversion must
    // be done using a temporary destination buffer since it is not
    // possible to pass the size of the buffer to wcrtomb.
    //
    // ISO C gives MB_CUR_MAX the type size_t, so an expression such as
    // "MB_CUR_MAX * n - room <= 0" is evaluated in unsigned arithmetic
    // and holds only when it is exactly zero.  Compare signed values
    // instead; dividing rather than multiplying also avoids overflow.
    const ptrdiff_t __in_len = __from_end - __from;
    const ptrdiff_t __out_room = __to_end - __to;
    const ptrdiff_t __mb_max = static_cast<ptrdiff_t>(MB_CUR_MAX);

    if (__in_len <= __out_room / __mb_max)
      // Even the worst case fits: write straight into the destination.
      while (__from < __from_end)
	{
	  const size_t __conv = wcrtomb(__to, *__from, &__tmp_state);
	  if (__conv == static_cast<size_t>(-1))
	    {
	      __ret = error;
	      break;
	    }
	  __state = __tmp_state;
	  __to += __conv;
	  __from++;
	}
    else
      {
	// Convert into a scratch buffer first and copy only if the
	// resulting bytes fit in what is left of the destination.
	extern_type __buf[MB_LEN_MAX];
	while (__from < __from_end && __to < __to_end)
	  {
	    const size_t __conv = wcrtomb(__buf, *__from, &__tmp_state);
	    if (__conv == static_cast<size_t>(-1))
	      {
		__ret = error;
		break;
	      }
	    else if (__conv > static_cast<size_t>(__to_end - __to))
	      {
		__ret = partial;
		break;
	      }

	    memcpy(__to, __buf, __conv);
	    __state = __tmp_state;
	    __to += __conv;
	    __from++;
	  }
      }

    if (__ret == ok && __from < __from_end)
      __ret = partial;

    __from_next = __from;
    __to_next = __to;
    return __ret;
  }

  // Convert the multibyte characters in [__from, __from_end) to wide
  // characters stored in [__to, __to_end), one character at a time,
  // using mbrtowc.
  //
  // On return __from_next and __to_next point just past the last byte
  // consumed and the last wide character produced.  __state is updated
  // only after each completely converted character.
  //
  // Returns error if mbrtowc reports an invalid sequence, partial if the
  // input ends inside a multibyte character or input is left over when
  // the loop stops, and ok otherwise.
  codecvt_base::result
  codecvt<wchar_t, char, mbstate_t>::
  do_in(state_type& __state, const extern_type* __from,
	const extern_type* __from_end, const extern_type*& __from_next,
	intern_type* __to, intern_type* __to_end,
	intern_type*& __to_next) const
  {
    result __ret = ok;
    // This temporary state object is necessary so __state won't be modified
    // if [__from, __from_end) is a partial multibyte character.
    state_type __tmp_state(__state);

    // Conversion must be done by calling mbrtowc in a loop rather than
    // by calling mbsrtowcs because mbsrtowcs assumes that the input
    // sequence is zero-terminated.
    while (__from < __from_end && __to < __to_end)
      {
	size_t __conv = mbrtowc(__to, __from, __from_end - __from,
				&__tmp_state);
	if (__conv == static_cast<size_t>(-1))
	  {
	    __ret = error;
	    break;
	  }
	else if (__conv == static_cast<size_t>(-2))
	  {
	    // It is unclear what to return in this case (see DR 382).
	    __ret = partial;
	    break;
	  }
	else if (__conv == 0)
	  {
	    // mbrtowc returned 0 (a null character was converted); treat
	    // it as one consumed byte and store L'\0' explicitly.
	    // XXX Probably wrong for stateful encodings
	    __conv = 1;
	    *__to = L'\0';
	  }

	__state = __tmp_state;
	__to++;
	__from += __conv;
      }

    // It is not clear that __from < __from_end implies __ret != ok
    // (see DR 382).
    if (__ret == ok && __from < __from_end)
      __ret = partial;

    __from_next = __from;
    __to_next = __to;
    return __ret;
  }

  // Returns 1 when MB_CUR_MAX is 1 and 0 otherwise.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_encoding() const throw()
  {
    // XXX This implementation assumes that the encoding is
    // stateless and is either single-byte or variable-width.
    int __ret = 0;
    if (MB_CUR_MAX == 1)
      __ret = 1;
    return __ret;
  }

  // Returns the current value of MB_CUR_MAX.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_max_length() const throw()
  {
    // XXX Probably wrong for stateful encodings.
    int __ret = MB_CUR_MAX;
    return __ret;
  }

  // Count how many bytes of [__from, __end) would be consumed when
  // converting at most __max multibyte characters with mbrtowc.
  //
  // Counting stops early at an invalid or incomplete sequence.  __state
  // is updated after each complete character.  Returns the number of
  // bytes counted.
  int
  codecvt<wchar_t, char, mbstate_t>::
  do_length(state_type& __state, const extern_type* __from,
	    const extern_type* __end, size_t __max) const
  {
    int __ret = 0;
    // Work on a copy so that a failed or incomplete conversion does not
    // disturb __state.
    state_type __tmp_state(__state);

    while (__from < __end && __max)
      {
	size_t __conv = mbrtowc(0, __from, __end - __from, &__tmp_state);
	if (__conv == static_cast<size_t>(-1))
	  {
	    // Invalid source character
	    break;
	  }
	else if (__conv == static_cast<size_t>(-2))
	  {
	    // Remainder of input does not form a complete destination
	    // character.
	    break;
	  }
	else if (__conv == 0)
	  {
	    // A null character was converted; count it as one byte.
	    // XXX Probably wrong for stateful encodings
	    __conv = 1;
	  }

	__state = __tmp_state;
	__from += __conv;
	__ret += __conv;
	__max--;
      }

    return __ret;
  }
#endif

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace