1 // std::codecvt implementation details, DragonFly version -*- C++ -*- 2 3 // Copyright (C) 2015-2020 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 // 26 // ISO C++ 14882: 22.2.1.5 - Template class codecvt 27 // 28 29 // Written by Benjamin Kosnik <bkoz@redhat.com> 30 // Modified for DragonFly by John Marino <gnugcc@marino.st> 31 32 #include <locale> 33 #include <cstring> 34 #include <cstdlib> // For MB_CUR_MAX 35 #include <climits> // For MB_LEN_MAX 36 37 #include "xlocale_port.h" 38 39 namespace std _GLIBCXX_VISIBILITY(default) 40 { 41 _GLIBCXX_BEGIN_NAMESPACE_VERSION 42 43 // Specializations. 44 #ifdef _GLIBCXX_USE_WCHAR_T 45 codecvt_base::result 46 codecvt<wchar_t, char, mbstate_t>:: do_out(state_type & __state,const intern_type * __from,const intern_type * __from_end,const intern_type * & __from_next,extern_type * __to,extern_type * __to_end,extern_type * & __to_next) const47 do_out(state_type& __state, const intern_type* __from, 48 const intern_type* __from_end, const intern_type*& __from_next, 49 extern_type* __to, extern_type* __to_end, 50 extern_type*& __to_next) const 51 { 52 result __ret = ok; 53 state_type __tmp_state(__state); 54 55 // wcsnrtombs is *very* fast but stops if encounters NUL characters: 56 // in case we fall back to wcrtomb and then continue, in a loop. 57 // NB: wcsnrtombs is a GNU extension 58 for (__from_next = __from, __to_next = __to; 59 __from_next < __from_end && __to_next < __to_end 60 && __ret == ok;) 61 { 62 const intern_type* __from_chunk_end = wmemchr(__from_next, L'\0', 63 __from_end - __from_next); 64 if (!__from_chunk_end) 65 __from_chunk_end = __from_end; 66 67 __from = __from_next; 68 const size_t __conv = wcsnrtombs_l(__to_next, &__from_next, 69 __from_chunk_end - __from_next, 70 __to_end - __to_next, &__state, 71 (locale_t)_M_c_locale_codecvt); 72 if (__conv == static_cast<size_t>(-1)) 73 { 74 // In case of error, in order to stop at the exact place we 75 // have to start again from the beginning with a series of 76 // wcrtomb. 77 for (; __from < __from_next; ++__from) 78 __to_next += wcrtomb_l(__to_next, *__from, &__tmp_state, 79 (locale_t)_M_c_locale_codecvt); 80 __state = __tmp_state; 81 __ret = error; 82 } 83 else if (__from_next && __from_next < __from_chunk_end) 84 { 85 __to_next += __conv; 86 __ret = partial; 87 } 88 else 89 { 90 __from_next = __from_chunk_end; 91 __to_next += __conv; 92 } 93 94 if (__from_next < __from_end && __ret == ok) 95 { 96 extern_type __buf[MB_LEN_MAX]; 97 __tmp_state = __state; 98 const size_t __conv2 = wcrtomb_l(__buf, *__from_next, &__tmp_state, 99 (locale_t)_M_c_locale_codecvt); 100 if (__conv2 > static_cast<size_t>(__to_end - __to_next)) 101 __ret = partial; 102 else 103 { 104 memcpy(__to_next, __buf, __conv2); 105 __state = __tmp_state; 106 __to_next += __conv2; 107 ++__from_next; 108 } 109 } 110 } 111 112 return __ret; 113 } 114 115 codecvt_base::result 116 codecvt<wchar_t, char, mbstate_t>:: do_in(state_type & __state,const extern_type * __from,const extern_type * __from_end,const extern_type * & __from_next,intern_type * __to,intern_type * __to_end,intern_type * & __to_next) const117 do_in(state_type& __state, const extern_type* __from, 118 const extern_type* __from_end, const extern_type*& __from_next, 119 intern_type* __to, intern_type* __to_end, 120 intern_type*& __to_next) const 121 { 122 result __ret = ok; 123 state_type __tmp_state(__state); 124 125 // mbsnrtowcs is *very* fast but stops if encounters NUL characters: 126 // in case we store a L'\0' and then continue, in a loop. 127 // NB: mbsnrtowcs is a GNU extension 128 for (__from_next = __from, __to_next = __to; 129 __from_next < __from_end && __to_next < __to_end 130 && __ret == ok;) 131 { 132 const extern_type* __from_chunk_end; 133 __from_chunk_end = static_cast<const extern_type*>(memchr(__from_next, '\0', 134 __from_end 135 - __from_next)); 136 if (!__from_chunk_end) 137 __from_chunk_end = __from_end; 138 139 __from = __from_next; 140 size_t __conv = mbsnrtowcs_l(__to_next, &__from_next, 141 __from_chunk_end - __from_next, 142 __to_end - __to_next, &__state, 143 (locale_t)_M_c_locale_codecvt); 144 if (__conv == static_cast<size_t>(-1)) 145 { 146 // In case of error, in order to stop at the exact place we 147 // have to start again from the beginning with a series of 148 // mbrtowc. 149 for (;; ++__to_next, __from += __conv) 150 { 151 __conv = mbrtowc_l(__to_next, __from, __from_end - __from, 152 &__tmp_state, (locale_t)_M_c_locale_codecvt); 153 if (__conv == static_cast<size_t>(-1) 154 || __conv == static_cast<size_t>(-2)) 155 break; 156 } 157 __from_next = __from; 158 __state = __tmp_state; 159 __ret = error; 160 } 161 else if (__from_next && __from_next < __from_chunk_end) 162 { 163 // It is unclear what to return in this case (see DR 382). 164 __to_next += __conv; 165 __ret = partial; 166 } 167 else 168 { 169 __from_next = __from_chunk_end; 170 __to_next += __conv; 171 } 172 173 if (__from_next < __from_end && __ret == ok) 174 { 175 if (__to_next < __to_end) 176 { 177 // XXX Probably wrong for stateful encodings 178 __tmp_state = __state; 179 ++__from_next; 180 *__to_next++ = L'\0'; 181 } 182 else 183 __ret = partial; 184 } 185 } 186 187 return __ret; 188 } 189 190 int 191 codecvt<wchar_t, char, mbstate_t>:: do_encoding() const192 do_encoding() const throw() 193 { 194 // XXX This implementation assumes that the encoding is 195 // stateless and is either single-byte or variable-width. 196 return MB_CUR_MAX_L((locale_t)_M_c_locale_codecvt) == 1 ? 1 : 0; 197 } 198 199 int 200 codecvt<wchar_t, char, mbstate_t>:: do_max_length() const201 do_max_length() const throw() 202 { 203 // XXX Probably wrong for stateful encodings. 204 return MB_CUR_MAX_L((locale_t)_M_c_locale_codecvt); 205 } 206 207 int 208 codecvt<wchar_t, char, mbstate_t>:: do_length(state_type & __state,const extern_type * __from,const extern_type * __end,size_t __max) const209 do_length(state_type& __state, const extern_type* __from, 210 const extern_type* __end, size_t __max) const 211 { 212 int __ret = 0; 213 state_type __tmp_state(__state); 214 215 // mbsnrtowcs is *very* fast but stops if encounters NUL characters: 216 // in case we advance past it and then continue, in a loop. 217 // NB: mbsnrtowcs is a GNU extension 218 219 // A dummy internal buffer is needed in order for mbsnrtocws to consider 220 // its fourth parameter (it wouldn't with NULL as first parameter). 221 wchar_t* __to = static_cast<wchar_t*>(__builtin_alloca(sizeof(wchar_t) 222 * __max)); 223 while (__from < __end && __max) 224 { 225 const extern_type* __from_chunk_end; 226 __from_chunk_end = static_cast<const extern_type*>(memchr(__from, '\0', 227 __end 228 - __from)); 229 if (!__from_chunk_end) 230 __from_chunk_end = __end; 231 232 const extern_type* __tmp_from = __from; 233 size_t __conv = mbsnrtowcs_l(__to, &__from, 234 __from_chunk_end - __from, 235 __max, &__state, 236 (locale_t)_M_c_locale_codecvt); 237 if (__conv == static_cast<size_t>(-1)) 238 { 239 // In case of error, in order to stop at the exact place we 240 // have to start again from the beginning with a series of 241 // mbrtowc. 242 for (__from = __tmp_from;; __from += __conv) 243 { 244 __conv = mbrtowc_l(0, __from, __end - __from, 245 &__tmp_state, (locale_t)_M_c_locale_codecvt); 246 if (__conv == static_cast<size_t>(-1) 247 || __conv == static_cast<size_t>(-2)) 248 break; 249 } 250 __state = __tmp_state; 251 __ret += __from - __tmp_from; 252 break; 253 } 254 if (!__from) 255 __from = __from_chunk_end; 256 257 __ret += __from - __tmp_from; 258 __max -= __conv; 259 260 if (__from < __end && __max) 261 { 262 // XXX Probably wrong for stateful encodings 263 __tmp_state = __state; 264 ++__from; 265 ++__ret; 266 --__max; 267 } 268 } 269 270 return __ret; 271 } 272 #endif 273 274 _GLIBCXX_END_NAMESPACE_VERSION 275 } // namespace 276