// Locale support (codecvt) -*- C++ -*-

// Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006
// Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING. If not, write to the Free
// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
// USA.

// As a special exception, you may use this file as part of a free software
// library without restriction. Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License. This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.

//
// ISO C++ 14882: 22.2.1.5 Template class codecvt
//

// Written by Benjamin Kosnik <bkoz@redhat.com>

/** @file ext/codecvt_specializations.h
 *  This file is a GNU extension to the Standard C++ Library.
 */

#ifndef _EXT_CODECVT_SPECIALIZATIONS_H
#define _EXT_CODECVT_SPECIALIZATIONS_H 1

#include <bits/c++config.h>

#ifdef _GLIBCXX_USE_ICONV

#include <locale>
#include <iconv.h>
#include <cerrno>

// XXX
// Define this here so codecvt.cc can have _S_max_size definition.
#define _GLIBCXX_USE_ENCODING_STATE 1

_GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx)

  /// @brief  Extension to use iconv for dealing with character encodings.
  // This includes conversions and comparisons between various character
  // sets.  This object encapsulates data that may need to be shared between
  // char_traits, codecvt and ctype.
  class encoding_state
  {
  public:
    // Types:
    // NB: A conversion descriptor subsumes and enhances the
    // functionality of a simple state type such as mbstate_t.
    typedef iconv_t	descriptor_type;

  protected:
    // Name of internal character set encoding.
    std::string		_M_int_enc;

    // Name of external character set encoding.
    std::string		_M_ext_enc;

    // Conversion descriptor between external encoding to internal encoding.
    descriptor_type	_M_in_desc;

    // Conversion descriptor between internal encoding to external encoding.
    descriptor_type	_M_out_desc;

    // The byte-order marker for the external encoding, if necessary.
    int			_M_ext_bom;

    // The byte-order marker for the internal encoding, if necessary.
    int			_M_int_bom;

    // Number of external bytes needed to construct one complete
    // character in the internal encoding.
    // NB: -1 indicates variable, or stateful, encodings.
    int			_M_bytes;

  public:
    // Creates a state with no encodings and no open descriptors;
    // good() is false for such an object.
    explicit
    encoding_state()
    : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
    { }

    // Creates a state converting between the internal encoding __int and
    // the external encoding __ext and opens both iconv descriptors.
    // Throws (via __throw_runtime_error) if a descriptor cannot be opened.
    explicit
    encoding_state(const char* __int, const char* __ext,
		   int __ibom = 0, int __ebom = 0, int __bytes = 1)
    : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
      _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
    { init(); }

    // 21.1.2 traits typedefs
    // p4
    // typedef STATE_T state_type
    // requires: state_type shall meet the requirements of
    // CopyConstructible types (20.1.3)
    // NB: This does not preserve the actual state of the conversion
    // descriptor member, but it does duplicate the encoding
    // information.
    encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
    { construct(__obj); }

    // Need assignment operator as well.
    // As with copy construction, fresh descriptors are opened rather
    // than sharing those of __obj.
    encoding_state&
    operator=(const encoding_state& __obj)
    {
      construct(__obj);
      return *this;
    }

    ~encoding_state()
    { destroy(); }

    // True only when both conversion descriptors are open and valid.
    bool
    good() const throw()
    {
      const descriptor_type __err = reinterpret_cast<iconv_t>(-1);
      bool __test = _M_in_desc && _M_in_desc != __err;
      __test &= _M_out_desc && _M_out_desc != __err;
      return __test;
    }

    int
    character_ratio() const
    { return _M_bytes; }

    const std::string
    internal_encoding() const
    { return _M_int_enc; }

    int
    internal_bom() const
    { return _M_int_bom; }

    const std::string
    external_encoding() const
    { return _M_ext_enc; }

    int
    external_bom() const
    { return _M_ext_bom; }

    // Descriptor converting external -> internal.
    const descriptor_type&
    in_descriptor() const
    { return _M_in_desc; }

    // Descriptor converting internal -> external.
    const descriptor_type&
    out_descriptor() const
    { return _M_out_desc; }

  protected:
    // Opens whichever descriptor is not yet open, provided both encoding
    // names are non-empty.  On failure every descriptor is released and
    // reset to 0 before throwing, so that nothing leaks when this runs
    // inside a constructor and a later assignment can reopen them
    // (a descriptor left at (iconv_t)-1 would never be retried).
    void
    init()
    {
      const descriptor_type __err = reinterpret_cast<iconv_t>(-1);
      const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
      if (!_M_in_desc && __have_encodings)
	{
	  // iconv_open takes (tocode, fromcode).
	  _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
	  if (_M_in_desc == __err)
	    {
	      destroy();
	      std::__throw_runtime_error(__N("encoding_state::_M_init "
				    "creating iconv input descriptor failed"));
	    }
	}
      if (!_M_out_desc && __have_encodings)
	{
	  _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
	  if (_M_out_desc == __err)
	    {
	      destroy();
	      std::__throw_runtime_error(__N("encoding_state::_M_init "
				  "creating iconv output descriptor failed"));
	    }
	}
    }

    // Replaces this object's encoding information with that of __obj and
    // opens new descriptors for it.
    void
    construct(const encoding_state& __obj)
    {
      destroy();
      _M_int_enc = __obj._M_int_enc;
      _M_ext_enc = __obj._M_ext_enc;
      _M_ext_bom = __obj._M_ext_bom;
      _M_int_bom = __obj._M_int_bom;
      _M_bytes = __obj._M_bytes;
      init();
    }

    // Closes any valid descriptor and leaves both members at 0, including
    // one that held the iconv_open failure value.
    void
    destroy() throw()
    {
      const descriptor_type __err = reinterpret_cast<iconv_t>(-1);
      if (_M_in_desc && _M_in_desc != __err)
	iconv_close(_M_in_desc);
      if (_M_out_desc && _M_out_desc != __err)
	iconv_close(_M_out_desc);
      _M_in_desc = 0;
      _M_out_desc = 0;
    }
  };

  /// @brief  encoding_char_traits.
  // Custom traits type with encoding_state for the state type, and the
  // associated fpos<encoding_state> for the position type, all other
  // bits equivalent to the required char_traits instantiations.
  template<typename _CharT>
    struct encoding_char_traits : public std::char_traits<_CharT>
    {
      typedef encoding_state				state_type;
      typedef typename std::fpos<state_type>		pos_type;
    };

_GLIBCXX_END_NAMESPACE


_GLIBCXX_BEGIN_NAMESPACE(std)

  using __gnu_cxx::encoding_state;

  /// @brief  codecvt<InternT, _ExternT, encoding_state> specialization.
  // This partial specialization takes advantage of iconv to provide
  // code conversions between a large number of character encodings.
  template<typename _InternT, typename _ExternT>
    class codecvt<_InternT, _ExternT, encoding_state>
    : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
    {
    public:
      // Types:
      typedef codecvt_base::result			result;
      typedef _InternT					intern_type;
      typedef _ExternT					extern_type;
      typedef __gnu_cxx::encoding_state			state_type;
      typedef state_type::descriptor_type		descriptor_type;

      // Data Members:
      static locale::id			id;

      explicit
      codecvt(size_t __refs = 0)
      : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
      { }

      // NB: __enc is accepted but not used by this constructor; the state
      // to convert with is supplied on each in/out/unshift call.
      explicit
      codecvt(state_type& __enc, size_t __refs = 0)
      : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
      { }

    protected:
      virtual
      ~codecvt() { }

      virtual result
      do_out(state_type& __state, const intern_type* __from,
	     const intern_type* __from_end, const intern_type*& __from_next,
	     extern_type* __to, extern_type* __to_end,
	     extern_type*& __to_next) const;

      virtual result
      do_unshift(state_type& __state, extern_type* __to,
		 extern_type* __to_end, extern_type*& __to_next) const;

      virtual result
      do_in(state_type& __state, const extern_type* __from,
	    const extern_type* __from_end, const extern_type*& __from_next,
	    intern_type* __to, intern_type* __to_end,
	    intern_type*& __to_next) const;

      virtual int
      do_encoding() const throw();

      virtual bool
      do_always_noconv() const throw();

      virtual int
      do_length(state_type&, const extern_type* __from,
		const extern_type* __end, size_t __max) const;

      virtual int
      do_max_length() const throw();
    };

  template<typename _InternT, typename _ExternT>
    locale::id
    codecvt<_InternT, _ExternT, encoding_state>::id;

  // This adaptor works around the signature problems of the second
  // argument to iconv():  SUSv2 and others use 'const char**', but glibc 2.2
  // uses 'char**', which matches the POSIX 1003.1-2001 standard.
  // Using this adaptor, g++ will do the work for us.
  // NB: The C-style cast is deliberate: depending on the deduced _Tp it
  // must either be a no-op or add const at the inner pointer level.
  template<typename _Tp>
    inline size_t
    __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
		    iconv_t __cd, char** __inbuf, size_t* __inbytes,
		    char** __outbuf, size_t* __outbytes)
    { return __func(__cd, (_Tp)__inbuf, __inbytes, __outbuf, __outbytes); }

  // Converts internal characters [__from, __from_end) to the external
  // encoding, writing to [__to, __to_end).
  // Returns ok when iconv succeeds, partial when iconv fails after
  // consuming at least one caller character, error otherwise (including
  // when __state is not good()).  __from_next and __to_next are set only
  // for ok and partial, and __from_next always points into the caller's
  // sequence.
  template<typename _InternT, typename _ExternT>
    codecvt_base::result
    codecvt<_InternT, _ExternT, encoding_state>::
    do_out(state_type& __state, const intern_type* __from,
	   const intern_type* __from_end, const intern_type*& __from_next,
	   extern_type* __to, extern_type* __to_end,
	   extern_type*& __to_next) const
    {
      result __ret = codecvt_base::error;
      if (__state.good())
	{
	  const descriptor_type& __desc = __state.out_descriptor();
	  const size_t __fmultiple = sizeof(intern_type);
	  const size_t __tmultiple = sizeof(extern_type);
	  const size_t __size = __from_end - __from;
	  size_t __fbytes = __fmultiple * __size;
	  size_t __tbytes = __tmultiple * (__to_end - __to);

	  // Argument list for iconv specifies a byte sequence. Thus,
	  // all to/from arrays must be brutally casted to char*.
	  char* __cto = reinterpret_cast<char*>(__to);
	  char* __cfrom;

	  // Bytes at the front of the iconv input that are not part of
	  // the caller's sequence (the prepended byte order marker).
	  size_t __skip = 0;

	  // Some encodings need a byte order marker as the first item
	  // in the byte stream, to designate endian-ness.  The default
	  // value for the byte order marker is NULL, so if this is
	  // the case, it's not necessary and we can just go on our
	  // merry way.
	  const int __int_bom = __state.internal_bom();
	  if (__int_bom)
	    {
	      intern_type* __cfixed = static_cast<intern_type*>
		(__builtin_alloca(sizeof(intern_type) * (__size + 1)));
	      __cfixed[0] = static_cast<intern_type>(__int_bom);
	      char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
	      __cfrom = reinterpret_cast<char*>(__cfixed);
	      // The scratch buffer holds one extra unit; count it, or the
	      // caller's last character would never be handed to iconv.
	      __skip = __fmultiple;
	      __fbytes += __skip;
	    }
	  else
	    {
	      intern_type* __cfixed = const_cast<intern_type*>(__from);
	      __cfrom = reinterpret_cast<char*>(__cfixed);
	    }

	  const char* const __cbase = __cfrom;
	  const size_t __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
						&__fbytes, &__cto, &__tbytes);

	  // Translate what iconv consumed back into a position in the
	  // caller's sequence; __cfrom may point into the scratch buffer.
	  const size_t __used = static_cast<size_t>(__cfrom - __cbase);
	  const size_t __done = (__used > __skip
				 ? (__used - __skip) / __fmultiple : 0);

	  if (__conv != size_t(-1))
	    {
	      __from_next = __from + __done;
	      __to_next = reinterpret_cast<extern_type*>(__cto);
	      __ret = codecvt_base::ok;
	    }
	  else if (__used > __skip)
	    {
	      __from_next = __from + __done;
	      __to_next = reinterpret_cast<extern_type*>(__cto);
	      __ret = codecvt_base::partial;
	    }
	  else
	    __ret = codecvt_base::error;
	}
      return __ret;
    }

  // Writes to [__to, __to_end) whatever sequence is needed to return the
  // output (internal -> external) conversion to its initial shift state.
  // Returns noconv when iconv succeeds without writing anything, ok when
  // it succeeds after writing, partial when iconv reports E2BIG (buffer
  // too small), error otherwise (including when __state is not good()).
  template<typename _InternT, typename _ExternT>
    codecvt_base::result
    codecvt<_InternT, _ExternT, encoding_state>::
    do_unshift(state_type& __state, extern_type* __to,
	       extern_type* __to_end, extern_type*& __to_next) const
    {
      result __ret = codecvt_base::error;
      if (__state.good())
	{
	  // The destination holds extern_type, so both the descriptor and
	  // the byte capacity must be those of the output direction.
	  const descriptor_type& __desc = __state.out_descriptor();
	  const size_t __tmultiple = sizeof(extern_type);
	  const size_t __tcap = __tmultiple * (__to_end - __to);
	  size_t __tlen = __tcap;

	  // Argument list for iconv specifies a byte sequence. Thus,
	  // all to/from arrays must be brutally casted to char*.
	  char* __cto = reinterpret_cast<char*>(__to);
	  const size_t __conv = __iconv_adaptor(iconv, __desc, NULL, NULL,
						&__cto, &__tlen);

	  if (__conv != size_t(-1))
	    {
	      __to_next = reinterpret_cast<extern_type*>(__cto);
	      if (__tlen == __tcap)
		__ret = codecvt_base::noconv;
	      else
		__ret = codecvt_base::ok;
	    }
	  else if (errno == E2BIG)
	    {
	      __to_next = reinterpret_cast<extern_type*>(__cto);
	      __ret = codecvt_base::partial;
	    }
	  else
	    __ret = codecvt_base::error;
	}
      return __ret;
    }

  // Converts external characters [__from, __from_end) to the internal
  // encoding, writing to [__to, __to_end).
  // Returns ok when iconv succeeds, partial when iconv fails after
  // consuming at least one caller character, error otherwise (including
  // when __state is not good()).  __from_next and __to_next are set only
  // for ok and partial, and __from_next always points into the caller's
  // sequence.
  template<typename _InternT, typename _ExternT>
    codecvt_base::result
    codecvt<_InternT, _ExternT, encoding_state>::
    do_in(state_type& __state, const extern_type* __from,
	  const extern_type* __from_end, const extern_type*& __from_next,
	  intern_type* __to, intern_type* __to_end,
	  intern_type*& __to_next) const
    {
      result __ret = codecvt_base::error;
      if (__state.good())
	{
	  const descriptor_type& __desc = __state.in_descriptor();
	  const size_t __fmultiple = sizeof(extern_type);
	  const size_t __tmultiple = sizeof(intern_type);
	  const size_t __size = __from_end - __from;
	  size_t __flen = __fmultiple * __size;
	  size_t __tlen = __tmultiple * (__to_end - __to);

	  // Argument list for iconv specifies a byte sequence. Thus,
	  // all to/from arrays must be brutally casted to char*.
	  char* __cto = reinterpret_cast<char*>(__to);
	  char* __cfrom;

	  // Bytes at the front of the iconv input that are not part of
	  // the caller's sequence (the prepended byte order marker).
	  size_t __skip = 0;

	  // Some encodings need a byte order marker as the first item
	  // in the byte stream, to designate endian-ness.  The default
	  // value for the byte order marker is NULL, so if this is
	  // the case, it's not necessary and we can just go on our
	  // merry way.
	  const int __ext_bom = __state.external_bom();
	  if (__ext_bom)
	    {
	      extern_type* __cfixed = static_cast<extern_type*>
		(__builtin_alloca(sizeof(extern_type) * (__size + 1)));
	      __cfixed[0] = static_cast<extern_type>(__ext_bom);
	      char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
	      __cfrom = reinterpret_cast<char*>(__cfixed);
	      // The scratch buffer holds one extra unit; count it, or the
	      // caller's last character would never be handed to iconv.
	      __skip = __fmultiple;
	      __flen += __skip;
	    }
	  else
	    {
	      extern_type* __cfixed = const_cast<extern_type*>(__from);
	      __cfrom = reinterpret_cast<char*>(__cfixed);
	    }

	  const char* const __cbase = __cfrom;
	  const size_t __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
						&__flen, &__cto, &__tlen);

	  // Translate what iconv consumed back into a position in the
	  // caller's sequence; __cfrom may point into the scratch buffer.
	  // Progress is measured in bytes on both sides of the comparison.
	  const size_t __used = static_cast<size_t>(__cfrom - __cbase);
	  const size_t __done = (__used > __skip
				 ? (__used - __skip) / __fmultiple : 0);

	  if (__conv != size_t(-1))
	    {
	      __from_next = __from + __done;
	      __to_next = reinterpret_cast<intern_type*>(__cto);
	      __ret = codecvt_base::ok;
	    }
	  else if (__used > __skip)
	    {
	      __from_next = __from + __done;
	      __to_next = reinterpret_cast<intern_type*>(__cto);
	      __ret = codecvt_base::partial;
	    }
	  else
	    __ret = codecvt_base::error;
	}
      return __ret;
    }

  // Returns sizeof(_InternT) / sizeof(_ExternT) when the external type is
  // no wider than the internal type, and 0 otherwise.
  template<typename _InternT, typename _ExternT>
    int
    codecvt<_InternT, _ExternT, encoding_state>::
    do_encoding() const throw()
    {
      int __ret = 0;
      if (sizeof(_ExternT) <= sizeof(_InternT))
	__ret = sizeof(_InternT) / sizeof(_ExternT);
      return __ret;
    }

  // This facet always reports that conversion is required.
  template<typename _InternT, typename _ExternT>
    bool
    codecvt<_InternT, _ExternT, encoding_state>::
    do_always_noconv() const throw()
    { return false; }

  // Returns the smaller of __max and the number of external elements in
  // [__from, __end); the state argument is not consulted.
  template<typename _InternT, typename _ExternT>
    int
    codecvt<_InternT, _ExternT, encoding_state>::
    do_length(state_type&, const extern_type* __from,
	      const extern_type* __end, size_t __max) const
    { return std::min(__max, static_cast<size_t>(__end - __from)); }

  // _GLIBCXX_RESOLVE_LIB_DEFECTS
  // 74.  Garbled text for codecvt::do_max_length
  template<typename _InternT, typename _ExternT>
    int
    codecvt<_InternT, _ExternT, encoding_state>::
    do_max_length() const throw()
    { return 1; }

_GLIBCXX_END_NAMESPACE

#endif // _GLIBCXX_USE_ICONV

#endif // _EXT_CODECVT_SPECIALIZATIONS_H