1 // Locale support (codecvt) -*- C++ -*- 2 3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 4 // Free Software Foundation, Inc. 5 // 6 // This file is part of the GNU ISO C++ Library. This library is free 7 // software; you can redistribute it and/or modify it under the 8 // terms of the GNU General Public License as published by the 9 // Free Software Foundation; either version 2, or (at your option) 10 // any later version. 11 12 // This library is distributed in the hope that it will be useful, 13 // but WITHOUT ANY WARRANTY; without even the implied warranty of 14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 // GNU General Public License for more details. 16 17 // You should have received a copy of the GNU General Public License along 18 // with this library; see the file COPYING. If not, write to the Free 19 // Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, 20 // USA. 21 22 // As a special exception, you may use this file as part of a free software 23 // library without restriction. Specifically, if other files instantiate 24 // templates or use macros or inline functions from this file, or you compile 25 // this file and link it with other files to produce an executable, this 26 // file does not by itself cause the resulting executable to be covered by 27 // the GNU General Public License. This exception does not however 28 // invalidate any other reasons why the executable file might be covered by 29 // the GNU General Public License. 30 31 /** @file bits/codecvt.h 32 * This is an internal header file, included by other library headers. 33 * You should not attempt to use it directly. 34 */ 35 36 // 37 // ISO C++ 14882: 22.2.1.5 Template class codecvt 38 // 39 40 // Written by Benjamin Kosnik <bkoz@redhat.com> 41 42 #ifndef _CODECVT_H 43 #define _CODECVT_H 1 44 45 #pragma GCC system_header 46 47 _GLIBCXX_BEGIN_NAMESPACE(std) 48 49 /// @brief Empty base class for codecvt facet [22.2.1.5]. 50 class codecvt_base 51 { 52 public: 53 enum result 54 { 55 ok, 56 partial, 57 error, 58 noconv 59 }; 60 }; 61 62 /** 63 * @brief Common base for codecvt functions. 64 * 65 * This template class provides implementations of the public functions 66 * that forward to the protected virtual functions. 67 * 68 * This template also provides abstract stubs for the protected virtual 69 * functions. 70 */ 71 template<typename _InternT, typename _ExternT, typename _StateT> 72 class __codecvt_abstract_base 73 : public locale::facet, public codecvt_base 74 { 75 public: 76 // Types: 77 typedef codecvt_base::result result; 78 typedef _InternT intern_type; 79 typedef _ExternT extern_type; 80 typedef _StateT state_type; 81 82 // 22.2.1.5.1 codecvt members 83 /** 84 * @brief Convert from internal to external character set. 85 * 86 * Converts input string of intern_type to output string of 87 * extern_type. This is analogous to wcsrtombs. It does this by 88 * calling codecvt::do_out. 89 * 90 * The source and destination character sets are determined by the 91 * facet's locale, internal and external types. 92 * 93 * The characters in [from,from_end) are converted and written to 94 * [to,to_end). from_next and to_next are set to point to the 95 * character following the last successfully converted character, 96 * respectively. If the result needed no conversion, from_next and 97 * to_next are not affected. 98 * 99 * The @a state argument should be intialized if the input is at the 100 * beginning and carried from a previous call if continuing 101 * conversion. There are no guarantees about how @a state is used. 102 * 103 * The result returned is a member of codecvt_base::result. If 104 * all the input is converted, returns codecvt_base::ok. If no 105 * conversion is necessary, returns codecvt_base::noconv. If 106 * the input ends early or there is insufficient space in the 107 * output, returns codecvt_base::partial. Otherwise the 108 * conversion failed and codecvt_base::error is returned. 109 * 110 * @param state Persistent conversion state data. 111 * @param from Start of input. 112 * @param from_end End of input. 113 * @param from_next Returns start of unconverted data. 114 * @param to Start of output buffer. 115 * @param to_end End of output buffer. 116 * @param to_next Returns start of unused output area. 117 * @return codecvt_base::result. 118 */ 119 result 120 out(state_type& __state, const intern_type* __from, 121 const intern_type* __from_end, const intern_type*& __from_next, 122 extern_type* __to, extern_type* __to_end, 123 extern_type*& __to_next) const 124 { 125 return this->do_out(__state, __from, __from_end, __from_next, 126 __to, __to_end, __to_next); 127 } 128 129 /** 130 * @brief Reset conversion state. 131 * 132 * Writes characters to output that would restore @a state to initial 133 * conditions. The idea is that if a partial conversion occurs, then 134 * the converting the characters written by this function would leave 135 * the state in initial conditions, rather than partial conversion 136 * state. It does this by calling codecvt::do_unshift(). 137 * 138 * For example, if 4 external characters always converted to 1 internal 139 * character, and input to in() had 6 external characters with state 140 * saved, this function would write two characters to the output and 141 * set the state to initialized conditions. 142 * 143 * The source and destination character sets are determined by the 144 * facet's locale, internal and external types. 145 * 146 * The result returned is a member of codecvt_base::result. If the 147 * state could be reset and data written, returns codecvt_base::ok. If 148 * no conversion is necessary, returns codecvt_base::noconv. If the 149 * output has insufficient space, returns codecvt_base::partial. 150 * Otherwise the reset failed and codecvt_base::error is returned. 151 * 152 * @param state Persistent conversion state data. 153 * @param to Start of output buffer. 154 * @param to_end End of output buffer. 155 * @param to_next Returns start of unused output area. 156 * @return codecvt_base::result. 157 */ 158 result 159 unshift(state_type& __state, extern_type* __to, extern_type* __to_end, 160 extern_type*& __to_next) const 161 { return this->do_unshift(__state, __to,__to_end,__to_next); } 162 163 /** 164 * @brief Convert from external to internal character set. 165 * 166 * Converts input string of extern_type to output string of 167 * intern_type. This is analogous to mbsrtowcs. It does this by 168 * calling codecvt::do_in. 169 * 170 * The source and destination character sets are determined by the 171 * facet's locale, internal and external types. 172 * 173 * The characters in [from,from_end) are converted and written to 174 * [to,to_end). from_next and to_next are set to point to the 175 * character following the last successfully converted character, 176 * respectively. If the result needed no conversion, from_next and 177 * to_next are not affected. 178 * 179 * The @a state argument should be intialized if the input is at the 180 * beginning and carried from a previous call if continuing 181 * conversion. There are no guarantees about how @a state is used. 182 * 183 * The result returned is a member of codecvt_base::result. If 184 * all the input is converted, returns codecvt_base::ok. If no 185 * conversion is necessary, returns codecvt_base::noconv. If 186 * the input ends early or there is insufficient space in the 187 * output, returns codecvt_base::partial. Otherwise the 188 * conversion failed and codecvt_base::error is returned. 189 * 190 * @param state Persistent conversion state data. 191 * @param from Start of input. 192 * @param from_end End of input. 193 * @param from_next Returns start of unconverted data. 194 * @param to Start of output buffer. 195 * @param to_end End of output buffer. 196 * @param to_next Returns start of unused output area. 197 * @return codecvt_base::result. 198 */ 199 result 200 in(state_type& __state, const extern_type* __from, 201 const extern_type* __from_end, const extern_type*& __from_next, 202 intern_type* __to, intern_type* __to_end, 203 intern_type*& __to_next) const 204 { 205 return this->do_in(__state, __from, __from_end, __from_next, 206 __to, __to_end, __to_next); 207 } 208 209 int 210 encoding() const throw() 211 { return this->do_encoding(); } 212 213 bool 214 always_noconv() const throw() 215 { return this->do_always_noconv(); } 216 217 int 218 length(state_type& __state, const extern_type* __from, 219 const extern_type* __end, size_t __max) const 220 { return this->do_length(__state, __from, __end, __max); } 221 222 int 223 max_length() const throw() 224 { return this->do_max_length(); } 225 226 protected: 227 explicit 228 __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { } 229 230 virtual 231 ~__codecvt_abstract_base() { } 232 233 /** 234 * @brief Convert from internal to external character set. 235 * 236 * Converts input string of intern_type to output string of 237 * extern_type. This function is a hook for derived classes to change 238 * the value returned. @see out for more information. 239 */ 240 virtual result 241 do_out(state_type& __state, const intern_type* __from, 242 const intern_type* __from_end, const intern_type*& __from_next, 243 extern_type* __to, extern_type* __to_end, 244 extern_type*& __to_next) const = 0; 245 246 virtual result 247 do_unshift(state_type& __state, extern_type* __to, 248 extern_type* __to_end, extern_type*& __to_next) const = 0; 249 250 virtual result 251 do_in(state_type& __state, const extern_type* __from, 252 const extern_type* __from_end, const extern_type*& __from_next, 253 intern_type* __to, intern_type* __to_end, 254 intern_type*& __to_next) const = 0; 255 256 virtual int 257 do_encoding() const throw() = 0; 258 259 virtual bool 260 do_always_noconv() const throw() = 0; 261 262 virtual int 263 do_length(state_type&, const extern_type* __from, 264 const extern_type* __end, size_t __max) const = 0; 265 266 virtual int 267 do_max_length() const throw() = 0; 268 }; 269 270 /// @brief class codecvt [22.2.1.5]. 271 /// NB: Generic, mostly useless implementation. 272 template<typename _InternT, typename _ExternT, typename _StateT> 273 class codecvt 274 : public __codecvt_abstract_base<_InternT, _ExternT, _StateT> 275 { 276 public: 277 // Types: 278 typedef codecvt_base::result result; 279 typedef _InternT intern_type; 280 typedef _ExternT extern_type; 281 typedef _StateT state_type; 282 283 protected: 284 __c_locale _M_c_locale_codecvt; 285 286 public: 287 static locale::id id; 288 289 explicit 290 codecvt(size_t __refs = 0) 291 : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs) { } 292 293 explicit 294 codecvt(__c_locale __cloc, size_t __refs = 0); 295 296 protected: 297 virtual 298 ~codecvt() { } 299 300 virtual result 301 do_out(state_type& __state, const intern_type* __from, 302 const intern_type* __from_end, const intern_type*& __from_next, 303 extern_type* __to, extern_type* __to_end, 304 extern_type*& __to_next) const; 305 306 virtual result 307 do_unshift(state_type& __state, extern_type* __to, 308 extern_type* __to_end, extern_type*& __to_next) const; 309 310 virtual result 311 do_in(state_type& __state, const extern_type* __from, 312 const extern_type* __from_end, const extern_type*& __from_next, 313 intern_type* __to, intern_type* __to_end, 314 intern_type*& __to_next) const; 315 316 virtual int 317 do_encoding() const throw(); 318 319 virtual bool 320 do_always_noconv() const throw(); 321 322 virtual int 323 do_length(state_type&, const extern_type* __from, 324 const extern_type* __end, size_t __max) const; 325 326 virtual int 327 do_max_length() const throw(); 328 }; 329 330 template<typename _InternT, typename _ExternT, typename _StateT> 331 locale::id codecvt<_InternT, _ExternT, _StateT>::id; 332 333 /// @brief class codecvt<char, char, mbstate_t> specialization. 334 template<> 335 class codecvt<char, char, mbstate_t> 336 : public __codecvt_abstract_base<char, char, mbstate_t> 337 { 338 public: 339 // Types: 340 typedef char intern_type; 341 typedef char extern_type; 342 typedef mbstate_t state_type; 343 344 protected: 345 __c_locale _M_c_locale_codecvt; 346 347 public: 348 static locale::id id; 349 350 explicit 351 codecvt(size_t __refs = 0); 352 353 explicit 354 codecvt(__c_locale __cloc, size_t __refs = 0); 355 356 protected: 357 virtual 358 ~codecvt(); 359 360 virtual result 361 do_out(state_type& __state, const intern_type* __from, 362 const intern_type* __from_end, const intern_type*& __from_next, 363 extern_type* __to, extern_type* __to_end, 364 extern_type*& __to_next) const; 365 366 virtual result 367 do_unshift(state_type& __state, extern_type* __to, 368 extern_type* __to_end, extern_type*& __to_next) const; 369 370 virtual result 371 do_in(state_type& __state, const extern_type* __from, 372 const extern_type* __from_end, const extern_type*& __from_next, 373 intern_type* __to, intern_type* __to_end, 374 intern_type*& __to_next) const; 375 376 virtual int 377 do_encoding() const throw(); 378 379 virtual bool 380 do_always_noconv() const throw(); 381 382 virtual int 383 do_length(state_type&, const extern_type* __from, 384 const extern_type* __end, size_t __max) const; 385 386 virtual int 387 do_max_length() const throw(); 388 }; 389 390 #ifdef _GLIBCXX_USE_WCHAR_T 391 /// @brief class codecvt<wchar_t, char, mbstate_t> specialization. 392 template<> 393 class codecvt<wchar_t, char, mbstate_t> 394 : public __codecvt_abstract_base<wchar_t, char, mbstate_t> 395 { 396 public: 397 // Types: 398 typedef wchar_t intern_type; 399 typedef char extern_type; 400 typedef mbstate_t state_type; 401 402 protected: 403 __c_locale _M_c_locale_codecvt; 404 405 public: 406 static locale::id id; 407 408 explicit 409 codecvt(size_t __refs = 0); 410 411 explicit 412 codecvt(__c_locale __cloc, size_t __refs = 0); 413 414 protected: 415 virtual 416 ~codecvt(); 417 418 virtual result 419 do_out(state_type& __state, const intern_type* __from, 420 const intern_type* __from_end, const intern_type*& __from_next, 421 extern_type* __to, extern_type* __to_end, 422 extern_type*& __to_next) const; 423 424 virtual result 425 do_unshift(state_type& __state, 426 extern_type* __to, extern_type* __to_end, 427 extern_type*& __to_next) const; 428 429 virtual result 430 do_in(state_type& __state, 431 const extern_type* __from, const extern_type* __from_end, 432 const extern_type*& __from_next, 433 intern_type* __to, intern_type* __to_end, 434 intern_type*& __to_next) const; 435 436 virtual 437 int do_encoding() const throw(); 438 439 virtual 440 bool do_always_noconv() const throw(); 441 442 virtual 443 int do_length(state_type&, const extern_type* __from, 444 const extern_type* __end, size_t __max) const; 445 446 virtual int 447 do_max_length() const throw(); 448 }; 449 #endif //_GLIBCXX_USE_WCHAR_T 450 451 /// @brief class codecvt_byname [22.2.1.6]. 452 template<typename _InternT, typename _ExternT, typename _StateT> 453 class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT> 454 { 455 public: 456 explicit 457 codecvt_byname(const char* __s, size_t __refs = 0) 458 : codecvt<_InternT, _ExternT, _StateT>(__refs) 459 { 460 if (std::strcmp(__s, "C") != 0 && std::strcmp(__s, "POSIX") != 0) 461 { 462 this->_S_destroy_c_locale(this->_M_c_locale_codecvt); 463 this->_S_create_c_locale(this->_M_c_locale_codecvt, __s); 464 } 465 } 466 467 protected: 468 virtual 469 ~codecvt_byname() { } 470 }; 471 472 _GLIBCXX_END_NAMESPACE 473 474 #endif // _CODECVT_H 475