1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
4 // 2009, 2010, 2011  Free Software Foundation, Inc.
5 //
6 // This file is part of the GNU ISO C++ Library.  This library is free
7 // software; you can redistribute it and/or modify it under the
8 // terms of the GNU General Public License as published by the
9 // Free Software Foundation; either version 3, or (at your option)
10 // any later version.
11 
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 
17 // Under Section 7 of GPL version 3, you are granted additional
18 // permissions described in the GCC Runtime Library Exception, version
19 // 3.1, as published by the Free Software Foundation.
20 
21 // You should have received a copy of the GNU General Public License and
22 // a copy of the GCC Runtime Library Exception along with this program;
23 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24 // <http://www.gnu.org/licenses/>.
25 
26 /** @file bits/codecvt.h
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{locale}
29  */
30 
31 //
32 // ISO C++ 14882: 22.2.1.5 Template class codecvt
33 //
34 
35 // Written by Benjamin Kosnik <bkoz@redhat.com>
36 
37 #ifndef _CODECVT_H
38 #define _CODECVT_H 1
39 
40 #pragma GCC system_header
41 
42 namespace std _GLIBCXX_VISIBILITY(default)
43 {
44 _GLIBCXX_BEGIN_NAMESPACE_VERSION
45 
46   /// Empty base class for codecvt facet [22.2.1.5].
47   class codecvt_base
48   {
49   public:
50     enum result
51     {
52       ok,
53       partial,
54       error,
55       noconv
56     };
57   };
58 
59   /**
60    *  @brief  Common base for codecvt functions.
61    *
62    *  This template class provides implementations of the public functions
63    *  that forward to the protected virtual functions.
64    *
65    *  This template also provides abstract stubs for the protected virtual
66    *  functions.
67   */
68   template<typename _InternT, typename _ExternT, typename _StateT>
69     class __codecvt_abstract_base
70     : public locale::facet, public codecvt_base
71     {
72     public:
73       // Types:
74       typedef codecvt_base::result	result;
75       typedef _InternT			intern_type;
76       typedef _ExternT			extern_type;
77       typedef _StateT			state_type;
78 
79       // 22.2.1.5.1 codecvt members
80       /**
81        *  @brief  Convert from internal to external character set.
82        *
83        *  Converts input string of intern_type to output string of
84        *  extern_type.  This is analogous to wcsrtombs.  It does this by
85        *  calling codecvt::do_out.
86        *
87        *  The source and destination character sets are determined by the
88        *  facet's locale, internal and external types.
89        *
90        *  The characters in [from,from_end) are converted and written to
91        *  [to,to_end).  from_next and to_next are set to point to the
92        *  character following the last successfully converted character,
93        *  respectively.  If the result needed no conversion, from_next and
94        *  to_next are not affected.
95        *
96        *  The @a state argument should be initialized if the input is at the
97        *  beginning and carried from a previous call if continuing
98        *  conversion.  There are no guarantees about how @a state is used.
99        *
100        *  The result returned is a member of codecvt_base::result.  If
101        *  all the input is converted, returns codecvt_base::ok.  If no
102        *  conversion is necessary, returns codecvt_base::noconv.  If
103        *  the input ends early or there is insufficient space in the
104        *  output, returns codecvt_base::partial.  Otherwise the
105        *  conversion failed and codecvt_base::error is returned.
106        *
107        *  @param  __state  Persistent conversion state data.
108        *  @param  __from  Start of input.
109        *  @param  __from_end  End of input.
110        *  @param  __from_next  Returns start of unconverted data.
111        *  @param  __to  Start of output buffer.
112        *  @param  __to_end  End of output buffer.
113        *  @param  __to_next  Returns start of unused output area.
114        *  @return  codecvt_base::result.
115       */
116       result
117       out(state_type& __state, const intern_type* __from,
118 	  const intern_type* __from_end, const intern_type*& __from_next,
119 	  extern_type* __to, extern_type* __to_end,
120 	  extern_type*& __to_next) const
121       {
122 	return this->do_out(__state, __from, __from_end, __from_next,
123 			    __to, __to_end, __to_next);
124       }
125 
126       /**
127        *  @brief  Reset conversion state.
128        *
129        *  Writes characters to output that would restore @a state to initial
130        *  conditions.  The idea is that if a partial conversion occurs, then
131        *  the converting the characters written by this function would leave
132        *  the state in initial conditions, rather than partial conversion
133        *  state.  It does this by calling codecvt::do_unshift().
134        *
135        *  For example, if 4 external characters always converted to 1 internal
136        *  character, and input to in() had 6 external characters with state
137        *  saved, this function would write two characters to the output and
138        *  set the state to initialized conditions.
139        *
140        *  The source and destination character sets are determined by the
141        *  facet's locale, internal and external types.
142        *
143        *  The result returned is a member of codecvt_base::result.  If the
144        *  state could be reset and data written, returns codecvt_base::ok.  If
145        *  no conversion is necessary, returns codecvt_base::noconv.  If the
146        *  output has insufficient space, returns codecvt_base::partial.
147        *  Otherwise the reset failed and codecvt_base::error is returned.
148        *
149        *  @param  __state  Persistent conversion state data.
150        *  @param  __to  Start of output buffer.
151        *  @param  __to_end  End of output buffer.
152        *  @param  __to_next  Returns start of unused output area.
153        *  @return  codecvt_base::result.
154       */
155       result
156       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
157 	      extern_type*& __to_next) const
158       { return this->do_unshift(__state, __to,__to_end,__to_next); }
159 
160       /**
161        *  @brief  Convert from external to internal character set.
162        *
163        *  Converts input string of extern_type to output string of
164        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
165        *  calling codecvt::do_in.
166        *
167        *  The source and destination character sets are determined by the
168        *  facet's locale, internal and external types.
169        *
170        *  The characters in [from,from_end) are converted and written to
171        *  [to,to_end).  from_next and to_next are set to point to the
172        *  character following the last successfully converted character,
173        *  respectively.  If the result needed no conversion, from_next and
174        *  to_next are not affected.
175        *
176        *  The @a state argument should be initialized if the input is at the
177        *  beginning and carried from a previous call if continuing
178        *  conversion.  There are no guarantees about how @a state is used.
179        *
180        *  The result returned is a member of codecvt_base::result.  If
181        *  all the input is converted, returns codecvt_base::ok.  If no
182        *  conversion is necessary, returns codecvt_base::noconv.  If
183        *  the input ends early or there is insufficient space in the
184        *  output, returns codecvt_base::partial.  Otherwise the
185        *  conversion failed and codecvt_base::error is returned.
186        *
187        *  @param  __state  Persistent conversion state data.
188        *  @param  __from  Start of input.
189        *  @param  __from_end  End of input.
190        *  @param  __from_next  Returns start of unconverted data.
191        *  @param  __to  Start of output buffer.
192        *  @param  __to_end  End of output buffer.
193        *  @param  __to_next  Returns start of unused output area.
194        *  @return  codecvt_base::result.
195       */
196       result
197       in(state_type& __state, const extern_type* __from,
198 	 const extern_type* __from_end, const extern_type*& __from_next,
199 	 intern_type* __to, intern_type* __to_end,
200 	 intern_type*& __to_next) const
201       {
202 	return this->do_in(__state, __from, __from_end, __from_next,
203 			   __to, __to_end, __to_next);
204       }
205 
206       int
207       encoding() const throw()
208       { return this->do_encoding(); }
209 
210       bool
211       always_noconv() const throw()
212       { return this->do_always_noconv(); }
213 
214       int
215       length(state_type& __state, const extern_type* __from,
216 	     const extern_type* __end, size_t __max) const
217       { return this->do_length(__state, __from, __end, __max); }
218 
219       int
220       max_length() const throw()
221       { return this->do_max_length(); }
222 
223     protected:
224       explicit
225       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
226 
227       virtual
228       ~__codecvt_abstract_base() { }
229 
230       /**
231        *  @brief  Convert from internal to external character set.
232        *
233        *  Converts input string of intern_type to output string of
234        *  extern_type.  This function is a hook for derived classes to change
235        *  the value returned.  @see out for more information.
236       */
237       virtual result
238       do_out(state_type& __state, const intern_type* __from,
239 	     const intern_type* __from_end, const intern_type*& __from_next,
240 	     extern_type* __to, extern_type* __to_end,
241 	     extern_type*& __to_next) const = 0;
242 
243       virtual result
244       do_unshift(state_type& __state, extern_type* __to,
245 		 extern_type* __to_end, extern_type*& __to_next) const = 0;
246 
247       virtual result
248       do_in(state_type& __state, const extern_type* __from,
249 	    const extern_type* __from_end, const extern_type*& __from_next,
250 	    intern_type* __to, intern_type* __to_end,
251 	    intern_type*& __to_next) const = 0;
252 
253       virtual int
254       do_encoding() const throw() = 0;
255 
256       virtual bool
257       do_always_noconv() const throw() = 0;
258 
259       virtual int
260       do_length(state_type&, const extern_type* __from,
261 		const extern_type* __end, size_t __max) const = 0;
262 
263       virtual int
264       do_max_length() const throw() = 0;
265     };
266 
267 
268 
269   /**
270    *  @brief  Primary class template codecvt.
271    *  @ingroup locales
272    *
273    *  NB: Generic, mostly useless implementation.
274    *
275   */
276    template<typename _InternT, typename _ExternT, typename _StateT>
277     class codecvt
278     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
279     {
280     public:
281       // Types:
282       typedef codecvt_base::result	result;
283       typedef _InternT			intern_type;
284       typedef _ExternT			extern_type;
285       typedef _StateT			state_type;
286 
287     protected:
288       __c_locale			_M_c_locale_codecvt;
289 
290     public:
291       static locale::id			id;
292 
293       explicit
294       codecvt(size_t __refs = 0)
295       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
296 	_M_c_locale_codecvt(0)
297       { }
298 
299       explicit
300       codecvt(__c_locale __cloc, size_t __refs = 0);
301 
302     protected:
303       virtual
304       ~codecvt() { }
305 
306       virtual result
307       do_out(state_type& __state, const intern_type* __from,
308 	     const intern_type* __from_end, const intern_type*& __from_next,
309 	     extern_type* __to, extern_type* __to_end,
310 	     extern_type*& __to_next) const;
311 
312       virtual result
313       do_unshift(state_type& __state, extern_type* __to,
314 		 extern_type* __to_end, extern_type*& __to_next) const;
315 
316       virtual result
317       do_in(state_type& __state, const extern_type* __from,
318 	    const extern_type* __from_end, const extern_type*& __from_next,
319 	    intern_type* __to, intern_type* __to_end,
320 	    intern_type*& __to_next) const;
321 
322       virtual int
323       do_encoding() const throw();
324 
325       virtual bool
326       do_always_noconv() const throw();
327 
328       virtual int
329       do_length(state_type&, const extern_type* __from,
330 		const extern_type* __end, size_t __max) const;
331 
332       virtual int
333       do_max_length() const throw();
334     };
335 
336   template<typename _InternT, typename _ExternT, typename _StateT>
337     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
338 
339   /// class codecvt<char, char, mbstate_t> specialization.
340   template<>
341     class codecvt<char, char, mbstate_t>
342     : public __codecvt_abstract_base<char, char, mbstate_t>
343     {
344     public:
345       // Types:
346       typedef char			intern_type;
347       typedef char			extern_type;
348       typedef mbstate_t			state_type;
349 
350     protected:
351       __c_locale			_M_c_locale_codecvt;
352 
353     public:
354       static locale::id id;
355 
356       explicit
357       codecvt(size_t __refs = 0);
358 
359       explicit
360       codecvt(__c_locale __cloc, size_t __refs = 0);
361 
362     protected:
363       virtual
364       ~codecvt();
365 
366       virtual result
367       do_out(state_type& __state, const intern_type* __from,
368 	     const intern_type* __from_end, const intern_type*& __from_next,
369 	     extern_type* __to, extern_type* __to_end,
370 	     extern_type*& __to_next) const;
371 
372       virtual result
373       do_unshift(state_type& __state, extern_type* __to,
374 		 extern_type* __to_end, extern_type*& __to_next) const;
375 
376       virtual result
377       do_in(state_type& __state, const extern_type* __from,
378 	    const extern_type* __from_end, const extern_type*& __from_next,
379 	    intern_type* __to, intern_type* __to_end,
380 	    intern_type*& __to_next) const;
381 
382       virtual int
383       do_encoding() const throw();
384 
385       virtual bool
386       do_always_noconv() const throw();
387 
388       virtual int
389       do_length(state_type&, const extern_type* __from,
390 		const extern_type* __end, size_t __max) const;
391 
392       virtual int
393       do_max_length() const throw();
394   };
395 
396 #ifdef _GLIBCXX_USE_WCHAR_T
397   /// class codecvt<wchar_t, char, mbstate_t> specialization.
398   template<>
399     class codecvt<wchar_t, char, mbstate_t>
400     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
401     {
402     public:
403       // Types:
404       typedef wchar_t			intern_type;
405       typedef char			extern_type;
406       typedef mbstate_t			state_type;
407 
408     protected:
409       __c_locale			_M_c_locale_codecvt;
410 
411     public:
412       static locale::id			id;
413 
414       explicit
415       codecvt(size_t __refs = 0);
416 
417       explicit
418       codecvt(__c_locale __cloc, size_t __refs = 0);
419 
420     protected:
421       virtual
422       ~codecvt();
423 
424       virtual result
425       do_out(state_type& __state, const intern_type* __from,
426 	     const intern_type* __from_end, const intern_type*& __from_next,
427 	     extern_type* __to, extern_type* __to_end,
428 	     extern_type*& __to_next) const;
429 
430       virtual result
431       do_unshift(state_type& __state,
432 		 extern_type* __to, extern_type* __to_end,
433 		 extern_type*& __to_next) const;
434 
435       virtual result
436       do_in(state_type& __state,
437 	     const extern_type* __from, const extern_type* __from_end,
438 	     const extern_type*& __from_next,
439 	     intern_type* __to, intern_type* __to_end,
440 	     intern_type*& __to_next) const;
441 
442       virtual
443       int do_encoding() const throw();
444 
445       virtual
446       bool do_always_noconv() const throw();
447 
448       virtual
449       int do_length(state_type&, const extern_type* __from,
450 		    const extern_type* __end, size_t __max) const;
451 
452       virtual int
453       do_max_length() const throw();
454     };
455 #endif //_GLIBCXX_USE_WCHAR_T
456 
457   /// class codecvt_byname [22.2.1.6].
458   template<typename _InternT, typename _ExternT, typename _StateT>
459     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
460     {
461     public:
462       explicit
463       codecvt_byname(const char* __s, size_t __refs = 0)
464       : codecvt<_InternT, _ExternT, _StateT>(__refs)
465       {
466 	if (__builtin_strcmp(__s, "C") != 0
467 	    && __builtin_strcmp(__s, "POSIX") != 0)
468 	  {
469 	    this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
470 	    this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
471 	  }
472       }
473 
474     protected:
475       virtual
476       ~codecvt_byname() { }
477     };
478 
479   // Inhibit implicit instantiations for required instantiations,
480   // which are defined via explicit instantiations elsewhere.
481 #if _GLIBCXX_EXTERN_TEMPLATE
482   extern template class codecvt_byname<char, char, mbstate_t>;
483 
484   extern template
485     const codecvt<char, char, mbstate_t>&
486     use_facet<codecvt<char, char, mbstate_t> >(const locale&);
487 
488   extern template
489     bool
490     has_facet<codecvt<char, char, mbstate_t> >(const locale&);
491 
492 #ifdef _GLIBCXX_USE_WCHAR_T
493   extern template class codecvt_byname<wchar_t, char, mbstate_t>;
494 
495   extern template
496     const codecvt<wchar_t, char, mbstate_t>&
497     use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
498 
499   extern template
500     bool
501     has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
502 #endif
503 #endif
504 
505 _GLIBCXX_END_NAMESPACE_VERSION
506 } // namespace std
507 
508 #endif // _CODECVT_H
509