1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005
4 //  Free Software Foundation, Inc.
5 //
6 // This file is part of the GNU ISO C++ Library.  This library is free
7 // software; you can redistribute it and/or modify it under the
8 // terms of the GNU General Public License as published by the
9 // Free Software Foundation; either version 2, or (at your option)
10 // any later version.
11 
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 
17 // You should have received a copy of the GNU General Public License along
18 // with this library; see the file COPYING.  If not, write to the Free
19 // Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
20 // USA.
21 
22 // As a special exception, you may use this file as part of a free software
23 // library without restriction.  Specifically, if other files instantiate
24 // templates or use macros or inline functions from this file, or you compile
25 // this file and link it with other files to produce an executable, this
26 // file does not by itself cause the resulting executable to be covered by
27 // the GNU General Public License.  This exception does not however
28 // invalidate any other reasons why the executable file might be covered by
29 // the GNU General Public License.
30 
31 /** @file bits/codecvt.h
32  *  This is an internal header file, included by other library headers.
33  *  You should not attempt to use it directly.
34  */
35 
36 //
37 // ISO C++ 14882: 22.2.1.5 Template class codecvt
38 //
39 
40 // Written by Benjamin Kosnik <bkoz@redhat.com>
41 
42 #ifndef _CODECVT_H
43 #define _CODECVT_H 1
44 
45 #pragma GCC system_header
46 
47 _GLIBCXX_BEGIN_NAMESPACE(std)
48 
49   /// @brief  Empty base class for codecvt facet [22.2.1.5].
50   class codecvt_base
51   {
52   public:
53     enum result
54     {
55       ok,
56       partial,
57       error,
58       noconv
59     };
60   };
61 
62   /**
63    *  @brief  Common base for codecvt functions.
64    *
65    *  This template class provides implementations of the public functions
66    *  that forward to the protected virtual functions.
67    *
68    *  This template also provides abstract stubs for the protected virtual
69    *  functions.
70   */
71   template<typename _InternT, typename _ExternT, typename _StateT>
72     class __codecvt_abstract_base
73     : public locale::facet, public codecvt_base
74     {
75     public:
76       // Types:
77       typedef codecvt_base::result	result;
78       typedef _InternT			intern_type;
79       typedef _ExternT			extern_type;
80       typedef _StateT			state_type;
81 
82       // 22.2.1.5.1 codecvt members
83       /**
84        *  @brief  Convert from internal to external character set.
85        *
86        *  Converts input string of intern_type to output string of
87        *  extern_type.  This is analogous to wcsrtombs.  It does this by
88        *  calling codecvt::do_out.
89        *
90        *  The source and destination character sets are determined by the
91        *  facet's locale, internal and external types.
92        *
93        *  The characters in [from,from_end) are converted and written to
94        *  [to,to_end).  from_next and to_next are set to point to the
95        *  character following the last successfully converted character,
96        *  respectively.  If the result needed no conversion, from_next and
97        *  to_next are not affected.
98        *
99        *  The @a state argument should be intialized if the input is at the
100        *  beginning and carried from a previous call if continuing
101        *  conversion.  There are no guarantees about how @a state is used.
102        *
103        *  The result returned is a member of codecvt_base::result.  If
104        *  all the input is converted, returns codecvt_base::ok.  If no
105        *  conversion is necessary, returns codecvt_base::noconv.  If
106        *  the input ends early or there is insufficient space in the
107        *  output, returns codecvt_base::partial.  Otherwise the
108        *  conversion failed and codecvt_base::error is returned.
109        *
110        *  @param  state  Persistent conversion state data.
111        *  @param  from  Start of input.
112        *  @param  from_end  End of input.
113        *  @param  from_next  Returns start of unconverted data.
114        *  @param  to  Start of output buffer.
115        *  @param  to_end  End of output buffer.
116        *  @param  to_next  Returns start of unused output area.
117        *  @return  codecvt_base::result.
118       */
119       result
120       out(state_type& __state, const intern_type* __from,
121 	  const intern_type* __from_end, const intern_type*& __from_next,
122 	  extern_type* __to, extern_type* __to_end,
123 	  extern_type*& __to_next) const
124       {
125 	return this->do_out(__state, __from, __from_end, __from_next,
126 			    __to, __to_end, __to_next);
127       }
128 
129       /**
130        *  @brief  Reset conversion state.
131        *
132        *  Writes characters to output that would restore @a state to initial
133        *  conditions.  The idea is that if a partial conversion occurs, then
134        *  the converting the characters written by this function would leave
135        *  the state in initial conditions, rather than partial conversion
136        *  state.  It does this by calling codecvt::do_unshift().
137        *
138        *  For example, if 4 external characters always converted to 1 internal
139        *  character, and input to in() had 6 external characters with state
140        *  saved, this function would write two characters to the output and
141        *  set the state to initialized conditions.
142        *
143        *  The source and destination character sets are determined by the
144        *  facet's locale, internal and external types.
145        *
146        *  The result returned is a member of codecvt_base::result.  If the
147        *  state could be reset and data written, returns codecvt_base::ok.  If
148        *  no conversion is necessary, returns codecvt_base::noconv.  If the
149        *  output has insufficient space, returns codecvt_base::partial.
150        *  Otherwise the reset failed and codecvt_base::error is returned.
151        *
152        *  @param  state  Persistent conversion state data.
153        *  @param  to  Start of output buffer.
154        *  @param  to_end  End of output buffer.
155        *  @param  to_next  Returns start of unused output area.
156        *  @return  codecvt_base::result.
157       */
158       result
159       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
160 	      extern_type*& __to_next) const
161       { return this->do_unshift(__state, __to,__to_end,__to_next); }
162 
163       /**
164        *  @brief  Convert from external to internal character set.
165        *
166        *  Converts input string of extern_type to output string of
167        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
168        *  calling codecvt::do_in.
169        *
170        *  The source and destination character sets are determined by the
171        *  facet's locale, internal and external types.
172        *
173        *  The characters in [from,from_end) are converted and written to
174        *  [to,to_end).  from_next and to_next are set to point to the
175        *  character following the last successfully converted character,
176        *  respectively.  If the result needed no conversion, from_next and
177        *  to_next are not affected.
178        *
179        *  The @a state argument should be intialized if the input is at the
180        *  beginning and carried from a previous call if continuing
181        *  conversion.  There are no guarantees about how @a state is used.
182        *
183        *  The result returned is a member of codecvt_base::result.  If
184        *  all the input is converted, returns codecvt_base::ok.  If no
185        *  conversion is necessary, returns codecvt_base::noconv.  If
186        *  the input ends early or there is insufficient space in the
187        *  output, returns codecvt_base::partial.  Otherwise the
188        *  conversion failed and codecvt_base::error is returned.
189        *
190        *  @param  state  Persistent conversion state data.
191        *  @param  from  Start of input.
192        *  @param  from_end  End of input.
193        *  @param  from_next  Returns start of unconverted data.
194        *  @param  to  Start of output buffer.
195        *  @param  to_end  End of output buffer.
196        *  @param  to_next  Returns start of unused output area.
197        *  @return  codecvt_base::result.
198       */
199       result
200       in(state_type& __state, const extern_type* __from,
201 	 const extern_type* __from_end, const extern_type*& __from_next,
202 	 intern_type* __to, intern_type* __to_end,
203 	 intern_type*& __to_next) const
204       {
205 	return this->do_in(__state, __from, __from_end, __from_next,
206 			   __to, __to_end, __to_next);
207       }
208 
209       int
210       encoding() const throw()
211       { return this->do_encoding(); }
212 
213       bool
214       always_noconv() const throw()
215       { return this->do_always_noconv(); }
216 
217       int
218       length(state_type& __state, const extern_type* __from,
219 	     const extern_type* __end, size_t __max) const
220       { return this->do_length(__state, __from, __end, __max); }
221 
222       int
223       max_length() const throw()
224       { return this->do_max_length(); }
225 
226     protected:
227       explicit
228       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
229 
230       virtual
231       ~__codecvt_abstract_base() { }
232 
233       /**
234        *  @brief  Convert from internal to external character set.
235        *
236        *  Converts input string of intern_type to output string of
237        *  extern_type.  This function is a hook for derived classes to change
238        *  the value returned.  @see out for more information.
239       */
240       virtual result
241       do_out(state_type& __state, const intern_type* __from,
242 	     const intern_type* __from_end, const intern_type*& __from_next,
243 	     extern_type* __to, extern_type* __to_end,
244 	     extern_type*& __to_next) const = 0;
245 
246       virtual result
247       do_unshift(state_type& __state, extern_type* __to,
248 		 extern_type* __to_end, extern_type*& __to_next) const = 0;
249 
250       virtual result
251       do_in(state_type& __state, const extern_type* __from,
252 	    const extern_type* __from_end, const extern_type*& __from_next,
253 	    intern_type* __to, intern_type* __to_end,
254 	    intern_type*& __to_next) const = 0;
255 
256       virtual int
257       do_encoding() const throw() = 0;
258 
259       virtual bool
260       do_always_noconv() const throw() = 0;
261 
262       virtual int
263       do_length(state_type&, const extern_type* __from,
264 		const extern_type* __end, size_t __max) const = 0;
265 
266       virtual int
267       do_max_length() const throw() = 0;
268     };
269 
270   /// @brief class codecvt [22.2.1.5].
271   /// NB: Generic, mostly useless implementation.
272   template<typename _InternT, typename _ExternT, typename _StateT>
273     class codecvt
274     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
275     {
276     public:
277       // Types:
278       typedef codecvt_base::result	result;
279       typedef _InternT			intern_type;
280       typedef _ExternT			extern_type;
281       typedef _StateT			state_type;
282 
283     protected:
284       __c_locale			_M_c_locale_codecvt;
285 
286     public:
287       static locale::id			id;
288 
289       explicit
290       codecvt(size_t __refs = 0)
291       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs) { }
292 
293       explicit
294       codecvt(__c_locale __cloc, size_t __refs = 0);
295 
296     protected:
297       virtual
298       ~codecvt() { }
299 
300       virtual result
301       do_out(state_type& __state, const intern_type* __from,
302 	     const intern_type* __from_end, const intern_type*& __from_next,
303 	     extern_type* __to, extern_type* __to_end,
304 	     extern_type*& __to_next) const;
305 
306       virtual result
307       do_unshift(state_type& __state, extern_type* __to,
308 		 extern_type* __to_end, extern_type*& __to_next) const;
309 
310       virtual result
311       do_in(state_type& __state, const extern_type* __from,
312 	    const extern_type* __from_end, const extern_type*& __from_next,
313 	    intern_type* __to, intern_type* __to_end,
314 	    intern_type*& __to_next) const;
315 
316       virtual int
317       do_encoding() const throw();
318 
319       virtual bool
320       do_always_noconv() const throw();
321 
322       virtual int
323       do_length(state_type&, const extern_type* __from,
324 		const extern_type* __end, size_t __max) const;
325 
326       virtual int
327       do_max_length() const throw();
328     };
329 
330   template<typename _InternT, typename _ExternT, typename _StateT>
331     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
332 
333   /// @brief class codecvt<char, char, mbstate_t> specialization.
334   template<>
335     class codecvt<char, char, mbstate_t>
336     : public __codecvt_abstract_base<char, char, mbstate_t>
337     {
338     public:
339       // Types:
340       typedef char			intern_type;
341       typedef char			extern_type;
342       typedef mbstate_t			state_type;
343 
344     protected:
345       __c_locale			_M_c_locale_codecvt;
346 
347     public:
348       static locale::id id;
349 
350       explicit
351       codecvt(size_t __refs = 0);
352 
353       explicit
354       codecvt(__c_locale __cloc, size_t __refs = 0);
355 
356     protected:
357       virtual
358       ~codecvt();
359 
360       virtual result
361       do_out(state_type& __state, const intern_type* __from,
362 	     const intern_type* __from_end, const intern_type*& __from_next,
363 	     extern_type* __to, extern_type* __to_end,
364 	     extern_type*& __to_next) const;
365 
366       virtual result
367       do_unshift(state_type& __state, extern_type* __to,
368 		 extern_type* __to_end, extern_type*& __to_next) const;
369 
370       virtual result
371       do_in(state_type& __state, const extern_type* __from,
372 	    const extern_type* __from_end, const extern_type*& __from_next,
373 	    intern_type* __to, intern_type* __to_end,
374 	    intern_type*& __to_next) const;
375 
376       virtual int
377       do_encoding() const throw();
378 
379       virtual bool
380       do_always_noconv() const throw();
381 
382       virtual int
383       do_length(state_type&, const extern_type* __from,
384 		const extern_type* __end, size_t __max) const;
385 
386       virtual int
387       do_max_length() const throw();
388   };
389 
390 #ifdef _GLIBCXX_USE_WCHAR_T
391   /// @brief  class codecvt<wchar_t, char, mbstate_t> specialization.
392   template<>
393     class codecvt<wchar_t, char, mbstate_t>
394     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
395     {
396     public:
397       // Types:
398       typedef wchar_t			intern_type;
399       typedef char			extern_type;
400       typedef mbstate_t			state_type;
401 
402     protected:
403       __c_locale			_M_c_locale_codecvt;
404 
405     public:
406       static locale::id			id;
407 
408       explicit
409       codecvt(size_t __refs = 0);
410 
411       explicit
412       codecvt(__c_locale __cloc, size_t __refs = 0);
413 
414     protected:
415       virtual
416       ~codecvt();
417 
418       virtual result
419       do_out(state_type& __state, const intern_type* __from,
420 	     const intern_type* __from_end, const intern_type*& __from_next,
421 	     extern_type* __to, extern_type* __to_end,
422 	     extern_type*& __to_next) const;
423 
424       virtual result
425       do_unshift(state_type& __state,
426 		 extern_type* __to, extern_type* __to_end,
427 		 extern_type*& __to_next) const;
428 
429       virtual result
430       do_in(state_type& __state,
431 	     const extern_type* __from, const extern_type* __from_end,
432 	     const extern_type*& __from_next,
433 	     intern_type* __to, intern_type* __to_end,
434 	     intern_type*& __to_next) const;
435 
436       virtual
437       int do_encoding() const throw();
438 
439       virtual
440       bool do_always_noconv() const throw();
441 
442       virtual
443       int do_length(state_type&, const extern_type* __from,
444 		    const extern_type* __end, size_t __max) const;
445 
446       virtual int
447       do_max_length() const throw();
448     };
449 #endif //_GLIBCXX_USE_WCHAR_T
450 
451   /// @brief class codecvt_byname [22.2.1.6].
452   template<typename _InternT, typename _ExternT, typename _StateT>
453     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
454     {
455     public:
456       explicit
457       codecvt_byname(const char* __s, size_t __refs = 0)
458       : codecvt<_InternT, _ExternT, _StateT>(__refs)
459       {
460 	if (std::strcmp(__s, "C") != 0 && std::strcmp(__s, "POSIX") != 0)
461 	  {
462 	    this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
463 	    this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
464 	  }
465       }
466 
467     protected:
468       virtual
469       ~codecvt_byname() { }
470     };
471 
472 _GLIBCXX_END_NAMESPACE
473 
474 #endif // _CODECVT_H
475