1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000-2018 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file bits/codecvt.h
26  *  This is an internal header file, included by other library headers.
27  *  Do not attempt to use it directly. @headername{locale}
28  */
29 
30 //
31 // ISO C++ 14882: 22.2.1.5 Template class codecvt
32 //
33 
34 // Written by Benjamin Kosnik <bkoz@redhat.com>
35 
36 #ifndef _CODECVT_H
37 #define _CODECVT_H 1
38 
39 #pragma GCC system_header
40 
41 namespace std _GLIBCXX_VISIBILITY(default)
42 {
43 _GLIBCXX_BEGIN_NAMESPACE_VERSION
44 
/// Stateless base class of the codecvt facets [22.2.1.5]; its only
/// content is the set of status codes the conversion functions return.
class codecvt_base
{
public:
  /// Outcome of a conversion request.
  enum result
  {
    ok      = 0,	///< All of the input was converted.
    partial = 1,	///< Input ended early or the output ran out of room.
    error   = 2,	///< The conversion failed.
    noconv  = 3	///< No conversion was necessary.
  };
};
57 
58   /**
59    *  @brief  Common base for codecvt functions.
60    *
61    *  This template class provides implementations of the public functions
62    *  that forward to the protected virtual functions.
63    *
64    *  This template also provides abstract stubs for the protected virtual
65    *  functions.
66   */
67   template<typename _InternT, typename _ExternT, typename _StateT>
68     class __codecvt_abstract_base
69     : public locale::facet, public codecvt_base
70     {
71     public:
72       // Types:
73       typedef codecvt_base::result	result;
74       typedef _InternT			intern_type;
75       typedef _ExternT			extern_type;
76       typedef _StateT			state_type;
77 
78       // 22.2.1.5.1 codecvt members
79       /**
80        *  @brief  Convert from internal to external character set.
81        *
82        *  Converts input string of intern_type to output string of
83        *  extern_type.  This is analogous to wcsrtombs.  It does this by
84        *  calling codecvt::do_out.
85        *
86        *  The source and destination character sets are determined by the
87        *  facet's locale, internal and external types.
88        *
89        *  The characters in [from,from_end) are converted and written to
90        *  [to,to_end).  from_next and to_next are set to point to the
91        *  character following the last successfully converted character,
92        *  respectively.  If the result needed no conversion, from_next and
93        *  to_next are not affected.
94        *
95        *  The @a state argument should be initialized if the input is at the
96        *  beginning and carried from a previous call if continuing
97        *  conversion.  There are no guarantees about how @a state is used.
98        *
99        *  The result returned is a member of codecvt_base::result.  If
100        *  all the input is converted, returns codecvt_base::ok.  If no
101        *  conversion is necessary, returns codecvt_base::noconv.  If
102        *  the input ends early or there is insufficient space in the
103        *  output, returns codecvt_base::partial.  Otherwise the
104        *  conversion failed and codecvt_base::error is returned.
105        *
106        *  @param  __state  Persistent conversion state data.
107        *  @param  __from  Start of input.
108        *  @param  __from_end  End of input.
109        *  @param  __from_next  Returns start of unconverted data.
110        *  @param  __to  Start of output buffer.
111        *  @param  __to_end  End of output buffer.
112        *  @param  __to_next  Returns start of unused output area.
113        *  @return  codecvt_base::result.
114       */
115       result
116       out(state_type& __state, const intern_type* __from,
117 	  const intern_type* __from_end, const intern_type*& __from_next,
118 	  extern_type* __to, extern_type* __to_end,
119 	  extern_type*& __to_next) const
120       {
121 	return this->do_out(__state, __from, __from_end, __from_next,
122 			    __to, __to_end, __to_next);
123       }
124 
125       /**
126        *  @brief  Reset conversion state.
127        *
128        *  Writes characters to output that would restore @a state to initial
129        *  conditions.  The idea is that if a partial conversion occurs, then
130        *  the converting the characters written by this function would leave
131        *  the state in initial conditions, rather than partial conversion
132        *  state.  It does this by calling codecvt::do_unshift().
133        *
134        *  For example, if 4 external characters always converted to 1 internal
135        *  character, and input to in() had 6 external characters with state
136        *  saved, this function would write two characters to the output and
137        *  set the state to initialized conditions.
138        *
139        *  The source and destination character sets are determined by the
140        *  facet's locale, internal and external types.
141        *
142        *  The result returned is a member of codecvt_base::result.  If the
143        *  state could be reset and data written, returns codecvt_base::ok.  If
144        *  no conversion is necessary, returns codecvt_base::noconv.  If the
145        *  output has insufficient space, returns codecvt_base::partial.
146        *  Otherwise the reset failed and codecvt_base::error is returned.
147        *
148        *  @param  __state  Persistent conversion state data.
149        *  @param  __to  Start of output buffer.
150        *  @param  __to_end  End of output buffer.
151        *  @param  __to_next  Returns start of unused output area.
152        *  @return  codecvt_base::result.
153       */
154       result
155       unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
156 	      extern_type*& __to_next) const
157       { return this->do_unshift(__state, __to,__to_end,__to_next); }
158 
159       /**
160        *  @brief  Convert from external to internal character set.
161        *
162        *  Converts input string of extern_type to output string of
163        *  intern_type.  This is analogous to mbsrtowcs.  It does this by
164        *  calling codecvt::do_in.
165        *
166        *  The source and destination character sets are determined by the
167        *  facet's locale, internal and external types.
168        *
169        *  The characters in [from,from_end) are converted and written to
170        *  [to,to_end).  from_next and to_next are set to point to the
171        *  character following the last successfully converted character,
172        *  respectively.  If the result needed no conversion, from_next and
173        *  to_next are not affected.
174        *
175        *  The @a state argument should be initialized if the input is at the
176        *  beginning and carried from a previous call if continuing
177        *  conversion.  There are no guarantees about how @a state is used.
178        *
179        *  The result returned is a member of codecvt_base::result.  If
180        *  all the input is converted, returns codecvt_base::ok.  If no
181        *  conversion is necessary, returns codecvt_base::noconv.  If
182        *  the input ends early or there is insufficient space in the
183        *  output, returns codecvt_base::partial.  Otherwise the
184        *  conversion failed and codecvt_base::error is returned.
185        *
186        *  @param  __state  Persistent conversion state data.
187        *  @param  __from  Start of input.
188        *  @param  __from_end  End of input.
189        *  @param  __from_next  Returns start of unconverted data.
190        *  @param  __to  Start of output buffer.
191        *  @param  __to_end  End of output buffer.
192        *  @param  __to_next  Returns start of unused output area.
193        *  @return  codecvt_base::result.
194       */
195       result
196       in(state_type& __state, const extern_type* __from,
197 	 const extern_type* __from_end, const extern_type*& __from_next,
198 	 intern_type* __to, intern_type* __to_end,
199 	 intern_type*& __to_next) const
200       {
201 	return this->do_in(__state, __from, __from_end, __from_next,
202 			   __to, __to_end, __to_next);
203       }
204 
205       int
206       encoding() const throw()
207       { return this->do_encoding(); }
208 
209       bool
210       always_noconv() const throw()
211       { return this->do_always_noconv(); }
212 
213       int
214       length(state_type& __state, const extern_type* __from,
215 	     const extern_type* __end, size_t __max) const
216       { return this->do_length(__state, __from, __end, __max); }
217 
218       int
219       max_length() const throw()
220       { return this->do_max_length(); }
221 
222     protected:
223       explicit
224       __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
225 
226       virtual
227       ~__codecvt_abstract_base() { }
228 
229       /**
230        *  @brief  Convert from internal to external character set.
231        *
232        *  Converts input string of intern_type to output string of
233        *  extern_type.  This function is a hook for derived classes to change
234        *  the value returned.  @see out for more information.
235       */
236       virtual result
237       do_out(state_type& __state, const intern_type* __from,
238 	     const intern_type* __from_end, const intern_type*& __from_next,
239 	     extern_type* __to, extern_type* __to_end,
240 	     extern_type*& __to_next) const = 0;
241 
242       virtual result
243       do_unshift(state_type& __state, extern_type* __to,
244 		 extern_type* __to_end, extern_type*& __to_next) const = 0;
245 
246       virtual result
247       do_in(state_type& __state, const extern_type* __from,
248 	    const extern_type* __from_end, const extern_type*& __from_next,
249 	    intern_type* __to, intern_type* __to_end,
250 	    intern_type*& __to_next) const = 0;
251 
252       virtual int
253       do_encoding() const throw() = 0;
254 
255       virtual bool
256       do_always_noconv() const throw() = 0;
257 
258       virtual int
259       do_length(state_type&, const extern_type* __from,
260 		const extern_type* __end, size_t __max) const = 0;
261 
262       virtual int
263       do_max_length() const throw() = 0;
264     };
265 
266   /**
267    *  @brief  Primary class template codecvt.
268    *  @ingroup locales
269    *
270    *  NB: Generic, mostly useless implementation.
271    *
272   */
273    template<typename _InternT, typename _ExternT, typename _StateT>
274     class codecvt
275     : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
276     {
277     public:
278       // Types:
279       typedef codecvt_base::result	result;
280       typedef _InternT			intern_type;
281       typedef _ExternT			extern_type;
282       typedef _StateT			state_type;
283 
284     protected:
285       __c_locale			_M_c_locale_codecvt;
286 
287     public:
288       static locale::id			id;
289 
290       explicit
291       codecvt(size_t __refs = 0)
292       : __codecvt_abstract_base<_InternT, _ExternT, _StateT> (__refs),
293 	_M_c_locale_codecvt(0)
294       { }
295 
296       explicit
297       codecvt(__c_locale __cloc, size_t __refs = 0);
298 
299     protected:
300       virtual
301       ~codecvt() { }
302 
303       virtual result
304       do_out(state_type& __state, const intern_type* __from,
305 	     const intern_type* __from_end, const intern_type*& __from_next,
306 	     extern_type* __to, extern_type* __to_end,
307 	     extern_type*& __to_next) const;
308 
309       virtual result
310       do_unshift(state_type& __state, extern_type* __to,
311 		 extern_type* __to_end, extern_type*& __to_next) const;
312 
313       virtual result
314       do_in(state_type& __state, const extern_type* __from,
315 	    const extern_type* __from_end, const extern_type*& __from_next,
316 	    intern_type* __to, intern_type* __to_end,
317 	    intern_type*& __to_next) const;
318 
319       virtual int
320       do_encoding() const throw();
321 
322       virtual bool
323       do_always_noconv() const throw();
324 
325       virtual int
326       do_length(state_type&, const extern_type* __from,
327 		const extern_type* __end, size_t __max) const;
328 
329       virtual int
330       do_max_length() const throw();
331     };
332 
333   template<typename _InternT, typename _ExternT, typename _StateT>
334     locale::id codecvt<_InternT, _ExternT, _StateT>::id;
335 
336   /// class codecvt<char, char, mbstate_t> specialization.
337   template<>
338     class codecvt<char, char, mbstate_t>
339     : public __codecvt_abstract_base<char, char, mbstate_t>
340     {
341       friend class messages<char>;
342 
343     public:
344       // Types:
345       typedef char			intern_type;
346       typedef char			extern_type;
347       typedef mbstate_t			state_type;
348 
349     protected:
350       __c_locale			_M_c_locale_codecvt;
351 
352     public:
353       static locale::id id;
354 
355       explicit
356       codecvt(size_t __refs = 0);
357 
358       explicit
359       codecvt(__c_locale __cloc, size_t __refs = 0);
360 
361     protected:
362       virtual
363       ~codecvt();
364 
365       virtual result
366       do_out(state_type& __state, const intern_type* __from,
367 	     const intern_type* __from_end, const intern_type*& __from_next,
368 	     extern_type* __to, extern_type* __to_end,
369 	     extern_type*& __to_next) const;
370 
371       virtual result
372       do_unshift(state_type& __state, extern_type* __to,
373 		 extern_type* __to_end, extern_type*& __to_next) const;
374 
375       virtual result
376       do_in(state_type& __state, const extern_type* __from,
377 	    const extern_type* __from_end, const extern_type*& __from_next,
378 	    intern_type* __to, intern_type* __to_end,
379 	    intern_type*& __to_next) const;
380 
381       virtual int
382       do_encoding() const throw();
383 
384       virtual bool
385       do_always_noconv() const throw();
386 
387       virtual int
388       do_length(state_type&, const extern_type* __from,
389 		const extern_type* __end, size_t __max) const;
390 
391       virtual int
392       do_max_length() const throw();
393   };
394 
395 #ifdef _GLIBCXX_USE_WCHAR_T
396   /** @brief  Class codecvt<wchar_t, char, mbstate_t> specialization.
397    *
398    *  Converts between narrow and wide characters in the native character set
399    */
400   template<>
401     class codecvt<wchar_t, char, mbstate_t>
402     : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
403     {
404       friend class messages<wchar_t>;
405 
406     public:
407       // Types:
408       typedef wchar_t			intern_type;
409       typedef char			extern_type;
410       typedef mbstate_t			state_type;
411 
412     protected:
413       __c_locale			_M_c_locale_codecvt;
414 
415     public:
416       static locale::id			id;
417 
418       explicit
419       codecvt(size_t __refs = 0);
420 
421       explicit
422       codecvt(__c_locale __cloc, size_t __refs = 0);
423 
424     protected:
425       virtual
426       ~codecvt();
427 
428       virtual result
429       do_out(state_type& __state, const intern_type* __from,
430 	     const intern_type* __from_end, const intern_type*& __from_next,
431 	     extern_type* __to, extern_type* __to_end,
432 	     extern_type*& __to_next) const;
433 
434       virtual result
435       do_unshift(state_type& __state,
436 		 extern_type* __to, extern_type* __to_end,
437 		 extern_type*& __to_next) const;
438 
439       virtual result
440       do_in(state_type& __state,
441 	     const extern_type* __from, const extern_type* __from_end,
442 	     const extern_type*& __from_next,
443 	     intern_type* __to, intern_type* __to_end,
444 	     intern_type*& __to_next) const;
445 
446       virtual
447       int do_encoding() const throw();
448 
449       virtual
450       bool do_always_noconv() const throw();
451 
452       virtual
453       int do_length(state_type&, const extern_type* __from,
454 		    const extern_type* __end, size_t __max) const;
455 
456       virtual int
457       do_max_length() const throw();
458     };
459 #endif //_GLIBCXX_USE_WCHAR_T
460 
461 #if __cplusplus >= 201103L
462 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
463   /** @brief  Class codecvt<char16_t, char, mbstate_t> specialization.
464    *
465    *  Converts between UTF-16 and UTF-8.
466    */
467   template<>
468     class codecvt<char16_t, char, mbstate_t>
469     : public __codecvt_abstract_base<char16_t, char, mbstate_t>
470     {
471     public:
472       // Types:
473       typedef char16_t			intern_type;
474       typedef char			extern_type;
475       typedef mbstate_t			state_type;
476 
477     public:
478       static locale::id			id;
479 
480       explicit
481       codecvt(size_t __refs = 0)
482       : __codecvt_abstract_base<char16_t, char, mbstate_t>(__refs) { }
483 
484     protected:
485       virtual
486       ~codecvt();
487 
488       virtual result
489       do_out(state_type& __state, const intern_type* __from,
490 	     const intern_type* __from_end, const intern_type*& __from_next,
491 	     extern_type* __to, extern_type* __to_end,
492 	     extern_type*& __to_next) const;
493 
494       virtual result
495       do_unshift(state_type& __state,
496 		 extern_type* __to, extern_type* __to_end,
497 		 extern_type*& __to_next) const;
498 
499       virtual result
500       do_in(state_type& __state,
501 	     const extern_type* __from, const extern_type* __from_end,
502 	     const extern_type*& __from_next,
503 	     intern_type* __to, intern_type* __to_end,
504 	     intern_type*& __to_next) const;
505 
506       virtual
507       int do_encoding() const throw();
508 
509       virtual
510       bool do_always_noconv() const throw();
511 
512       virtual
513       int do_length(state_type&, const extern_type* __from,
514 		    const extern_type* __end, size_t __max) const;
515 
516       virtual int
517       do_max_length() const throw();
518     };
519 
520   /** @brief  Class codecvt<char32_t, char, mbstate_t> specialization.
521    *
522    *  Converts between UTF-32 and UTF-8.
523    */
524   template<>
525     class codecvt<char32_t, char, mbstate_t>
526     : public __codecvt_abstract_base<char32_t, char, mbstate_t>
527     {
528     public:
529       // Types:
530       typedef char32_t			intern_type;
531       typedef char			extern_type;
532       typedef mbstate_t			state_type;
533 
534     public:
535       static locale::id			id;
536 
537       explicit
538       codecvt(size_t __refs = 0)
539       : __codecvt_abstract_base<char32_t, char, mbstate_t>(__refs) { }
540 
541     protected:
542       virtual
543       ~codecvt();
544 
545       virtual result
546       do_out(state_type& __state, const intern_type* __from,
547 	     const intern_type* __from_end, const intern_type*& __from_next,
548 	     extern_type* __to, extern_type* __to_end,
549 	     extern_type*& __to_next) const;
550 
551       virtual result
552       do_unshift(state_type& __state,
553 		 extern_type* __to, extern_type* __to_end,
554 		 extern_type*& __to_next) const;
555 
556       virtual result
557       do_in(state_type& __state,
558 	     const extern_type* __from, const extern_type* __from_end,
559 	     const extern_type*& __from_next,
560 	     intern_type* __to, intern_type* __to_end,
561 	     intern_type*& __to_next) const;
562 
563       virtual
564       int do_encoding() const throw();
565 
566       virtual
567       bool do_always_noconv() const throw();
568 
569       virtual
570       int do_length(state_type&, const extern_type* __from,
571 		    const extern_type* __end, size_t __max) const;
572 
573       virtual int
574       do_max_length() const throw();
575     };
576 
577 #endif // _GLIBCXX_USE_C99_STDINT_TR1
578 #endif // C++11
579 
580   /// class codecvt_byname [22.2.1.6].
581   template<typename _InternT, typename _ExternT, typename _StateT>
582     class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
583     {
584     public:
585       explicit
586       codecvt_byname(const char* __s, size_t __refs = 0)
587       : codecvt<_InternT, _ExternT, _StateT>(__refs)
588       {
589 	if (__builtin_strcmp(__s, "C") != 0
590 	    && __builtin_strcmp(__s, "POSIX") != 0)
591 	  {
592 	    this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
593 	    this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
594 	  }
595       }
596 
597 #if __cplusplus >= 201103L
598       explicit
599       codecvt_byname(const string& __s, size_t __refs = 0)
600       : codecvt_byname(__s.c_str(), __refs) { }
601 #endif
602 
603     protected:
604       virtual
605       ~codecvt_byname() { }
606     };
607 
608 #if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
609   template<>
610     class codecvt_byname<char16_t, char, mbstate_t>
611     : public codecvt<char16_t, char, mbstate_t>
612     {
613     public:
614       explicit
615       codecvt_byname(const char*, size_t __refs = 0)
616       : codecvt<char16_t, char, mbstate_t>(__refs) { }
617 
618       explicit
619       codecvt_byname(const string& __s, size_t __refs = 0)
620       : codecvt_byname(__s.c_str(), __refs) { }
621 
622     protected:
623       virtual
624       ~codecvt_byname() { }
625     };
626 
627   template<>
628     class codecvt_byname<char32_t, char, mbstate_t>
629     : public codecvt<char32_t, char, mbstate_t>
630     {
631     public:
632       explicit
633       codecvt_byname(const char*, size_t __refs = 0)
634       : codecvt<char32_t, char, mbstate_t>(__refs) { }
635 
636       explicit
637       codecvt_byname(const string& __s, size_t __refs = 0)
638       : codecvt_byname(__s.c_str(), __refs) { }
639 
640     protected:
641       virtual
642       ~codecvt_byname() { }
643     };
644 #endif
645 
646   // Inhibit implicit instantiations for required instantiations,
647   // which are defined via explicit instantiations elsewhere.
648 #if _GLIBCXX_EXTERN_TEMPLATE
649   extern template class codecvt_byname<char, char, mbstate_t>;
650 
651   extern template
652     const codecvt<char, char, mbstate_t>&
653     use_facet<codecvt<char, char, mbstate_t> >(const locale&);
654 
655   extern template
656     bool
657     has_facet<codecvt<char, char, mbstate_t> >(const locale&);
658 
659 #ifdef _GLIBCXX_USE_WCHAR_T
660   extern template class codecvt_byname<wchar_t, char, mbstate_t>;
661 
662   extern template
663     const codecvt<wchar_t, char, mbstate_t>&
664     use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
665 
666   extern template
667     bool
668     has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
669 #endif
670 
671 #if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
672   extern template class codecvt_byname<char16_t, char, mbstate_t>;
673   extern template class codecvt_byname<char32_t, char, mbstate_t>;
674 #endif
675 
676 #endif
677 
678 _GLIBCXX_END_NAMESPACE_VERSION
679 } // namespace std
680 
681 #endif // _CODECVT_H
682