1 //
2 //  Copyright (c) 2015 Artyom Beilis (Tonkikh)
3 //
4 //  Distributed under the Boost Software License, Version 1.0. (See
5 //  accompanying file LICENSE_1_0.txt or copy at
6 //  http://www.boost.org/LICENSE_1_0.txt)
7 //
8 #ifndef BOOST_LOCALE_UTF8_CODECVT_HPP
9 #define BOOST_LOCALE_UTF8_CODECVT_HPP
10 
11 #include <boost/locale/utf.hpp>
12 #include <boost/locale/generic_codecvt.hpp>
13 #include <boost/cstdint.hpp>
14 #include <locale>
15 
16 namespace boost {
17 namespace locale {
18 
19 ///
20 /// \brief Geneneric utf8 codecvt facet, it allows to convert UTF-8 strings to UTF-16 and UTF-32 using wchar_t, char32_t and char16_t
21 ///
22 template<typename CharType>
23 class utf8_codecvt : public generic_codecvt<CharType,utf8_codecvt<CharType> >
24 {
25 public:
26 
27     struct state_type {};
28 
utf8_codecvt(size_t refs=0)29     utf8_codecvt(size_t refs = 0) : generic_codecvt<CharType,utf8_codecvt<CharType> >(refs)
30     {
31     }
32 
max_encoding_length()33     static int max_encoding_length()
34     {
35         return 4;
36     }
37 
initial_state(generic_codecvt_base::initial_convertion_state)38     static state_type initial_state(generic_codecvt_base::initial_convertion_state /* unused */)
39     {
40         return state_type();
41     }
to_unicode(state_type &,char const * & begin,char const * end)42     static utf::code_point to_unicode(state_type &,char const *&begin,char const *end)
43     {
44         char const *p=begin;
45 
46         utf::code_point c = utf::utf_traits<char>::decode(p,end);
47         if(c!=utf::illegal && c!=utf::incomplete)
48             begin = p;
49         return c;
50     }
51 
from_unicode(state_type &,utf::code_point u,char * begin,char const * end)52     static utf::code_point from_unicode(state_type &,utf::code_point u,char *begin,char const *end)
53     {
54         if(!utf::is_valid_codepoint(u))
55             return utf::illegal;
56         int width;
57         if((width=utf::utf_traits<char>::width(u)) > end - begin)
58             return utf::incomplete;
59         utf::utf_traits<char>::encode(u,begin);
60         return width;
61     }
62 };
63 
64 } // locale
65 } // namespace boost
66 
67 #endif
68 ///
69 // vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
70