/*=============================================================================
    Copyright (c) 2001-2014 Joel de Guzman

    Distributed under the Boost Software License, Version 1.0. (See accompanying
    file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
7 #if !defined(BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM)
8 #define BOOST_SPIRIT_X3_UC_TYPES_NOVEMBER_23_2008_0840PM
9 
10 #include <boost/cstdint.hpp>
11 #include <boost/regex/pending/unicode_iterator.hpp>
12 #include <boost/type_traits/make_unsigned.hpp>
13 #include <string>
14 
15 namespace boost { namespace spirit { namespace x3
16 {
17     typedef ::boost::uint32_t ucs4_char;
18     typedef char utf8_char;
19     typedef std::basic_string<ucs4_char> ucs4_string;
20     typedef std::basic_string<utf8_char> utf8_string;
21 
22     template <typename Char>
to_utf8(Char value)23     inline utf8_string to_utf8(Char value)
24     {
25         // always store as UTF8
26         utf8_string result;
27         typedef std::back_insert_iterator<utf8_string> insert_iter;
28         insert_iter out_iter(result);
29         utf8_output_iterator<insert_iter> utf8_iter(out_iter);
30         typedef typename make_unsigned<Char>::type UChar;
31         *utf8_iter = (UChar)value;
32         return result;
33     }
34 
35     template <typename Char>
to_utf8(Char const * str)36     inline utf8_string to_utf8(Char const* str)
37     {
38         // always store as UTF8
39         utf8_string result;
40         typedef std::back_insert_iterator<utf8_string> insert_iter;
41         insert_iter out_iter(result);
42         utf8_output_iterator<insert_iter> utf8_iter(out_iter);
43         typedef typename make_unsigned<Char>::type UChar;
44         while (*str)
45             *utf8_iter++ = (UChar)*str++;
46         return result;
47     }
48 
49     template <typename Char, typename Traits, typename Allocator>
50     inline utf8_string
to_utf8(std::basic_string<Char,Traits,Allocator> const & str)51     to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
52     {
53         // always store as UTF8
54         utf8_string result;
55         typedef std::back_insert_iterator<utf8_string> insert_iter;
56         insert_iter out_iter(result);
57         utf8_output_iterator<insert_iter> utf8_iter(out_iter);
58         typedef typename make_unsigned<Char>::type UChar;
59         for (Char ch : str)
60         {
61             *utf8_iter++ = (UChar)ch;
62         }
63         return result;
64     }
65 
    // Assume wchar_t holds UTF-16 code units on MSVC, or on mingw/wineg++ with
    // -fshort-wchar (i.e. whenever wchar_t is 2 bytes wide).
67 #if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2
to_utf8(wchar_t value)68     inline utf8_string to_utf8(wchar_t value)
69     {
70         utf8_string result;
71         typedef std::back_insert_iterator<utf8_string> insert_iter;
72         insert_iter out_iter(result);
73         utf8_output_iterator<insert_iter> utf8_iter(out_iter);
74 
75         u16_to_u32_iterator<wchar_t const*, ucs4_char> ucs4_iter(&value);
76         *utf8_iter++ = *ucs4_iter;
77 
78         return result;
79     }
80 
to_utf8(wchar_t const * str)81     inline utf8_string to_utf8(wchar_t const* str)
82     {
83         utf8_string result;
84         typedef std::back_insert_iterator<utf8_string> insert_iter;
85         insert_iter out_iter(result);
86         utf8_output_iterator<insert_iter> utf8_iter(out_iter);
87 
88         u16_to_u32_iterator<wchar_t const*, ucs4_char> ucs4_iter(str);
89         for (ucs4_char c; (c = *ucs4_iter) != ucs4_char(); ++ucs4_iter) {
90             *utf8_iter++ = c;
91         }
92 
93         return result;
94     }
95 
96     template <typename Traits, typename Allocator>
97     inline utf8_string
to_utf8(std::basic_string<wchar_t,Traits,Allocator> const & str)98     to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str)
99     {
100         return to_utf8(str.c_str());
101     }
102 #endif
103 }}}
104 
105 #endif
106