1 /*=============================================================================
2     Copyright (c) 2001-2011 Joel de Guzman
3 
4     Distributed under the Boost Software License, Version 1.0. (See accompanying
5     file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 ==============================================================================*/
7 #if !defined(BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM)
8 #define BOOST_SPIRIT_UC_TYPES_NOVEMBER_23_2008_0840PM
9 
10 #if defined(_MSC_VER)
11 #pragma once
12 #endif
13 
14 #include <boost/cstdint.hpp>
15 #include <boost/foreach.hpp>
16 #include <boost/regex/pending/unicode_iterator.hpp>
17 #include <boost/type_traits/make_unsigned.hpp>
18 #include <string>
19 
20 namespace boost { namespace spirit
21 {
22     typedef ::boost::uint32_t ucs4_char;
23     typedef char utf8_char;
24     typedef std::basic_string<ucs4_char> ucs4_string;
25     typedef std::basic_string<utf8_char> utf8_string;
26 
27     template <typename Char>
to_utf8(Char value)28     inline utf8_string to_utf8(Char value)
29     {
30         // always store as UTF8
31         utf8_string result;
32         typedef std::back_insert_iterator<utf8_string> insert_iter;
33         insert_iter out_iter(result);
34         utf8_output_iterator<insert_iter> utf8_iter(out_iter);
35         typedef typename make_unsigned<Char>::type UChar;
36         *utf8_iter = (UChar)value;
37         return result;
38     }
39 
40     template <typename Char>
to_utf8(Char const * str)41     inline utf8_string to_utf8(Char const* str)
42     {
43         // always store as UTF8
44         utf8_string result;
45         typedef std::back_insert_iterator<utf8_string> insert_iter;
46         insert_iter out_iter(result);
47         utf8_output_iterator<insert_iter> utf8_iter(out_iter);
48         typedef typename make_unsigned<Char>::type UChar;
49         while (*str)
50             *utf8_iter++ = (UChar)*str++;
51         return result;
52     }
53 
54     template <typename Char, typename Traits, typename Allocator>
55     inline utf8_string
to_utf8(std::basic_string<Char,Traits,Allocator> const & str)56     to_utf8(std::basic_string<Char, Traits, Allocator> const& str)
57     {
58         // always store as UTF8
59         utf8_string result;
60         typedef std::back_insert_iterator<utf8_string> insert_iter;
61         insert_iter out_iter(result);
62         utf8_output_iterator<insert_iter> utf8_iter(out_iter);
63         typedef typename make_unsigned<Char>::type UChar;
64         BOOST_FOREACH(Char ch, str)
65         {
66             *utf8_iter++ = (UChar)ch;
67         }
68         return result;
69     }
70 
71     // Assume wchar_t content is UTF-16 on MSVC, or mingw/wineg++ with -fshort-wchar
72 #if defined(_MSC_VER) || defined(__SIZEOF_WCHAR_T__) && __SIZEOF_WCHAR_T__ == 2
to_utf8(wchar_t value)73     inline utf8_string to_utf8(wchar_t value)
74     {
75         utf8_string result;
76         typedef std::back_insert_iterator<utf8_string> insert_iter;
77         insert_iter out_iter(result);
78         utf8_output_iterator<insert_iter> utf8_iter(out_iter);
79 
80         u16_to_u32_iterator<wchar_t const*, ucs4_char> ucs4_iter(&value);
81         *utf8_iter++ = *ucs4_iter;
82 
83         return result;
84     }
85 
to_utf8(wchar_t const * str)86     inline utf8_string to_utf8(wchar_t const* str)
87     {
88         utf8_string result;
89         typedef std::back_insert_iterator<utf8_string> insert_iter;
90         insert_iter out_iter(result);
91         utf8_output_iterator<insert_iter> utf8_iter(out_iter);
92 
93         u16_to_u32_iterator<wchar_t const*, ucs4_char> ucs4_iter(str);
94         for (ucs4_char c; (c = *ucs4_iter) != ucs4_char(); ++ucs4_iter) {
95             *utf8_iter++ = c;
96         }
97 
98         return result;
99     }
100 
101     template <typename Traits, typename Allocator>
102     inline utf8_string
to_utf8(std::basic_string<wchar_t,Traits,Allocator> const & str)103     to_utf8(std::basic_string<wchar_t, Traits, Allocator> const& str)
104     {
105         return to_utf8(str.c_str());
106     }
107 #endif
108 }}
109 
110 #endif
111