1 /*
2  *          Copyright Andrey Semashev 2007 - 2015.
3  * Distributed under the Boost Software License, Version 1.0.
4  *    (See accompanying file LICENSE_1_0.txt or copy at
5  *          http://www.boost.org/LICENSE_1_0.txt)
6  */
7 /*!
8  * \file   code_conversion.cpp
9  * \author Andrey Semashev
10  * \date   08.11.2008
11  *
 * \brief  This source file is part of the Boost.Log library implementation, see the library documentation
 *         at http://www.boost.org/doc/libs/release/libs/log/doc/html/index.html.
14  */
15 
16 #include <boost/log/detail/config.hpp>
17 #include <cstddef>
18 #include <locale>
19 #include <string>
20 #include <stdexcept>
21 #include <algorithm>
22 #include <boost/log/exceptions.hpp>
23 #include <boost/log/detail/code_conversion.hpp>
24 #if defined(BOOST_WINDOWS)
25 #include <cstring>
26 #include <limits>
27 #include <boost/winapi/get_last_error.hpp>
28 #include <boost/winapi/character_code_conversion.hpp>
29 #endif
30 #include <boost/log/detail/header.hpp>
31 
32 namespace boost {
33 
34 BOOST_LOG_OPEN_NAMESPACE
35 
36 namespace aux {
37 
38 BOOST_LOG_ANONYMOUS_NAMESPACE {
39 
40 //! The function performs character conversion with the specified facet
41 template< typename LocalCharT >
42 inline std::codecvt_base::result convert(
43     std::codecvt< LocalCharT, char, std::mbstate_t > const& fac,
44     std::mbstate_t& state,
45     const char*& pSrcBegin,
46     const char* pSrcEnd,
47     LocalCharT*& pDstBegin,
48     LocalCharT* pDstEnd)
49 {
50     return fac.in(state, pSrcBegin, pSrcEnd, pSrcBegin, pDstBegin, pDstEnd, pDstBegin);
51 }
52 
53 //! The function performs character conversion with the specified facet
54 template< typename LocalCharT >
55 inline std::codecvt_base::result convert(
56     std::codecvt< LocalCharT, char, std::mbstate_t > const& fac,
57     std::mbstate_t& state,
58     const LocalCharT*& pSrcBegin,
59     const LocalCharT* pSrcEnd,
60     char*& pDstBegin,
61     char* pDstEnd)
62 {
63     return fac.out(state, pSrcBegin, pSrcEnd, pSrcBegin, pDstBegin, pDstEnd, pDstBegin);
64 }
65 
66 } // namespace
67 
68 template< typename SourceCharT, typename TargetCharT, typename FacetT >
69 inline std::size_t code_convert(const SourceCharT* begin, const SourceCharT* end, std::basic_string< TargetCharT >& converted, std::size_t max_size, FacetT const& fac)
70 {
71     typedef typename FacetT::state_type state_type;
72     TargetCharT converted_buffer[256];
73 
74     const SourceCharT* const original_begin = begin;
75     state_type state = state_type();
76     std::size_t buf_size = (std::min)(max_size, sizeof(converted_buffer) / sizeof(*converted_buffer));
77     while (begin != end && buf_size > 0u)
78     {
79         TargetCharT* dest = converted_buffer;
80         std::codecvt_base::result res = convert(
81             fac,
82             state,
83             begin,
84             end,
85             dest,
86             dest + buf_size);
87 
88         switch (res)
89         {
90         case std::codecvt_base::ok:
91             // All characters were successfully converted
92             // NOTE: MSVC 11 also returns ok when the source buffer was only partially consumed, so we also check that the begin pointer has reached the end.
93             converted.append(converted_buffer, dest);
94             max_size -= dest - converted_buffer;
95             break;
96 
97         case std::codecvt_base::noconv:
98             {
99                 // Not possible, unless both character types are actually equivalent
100                 const std::size_t size = (std::min)(max_size, static_cast< std::size_t >(end - begin));
101                 converted.append(begin, begin + size);
102                 begin += size;
103                 max_size -= size;
104             }
105             goto done;
106 
107         case std::codecvt_base::partial:
108             // Some characters were converted, some were not
109             if (dest != converted_buffer)
110             {
111                 // Some conversion took place, so it seems like
112                 // the destination buffer might not have been long enough
113                 converted.append(converted_buffer, dest);
114                 max_size -= dest - converted_buffer;
115 
116                 // ...and go on for the next part
117                 break;
118             }
119             else
120             {
121                 // Nothing was converted
122                 if (begin == end)
123                     goto done;
124 
125                 // Looks like the tail of the source buffer contains only part of the last character.
126                 // In this case we intentionally fall through to throw an exception.
127             }
128 
129         default: // std::codecvt_base::error
130             BOOST_LOG_THROW_DESCR(conversion_error, "Could not convert character encoding");
131         }
132 
133         buf_size = (std::min)(max_size, sizeof(converted_buffer) / sizeof(*converted_buffer));
134     }
135 
136 done:
137     return static_cast< std::size_t >(begin - original_begin);
138 }
139 
140 //! The function converts one string to the character type of another
code_convert_impl(const wchar_t * str1,std::size_t len,std::string & str2,std::size_t max_size,std::locale const & loc)141 BOOST_LOG_API bool code_convert_impl(const wchar_t* str1, std::size_t len, std::string& str2, std::size_t max_size, std::locale const& loc)
142 {
143     return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == len;
144 }
145 
146 //! The function converts one string to the character type of another
code_convert_impl(const char * str1,std::size_t len,std::wstring & str2,std::size_t max_size,std::locale const & loc)147 BOOST_LOG_API bool code_convert_impl(const char* str1, std::size_t len, std::wstring& str2, std::size_t max_size, std::locale const& loc)
148 {
149     return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == len;
150 }
151 
152 #if !defined(BOOST_LOG_NO_CXX11_CODECVT_FACETS)
153 
154 #if !defined(BOOST_NO_CXX11_CHAR16_T)
155 
156 //! The function converts one string to the character type of another
code_convert_impl(const char16_t * str1,std::size_t len,std::string & str2,std::size_t max_size,std::locale const & loc)157 BOOST_LOG_API bool code_convert_impl(const char16_t* str1, std::size_t len, std::string& str2, std::size_t max_size, std::locale const& loc)
158 {
159     return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)) == len;
160 }
161 
162 //! The function converts one string to the character type of another
code_convert_impl(const char * str1,std::size_t len,std::u16string & str2,std::size_t max_size,std::locale const & loc)163 BOOST_LOG_API bool code_convert_impl(const char* str1, std::size_t len, std::u16string& str2, std::size_t max_size, std::locale const& loc)
164 {
165     return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)) == len;
166 }
167 
168 //! The function converts one string to the character type of another
code_convert_impl(const char16_t * str1,std::size_t len,std::wstring & str2,std::size_t max_size,std::locale const & loc)169 BOOST_LOG_API bool code_convert_impl(const char16_t* str1, std::size_t len, std::wstring& str2, std::size_t max_size, std::locale const& loc)
170 {
171     std::string temp_str;
172     code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc));
173     const std::size_t temp_size = temp_str.size();
174     return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == temp_size;
175 }
176 
177 #endif
178 
179 #if !defined(BOOST_NO_CXX11_CHAR32_T)
180 
181 //! The function converts one string to the character type of another
code_convert_impl(const char32_t * str1,std::size_t len,std::string & str2,std::size_t max_size,std::locale const & loc)182 BOOST_LOG_API bool code_convert_impl(const char32_t* str1, std::size_t len, std::string& str2, std::size_t max_size, std::locale const& loc)
183 {
184     return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)) == len;
185 }
186 
187 //! The function converts one string to the character type of another
code_convert_impl(const char * str1,std::size_t len,std::u32string & str2,std::size_t max_size,std::locale const & loc)188 BOOST_LOG_API bool code_convert_impl(const char* str1, std::size_t len, std::u32string& str2, std::size_t max_size, std::locale const& loc)
189 {
190     return code_convert(str1, str1 + len, str2, max_size, std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)) == len;
191 }
192 
193 //! The function converts one string to the character type of another
code_convert_impl(const char32_t * str1,std::size_t len,std::wstring & str2,std::size_t max_size,std::locale const & loc)194 BOOST_LOG_API bool code_convert_impl(const char32_t* str1, std::size_t len, std::wstring& str2, std::size_t max_size, std::locale const& loc)
195 {
196     std::string temp_str;
197     code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc));
198     const std::size_t temp_size = temp_str.size();
199     return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< wchar_t, char, std::mbstate_t > >(loc)) == temp_size;
200 }
201 
202 #endif
203 
204 #if !defined(BOOST_NO_CXX11_CHAR16_T) && !defined(BOOST_NO_CXX11_CHAR32_T)
205 
206 //! The function converts one string to the character type of another
code_convert_impl(const char16_t * str1,std::size_t len,std::u32string & str2,std::size_t max_size,std::locale const & loc)207 BOOST_LOG_API bool code_convert_impl(const char16_t* str1, std::size_t len, std::u32string& str2, std::size_t max_size, std::locale const& loc)
208 {
209     std::string temp_str;
210     code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc));
211     const std::size_t temp_size = temp_str.size();
212     return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc)) == temp_size;
213 }
214 
215 //! The function converts one string to the character type of another
code_convert_impl(const char32_t * str1,std::size_t len,std::u16string & str2,std::size_t max_size,std::locale const & loc)216 BOOST_LOG_API bool code_convert_impl(const char32_t* str1, std::size_t len, std::u16string& str2, std::size_t max_size, std::locale const& loc)
217 {
218     std::string temp_str;
219     code_convert(str1, str1 + len, temp_str, temp_str.max_size(), std::use_facet< std::codecvt< char32_t, char, std::mbstate_t > >(loc));
220     const std::size_t temp_size = temp_str.size();
221     return code_convert(temp_str.c_str(), temp_str.c_str() + temp_size, str2, max_size, std::use_facet< std::codecvt< char16_t, char, std::mbstate_t > >(loc)) == temp_size;
222 }
223 
224 #endif
225 
226 #endif // !defined(BOOST_LOG_NO_CXX11_CODECVT_FACETS)
227 
228 #if defined(BOOST_WINDOWS)
229 
230 //! Converts UTF-8 to UTF-16
utf8_to_utf16(const char * str)231 std::wstring utf8_to_utf16(const char* str)
232 {
233     std::size_t utf8_len = std::strlen(str);
234     if (utf8_len == 0)
235         return std::wstring();
236     else if (BOOST_UNLIKELY(utf8_len > static_cast< std::size_t >((std::numeric_limits< int >::max)())))
237         BOOST_LOG_THROW_DESCR(bad_alloc, "UTF-8 string too long");
238 
239     int len = boost::winapi::MultiByteToWideChar(boost::winapi::CP_UTF8_, boost::winapi::MB_ERR_INVALID_CHARS_, str, static_cast< int >(utf8_len), NULL, 0);
240     if (BOOST_LIKELY(len > 0))
241     {
242         std::wstring wstr;
243         wstr.resize(len);
244 
245         len = boost::winapi::MultiByteToWideChar(boost::winapi::CP_UTF8_, boost::winapi::MB_ERR_INVALID_CHARS_, str, static_cast< int >(utf8_len), &wstr[0], len);
246         if (BOOST_LIKELY(len > 0))
247         {
248             return wstr;
249         }
250     }
251 
252     const boost::winapi::DWORD_ err = boost::winapi::GetLastError();
253     BOOST_LOG_THROW_DESCR_PARAMS(system_error, "Failed to convert UTF-8 to UTF-16", (err));
254     BOOST_LOG_UNREACHABLE_RETURN(std::wstring());
255 }
256 
257 //! Converts UTF-16 to UTF-8
utf16_to_utf8(const wchar_t * wstr)258 std::string utf16_to_utf8(const wchar_t* wstr)
259 {
260     std::size_t utf16_len = std::wcslen(wstr);
261     if (utf16_len == 0)
262         return std::string();
263     else if (BOOST_UNLIKELY(utf16_len > static_cast< std::size_t >((std::numeric_limits< int >::max)())))
264         BOOST_LOG_THROW_DESCR(bad_alloc, "UTF-16 string too long");
265 
266     const boost::winapi::DWORD_ flags =
267 #if BOOST_USE_WINAPI_VERSION >= BOOST_WINAPI_VERSION_WIN6
268         boost::winapi::WC_ERR_INVALID_CHARS_;
269 #else
270         0u;
271 #endif
272     int len = boost::winapi::WideCharToMultiByte(boost::winapi::CP_UTF8_, flags, wstr, static_cast< int >(utf16_len), NULL, 0, NULL, NULL);
273     if (BOOST_LIKELY(len > 0))
274     {
275         std::string str;
276         str.resize(len);
277 
278         len = boost::winapi::WideCharToMultiByte(boost::winapi::CP_UTF8_, flags, wstr, static_cast< int >(utf16_len), &str[0], len, NULL, NULL);
279         if (BOOST_LIKELY(len > 0))
280         {
281             return str;
282         }
283     }
284 
285     const boost::winapi::DWORD_ err = boost::winapi::GetLastError();
286     BOOST_LOG_THROW_DESCR_PARAMS(system_error, "Failed to convert UTF-16 to UTF-8", (err));
287     BOOST_LOG_UNREACHABLE_RETURN(std::string());
288 }
289 
290 #endif // defined(BOOST_WINDOWS)
291 
292 } // namespace aux
293 
294 BOOST_LOG_CLOSE_NAMESPACE // namespace log
295 
296 } // namespace boost
297 
298 #include <boost/log/detail/footer.hpp>
299