1 /**
2  *  \file SC_Codecvt.hpp
3  *
4  *  \brief Codepage conversion utilities.
5  *
6  *  This header provides convenience functions to convert strings and paths between UTF-8 and the OS's
7  *  filesystem API encoding. On POSIX systems, this does nothing. On Windows, this converts between
8  *  UTF-8 and UTF-16. On Windows, additional functions are provided to convert between UTF-8 std::string
9  *  and UTF-16 std::wstring.
10  *
11  *  $Author: Brian Heim $
12  *
13  *  \version 1.1
14  *
15  *  $Date: 2017-05-14 $
16  *
17  *  $Contact: brianlheim@gmail.com $
18  *
19  *  Created on: 2017-05-14
20  */
21 /*
22  *  Copyright (C) Brian Heim, 2017. All rights reserved.
23  *
24  *  This program is free software; you can redistribute it and/or
25  *  modify it under the terms of the GNU General Public License as
26  *  published by the Free Software Foundation; either version 2 of the
27  *  License, or (at your option) any later version.
28  *
29  *  This program is distributed in the hope that it will be useful, but
30  *  WITHOUT ANY WARRANTY; without even the implied warranty of
31  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
32  *  General Public License for more details.
33  *
34  *  You should have received a copy of the GNU General Public License
35  *  along with this program; if not, write to the Free Software
36  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
37  *  02110-1301 USA
38  */
39 #pragma once
40 
41 #include <string> // string
42 #include <boost/filesystem/path.hpp> // path
43 
44 #ifdef _WIN32
45 #    include <codecvt> // std::codecvt_utf8_utf16, utf16
46 #    include <locale> // std::wstring_convert
47 #endif
48 
49 /** \namespace SC_Codecvt
50  *  \brief Namespace for codepage conversion functions. */
51 namespace SC_Codecvt {
52 
53 // Windows helper functions. Only defined on Windows to avoid having
54 // to unnecessarily include <codecvt> and <locale>
55 #ifdef _WIN32
56 
/** \brief Converts a UTF-8 char str to UTF-16 std::wstring
 *
 * This function is only defined on Windows, to avoid unnecessary header includes.
 *
 * \param s NUL-terminated UTF-8 byte sequence. A null pointer is treated as an empty string.
 * \return The converted wide string.
 * \throws std::range_error if std::wstring_convert fails to convert the input. */
inline std::wstring utf8_cstr_to_utf16_wstring(const char* s) {
    // from_bytes(const char*) requires a valid NUL-terminated string, so
    // reject a null pointer up front instead of invoking undefined behaviour.
    if (s == nullptr) {
        return std::wstring();
    }

    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
    return converter.from_bytes(s);
}
63 
/** \brief Converts a UTF-16 wchar_t str to UTF-8 std::string
 *
 * This function is only defined on Windows, to avoid unnecessary header includes.
 *
 * \param s NUL-terminated UTF-16 wide string. A null pointer is treated as an empty string.
 * \return The converted UTF-8 byte string.
 * \throws std::range_error if std::wstring_convert fails to convert the input. */
inline std::string utf16_wcstr_to_utf8_string(const wchar_t* s) {
    // to_bytes(const wchar_t*) requires a valid NUL-terminated string, so
    // reject a null pointer up front instead of invoking undefined behaviour.
    if (s == nullptr) {
        return std::string();
    }

    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
    return converter.to_bytes(s);
}
70 
71 #endif // _WIN32
72 
73 /** \brief Converts a path to a UTF-8 encoded string.
74  *
75  * On POSIX platforms, this just converts using .string(). On Windows, uses
76  * conversion between UTF-16 and UTF-8. */
path_to_utf8_str(const boost::filesystem::path & p)77 inline std::string path_to_utf8_str(const boost::filesystem::path& p) {
78 #ifdef _WIN32
79     return p.string(std::codecvt_utf8_utf16<wchar_t>());
80 #else
81     return p.string();
82 #endif // _WIN32
83 }
84 
85 /** \brief Converts a UTF-8 encoded string to a path.
86  *
87  * On POSIX platforms, this converts using the default constructor. On Windows,
88  * uses conversion between UTF-16 and UTF-8. */
utf8_str_to_path(const std::string & s)89 inline boost::filesystem::path utf8_str_to_path(const std::string& s) {
90 #ifdef _WIN32
91     return boost::filesystem::path(s, std::codecvt_utf8_utf16<wchar_t>());
92 #else
93     return boost::filesystem::path(s);
94 #endif // _WIN32
95 }
96 
/** \brief Converts a UTF-8 string to a native filesystem-encoded string.
 *
 * On Windows, the input is first converted from UTF-8 to a UTF-16 std::wstring,
 * and that wide string is then written out as bytes using
 * std::codecvt_utf16<wchar_t>. On POSIX systems, this is a no-op and the input
 * is returned unchanged. */
inline std::string utf8_to_native_str(const std::string& s) {
#ifndef _WIN32
    return s;
#else
    // UTF-8 bytes -> wide string (native format)
    const std::wstring wide = utf8_cstr_to_utf16_wstring(s.c_str());

    // wide string -> byte string (still native)
    return std::wstring_convert<std::codecvt_utf16<wchar_t>>().to_bytes(wide);
#endif // !_WIN32
}
113 
/** \brief Converts a native filesystem-encoded string to a UTF-8 string.
 *
 * On Windows, the input bytes are first read into a std::wstring using
 * std::codecvt_utf16<wchar_t>, and that wide string is then converted from
 * UTF-16 to UTF-8. On POSIX systems, this is a no-op and the input is returned
 * unchanged. */
inline std::string native_to_utf8_str(const std::string& s) {
#ifndef _WIN32
    return s;
#else
    // native byte string -> wide string (still native format)
    const std::wstring wide = std::wstring_convert<std::codecvt_utf16<wchar_t>>().from_bytes(s);

    // wide string -> UTF-8 byte string
    return utf16_wcstr_to_utf8_string(wide.c_str());
#endif // !_WIN32
}
130 
131 } // SC_Codecvt
132