1 /**
2 * \file SC_Codecvt.hpp
3 *
4 * \brief Codepage conversion utilities.
5 *
6 * This header provides convenience functions to convert strings and paths between UTF-8 and the OS's
7 * filesystem API encoding. On POSIX systems, this does nothing. On Windows, this converts between
8 * UTF-8 and UTF-16. On Windows, additional functions are provided to convert between UTF-8 std::string
9 * and UTF-16 std::wstring.
10 *
11 * $Author: Brian Heim $
12 *
13 * \version 1.1
14 *
15 * $Date: 2017-05-14 $
16 *
17 * $Contact: brianlheim@gmail.com $
18 *
19 * Created on: 2017-05-14
20 */
21 /*
22 * Copyright (C) Brian Heim, 2017. All rights reserved.
23 *
24 * This program is free software; you can redistribute it and/or
25 * modify it under the terms of the GNU General Public License as
26 * published by the Free Software Foundation; either version 2 of the
27 * License, or (at your option) any later version.
28 *
29 * This program is distributed in the hope that it will be useful, but
30 * WITHOUT ANY WARRANTY; without even the implied warranty of
31 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
32 * General Public License for more details.
33 *
34 * You should have received a copy of the GNU General Public License
35 * along with this program; if not, write to the Free Software
36 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
37 * 02110-1301 USA
38 */
39 #pragma once
40
41 #include <string> // string
42 #include <boost/filesystem/path.hpp> // path
43
44 #ifdef _WIN32
45 # include <codecvt> // std::codecvt_utf8_utf16, utf16
46 # include <locale> // std::wstring_convert
47 #endif
48
49 /** \namespace SC_Codecvt
50 * \brief Namespace for codepage conversion functions. */
51 namespace SC_Codecvt {
52
53 // Windows helper functions. Only defined on Windows to avoid having
54 // to unnecessarily include <codecvt> and <locale>
55 #ifdef _WIN32
56
/** \brief Converts a NUL-terminated UTF-8 C string to a UTF-16 std::wstring.
 *
 * This function is only defined on Windows, to avoid unnecessary header includes.
 *
 * \param s NUL-terminated UTF-8 byte sequence. A null pointer is treated as an empty string.
 * \return The UTF-16 encoding of \p s; empty if \p s is null or empty.
 * \throws std::range_error if \p s cannot be converted (the converter is constructed without a
 *         fallback error string). */
inline std::wstring utf8_cstr_to_utf16_wstring(const char* s) {
    // from_bytes(const char*) has to measure the string, so a null pointer must never reach it.
    if (s == nullptr) {
        return std::wstring();
    }

    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> utf8_to_utf16;
    return utf8_to_utf16.from_bytes(s);
}
63
/** \brief Converts a NUL-terminated UTF-16 wide C string to a UTF-8 std::string.
 *
 * This function is only defined on Windows, to avoid unnecessary header includes.
 *
 * \param s NUL-terminated UTF-16 sequence. A null pointer is treated as an empty string.
 * \return The UTF-8 encoding of \p s; empty if \p s is null or empty.
 * \throws std::range_error if \p s cannot be converted (the converter is constructed without a
 *         fallback error string). */
inline std::string utf16_wcstr_to_utf8_string(const wchar_t* s) {
    // to_bytes(const wchar_t*) has to measure the string, so a null pointer must never reach it.
    if (s == nullptr) {
        return std::string();
    }

    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> utf16_to_utf8;
    return utf16_to_utf8.to_bytes(s);
}
70
71 #endif // _WIN32
72
73 /** \brief Converts a path to a UTF-8 encoded string.
74 *
75 * On POSIX platforms, this just converts using .string(). On Windows, uses
76 * conversion between UTF-16 and UTF-8. */
path_to_utf8_str(const boost::filesystem::path & p)77 inline std::string path_to_utf8_str(const boost::filesystem::path& p) {
78 #ifdef _WIN32
79 return p.string(std::codecvt_utf8_utf16<wchar_t>());
80 #else
81 return p.string();
82 #endif // _WIN32
83 }
84
85 /** \brief Converts a UTF-8 encoded string to a path.
86 *
87 * On POSIX platforms, this converts using the default constructor. On Windows,
88 * uses conversion between UTF-16 and UTF-8. */
utf8_str_to_path(const std::string & s)89 inline boost::filesystem::path utf8_str_to_path(const std::string& s) {
90 #ifdef _WIN32
91 return boost::filesystem::path(s, std::codecvt_utf8_utf16<wchar_t>());
92 #else
93 return boost::filesystem::path(s);
94 #endif // _WIN32
95 }
96
/** \brief Converts a UTF-8 string to a native filesystem-encoded string.
 *
 * On Windows, the UTF-8 input is first decoded to UTF-16 code units, which are then serialized
 * back into a byte string using std::codecvt_utf16. On POSIX systems, no-op.
 *
 * \param s The UTF-8 encoded string to convert.
 * \return On Windows, the UTF-16 byte serialization of \p s; elsewhere, a copy of \p s.
 * \throws std::range_error on Windows if a conversion step fails. */
inline std::string utf8_to_native_str(const std::string& s) {
#ifdef _WIN32
    // First to a wide string (UTF-16 code units). Converting the std::string itself rather than
    // s.c_str() keeps everything after an embedded NUL instead of truncating there.
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> utf8_to_wide;
    const std::wstring wide = utf8_to_wide.from_bytes(s);

    // Then to a byte string that still holds UTF-16.
    // NOTE(review): std::codecvt_utf16 writes big-endian unless std::little_endian is requested;
    // confirm this byte order is what consumers of the "native" string expect.
    std::wstring_convert<std::codecvt_utf16<wchar_t>> wide_to_bytes;
    return wide_to_bytes.to_bytes(wide);
#else
    return s;
#endif // _WIN32
}
113
/** \brief Converts a native filesystem-encoded string to a UTF-8 string.
 *
 * On Windows, the input bytes are first decoded as UTF-16 using std::codecvt_utf16, and the
 * resulting code units are then encoded as UTF-8. On POSIX systems, no-op.
 *
 * \param s The native-encoded string to convert.
 * \return On Windows, the UTF-8 encoding of \p s; elsewhere, a copy of \p s.
 * \throws std::range_error on Windows if a conversion step fails. */
inline std::string native_to_utf8_str(const std::string& s) {
#ifdef _WIN32
    // First to a wide string (UTF-16 code units).
    // NOTE(review): std::codecvt_utf16 reads big-endian unless std::little_endian is requested;
    // confirm this byte order matches what producers of the "native" string emit.
    std::wstring_convert<std::codecvt_utf16<wchar_t>> bytes_to_wide;
    const std::wstring wide = bytes_to_wide.from_bytes(s);

    // Then to UTF-8. Converting the std::wstring itself rather than wide.c_str() keeps
    // everything after an embedded NUL instead of truncating there.
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> wide_to_utf8;
    return wide_to_utf8.to_bytes(wide);
#else
    return s;
#endif // _WIN32
}
130
131 } // SC_Codecvt
132