1 ///////////////////////////////////////////////////////////////////////////////
2 // Copyright (C) 2004-2011 by The Allacrost Project
3 // Copyright (C) 2012-2016 by Bertram (Valyria Tear)
4 // All Rights Reserved
5 //
6 // This code is licensed under the GNU GPL version 2. It is free software
7 // and you may modify it and/or redistribute it under the terms of this license.
8 // See https://www.gnu.org/copyleft/gpl.html for details.
9 ///////////////////////////////////////////////////////////////////////////////
10
11 /** ****************************************************************************
12 *** \file ustring.cpp
13 *** \author Tyler Olsen, roots@allacrost.org
14 *** \author Yohann Ferreira, yohann ferreira orange fr
15 *** \brief Source file for the UTF16 string code.
16 *** ***************************************************************************/
17
18 #include "ustring.h"
19
20 #include <iconv.h>
21
22 #include <stdexcept>
23 #include <limits>
24
25 // For correct endianness support
26 #include <SDL2/SDL_endian.h>
27
28 namespace vt_utils
29 {
30
31 ////////////////////////////////////////////////////////////////////////////////
32 ///// ustring Class
33 ////////////////////////////////////////////////////////////////////////////////
34
35 const size_t ustring::npos = ~0;
36
ustring()37 ustring::ustring()
38 {
39 _str.push_back(0);
40 }
41
ustring(const uint16_t * s)42 ustring::ustring(const uint16_t *s)
43 {
44 _str.clear();
45
46 if(!s) {
47 _str.push_back(0);
48 return;
49 }
50
51 // Avoid memory reallocations when pushing back
52 size_t i = 0;
53 while(s[i] != 0) {
54 ++i;
55 }
56 _str.reserve(i);
57
58 while(*s != 0) {
59 _str.push_back(*s);
60 ++s;
61 }
62
63 _str.push_back(0);
64 }
65
66 // Return a substring starting at pos, continuing for n elements
substr(size_t pos,size_t n) const67 ustring ustring::substr(size_t pos, size_t n) const
68 {
69 size_t len = length();
70
71 if(pos >= len)
72 throw std::out_of_range("pos passed to substr() was too large");
73
74 ustring s;
75 if(n == std::numeric_limits<size_t>::max() || pos + n > len) {
76 n = len - pos;
77 }
78 s._str.reserve(n + 1);
79 s._str.assign(_str.begin() + pos, _str.begin() + pos + n);
80 s._str.push_back(0);
81
82 return s;
83 }
84
85 // Concatenates string to another
operator +(const ustring & s) const86 ustring ustring::operator + (const ustring &s) const
87 {
88 ustring temp(*this);
89 return (temp += s);
90 }
91
92 // Adds a character to end of this string
operator +=(uint16_t c)93 ustring& ustring::operator += (uint16_t c)
94 {
95 _str.insert(_str.end() - 1, c);
96 return *this;
97 }
98
99 // Concatenate another string on to the end of this string
operator +=(const ustring & s)100 ustring &ustring::operator += (const ustring &s)
101 {
102 // nothing to do for empty string
103 if(s.empty())
104 return *this;
105
106 _str.insert(_str.end() - 1, s._str.begin(), s._str.end() - 1);
107 return *this;
108 }
109
110 // Compare two substrings
operator ==(const ustring & s) const111 bool ustring::operator == (const ustring &s) const
112 {
113 return (s._str == _str);
114 } // bool ustring::operator == (const ustring &s)
115
116 // Finds a character within a string, starting at pos. If nothing is found, npos is returned
find(uint16_t c,size_t pos) const117 size_t ustring::find(uint16_t c, size_t pos) const
118 {
119 size_t len = length();
120
121 for(size_t j = pos; j < len; ++j) {
122 if(_str[j] == c)
123 return j;
124 }
125
126 return npos;
127 } // size_t ustring::find(uint16_t c, size_t pos) const
128
129 // Finds a string within a string, starting at pos. If nothing is found, npos is returned
find(const ustring & s,size_t pos) const130 size_t ustring::find(const ustring &s, size_t pos) const
131 {
132 size_t len = length();
133 size_t total_chars = s.length();
134 size_t chars_found = 0;
135
136 for(size_t j = pos; j < len; ++j) {
137 if(_str[j] == s[chars_found]) {
138 ++chars_found;
139 if(chars_found == total_chars) {
140 return (j - chars_found + 1);
141 }
142 } else {
143 chars_found = 0;
144 }
145 }
146
147 return npos;
148 } // size_t ustring::find(const ustring &s, size_t pos) const
149
150 ////////////////////////////////////////////////////////////////////////////////
151 ///// ustring manipulator functions
152 ////////////////////////////////////////////////////////////////////////////////
153
// iconv encoding name for UTF-16 in the byte order SDL reports for the host:
// little-endian hosts use "UTF-16LE", every other host uses "UTF-16BE".
#if SDL_BYTEORDER != SDL_LIL_ENDIAN
#define UTF_16_ICONV_NAME "UTF-16BE"
#else
#define UTF_16_ICONV_NAME "UTF-16LE"
#endif

// UTF-16 byte order mark values: the standard one and its byte-swapped form.
#define UTF_16_BOM_STD 0xFEFF
#define UTF_16_BOM_REV 0xFFFE
162
UTF8ToUTF16(const std::string & source,uint16_t * dest)163 static bool UTF8ToUTF16(const std::string& source, uint16_t *dest)
164 {
165 if (source.empty()) {
166 return true;
167 }
168
169 iconv_t convertor = iconv_open(UTF_16_ICONV_NAME, "UTF-8");
170 if(convertor == (iconv_t) - 1) {
171 return false;
172 }
173
174 #if defined(_LIBICONV_VERSION) && _LIBICONV_VERSION == 0x0109
175 // We are using an iconv API that uses const char*
176 const char *sourceChar = source.c_str();
177 #else
178 // The iconv API doesn't specify a const source for legacy support reasons.
179 // Versions after 0x0109 changed back to char* for POSIX reasons.
180 char *sourceChar = const_cast<char *>(source.c_str());
181 #endif
182 char *destChar = reinterpret_cast<char *>(dest);
183 size_t sourceLen = source.length() + 1;
184 size_t destLen = (source.length() + 1) * 2;
185 size_t ret = iconv(convertor, &sourceChar, &sourceLen,
186 &destChar, &destLen);
187 iconv_close(convertor);
188 if(ret == (size_t) - 1) {
189 perror("iconv");
190 return false;
191 }
192 return true;
193 }
194
195 // Creates a ustring from a normal string
MakeUnicodeString(const std::string & text)196 ustring MakeUnicodeString(const std::string& text)
197 {
198 size_t length = text.length() + 1;
199 std::vector<uint16_t> ubuff(length, 0);
200 ubuff.reserve(length);
201 // Point to the buffer start after reservation to avoid invalidating it.
202 uint16_t *utf16String = &ubuff[0];
203 if(UTF8ToUTF16(text, &ubuff[0])) {
204 // Skip the "Byte Order Mark" from the UTF16 specification
205 if(utf16String[0] == UTF_16_BOM_STD || utf16String[0] == UTF_16_BOM_REV) {
206 utf16String = &ubuff[1];
207 }
208
209 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
210 // For some reason, using UTF-16BE to iconv on big-endian machines
211 // still does not create correctly accented characters, so this
212 // byte swapping must be performed (only for irregular characters,
213 // hence the mask).
214
215 for(size_t c = 0; c < length; ++c)
216 if(utf16String[c] & 0xFF80)
217 utf16String[c] = (utf16String[c] << 8) | (utf16String[c] >> 8);
218 #endif
219 } else {
220 for(size_t c = 0; c < length; ++c) {
221 ubuff.push_back(text[c]);
222 }
223 ubuff.push_back(0);
224 }
225
226 ustring new_ustr(utf16String);
227 return new_ustr;
228 } // ustring MakeUnicodeString(const string& text)
229
230
231 // Creates a normal string from a ustring
MakeStandardString(const ustring & text)232 std::string MakeStandardString(const ustring &text)
233 {
234 const size_t length = text.length();
235 std::vector<unsigned char> strbuff(length + 1,'\0');
236
237 for(size_t c = 0; c < length; ++c) {
238 uint16_t curr_char = text[c];
239
240 if(curr_char > 0xff)
241 strbuff[c] = '?';
242 else
243 strbuff[c] = static_cast<unsigned char>(curr_char);
244 }
245
246 return std::string(reinterpret_cast<char *>(&strbuff[0]));
247 } // string MakeStandardString(const ustring& text)
248
249 } // namespace utils
250