1 ///////////////////////////////////////////////////////////////////////////////
2 //            Copyright (C) 2004-2011 by The Allacrost Project
3 //            Copyright (C) 2012-2016 by Bertram (Valyria Tear)
4 //                         All Rights Reserved
5 //
6 // This code is licensed under the GNU GPL version 2. It is free software
7 // and you may modify it and/or redistribute it under the terms of this license.
8 // See https://www.gnu.org/copyleft/gpl.html for details.
9 ///////////////////////////////////////////////////////////////////////////////
10 
11 /** ****************************************************************************
12 *** \file    ustring.cpp
13 *** \author  Tyler Olsen, roots@allacrost.org
14 *** \author  Yohann Ferreira, yohann ferreira orange fr
15 *** \brief   Source file for the UTF16 string code.
16 *** ***************************************************************************/
17 
18 #include "ustring.h"
19 
20 #include <iconv.h>
21 
22 #include <stdexcept>
23 #include <limits>
24 
// For correct endianness support
26 #include <SDL2/SDL_endian.h>
27 
28 namespace vt_utils
29 {
30 
31 ////////////////////////////////////////////////////////////////////////////////
32 ///// ustring Class
33 ////////////////////////////////////////////////////////////////////////////////
34 
35 const size_t ustring::npos = ~0;
36 
ustring()37 ustring::ustring()
38 {
39     _str.push_back(0);
40 }
41 
ustring(const uint16_t * s)42 ustring::ustring(const uint16_t *s)
43 {
44     _str.clear();
45 
46     if(!s) {
47         _str.push_back(0);
48         return;
49     }
50 
51     // Avoid memory reallocations when pushing back
52     size_t i = 0;
53     while(s[i] != 0) {
54         ++i;
55     }
56     _str.reserve(i);
57 
58     while(*s != 0) {
59         _str.push_back(*s);
60         ++s;
61     }
62 
63     _str.push_back(0);
64 }
65 
66 // Return a substring starting at pos, continuing for n elements
substr(size_t pos,size_t n) const67 ustring ustring::substr(size_t pos, size_t n) const
68 {
69     size_t len = length();
70 
71     if(pos >= len)
72         throw std::out_of_range("pos passed to substr() was too large");
73 
74     ustring s;
75     if(n == std::numeric_limits<size_t>::max() || pos + n > len) {
76         n = len - pos;
77     }
78     s._str.reserve(n + 1);
79     s._str.assign(_str.begin() + pos, _str.begin() + pos + n);
80     s._str.push_back(0);
81 
82     return s;
83 }
84 
85 // Concatenates string to another
operator +(const ustring & s) const86 ustring ustring::operator + (const ustring &s) const
87 {
88     ustring temp(*this);
89     return (temp += s);
90 }
91 
92 // Adds a character to end of this string
operator +=(uint16_t c)93 ustring& ustring::operator += (uint16_t c)
94 {
95     _str.insert(_str.end() - 1, c);
96     return *this;
97 }
98 
99 // Concatenate another string on to the end of this string
operator +=(const ustring & s)100 ustring &ustring::operator += (const ustring &s)
101 {
102     // nothing to do for empty string
103     if(s.empty())
104         return *this;
105 
106     _str.insert(_str.end() - 1, s._str.begin(), s._str.end() - 1);
107     return *this;
108 }
109 
110 // Compare two substrings
operator ==(const ustring & s) const111 bool ustring::operator == (const ustring &s) const
112 {
113     return (s._str == _str);
114 } // bool ustring::operator == (const ustring &s)
115 
116 // Finds a character within a string, starting at pos. If nothing is found, npos is returned
find(uint16_t c,size_t pos) const117 size_t ustring::find(uint16_t c, size_t pos) const
118 {
119     size_t len = length();
120 
121     for(size_t j = pos; j < len; ++j) {
122         if(_str[j] == c)
123             return j;
124     }
125 
126     return npos;
127 } // size_t ustring::find(uint16_t c, size_t pos) const
128 
129 // Finds a string within a string, starting at pos. If nothing is found, npos is returned
find(const ustring & s,size_t pos) const130 size_t ustring::find(const ustring &s, size_t pos) const
131 {
132     size_t len = length();
133     size_t total_chars = s.length();
134     size_t chars_found = 0;
135 
136     for(size_t j = pos; j < len; ++j) {
137         if(_str[j] == s[chars_found]) {
138             ++chars_found;
139             if(chars_found == total_chars) {
140                 return (j - chars_found + 1);
141             }
142         } else {
143             chars_found = 0;
144         }
145     }
146 
147     return npos;
148 } // size_t ustring::find(const ustring &s, size_t pos) const
149 
150 ////////////////////////////////////////////////////////////////////////////////
151 ///// ustring manipulator functions
152 ////////////////////////////////////////////////////////////////////////////////
153 
// Name of the iconv UTF-16 encoding matching the byte order reported by SDL.
#if SDL_BYTEORDER != SDL_LIL_ENDIAN
#define UTF_16_ICONV_NAME "UTF-16BE"
#else
#define UTF_16_ICONV_NAME "UTF-16LE"
#endif

// UTF-16 byte order mark (U+FEFF) and its byte-swapped form.
#define UTF_16_BOM_STD 0xFEFF
#define UTF_16_BOM_REV 0xFFFE
162 
UTF8ToUTF16(const std::string & source,uint16_t * dest)163 static bool UTF8ToUTF16(const std::string& source, uint16_t *dest)
164 {
165     if (source.empty()) {
166         return true;
167     }
168 
169     iconv_t convertor = iconv_open(UTF_16_ICONV_NAME, "UTF-8");
170     if(convertor == (iconv_t) - 1) {
171         return false;
172     }
173 
174 #if defined(_LIBICONV_VERSION) && _LIBICONV_VERSION == 0x0109
175     // We are using an iconv API that uses const char*
176     const char *sourceChar = source.c_str();
177 #else
178     // The iconv API doesn't specify a const source for legacy support reasons.
179     // Versions after 0x0109 changed back to char* for POSIX reasons.
180     char *sourceChar = const_cast<char *>(source.c_str());
181 #endif
182     char *destChar = reinterpret_cast<char *>(dest);
183     size_t sourceLen = source.length() + 1;
184     size_t destLen = (source.length() + 1) * 2;
185     size_t ret = iconv(convertor, &sourceChar, &sourceLen,
186                        &destChar, &destLen);
187     iconv_close(convertor);
188     if(ret == (size_t) - 1) {
189         perror("iconv");
190         return false;
191     }
192     return true;
193 }
194 
195 // Creates a ustring from a normal string
MakeUnicodeString(const std::string & text)196 ustring MakeUnicodeString(const std::string& text)
197 {
198     size_t length = text.length() + 1;
199     std::vector<uint16_t> ubuff(length, 0);
200     ubuff.reserve(length);
201     // Point to the buffer start after reservation to avoid invalidating it.
202     uint16_t *utf16String = &ubuff[0];
203     if(UTF8ToUTF16(text, &ubuff[0])) {
204         // Skip the "Byte Order Mark" from the UTF16 specification
205         if(utf16String[0] == UTF_16_BOM_STD ||  utf16String[0] == UTF_16_BOM_REV) {
206             utf16String = &ubuff[1];
207         }
208 
209 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
210         // For some reason, using UTF-16BE to iconv on big-endian machines
211         // still does not create correctly accented characters, so this
212         // byte swapping must be performed (only for irregular characters,
213         // hence the mask).
214 
215         for(size_t c = 0; c < length; ++c)
216             if(utf16String[c] & 0xFF80)
217                 utf16String[c] = (utf16String[c] << 8) | (utf16String[c] >> 8);
218 #endif
219     } else {
220         for(size_t c = 0; c < length; ++c) {
221             ubuff.push_back(text[c]);
222         }
223         ubuff.push_back(0);
224     }
225 
226     ustring new_ustr(utf16String);
227     return new_ustr;
228 } // ustring MakeUnicodeString(const string& text)
229 
230 
231 // Creates a normal string from a ustring
MakeStandardString(const ustring & text)232 std::string MakeStandardString(const ustring &text)
233 {
234     const size_t length = text.length();
235     std::vector<unsigned char> strbuff(length + 1,'\0');
236 
237     for(size_t c = 0; c < length; ++c) {
238         uint16_t curr_char = text[c];
239 
240         if(curr_char > 0xff)
241             strbuff[c] = '?';
242         else
243             strbuff[c] = static_cast<unsigned char>(curr_char);
244     }
245 
246     return std::string(reinterpret_cast<char *>(&strbuff[0]));
247 } // string MakeStandardString(const ustring& text)
248 
} // namespace vt_utils
250