1 /* This file is part of the Spring engine (GPL v2 or later), see LICENSE.html */
2
3 #include "System/Util.h"
4 #if defined(_MSC_VER) && (_MSC_VER >= 1310)
5 #include <intrin.h>
6 #endif
7 #include <cstring>
8
9
/// Returns a copy of @p text in which every non-overlapping occurrence of
/// @p from (searched left to right) has been replaced by @p to.
///
/// Text inserted by a replacement is never rescanned, so a @p to that itself
/// contains @p from does not recurse. An empty @p from matches nothing and
/// @p text is returned unchanged.
std::string StringReplace(const std::string& text,
                          const std::string& from,
                          const std::string& to)
{
	std::string working = text;

	// an empty pattern is found at every position, which would make the
	// search loop below spin forever while the string keeps growing
	if (from.empty())
		return working;

	std::string::size_type pos = 0;
	while ((pos = working.find(from, pos)) != std::string::npos) {
		// replace in place instead of rebuilding the whole string per match
		working.replace(pos, from.size(), to);
		// continue searching behind the text that was just inserted
		pos += to.size();
	}

	return working;
}
29
/// Returns a copy of @p str from which every character that occurs in
/// @p chars has been removed, wherever it appears in the string.
std::string StringStrip(const std::string& str, const std::string& chars)
{
	std::string kept;
	kept.reserve(str.size());

	for (const char c : str) {
		// only keep characters that are not part of the strip set
		if (chars.find(c) == std::string::npos)
			kept += c;
	}

	return kept;
}
44
45
46
/// Removes every leading and trailing character contained in @p ws from
/// @p str, modifying it in place. A string that consists only of such
/// characters ends up empty.
/// @see http://www.codeproject.com/KB/stl/stdstringtrim.aspx
void StringTrimInPlace(std::string& str, const std::string& ws)
{
	const std::string::size_type last = str.find_last_not_of(ws);

	// nothing but trimmable characters (or already empty)
	if (last == std::string::npos) {
		str.clear();
		return;
	}

	// cut the tail first, then the head
	str.erase(last + 1);

	const std::string::size_type first = str.find_first_not_of(ws);
	if (first != std::string::npos)
		str.erase(0, first);
}
61
StringTrim(const std::string & str,const std::string & ws)62 std::string StringTrim(const std::string& str, const std::string& ws)
63 {
64 std::string copy(str);
65 StringTrimInPlace(copy, ws);
66 return copy;
67 }
68
StringToBool(std::string str)69 bool StringToBool(std::string str)
70 {
71 bool value = true;
72
73 StringTrimInPlace(str);
74 StringToLowerInPlace(str);
75
76 // regex would probably be more appropriate,
77 // but it is better not to rely on any external lib here
78 if (
79 (str.empty()) ||
80 (str == "0") ||
81 (str == "n") ||
82 (str == "no") ||
83 (str == "f") ||
84 (str == "false") ||
85 (str == "off")
86 ) {
87 value = false;
88 }
89
90 return value;
91 }
92
/// Returns true if @p str begins with the NUL-terminated string @p prefix.
/// A NULL @p prefix never matches; an empty @p prefix always matches.
bool StringStartsWith(const std::string& str, const char* prefix)
{
	if (prefix == NULL)
		return false;

	const size_t prefixLen = strlen(prefix);

	// a prefix longer than the string can not match
	if (prefixLen > str.size())
		return false;

	return (str.compare(0, prefixLen, prefix) == 0);
}
101
/// Returns true if @p str ends with the NUL-terminated string @p postfix.
/// A NULL @p postfix never matches; an empty @p postfix always matches.
bool StringEndsWith(const std::string& str, const char* postfix)
{
	if (postfix == NULL)
		return false;

	const size_t postfixLen = strlen(postfix);

	// a postfix longer than the string can not match
	if (postfixLen > str.size())
		return false;

	// compare exactly the tail of str that has the length of postfix
	return (str.compare(str.size() - postfixLen, postfixLen, postfix) == 0);
}
110
111
InverseOrSetBool(bool & container,const std::string & argValue,const bool inverseArg)112 void InverseOrSetBool(bool& container, const std::string& argValue, const bool inverseArg)
113 {
114 if (argValue.empty()) {
115 // toggle
116 container = !container;
117 } else {
118 // set
119 const bool value = StringToBool(argValue);
120 container = inverseArg ? (!value) : (value);
121 }
122 }
123
124
125
/// Counts the consecutive set bits of @p x starting at its most significant
/// bit (0 for 0xxxxxxx, 8 for 0xFF).
static inline unsigned count_leading_ones(uint8_t x)
{
	// Invert the byte so that leading ones become leading zeros, move it to
	// the top of a 32bit word and fill the lower 24 bits with ones: the scan
	// argument is then never zero (undefined for clz/bsr) and 0xFF gives 8.
	const uint32_t i = (static_cast<uint32_t>(static_cast<uint8_t>(~x)) << 24) | 0x00FFFFFFu;

#if defined(_MSC_VER)
	// MSVC has no __builtin_clz; derive it from the index of the highest set bit
	unsigned long highestSetBit = 0;
	_BitScanReverse(&highestSetBit, i);
	return 31u - static_cast<unsigned>(highestSetBit);
#else
	return static_cast<unsigned>(__builtin_clz(i));
#endif
}


/// Decodes the character that starts at byte offset @p pos of @p text and
/// advances @p pos by the number of bytes consumed.
///
/// A lead byte that is not followed by enough continuation bytes is returned
/// as a single 8bit Latin-1 character. A tab is returned as U+2007.
///
/// @param text  byte string, UTF-8 with Latin-1 fallback
/// @param pos   byte offset of the character to decode (in/out)
/// @return the decoded code point, or 0 (with @p pos unchanged) when @p pos
///         is negative or not inside @p text
char32_t Utf8GetNextChar(const std::string& text, int& pos)
{
	// UTF8 looks like this
	// 1Byte == ASCII:      0xxxxxxx
	// 2Bytes encoded char: 110xxxxx 10xxxxxx
	// 3Bytes encoded char: 1110xxxx 10xxxxxx 10xxxxxx
	// 4Bytes encoded char: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	// Originally there were 5&6 byte versions too, but they were dropped in RFC 3629,
	// so UTF8 is limited to the range that UTF16 can represent.

	static const uint8_t UTF8_CONT_MASK = 0xC0; // 11xxxxxx
	static const uint8_t UTF8_CONT_OKAY = 0x80; // 10xxxxxx

	// reading before the start or at/behind the end of the string
	if ((pos < 0) || (static_cast<size_t>(pos) >= text.length()))
		return 0;

	// Fetch up to 4 bytes one by one: no unaligned or type-punned load, no
	// dependence on the host byte order. Bytes behind the end of the string
	// stay 0, which is never a continuation byte.
	uint8_t c[4] = { 0, 0, 0, 0 };
	const size_t remainingChars = text.length() - static_cast<size_t>(pos);
	const size_t numBytes = (remainingChars < 4) ? remainingChars : 4;
	for (size_t n = 0; n < numBytes; ++n) {
		c[n] = static_cast<uint8_t>(text[pos + n]);
	}

	// how many bytes are requested for our multi-byte utf8 sequence
	unsigned clo = count_leading_ones(c[0]);
	if (clo > 4 || clo == 0) clo = 1; // ignore >=5 byte ones cause of RFC 3629

	// How many healthy utf8 bytes follow the first one *directly*; a
	// continuation byte behind a broken one must not count, else the broken
	// byte would be swallowed into the sequence.
	unsigned numValidUtf8Bytes = 1; // first char is always valid
	while ((numValidUtf8Bytes < 4) && ((c[numValidUtf8Bytes] & UTF8_CONT_MASK) == UTF8_CONT_OKAY)) {
		++numValidUtf8Bytes;
	}

	// check if enough trailing utf8 bytes are healthy
	// else ignore utf8 and parse it as 8bit Latin-1 char (extended ASCII)
	// this adds backward compatibility with the old renderer
	// which supported extended ASCII with umlauts etc.
	const unsigned usedUtf8Bytes = (clo <= numValidUtf8Bytes) ? clo : 1u;

	char32_t u = 0;
	switch (usedUtf8Bytes) {
		case 2: {
			u  = (char32_t(c[0] & 0x1F)) << 6;
			u |= (char32_t(c[1] & 0x3F));
		} break;
		case 3: {
			u  = (char32_t(c[0] & 0x0F)) << 12;
			u |= (char32_t(c[1] & 0x3F)) << 6;
			u |= (char32_t(c[2] & 0x3F));
		} break;
		case 4: {
			u  = (char32_t(c[0] & 0x07)) << 18;
			u |= (char32_t(c[1] & 0x3F)) << 12;
			u |= (char32_t(c[2] & 0x3F)) << 6;
			u |= (char32_t(c[3] & 0x3F));
			//TODO limit range to UTF16!
		} break;
		default: {
			// single byte: ASCII or Latin-1 fallback
			u = c[0];
		} break;
	}
	pos += static_cast<int>(usedUtf8Bytes);

	// replace tabs with spaces
	if (u == 0x9)
		u = 0x2007;

	return u;
}
216
217
/// Encodes the code point @p ch as a UTF-8 byte sequence of 1 to 4 bytes.
/// Values of 2^21 and above do not fit into 4 bytes and give an empty string.
std::string UnicodeToUtf8(char32_t ch)
{
	std::string utf8;

	if (ch < 0x80) {
		// in:  0000 0000  0000 0000  0000 0000  0aaa aaaa
		// out: 0aaa aaaa
		utf8.push_back(static_cast<char>(ch));
	} else if (ch < 0x800) {
		// in:  0000 0000  0000 0000  0000 0bbb  bbaa aaaa
		// out: 110b bbbb  10aa aaaa
		utf8.push_back(static_cast<char>(0xC0 | (ch >> 6)));
		utf8.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
	} else if (ch < 0x10000) {
		// in:  0000 0000  0000 0000  cccc bbbb  bbaa aaaa
		// out: 1110 cccc  10bb bbbb  10aa aaaa
		utf8.push_back(static_cast<char>(0xE0 | (ch >> 12)));
		utf8.push_back(static_cast<char>(0x80 | ((ch >> 6) & 0x3F)));
		utf8.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
	} else if (ch < 0x200000) {
		// in:  0000 0000  000d ddcc  cccc bbbb  bbaa aaaa
		// out: 1111 0ddd  10cc cccc  10bb bbbb  10aa aaaa
		utf8.push_back(static_cast<char>(0xF0 | (ch >> 18)));
		utf8.push_back(static_cast<char>(0x80 | ((ch >> 12) & 0x3F)));
		utf8.push_back(static_cast<char>(0x80 | ((ch >> 6) & 0x3F)));
		utf8.push_back(static_cast<char>(0x80 | (ch & 0x3F)));
	}

	return utf8;
}
255