1 /* This file is part of the Spring engine (GPL v2 or later), see LICENSE.html */
2 
3 #include "System/Util.h"
4 #if defined(_MSC_VER) && (_MSC_VER >= 1310)
5 	#include <intrin.h>
6 #endif
7 #include <cstring>
8 
9 
/**
 * Returns a copy of @p text in which every non-overlapping occurrence of
 * @p from (scanning left to right) is replaced by @p to.
 *
 * Text inserted by a replacement is never rescanned, so a @p to that contains
 * @p from does not trigger further replacements.
 *
 * @param text the input string, left untouched
 * @param from the substring to search for; if empty, @p text is returned as-is
 * @param to   the replacement, may be empty
 * @return the string with all replacements applied
 */
std::string StringReplace(const std::string& text,
                          const std::string& from,
                          const std::string& to)
{
	// An empty pattern matches at every position, which would never terminate.
	if (from.empty())
		return text;

	std::string result;
	result.reserve(text.size());

	std::string::size_type copiedUpTo = 0;
	std::string::size_type hit = text.find(from, copiedUpTo);

	while (hit != std::string::npos) {
		// keep everything between the previous match and this one
		result.append(text, copiedUpTo, hit - copiedUpTo);
		result += to;

		copiedUpTo = hit + from.size();
		hit = text.find(from, copiedUpTo);
	}

	// trailing part after the last match (the whole text if nothing matched)
	result.append(text, copiedUpTo, std::string::npos);
	return result;
}
29 
/**
 * Returns a copy of @p str with every character that occurs in @p chars
 * removed, wherever it appears in the string.
 *
 * @param str   the input string
 * @param chars the set of characters to drop
 * @return @p str without any of the characters in @p chars
 */
std::string StringStrip(const std::string& str, const std::string& chars)
{
	std::string kept;
	kept.reserve(str.size());

	for (const char c : str) {
		const bool unwanted = (chars.find(c) != std::string::npos);
		if (!unwanted)
			kept += c;
	}

	return kept;
}
44 
45 
46 
/**
 * Removes all leading and trailing characters contained in @p ws from
 * @p str, modifying it in place. A string made up solely of @p ws
 * characters (or an empty one) ends up empty.
 *
 * @param str the string to trim
 * @param ws  the set of characters treated as whitespace
 * @see http://www.codeproject.com/KB/stl/stdstringtrim.aspx
 */
void StringTrimInPlace(std::string& str, const std::string& ws)
{
	const std::string::size_type lastKept = str.find_last_not_of(ws);

	if (lastKept == std::string::npos) {
		// nothing but whitespace (or nothing at all)
		str.clear();
		return;
	}

	// cut the tail first ...
	str.erase(lastKept + 1);

	// ... then the head; a non-whitespace character is known to exist here,
	// so the search cannot fail
	const std::string::size_type firstKept = str.find_first_not_of(ws);
	str.erase(0, firstKept);
}
61 
StringTrim(const std::string & str,const std::string & ws)62 std::string StringTrim(const std::string& str, const std::string& ws)
63 {
64 	std::string copy(str);
65 	StringTrimInPlace(copy, ws);
66 	return copy;
67 }
68 
StringToBool(std::string str)69 bool StringToBool(std::string str)
70 {
71 	bool value = true;
72 
73 	StringTrimInPlace(str);
74 	StringToLowerInPlace(str);
75 
76 	// regex would probably be more appropriate,
77 	// but it is better not to rely on any external lib here
78 	if (
79 			(str.empty())    ||
80 			(str == "0")     ||
81 			(str == "n")     ||
82 			(str == "no")    ||
83 			(str == "f")     ||
84 			(str == "false") ||
85 			(str == "off")
86 		) {
87 		value = false;
88 	}
89 
90 	return value;
91 }
92 
/**
 * Checks whether @p str begins with the C-string @p prefix.
 *
 * @param str    the string to inspect
 * @param prefix the expected beginning; a null pointer never matches,
 *               an empty string always does
 * @return true if @p str starts with @p prefix
 */
bool StringStartsWith(const std::string& str, const char* prefix)
{
	if (prefix == nullptr)
		return false;

	const size_t prefixLen = strlen(prefix);

	if (prefixLen > str.size())
		return false;

	return (str.compare(0, prefixLen, prefix) == 0);
}
101 
/**
 * Checks whether @p str ends with the C-string @p postfix.
 *
 * @param str     the string to inspect
 * @param postfix the expected ending; a null pointer never matches,
 *                an empty string always does
 * @return true if @p str ends with @p postfix
 */
bool StringEndsWith(const std::string& str, const char* postfix)
{
	if (postfix == nullptr)
		return false;

	const size_t postfixLen = strlen(postfix);

	if (postfixLen > str.size())
		return false;

	// compare exactly the last postfixLen characters
	const size_t tailStart = str.size() - postfixLen;
	return (str.compare(tailStart, postfixLen, postfix) == 0);
}
110 
111 
InverseOrSetBool(bool & container,const std::string & argValue,const bool inverseArg)112 void InverseOrSetBool(bool& container, const std::string& argValue, const bool inverseArg)
113 {
114 	if (argValue.empty()) {
115 		// toggle
116 		container = !container;
117 	} else {
118 		// set
119 		const bool value = StringToBool(argValue);
120 		container = inverseArg ? (!value) : (value);
121 	}
122 }
123 
124 
125 
/**
 * Counts the consecutive 1-bits at the most significant end of a byte,
 * e.g. 0 for 0xxxxxxx, 2 for 110xxxxx and 8 for 11111111.
 */
static inline unsigned count_leading_ones(uint8_t x)
{
	// Put the inverted byte into the top 8 bits and force all lower bits to 1:
	// the leading ones of x become leading zeros, and the word can never be 0
	// (the bit-scan primitives below are undefined for a zero argument).
	const uint32_t inverted = (uint32_t(uint8_t(~x)) << 24) | 0x00FFFFFFu;

#if defined(_MSC_VER) && (_MSC_VER >= 1310)
	unsigned long highestSetBit = 0;
	_BitScanReverse(&highestSetBit, inverted);
	return 31u - unsigned(highestSetBit);
#elif defined(__GNUC__)
	return unsigned(__builtin_clz(inverted));
#else
	// portable fallback for compilers without a bit-scan intrinsic
	unsigned ones = 0;
	for (uint32_t mask = 0x80000000u; (inverted & mask) == 0; mask >>= 1)
		++ones;
	return ones;
#endif
}


/**
 * Decodes the character starting at byte offset @p pos of @p text and
 * advances @p pos past the bytes that were consumed.
 *
 * A lead byte that is not followed by enough consecutive continuation bytes
 * (or that announces more than 4 bytes) is not treated as UTF-8: that single
 * byte is returned as an 8-bit Latin-1 character instead. Overlong encodings
 * and surrogate code points are not rejected.
 *
 * @param text the (mostly) UTF-8 encoded string
 * @param pos  in: byte offset to decode at; out: offset of the next character.
 *             Left unchanged if it lies outside of @p text.
 * @return the decoded code point, with TAB (0x9) mapped to U+2007, or 0 if
 *         @p pos is negative or at/after the end of @p text
 */
char32_t Utf8GetNextChar(const std::string& text, int& pos)
{
	// UTF8 looks like this
	// 1Byte == ASCII:      0xxxxxxx
	// 2Bytes encoded char: 110xxxxx 10xxxxxx
	// 3Bytes encoded char: 1110xxxx 10xxxxxx 10xxxxxx
	// 4Bytes encoded char: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
	// Originally there were 5&6 byte versions too, but they were dropped in RFC 3629.
	// So UTF8 maps to UTF16 range only.

	const uint8_t UTF8_CONT_MASK = 0xC0; // 11xxxxxx
	const uint8_t UTF8_CONT_OKAY = 0x80; // 10xxxxxx
	const size_t  UTF8_MAX_BYTES = 4;

	// reading before the start or at/after the end of the string
	if (pos < 0 || size_t(pos) >= text.length())
		return 0;

	// copy up to 4 bytes; anything past the end of the string stays 0,
	// which is never a valid continuation byte
	const size_t remainingChars = text.length() - size_t(pos);
	const size_t numCopied = (remainingChars < UTF8_MAX_BYTES) ? remainingChars : UTF8_MAX_BYTES;

	uint8_t bytes[4] = { 0, 0, 0, 0 };
	std::memcpy(bytes, text.data() + pos, numCopied);

	// how many bytes are requested for our multi-byte utf8 sequence
	unsigned clo = count_leading_ones(bytes[0]);
	if (clo > 4 || clo == 0)
		clo = 1; // ignore >=5 byte ones cause of RFC 3629

	// how many healthy utf8 bytes are following; they must be consecutive,
	// a continuation byte behind a gap belongs to some later character
	unsigned numValidUtf8Bytes = 1; // first char is always valid
	while (numValidUtf8Bytes < UTF8_MAX_BYTES &&
	       (bytes[numValidUtf8Bytes] & UTF8_CONT_MASK) == UTF8_CONT_OKAY) {
		++numValidUtf8Bytes;
	}

	// check if enough trailing utf8 bytes are healthy
	// else ignore utf8 and parse it as 8bit Latin-1 char (extended ASCII)
	// this adds backwardcompatibility with the old renderer
	// which supported extended ASCII with umlauts etc.
	const unsigned usedUtf8Bytes = (clo <= numValidUtf8Bytes) ? clo : 1u;

	char32_t u = 0;
	switch (usedUtf8Bytes) {
		case 2: {
			u  = (char32_t(bytes[0] & 0x1F)) << 6;
			u |= (char32_t(bytes[1] & 0x3F));
		} break;
		case 3: {
			u  = (char32_t(bytes[0] & 0x0F)) << 12;
			u |= (char32_t(bytes[1] & 0x3F)) << 6;
			u |= (char32_t(bytes[2] & 0x3F));
		} break;
		case 4: {
			u  = (char32_t(bytes[0] & 0x07)) << 18;
			u |= (char32_t(bytes[1] & 0x3F)) << 12;
			u |= (char32_t(bytes[2] & 0x3F)) << 6;
			u |= (char32_t(bytes[3] & 0x3F));
			//TODO limit range to UTF16!
		} break;
		default: {
			// single byte: ASCII, or Latin-1 fallback
			u  = bytes[0];
		} break;
	}
	pos += int(usedUtf8Bytes);

	// replace tabs with spaces
	if (u == 0x9)
		u = 0x2007;

	return u;
}
216 
217 
/**
 * Encodes a single code point as a UTF-8 byte sequence.
 *
 * @param ch the code point to encode
 * @return 1 to 4 bytes for values below 2^21 (no further validity checks are
 *         made), or an empty string for anything larger
 */
std::string UnicodeToUtf8(char32_t ch)
{
	// builds one continuation byte (10xxxxxx) from the six bits at 'shift'
	const auto tail = [ch](unsigned shift) {
		return char(0x80 | ((ch >> shift) & 0x3F));
	};

	std::string utf8;

	if (ch < 0x80u) {
		// in:  0000 0000  0000 0000  0000 0000  0aaa aaaa
		// out:                                  0aaa aaaa
		utf8.push_back(char(ch));
	} else if (ch < 0x800u) {
		// in:  0000 0000  0000 0000  0000 0bbb  bbaa aaaa
		// out:                       110b bbbb  10aa aaaa
		utf8.push_back(char(0xC0 | (ch >> 6)));
		utf8.push_back(tail(0));
	} else if (ch < 0x10000u) {
		// in:  0000 0000  0000 0000  cccc bbbb  bbaa aaaa
		// out:            1110 cccc  10bb bbbb  10aa aaaa
		utf8.push_back(char(0xE0 | (ch >> 12)));
		utf8.push_back(tail(6));
		utf8.push_back(tail(0));
	} else if (ch < 0x200000u) {
		// in:  0000 0000  000d ddcc  cccc bbbb  bbaa aaaa
		// out: 1111 0ddd  10cc cccc  10bb bbbb  10aa aaaa
		utf8.push_back(char(0xF0 | (ch >> 18)));
		utf8.push_back(tail(12));
		utf8.push_back(tail(6));
		utf8.push_back(tail(0));
	}

	return utf8;
}
255