1 // SciTE - Scintilla based Text Editor
2 /** @file StringHelpers.cxx
3  ** Implementation of widely useful string functions.
4  **/
5 // Copyright 2010 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <cstdlib>
9 #include <cstring>
10 #include <cstdio>
11 
12 #include <stdexcept>
13 #include <string>
14 #include <vector>
15 #include <algorithm>
16 #include <functional>
17 #include <chrono>
18 
19 #include "GUI.h"
20 #include "StringHelpers.h"
21 
/**
 * Report whether the wide string @a s begins with @a start.
 * An empty @a start is a prefix of every string.
 */
bool StartsWith(std::wstring_view s, std::wstring_view start) {
	if (start.size() > s.size()) {
		// A prefix can never be longer than the text it prefixes.
		return false;
	}
	return s.substr(0, start.size()) == start;
}
26 
/**
 * Report whether the byte string @a s begins with @a start.
 * An empty @a start is a prefix of every string.
 */
bool StartsWith(std::string_view s, std::string_view start) {
	if (start.size() > s.size()) {
		// A prefix can never be longer than the text it prefixes.
		return false;
	}
	return s.substr(0, start.size()) == start;
}
31 
/**
 * Report whether the wide string @a s finishes with @a end.
 * An empty @a end is a suffix of every string.
 */
bool EndsWith(std::wstring_view s, std::wstring_view end) {
	if (end.size() > s.size()) {
		// A suffix can never be longer than the text it terminates.
		return false;
	}
	const std::wstring_view tail = s.substr(s.size() - end.size());
	return tail == end;
}
36 
/**
 * Report whether the character @a ch occurs anywhere in @a s.
 */
bool Contains(std::string const &s, char ch) noexcept {
	for (const char candidate : s) {
		if (candidate == ch) {
			return true;
		}
	}
	return false;
}
40 
/**
 * Replace every non-overlapping occurrence of @a sFind inside @a s with @a sReplace.
 * Searching resumes just after each inserted replacement, so text produced by a
 * replacement is never itself examined for further matches.
 * @param s Text modified in place.
 * @param sFind Text to look for.
 * @param sReplace Text substituted for each match.
 * @return Number of replacements made; 0 when @a sFind is empty.
 */
int Substitute(std::wstring &s, const std::wstring &sFind, const std::wstring &sReplace) {
	const size_t lenFind = sFind.size();
	if (lenFind == 0) {
		// An empty pattern is found at every position, including immediately after
		// each replacement, so the loop below would never terminate.
		return 0;
	}
	const size_t lenReplace = sReplace.size();
	int c = 0;
	size_t posFound = s.find(sFind);
	while (posFound != std::wstring::npos) {
		s.replace(posFound, lenFind, sReplace);
		// Skip over the text just inserted before searching again.
		posFound = s.find(sFind, posFound + lenReplace);
		c++;
	}
	return c;
}
53 
/**
 * Replace every non-overlapping occurrence of @a sFind inside @a s with @a sReplace.
 * Searching resumes just after each inserted replacement, so text produced by a
 * replacement is never itself examined for further matches.
 * @param s Text modified in place.
 * @param sFind Text to look for.
 * @param sReplace Text substituted for each match.
 * @return Number of replacements made; 0 when @a sFind is empty.
 */
int Substitute(std::string &s, const std::string &sFind, const std::string &sReplace) {
	const size_t lenFind = sFind.size();
	if (lenFind == 0) {
		// An empty pattern is found at every position, including immediately after
		// each replacement, so the loop below would never terminate.
		return 0;
	}
	const size_t lenReplace = sReplace.size();
	int c = 0;
	size_t posFound = s.find(sFind);
	while (posFound != std::string::npos) {
		s.replace(posFound, lenFind, sReplace);
		// Skip over the text just inserted before searching again.
		posFound = s.find(sFind, posFound + lenReplace);
		c++;
	}
	return c;
}
66 
/**
 * Delete the first occurrence of @a marker from @a s.
 * @return true when an occurrence was found (and erased), false otherwise.
 */
bool RemoveStringOnce(std::string &s, const char *marker) {
	const std::string::size_type where = s.find(marker);
	if (where == std::string::npos) {
		return false;
	}
	s.erase(where, strlen(marker));
	return true;
}
75 
/**
 * Produce the decimal text form of the integer @a i.
 */
std::string StdStringFromInteger(int i) {
	std::string decimal = std::to_string(i);
	return decimal;
}
79 
/**
 * Produce the decimal text form of the unsigned size value @a i.
 */
std::string StdStringFromSizeT(size_t i) {
	std::string decimal = std::to_string(i);
	return decimal;
}
83 
/**
 * Format @a d in fixed-point notation ("%.*f") with @a precision digits after
 * the decimal point.
 * The result is sized from the formatted length, so large magnitudes or
 * precisions cannot overrun a fixed buffer.
 * @param d Value to format.
 * @param precision Number of fractional digits, passed through to printf's '*'.
 * @return The formatted text, or an empty string if formatting fails.
 */
std::string StdStringFromDouble(double d, int precision) {
	// First pass only measures: with a null buffer snprintf reports the length needed.
	const int lenNeeded = std::snprintf(nullptr, 0, "%.*f", precision, d);
	if (lenNeeded <= 0) {
		return std::string();
	}
	// One extra element receives the terminating NUL written by snprintf.
	std::string number(static_cast<size_t>(lenNeeded) + 1, '\0');
	std::snprintf(&number[0], number.size(), "%.*f", precision, d);
	number.resize(static_cast<size_t>(lenNeeded));
	return number;
}
89 
/**
 * Parse @a val as an int using std::stoi.
 * @return The parsed number, or @a defaultValue when @a val is empty,
 * does not start with a number, or is out of range for int.
 */
int IntegerFromString(const std::string &val, int defaultValue) {
	if (val.empty()) {
		return defaultValue;
	}
	try {
		return std::stoi(val);
	} catch (const std::logic_error &) {
		// Bad values, whether non-numeric or out of range, fall back to the default.
		return defaultValue;
	}
}
100 
/**
 * Parse @a val with std::stoll and convert the result to intptr_t.
 * @return The parsed number, or @a defaultValue when @a val is empty,
 * does not start with a number, or is out of range for long long.
 */
intptr_t IntPtrFromString(const std::string &val, intptr_t defaultValue) {
	if (val.empty()) {
		return defaultValue;
	}
	try {
		const long long parsed = std::stoll(val);
		return static_cast<intptr_t>(parsed);
	} catch (const std::logic_error &) {
		// Bad values, whether non-numeric or out of range, fall back to the default.
		return defaultValue;
	}
}
111 
/**
 * Parse @a val as a long long using std::stoll.
 * @return The parsed number, or @a defaultValue when @a val is empty,
 * does not start with a number, or is out of range for long long.
 */
long long LongLongFromString(const std::string &val, long long defaultValue) {
	if (val.empty()) {
		return defaultValue;
	}
	try {
		return std::stoll(val);
	} catch (const std::logic_error &) {
		// Bad values, whether non-numeric or out of range, fall back to the default.
		return defaultValue;
	}
}
122 
LowerCaseAZ(std::string & s)123 void LowerCaseAZ(std::string &s) {
124 	std::transform(s.begin(), s.end(), s.begin(), MakeLowerCase);
125 }
126 
/**
 * Read a decimal integer from the start of the C string @a s using atoll
 * and return it converted to intptr_t.
 */
intptr_t IntegerFromText(const char *s) noexcept {
	const long long parsed = atoll(s);
	return static_cast<intptr_t>(parsed);
}
130 
CompareNoCase(const char * a,const char * b)131 int CompareNoCase(const char *a, const char *b) noexcept {
132 	while (*a && *b) {
133 		if (*a != *b) {
134 			const char upperA = MakeUpperCase(*a);
135 			const char upperB = MakeUpperCase(*b);
136 			if (upperA != upperB)
137 				return upperA - upperB;
138 		}
139 		a++;
140 		b++;
141 	}
142 	// Either *a or *b is nul
143 	return *a - *b;
144 }
145 
EqualCaseInsensitive(const char * a,const char * b)146 bool EqualCaseInsensitive(const char *a, const char *b) noexcept {
147 	return 0 == CompareNoCase(a, b);
148 }
149 
EqualCaseInsensitive(std::string_view a,std::string_view b)150 bool EqualCaseInsensitive(std::string_view a, std::string_view b) noexcept {
151 	if (a.length() != b.length()) {
152 		return false;
153 	}
154 	for (size_t i = 0; i < a.length(); i++) {
155 		if (MakeUpperCase(a[i]) != MakeUpperCase(b[i])) {
156 			return false;
157 		}
158 	}
159 	return true;
160 }
161 
/**
 * Report whether the NUL terminated string @a target begins with @a prefix.
 * The comparison is case-sensitive and an empty @a prefix always matches.
 */
bool isprefix(const char *target, const char *prefix) noexcept {
	for (; *target && *prefix; ++target, ++prefix) {
		if (*target != *prefix) {
			return false;
		}
	}
	// Success only if the whole prefix was consumed before target ran out.
	return *prefix == '\0';
}
174 
UTF32FromUTF8(std::string_view s)175 std::u32string UTF32FromUTF8(std::string_view s) {
176 	std::u32string ret;
177 	while (!s.empty()) {
178 		const unsigned char uc = static_cast<unsigned char>(s.front());
179 		size_t lenChar = 1;
180 		if (uc >= 0x80 + 0x40 + 0x20 + 0x10) {
181 			lenChar = 4;
182 		} else if (uc >= 0x80 + 0x40 + 0x20) {
183 			lenChar = 3;
184 		} else if (uc >= 0x80 + 0x40) {
185 			lenChar = 2;
186 		}
187 		if (lenChar > s.length()) {
188 			// Character fragment
189 			for (size_t i = 0; i < s.length(); i++) {
190 				ret.push_back(static_cast<unsigned char>(s[i]));
191 			}
192 			break;
193 		}
194 		const char32_t ch32 = UTF32Character(s.data());
195 		ret.push_back(ch32);
196 		s.remove_prefix(lenChar);
197 	}
198 	return ret;
199 }
200 
/**
 * Decode the single UTF-8 encoded character starting at @a utf8.
 * The first byte selects how many bytes are read: 1 for values below 0x80,
 * 2 below 0xE0, 3 below 0xF0 and 4 otherwise. No validation is performed and
 * the caller must ensure that many bytes are readable.
 * Only the low 6 bits of each trail byte contribute to the result, so a
 * malformed trail byte cannot spill into the bits supplied by the lead byte.
 * @param utf8 Pointer to the first byte of the character.
 * @return The decoded value.
 */
unsigned int UTF32Character(const char *utf8) noexcept {
	// Payload bits carried by a trail byte (10xxxxxx).
	constexpr unsigned int trailPayload = 0x3F;
	unsigned char ch = utf8[0];
	unsigned int u32Char;
	if (ch < 0x80) {
		u32Char = ch;
	} else if (ch < 0x80 + 0x40 + 0x20) {
		// Two byte form: 5 bits from the lead byte, 6 from the trail byte.
		u32Char = (ch & 0x1F) << 6;
		ch = utf8[1];
		u32Char += ch & trailPayload;
	} else if (ch < 0x80 + 0x40 + 0x20 + 0x10) {
		// Three byte form: 4 bits from the lead byte, 6 from each trail byte.
		u32Char = (ch & 0xF) << 12;
		ch = utf8[1];
		u32Char += (ch & trailPayload) << 6;
		ch = utf8[2];
		u32Char += ch & trailPayload;
	} else {
		// Four byte form: 3 bits from the lead byte, 6 from each trail byte.
		u32Char = (ch & 0x7) << 18;
		ch = utf8[1];
		u32Char += (ch & trailPayload) << 12;
		ch = utf8[2];
		u32Char += (ch & trailPayload) << 6;
		ch = utf8[3];
		u32Char += ch & trailPayload;
	}
	return u32Char;
}
227 
228 /**
229  * Convert a string into C string literal form using \a, \b, \f, \n, \r, \t, \v, and \ooo.
230  */
Slash(const std::string & s,bool quoteQuotes)231 std::string Slash(const std::string &s, bool quoteQuotes) {
232 	std::string oRet;
233 	for (const char ch : s) {
234 		if (ch == '\a') {
235 			oRet.append("\\a");
236 		} else if (ch == '\b') {
237 			oRet.append("\\b");
238 		} else if (ch == '\f') {
239 			oRet.append("\\f");
240 		} else if (ch == '\n') {
241 			oRet.append("\\n");
242 		} else if (ch == '\r') {
243 			oRet.append("\\r");
244 		} else if (ch == '\t') {
245 			oRet.append("\\t");
246 		} else if (ch == '\v') {
247 			oRet.append("\\v");
248 		} else if (ch == '\\') {
249 			oRet.append("\\\\");
250 		} else if (quoteQuotes && (ch == '\'')) {
251 			oRet.append("\\\'");
252 		} else if (quoteQuotes && (ch == '\"')) {
253 			oRet.append("\\\"");
254 		} else if (IsASCII(ch) && (ch < ' ')) {
255 			oRet.push_back('\\');
256 			oRet.push_back(static_cast<char>((ch >> 6) + '0'));
257 			oRet.push_back(static_cast<char>((ch >> 3) + '0'));
258 			oRet.push_back(static_cast<char>((ch & 0x7) + '0'));
259 		} else {
260 			oRet.push_back(ch);
261 		}
262 	}
263 	return oRet;
264 }
265 
/**
 * Report whether @a ch is one of the characters '0' through '7'.
 */
static bool IsOctalDigit(char ch) noexcept {
	const bool belowRange = ch < '0';
	const bool aboveRange = ch > '7';
	return !(belowRange || aboveRange);
}
272 
/**
 * Translate a hexadecimal digit character into its numeric value.
 * Both upper and lower case letters are accepted.
 * @return 0 to 15 for a hexadecimal digit, -1 for any other character.
 */
static int GetHexaDigit(char ch) noexcept {
	// Value of the first letter digit ('A' or 'a').
	constexpr int firstLetterValue = 10;
	if (('0' <= ch) && (ch <= '9')) {
		return ch - '0';
	} else if (('A' <= ch) && (ch <= 'F')) {
		return firstLetterValue + (ch - 'A');
	} else if (('a' <= ch) && (ch <= 'f')) {
		return firstLetterValue + (ch - 'a');
	} else {
		return -1;
	}
}
288 
289 /**
290  * Convert C style \a, \b, \f, \n, \r, \t, \v, \ooo and \xhh into their indicated characters.
291  */
UnSlash(char * s)292 unsigned int UnSlash(char *s) noexcept {
293 	const char *sStart = s;
294 	char *o = s;
295 
296 	while (*s) {
297 		if (*s == '\\') {
298 			s++;
299 			if (*s == 'a') {
300 				*o = '\a';
301 			} else if (*s == 'b') {
302 				*o = '\b';
303 			} else if (*s == 'f') {
304 				*o = '\f';
305 			} else if (*s == 'n') {
306 				*o = '\n';
307 			} else if (*s == 'r') {
308 				*o = '\r';
309 			} else if (*s == 't') {
310 				*o = '\t';
311 			} else if (*s == 'v') {
312 				*o = '\v';
313 			} else if (IsOctalDigit(*s)) {
314 				int val = *s - '0';
315 				if (IsOctalDigit(*(s + 1))) {
316 					s++;
317 					val *= 8;
318 					val += *s - '0';
319 					if (IsOctalDigit(*(s + 1))) {
320 						s++;
321 						val *= 8;
322 						val += *s - '0';
323 					}
324 				}
325 				*o = static_cast<char>(val);
326 			} else if (*s == 'x') {
327 				s++;
328 				int val = 0;
329 				int ghd = GetHexaDigit(*s);
330 				if (ghd >= 0) {
331 					s++;
332 					val = ghd;
333 					ghd = GetHexaDigit(*s);
334 					if (ghd >= 0) {
335 						s++;
336 						val *= 16;
337 						val += ghd;
338 					}
339 				}
340 				*o = static_cast<char>(val);
341 			} else {
342 				*o = *s;
343 			}
344 		} else {
345 			*o = *s;
346 		}
347 		o++;
348 		if (*s) {
349 			s++;
350 		}
351 	}
352 	*o = '\0';
353 	return static_cast<unsigned int>(o - sStart);
354 }
355 
UnSlashString(const char * s)356 std::string UnSlashString(const char *s) {
357 	std::string sCopy(s, strlen(s) + 1);
358 	const unsigned int len = UnSlash(&sCopy[0]);
359 	return sCopy.substr(0, len);
360 }
361 
362 /**
363  * Convert C style \0oo into their indicated characters.
364  * This is used to get control characters into the regular expression engine.
365  */
UnSlashLowOctal(char * s)366 static unsigned int UnSlashLowOctal(char *s) noexcept {
367 	const char *sStart = s;
368 	char *o = s;
369 	while (*s) {
370 		if ((s[0] == '\\') && (s[1] == '0') && IsOctalDigit(s[2]) && IsOctalDigit(s[3])) {
371 			*o = static_cast<char>(8 * (s[2] - '0') + (s[3] - '0'));
372 			s += 3;
373 		} else {
374 			*o = *s;
375 		}
376 		o++;
377 		if (*s)
378 			s++;
379 	}
380 	*o = '\0';
381 	return static_cast<unsigned int>(o - sStart);
382 }
383 
UnSlashLowOctalString(const char * s)384 std::string UnSlashLowOctalString(const char *s) {
385 	std::string sCopy(s, strlen(s) + 1);
386 	const unsigned int len = UnSlashLowOctal(&sCopy[0]);
387 	return sCopy.substr(0, len);
388 }
389