1 // SciTE - Scintilla based Text Editor
2 /** @file StringHelpers.cxx
3 ** Implementation of widely useful string functions.
4 **/
5 // Copyright 2010 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <cstdlib>
9 #include <cstring>
10 #include <cstdio>
11
12 #include <stdexcept>
13 #include <string>
14 #include <vector>
15 #include <algorithm>
16 #include <functional>
17 #include <chrono>
18
19 #include "GUI.h"
20 #include "StringHelpers.h"
21
// Report whether the wide string s begins with start.
// An empty start is a prefix of every string.
bool StartsWith(std::wstring_view s, std::wstring_view start) {
	if (start.size() > s.size()) {
		// A prefix can never be longer than the text it prefixes
		return false;
	}
	return s.compare(0, start.size(), start) == 0;
}
26
// Report whether s begins with start.
// An empty start is a prefix of every string.
bool StartsWith(std::string_view s, std::string_view start) {
	const size_t lenStart = start.size();
	if (lenStart > s.size()) {
		return false;
	}
	const std::string_view head = s.substr(0, lenStart);
	return head == start;
}
31
// Report whether the wide string s finishes with end.
// An empty end is a suffix of every string.
bool EndsWith(std::wstring_view s, std::wstring_view end) {
	if (end.size() > s.size()) {
		return false;
	}
	const std::wstring_view tail = s.substr(s.size() - end.size());
	return tail == end;
}
36
// Report whether the character ch occurs anywhere in s.
bool Contains(std::string const &s, char ch) noexcept {
	const std::string::size_type position = s.find(ch);
	return std::string::npos != position;
}
40
// Replace each occurrence of sFind in s with sReplace, working from left to right.
// Searching resumes after each inserted replacement, so replaced text is never
// examined again even when sReplace itself contains sFind.
// An empty sFind matches nothing: s is left unchanged and 0 is returned.
// Returns the number of replacements made.
int Substitute(std::wstring &s, const std::wstring &sFind, const std::wstring &sReplace) {
	if (sFind.empty()) {
		// find() reports an empty pattern at every position so the loop below
		// would never finish.
		return 0;
	}
	int c = 0;
	const size_t lenFind = sFind.size();
	const size_t lenReplace = sReplace.size();
	size_t posFound = s.find(sFind);
	while (posFound != std::wstring::npos) {
		s.replace(posFound, lenFind, sReplace);
		// Step over the text just inserted before looking for the next match
		posFound = s.find(sFind, posFound + lenReplace);
		c++;
	}
	return c;
}
53
// Replace each occurrence of sFind in s with sReplace, working from left to right.
// Searching resumes after each inserted replacement, so replaced text is never
// examined again even when sReplace itself contains sFind.
// An empty sFind matches nothing: s is left unchanged and 0 is returned.
// Returns the number of replacements made.
int Substitute(std::string &s, const std::string &sFind, const std::string &sReplace) {
	if (sFind.empty()) {
		// find() reports an empty pattern at every position so the loop below
		// would never finish.
		return 0;
	}
	int c = 0;
	const size_t lenFind = sFind.size();
	const size_t lenReplace = sReplace.size();
	size_t posFound = s.find(sFind);
	while (posFound != std::string::npos) {
		s.replace(posFound, lenFind, sReplace);
		// Step over the text just inserted before looking for the next match
		posFound = s.find(sFind, posFound + lenReplace);
		c++;
	}
	return c;
}
66
// Delete the first occurrence of the NUL terminated text marker from s.
// Returns true when an occurrence was found and removed, false when s is unchanged.
bool RemoveStringOnce(std::string &s, const char *marker) {
	const std::string::size_type where = s.find(marker);
	const bool found = (where != std::string::npos);
	if (found) {
		s.erase(where, strlen(marker));
	}
	return found;
}
75
// Produce the decimal text of i, as formatted by std::to_string.
std::string StdStringFromInteger(int i) {
	std::string text = std::to_string(i);
	return text;
}
79
// Produce the decimal text of the unsigned size i, as formatted by std::to_string.
std::string StdStringFromSizeT(size_t i) {
	std::string text = std::to_string(i);
	return text;
}
83
// Format d in fixed point notation ("%.*f") with precision digits after the decimal point.
// The output is measured first and the buffer sized to fit, so large magnitudes
// or precisions cannot overrun a fixed size buffer.
// Returns an empty string if the value can not be formatted.
std::string StdStringFromDouble(double d, int precision) {
	// With a null buffer snprintf only reports how many characters are needed
	const int length = std::snprintf(nullptr, 0, "%.*f", precision, d);
	if (length <= 0) {
		return std::string();
	}
	const size_t lenNumber = static_cast<size_t>(length);
	// One extra element holds the terminating NUL that snprintf writes
	std::string number(lenNumber + 1, '\0');
	std::snprintf(&number[0], number.size(), "%.*f", precision, d);
	number.resize(lenNumber);
	return number;
}
89
// Parse val as an int with std::stoi.
// Returns defaultValue when val is empty or when std::stoi rejects it,
// either as non-numeric or as out of range.
int IntegerFromString(const std::string &val, int defaultValue) {
	if (val.empty()) {
		return defaultValue;
	}
	try {
		return std::stoi(val);
	} catch (const std::logic_error &) {
		// Both std::invalid_argument and std::out_of_range derive from std::logic_error
		return defaultValue;
	}
}
100
// Parse val with std::stoll and convert the result to intptr_t.
// Returns defaultValue when val is empty or when std::stoll rejects it,
// either as non-numeric or as out of range.
intptr_t IntPtrFromString(const std::string &val, intptr_t defaultValue) {
	if (val.empty()) {
		return defaultValue;
	}
	try {
		const long long parsed = std::stoll(val);
		return static_cast<intptr_t>(parsed);
	} catch (const std::logic_error &) {
		// Both std::invalid_argument and std::out_of_range derive from std::logic_error
		return defaultValue;
	}
}
111
// Parse val as a long long with std::stoll.
// Returns defaultValue when val is empty or when std::stoll rejects it,
// either as non-numeric or as out of range.
long long LongLongFromString(const std::string &val, long long defaultValue) {
	if (val.empty()) {
		return defaultValue;
	}
	try {
		return std::stoll(val);
	} catch (const std::logic_error &) {
		// Both std::invalid_argument and std::out_of_range derive from std::logic_error
		return defaultValue;
	}
}
122
LowerCaseAZ(std::string & s)123 void LowerCaseAZ(std::string &s) {
124 std::transform(s.begin(), s.end(), s.begin(), MakeLowerCase);
125 }
126
// Read a decimal integer from the start of the NUL terminated text s using atoll.
// Text that does not begin with a number produces 0.
intptr_t IntegerFromText(const char *s) noexcept {
	const long long parsed = atoll(s);
	return static_cast<intptr_t>(parsed);
}
130
CompareNoCase(const char * a,const char * b)131 int CompareNoCase(const char *a, const char *b) noexcept {
132 while (*a && *b) {
133 if (*a != *b) {
134 const char upperA = MakeUpperCase(*a);
135 const char upperB = MakeUpperCase(*b);
136 if (upperA != upperB)
137 return upperA - upperB;
138 }
139 a++;
140 b++;
141 }
142 // Either *a or *b is nul
143 return *a - *b;
144 }
145
EqualCaseInsensitive(const char * a,const char * b)146 bool EqualCaseInsensitive(const char *a, const char *b) noexcept {
147 return 0 == CompareNoCase(a, b);
148 }
149
EqualCaseInsensitive(std::string_view a,std::string_view b)150 bool EqualCaseInsensitive(std::string_view a, std::string_view b) noexcept {
151 if (a.length() != b.length()) {
152 return false;
153 }
154 for (size_t i = 0; i < a.length(); i++) {
155 if (MakeUpperCase(a[i]) != MakeUpperCase(b[i])) {
156 return false;
157 }
158 }
159 return true;
160 }
161
// Report whether the NUL terminated string target begins with prefix.
// The comparison is case sensitive and an empty prefix always matches.
bool isprefix(const char *target, const char *prefix) noexcept {
	for (; *target && *prefix; ++target, ++prefix) {
		if (*target != *prefix) {
			return false;
		}
	}
	// A match requires that the whole of prefix was consumed
	return *prefix == '\0';
}
174
UTF32FromUTF8(std::string_view s)175 std::u32string UTF32FromUTF8(std::string_view s) {
176 std::u32string ret;
177 while (!s.empty()) {
178 const unsigned char uc = static_cast<unsigned char>(s.front());
179 size_t lenChar = 1;
180 if (uc >= 0x80 + 0x40 + 0x20 + 0x10) {
181 lenChar = 4;
182 } else if (uc >= 0x80 + 0x40 + 0x20) {
183 lenChar = 3;
184 } else if (uc >= 0x80 + 0x40) {
185 lenChar = 2;
186 }
187 if (lenChar > s.length()) {
188 // Character fragment
189 for (size_t i = 0; i < s.length(); i++) {
190 ret.push_back(static_cast<unsigned char>(s[i]));
191 }
192 break;
193 }
194 const char32_t ch32 = UTF32Character(s.data());
195 ret.push_back(ch32);
196 s.remove_prefix(lenChar);
197 }
198 return ret;
199 }
200
// Decode the single UTF-8 encoded character starting at utf8 into its UTF-32 value.
// The lead byte selects a 1, 2, 3 or 4 byte form and that many bytes are read
// without any check that they are available or well formed.
unsigned int UTF32Character(const char *utf8) noexcept {
	// Fetch a byte as an unsigned value so that masks and shifts can not see a sign
	const auto byteAt = [utf8](size_t index) noexcept -> unsigned int {
		return static_cast<unsigned char>(utf8[index]);
	};
	const unsigned int lead = byteAt(0);
	if (lead < 0x80) {
		// Single byte: ASCII
		return lead;
	}
	if (lead < 0x80 + 0x40 + 0x20) {
		// Two bytes
		return ((lead & 0x1F) << 6) + (byteAt(1) & 0x7F);
	}
	if (lead < 0x80 + 0x40 + 0x20 + 0x10) {
		// Three bytes
		return ((lead & 0xF) << 12) + ((byteAt(1) & 0x7F) << 6) + (byteAt(2) & 0x7F);
	}
	// Four bytes
	return ((lead & 0x7) << 18) + ((byteAt(1) & 0x3F) << 12) +
		((byteAt(2) & 0x3F) << 6) + (byteAt(3) & 0x3F);
}
227
228 /**
229 * Convert a string into C string literal form using \a, \b, \f, \n, \r, \t, \v, and \ooo.
230 */
Slash(const std::string & s,bool quoteQuotes)231 std::string Slash(const std::string &s, bool quoteQuotes) {
232 std::string oRet;
233 for (const char ch : s) {
234 if (ch == '\a') {
235 oRet.append("\\a");
236 } else if (ch == '\b') {
237 oRet.append("\\b");
238 } else if (ch == '\f') {
239 oRet.append("\\f");
240 } else if (ch == '\n') {
241 oRet.append("\\n");
242 } else if (ch == '\r') {
243 oRet.append("\\r");
244 } else if (ch == '\t') {
245 oRet.append("\\t");
246 } else if (ch == '\v') {
247 oRet.append("\\v");
248 } else if (ch == '\\') {
249 oRet.append("\\\\");
250 } else if (quoteQuotes && (ch == '\'')) {
251 oRet.append("\\\'");
252 } else if (quoteQuotes && (ch == '\"')) {
253 oRet.append("\\\"");
254 } else if (IsASCII(ch) && (ch < ' ')) {
255 oRet.push_back('\\');
256 oRet.push_back(static_cast<char>((ch >> 6) + '0'));
257 oRet.push_back(static_cast<char>((ch >> 3) + '0'));
258 oRet.push_back(static_cast<char>((ch & 0x7) + '0'));
259 } else {
260 oRet.push_back(ch);
261 }
262 }
263 return oRet;
264 }
265
/**
 * Report whether the character is one of the octal digits '0' to '7'.
 */
static bool IsOctalDigit(char ch) noexcept {
	const bool outsideRange = (ch < '0') || (ch > '7');
	return !outsideRange;
}
272
/**
 * Return the value, 0 to 15, of a hexadecimal digit given in either case.
 * Returns -1 when the character is not a hexadecimal digit.
 */
static int GetHexaDigit(char ch) noexcept {
	int value = -1;
	if ((ch >= '0') && (ch <= '9')) {
		value = ch - '0';
	} else if ((ch >= 'A') && (ch <= 'F')) {
		value = 10 + (ch - 'A');
	} else if ((ch >= 'a') && (ch <= 'f')) {
		value = 10 + (ch - 'a');
	}
	return value;
}
288
289 /**
290 * Convert C style \a, \b, \f, \n, \r, \t, \v, \ooo and \xhh into their indicated characters.
291 */
UnSlash(char * s)292 unsigned int UnSlash(char *s) noexcept {
293 const char *sStart = s;
294 char *o = s;
295
296 while (*s) {
297 if (*s == '\\') {
298 s++;
299 if (*s == 'a') {
300 *o = '\a';
301 } else if (*s == 'b') {
302 *o = '\b';
303 } else if (*s == 'f') {
304 *o = '\f';
305 } else if (*s == 'n') {
306 *o = '\n';
307 } else if (*s == 'r') {
308 *o = '\r';
309 } else if (*s == 't') {
310 *o = '\t';
311 } else if (*s == 'v') {
312 *o = '\v';
313 } else if (IsOctalDigit(*s)) {
314 int val = *s - '0';
315 if (IsOctalDigit(*(s + 1))) {
316 s++;
317 val *= 8;
318 val += *s - '0';
319 if (IsOctalDigit(*(s + 1))) {
320 s++;
321 val *= 8;
322 val += *s - '0';
323 }
324 }
325 *o = static_cast<char>(val);
326 } else if (*s == 'x') {
327 s++;
328 int val = 0;
329 int ghd = GetHexaDigit(*s);
330 if (ghd >= 0) {
331 s++;
332 val = ghd;
333 ghd = GetHexaDigit(*s);
334 if (ghd >= 0) {
335 s++;
336 val *= 16;
337 val += ghd;
338 }
339 }
340 *o = static_cast<char>(val);
341 } else {
342 *o = *s;
343 }
344 } else {
345 *o = *s;
346 }
347 o++;
348 if (*s) {
349 s++;
350 }
351 }
352 *o = '\0';
353 return static_cast<unsigned int>(o - sStart);
354 }
355
UnSlashString(const char * s)356 std::string UnSlashString(const char *s) {
357 std::string sCopy(s, strlen(s) + 1);
358 const unsigned int len = UnSlash(&sCopy[0]);
359 return sCopy.substr(0, len);
360 }
361
362 /**
363 * Convert C style \0oo into their indicated characters.
364 * This is used to get control characters into the regular expression engine.
365 */
UnSlashLowOctal(char * s)366 static unsigned int UnSlashLowOctal(char *s) noexcept {
367 const char *sStart = s;
368 char *o = s;
369 while (*s) {
370 if ((s[0] == '\\') && (s[1] == '0') && IsOctalDigit(s[2]) && IsOctalDigit(s[3])) {
371 *o = static_cast<char>(8 * (s[2] - '0') + (s[3] - '0'));
372 s += 3;
373 } else {
374 *o = *s;
375 }
376 o++;
377 if (*s)
378 s++;
379 }
380 *o = '\0';
381 return static_cast<unsigned int>(o - sStart);
382 }
383
UnSlashLowOctalString(const char * s)384 std::string UnSlashLowOctalString(const char *s) {
385 std::string sCopy(s, strlen(s) + 1);
386 const unsigned int len = UnSlashLowOctal(&sCopy[0]);
387 return sCopy.substr(0, len);
388 }
389