1 #include "objects/containers/String.h"
2 #include "objects/wrappers/Optional.h"
3 #include "thread/CheckFunction.h"
4 #include <codecvt>
5 #include <cwchar>
6 #include <cwctype>
7 #include <iomanip>
8 #include <locale>
9
10 NAMESPACE_SPH_BEGIN
11
12 Size String::npos = NumericLimits<Size>::max();
13
String(const wchar_t * s)14 String::String(const wchar_t* s) {
15 data.pop();
16 const Size length = Size(std::wcslen(s));
17 data.reserve(length + 1);
18 for (Size i = 0; i < length; ++i) {
19 data.push(s[i]);
20 }
21 data.push(L'\0');
22 }
23
fromAscii(const char * s)24 String String::fromAscii(const char* s) {
25 Array<wchar_t> data;
26 const Size length = Size(std::strlen(s));
27 data.reserve(length + 1);
28 for (Size i = 0; i < length; ++i) {
29 data.push(s[i]);
30 }
31 data.push(L'\0');
32 return data;
33 }
34
fromUtf8(const char * s)35 String String::fromUtf8(const char* s) {
36 CHECK_FUNCTION(CheckFunction::NO_THROW);
37 std::wstring_convert<std::codecvt_utf8<char16_t>, char16_t> convert;
38 try {
39 std::u16string s16 = convert.from_bytes(s);
40 return String::fromWstring(std::wstring(s16.begin(), s16.end()));
41 } catch (const std::range_error& e) {
42 SPH_ASSERT(false, e.what());
43 return String::fromAscii(s);
44 }
45 }
46
toAscii() const47 CharString String::toAscii() const {
48 Array<char> chars;
49 chars.resize(data.size());
50 for (Size i = 0; i < data.size(); ++i) {
51 if (data[i] <= 127) {
52 chars[i] = char(data[i]);
53 } else {
54 SPH_ASSERT(false, "Is there a valid usecase of this??");
55 chars[i] = '_';
56 }
57 }
58 return CharString(chars);
59 }
60
isAscii() const61 bool String::isAscii() const {
62 for (Size i = 0; i < data.size(); ++i) {
63 if (data[i] > 127) {
64 return false;
65 }
66 }
67 return true;
68 }
69
toUtf8() const70 CharString String::toUtf8() const {
71 std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> convert;
72 std::string str = convert.to_bytes(&data[0]);
73 return ArrayView<const char>(str.data(), str.size());
74 }
75
toWstring() const76 std::wstring String::toWstring() const {
77 return std::wstring(this->toUnicode());
78 }
79
/// Finds the first occurrence of a substring, searching from a given position.
///
/// \param s Substring to look for.
/// \param pos Index where the search starts; asserted to be at most the size of the string.
/// \return Index of the first character of the match, or String::npos if the substring does not
///         fit behind \p pos or no match exists.
///
/// NOTE(review): the first character of \p s is read unconditionally, so the result for an empty
/// \p s depends on how String::operator[] treats index 0 of an empty string - confirm before
/// relying on it.
Size String::find(const String& s, const Size pos) const {
    SPH_ASSERT(pos <= this->size());
    const Size total = this->size();
    const Size needleSize = s.size();
    if (needleSize + pos > total) {
        return npos;
    }
    const Size lastStart = total - needleSize;
    for (Size start = pos; start <= lastStart; ++start) {
        if (data[start] != s[0]) {
            continue;
        }
        // first character matches, compare the rest until the first difference
        bool tailMatches = true;
        for (Size offset = 1; offset < needleSize && tailMatches; ++offset) {
            tailMatches = (data[start + offset] == s[offset]);
        }
        if (tailMatches) {
            return start;
        }
    }
    return npos;
}
101
/// Finds the first occurrence of a character, searching from a given position.
///
/// \param c Character to look for.
/// \param pos Index where the search starts; asserted to be at most the size of the string.
/// \return Index of the first match at or behind \p pos, or String::npos if there is none.
Size String::find(const wchar_t c, const Size pos) const {
    SPH_ASSERT(pos <= this->size());
    const Size length = this->size();
    Size idx = pos;
    while (idx < length && data[idx] != c) {
        ++idx;
    }
    return (idx < length) ? idx : npos;
}
111
findAny(ArrayView<const String> ss,const Size pos) const112 Size String::findAny(ArrayView<const String> ss, const Size pos) const {
113 Size n = npos;
114 for (const String& s : ss) {
115 n = min(n, this->find(s, pos));
116 }
117 return n;
118 }
119
findAny(ArrayView<const wchar_t> cs,const Size pos) const120 Size String::findAny(ArrayView<const wchar_t> cs, const Size pos) const {
121 Size n = npos;
122 for (wchar_t c : cs) {
123 n = min(n, this->find(c, pos));
124 }
125 return n;
126 }
127
findLast(const String & s) const128 Size String::findLast(const String& s) const {
129 SPH_ASSERT(!s.empty());
130 if (s.size() > this->size()) {
131 return npos;
132 }
133 for (Size i = this->size() - s.size() + 1; i > 0; --i) {
134 if (data[i - 1] == s[0]) {
135 bool matching = true;
136 for (Size j = 1; j < s.size(); ++j) {
137 if (data[i + j - 1] != s[j]) {
138 matching = false;
139 break;
140 }
141 }
142 if (matching) {
143 return i - 1;
144 }
145 }
146 }
147 return npos;
148 }
149
findLast(const wchar_t c) const150 Size String::findLast(const wchar_t c) const {
151 for (Size i = this->size(); i > 0; --i) {
152 if (data[i - 1] == c) {
153 return i - 1;
154 }
155 }
156 return npos;
157 }
158
replace(const Size pos,const Size n,const String & s)159 void String::replace(const Size pos, const Size n, const String& s) {
160 Size n1 = (n == npos) ? this->size() - pos : n;
161 SPH_ASSERT(pos + n1 <= this->size());
162 Array<wchar_t> replaced;
163 replaced.reserve(data.size() + s.size() - n1);
164 for (Size i = 0; i < pos; ++i) {
165 replaced.push(data[i]);
166 }
167 for (Size i = 0; i < s.size(); ++i) {
168 replaced.push(s[i]);
169 }
170 for (Size i = pos + n1; i < data.size(); ++i) {
171 replaced.push(data[i]);
172 }
173 *this = String(std::move(replaced));
174 }
175
replaceFirst(const String & old,const String & s)176 bool String::replaceFirst(const String& old, const String& s) {
177 const Size n = this->find(old);
178 if (n == npos) {
179 return false;
180 }
181 this->replace(n, old.size(), s);
182 return true;
183 }
184
replaceAll(const String & old,const String & s)185 Size String::replaceAll(const String& old, const String& s) {
186 Size count = 0;
187 String current = *this;
188 Size pos = 0;
189 while (true) {
190 const Size n = current.find(old, pos);
191 if (n == String::npos) {
192 *this = current;
193 return count;
194 }
195 current.replace(n, old.size(), s);
196 ++count;
197 pos = n + s.size();
198 }
199 }
200
insert(const Size pos,const String & s)201 void String::insert(const Size pos, const String& s) {
202 SPH_ASSERT(pos <= this->size());
203 data.insert(pos, s.begin(), s.end());
204 }
205
erase(const Size pos,const Size n)206 void String::erase(const Size pos, const Size n) {
207 SPH_ASSERT(pos + n <= this->size());
208 data.remove(data.begin() + pos, data.begin() + pos + n);
209 }
210
clear()211 void String::clear() {
212 data.clear();
213 data.push(L'\0');
214 }
215
substr(const Size pos,const Size n) const216 String String::substr(const Size pos, const Size n) const {
217 SPH_ASSERT(pos <= this->size());
218 Array<wchar_t> ss;
219 const Size m = min(n, this->size() - pos);
220 ss.reserve(m + 1);
221 for (Size i = pos; i < pos + m; ++i) {
222 ss.push(data[i]);
223 }
224 ss.push(L'\0');
225 return ss;
226 }
227
shouldTrim(const wchar_t c,const Flags<String::TrimFlag> flags)228 static bool shouldTrim(const wchar_t c, const Flags<String::TrimFlag> flags) {
229 return (flags.has(String::TrimFlag::SPACE) && c == L' ') ||
230 (flags.has(String::TrimFlag::END_LINE) && c == L'\n') ||
231 (flags.has(String::TrimFlag::TAB) && c == L'\t');
232 }
233
trim(const Flags<TrimFlag> flags) const234 String String::trim(const Flags<TrimFlag> flags) const {
235 Size i1 = 0;
236 for (; i1 < data.size(); ++i1) {
237 if (!shouldTrim(data[i1], flags)) {
238 break;
239 }
240 }
241 Size i2 = data.size() - 1;
242 for (; i2 > 0; --i2) {
243 if (!shouldTrim(data[i2 - 1], flags)) {
244 break;
245 }
246 }
247 Array<wchar_t> trimmed;
248 for (Size i = i1; i < i2; ++i) {
249 trimmed.push(data[i]);
250 }
251 trimmed.push(L'\0');
252 return trimmed;
253 }
254
toLowercase() const255 String String::toLowercase() const {
256 String s = *this;
257 for (wchar_t& c : s) {
258 c = std::towlower(c);
259 }
260 return s;
261 }
262
263 template <>
fromString(const String & s)264 Optional<String> fromString(const String& s) {
265 return s;
266 }
267
268 template <>
fromString(const String & s)269 Optional<bool> fromString(const String& s) {
270 try {
271 std::size_t idx;
272 /// \todo could be a StringView
273 String trimmed = s.trim(String::TrimFlag::SPACE | String::TrimFlag::END_LINE);
274 const bool value = bool(std::stoi(trimmed.toWstring(), &idx));
275 if (idx == trimmed.size()) {
276 return value;
277 } else {
278 return NOTHING;
279 }
280 } catch (const std::exception&) {
281 return NOTHING;
282 }
283 }
284
285 template <>
fromString(const String & s)286 Optional<int> fromString(const String& s) {
287 try {
288 std::size_t idx;
289 String trimmed = s.trim(String::TrimFlag::SPACE | String::TrimFlag::END_LINE);
290 const int value = std::stoi(trimmed.toWstring(), &idx);
291 if (idx == trimmed.size()) {
292 return value;
293 } else {
294 return NOTHING;
295 }
296 } catch (const std::exception&) {
297 return NOTHING;
298 }
299 }
300
301 template <>
fromString(const String & s)302 Optional<Size> fromString(const String& s) {
303 try {
304 std::size_t idx;
305 String trimmed = s.trim(String::TrimFlag::SPACE | String::TrimFlag::END_LINE);
306 const Size value = std::stoul(trimmed.toWstring(), &idx);
307 if (idx == trimmed.size()) {
308 return value;
309 } else {
310 return NOTHING;
311 }
312 } catch (const std::exception&) {
313 return NOTHING;
314 }
315 }
316
317 template <>
fromString(const String & s)318 Optional<float> fromString(const String& s) {
319 try {
320 std::size_t idx;
321 String trimmed = s.trim(String::TrimFlag::SPACE | String::TrimFlag::END_LINE);
322 const float value = std::stof(trimmed.toWstring(), &idx);
323 if (idx == trimmed.size()) {
324 return value;
325 } else {
326 return NOTHING;
327 }
328 } catch (const std::exception&) {
329 return NOTHING;
330 }
331 }
332
333 template <>
fromString(const String & s)334 Optional<double> fromString(const String& s) {
335 try {
336 std::size_t idx;
337 String trimmed = s.trim(String::TrimFlag::SPACE | String::TrimFlag::END_LINE);
338 const double value = std::stod(trimmed.toWstring(), &idx);
339 if (idx == trimmed.size()) {
340 return value;
341 } else {
342 return NOTHING;
343 }
344 } catch (const std::exception&) {
345 return NOTHING;
346 }
347 }
348
exceptionMessage(const std::exception & e)349 String exceptionMessage(const std::exception& e) {
350 CHECK_FUNCTION(CheckFunction::NO_THROW);
351 return String::fromUtf8(e.what());
352 }
353
/// Inserts line breaks into a text so that lines are wrapped around the given width.
///
/// The text is scanned for characters where a break is allowed (punctuation, closing brackets and
/// blank characters). While the distance from the start of the current line to the next such
/// character does not exceed \p lineWidth, the text is kept as it is; otherwise a line break is
/// placed at the current scanning position and blank characters around it are removed. Line
/// breaks already present in the input start a new line and reset the indentation. When a line
/// contains the pattern ' - %s: ', the wrapped continuation is indented by spaces.
///
/// \param s Input text.
/// \param lineWidth Width used to decide whether a line has to be broken.
/// \return Copy of the text with the inserted line breaks and indentation.
String setLineBreak(const String& s, const Size lineWidth) {
    const static String blankChars = " \t\r";
    const static String breakableChars = ".,;!?\n)]" + blankChars;

    String wrapped = s;
    Size lineStart = 0;        // index where the current line begins
    Size indentWidth = 0;      // indentation used by the most recent indented line
    bool listItemSeen = false; // set once the ' - %s: ' pattern was found since the last input line break

    Size cursor = 0;
    while (cursor < wrapped.size()) {
        // locate the next place where the line could be broken
        Size breakPos = wrapped.findAny(breakableChars.view(), cursor);
        if (breakPos == String::npos) {
            breakPos = wrapped.size();
        }

        if (breakPos < wrapped.size() && wrapped[breakPos] == '\n') {
            // the input already breaks the line here; start over behind it
            cursor = breakPos + 1;
            lineStart = cursor;
            listItemSeen = false;
            indentWidth = 0;
            continue;
        }

        if (breakPos - lineStart <= lineWidth) {
            // still fits on the current line
            cursor = breakPos + 1;
            continue;
        }

        // The line is too long: strip blank characters in front of the break. Decrementing an
        // unsigned zero wraps around, which fails the bounds test below and is undone by the
        // increment after the loop.
        --cursor;
        while (cursor < wrapped.size() && blankChars.find(wrapped[cursor]) != String::npos) {
            wrapped.erase(cursor, 1);
            --cursor;
        }
        ++cursor;

        if (cursor > 0) {
            // place the line break
            wrapped.insert(cursor, "\n");
        }

        ++cursor;

        if (listItemSeen && indentWidth > 0) {
            // repeat the indentation of the previous wrapped line
            wrapped.insert(cursor, String::fromChar(' ', indentWidth));
            cursor += indentWidth;
        }

        // indent if there is a pattern ' - %s: ' on the previous line
        const std::size_t dashPos = wrapped.find("- ", lineStart);
        if (dashPos < cursor) {
            const std::size_t colonPos = wrapped.find(": ", dashPos);
            if (colonPos < cursor) {
                Size newIndent = colonPos + 2 - lineStart;
                wrapped.insert(cursor, String::fromChar(' ', newIndent));
                cursor += newIndent;
                indentWidth = newIndent;
                listItemSeen = true;
            }
        }

        lineStart = cursor;

        // strip blank characters at the beginning of the new line
        while (cursor < wrapped.size() && blankChars.find(wrapped[cursor]) != String::npos) {
            wrapped.erase(cursor, 1);
        }

        // continue from the next breakable character; npos terminates the loop
        cursor = wrapped.findAny(breakableChars.view(), cursor);
    }
    return wrapped;
}
425
split(const String & s,const wchar_t delimiter)426 Array<String> split(const String& s, const wchar_t delimiter) {
427 Array<String> parts;
428 Size n1 = Size(-1); // yes, -1, unsigned int overflow is well defined
429 Size n2;
430 while ((n2 = s.find(delimiter, n1 + 1)) != String::npos) {
431 parts.push(s.substr(n1 + 1, n2 - n1 - 1));
432 n1 = n2;
433 }
434 // add the last part
435 parts.push(s.substr(n1 + 1));
436 return parts;
437 }
438
splitByFirst(const String & s,const wchar_t delimiter)439 Pair<String> splitByFirst(const String& s, const wchar_t delimiter) {
440 const Size n = s.find(delimiter);
441 if (n == String::npos) {
442 return {};
443 } else {
444 Pair<String> parts;
445 parts[0] = s.substr(0, n);
446 parts[1] = s.substr(n + 1);
447 return parts;
448 }
449 }
450
451
452 static Array<String> capitalizationBlacklist{ "and", "or", "of", "for", "to", "et", "al" };
453
shouldCapitalize(const String & s)454 static bool shouldCapitalize(const String& s) {
455 for (const String& b : capitalizationBlacklist) {
456 if (s.size() < b.size()) {
457 continue;
458 }
459 if (s.substr(0, b.size()) == b && (s.size() == b.size() || s[b.size()] == ' ')) {
460 return false;
461 }
462 }
463 return true;
464 }
465
/// Capitalizes the first letter of the words in a text.
///
/// The first character of the text is always converted with std::towupper. Any other character is
/// converted if it directly follows a space and the word starting there passes shouldCapitalize.
///
/// \param input Text to capitalize.
/// \return Capitalized copy of the text.
String capitalize(const String& input) {
    String result = input;
    const Size length = result.size();
    for (Size i = 0; i < length; ++i) {
        const bool startsWord = (i == 0) || (result[i - 1] == ' ' && shouldCapitalize(result.substr(i)));
        if (startsWord) {
            result[i] = std::towupper(result[i]);
        }
    }
    return result;
}
475
476 NAMESPACE_SPH_END
477