1 /**
2 * @file
3 * @brief String manipulation functions that don't fit elsewhere.
4 **/
5
6 #include "AppHdr.h"
7
8 #include "stringutil.h"
9
10 #include <cwctype>
11 #include <sstream>
12
13 #include "libutil.h"
14 #include "random.h"
15 #include "unicode.h"
16
17 #ifndef CRAWL_HAVE_STRLCPY
/**
 * Bounded string copy for platforms whose libc lacks strlcpy().
 *
 * Copies at most n - 1 bytes of src into dst and always NUL-terminates dst
 * when n is non-zero.
 *
 * @param dst destination buffer with room for n bytes.
 * @param src NUL-terminated source string.
 * @param n   size of the destination buffer in bytes.
 * @returns the length of src; a value >= n means the copy was truncated.
 */
size_t strlcpy(char *dst, const char *src, size_t n)
{
    const size_t src_len = strlen(src);
    if (n == 0)
        return src_len;

    // Leave one byte of the destination for the terminator.
    const size_t room = n - 1;
    const size_t ncopy = src_len < room ? src_len : room;
    for (size_t i = 0; i < ncopy; ++i)
        dst[i] = src[i];
    dst[ncopy] = 0;

    return src_len;
}
38 #endif
39
40
lowercase_string(const string & s)41 string lowercase_string(const string &s)
42 {
43 string res;
44 char32_t c;
45 char buf[4];
46 for (const char *tp = s.c_str(); int len = utf8towc(&c, tp); tp += len)
47 {
48 // crawl breaks horribly if this is allowed to affect ascii chars,
49 // so override locale-specific casing for ascii. (For example, in
50 // Turkish; tr_TR.utf8 lowercase I is a dotless i that is not
51 // ascii, which breaks many things.)
52 if (isaalpha(tp[0]))
53 res.append(1, toalower(tp[0]));
54 else
55 res.append(buf, wctoutf8(buf, towlower(c)));
56 }
57 return res;
58 }
59
lowercase(string & s)60 string &lowercase(string &s)
61 {
62 s = lowercase_string(s);
63 return s;
64 }
65
uppercase(string & s)66 string &uppercase(string &s)
67 {
68 for (char &ch : s)
69 ch = toupper_safe(ch);
70 return s;
71 }
72
uppercase_string(string s)73 string uppercase_string(string s)
74 {
75 return uppercase(s);
76 }
77
78 // Warning: this (and uppercase_first()) relies on no libc (glibc, BSD libc,
79 // MSVC crt) supporting letters that expand or contract, like German ß (-> SS)
80 // upon capitalization / lowercasing. This is mostly a fault of the API --
81 // there's no way to return two characters in one code point.
82 // Also, all characters must have the same length in bytes before and after
83 // lowercasing, all platforms currently have this property.
84 //
85 // A non-hacky version would be slower for no gain other than sane code; at
86 // least unless you use some more powerful API.
lowercase_first(string s)87 string lowercase_first(string s)
88 {
89 char32_t c;
90 if (!s.empty())
91 {
92 utf8towc(&c, &s[0]);
93 wctoutf8(&s[0], towlower(c));
94 }
95 return s;
96 }
97
uppercase_first(string s)98 string uppercase_first(string s)
99 {
100 // Incorrect due to those pesky Dutch having "ij" as a single letter (wtf?).
101 // Too bad, there's no standard function to handle that character, and I
102 // don't care enough.
103 char32_t c;
104 if (!s.empty())
105 {
106 utf8towc(&c, &s[0]);
107 wctoutf8(&s[0], towupper(c));
108 }
109 return s;
110 }
111
ends_with(const string & s,const char * const suffixes[])112 int ends_with(const string &s, const char * const suffixes[])
113 {
114 if (!suffixes)
115 return 0;
116
117 for (int i = 0; suffixes[i]; ++i)
118 if (ends_with(s, suffixes[i]))
119 return 1 + i;
120
121 return 0;
122 }
123
124
/**
 * Work out the hanging indent for the continuation lines of a wrapped
 * paragraph.
 *
 * If the text opens with a quote mark or bullet, continuation lines are
 * aligned with the text that follows the mark; otherwise they are aligned
 * with the text after any leading spaces.
 *
 * @param s the paragraph being wrapped.
 * @returns a string of spaces as wide as the opening mark (in display
 *          columns) plus the spaces that follow it; empty if there is
 *          neither. A string with nothing after the spaces contributes no
 *          spaces.
 */
static const string _get_indent(const string &s)
{
    // Opening marks and the number of display columns each occupies. The
    // marks are multi-byte in UTF-8, so columns and bytes are tracked
    // separately.
    static const struct
    {
        const char *mark;
        size_t columns;
    } openers[] =
    {
        { "\"", 1 }, // ASCII quotes
        { "“", 1 },  // English quotes
        { "„", 1 },  // Polish/German/... quotes
        { "«", 1 },  // French quotes
        { "»", 1 },  // Danish/... quotes
        { "•", 1 },  // bulleted lists
        { "「", 2 }, // Chinese/Japanese quotes
    };

    size_t mark_bytes = 0;
    size_t columns = 0;
    for (const auto &opener : openers)
    {
        const size_t len = char_traits<char>::length(opener.mark);
        if (s.compare(0, len, opener.mark) == 0)
        {
            mark_bytes = len;
            columns = opener.columns;
            break;
        }
    }

    // Count only the spaces that come after the mark: find_first_not_of()
    // returns an absolute byte index, so the mark's bytes are subtracted.
    const size_t text_start = s.find_first_not_of(' ', mark_bytes);
    if (text_start != string::npos)
        columns += text_start - mark_bytes;

    return string(columns, ' ');
}
147
148
149 // The provided string is consumed!
wordwrap_line(string & s,int width,bool tags,bool indent)150 string wordwrap_line(string &s, int width, bool tags, bool indent)
151 {
152 ASSERT(width > 0);
153
154 const char *cp0 = s.c_str();
155 const char *cp = cp0, *space = 0;
156 char32_t c;
157 bool seen_nonspace = false;
158
159 while (int clen = utf8towc(&c, cp))
160 {
161 int cw = wcwidth(c);
162 if (c == ' ')
163 {
164 if (seen_nonspace)
165 space = cp;
166 }
167 else if (c == '\n')
168 {
169 space = cp;
170 break;
171 }
172 else
173 seen_nonspace = true;
174
175 if (c == '<' && tags)
176 {
177 ASSERT(cw == 1);
178 if (cp[1] == '<') // "<<" escape
179 {
180 // Note: this must be after a possible wrap, otherwise we could
181 // split the escape between lines.
182 cp++;
183 }
184 else
185 {
186 cw = 0;
187 // Skip the whole tag.
188 while (*cp != '>')
189 {
190 if (!*cp)
191 {
192 // Everything so far fitted, report error.
193 string ret = s + ">";
194 s = "<lightred>ERROR: string above had unterminated tag</lightred>";
195 return ret;
196 }
197 cp++;
198 }
199 }
200 }
201
202 if (cw > width)
203 break;
204
205 if (cw >= 0)
206 width -= cw;
207 cp += clen;
208 }
209
210 if (!c)
211 {
212 // everything fits
213 string ret = s;
214 s.clear();
215 return ret;
216 }
217
218 if (space)
219 cp = space;
220 const string ret = s.substr(0, cp - cp0);
221
222 const string indentation = (indent && c != '\n' && seen_nonspace)
223 ? _get_indent(s) : "";
224
225 // eat all trailing spaces and up to one newline
226 while (*cp == ' ')
227 cp++;
228 if (*cp == '\n')
229 cp++;
230
231 #ifdef ASSERTS
232 const size_t inputlength = s.length();
233 #endif
234 s.erase(0, cp - cp0);
235
236 // if we had to break a line, reinsert the indendation
237 if (indent && c != '\n')
238 s = indentation + s;
239
240 // Make sure the remaining string actually shrank, or else we're likely
241 // to throw our caller into an infinite loop.
242 ASSERT(inputlength > s.length());
243 return ret;
244 }
245
strip_filename_unsafe_chars(const string & s)246 string strip_filename_unsafe_chars(const string &s)
247 {
248 return replace_all_of(s, " .&`\"\'|;{}()[]<>*%$#@!~?", "");
249 }
250
/**
 * printf-style formatting into a string, va_list flavour.
 *
 * @param s    the printf format string.
 * @param args the arguments for the format; the list is copied before use,
 *             so the caller's va_list is not advanced.
 * @returns the formatted text, cut at the first NUL if the output contains
 *          one; an empty string if vsnprintf() reports an error.
 */
string vmake_stringf(const char* s, va_list args)
{
    // Most messages fit into a stack buffer, avoiding a heap allocation.
    char buf1[8000];
    va_list orig_args;
    va_copy(orig_args, args);
    const int len = vsnprintf(buf1, sizeof buf1, s, orig_args);
    va_end(orig_args);

    // vsnprintf() signals an output/encoding error with a negative value;
    // it must not be mistaken for a (huge) unsigned length.
    if (len < 0)
        return "";
    if (static_cast<size_t>(len) < sizeof buf1)
        return buf1;

    // Too long for the stack buffer: format again into a buffer of exactly
    // the reported size (plus the terminator).
    string ret(static_cast<size_t>(len) + 1, '\0');
    va_copy(orig_args, args);
    const int len2 = vsnprintf(&ret[0], ret.size(), s, orig_args);
    va_end(orig_args);
    if (len2 < 0)
        return "";

    // Drop the terminator (and anything after an embedded NUL).
    const string::size_type nul = ret.find('\0');
    if (nul != string::npos)
        ret.erase(nul);

    return ret;
}
270
make_stringf(const char * s,...)271 string make_stringf(const char *s, ...)
272 {
273 va_list args;
274 va_start(args, s);
275 string ret = vmake_stringf(s, args);
276 va_end(args);
277 return ret;
278 }
279
strip_suffix(string & s,const string & suffix)280 bool strip_suffix(string &s, const string &suffix)
281 {
282 if (ends_with(s, suffix))
283 {
284 s.erase(s.length() - suffix.length(), suffix.length());
285 trim_string(s);
286 return true;
287 }
288 return false;
289 }
290
replace_all(string s,const string & find,const string & repl)291 string replace_all(string s, const string &find, const string &repl)
292 {
293 ASSERT(!find.empty());
294 string::size_type start = 0;
295 string::size_type found;
296
297 while ((found = s.find(find, start)) != string::npos)
298 {
299 s.replace(found, find.length(), repl);
300 start = found + repl.length();
301 }
302
303 return s;
304 }
305
306 // Replaces all occurrences of any of the characters in tofind with the
307 // replacement string.
replace_all_of(string s,const string & tofind,const string & replacement)308 string replace_all_of(string s, const string &tofind, const string &replacement)
309 {
310 ASSERT(!tofind.empty());
311 string::size_type start = 0;
312 string::size_type found;
313
314 while ((found = s.find_first_of(tofind, start)) != string::npos)
315 {
316 s.replace(found, 1, replacement);
317 start = found + replacement.length();
318 }
319
320 return s;
321 }
322
323 // Capitalise phrases encased in @CAPS@ ... @NOCAPS@. If @NOCAPS@ is
324 // missing, change the rest of the line to uppercase.
maybe_capitalise_substring(string s)325 string maybe_capitalise_substring(string s)
326 {
327 string::size_type start = 0;
328 while ((start = s.find("@CAPS@", start)) != string::npos)
329 {
330 string::size_type cap_start = start + 6;
331 string::size_type cap_end = string::npos;
332 string::size_type end = s.find("@NOCAPS@", cap_start);
333 string::size_type length = string::npos;
334 string::size_type cap_length = string::npos;
335 if (end != string::npos)
336 {
337 cap_end = end + 8;
338 cap_length = end - cap_start;
339 length = cap_end - start;
340 }
341 string substring = s.substr(cap_start, cap_length);
342 trim_string(substring);
343 s.replace(start, length, uppercase(substring));
344 }
345 return s;
346 }
347
/**
 * Make @-replacements on the given text.
 *
 * @param text         the string to be processed
 * @param replacements contains information on what replacements are to be made.
 * @returns a string with substitutions based on the arguments. For example, if
 *          given "baz@foo@" and { "foo", "bar" } then this returns "bazbar".
 *          If a string not in replacements is found between @ signs, then the
 *          original, unedited string is returned. An unpaired trailing @ is
 *          left in place along with the text around it.
 */
string replace_keys(const string &text, const map<string, string>& replacements)
{
    string res;
    string::size_type last = 0; // start of the text not yet copied to res
    string::size_type at;
    while ((at = text.find('@', last)) != string::npos)
    {
        const string::size_type end = text.find('@', at + 1);
        // Unpaired '@': stop before copying anything, so that the tail is
        // emitted exactly once by the final append below.
        if (end == string::npos)
            break;

        const auto value = replacements.find(text.substr(at + 1, end - at - 1));
        if (value == replacements.end())
            return text;

        res.append(text, last, at - last);
        res += value->second;

        last = end + 1;
    }
    // Nothing was replaced.
    if (!last)
        return text;

    res.append(text, last, string::npos);
    return res;
}
385
386 // For each set of [phrase|term|word] contained in the string, replace the set with a random subphrase.
387 // NOTE: Doesn't work for nested patterns!
maybe_pick_random_substring(string s)388 string maybe_pick_random_substring(string s)
389 {
390 string::size_type start = 0;
391 while ((start = s.find("[", start)) != string::npos)
392 {
393 string::size_type end = s.find("]", start);
394 if (end == string::npos)
395 break;
396
397 string substring = s.substr(start + 1, end - start - 1);
398 vector<string> split = split_string("|", substring, false, true);
399 int index = random2(split.size());
400 s.replace(start, end + 1 - start, split[index]);
401 }
402 return s;
403 }
404
count_occurrences(const string & text,const string & s)405 int count_occurrences(const string &text, const string &s)
406 {
407 ASSERT(!s.empty());
408 int nfound = 0;
409 string::size_type pos = 0;
410
411 while ((pos = text.find(s, pos)) != string::npos)
412 {
413 ++nfound;
414 pos += s.length();
415 }
416
417 return nfound;
418 }
419
// also used with macros
/**
 * Strip leading and trailing whitespace (space, tab, newline, carriage
 * return) from a string in place.
 *
 * @param str the string to trim.
 * @returns a reference to str.
 */
string &trim_string(string &str)
{
    const char * const blanks = " \t\n\r";

    const string::size_type first = str.find_first_not_of(blanks);
    if (first == string::npos)
    {
        // Nothing but whitespace (or nothing at all).
        str.clear();
        return str;
    }

    // Cut the tail first so the head offset stays valid.
    str.erase(str.find_last_not_of(blanks) + 1);
    str.erase(0, first);
    return str;
}
428
/**
 * Strip trailing whitespace (space, tab, newline, carriage return) from a
 * string in place.
 *
 * @param str the string to trim.
 * @returns a reference to str.
 */
string &trim_string_right(string &str)
{
    const string::size_type last_kept = str.find_last_not_of(" \t\n\r");
    if (last_kept == string::npos)
        str.clear();
    else
        str.resize(last_kept + 1);
    return str;
}
434
trimmed_string(string s)435 string trimmed_string(string s)
436 {
437 trim_string(s);
438 return s;
439 }
440
add_segment(vector<string> & segs,string s,bool trim,bool accept_empty)441 static void add_segment(vector<string> &segs, string s, bool trim,
442 bool accept_empty)
443 {
444 if (trim && !s.empty())
445 trim_string(s);
446
447 if (accept_empty || !s.empty())
448 segs.push_back(s);
449 }
450
split_string(const string & sep,string s,bool trim_segments,bool accept_empty_segments,int nsplits)451 vector<string> split_string(const string &sep, string s, bool trim_segments,
452 bool accept_empty_segments, int nsplits)
453 {
454 vector<string> segments;
455 int separator_length = sep.length();
456
457 string::size_type pos;
458 while (nsplits && (pos = s.find(sep)) != string::npos)
459 {
460 add_segment(segments, s.substr(0, pos),
461 trim_segments, accept_empty_segments);
462
463 s.erase(0, pos + separator_length);
464
465 if (nsplits > 0)
466 --nsplits;
467 }
468
469 add_segment(segments, s, trim_segments, accept_empty_segments);
470
471 return segments;
472 }
473
474
475 // Crude, but functional.
make_time_string(time_t abs_time,bool terse)476 string make_time_string(time_t abs_time, bool terse)
477 {
478 const int days = abs_time / 86400;
479 const int hours = (abs_time % 86400) / 3600;
480 const int mins = (abs_time % 3600) / 60;
481 const int secs = abs_time % 60;
482
483 string buff;
484 if (days > 0)
485 {
486 buff += make_stringf("%d %s ", days, terse ? ","
487 : days > 1 ? "days" : "day");
488 }
489 return buff + make_stringf("%02d:%02d:%02d", hours, mins, secs);
490 }
491
make_file_time(time_t when)492 string make_file_time(time_t when)
493 {
494 if (tm *loc = TIME_FN(&when))
495 {
496 return make_stringf("%04d%02d%02d-%02d%02d%02d",
497 loc->tm_year + 1900,
498 loc->tm_mon + 1,
499 loc->tm_mday,
500 loc->tm_hour,
501 loc->tm_min,
502 loc->tm_sec);
503 }
504 return "";
505 }
506