1 /**
2 * @file
3 * @brief String manipulation functions that don't fit elsewhere.
4 **/
5
6 #pragma once
7
8 #include <vector>
9
10 #include "config.h"
11 #include "libutil.h" // always_true
12
13 #ifdef CRAWL_HAVE_STRLCPY
14 #include <cstring>
15 #else
16 size_t strlcpy(char *dst, const char *src, size_t n);
17 #endif
18
19 using std::vector;
20
21 string lowercase_string(const string &s);
22 string &lowercase(string &s);
23 string &uppercase(string &s);
24 string uppercase_string(string s);
25 string lowercase_first(string);
26 string uppercase_first(string);
27
28 /**
29 * Returns 1 + the index of the first suffix that matches the given string,
30 * 0 if no suffixes match.
31 */
32 int ends_with(const string &s, const char * const suffixes[]);
33
34 string wordwrap_line(string &s, int cols, bool tags = false,
35 bool indent = false);
36
37 string strip_filename_unsafe_chars(const string &s);
38
39 string vmake_stringf(const char *format, va_list args);
40 string make_stringf(PRINTF(0, ));
41
42 bool strip_suffix(string &s, const string &suffix);
43
44 string replace_all(string s, const string &tofind, const string &replacement);
45
46 string replace_all_of(string s, const string &tofind, const string &replacement);
47
48 string replace_keys(const string &text, const map<string, string>& replacements);
49
50 string maybe_capitalise_substring(string s);
51 string maybe_pick_random_substring(string s);
52
53 int count_occurrences(const string &text, const string &searchfor);
54
55 string &trim_string(string &str);
56 string &trim_string_right(string &str);
57 string trimmed_string(string s);
58
/**
 * Find the enumerator e between begin and end that satisfies pred(e) and
 * whose name, as given by namefunc(e), has the earliest occurrence of the
 * substring spec.
 *
 * Each name is passed through lowercase_string() before it is searched; spec
 * itself is used exactly as given, so callers wanting a case-insensitive
 * match should supply an already-lowercased spec.
 *
 * @tparam Enum     An integer-like or C-style enum type no larger than
 *                  size_t. More specifically, Enum must be implicitly
 *                  convertible to size_t, and must be explicitly convertible
 *                  from size_t with static_cast. There should be no gaps
 *                  in enumerator values between begin and end.
 *
 * @param spec      The substring to search for.
 * @param begin     The beginning of the enumerator range to search in.
 * @param end       One past the end of the enum range to search in.
 * @param pred      A function from Enum to bool. Enumerators that do not
 *                  satisfy the predicate are ignored.
 * @param namefunc  A function from Enum to string or const char * giving
 *                  the name of the enumerator.
 * @return The enumerator that satisfies pred and whose name contains the
 *         spec substring beginning at the earliest position. If no such
 *         enumerator exists, returns end. If there are multiple strings
 *         containing the spec as a prefix, returns the shortest such string
 *         (so exact matches are preferred); otherwise ties are broken in
 *         an unspecified manner.
 */
template<class Enum, class Pred, class NameFunc>
Enum find_earliest_match(const string &spec, Enum begin, Enum end,
                         Pred pred, NameFunc namefunc)
{
    Enum selected = end;
    const size_t speclen = spec.length();
    const size_t last = static_cast<size_t>(end);

    // Position of the best match so far, and (for prefix matches only) the
    // length of the name it was found in. npos means "nothing found yet".
    size_t bestpos = string::npos;
    size_t bestlen = string::npos;

    for (size_t i = begin; i < last; ++i)
    {
        const Enum curr = static_cast<Enum>(i);

        if (!pred(curr))
            continue;

        const string name = lowercase_string(namefunc(curr));
        const size_t pos = name.find(spec);
        const size_t len = name.length();

        // Accept a strictly earlier match, or a shorter name among prefix
        // matches. A failed find yields npos, which is never < bestpos.
        if (pos < bestpos || (pos == 0 && len < bestlen))
        {
            // Exit early if we found an exact match.
            if (pos == 0 && len == speclen)
                return curr;

            bestpos = pos;
            if (pos == 0)
                bestlen = len;
            selected = curr;
        }
    }
    return selected;
}
118
/**
 * Join together strings computed by a function applied to some elements
 * of a range.
 *
 * @tparam Z An iterator or pointer type. The range is traversed with two
 *     copies of the iterator at once, so Z must support multi-pass
 *     (forward) iteration.
 * @tparam F A callable type that takes whatever Z points to, and
 *     returns a string or null-terminated char *.
 * @tparam G A callable type that takes whatever Z points to, and
 *     returns some type that is explicitly convertible to bool
 *
 * @param start An iterator to the beginning of the range of elements to
 *     consider.
 * @param end An iterator to one spot past the end of the range of
 *     elements to consider.
 * @param stringify A function or function-like object that takes an
 *     element from the range and returns a string or C string. Will be
 *     called once per selected element, in range order.
 * @param andc The separator to use before the last selected element.
 * @param comma The separator to use between elements other than the last.
 * @param filter A function or function-like object to select elements.
 *     Should accept as a single argument an element from the range, and
 *     return true if the element should be included in the result string.
 *     Will be called exactly once for each element in the range.
 *
 * @return A string containing the stringifications of all the elements
 *     for which filter returns true, with andc separating the last two
 *     elements and comma separating the other elements. If the range is
 *     empty, or no element is selected, returns the empty string.
 */
template <typename Z, typename F, typename G>
string comma_separated_fn(Z start, Z end, F stringify,
                          const string &andc, const string &comma,
                          G filter)
{
    // Skip ahead to the first selected element, if any.
    Z curr = start;
    while (curr != end && !filter(*curr))
        ++curr;

    string text;
    bool first = true;
    while (curr != end)
    {
        // Look ahead to the next selected element. Whether one exists tells
        // us which separator belongs in front of curr, and remembering it
        // means no element is ever passed to the filter twice.
        Z next = curr;
        do
        {
            ++next;
        }
        while (next != end && !filter(*next));

        if (first)
            first = false;
        else
            text += (next != end) ? comma : andc;

        text += stringify(*curr);
        curr = next;
    }
    return text;
}
202
203 template <typename Z, typename F>
204 string comma_separated_fn(Z start, Z end, F stringify,
205 const string &andc = " and ",
206 const string &comma = ", ")
207 {
208 return comma_separated_fn(start, end, stringify, andc, comma,
209 always_true<decltype(*start)>);
210 }
211
/**
 * Join a range of strings using comma_separated_fn, stringifying each
 * element by simply copying it.
 *
 * @tparam Z    An iterator or pointer type whose elements can bind to a
 *              const string reference.
 * @param start An iterator to the beginning of the range.
 * @param end   An iterator to one past the end of the range.
 * @param andc  The separator before the last element (default " and ").
 * @param comma The separator between the other elements (default ", ").
 * @return The joined string, as produced by comma_separated_fn.
 */
template <typename Z>
string comma_separated_line(Z start, Z end, const string &andc = " and ",
                            const string &comma = ", ")
{
    // The elements already are strings, so "stringify" just hands them back.
    const auto identity = [] (const string &s) { return s; };
    return comma_separated_fn(start, end, identity, andc, comma);
}
219
/**
 * For when the above functions are a bit over-elaborate: joins a range of
 * strings with one separator used everywhere, including before the last
 * element.
 *
 * @param start An iterator to the beginning of the range.
 * @param end   An iterator to one past the end of the range.
 * @param sep   The separator placed between elements (default " ").
 * @return The joined string, as produced by comma_separated_line.
 */
template <typename Z>
string join_strings(Z start, Z end, const string &sep = " ")
{
    return comma_separated_line(start, end,
                                /* andc  */ sep,
                                /* comma */ sep);
}
228
/**
 * Does s begin with prefix?
 *
 * @param s       The string to examine.
 * @param prefix  The prefix to look for.
 * @return True if the first prefix.size() characters of s equal prefix. An
 *         empty prefix matches every string; a prefix longer than s never
 *         matches.
 */
static inline bool starts_with(const string &s, const string &prefix)
{
    // compare() clamps the examined substring to the end of s, so a too-short
    // s simply compares unequal rather than reading out of range.
    return s.compare(0, prefix.size(), prefix) == 0;
}
233
/**
 * Does s end with suffix?
 *
 * @param s       The string to examine.
 * @param suffix  The suffix to look for.
 * @return True if the last suffix.length() characters of s equal suffix. An
 *         empty suffix matches every string; a suffix longer than s never
 *         matches.
 */
static inline bool ends_with(const string &s, const string &suffix)
{
    if (s.length() < suffix.length())
        return false;

    // The suffix can only occur at this one offset, so compare the tail
    // directly instead of searching for it.
    const size_t offset = s.length() - suffix.length();
    return s.compare(offset, string::npos, suffix) == 0;
}
240
241 // Splits string 's' on the separator 'sep'. If trim == true, trims each
242 // segment. If accept_empties == true, accepts empty segments. If nsplits >= 0,
243 // splits on the first nsplits occurrences of the separator, and stores the
244 // remainder of the string as the last segment; negative values of nsplits
245 // split on all occurrences of the separator.
246 vector<string> split_string(const string &sep, string s, bool trim = true,
247 bool accept_empties = false, int nsplits = -1);
248
249 // time
250
251 string make_time_string(time_t abs_time, bool terse = false);
252 string make_file_time(time_t when);
253
254 // Work around older Cygwin's missing std::to_string, resulting from a lack
255 // of long double support. Newer versions do provide long double and
256 // std::to_string.
257 //
258 // See https://cygwin.com/ml/cygwin/2015-01/msg00245.html for more info.
259 #ifdef _GLIBCXX_HAVE_BROKEN_VSWPRINTF
260 // Inject into std:: because we sometimes use std::to_string to
261 // disambiguate.
262 namespace std
263 {
to_string(int value)264 static inline string to_string(int value)
265 {
266 return make_stringf("%d", value);
267 }
to_string(long value)268 static inline string to_string(long value)
269 {
270 return make_stringf("%ld", value);
271 }
to_string(long long value)272 static inline string to_string(long long value)
273 {
274 return make_stringf("%lld", value);
275 }
to_string(unsigned value)276 static inline string to_string(unsigned value)
277 {
278 return make_stringf("%u", value);
279 }
to_string(unsigned long value)280 static inline string to_string(unsigned long value)
281 {
282 return make_stringf("%lu", value);
283 }
to_string(unsigned long long value)284 static inline string to_string(unsigned long long value)
285 {
286 return make_stringf("%llu", value);
287 }
to_string(float value)288 static inline string to_string(float value)
289 {
290 return make_stringf("%f", value);
291 }
to_string(double value)292 static inline string to_string(double value)
293 {
294 return make_stringf("%f", value);
295 }
296 }
297 #endif
298