1 /**
2  * @file
3  * @brief String manipulation functions that don't fit elsewhere.
4  **/
5 
6 #pragma once
7 
8 #include <vector>
9 
10 #include "config.h"
11 #include "libutil.h" // always_true
12 
13 #ifdef CRAWL_HAVE_STRLCPY
14 #include <cstring>
15 #else
16 size_t strlcpy(char *dst, const char *src, size_t n);
17 #endif
18 
19 using std::vector;
20 
// Case-conversion helpers. Only the prototypes are visible in this header,
// so the notes below are limited to what the signatures establish; the
// exact conversion rules (locale, non-ASCII handling) are not visible here.

// Takes s by const reference and returns a new string by value, so the
// argument is left untouched. Presumably a lowercased copy of s.
string lowercase_string(const string &s);
// Takes s by non-const reference and returns a string reference
// (presumably s itself after lowercasing it in place -- TODO confirm).
string &lowercase(string &s);
// Upper-case counterpart of lowercase(): same reference-in, reference-out
// shape (presumably modifies s in place -- TODO confirm).
string &uppercase(string &s);
// Takes its argument by value and returns a string by value, so the
// caller's string is never modified. Presumably the uppercased copy.
string uppercase_string(string s);
// By-value in, by-value out. Presumably returns the argument with only its
// first character lowercased -- verify against the implementation.
string lowercase_first(string);
// By-value in, by-value out. Presumably returns the argument with only its
// first character uppercased -- verify against the implementation.
string uppercase_first(string);
27 
/**
 * Returns 1 + the index of the first suffix that matches the given string,
 * 0 if no suffixes match.
 *
 * @param s         The string whose ending is tested.
 * @param suffixes  Array of candidate suffixes as C strings. No element
 *                  count is passed, so the array is presumably terminated
 *                  by a sentinel entry (e.g. a null pointer) -- TODO
 *                  confirm against the implementation.
 * @return A 1-based position within suffixes, or 0 when nothing matches;
 *         the result is therefore non-zero exactly when a suffix matched.
 */
int ends_with(const string &s, const char * const suffixes[]);
33 
// The functions below are declared here and defined elsewhere; the comments
// describe what the signatures establish and hedge everything else.

// Note that s is a non-const reference, so the callee may modify the
// caller's string. tags and indent default to false.
// NOTE(review): presumably returns one wrapped line of at most cols columns
// and leaves the remaining text in s -- confirm against the implementation.
string wordwrap_line(string &s, int cols, bool tags = false,
                     bool indent = false);

// Returns a new string; s itself is not modified (const reference).
// Presumably s with characters unsuitable for filenames removed -- the
// exact character set is not visible here.
string strip_filename_unsafe_chars(const string &s);

// printf-style formatting into a string. vmake_stringf takes an already
// started va_list; make_stringf takes its arguments through the PRINTF
// macro (defined elsewhere, presumably expanding to a format parameter plus
// varargs with a format-checking attribute). make_stringf is called in this
// header as make_stringf("%d", value).
string vmake_stringf(const char *format, va_list args);
string make_stringf(PRINTF(0, ));

// s is a non-const reference, so it may be modified.
// NOTE(review): presumably removes suffix from the end of s when present
// and returns whether it did so -- confirm.
bool strip_suffix(string &s, const string &suffix);

// Works on a by-value copy of s and returns the result by value.
// Presumably replaces every occurrence of the substring tofind with
// replacement.
string replace_all(string s, const string &tofind, const string &replacement);

// Same shape as replace_all. NOTE(review): the name suggests tofind is
// treated as a set of characters (as in find_first_of) rather than as a
// substring -- confirm.
string replace_all_of(string s, const string &tofind, const string &replacement);

// text is not modified; the result is returned by value.
// NOTE(review): presumably substitutes keys of the map found in text with
// their mapped values; how keys are delimited in text is not visible here.
string replace_keys(const string &text, const map<string, string>& replacements);

// Both take and return strings by value. Their markup or selection rules
// are not visible from this header -- see the implementation.
string maybe_capitalise_substring(string s);
string maybe_pick_random_substring(string s);

// Neither argument is modified. Presumably the number of times searchfor
// occurs in text; whether overlapping occurrences count is not visible
// here.
int count_occurrences(const string &text, const string &searchfor);

// trim_string and trim_string_right take a non-const reference and return a
// string reference (presumably str itself, trimmed in place); the latter
// presumably trims only the right-hand end. trimmed_string takes a copy and
// returns a string by value, leaving the caller's string untouched.
// Which characters count as trimmable is not visible here.
string &trim_string(string &str);
string &trim_string_right(string &str);
string trimmed_string(string s);
58 
/**
 * Search a contiguous range of enumerators for the one whose name contains
 * a given substring at the earliest position.
 *
 * Each value i in [begin, end) is converted to Enum, skipped unless pred
 * accepts it, and its name is lowercased with lowercase_string() before
 * being searched. The spec itself is used as given, so callers that want a
 * case-insensitive search should pass it already lowercased.
 *
 * @tparam Enum     An integer-like or C-style enum type no larger than
 *                  size_t: implicitly convertible to size_t and explicitly
 *                  convertible from size_t with static_cast. Values between
 *                  begin and end should have no gaps.
 * @tparam Pred     Callable from Enum to bool.
 * @tparam NameFunc Callable from Enum to string or const char *.
 *
 * @param spec      The substring to look for.
 * @param begin     First enumerator of the range.
 * @param end       One past the last enumerator of the range.
 * @param pred      Only enumerators for which this returns true are
 *                  considered.
 * @param namefunc  Gives the name of an enumerator.
 * @return The accepted enumerator whose lowercased name contains spec at
 *         the smallest offset, or end if no accepted name contains it.
 *         Among names that start with spec the shortest one wins, and a
 *         name equal to spec is returned as soon as it is seen. Other ties
 *         keep the first candidate encountered.
 */
template<class Enum, class Pred, class NameFunc>
Enum find_earliest_match(const string &spec, Enum begin, Enum end,
                         Pred pred, NameFunc namefunc)
{
    const size_t wanted_len = spec.length();

    Enum winner = end;
    // npos acts as "nothing found yet": any real offset or length is
    // smaller than it.
    size_t winner_pos = string::npos;
    size_t winner_len = string::npos;

    for (size_t idx = begin; idx < static_cast<size_t>(end); ++idx)
    {
        const Enum candidate = static_cast<Enum>(idx);
        if (pred(candidate))
        {
            const string lowered = lowercase_string(namefunc(candidate));
            const size_t where = lowered.find(spec);
            const size_t total = lowered.length();

            // A candidate improves on the current winner if the spec
            // appears earlier in it, or if both are prefix matches and this
            // one is shorter. A failed find() yields npos, which can never
            // satisfy either test.
            const bool earlier = (where < winner_pos);
            const bool shorter_prefix = (where == 0 && total < winner_len);
            if (!earlier && !shorter_prefix)
                continue;

            if (where == 0)
            {
                // Nothing can beat an exact match, so stop right away.
                if (total == wanted_len)
                    return candidate;
                winner_len = total;
            }
            winner_pos = where;
            winner = candidate;
        }
    }
    return winner;
}
118 
/**
 * Join together strings computed by a function applied to some elements
 * of a range.
 *
 * @tparam Z An iterator or pointer type. The range is walked once, but an
 *     earlier position is dereferenced again after the walk has moved on,
 *     so Z must allow that (a pointer or a multi-pass iterator).
 * @tparam F A callable type that takes whatever Z points to, and
 *     returns a string or null-terminated char *.
 * @tparam G A callable type that takes whatever Z points to, and
 *     returns some type that is explicitly convertible to bool.
 *
 * @param start An iterator to the beginning of the range of elements to
 *     consider.
 * @param end An iterator to one spot past the end of the range of
 *     elements to consider.
 * @param stringify A function or function-like object that takes an
 *     element from the range and returns a string or C string. Called
 *     exactly once per selected element, in range order.
 * @param andc The separator to use before the last selected element.
 * @param comma The separator to use between elements other than the last.
 * @param filter A function or function-like object to select elements.
 *     Should accept as a single argument an element from the range, and
 *     return true if the element should be included in the result string.
 *     Called exactly once per element of the range, in range order.
 *
 * @return A string containing the stringifications of all the elements
 *     for which filter returns true, with andc separating the last two
 *     elements and comma separating the other elements. If no element is
 *     selected (including when the range is empty), returns the empty
 *     string.
 */
template <typename Z, typename F, typename G>
string comma_separated_fn(Z start, Z end, F stringify,
                          const string &andc, const string &comma,
                          G filter)
{
    string text;

    // The separator that goes in front of an element depends on whether
    // another selected element follows it. Rather than looking ahead (and
    // asking the filter about the same elements twice), emission is delayed
    // by one: `held` is the most recent selected element not yet written.
    // `end` doubles as the "nothing held" marker, since no element of the
    // range can compare equal to it.
    Z held = end;
    bool wrote_any = false;

    for (Z i = start; i != end; ++i)
    {
        if (!filter(*i))
            continue;

        if (held != end)
        {
            // Another selected element follows `held`, so it is not the
            // last one: it is introduced by comma, unless it is the first.
            if (wrote_any)
                text += comma;
            text += stringify(*held);
            wrote_any = true;
        }
        held = i;
    }

    if (held != end)
    {
        // The final selected element is introduced by andc, unless it is
        // also the only one.
        if (wrote_any)
            text += andc;
        text += stringify(*held);
    }
    return text;
}
202 
203 template <typename Z, typename F>
204 string comma_separated_fn(Z start, Z end, F stringify,
205                           const string &andc = " and ",
206                           const string &comma = ", ")
207 {
208     return comma_separated_fn(start, end, stringify, andc, comma,
209                               always_true<decltype(*start)>);
210 }
211 
// Joins a range of strings with comma between elements and andc before the
// last one, by handing comma_separated_fn a stringifier that returns each
// element unchanged. andc defaults to " and " and comma to ", ".
template <typename Z>
string comma_separated_line(Z start, Z end, const string &andc = " and ",
                            const string &comma = ", ")
{
    // The elements already are strings, so "stringifying" is the identity.
    const auto as_is = [] (const string &s) { return s; };
    return comma_separated_fn(start, end, as_is, andc, comma);
}
219 
/**
 * Plain join for when no special final separator is wanted: the same
 * separator is passed to comma_separated_line both as the "and" separator
 * and as the ordinary one.
 *
 * @param start Beginning of the range of strings.
 * @param end   One past the end of the range.
 * @param sep   Text placed between adjacent elements; a single space by
 *              default.
 * @return Whatever comma_separated_line produces for these arguments.
 */
template <typename Z>
string join_strings(Z start, Z end, const string &sep = " ")
{
    const string &final_sep = sep;
    const string &inner_sep = sep;
    return comma_separated_line(start, end, final_sep, inner_sep);
}
228 
// Returns true if s begins with prefix. An empty prefix matches every
// string; a prefix longer than s never matches.
static inline bool starts_with(const string &s, const string &prefix)
{
    // A reverse search that may start no later than index 0 can only ever
    // report a match at the very beginning of s.
    const auto found_at = s.rfind(prefix, 0);
    return found_at == 0;
}
233 
// Returns true if s ends with suffix. An empty suffix matches every string;
// a suffix longer than s never matches.
static inline bool ends_with(const string &s, const string &suffix)
{
    const auto whole = s.length();
    const auto tail = suffix.length();

    // Compare the last `tail` characters of s against the suffix; the
    // length test first keeps the start offset from wrapping around.
    return tail <= whole && s.compare(whole - tail, tail, suffix) == 0;
}
240 
// Splits string 's' on the separator 'sep'. Note the argument order: the
// separator comes first, the string to split second.
//
// sep            - the separator to split on.
// s              - the string to split (taken by value).
// trim           - if true (the default), trims each segment.
// accept_empties - if true, accepts empty segments; the default is false.
// nsplits        - if >= 0, splits on the first nsplits occurrences of the
//                  separator, and stores the remainder of the string as the
//                  last segment; negative values (the default is -1) split
//                  on all occurrences of the separator.
//
// Returns the segments as a vector of strings.
vector<string> split_string(const string &sep, string s, bool trim = true,
                            bool accept_empties = false, int nsplits = -1);
248 
// time
//
// Both functions turn a time_t into a string; the output formats are
// defined by the implementations and are not visible from this header.

// terse defaults to false. NOTE(review): presumably formats abs_time as a
// human-readable duration, with terse selecting a more compact form --
// confirm against the implementation.
string make_time_string(time_t abs_time, bool terse = false);
// NOTE(review): presumably formats the timestamp 'when' in a form suitable
// for use in file names or save metadata -- confirm.
string make_file_time(time_t when);
253 
// Work around older Cygwin's missing std::to_string, resulting from a lack
// of long double support. Newer versions do provide long double and
// std::to_string.
//
// See https://cygwin.com/ml/cygwin/2015-01/msg00245.html for more info.
//
// The replacements are only compiled when _GLIBCXX_HAVE_BROKEN_VSWPRINTF is
// defined. Each overload forwards to make_stringf with the conversion
// specifier matching its parameter type. There is deliberately no
// long double overload here.
#ifdef _GLIBCXX_HAVE_BROKEN_VSWPRINTF
// Inject into std:: because we sometimes use std::to_string to
// disambiguate.
namespace std
{
    // Signed integer overloads.
    static inline string to_string(int value)
    {
        return make_stringf("%d", value);
    }
    static inline string to_string(long value)
    {
        return make_stringf("%ld", value);
    }
    static inline string to_string(long long value)
    {
        return make_stringf("%lld", value);
    }
    // Unsigned integer overloads.
    static inline string to_string(unsigned value)
    {
        return make_stringf("%u", value);
    }
    static inline string to_string(unsigned long value)
    {
        return make_stringf("%lu", value);
    }
    static inline string to_string(unsigned long long value)
    {
        return make_stringf("%llu", value);
    }
    // Floating-point overloads. Both use "%f"; assuming make_stringf passes
    // its arguments as C varargs, a float is promoted to double on the way
    // in, so one specifier serves both.
    static inline string to_string(float value)
    {
        return make_stringf("%f", value);
    }
    static inline string to_string(double value)
    {
        return make_stringf("%f", value);
    }
}
#endif
298