#include "string_utils.hh"

#include "exception.hh"
#include "utf8_iterator.hh"
#include "unit_tests.hh"

#include <climits>
#include <cstdio>
8 
9 namespace Kakoune
10 {
11 
trim_indent(StringView str)12 String trim_indent(StringView str)
13 {
14     if (str.empty())
15         return {};
16 
17     if (str[0_byte] == '\n')
18         str = str.substr(1_byte);
19     while (not str.empty() and is_blank(str.back()))
20         str = str.substr(0, str.length() - 1);
21 
22     utf8::iterator it{str.begin(), str};
23     while (it != str.end() and is_horizontal_blank(*it))
24         ++it;
25 
26     const StringView indent{str.begin(), it.base()};
27     return accumulate(str | split_after<StringView>('\n') | transform([&](auto&& line) {
28             if (line == "\n")
29                 return line;
30             else if (not prefix_match(line, indent))
31                 throw runtime_error("inconsistent indentation in the string");
32 
33             return line.substr(indent.length());
34         }), String{}, [](String& s, StringView l) -> decltype(auto) { return s += l; });
35 }
36 
escape(StringView str,StringView characters,char escape)37 String escape(StringView str, StringView characters, char escape)
38 {
39     String res;
40     res.reserve(str.length());
41     auto cbeg = characters.begin(), cend = characters.end();
42     for (auto it = str.begin(), end = str.end(); it != end; )
43     {
44         auto next = std::find_first_of(it, end, cbeg, cend);
45         if (next != end)
46         {
47             res += StringView{it, next+1};
48             res.back() = escape;
49             res += *next;
50             it = next+1;
51         }
52         else
53         {
54             res += StringView{it, next};
55             break;
56         }
57     }
58     return res;
59 }
60 
unescape(StringView str,StringView characters,char escape)61 String unescape(StringView str, StringView characters, char escape)
62 {
63     String res;
64     res.reserve(str.length());
65     for (auto it = str.begin(), end = str.end(); it != end; )
66     {
67         auto next = std::find(it, end, escape);
68         if (next != end and next+1 != end and contains(characters, *(next+1)))
69         {
70             res += StringView{it, next+1};
71             res.back() = *(next+1);
72             it = next + 2;
73         }
74         else
75         {
76             res += StringView{it, next == end ? next : next + 1};
77             it = next == end ? next : next + 1;
78         }
79     }
80     return res;
81 }
82 
indent(StringView str,StringView indent)83 String indent(StringView str, StringView indent)
84 {
85     String res;
86     res.reserve(str.length());
87     bool was_eol = true;
88     for (ByteCount i = 0; i < str.length(); ++i)
89     {
90         if (was_eol)
91             res += indent;
92         res += str[i];
93         was_eol = is_eol(str[i]);
94     }
95     return res;
96 }
97 
replace(StringView str,StringView substr,StringView replacement)98 String replace(StringView str, StringView substr, StringView replacement)
99 {
100     String res;
101     for (auto it = str.begin(); it != str.end(); )
102     {
103         auto match = std::search(it, str.end(), substr.begin(), substr.end());
104         res += StringView{it, match};
105         if (match == str.end())
106             break;
107 
108         res += replacement;
109         it = match + (int)substr.length();
110     }
111     return res;
112 }
113 
left_pad(StringView str,ColumnCount size,Codepoint c)114 String left_pad(StringView str, ColumnCount size, Codepoint c)
115 {
116     return String(c, std::max(0_col, size - str.column_length())) + str.substr(0, size);
117 }
118 
right_pad(StringView str,ColumnCount size,Codepoint c)119 String right_pad(StringView str, ColumnCount size, Codepoint c)
120 {
121     return str.substr(0, size) + String(c, std::max(0_col, size - str.column_length()));
122 }
123 
str_to_int_ifp(StringView str)124 Optional<int> str_to_int_ifp(StringView str)
125 {
126     bool negative = not str.empty() and str[0] == '-';
127     if (negative)
128         str = str.substr(1_byte);
129     if (str.empty())
130         return {};
131 
132     unsigned int res = 0;
133     for (auto c : str)
134     {
135         if (c < '0' or c > '9')
136             return {};
137         res = res * 10 + c - '0';
138     }
139     return negative ? -res : res;
140 }
141 
str_to_int(StringView str)142 int str_to_int(StringView str)
143 {
144     if (auto val = str_to_int_ifp(str))
145         return *val;
146     throw runtime_error{str + " is not a number"};
147 }
148 
to_string(int val)149 InplaceString<15> to_string(int val)
150 {
151     InplaceString<15> res;
152     res.m_length = sprintf(res.m_data, "%i", val);
153     return res;
154 }
155 
to_string(unsigned val)156 InplaceString<15> to_string(unsigned val)
157 {
158     InplaceString<15> res;
159     res.m_length = sprintf(res.m_data, "%u", val);
160     return res;
161 }
162 
to_string(long int val)163 InplaceString<23> to_string(long int val)
164 {
165     InplaceString<23> res;
166     res.m_length = sprintf(res.m_data, "%li", val);
167     return res;
168 }
169 
to_string(long long int val)170 InplaceString<23> to_string(long long int val)
171 {
172     InplaceString<23> res;
173     res.m_length = sprintf(res.m_data, "%lli", val);
174     return res;
175 }
176 
to_string(unsigned long val)177 InplaceString<23> to_string(unsigned long val)
178 {
179     InplaceString<23> res;
180     res.m_length = sprintf(res.m_data, "%lu", val);
181     return res;
182 }
183 
to_string(Hex val)184 InplaceString<23> to_string(Hex val)
185 {
186     InplaceString<23> res;
187     res.m_length = sprintf(res.m_data, "%zx", val.val);
188     return res;
189 }
190 
to_string(float val)191 InplaceString<23> to_string(float val)
192 {
193     InplaceString<23> res;
194     res.m_length = sprintf(res.m_data, "%f", val);
195     return res;
196 }
197 
to_string(Codepoint c)198 InplaceString<7> to_string(Codepoint c)
199 {
200     InplaceString<7> res;
201     char* ptr = res.m_data;
202     utf8::dump(ptr, c);
203     res.m_length = (int)(ptr - res.m_data);
204     return res;
205 }
206 
subsequence_match(StringView str,StringView subseq)207 bool subsequence_match(StringView str, StringView subseq)
208 {
209     auto it = str.begin();
210     for (auto& c : subseq)
211     {
212         if (it == str.end())
213             return false;
214         while (*it != c)
215         {
216             if (++it == str.end())
217                 return false;
218         }
219         ++it;
220     }
221     return true;
222 }
223 
expand_tabs(StringView line,ColumnCount tabstop,ColumnCount col)224 String expand_tabs(StringView line, ColumnCount tabstop, ColumnCount col)
225 {
226     String res;
227     res.reserve(line.length());
228     for (auto it = line.begin(), end = line.end(); it != end; )
229     {
230         if (*it == '\t')
231         {
232             ColumnCount end_col = (col / tabstop + 1) * tabstop;
233             res += String{' ', end_col - col};
234             col = end_col;
235             ++it;
236         }
237         else
238         {
239             auto char_beg = it;
240             auto cp = utf8::read_codepoint(it, end);
241             res += {char_beg, it};
242             col += codepoint_width(cp);
243         }
244     }
245     return res;
246 }
247 
Iterator(StringView text,ColumnCount max_width)248 WrapView::Iterator::Iterator(StringView text, ColumnCount max_width)
249   : m_remaining{text}, m_max_width{max_width}
250 {
251     if (max_width <= 0)
252         throw runtime_error("Invalid max width");
253     ++*this;
254 }
255 
// Advances to the next wrapped line: m_current receives the next line and
// m_remaining the text left after it.
//
// The remaining text is scanned as runs of characters sharing the same
// category. A line ends at an end-of-line character, or as soon as a run
// reaches m_max_width columns; in the latter case the cut happens after the
// last word/punctuation run seen, or at m_max_width columns if there is none.
WrapView::Iterator& WrapView::Iterator::operator++()
{
    using Utf8It = utf8::iterator<const char*>;
    Utf8It it{m_remaining.begin(), m_remaining};
    // End of the last word or punctuation run seen so far; still at the
    // start of m_remaining until one has been seen.
    Utf8It last_word_end = it;

    while (it != m_remaining.end())
    {
        // NOTE(review): '_' is presumably given as an extra word character -
        // confirm against categorize.
        const CharCategories cat = categorize(*it, {'_'});
        if (cat == CharCategories::EndOfLine)
        {
            // Explicit line break: the line is everything before it, and the
            // end-of-line character itself is skipped.
            m_current = StringView{m_remaining.begin(), it.base()};
            m_remaining = StringView{(it+1).base(), m_remaining.end()};
            return *this;
        }

        // Extend word_end over the run of characters of the same category.
        Utf8It word_end = it+1;
        while (word_end != m_remaining.end() and categorize(*word_end, {'_'}) == cat)
            ++word_end;

        // This run reaches the column limit: the line has to end.
        if (word_end > m_remaining.begin() and
            utf8::column_distance(m_remaining.begin(), word_end.base()) >= m_max_width)
        {
            // With no word/punctuation run completed yet, cut at
            // m_max_width columns; otherwise cut after the last such run.
            auto line_end = last_word_end <= m_remaining.begin() ?
                Utf8It{utf8::advance(m_remaining.begin(), m_remaining.end(), m_max_width), m_remaining}
              : last_word_end;

            m_current = StringView{m_remaining.begin(), line_end.base()};

            // Horizontal blanks following the cut are not carried over to
            // the next line.
            while (line_end != m_remaining.end() and is_horizontal_blank(*line_end))
                ++line_end;

            // Neither is one newline directly after them.
            if (line_end != m_remaining.end() and *line_end == '\n')
                ++line_end;

            m_remaining = StringView{line_end.base(), m_remaining.end()};
            return *this;
        }
        // Remember where the line could be cut if a later run overflows.
        if (cat == CharCategories::Word or cat == CharCategories::Punctuation)
            last_word_end = word_end;

        // NOTE(review): word_end starts at it+1, so this test looks like it
        // always holds - confirm.
        if (word_end > m_remaining.begin())
            it = word_end;
    }
    // The end was reached without needing a break: everything left is the
    // last line, and nothing remains.
    m_current = m_remaining;
    m_remaining = StringView{};
    return *this;
}
304 
305 template<typename AppendFunc>
format_impl(StringView fmt,ArrayView<const StringView> params,AppendFunc append)306 void format_impl(StringView fmt, ArrayView<const StringView> params, AppendFunc append)
307 {
308     int implicitIndex = 0;
309     for (auto it = fmt.begin(), end = fmt.end(); it != end;)
310     {
311         auto opening = std::find(it, end, '{');
312         if (opening == end)
313         {
314             append(StringView{it, opening});
315             break;
316         }
317         else if (opening != it and *(opening-1) == '\\')
318         {
319             append(StringView{it, opening-1});
320             append('{');
321             it = opening + 1;
322         }
323         else
324         {
325             append(StringView{it, opening});
326             auto closing = std::find(opening, end, '}');
327             if (closing == end)
328                 throw runtime_error("format string error, unclosed '{'");
329 
330             const int index = (closing == opening + 1) ?
331                 implicitIndex : str_to_int({opening+1, closing});
332 
333             if (index >= params.size())
334                 throw runtime_error("format string parameter index too big");
335 
336             append(params[index]);
337             implicitIndex = index+1;
338             it = closing+1;
339         }
340     }
341 }
342 
format_to(ArrayView<char> buffer,StringView fmt,ArrayView<const StringView> params)343 StringView format_to(ArrayView<char> buffer, StringView fmt, ArrayView<const StringView> params)
344 {
345     char* ptr = buffer.begin();
346     const char* end = buffer.end();
347     format_impl(fmt, params, [&](StringView s) mutable {
348         for (auto c : s)
349         {
350             if (ptr == end)
351                 throw runtime_error("buffer is too small");
352             *ptr++ = c;
353         }
354     });
355     if (ptr == end)
356         throw runtime_error("buffer is too small");
357     *ptr = 0;
358 
359     return { buffer.begin(), ptr };
360 }
361 
// Expands fmt with params through format_impl, which passes each piece of
// the produced text to append.
void format_with(FunctionRef<void (StringView)> append, StringView fmt, ArrayView<const StringView> params)
{
    format_impl(fmt, params, append);
}
366 
format(StringView fmt,ArrayView<const StringView> params)367 String format(StringView fmt, ArrayView<const StringView> params)
368 {
369     ByteCount size = fmt.length();
370     for (auto& s : params) size += s.length();
371     String res;
372     res.reserve(size);
373 
374     format_impl(fmt, params, [&](StringView s) { res += s; });
375     return res;
376 }
377 
double_up(StringView s,StringView characters)378 String double_up(StringView s, StringView characters)
379 {
380     String res;
381     auto pos = s.begin();
382     for (auto it = s.begin(), end = s.end(); it != end; ++it)
383     {
384         if (contains(characters, *it))
385         {
386             res += StringView{pos, it+1};
387             res += *it;
388             pos = it+1;
389         }
390     }
391     res += StringView{pos, s.end()};
392     return res;
393 }
394 
// Unit tests for the string utilities.
UnitTest test_string{[]()
{
    // String concatenation.
    kak_assert(String("youpi ") + "matin" == "youpi matin");

    // Word wrapping: explicit newlines end a line, lines are cut between
    // words, and a word wider than the limit is split.
    auto wrapped = "wrap this paragraph\n respecting whitespaces and much_too_long_words" | wrap_at(16) | gather<Vector>();
    kak_assert(wrapped.size() == 6);
    kak_assert(wrapped[0] == "wrap this");
    kak_assert(wrapped[1] == "paragraph");
    kak_assert(wrapped[2] == " respecting");
    kak_assert(wrapped[3] == "whitespaces and");
    kak_assert(wrapped[4] == "much_too_long_wo");
    kak_assert(wrapped[5] == "rds");

    // Punctuation directly after a word stays on the same line.
    auto wrapped2 = "error: unknown type" | wrap_at(7) | gather<Vector>();
    kak_assert(wrapped2.size() == 3);
    kak_assert(wrapped2[0] == "error:");
    kak_assert(wrapped2[1] == "unknown");
    kak_assert(wrapped2[2] == "type");

    // trim_indent: leading newline, common indentation and trailing blanks
    // are removed; deeper indentation is kept relative to the first line.
    kak_assert(trim_indent(" ") == "");
    kak_assert(trim_indent("no-indent") == "no-indent");
    kak_assert(trim_indent("\nno-indent") == "no-indent");
    kak_assert(trim_indent("\n  indent\n  indent") == "indent\nindent");
    kak_assert(trim_indent("\n  indent\n    indent") == "indent\n  indent");
    kak_assert(trim_indent("\n  indent\n  indent\n   ") == "indent\nindent");

    // A line indented less than the first one is an error.
    kak_expect_throw(runtime_error, trim_indent("\n  indent\nno-indent"));

    // escape and unescape are inverse of each other.
    kak_assert(escape(R"(\youpi:matin:tchou\:)", ":\\", '\\') == R"(\\youpi\:matin\:tchou\\\:)");
    kak_assert(unescape(R"(\\youpi\:matin\:tchou\\\:)", ":\\", '\\') == R"(\youpi:matin:tchou\:)");

    // prefix_match.
    kak_assert(prefix_match("tchou kanaky", "tchou"));
    kak_assert(prefix_match("tchou kanaky", "tchou kanaky"));
    kak_assert(prefix_match("tchou kanaky", "t"));
    kak_assert(not prefix_match("tchou kanaky", "c"));

    // subsequence_match.
    kak_assert(subsequence_match("tchou kanaky", "tknky"));
    kak_assert(subsequence_match("tchou kanaky", "knk"));
    kak_assert(subsequence_match("tchou kanaky", "tchou kanaky"));
    kak_assert(not subsequence_match("tchou kanaky", "tchou  kanaky"));

    // format: explicit indices, implicit indices and escaped braces.
    kak_assert(format("Youhou {1} {} {0} \\{}", 10, "hehe", 5) == "Youhou hehe 5 10 {}");

    // format_to writes into a caller-provided buffer.
    char buffer[20];
    kak_assert(format_to(buffer, "Hey {}", 15) == "Hey 15");

    // str_to_int, including both ends of the int range.
    kak_assert(str_to_int("5") == 5);
    kak_assert(str_to_int(to_string(INT_MAX)) == INT_MAX);
    kak_assert(str_to_int(to_string(INT_MIN)) == INT_MIN);
    kak_assert(str_to_int("00") == 0);
    kak_assert(str_to_int("-0") == 0);

    // double_up.
    kak_assert(double_up(R"('foo%"bar"')", "'\"%") == R"(''foo%%""bar""'')");

    // replace.
    kak_assert(replace("tchou/tcha/tchi", "/", "!!") == "tchou!!tcha!!tchi");
}};
451 
452 }
453