#include "string_utils.hh"

#include "exception.hh"
#include "utf8_iterator.hh"
#include "unit_tests.hh"

#include <climits>
#include <cstdio>
8
9 namespace Kakoune
10 {
11
trim_indent(StringView str)12 String trim_indent(StringView str)
13 {
14 if (str.empty())
15 return {};
16
17 if (str[0_byte] == '\n')
18 str = str.substr(1_byte);
19 while (not str.empty() and is_blank(str.back()))
20 str = str.substr(0, str.length() - 1);
21
22 utf8::iterator it{str.begin(), str};
23 while (it != str.end() and is_horizontal_blank(*it))
24 ++it;
25
26 const StringView indent{str.begin(), it.base()};
27 return accumulate(str | split_after<StringView>('\n') | transform([&](auto&& line) {
28 if (line == "\n")
29 return line;
30 else if (not prefix_match(line, indent))
31 throw runtime_error("inconsistent indentation in the string");
32
33 return line.substr(indent.length());
34 }), String{}, [](String& s, StringView l) -> decltype(auto) { return s += l; });
35 }
36
escape(StringView str,StringView characters,char escape)37 String escape(StringView str, StringView characters, char escape)
38 {
39 String res;
40 res.reserve(str.length());
41 auto cbeg = characters.begin(), cend = characters.end();
42 for (auto it = str.begin(), end = str.end(); it != end; )
43 {
44 auto next = std::find_first_of(it, end, cbeg, cend);
45 if (next != end)
46 {
47 res += StringView{it, next+1};
48 res.back() = escape;
49 res += *next;
50 it = next+1;
51 }
52 else
53 {
54 res += StringView{it, next};
55 break;
56 }
57 }
58 return res;
59 }
60
unescape(StringView str,StringView characters,char escape)61 String unescape(StringView str, StringView characters, char escape)
62 {
63 String res;
64 res.reserve(str.length());
65 for (auto it = str.begin(), end = str.end(); it != end; )
66 {
67 auto next = std::find(it, end, escape);
68 if (next != end and next+1 != end and contains(characters, *(next+1)))
69 {
70 res += StringView{it, next+1};
71 res.back() = *(next+1);
72 it = next + 2;
73 }
74 else
75 {
76 res += StringView{it, next == end ? next : next + 1};
77 it = next == end ? next : next + 1;
78 }
79 }
80 return res;
81 }
82
indent(StringView str,StringView indent)83 String indent(StringView str, StringView indent)
84 {
85 String res;
86 res.reserve(str.length());
87 bool was_eol = true;
88 for (ByteCount i = 0; i < str.length(); ++i)
89 {
90 if (was_eol)
91 res += indent;
92 res += str[i];
93 was_eol = is_eol(str[i]);
94 }
95 return res;
96 }
97
replace(StringView str,StringView substr,StringView replacement)98 String replace(StringView str, StringView substr, StringView replacement)
99 {
100 String res;
101 for (auto it = str.begin(); it != str.end(); )
102 {
103 auto match = std::search(it, str.end(), substr.begin(), substr.end());
104 res += StringView{it, match};
105 if (match == str.end())
106 break;
107
108 res += replacement;
109 it = match + (int)substr.length();
110 }
111 return res;
112 }
113
left_pad(StringView str,ColumnCount size,Codepoint c)114 String left_pad(StringView str, ColumnCount size, Codepoint c)
115 {
116 return String(c, std::max(0_col, size - str.column_length())) + str.substr(0, size);
117 }
118
right_pad(StringView str,ColumnCount size,Codepoint c)119 String right_pad(StringView str, ColumnCount size, Codepoint c)
120 {
121 return str.substr(0, size) + String(c, std::max(0_col, size - str.column_length()));
122 }
123
str_to_int_ifp(StringView str)124 Optional<int> str_to_int_ifp(StringView str)
125 {
126 bool negative = not str.empty() and str[0] == '-';
127 if (negative)
128 str = str.substr(1_byte);
129 if (str.empty())
130 return {};
131
132 unsigned int res = 0;
133 for (auto c : str)
134 {
135 if (c < '0' or c > '9')
136 return {};
137 res = res * 10 + c - '0';
138 }
139 return negative ? -res : res;
140 }
141
str_to_int(StringView str)142 int str_to_int(StringView str)
143 {
144 if (auto val = str_to_int_ifp(str))
145 return *val;
146 throw runtime_error{str + " is not a number"};
147 }
148
to_string(int val)149 InplaceString<15> to_string(int val)
150 {
151 InplaceString<15> res;
152 res.m_length = sprintf(res.m_data, "%i", val);
153 return res;
154 }
155
to_string(unsigned val)156 InplaceString<15> to_string(unsigned val)
157 {
158 InplaceString<15> res;
159 res.m_length = sprintf(res.m_data, "%u", val);
160 return res;
161 }
162
to_string(long int val)163 InplaceString<23> to_string(long int val)
164 {
165 InplaceString<23> res;
166 res.m_length = sprintf(res.m_data, "%li", val);
167 return res;
168 }
169
to_string(long long int val)170 InplaceString<23> to_string(long long int val)
171 {
172 InplaceString<23> res;
173 res.m_length = sprintf(res.m_data, "%lli", val);
174 return res;
175 }
176
to_string(unsigned long val)177 InplaceString<23> to_string(unsigned long val)
178 {
179 InplaceString<23> res;
180 res.m_length = sprintf(res.m_data, "%lu", val);
181 return res;
182 }
183
to_string(Hex val)184 InplaceString<23> to_string(Hex val)
185 {
186 InplaceString<23> res;
187 res.m_length = sprintf(res.m_data, "%zx", val.val);
188 return res;
189 }
190
to_string(float val)191 InplaceString<23> to_string(float val)
192 {
193 InplaceString<23> res;
194 res.m_length = sprintf(res.m_data, "%f", val);
195 return res;
196 }
197
to_string(Codepoint c)198 InplaceString<7> to_string(Codepoint c)
199 {
200 InplaceString<7> res;
201 char* ptr = res.m_data;
202 utf8::dump(ptr, c);
203 res.m_length = (int)(ptr - res.m_data);
204 return res;
205 }
206
subsequence_match(StringView str,StringView subseq)207 bool subsequence_match(StringView str, StringView subseq)
208 {
209 auto it = str.begin();
210 for (auto& c : subseq)
211 {
212 if (it == str.end())
213 return false;
214 while (*it != c)
215 {
216 if (++it == str.end())
217 return false;
218 }
219 ++it;
220 }
221 return true;
222 }
223
expand_tabs(StringView line,ColumnCount tabstop,ColumnCount col)224 String expand_tabs(StringView line, ColumnCount tabstop, ColumnCount col)
225 {
226 String res;
227 res.reserve(line.length());
228 for (auto it = line.begin(), end = line.end(); it != end; )
229 {
230 if (*it == '\t')
231 {
232 ColumnCount end_col = (col / tabstop + 1) * tabstop;
233 res += String{' ', end_col - col};
234 col = end_col;
235 ++it;
236 }
237 else
238 {
239 auto char_beg = it;
240 auto cp = utf8::read_codepoint(it, end);
241 res += {char_beg, it};
242 col += codepoint_width(cp);
243 }
244 }
245 return res;
246 }
247
Iterator(StringView text,ColumnCount max_width)248 WrapView::Iterator::Iterator(StringView text, ColumnCount max_width)
249 : m_remaining{text}, m_max_width{max_width}
250 {
251 if (max_width <= 0)
252 throw runtime_error("Invalid max width");
253 ++*this;
254 }
255
operator ++()256 WrapView::Iterator& WrapView::Iterator::operator++()
257 {
258 using Utf8It = utf8::iterator<const char*>;
259 Utf8It it{m_remaining.begin(), m_remaining};
260 Utf8It last_word_end = it;
261
262 while (it != m_remaining.end())
263 {
264 const CharCategories cat = categorize(*it, {'_'});
265 if (cat == CharCategories::EndOfLine)
266 {
267 m_current = StringView{m_remaining.begin(), it.base()};
268 m_remaining = StringView{(it+1).base(), m_remaining.end()};
269 return *this;
270 }
271
272 Utf8It word_end = it+1;
273 while (word_end != m_remaining.end() and categorize(*word_end, {'_'}) == cat)
274 ++word_end;
275
276 if (word_end > m_remaining.begin() and
277 utf8::column_distance(m_remaining.begin(), word_end.base()) >= m_max_width)
278 {
279 auto line_end = last_word_end <= m_remaining.begin() ?
280 Utf8It{utf8::advance(m_remaining.begin(), m_remaining.end(), m_max_width), m_remaining}
281 : last_word_end;
282
283 m_current = StringView{m_remaining.begin(), line_end.base()};
284
285 while (line_end != m_remaining.end() and is_horizontal_blank(*line_end))
286 ++line_end;
287
288 if (line_end != m_remaining.end() and *line_end == '\n')
289 ++line_end;
290
291 m_remaining = StringView{line_end.base(), m_remaining.end()};
292 return *this;
293 }
294 if (cat == CharCategories::Word or cat == CharCategories::Punctuation)
295 last_word_end = word_end;
296
297 if (word_end > m_remaining.begin())
298 it = word_end;
299 }
300 m_current = m_remaining;
301 m_remaining = StringView{};
302 return *this;
303 }
304
305 template<typename AppendFunc>
format_impl(StringView fmt,ArrayView<const StringView> params,AppendFunc append)306 void format_impl(StringView fmt, ArrayView<const StringView> params, AppendFunc append)
307 {
308 int implicitIndex = 0;
309 for (auto it = fmt.begin(), end = fmt.end(); it != end;)
310 {
311 auto opening = std::find(it, end, '{');
312 if (opening == end)
313 {
314 append(StringView{it, opening});
315 break;
316 }
317 else if (opening != it and *(opening-1) == '\\')
318 {
319 append(StringView{it, opening-1});
320 append('{');
321 it = opening + 1;
322 }
323 else
324 {
325 append(StringView{it, opening});
326 auto closing = std::find(opening, end, '}');
327 if (closing == end)
328 throw runtime_error("format string error, unclosed '{'");
329
330 const int index = (closing == opening + 1) ?
331 implicitIndex : str_to_int({opening+1, closing});
332
333 if (index >= params.size())
334 throw runtime_error("format string parameter index too big");
335
336 append(params[index]);
337 implicitIndex = index+1;
338 it = closing+1;
339 }
340 }
341 }
342
format_to(ArrayView<char> buffer,StringView fmt,ArrayView<const StringView> params)343 StringView format_to(ArrayView<char> buffer, StringView fmt, ArrayView<const StringView> params)
344 {
345 char* ptr = buffer.begin();
346 const char* end = buffer.end();
347 format_impl(fmt, params, [&](StringView s) mutable {
348 for (auto c : s)
349 {
350 if (ptr == end)
351 throw runtime_error("buffer is too small");
352 *ptr++ = c;
353 }
354 });
355 if (ptr == end)
356 throw runtime_error("buffer is too small");
357 *ptr = 0;
358
359 return { buffer.begin(), ptr };
360 }
361
format_with(FunctionRef<void (StringView)> append,StringView fmt,ArrayView<const StringView> params)362 void format_with(FunctionRef<void (StringView)> append, StringView fmt, ArrayView<const StringView> params)
363 {
364 format_impl(fmt, params, append);
365 }
366
format(StringView fmt,ArrayView<const StringView> params)367 String format(StringView fmt, ArrayView<const StringView> params)
368 {
369 ByteCount size = fmt.length();
370 for (auto& s : params) size += s.length();
371 String res;
372 res.reserve(size);
373
374 format_impl(fmt, params, [&](StringView s) { res += s; });
375 return res;
376 }
377
double_up(StringView s,StringView characters)378 String double_up(StringView s, StringView characters)
379 {
380 String res;
381 auto pos = s.begin();
382 for (auto it = s.begin(), end = s.end(); it != end; ++it)
383 {
384 if (contains(characters, *it))
385 {
386 res += StringView{pos, it+1};
387 res += *it;
388 pos = it+1;
389 }
390 }
391 res += StringView{pos, s.end()};
392 return res;
393 }
394
395 UnitTest test_string{[]()
__anon7f395eb70402() 396 {
397 kak_assert(String("youpi ") + "matin" == "youpi matin");
398
399 auto wrapped = "wrap this paragraph\n respecting whitespaces and much_too_long_words" | wrap_at(16) | gather<Vector>();
400 kak_assert(wrapped.size() == 6);
401 kak_assert(wrapped[0] == "wrap this");
402 kak_assert(wrapped[1] == "paragraph");
403 kak_assert(wrapped[2] == " respecting");
404 kak_assert(wrapped[3] == "whitespaces and");
405 kak_assert(wrapped[4] == "much_too_long_wo");
406 kak_assert(wrapped[5] == "rds");
407
408 auto wrapped2 = "error: unknown type" | wrap_at(7) | gather<Vector>();
409 kak_assert(wrapped2.size() == 3);
410 kak_assert(wrapped2[0] == "error:");
411 kak_assert(wrapped2[1] == "unknown");
412 kak_assert(wrapped2[2] == "type");
413
414 kak_assert(trim_indent(" ") == "");
415 kak_assert(trim_indent("no-indent") == "no-indent");
416 kak_assert(trim_indent("\nno-indent") == "no-indent");
417 kak_assert(trim_indent("\n indent\n indent") == "indent\nindent");
418 kak_assert(trim_indent("\n indent\n indent") == "indent\n indent");
419 kak_assert(trim_indent("\n indent\n indent\n ") == "indent\nindent");
420
421 kak_expect_throw(runtime_error, trim_indent("\n indent\nno-indent"));
422
423 kak_assert(escape(R"(\youpi:matin:tchou\:)", ":\\", '\\') == R"(\\youpi\:matin\:tchou\\\:)");
424 kak_assert(unescape(R"(\\youpi\:matin\:tchou\\\:)", ":\\", '\\') == R"(\youpi:matin:tchou\:)");
425
426 kak_assert(prefix_match("tchou kanaky", "tchou"));
427 kak_assert(prefix_match("tchou kanaky", "tchou kanaky"));
428 kak_assert(prefix_match("tchou kanaky", "t"));
429 kak_assert(not prefix_match("tchou kanaky", "c"));
430
431 kak_assert(subsequence_match("tchou kanaky", "tknky"));
432 kak_assert(subsequence_match("tchou kanaky", "knk"));
433 kak_assert(subsequence_match("tchou kanaky", "tchou kanaky"));
434 kak_assert(not subsequence_match("tchou kanaky", "tchou kanaky"));
435
436 kak_assert(format("Youhou {1} {} {0} \\{}", 10, "hehe", 5) == "Youhou hehe 5 10 {}");
437
438 char buffer[20];
439 kak_assert(format_to(buffer, "Hey {}", 15) == "Hey 15");
440
441 kak_assert(str_to_int("5") == 5);
442 kak_assert(str_to_int(to_string(INT_MAX)) == INT_MAX);
443 kak_assert(str_to_int(to_string(INT_MIN)) == INT_MIN);
444 kak_assert(str_to_int("00") == 0);
445 kak_assert(str_to_int("-0") == 0);
446
447 kak_assert(double_up(R"('foo%"bar"')", "'\"%") == R"(''foo%%""bar""'')");
448
449 kak_assert(replace("tchou/tcha/tchi", "/", "!!") == "tchou!!tcha!!tchi");
450 }};
451
452 }
453