1 #ifndef MISC_STRINGOPS_H
2 #define MISC_STRINGOPS_H
3 
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <string>

#include "utf8stream.hpp"
9 
10 namespace Misc
11 {
12 class StringUtils
13 {
14     struct ci
15     {
operator ()Misc::StringUtils::ci16         bool operator()(char x, char y) const {
17             return toLower(x) < toLower(y);
18         }
19     };
20 
21     // Allow to convert complex arguments to C-style strings for format() function
22     template <typename T>
argument(T value)23     static T argument(T value) noexcept
24     {
25         return value;
26     }
27 
28     template <typename T>
argument(std::basic_string<T> const & value)29     static T const * argument(std::basic_string<T> const & value) noexcept
30     {
31         return value.c_str();
32     }
33 
34 public:
35 
36     /// Plain and simple locale-unaware toLower. Anything from A to Z is lower-cased, multibyte characters are unchanged.
37     /// Don't use std::tolower(char, locale&) because that is abysmally slow.
38     /// Don't use tolower(int) because that depends on global locale.
toLower(char c)39     static char toLower(char c)
40     {
41         return (c >= 'A' && c <= 'Z') ? c + 'a' - 'A' : c;
42     }
43 
toLowerUtf8(Utf8Stream::UnicodeChar ch)44     static Utf8Stream::UnicodeChar toLowerUtf8(Utf8Stream::UnicodeChar ch)
45     {
46         // Russian alphabet
47         if (ch >= 0x0410 && ch < 0x0430)
48             return ch + 0x20;
49 
50         // Cyrillic IO character
51         if (ch == 0x0401)
52             return ch + 0x50;
53 
54         // Latin alphabet
55         if (ch >= 0x41 && ch < 0x60)
56             return ch + 0x20;
57 
58         // Deutch characters
59         if (ch == 0xc4 || ch == 0xd6 || ch == 0xdc)
60             return ch + 0x20;
61         if (ch == 0x1e9e)
62             return 0xdf;
63 
64         // TODO: probably we will need to support characters from other languages
65 
66         return ch;
67     }
68 
lowerCaseUtf8(const std::string str)69     static std::string lowerCaseUtf8(const std::string str)
70     {
71         if (str.empty())
72             return str;
73 
74         // Decode string as utf8 characters, convert to lower case and pack them to string
75         std::string out;
76         Utf8Stream stream (str.c_str());
77         while (!stream.eof ())
78         {
79             Utf8Stream::UnicodeChar character = toLowerUtf8(stream.peek());
80 
81             if (character <= 0x7f)
82                 out.append(1, static_cast<char>(character));
83             else if (character <= 0x7ff)
84             {
85                 out.append(1, static_cast<char>(0xc0 | ((character >> 6) & 0x1f)));
86                 out.append(1, static_cast<char>(0x80 | (character & 0x3f)));
87             }
88             else if (character <= 0xffff)
89             {
90                 out.append(1, static_cast<char>(0xe0 | ((character >> 12) & 0x0f)));
91                 out.append(1, static_cast<char>(0x80 | ((character >> 6) & 0x3f)));
92                 out.append(1, static_cast<char>(0x80 | (character & 0x3f)));
93             }
94             else
95             {
96                 out.append(1, static_cast<char>(0xf0 | ((character >> 18) & 0x07)));
97                 out.append(1, static_cast<char>(0x80 | ((character >> 12) & 0x3f)));
98                 out.append(1, static_cast<char>(0x80 | ((character >> 6) & 0x3f)));
99                 out.append(1, static_cast<char>(0x80 | (character & 0x3f)));
100             }
101 
102             stream.consume();
103         }
104 
105         return out;
106     }
107 
ciLess(const std::string & x,const std::string & y)108     static bool ciLess(const std::string &x, const std::string &y) {
109         return std::lexicographical_compare(x.begin(), x.end(), y.begin(), y.end(), ci());
110     }
111 
ciEqual(const std::string & x,const std::string & y)112     static bool ciEqual(const std::string &x, const std::string &y) {
113         if (x.size() != y.size()) {
114             return false;
115         }
116         std::string::const_iterator xit = x.begin();
117         std::string::const_iterator yit = y.begin();
118         for (; xit != x.end(); ++xit, ++yit) {
119             if (toLower(*xit) != toLower(*yit)) {
120                 return false;
121             }
122         }
123         return true;
124     }
125 
ciCompareLen(const std::string & x,const std::string & y,size_t len)126     static int ciCompareLen(const std::string &x, const std::string &y, size_t len)
127     {
128         std::string::const_iterator xit = x.begin();
129         std::string::const_iterator yit = y.begin();
130         for(;xit != x.end() && yit != y.end() && len > 0;++xit,++yit,--len)
131         {
132             char left = *xit;
133             char right = *yit;
134             if (left == right)
135                 continue;
136 
137             left = toLower(left);
138             right = toLower(right);
139             int res = left - right;
140             if(res != 0)
141                 return (res > 0) ? 1 : -1;
142         }
143         if(len > 0)
144         {
145             if(xit != x.end())
146                 return 1;
147             if(yit != y.end())
148                 return -1;
149         }
150         return 0;
151     }
152 
153     /// Transforms input string to lower case w/o copy
lowerCaseInPlace(std::string & inout)154     static void lowerCaseInPlace(std::string &inout) {
155         for (unsigned int i=0; i<inout.size(); ++i)
156             inout[i] = toLower(inout[i]);
157     }
158 
159     /// Returns lower case copy of input string
lowerCase(const std::string & in)160     static std::string lowerCase(const std::string &in)
161     {
162         std::string out = in;
163         lowerCaseInPlace(out);
164         return out;
165     }
166 
167     struct CiComp
168     {
operator ()Misc::StringUtils::CiComp169         bool operator()(const std::string& left, const std::string& right) const
170         {
171             return ciLess(left, right);
172         }
173     };
174 
175 
176     /// Performs a binary search on a sorted container for a string that 'key' starts with
177     template<typename Iterator, typename T>
partialBinarySearch(Iterator begin,Iterator end,const T & key)178     static Iterator partialBinarySearch(Iterator begin, Iterator end, const T& key)
179     {
180         const Iterator notFound = end;
181 
182         while(begin < end)
183         {
184             const Iterator middle = begin + (std::distance(begin, end) / 2);
185 
186             int comp = Misc::StringUtils::ciCompareLen((*middle), key, (*middle).size());
187 
188             if(comp == 0)
189                 return middle;
190             else if(comp > 0)
191                 end = middle;
192             else
193                 begin = middle + 1;
194         }
195 
196         return notFound;
197     }
198 
199     /** @brief Replaces all occurrences of a string in another string.
200      *
201      * @param str The string to operate on.
202      * @param what The string to replace.
203      * @param with The replacement string.
204      * @param whatLen The length of the string to replace.
205      * @param withLen The length of the replacement string.
206      *
207      * @return A reference to the string passed in @p str.
208      */
replaceAll(std::string & str,const char * what,const char * with,std::size_t whatLen=std::string::npos,std::size_t withLen=std::string::npos)209     static std::string &replaceAll(std::string &str, const char *what, const char *with,
210                                    std::size_t whatLen=std::string::npos, std::size_t withLen=std::string::npos)
211     {
212         if (whatLen == std::string::npos)
213             whatLen = strlen(what);
214 
215         if (withLen == std::string::npos)
216             withLen = strlen(with);
217 
218         std::size_t found;
219         std::size_t offset = 0;
220         while((found = str.find(what, offset, whatLen)) != std::string::npos)
221         {
222               str.replace(found, whatLen, with, withLen);
223               offset = found + withLen;
224         }
225         return str;
226     }
227 
228     // Requires some C++11 features:
229     // 1. std::string needs to be contiguous
230     // 2. std::snprintf with zero size (second argument) returns an output string size
231     // 3. variadic templates support
232     template <typename ... Args>
format(const char * fmt,Args const &...args)233     static std::string format(const char* fmt, Args const & ... args)
234     {
235         auto size = std::snprintf(nullptr, 0, fmt, argument(args) ...);
236         // Note: sprintf also writes a trailing null character. We should remove it.
237         std::string ret(size+1, '\0');
238         std::sprintf(&ret[0], fmt, argument(args) ...);
239         ret.erase(size);
240 
241         return ret;
242     }
243 
244     template <typename ... Args>
format(const std::string & fmt,Args const &...args)245     static std::string format(const std::string& fmt, Args const & ... args)
246     {
247         return format(fmt.c_str(), args ...);
248     }
249 
trim(std::string & s)250     static inline void trim(std::string &s)
251     {
252         // left trim
253         s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch)
254         {
255             return !std::isspace(ch);
256         }));
257 
258         // right trim
259         s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch)
260         {
261             return !std::isspace(ch);
262         }).base(), s.end());
263     }
264 
265     template <class Container>
split(const std::string & str,Container & cont,const std::string & delims=" ")266     static inline void split(const std::string& str, Container& cont, const std::string& delims = " ")
267     {
268         std::size_t current, previous = 0;
269         current = str.find_first_of(delims);
270         while (current != std::string::npos)
271         {
272             cont.push_back(str.substr(previous, current - previous));
273             previous = current + 1;
274             current = str.find_first_of(delims, previous);
275         }
276         cont.push_back(str.substr(previous, current - previous));
277     }
278 
279     // TODO: use the std::string_view once we will use the C++17.
280     // It should allow us to avoid data copying while we still will support both string and literal arguments.
281 
replaceAll(std::string & data,std::string toSearch,std::string replaceStr)282     static inline void replaceAll(std::string& data, std::string toSearch, std::string replaceStr)
283     {
284         size_t pos = data.find(toSearch);
285 
286         while( pos != std::string::npos)
287         {
288             data.replace(pos, toSearch.size(), replaceStr);
289             pos = data.find(toSearch, pos + replaceStr.size());
290         }
291     }
292 
replaceLast(std::string & str,std::string substr,std::string with)293      static inline void replaceLast(std::string& str, std::string substr, std::string with)
294      {
295          size_t pos = str.rfind(substr);
296          if (pos == std::string::npos)
297              return;
298 
299          str.replace(pos, substr.size(), with);
300      }
301 };
302 
303 }
304 
305 #endif
306