1 #ifndef MISC_STRINGOPS_H 2 #define MISC_STRINGOPS_H 3 4 #include <cctype> 5 #include <string> 6 #include <algorithm> 7 8 #include "utf8stream.hpp" 9 10 namespace Misc 11 { 12 class StringUtils 13 { 14 struct ci 15 { operator ()Misc::StringUtils::ci16 bool operator()(char x, char y) const { 17 return toLower(x) < toLower(y); 18 } 19 }; 20 21 // Allow to convert complex arguments to C-style strings for format() function 22 template <typename T> argument(T value)23 static T argument(T value) noexcept 24 { 25 return value; 26 } 27 28 template <typename T> argument(std::basic_string<T> const & value)29 static T const * argument(std::basic_string<T> const & value) noexcept 30 { 31 return value.c_str(); 32 } 33 34 public: 35 36 /// Plain and simple locale-unaware toLower. Anything from A to Z is lower-cased, multibyte characters are unchanged. 37 /// Don't use std::tolower(char, locale&) because that is abysmally slow. 38 /// Don't use tolower(int) because that depends on global locale. toLower(char c)39 static char toLower(char c) 40 { 41 return (c >= 'A' && c <= 'Z') ? c + 'a' - 'A' : c; 42 } 43 toLowerUtf8(Utf8Stream::UnicodeChar ch)44 static Utf8Stream::UnicodeChar toLowerUtf8(Utf8Stream::UnicodeChar ch) 45 { 46 // Russian alphabet 47 if (ch >= 0x0410 && ch < 0x0430) 48 return ch + 0x20; 49 50 // Cyrillic IO character 51 if (ch == 0x0401) 52 return ch + 0x50; 53 54 // Latin alphabet 55 if (ch >= 0x41 && ch < 0x60) 56 return ch + 0x20; 57 58 // Deutch characters 59 if (ch == 0xc4 || ch == 0xd6 || ch == 0xdc) 60 return ch + 0x20; 61 if (ch == 0x1e9e) 62 return 0xdf; 63 64 // TODO: probably we will need to support characters from other languages 65 66 return ch; 67 } 68 lowerCaseUtf8(const std::string str)69 static std::string lowerCaseUtf8(const std::string str) 70 { 71 if (str.empty()) 72 return str; 73 74 // Decode string as utf8 characters, convert to lower case and pack them to string 75 std::string out; 76 Utf8Stream stream (str.c_str()); 77 while (!stream.eof ()) 78 { 79 Utf8Stream::UnicodeChar character = toLowerUtf8(stream.peek()); 80 81 if (character <= 0x7f) 82 out.append(1, static_cast<char>(character)); 83 else if (character <= 0x7ff) 84 { 85 out.append(1, static_cast<char>(0xc0 | ((character >> 6) & 0x1f))); 86 out.append(1, static_cast<char>(0x80 | (character & 0x3f))); 87 } 88 else if (character <= 0xffff) 89 { 90 out.append(1, static_cast<char>(0xe0 | ((character >> 12) & 0x0f))); 91 out.append(1, static_cast<char>(0x80 | ((character >> 6) & 0x3f))); 92 out.append(1, static_cast<char>(0x80 | (character & 0x3f))); 93 } 94 else 95 { 96 out.append(1, static_cast<char>(0xf0 | ((character >> 18) & 0x07))); 97 out.append(1, static_cast<char>(0x80 | ((character >> 12) & 0x3f))); 98 out.append(1, static_cast<char>(0x80 | ((character >> 6) & 0x3f))); 99 out.append(1, static_cast<char>(0x80 | (character & 0x3f))); 100 } 101 102 stream.consume(); 103 } 104 105 return out; 106 } 107 ciLess(const std::string & x,const std::string & y)108 static bool ciLess(const std::string &x, const std::string &y) { 109 return std::lexicographical_compare(x.begin(), x.end(), y.begin(), y.end(), ci()); 110 } 111 ciEqual(const std::string & x,const std::string & y)112 static bool ciEqual(const std::string &x, const std::string &y) { 113 if (x.size() != y.size()) { 114 return false; 115 } 116 std::string::const_iterator xit = x.begin(); 117 std::string::const_iterator yit = y.begin(); 118 for (; xit != x.end(); ++xit, ++yit) { 119 if (toLower(*xit) != toLower(*yit)) { 120 return false; 121 } 122 } 123 return true; 124 } 125 ciCompareLen(const std::string & x,const std::string & y,size_t len)126 static int ciCompareLen(const std::string &x, const std::string &y, size_t len) 127 { 128 std::string::const_iterator xit = x.begin(); 129 std::string::const_iterator yit = y.begin(); 130 for(;xit != x.end() && yit != y.end() && len > 0;++xit,++yit,--len) 131 { 132 char left = *xit; 133 char right = *yit; 134 if (left == right) 135 continue; 136 137 left = toLower(left); 138 right = toLower(right); 139 int res = left - right; 140 if(res != 0) 141 return (res > 0) ? 1 : -1; 142 } 143 if(len > 0) 144 { 145 if(xit != x.end()) 146 return 1; 147 if(yit != y.end()) 148 return -1; 149 } 150 return 0; 151 } 152 153 /// Transforms input string to lower case w/o copy lowerCaseInPlace(std::string & inout)154 static void lowerCaseInPlace(std::string &inout) { 155 for (unsigned int i=0; i<inout.size(); ++i) 156 inout[i] = toLower(inout[i]); 157 } 158 159 /// Returns lower case copy of input string lowerCase(const std::string & in)160 static std::string lowerCase(const std::string &in) 161 { 162 std::string out = in; 163 lowerCaseInPlace(out); 164 return out; 165 } 166 167 struct CiComp 168 { operator ()Misc::StringUtils::CiComp169 bool operator()(const std::string& left, const std::string& right) const 170 { 171 return ciLess(left, right); 172 } 173 }; 174 175 176 /// Performs a binary search on a sorted container for a string that 'key' starts with 177 template<typename Iterator, typename T> partialBinarySearch(Iterator begin,Iterator end,const T & key)178 static Iterator partialBinarySearch(Iterator begin, Iterator end, const T& key) 179 { 180 const Iterator notFound = end; 181 182 while(begin < end) 183 { 184 const Iterator middle = begin + (std::distance(begin, end) / 2); 185 186 int comp = Misc::StringUtils::ciCompareLen((*middle), key, (*middle).size()); 187 188 if(comp == 0) 189 return middle; 190 else if(comp > 0) 191 end = middle; 192 else 193 begin = middle + 1; 194 } 195 196 return notFound; 197 } 198 199 /** @brief Replaces all occurrences of a string in another string. 200 * 201 * @param str The string to operate on. 202 * @param what The string to replace. 203 * @param with The replacement string. 204 * @param whatLen The length of the string to replace. 205 * @param withLen The length of the replacement string. 206 * 207 * @return A reference to the string passed in @p str. 208 */ replaceAll(std::string & str,const char * what,const char * with,std::size_t whatLen=std::string::npos,std::size_t withLen=std::string::npos)209 static std::string &replaceAll(std::string &str, const char *what, const char *with, 210 std::size_t whatLen=std::string::npos, std::size_t withLen=std::string::npos) 211 { 212 if (whatLen == std::string::npos) 213 whatLen = strlen(what); 214 215 if (withLen == std::string::npos) 216 withLen = strlen(with); 217 218 std::size_t found; 219 std::size_t offset = 0; 220 while((found = str.find(what, offset, whatLen)) != std::string::npos) 221 { 222 str.replace(found, whatLen, with, withLen); 223 offset = found + withLen; 224 } 225 return str; 226 } 227 228 // Requires some C++11 features: 229 // 1. std::string needs to be contiguous 230 // 2. std::snprintf with zero size (second argument) returns an output string size 231 // 3. variadic templates support 232 template <typename ... Args> format(const char * fmt,Args const &...args)233 static std::string format(const char* fmt, Args const & ... args) 234 { 235 auto size = std::snprintf(nullptr, 0, fmt, argument(args) ...); 236 // Note: sprintf also writes a trailing null character. We should remove it. 237 std::string ret(size+1, '\0'); 238 std::sprintf(&ret[0], fmt, argument(args) ...); 239 ret.erase(size); 240 241 return ret; 242 } 243 244 template <typename ... Args> format(const std::string & fmt,Args const &...args)245 static std::string format(const std::string& fmt, Args const & ... args) 246 { 247 return format(fmt.c_str(), args ...); 248 } 249 trim(std::string & s)250 static inline void trim(std::string &s) 251 { 252 // left trim 253 s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) 254 { 255 return !std::isspace(ch); 256 })); 257 258 // right trim 259 s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) 260 { 261 return !std::isspace(ch); 262 }).base(), s.end()); 263 } 264 265 template <class Container> split(const std::string & str,Container & cont,const std::string & delims=" ")266 static inline void split(const std::string& str, Container& cont, const std::string& delims = " ") 267 { 268 std::size_t current, previous = 0; 269 current = str.find_first_of(delims); 270 while (current != std::string::npos) 271 { 272 cont.push_back(str.substr(previous, current - previous)); 273 previous = current + 1; 274 current = str.find_first_of(delims, previous); 275 } 276 cont.push_back(str.substr(previous, current - previous)); 277 } 278 279 // TODO: use the std::string_view once we will use the C++17. 280 // It should allow us to avoid data copying while we still will support both string and literal arguments. 281 replaceAll(std::string & data,std::string toSearch,std::string replaceStr)282 static inline void replaceAll(std::string& data, std::string toSearch, std::string replaceStr) 283 { 284 size_t pos = data.find(toSearch); 285 286 while( pos != std::string::npos) 287 { 288 data.replace(pos, toSearch.size(), replaceStr); 289 pos = data.find(toSearch, pos + replaceStr.size()); 290 } 291 } 292 replaceLast(std::string & str,std::string substr,std::string with)293 static inline void replaceLast(std::string& str, std::string substr, std::string with) 294 { 295 size_t pos = str.rfind(substr); 296 if (pos == std::string::npos) 297 return; 298 299 str.replace(pos, substr.size(), with); 300 } 301 }; 302 303 } 304 305 #endif 306