1 /*
2  Copyright (C) 2010-2014 Kristian Duske
3 
4  This file is part of TrenchBroom.
5 
6  TrenchBroom is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  TrenchBroom is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with TrenchBroom. If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef TrenchBroom_StringUtils_h
21 #define TrenchBroom_StringUtils_h
22 
23 #include "Macros.h"
24 
25 #include <cassert>
26 #include <cstdarg>
27 #include <locale>
28 #include <map>
29 #include <set>
30 #include <sstream>
31 #include <string>
32 #include <vector>
33 
34 typedef std::string String;
35 typedef std::stringstream StringStream;
36 typedef std::set<String> StringSet;
37 typedef std::vector<String> StringList;
38 typedef std::map<String, String> StringMap;
39 
40 static const String EmptyString("");
41 static const StringList EmptyStringList(0);
42 
43 namespace StringUtils {
44     struct CaseSensitiveCharCompare {
45     public:
operatorCaseSensitiveCharCompare46         int operator()(const char& lhs, const char& rhs) const {
47             return lhs - rhs;
48         }
49     };
50 
51     struct CaseInsensitiveCharCompare {
52     private:
53         std::locale m_locale;
54     public:
55         CaseInsensitiveCharCompare(std::locale loc = std::locale::classic()) :
m_localeCaseInsensitiveCharCompare56         m_locale(loc) {}
57 
operatorCaseInsensitiveCharCompare58         int operator()(const char& lhs, const char& rhs) const {
59             return std::tolower(lhs, m_locale) - std::tolower(rhs, m_locale);
60         }
61     };
62 
63     template <typename Cmp>
64     struct CharEqual {
65     private:
66         Cmp m_compare;
67     public:
operatorCharEqual68         bool operator()(const char& lhs, const char& rhs) const {
69             return m_compare(lhs, rhs) == 0;
70         }
71     };
72 
73     template <typename Cmp>
74     struct CharLess {
75     private:
76         Cmp m_compare;
77     public:
operatorCharLess78         bool operator()(const char& lhs, const char& rhs) const {
79             return m_compare(lhs, rhs) < 0;
80         }
81     };
82 
83     template <typename Cmp>
84     struct StringEqual {
85     public:
operatorStringEqual86         bool operator()(const String& lhs, const String& rhs) const {
87             if (lhs.size() != rhs.size())
88                 return false;
89             return std::equal(lhs.begin(), lhs.end(), rhs.begin(), CharEqual<Cmp>());
90         }
91     };
92 
93     template <typename Cmp>
94     struct StringLess {
operatorStringLess95         bool operator()(const String& lhs, const String& rhs) const {
96             typedef String::iterator::difference_type StringDiff;
97 
98             String::const_iterator lhsEnd, rhsEnd;
99             const size_t minSize = std::min(lhs.size(), rhs.size());
100             StringDiff difference = static_cast<StringDiff>(minSize);
101 
102             std::advance(lhsEnd = lhs.begin(), difference);
103             std::advance(rhsEnd = rhs.begin(), difference);
104             return std::lexicographical_compare(lhs.begin(), lhsEnd, rhs.begin(), rhsEnd, CharLess<Cmp>());
105         }
106     };
107 
108     typedef StringLess<CaseSensitiveCharCompare> CaseSensitiveStringLess;
109     typedef StringLess<CaseSensitiveCharCompare> CaseInsensitiveStringLess;
110 
111     template <typename T>
safePlural(const T count,const String & singular,const String & plural)112     const String& safePlural(const T count, const String& singular, const String& plural) {
113         return count == 1 ? singular : plural;
114     }
115 
116     template <typename T>
117     String safePlural(const String& prefix, const T count, const String& singular, const String& plural, const String& suffix = "") {
118         return prefix + safePlural(count, singular, plural) + suffix;
119     }
120 
121     template <typename T, typename P>
ftos(const T v,const P precision)122     String ftos(const T v, const P precision) {
123         std::ostringstream strout;
124         strout.precision(static_cast<std::streamsize>(precision));
125         strout << std::fixed  << v;
126 
127         String str = strout.str() ;
128         size_t end = str.find_last_not_of('0');
129         if (str[end] == '.')
130             --end;
131         return str.erase(end + 1);
132     }
133 
134     String formatString(const char* format, ...);
135     String formatStringV(const char* format, va_list arguments);
136     String trim(const String& str, const String& chars = " \n\t\r");
137 
138     size_t findFirstDifference(const String& str1, const String& str2);
139     bool isNumberedPrefix(const String& str, const String& prefix);
140     bool isPrefix(const String& str, const String& prefix);
141     bool isNumber(const String& str);
142 
143     bool containsCaseSensitive(const String& haystack, const String& needle);
144     bool containsCaseInsensitive(const String& haystack, const String& needle);
145     void sortCaseSensitive(StringList& strs);
146     void sortCaseInsensitive(StringList& strs);
147 
148     template <typename Cmp>
isEqual(const String & str1,const String & str2,const Cmp & cmp)149     bool isEqual(const String& str1, const String& str2, const Cmp& cmp) {
150         if (str1.size() != str2.size())
151             return false;
152 
153         for (size_t i = 0; i < str1.length(); ++i) {
154             if (cmp(str1[i], str2[i]) != 0)
155                 return false;
156         }
157         return true;
158     }
159 
160     bool caseSensitiveEqual(const String& str1, const String& str2);
161     bool caseInsensitiveEqual(const String& str1, const String& str2);
162 
163     template <class Cmp>
isPrefix(const String & str,const String & prefix,const Cmp & cmp)164     bool isPrefix(const String& str, const String& prefix, const Cmp& cmp) {
165         if (prefix.length() > str.length())
166             return false;
167 
168         for (size_t i = 0; i < prefix.length(); ++i) {
169             if (cmp(str[i], prefix[i]) != 0)
170                 return false;
171         }
172         return true;
173     }
174 
175     bool caseSensitivePrefix(const String& str, const String& prefix);
176     bool caseInsensitivePrefix(const String& str, const String& prefix);
177 
178     template <class Cmp>
isSuffix(const String & str,const String & suffix,const Cmp & cmp)179     bool isSuffix(const String& str, const String& suffix, const Cmp& cmp) {
180         if (suffix.length() > str.length())
181             return false;
182 
183         const size_t n = str.length() - suffix.length();
184         for (size_t i = suffix.length(); i > 0; --i) {
185             if (cmp(str[n + i - 1], suffix[i - 1]) != 0)
186                 return false;
187         }
188         return true;
189     }
190 
191     bool caseSensitiveSuffix(const String& str, const String& suffix);
192     bool caseInsensitiveSuffix(const String& str, const String& suffix);
193 
194     bool isBlank(const String& str);
195 
196     bool matchesPattern(const String& str, const String& pattern);
197 
198     template <typename I>
matchesPattern(I strCur,I strEnd,I patCur,I patEnd)199     bool matchesPattern(I strCur, I strEnd, I patCur, I patEnd) {
200         if (strCur == strEnd && patCur == patEnd)
201             return true;
202 
203 		if (patCur == patEnd)
204 			return false;
205 
206         // Handle escaped characters in pattern.
207         if (*patCur == '\\' && (patCur + 1) != patEnd) {
208             if (strCur == strEnd)
209                 return false;
210 
211             if (*(patCur + 1) == '*' ||
212                 *(patCur + 1) == '?' ||
213                 *(patCur + 1) == '\\') {
214                 if (*strCur != *(patCur + 1))
215                     return false;
216                 return matchesPattern(strCur + 1, strEnd, patCur + 2, patEnd);
217             } else {
218                 return false; // Invalid escape sequence.
219             }
220         }
221 
222 		// If the pattern is a star and the string is consumed
223 		if (*patCur == '*' && strCur == strEnd)
224 			return true;
225 
226 		// If the pattern is a '?' and the string is consumed
227 		if (*patCur == '?' && strCur == strEnd)
228 			return false;
229 
230         // Make sure that the characters after '*' are present in the string.
231         // This assumes that the pattern will not contain two consecutive '*'
232         if (*patCur == '*' && (patCur + 1) != patEnd && strCur == strEnd)
233             return false;
234 
235         // If the pattern contains '?', or current characters of both strings match
236         if (*patCur == '?' || *patCur == *strCur)
237             return matchesPattern(strCur + 1, strEnd, patCur + 1, patEnd);
238 
239         // If there is * in the pattern, then there are two possibilities
240         // a) We consider the current character of the string
241         // b) We ignore the current character of the string.
242         if (*patCur == '*')
243             return (matchesPattern(strCur,     strEnd, patCur + 1, patEnd) ||
244                     matchesPattern(strCur + 1, strEnd, patCur,     patEnd));
245         return false;
246     }
247 
248     long makeHash(const String& str);
249     String toLower(const String& str);
250     String replaceChars(const String& str, const String& needles, const String& replacements);
251     String replaceAll(const String& str, const String& needle, const String& replacement);
252     String capitalize(const String& str);
253     String escape(const String& str, const String& chars);
254     String unescape(const String& str, const String& chars);
255 
256     int stringToInt(const String& str);
257     long stringToLong(const String& str);
258     size_t stringToSize(const String& str);
259 
260     template <typename D>
split(const String & str,D d)261     StringList split(const String& str, D d) {
262         if (str.empty())
263             return EmptyStringList;
264 
265         const size_t first = str.find_first_not_of(d);
266         if (first == String::npos)
267             return EmptyStringList;
268         const size_t last = str.find_last_not_of(d);
269         assert(last != String::npos);
270         assert(first <= last);
271 
272         StringList result;
273 
274         size_t lastPos = first;
275         size_t pos = lastPos;
276         while ((pos = str.find_first_of(d, pos)) < last) {
277             result.push_back(str.substr(lastPos, pos - lastPos));
278             lastPos = ++pos;
279         }
280         if (lastPos <= last)
281             result.push_back(str.substr(lastPos, last - lastPos + 1));
282         return result;
283     }
284 
285     template <typename D>
splitAndTrim(const String & str,D d)286     StringList splitAndTrim(const String& str, D d) {
287         if (str.empty())
288             return EmptyStringList;
289 
290         const size_t first = str.find_first_not_of(d);
291         if (first == String::npos)
292             return EmptyStringList;
293         const size_t last = str.find_last_not_of(d);
294         assert(last != String::npos);
295         assert(first <= last);
296 
297         StringList result;
298 
299         size_t lastPos = first;
300         size_t pos = lastPos;
301         while ((pos = str.find_first_of(d, pos)) < last) {
302             const String item = trim(str.substr(lastPos, pos - lastPos));
303             if (!item.empty())
304                 result.push_back(item);
305             lastPos = ++pos;
306         }
307         if (lastPos <= last) {
308             const String item = trim(str.substr(lastPos, last - lastPos + 1));
309             if (!item.empty())
310                 result.push_back(item);
311         }
312         return result;
313     }
314 
315     template <typename T, typename D1, typename D2, typename D3, typename S>
join(const std::vector<T> & objs,const D1 & delim,const D2 & lastDelim,const D3 & delimForTwo,const S & toString)316     String join(const std::vector<T>& objs, const D1& delim, const D2& lastDelim, const D3& delimForTwo, const S& toString) {
317         if (objs.empty())
318             return "";
319         if (objs.size() == 1)
320             return toString(objs[0]);
321 
322         StringStream result;
323         if (objs.size() == 2) {
324             result << toString(objs[0]) << delimForTwo << toString(objs[1]);
325             return result.str();
326         }
327 
328         result << toString(objs[0]);
329         for (size_t i = 1; i < objs.size() - 1; i++)
330             result << delim << toString(objs[i]);
331         result << lastDelim << toString(objs.back());
332         return result.str();
333     }
334 
335     template <typename T, typename D, typename S>
join(const std::vector<T> & objs,const D & delim,const S & toString)336     String join(const std::vector<T>& objs, const D& delim, const S& toString) {
337         return join(objs, delim, delim, delim, toString);
338     }
339 
340     struct StringToString {
operatorStringToString341         const String& operator()(const String& str) const {
342             return str;
343         }
344     };
345 
346     template <typename D1, typename D2, typename D3>
join(const StringList & objs,const D1 & delim,const D2 & lastDelim,const D3 & delimForTwo)347     String join(const StringList& objs, const D1& delim, const D2& lastDelim, const D3& delimForTwo) {
348         return join(objs, delim, lastDelim, delimForTwo, StringToString());
349     }
350 
351     template <typename D>
join(const StringList & strs,const D & d)352     String join(const StringList& strs, const D& d) {
353         return join(strs, d, d, d);
354     }
355 
356     template <typename Cmp>
357     class SimpleStringMatcher {
358     private:
359         typedef enum {
360             Mode_Exact,
361             Mode_Prefix,
362             Mode_Suffix
363         } Mode;
364 
365         Mode m_mode;
366         String m_pattern;
367     public:
SimpleStringMatcher(const String & pattern)368         SimpleStringMatcher(const String& pattern) {
369             assert(!pattern.empty());
370             if (pattern[0] == '*') {
371                 m_mode = Mode_Suffix;
372                 m_pattern = pattern.substr(1);
373             } else if (pattern.size() > 1 &&
374                        pattern[pattern.size() - 1] == '*' &&
375                        pattern[pattern.size() - 2] != '\\') {
376                 m_mode = Mode_Prefix;
377                 m_pattern = pattern.substr(0, pattern.size() - 1);
378             } else {
379                 m_mode = Mode_Exact;
380                 m_pattern = pattern;
381             }
382             m_pattern = StringUtils::replaceAll(m_pattern, "\\*", "*");
383             assert(!m_pattern.empty());
384         }
385 
matches(const String & str)386         bool matches(const String& str) const {
387             switch (m_mode) {
388                 case Mode_Exact:
389                     return isEqual(str, m_pattern, Cmp());
390                 case Mode_Prefix:
391                     return isPrefix(str, m_pattern, Cmp());
392                 case Mode_Suffix:
393                     return isSuffix(str, m_pattern, Cmp());
394                 switchDefault()
395             }
396         }
397     };
398 
399     typedef SimpleStringMatcher<CaseSensitiveCharCompare> CaseSensitiveStringMatcher;
400     typedef SimpleStringMatcher<CaseInsensitiveCharCompare> CaseInsensitiveStringMatcher;
401 }
402 
403 #endif
404