1 /*
2  * Copyright (c) 2019, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
#include <stdio.h>
#include <stdarg.h>
#include <ctype.h>
#include <wctype.h>
#include <stdexcept>
#include <algorithm>

#include "tstrings.h"
#include "ErrorHandling.h"
33 
34 
35 namespace tstrings {
36 
/*
 * Create formatted string.
 *
 * Expands the printf-style 'format' string with the variadic arguments and
 * returns the result; the returned string has exactly the formatted length.
 *
 * format - printf-style format string, must not be NULL
 * ...    - values consumed by the conversion specifications of 'format'
 *
 * Throws std::invalid_argument if 'format' is NULL.
 * Throws std::runtime_error if vsnprintf() reports a failure
 * (non-MSVC builds only).
 *
 * Note: the format-nonliteral diagnostic is silenced around the call,
 * so the compiler does not check the arguments against 'format'.
 */
tstring unsafe_format(tstring::const_pointer format, ...) {
    if (!format) {
        throw std::invalid_argument("Destination buffer can't be NULL");
    }

    const tstring::size_type inc = 256;
    tstring::size_type bufSize = inc;
    tstring fmtout;
    int ret = -1;
    bool failed = false;

    va_list args;
    va_start(args, format);
    for (;;) {
        fmtout.resize(bufSize);

        // A va_list is indeterminate once it has been handed to a
        // v*printf() function, so every attempt works on a fresh copy.
        va_list attemptArgs;
        va_copy(attemptArgs, args);
#ifdef _MSC_VER
        ret = _vsntprintf_s(&*fmtout.begin(), fmtout.size(), _TRUNCATE,
                                                    format, attemptArgs);
        va_end(attemptArgs);
        if (ret >= 0) {
            break;
        }
        // -1 is returned when the output was truncated: grow and retry
        bufSize += inc;
#else
#if defined(__GNUC__) && __GNUC__ >= 5
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wformat-nonliteral"
#endif
        // With g++ this compiles only with '-std=gnu++0x' option
        ret = vsnprintf(&*fmtout.begin(), fmtout.size(), format, attemptArgs);
#if defined(__GNUC__) && __GNUC__ >= 5
#pragma GCC diagnostic pop
#endif
        va_end(attemptArgs);
        if (ret < 0) {
            // Formatting failure; retrying with a bigger buffer can't help
            failed = true;
            break;
        }
        if (static_cast<tstring::size_type>(ret) < fmtout.size()) {
            break;
        }
        // On truncation vsnprintf() returns the length the complete output
        // would have (terminating NUL excluded): retry with the exact size.
        bufSize = static_cast<tstring::size_type>(ret) + 1;
#endif
    }
    va_end(args);

    if (failed) {
        throw std::runtime_error("vsnprintf() failed to format the string");
    }

    // update string size by actual value
    fmtout.resize(ret);

    return fmtout;
}
73 
74 /*
75  * Tests if two strings are equal according to CompareType.
76  *
77  * a - string to compare
78  * b - string to compare
79  * ct - CASE_SENSITIVE: case sensitive comparing type
80  *      IGNORE_CASE: case insensitive comparing type
81  */
equals(const tstring & a,const tstring & b,const CompareType ct)82 bool equals(const tstring& a, const tstring& b, const CompareType ct) {
83     if (IGNORE_CASE==ct) {
84         return toLower(a) == toLower(b);
85     }
86     return a == b;
87 }
88 
startsWith(const tstring & str,const tstring & substr,const CompareType ct)89 bool startsWith(const tstring &str, const tstring &substr, const CompareType ct)
90 {
91     if (str.size() < substr.size()) {
92         return false;
93     }
94     const tstring startOfStr = str.substr(0, substr.size());
95     return tstrings::equals(startOfStr, substr, ct);
96 }
97 
endsWith(const tstring & str,const tstring & substr,const CompareType ct)98 bool endsWith(const tstring &str, const tstring &substr, const CompareType ct)
99 {
100     if (str.size() < substr.size()) {
101         return false;
102     }
103     const tstring endOfStr = str.substr(str.size() - substr.size());
104     return tstrings::equals(endOfStr, substr, ct);
105 }
106 
107 /*
108  * Split string into a vector with given delimiter string
109  *
110  * strVector - string vector to store split tstring
111  * str - string to split
112  * delimiter - delimiter to split the string around
113  * st - ST_ALL: return value includes an empty string
114  *      ST_EXCEPT_EMPTY_STRING: return value does not include an empty string
115  *
116  * Note: It does not support multiple delimiters
117  */
split(tstring_array & strVector,const tstring & str,const tstring & delimiter,const SplitType st)118 void split(tstring_array &strVector, const tstring &str,
119           const tstring &delimiter, const SplitType st) {
120     tstring::size_type start = 0, end = 0, length = str.length();
121 
122     if (length == 0 || delimiter.length() == 0) {
123         return;
124     }
125 
126     end = str.find(delimiter, start);
127     while(end != tstring::npos) {
128         if(st == ST_ALL || end - start > 1 ) {
129             strVector.push_back(str.substr(start, end == tstring::npos ?
130                                                   tstring::npos : end - start));
131         }
132         start = end > (tstring::npos - delimiter.size()) ?
133                 tstring::npos : end + delimiter.size();
134         end = str.find(delimiter, start);
135     }
136 
137     if(st == ST_ALL || start < length) {
138         strVector.push_back(str.substr(start, length - start));
139     }
140 }
141 
/*
 * Convert uppercase letters to lowercase
 *
 * Returns a copy of 'str' with every character passed through the
 * lowercase conversion routine of the C library matching the character
 * type of tstring. The given 'str' remains unchanged.
 */
tstring toLower(const tstring& str) {
    // Selects the conversion routine by character type.
    struct LowerCase {
        static char convert(char ch) {
            // tolower() is only defined for values representable as
            // unsigned char (or EOF), so plain char can't be passed as is.
            return static_cast<char>(::tolower(static_cast<unsigned char>(ch)));
        }
        static wchar_t convert(wchar_t ch) {
            return static_cast<wchar_t>(::towlower(ch));
        }
    };

    tstring lower(str);
    for (tstring::iterator it = lower.begin(); it != lower.end(); ++it) {
        *it = LowerCase::convert(*it);
    }
    return lower;
}
154 
155 
/*
 * Substitute every occurrence of 'search' in 'str' with 'replace'.
 *
 * The input string is never modified; the function works on a copy and
 * returns it. If 'str' or 'search' is empty the copy is returned as is.
 * Text inserted as a replacement is not scanned again for 'search'.
 */
tstring replace(const tstring &str, const tstring &search, const tstring &replace)
{
    tstring result(str);

    if (!search.empty()) {
        tstring::size_type at = result.find(search, 0);
        while (at != tstring::npos) {
            result.replace(at, search.length(), replace);
            // continue the lookup right after the inserted text
            at = result.find(search, at + replace.length());
        }
    }

    return result;
}
180 
181 
/*
 * Remove leading and trailing characters listed in 'whitespace'.
 *
 * str - string to trim
 * whitespace - set of characters to strip from both ends of 'str'
 *
 * Returns the part of 'str' between the first and the last character
 * that is not in 'whitespace', or an empty string if there is no such
 * character.
 */
tstring trim(const tstring& str, const tstring& whitespace) {
    const tstring::size_type first = str.find_first_not_of(whitespace);
    if (first != tstring::npos) {
        const tstring::size_type last = str.find_last_not_of(whitespace);
        return str.substr(first, last - first + 1);
    }

    // nothing but 'whitespace' characters (or an empty string)
    return tstring();
}
197 
198 } // namespace tstrings
199 
200 
201 #ifdef TSTRINGS_WITH_WCHAR
202 namespace tstrings {
203 
204 namespace {
205 /*
206  * Converts UTF16-encoded string into multi-byte string of the given encoding.
207  */
toMultiByte(const std::wstring & utf16str,int encoding)208 std::string toMultiByte(const std::wstring& utf16str, int encoding) {
209     std::string reply;
210     int cm = WideCharToMultiByte(encoding,
211                                  0,
212                                  utf16str.c_str(),
213                                  int(utf16str.size()),
214                                  NULL,
215                                  0,
216                                  NULL,
217                                  NULL);
218     if (cm < 0) {
219         JP_THROW("Unexpected reply from WideCharToMultiByte()");
220     }
221     if (0 == cm) {
222         return reply;
223     }
224 
225     reply.resize(cm);
226     int cm2 = WideCharToMultiByte(encoding,
227                                   0,
228                                   utf16str.c_str(),
229                                   int(utf16str.size()),
230                                   &*reply.begin(),
231                                   cm,
232                                   NULL,
233                                   NULL);
234     if (cm != cm2) {
235         JP_THROW("Unexpected reply from WideCharToMultiByte()");
236     }
237 
238     return reply;
239 }
240 
241 /*
242  * Converts multi-byte string of the given encoding into UTF16-encoded string.
243  */
fromMultiByte(const std::string & str,int encoding)244 std::wstring fromMultiByte(const std::string& str, int encoding) {
245     std::wstring utf16;
246     int cw = MultiByteToWideChar(encoding,
247                                  MB_ERR_INVALID_CHARS,
248                                  str.c_str(),
249                                  int(str.size()),
250                                  NULL,
251                                  0);
252     if (cw < 0) {
253         JP_THROW("Unexpected reply from MultiByteToWideChar()");
254     }
255     if (0 == cw) {
256         return utf16;
257     }
258 
259     utf16.resize(cw);
260     int cw2 = MultiByteToWideChar(encoding,
261                                   MB_ERR_INVALID_CHARS,
262                                   str.c_str(),
263                                   int(str.size()),
264                                   &*utf16.begin(),
265                                   cw);
266     if (cw != cw2) {
267         JP_THROW("Unexpected reply from MultiByteToWideChar()");
268     }
269 
270     return utf16;
271 }
272 } // namespace
273 
toACP(const std::wstring & utf16str)274 std::string toACP(const std::wstring& utf16str) {
275     return toMultiByte(utf16str, CP_ACP);
276 }
277 
toUtf8(const std::wstring & utf16str)278 std::string toUtf8(const std::wstring& utf16str) {
279     return toMultiByte(utf16str, CP_UTF8);
280 }
281 
toUtf16(const std::string & utf8str)282 std::wstring toUtf16(const std::string& utf8str) {
283     return fromMultiByte(utf8str, CP_UTF8);
284 }
285 
286 // converts utf16-encoded string to Windows encoded string (WIDECHAR or ACP)
toWinString(const std::wstring & utf16)287 tstring toWinString(const std::wstring& utf16) {
288 #if defined(_UNICODE) || defined(UNICODE)
289     return utf16;
290 #else
291     return toMultiByte(utf16, CP_ACP);
292 #endif
293 }
294 
295 // converts utf8-encoded string to Windows encoded string (WIDECHAR or ACP)
toWinString(const std::string & utf8)296 tstring toWinString(const std::string& utf8) {
297     return toWinString(tstrings::toUtf16(utf8));
298 }
299 
300 
winStringToUtf8(const std::wstring & winStr)301 std::string winStringToUtf8(const std::wstring& winStr) {
302     return toUtf8(winStr);
303 }
304 
winStringToUtf8(const std::string & winStr)305 std::string winStringToUtf8(const std::string& winStr) {
306     return toUtf8(fromMultiByte(winStr, CP_ACP));
307 }
308 
// wide character string needs no conversion: a copy of it is returned
std::wstring winStringToUtf16(const std::wstring& winStr) {
    std::wstring utf16(winStr);
    return utf16;
}
312 
winStringToUtf16(const std::string & winStr)313 std::wstring winStringToUtf16(const std::string& winStr) {
314     return fromMultiByte(winStr, CP_ACP);
315 }
316 
317 } // namespace tstrings
318 #endif // ifdef TSTRINGS_WITH_WCHAR
319