1 /*
2 * OpenClonk, http://www.openclonk.org
3 *
4 * Copyright (c) 2001-2009, RedWolf Design GmbH, http://www.clonk.de/
5 * Copyright (c) 2009-2016, The OpenClonk Team and contributors
6 *
7 * Distributed under the terms of the ISC license; see accompanying file
8 * "COPYING" for details.
9 *
10 * "Clonk" is a registered trademark of Matthes Bender, used with permission.
11 * See accompanying file "TRADEMARK" for details.
12 *
13 * To redistribute this file separately, substitute the full license texts
14 * for the above references.
15 */
16 #include "C4Include.h"
17 #include "lib/StdBuf.h"
18
19 #include "lib/StdCompiler.h"
20 #include "lib/StdAdaptors.h"
21
22 #ifdef _WIN32
23 #include "platform/C4windowswrapper.h"
24 #else
25 #define O_BINARY 0
26 #define O_SEQUENTIAL 0
27 #endif
28 #include <sys/stat.h>
29
30 // *** StdBuf
31
LoadFromFile(const char * szFile)32 bool StdBuf::LoadFromFile(const char *szFile)
33 {
34 // Open file
35 #ifdef _WIN32
36 int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
37 #else
38 int fh = open(szFile, O_BINARY | O_CLOEXEC | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
39 #endif
40 if (fh < 0) return false;
41 // Create buf
42 New(FileSize(fh));
43 // Read
44 if (read(fh, getMData(), getSize()) != (signed int) getSize())
45 {
46 close(fh);
47 return false;
48 }
49 close(fh);
50 // Ok
51 return true;
52 }
SaveToFile(const char * szFile) const53 bool StdBuf::SaveToFile(const char *szFile) const
54 {
55 // Open file
56 #ifdef _WIN32
57 int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
58 #else
59 int fh = open(szFile, O_BINARY | O_CLOEXEC | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
60 #endif
61 if (fh < 0) return false;
62 // Write data
63 if (write(fh, getData(), getSize()) != (signed int) getSize())
64 {
65 close(fh);
66 return false;
67 }
68 close(fh);
69 // Ok
70 return true;
71 }
72
LoadFromFile(const char * szFile)73 bool StdStrBuf::LoadFromFile(const char *szFile)
74 {
75 // Open file
76 #ifdef _WIN32
77 int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
78 #else
79 int fh = open(szFile, O_BINARY | O_CLOEXEC | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
80 #endif
81 if (fh < 0) return false;
82 // Create buf
83 SetLength(FileSize(fh));
84 // Read
85 if (read(fh, getMData(), getLength()) != (ssize_t) getLength())
86 {
87 close(fh);
88 return false;
89 }
90 close(fh);
91 // Ok
92 return true;
93 }
SaveToFile(const char * szFile) const94 bool StdStrBuf::SaveToFile(const char *szFile) const
95 {
96 // Open file
97 #ifdef _WIN32
98 int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
99 #else
100 int fh = open(szFile, O_BINARY | O_CLOEXEC | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
101 #endif
102 if (fh < 0) return false;
103 // Write data
104 if (write(fh, getData(), getLength()) != (ssize_t) getLength())
105 {
106 close(fh);
107 return false;
108 }
109 close(fh);
110 // Ok
111 return true;
112 }
113
CompileFunc(StdCompiler * pComp,int iType)114 void StdBuf::CompileFunc(StdCompiler *pComp, int iType)
115 {
116 // Size (guess it is a small value most of the time - if it's big, an extra byte won't hurt anyway)
117 uint32_t tmp = iSize; pComp->Value(mkIntPackAdapt(tmp)); iSize = tmp;
118 pComp->Separator(StdCompiler::SEP_PART2);
119 // Read/write data
120 if (pComp->isDeserializer())
121 {
122 New(iSize);
123 pComp->Raw(getMData(), iSize, StdCompiler::RawCompileType(iType));
124 }
125 else
126 {
127 pComp->Raw(const_cast<void *>(getData()), iSize, StdCompiler::RawCompileType(iType));
128 }
129 }
130
131 // *** StdStringBuf
132
133 #ifdef _WIN32
StdStrBuf(const wchar_t * utf16)134 StdStrBuf::StdStrBuf(const wchar_t * utf16)
135 {
136 int len = WideCharToMultiByte(CP_UTF8, 0, utf16, -1, nullptr, 0, nullptr, nullptr);
137 SetSize(len);
138 WideCharToMultiByte(CP_UTF8, 0, utf16, -1, getMData(), getSize(), nullptr, nullptr);
139 }
GetWideChar() const140 StdStrBuf::wchar_t_holder StdStrBuf::GetWideChar() const
141 {
142 if (!getSize()) return StdStrBuf::wchar_t_holder(nullptr);
143
144 int len = MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), nullptr, 0);
145 wchar_t * p = new wchar_t[len];
146 MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), p, len);
147 return StdStrBuf::wchar_t_holder(p);
148 }
GetWideCharBuf()149 StdBuf StdStrBuf::GetWideCharBuf()
150 {
151 int len = MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), nullptr, 0);
152 StdBuf r; r.SetSize(len * sizeof(wchar_t));
153 MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), getMBufPtr<wchar_t>(r), len);
154 return r;
155 }
GetWideChar(const char * utf8,bool double_null_terminate)156 StdStrBuf::wchar_t_holder GetWideChar(const char * utf8, bool double_null_terminate)
157 {
158 int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, nullptr, 0);
159 if (double_null_terminate) ++len;
160 wchar_t * p = new wchar_t[len];
161 MultiByteToWideChar(CP_UTF8, 0, utf8, -1, p, len);
162 if (double_null_terminate) p[len - 1] = wchar_t(0);
163 return StdStrBuf::wchar_t_holder(p);
164 }
GetWideCharBuf(const char * utf8)165 StdBuf GetWideCharBuf(const char * utf8)
166 {
167 int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, nullptr, 0);
168 StdBuf r; r.SetSize(len * sizeof(wchar_t));
169 MultiByteToWideChar(CP_UTF8, 0, utf8, -1, getMBufPtr<wchar_t>(r), len);
170 return r;
171 }
172 #endif
173
Format(const char * szFmt,...)174 void StdStrBuf::Format(const char *szFmt, ...)
175 {
176 // Create argument list
177 va_list args; va_start(args, szFmt);
178 // Format
179 FormatV(szFmt, args);
180 }
181
FormatV(const char * szFmt,va_list args)182 void StdStrBuf::FormatV(const char *szFmt, va_list args)
183 {
184 // Clear previous contents
185 Clear();
186 // Format
187 AppendFormatV(szFmt, args);
188 }
189
AppendFormat(const char * szFmt,...)190 void StdStrBuf::AppendFormat(const char *szFmt, ...)
191 {
192 // Create argument list
193 va_list args; va_start(args, szFmt);
194 // Format
195 AppendFormatV(szFmt, args);
196 }
197
AppendFormatV(const char * szFmt,va_list args)198 void StdStrBuf::AppendFormatV(const char *szFmt, va_list args)
199 {
200 #ifdef HAVE_VASPRINTF
201 // Format
202 char *pStr; int iBytes = vasprintf(&pStr, szFmt, args);
203 if (iBytes < 0 || !pStr) return;
204 // Append
205 if (isNull())
206 Take(pStr, iBytes);
207 else
208 {
209 Append(pStr, iBytes);
210 free(pStr);
211 }
212 #elif defined(HAVE_VSCPRINTF)
213 // Save append start
214 int iStart = getLength();
215 // Calculate size, allocate
216 int iLength = vscprintf(szFmt, args);
217 Grow(iLength);
218 // Format
219 char *pPos = getMElem<char>(*this, iSize - iLength - 1);
220 vsprintf(getMPtr(iStart), szFmt, args);
221 #else
222 // Save append start
223 int iStart = getLength(), iBytes;
224 do
225 {
226 // Grow
227 Grow(512);
228 // Try output
229 va_list args_copy;
230 #ifdef va_copy
231 va_copy(args_copy, args);
232 #else
233 args_copy = args;
234 #endif
235 iBytes = vsnprintf(getMPtr(iStart), getLength() - iStart, szFmt, args_copy);
236 #ifdef va_copy
237 va_end(args_copy);
238 #endif
239 }
240 while (iBytes < 0 || (unsigned int)(iBytes) >= getLength() - iStart);
241 // Calculate real length, if vsnprintf didn't return anything of value
242 iBytes = strlen(getMPtr(iStart));
243 // Shrink to fit
244 SetSize(iStart + iBytes + 1);
245 #endif
246 }
247
AppendBackslash()248 void StdStrBuf::AppendBackslash()
249 {
250 if(getLength() && *getPtr(getLength() - 1) == DirectorySeparator) return;
251 AppendChar(DirectorySeparator);
252 }
253
CompileFunc(StdCompiler * pComp,int iRawType)254 void StdStrBuf::CompileFunc(StdCompiler *pComp, int iRawType)
255 {
256 if (pComp->isDeserializer())
257 {
258 char *pnData;
259 pComp->String(&pnData, StdCompiler::RawCompileType(iRawType));
260 Take(pnData);
261 }
262 else
263 {
264 char *pData = const_cast<char *>(getData());
265 if (!pData) pData = const_cast<char *>("");
266 pComp->String(&pData, StdCompiler::RawCompileType(iRawType));
267 }
268 }
269
FormatString(const char * szFmt,...)270 StdStrBuf FormatString(const char *szFmt, ...)
271 {
272 va_list args; va_start(args, szFmt);
273 return FormatStringV(szFmt, args);
274 }
275
FormatStringV(const char * szFmt,va_list args)276 StdStrBuf FormatStringV(const char *szFmt, va_list args)
277 {
278 StdStrBuf Buf;
279 Buf.FormatV(szFmt, args);
280 return Buf;
281 }
282
283 // replace all occurences of one string with another. Return number of replacements.
Replace(const char * szOld,const char * szNew,size_t iStartSearch)284 int StdStrBuf::Replace(const char *szOld, const char *szNew, size_t iStartSearch)
285 {
286 if (!getPtr(0) || !szOld) return 0;
287 if (!szNew) szNew = "";
288 int cnt=0;
289 size_t iOldLen = strlen(szOld), iNewLen = strlen(szNew);
290 if (iOldLen != iNewLen)
291 {
292 // count number of occurences to calculate new string length
293 size_t iResultLen = getLength();
294 const char *szPos = getPtr(iStartSearch);
295 while ((szPos = SSearch(szPos, szOld)))
296 {
297 iResultLen += iNewLen - iOldLen;
298 ++cnt;
299 }
300 if (!cnt) return 0;
301 // now construct new string by replacement
302 StdStrBuf sResult;
303 sResult.New(iResultLen+1);
304 const char *szRPos = getPtr(0), *szRNextPos;
305 char *szWrite = sResult.getMPtr(0);
306 if (iStartSearch)
307 {
308 memcpy(szWrite, szRPos, iStartSearch * sizeof(char));
309 szRPos += iStartSearch;
310 szWrite += iStartSearch;
311 }
312 while ((szRNextPos = SSearch(szRPos, szOld)))
313 {
314 memcpy(szWrite, szRPos, (szRNextPos - szRPos - iOldLen) * sizeof(char));
315 szWrite += (szRNextPos - szRPos - iOldLen);
316 memcpy(szWrite, szNew, iNewLen * sizeof(char));
317 szWrite += iNewLen;
318 szRPos = szRNextPos;
319 }
320 strcpy(szWrite, szRPos);
321 Take(std::move(sResult));
322 }
323 else
324 {
325 // replace directly in this string
326 char *szRPos = getMPtr(iStartSearch);
327 while ((szRPos = const_cast<char *>(SSearch(szRPos, szOld))))
328 {
329 memcpy(szRPos - iOldLen, szNew, iOldLen * sizeof(char));
330 ++cnt;
331 }
332 }
333 return cnt;
334 }
335
ReplaceChar(char cOld,char cNew)336 int StdStrBuf::ReplaceChar(char cOld, char cNew)
337 {
338 if (isNull()) return 0;
339 char *szPos = getMPtr(0);
340 if (!cOld) return 0;
341 if (!cNew) cNew = '_';
342 int cnt=0;
343 while ((szPos = strchr(szPos, cOld)))
344 {
345 *szPos++ = cNew;
346 ++cnt;
347 }
348 return cnt;
349 }
350
ReplaceEnd(size_t iPos,const char * szNewEnd)351 void StdStrBuf::ReplaceEnd(size_t iPos, const char *szNewEnd)
352 {
353 size_t iLen = getLength();
354 assert(iPos <= iLen); if (iPos > iLen) return;
355 size_t iEndLen = strlen(szNewEnd);
356 if (iLen - iPos != iEndLen) SetLength(iPos + iEndLen);
357 memcpy(getMPtr(iPos), szNewEnd, iEndLen * sizeof(char));
358 }
359
ValidateChars(const char * szInitialChars,const char * szMidChars)360 bool StdStrBuf::ValidateChars(const char *szInitialChars, const char *szMidChars)
361 {
362 // only given chars may be in string
363 for (size_t i=0; i<getLength(); ++i)
364 if (!strchr(i ? szMidChars : szInitialChars, getData()[i]))
365 return false;
366 return true;
367 }
368
GetSection(size_t idx,StdStrBuf * psOutSection,char cSeparator) const369 bool StdStrBuf::GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator) const
370 {
371 assert(psOutSection);
372 psOutSection->Clear();
373 const char *szStr = getData(), *szSepPos;
374 if (!szStr) return false; // invaid argument
375 while ((szSepPos = strchr(szStr, cSeparator)) && idx) { szStr = szSepPos+1; --idx; }
376 if (idx) return false; // indexed section not found
377 // fill output buffer with section, if not empty
378 if (!szSepPos) szSepPos = getData() + getLength();
379 if (szSepPos != szStr) psOutSection->Copy(szStr, szSepPos - szStr);
380 // return true even if section is empty, because the section obviously exists
381 // (to enable loops like while (buf.GetSection(i++, §)) if (sect) ...)
382 return true;
383 }
384
ToLowerCase()385 void StdStrBuf::ToLowerCase()
386 {
387 if (!isNull())
388 for (char *szPos = getMPtr(0); *szPos; ++szPos)
389 *szPos = tolower(*szPos);
390 }
391
AppendCharacter(uint32_t unicodechar)392 void StdStrBuf::AppendCharacter(uint32_t unicodechar)
393 {
394 if (unicodechar < 0x80)
395 AppendChar(unicodechar);
396 else if (unicodechar < 0x800)
397 {
398 Grow(2);
399 *getMPtr(getLength() - 2) = (0xC0 | (unicodechar >> 6));
400 *getMPtr(getLength() - 1) = (0x80 | (unicodechar & 0x3F));
401 }
402 else if (unicodechar < 0x10000)
403 {
404 Grow(3);
405 *getMPtr(getLength() - 3) = (0xE0 | (unicodechar >> 12));
406 *getMPtr(getLength() - 2) = (0x80 | ((unicodechar >> 6) & 0x3F));
407 *getMPtr(getLength() - 1) = (0x80 | (unicodechar & 0x3F));
408 }
409 else if (unicodechar < 0x110000)
410 {
411 Grow(4);
412 *getMPtr(getLength() - 4) = (0xF0 | (unicodechar >> 18));
413 *getMPtr(getLength() - 3) = (0x80 | ((unicodechar >> 12) & 0x3F));
414 *getMPtr(getLength() - 2) = (0x80 | ((unicodechar >> 6) & 0x3F));
415 *getMPtr(getLength() - 1) = (0x80 | (unicodechar & 0x3F));
416 }
417 else /* not an unicode code point, ignore */ {}
418 }
419
420 // Returns true if charset was converted.
EnsureUnicode()421 bool StdStrBuf::EnsureUnicode()
422 {
423 // assume that it's windows-1252 and convert to utf-8
424 if (!IsValidUtf8(getData(), getLength()))
425 {
426 size_t j = 0;
427 StdStrBuf buf;
428 buf.Grow(getLength());
429 // totally unfounded statistic: most texts have less than 20 umlauts.
430 enum { GROWSIZE = 20 };
431 for (size_t i = 0; i < getSize(); ++i)
432 {
433 unsigned char c = *getPtr(i);
434 // ASCII
435 if (c < 0x80)
436 {
437 if (j >= buf.getLength())
438 buf.Grow(GROWSIZE);
439 *buf.getMPtr(j++) = c;
440 continue;
441 }
442 // Is c one of the control characters only in ISO/IEC_8859-1 or part of the common subset with windows-1252?
443 if (c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D || c >= 0xA0)
444 {
445 if (j + 1 >= buf.getLength())
446 buf.Grow(GROWSIZE);
447 *buf.getMPtr(j++) = (0xC0 | (c >> 6));
448 *buf.getMPtr(j++) = (0x80 | (c & 0x3F));
449 continue;
450 }
451 // Extra windows-1252-characters
452 buf.SetLength(j);
453 static const char * extra_chars [] =
454 {
455 //"€", 0, "‚", "ƒ", "„", "…", "†", "‡", "ˆ", "‰", "Š", "‹", "Œ", 0, "Ž", 0,
456 // 0, "‘", "’", "“", "”", "•", "–", "—", "˜", "™", "š", "›", "œ", 0, "ž", "Ÿ" };
457 "\xe2\x82\xac", nullptr, "\xe2\x80\x9a", "\xc6\x92", "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1", "\xcb\x86", "\xe2\x80\xb0", "\xc5\xa0", "\xe2\x80\xb9", "\xc5\x92", nullptr, "\xc5\xbd", nullptr,
458 nullptr, "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c", "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94", "\xcb\x9c", "\xe2\x84\xa2", "\xc5\xa1", "\xe2\x80\xba", "\xc5\x93", nullptr, "\xc5\xbe", "\xc5\xb8"
459 };
460 buf.Append(extra_chars[c - 0x80]);
461 j += strlen(extra_chars[c - 0x80]);
462 }
463 buf.SetLength(j);
464 Take(std::move(buf));
465 return true;
466 }
467 return false;
468 }
469
TrimSpaces()470 bool StdStrBuf::TrimSpaces()
471 {
472 // get left trim
473 int32_t iSpaceLeftCount = 0, iLength = getLength();
474 if (!iLength) return false;
475 const char *szStr = getData();
476 while (iSpaceLeftCount < iLength)
477 if (isspace((unsigned char)(unsigned char) szStr[iSpaceLeftCount]))
478 ++iSpaceLeftCount;
479 else
480 break;
481 // only spaces? Clear!
482 if (iSpaceLeftCount == iLength)
483 {
484 Clear();
485 return true;
486 }
487 // get right trim
488 int32_t iSpaceRightCount = 0;
489 while (isspace((unsigned char)szStr[iLength - 1 - iSpaceRightCount])) ++iSpaceRightCount;
490 // anything to trim?
491 if (!iSpaceLeftCount && !iSpaceRightCount) return false;
492 // only right trim? Can do this by shortening
493 if (!iSpaceLeftCount)
494 {
495 SetLength(iLength - iSpaceRightCount);
496 return true;
497 }
498 // left trim involved - move text and shorten
499 memmove(getMPtr(0), szStr+iSpaceLeftCount, iLength - iSpaceLeftCount - iSpaceRightCount);
500 SetLength(iLength - iSpaceLeftCount - iSpaceRightCount);
501 return true;
502 }
503
504 #ifdef _WIN32
WStrToString(wchar_t * ws)505 std::string WStrToString(wchar_t *ws)
506 {
507 int len = WideCharToMultiByte(CP_UTF8, 0, ws, -1, nullptr, 0, nullptr, nullptr);
508 assert(len >= 0);
509 if (len <= 0) return std::string{};
510
511 std::string s(static_cast<size_t>(len), '\0');
512 s.resize(WideCharToMultiByte(CP_UTF8, 0, ws, -1, &s[0], s.size(), nullptr, nullptr) - 1);
513 return s;
514 }
515 #endif
516