1 /*
2  * OpenClonk, http://www.openclonk.org
3  *
4  * Copyright (c) 2001-2009, RedWolf Design GmbH, http://www.clonk.de/
5  * Copyright (c) 2009-2016, The OpenClonk Team and contributors
6  *
7  * Distributed under the terms of the ISC license; see accompanying file
8  * "COPYING" for details.
9  *
10  * "Clonk" is a registered trademark of Matthes Bender, used with permission.
11  * See accompanying file "TRADEMARK" for details.
12  *
13  * To redistribute this file separately, substitute the full license texts
14  * for the above references.
15  */
16 #include "C4Include.h"
17 #include "lib/StdBuf.h"
18 
19 #include "lib/StdCompiler.h"
20 #include "lib/StdAdaptors.h"
21 
22 #ifdef _WIN32
23 #include "platform/C4windowswrapper.h"
24 #else
25 #define O_BINARY 0
26 #define O_SEQUENTIAL 0
27 #endif
28 #include <sys/stat.h>
29 
30 // *** StdBuf
31 
LoadFromFile(const char * szFile)32 bool StdBuf::LoadFromFile(const char *szFile)
33 {
34 	// Open file
35 #ifdef _WIN32
36 	int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
37 #else
38 	int fh = open(szFile, O_BINARY | O_CLOEXEC | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
39 #endif
40 	if (fh < 0) return false;
41 	// Create buf
42 	New(FileSize(fh));
43 	// Read
44 	if (read(fh, getMData(), getSize()) != (signed int) getSize())
45 	{
46 		close(fh);
47 		return false;
48 	}
49 	close(fh);
50 	// Ok
51 	return true;
52 }
SaveToFile(const char * szFile) const53 bool StdBuf::SaveToFile(const char *szFile) const
54 {
55 	// Open file
56 #ifdef _WIN32
57 	int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
58 #else
59 	int fh = open(szFile, O_BINARY | O_CLOEXEC | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
60 #endif
61 	if (fh < 0) return false;
62 	// Write data
63 	if (write(fh, getData(), getSize()) != (signed int) getSize())
64 	{
65 		close(fh);
66 		return false;
67 	}
68 	close(fh);
69 	// Ok
70 	return true;
71 }
72 
LoadFromFile(const char * szFile)73 bool StdStrBuf::LoadFromFile(const char *szFile)
74 {
75 	// Open file
76 #ifdef _WIN32
77 	int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
78 #else
79 	int fh = open(szFile, O_BINARY | O_CLOEXEC | O_RDONLY | O_SEQUENTIAL, S_IREAD | S_IWRITE);
80 #endif
81 	if (fh < 0) return false;
82 	// Create buf
83 	SetLength(FileSize(fh));
84 	// Read
85 	if (read(fh, getMData(), getLength()) != (ssize_t) getLength())
86 	{
87 		close(fh);
88 		return false;
89 	}
90 	close(fh);
91 	// Ok
92 	return true;
93 }
SaveToFile(const char * szFile) const94 bool StdStrBuf::SaveToFile(const char *szFile) const
95 {
96 	// Open file
97 #ifdef _WIN32
98 	int fh = _wopen(::GetWideChar(szFile), O_BINARY | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
99 #else
100 	int fh = open(szFile, O_BINARY | O_CLOEXEC | O_CREAT | O_WRONLY | O_SEQUENTIAL | O_TRUNC, S_IREAD | S_IWRITE);
101 #endif
102 	if (fh < 0) return false;
103 	// Write data
104 	if (write(fh, getData(), getLength()) != (ssize_t) getLength())
105 	{
106 		close(fh);
107 		return false;
108 	}
109 	close(fh);
110 	// Ok
111 	return true;
112 }
113 
CompileFunc(StdCompiler * pComp,int iType)114 void StdBuf::CompileFunc(StdCompiler *pComp, int iType)
115 {
116 	// Size (guess it is a small value most of the time - if it's big, an extra byte won't hurt anyway)
117 	uint32_t tmp = iSize; pComp->Value(mkIntPackAdapt(tmp)); iSize = tmp;
118 	pComp->Separator(StdCompiler::SEP_PART2);
119 	// Read/write data
120 	if (pComp->isDeserializer())
121 	{
122 		New(iSize);
123 		pComp->Raw(getMData(), iSize, StdCompiler::RawCompileType(iType));
124 	}
125 	else
126 	{
127 		pComp->Raw(const_cast<void *>(getData()), iSize, StdCompiler::RawCompileType(iType));
128 	}
129 }
130 
131 // *** StdStringBuf
132 
133 #ifdef _WIN32
StdStrBuf(const wchar_t * utf16)134 StdStrBuf::StdStrBuf(const wchar_t * utf16)
135 {
136 	int len = WideCharToMultiByte(CP_UTF8, 0, utf16, -1, nullptr, 0, nullptr, nullptr);
137 	SetSize(len);
138 	WideCharToMultiByte(CP_UTF8, 0, utf16, -1, getMData(), getSize(), nullptr, nullptr);
139 }
GetWideChar() const140 StdStrBuf::wchar_t_holder StdStrBuf::GetWideChar() const
141 {
142 	if (!getSize()) return StdStrBuf::wchar_t_holder(nullptr);
143 
144 	int len = MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), nullptr, 0);
145 	wchar_t * p = new wchar_t[len];
146 	MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), p, len);
147 	return StdStrBuf::wchar_t_holder(p);
148 }
GetWideCharBuf()149 StdBuf StdStrBuf::GetWideCharBuf()
150 {
151 	int len = MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), nullptr, 0);
152 	StdBuf r; r.SetSize(len * sizeof(wchar_t));
153 	MultiByteToWideChar(CP_UTF8, 0, getData(), getSize(), getMBufPtr<wchar_t>(r), len);
154 	return r;
155 }
GetWideChar(const char * utf8,bool double_null_terminate)156 StdStrBuf::wchar_t_holder GetWideChar(const char * utf8, bool double_null_terminate)
157 {
158 	int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, nullptr, 0);
159 	if (double_null_terminate) ++len;
160 	wchar_t * p = new wchar_t[len];
161 	MultiByteToWideChar(CP_UTF8, 0, utf8, -1, p, len);
162 	if (double_null_terminate) p[len - 1] = wchar_t(0);
163 	return StdStrBuf::wchar_t_holder(p);
164 }
GetWideCharBuf(const char * utf8)165 StdBuf GetWideCharBuf(const char * utf8)
166 {
167 	int len = MultiByteToWideChar(CP_UTF8, 0, utf8, -1, nullptr, 0);
168 	StdBuf r; r.SetSize(len * sizeof(wchar_t));
169 	MultiByteToWideChar(CP_UTF8, 0, utf8, -1, getMBufPtr<wchar_t>(r), len);
170 	return r;
171 }
172 #endif
173 
Format(const char * szFmt,...)174 void StdStrBuf::Format(const char *szFmt, ...)
175 {
176 	// Create argument list
177 	va_list args; va_start(args, szFmt);
178 	// Format
179 	FormatV(szFmt, args);
180 }
181 
FormatV(const char * szFmt,va_list args)182 void StdStrBuf::FormatV(const char *szFmt, va_list args)
183 {
184 	// Clear previous contents
185 	Clear();
186 	// Format
187 	AppendFormatV(szFmt, args);
188 }
189 
AppendFormat(const char * szFmt,...)190 void StdStrBuf::AppendFormat(const char *szFmt, ...)
191 {
192 	// Create argument list
193 	va_list args; va_start(args, szFmt);
194 	// Format
195 	AppendFormatV(szFmt, args);
196 }
197 
AppendFormatV(const char * szFmt,va_list args)198 void StdStrBuf::AppendFormatV(const char *szFmt, va_list args)
199 {
200 #ifdef HAVE_VASPRINTF
201 	// Format
202 	char *pStr; int iBytes = vasprintf(&pStr, szFmt, args);
203 	if (iBytes < 0 || !pStr) return;
204 	// Append
205 	if (isNull())
206 		Take(pStr, iBytes);
207 	else
208 	{
209 		Append(pStr, iBytes);
210 		free(pStr);
211 	}
212 #elif defined(HAVE_VSCPRINTF)
213 	// Save append start
214 	int iStart = getLength();
215 	// Calculate size, allocate
216 	int iLength = vscprintf(szFmt, args);
217 	Grow(iLength);
218 	// Format
219 	char *pPos = getMElem<char>(*this, iSize - iLength - 1);
220 	vsprintf(getMPtr(iStart), szFmt, args);
221 #else
222 	// Save append start
223 	int iStart = getLength(), iBytes;
224 	do
225 	{
226 		// Grow
227 		Grow(512);
228 		// Try output
229 		va_list args_copy;
230 		#ifdef va_copy
231 			va_copy(args_copy, args);
232 		#else
233 			args_copy = args;
234 		#endif
235 		iBytes = vsnprintf(getMPtr(iStart), getLength() - iStart, szFmt, args_copy);
236 		#ifdef va_copy
237 			va_end(args_copy);
238 		#endif
239 	}
240 	while (iBytes < 0 || (unsigned int)(iBytes) >= getLength() - iStart);
241 	// Calculate real length, if vsnprintf didn't return anything of value
242 	iBytes = strlen(getMPtr(iStart));
243 	// Shrink to fit
244 	SetSize(iStart + iBytes + 1);
245 #endif
246 }
247 
AppendBackslash()248 void StdStrBuf::AppendBackslash()
249 {
250 	if(getLength() && *getPtr(getLength() - 1) == DirectorySeparator) return;
251 	AppendChar(DirectorySeparator);
252 }
253 
CompileFunc(StdCompiler * pComp,int iRawType)254 void StdStrBuf::CompileFunc(StdCompiler *pComp, int iRawType)
255 {
256 	if (pComp->isDeserializer())
257 	{
258 		char *pnData;
259 		pComp->String(&pnData, StdCompiler::RawCompileType(iRawType));
260 		Take(pnData);
261 	}
262 	else
263 	{
264 		char *pData = const_cast<char *>(getData());
265 		if (!pData) pData = const_cast<char *>("");
266 		pComp->String(&pData, StdCompiler::RawCompileType(iRawType));
267 	}
268 }
269 
FormatString(const char * szFmt,...)270 StdStrBuf FormatString(const char *szFmt, ...)
271 {
272 	va_list args; va_start(args, szFmt);
273 	return FormatStringV(szFmt, args);
274 }
275 
FormatStringV(const char * szFmt,va_list args)276 StdStrBuf FormatStringV(const char *szFmt, va_list args)
277 {
278 	StdStrBuf Buf;
279 	Buf.FormatV(szFmt, args);
280 	return Buf;
281 }
282 
283 // replace all occurences of one string with another. Return number of replacements.
Replace(const char * szOld,const char * szNew,size_t iStartSearch)284 int StdStrBuf::Replace(const char *szOld, const char *szNew, size_t iStartSearch)
285 {
286 	if (!getPtr(0) || !szOld) return 0;
287 	if (!szNew) szNew = "";
288 	int cnt=0;
289 	size_t iOldLen = strlen(szOld), iNewLen = strlen(szNew);
290 	if (iOldLen != iNewLen)
291 	{
292 		// count number of occurences to calculate new string length
293 		size_t iResultLen = getLength();
294 		const char *szPos = getPtr(iStartSearch);
295 		while ((szPos = SSearch(szPos, szOld)))
296 		{
297 			iResultLen += iNewLen - iOldLen;
298 			++cnt;
299 		}
300 		if (!cnt) return 0;
301 		// now construct new string by replacement
302 		StdStrBuf sResult;
303 		sResult.New(iResultLen+1);
304 		const char *szRPos = getPtr(0), *szRNextPos;
305 		char *szWrite = sResult.getMPtr(0);
306 		if (iStartSearch)
307 		{
308 			memcpy(szWrite, szRPos, iStartSearch * sizeof(char));
309 			szRPos += iStartSearch;
310 			szWrite += iStartSearch;
311 		}
312 		while ((szRNextPos = SSearch(szRPos, szOld)))
313 		{
314 			memcpy(szWrite, szRPos, (szRNextPos - szRPos - iOldLen) * sizeof(char));
315 			szWrite += (szRNextPos - szRPos - iOldLen);
316 			memcpy(szWrite, szNew, iNewLen * sizeof(char));
317 			szWrite += iNewLen;
318 			szRPos = szRNextPos;
319 		}
320 		strcpy(szWrite, szRPos);
321 		Take(std::move(sResult));
322 	}
323 	else
324 	{
325 		// replace directly in this string
326 		char *szRPos = getMPtr(iStartSearch);
327 		while ((szRPos = const_cast<char *>(SSearch(szRPos, szOld))))
328 		{
329 			memcpy(szRPos - iOldLen, szNew, iOldLen * sizeof(char));
330 			++cnt;
331 		}
332 	}
333 	return cnt;
334 }
335 
ReplaceChar(char cOld,char cNew)336 int StdStrBuf::ReplaceChar(char cOld, char cNew)
337 {
338 	if (isNull()) return 0;
339 	char *szPos = getMPtr(0);
340 	if (!cOld) return 0;
341 	if (!cNew) cNew = '_';
342 	int cnt=0;
343 	while ((szPos = strchr(szPos, cOld)))
344 	{
345 		*szPos++ = cNew;
346 		++cnt;
347 	}
348 	return cnt;
349 }
350 
ReplaceEnd(size_t iPos,const char * szNewEnd)351 void StdStrBuf::ReplaceEnd(size_t iPos, const char *szNewEnd)
352 {
353 	size_t iLen = getLength();
354 	assert(iPos <= iLen); if (iPos > iLen) return;
355 	size_t iEndLen = strlen(szNewEnd);
356 	if (iLen - iPos != iEndLen) SetLength(iPos + iEndLen);
357 	memcpy(getMPtr(iPos), szNewEnd, iEndLen * sizeof(char));
358 }
359 
ValidateChars(const char * szInitialChars,const char * szMidChars)360 bool StdStrBuf::ValidateChars(const char *szInitialChars, const char *szMidChars)
361 {
362 	// only given chars may be in string
363 	for (size_t i=0; i<getLength(); ++i)
364 		if (!strchr(i ? szMidChars : szInitialChars, getData()[i]))
365 			return false;
366 	return true;
367 }
368 
GetSection(size_t idx,StdStrBuf * psOutSection,char cSeparator) const369 bool StdStrBuf::GetSection(size_t idx, StdStrBuf *psOutSection, char cSeparator) const
370 {
371 	assert(psOutSection);
372 	psOutSection->Clear();
373 	const char *szStr = getData(), *szSepPos;
374 	if (!szStr) return false; // invaid argument
375 	while ((szSepPos = strchr(szStr, cSeparator)) && idx) { szStr = szSepPos+1; --idx; }
376 	if (idx) return false; // indexed section not found
377 	// fill output buffer with section, if not empty
378 	if (!szSepPos) szSepPos = getData() + getLength();
379 	if (szSepPos != szStr) psOutSection->Copy(szStr, szSepPos - szStr);
380 	// return true even if section is empty, because the section obviously exists
381 	// (to enable loops like while (buf.GetSection(i++, &sect)) if (sect) ...)
382 	return true;
383 }
384 
ToLowerCase()385 void StdStrBuf::ToLowerCase()
386 {
387 	if (!isNull())
388 		for (char *szPos = getMPtr(0); *szPos; ++szPos)
389 			*szPos = tolower(*szPos);
390 }
391 
AppendCharacter(uint32_t unicodechar)392 void StdStrBuf::AppendCharacter(uint32_t unicodechar)
393 {
394 	if (unicodechar < 0x80)
395 		AppendChar(unicodechar);
396 	else if (unicodechar < 0x800)
397 	{
398 		Grow(2);
399 		*getMPtr(getLength() - 2) = (0xC0 | (unicodechar >> 6));
400 		*getMPtr(getLength() - 1) = (0x80 | (unicodechar & 0x3F));
401 	}
402 	else if (unicodechar < 0x10000)
403 	{
404 		Grow(3);
405 		*getMPtr(getLength() - 3) = (0xE0 | (unicodechar >> 12));
406 		*getMPtr(getLength() - 2) = (0x80 | ((unicodechar >> 6) & 0x3F));
407 		*getMPtr(getLength() - 1) = (0x80 | (unicodechar & 0x3F));
408 	}
409 	else if (unicodechar < 0x110000)
410 	{
411 		Grow(4);
412 		*getMPtr(getLength() - 4) = (0xF0 | (unicodechar >> 18));
413 		*getMPtr(getLength() - 3) = (0x80 | ((unicodechar >> 12) & 0x3F));
414 		*getMPtr(getLength() - 2) = (0x80 | ((unicodechar >> 6) & 0x3F));
415 		*getMPtr(getLength() - 1) = (0x80 | (unicodechar & 0x3F));
416 	}
417 	else /* not an unicode code point, ignore */ {}
418 }
419 
420 // Returns true if charset was converted.
EnsureUnicode()421 bool StdStrBuf::EnsureUnicode()
422 {
423 	// assume that it's windows-1252 and convert to utf-8
424 	if (!IsValidUtf8(getData(), getLength()))
425 	{
426 		size_t j = 0;
427 		StdStrBuf buf;
428 		buf.Grow(getLength());
429 		// totally unfounded statistic: most texts have less than 20 umlauts.
430 		enum { GROWSIZE = 20 };
431 		for (size_t i = 0; i < getSize(); ++i)
432 		{
433 			unsigned char c = *getPtr(i);
434 			// ASCII
435 			if (c < 0x80)
436 			{
437 				if (j >= buf.getLength())
438 					buf.Grow(GROWSIZE);
439 				*buf.getMPtr(j++) = c;
440 				continue;
441 			}
442 			// Is c one of the control characters only in ISO/IEC_8859-1 or part of the common subset with windows-1252?
443 			if (c == 0x81 || c == 0x8D || c == 0x8F || c == 0x90 || c == 0x9D || c >= 0xA0)
444 			{
445 				if (j + 1 >= buf.getLength())
446 					buf.Grow(GROWSIZE);
447 				*buf.getMPtr(j++) = (0xC0 | (c >> 6));
448 				*buf.getMPtr(j++) = (0x80 | (c & 0x3F));
449 				continue;
450 			}
451 			// Extra windows-1252-characters
452 			buf.SetLength(j);
453 			static const char * extra_chars [] =
454 			{
455 				//"€",   0, "‚", "ƒ", "„", "…", "†", "‡", "ˆ", "‰", "Š", "‹", "Œ",   0, "Ž",   0,
456 				//  0, "‘", "’", "“", "”", "•", "–", "—", "˜", "™", "š", "›", "œ",   0, "ž", "Ÿ" };
457 				"\xe2\x82\xac", nullptr, "\xe2\x80\x9a", "\xc6\x92", "\xe2\x80\x9e", "\xe2\x80\xa6", "\xe2\x80\xa0", "\xe2\x80\xa1", "\xcb\x86", "\xe2\x80\xb0", "\xc5\xa0", "\xe2\x80\xb9", "\xc5\x92", nullptr, "\xc5\xbd", nullptr,
458 				nullptr, "\xe2\x80\x98", "\xe2\x80\x99", "\xe2\x80\x9c", "\xe2\x80\x9d", "\xe2\x80\xa2", "\xe2\x80\x93", "\xe2\x80\x94", "\xcb\x9c", "\xe2\x84\xa2", "\xc5\xa1", "\xe2\x80\xba", "\xc5\x93",   nullptr, "\xc5\xbe", "\xc5\xb8"
459 			};
460 			buf.Append(extra_chars[c - 0x80]);
461 			j += strlen(extra_chars[c - 0x80]);
462 		}
463 		buf.SetLength(j);
464 		Take(std::move(buf));
465 		return true;
466 	}
467 	return false;
468 }
469 
TrimSpaces()470 bool StdStrBuf::TrimSpaces()
471 {
472 	// get left trim
473 	int32_t iSpaceLeftCount = 0, iLength = getLength();
474 	if (!iLength) return false;
475 	const char *szStr = getData();
476 	while (iSpaceLeftCount < iLength)
477 		if (isspace((unsigned char)(unsigned char) szStr[iSpaceLeftCount]))
478 			++iSpaceLeftCount;
479 		else
480 			break;
481 	// only spaces? Clear!
482 	if (iSpaceLeftCount == iLength)
483 	{
484 		Clear();
485 		return true;
486 	}
487 	// get right trim
488 	int32_t iSpaceRightCount = 0;
489 	while (isspace((unsigned char)szStr[iLength - 1 - iSpaceRightCount])) ++iSpaceRightCount;
490 	// anything to trim?
491 	if (!iSpaceLeftCount && !iSpaceRightCount) return false;
492 	// only right trim? Can do this by shortening
493 	if (!iSpaceLeftCount)
494 	{
495 		SetLength(iLength - iSpaceRightCount);
496 		return true;
497 	}
498 	// left trim involved - move text and shorten
499 	memmove(getMPtr(0), szStr+iSpaceLeftCount, iLength - iSpaceLeftCount - iSpaceRightCount);
500 	SetLength(iLength - iSpaceLeftCount - iSpaceRightCount);
501 	return true;
502 }
503 
504 #ifdef _WIN32
WStrToString(wchar_t * ws)505 std::string WStrToString(wchar_t *ws)
506 {
507 	int len = WideCharToMultiByte(CP_UTF8, 0, ws, -1, nullptr, 0, nullptr, nullptr);
508 	assert(len >= 0);
509 	if (len <= 0) return std::string{};
510 
511 	std::string s(static_cast<size_t>(len), '\0');
512 	s.resize(WideCharToMultiByte(CP_UTF8, 0, ws, -1, &s[0], s.size(), nullptr, nullptr) - 1);
513 	return s;
514 }
515 #endif
516