1 /*
2 * OpenClonk, http://www.openclonk.org
3 *
4 * Copyright (c) 1998-2000, Matthes Bender
5 * Copyright (c) 2001-2009, RedWolf Design GmbH, http://www.clonk.de/
6 * Copyright (c) 2009-2016, The OpenClonk Team and contributors
7 *
8 * Distributed under the terms of the ISC license; see accompanying file
9 * "COPYING" for details.
10 *
11 * "Clonk" is a registered trademark of Matthes Bender, used with permission.
12 * See accompanying file "TRADEMARK" for details.
13 *
14 * To redistribute this file separately, substitute the full license texts
15 * for the above references.
16 */
17
18 /* All kinds of valuable helpers */
19
20 #include "C4Include.h"
21 #include "lib/Standard.h"
22
23 //------------------------------------- Basics ----------------------------------------
24
// Integer distance between (iX1, iY1) and (iX2, iY2).
// The result is the floating-point square root of the squared distance,
// corrected by at most one step so that its square does not exceed the
// squared distance. A negative squared distance yields -1.
int32_t Distance(int32_t iX1, int32_t iY1, int32_t iX2, int32_t iY2)
{
	// Widen before subtracting so the deltas themselves cannot overflow
	const int64_t iDeltaX = int64_t(iX1) - iX2;
	const int64_t iDeltaY = int64_t(iY1) - iY2;
	const int64_t iSquared = iDeltaX * iDeltaX + iDeltaY * iDeltaY;
	if (iSquared < 0)
		return -1;
	// Floating point estimate first, then nudge it up/down against the exact integer value
	int32_t iRoot = int32_t(sqrt(double(iSquared)));
	if (int64_t(iRoot) * iRoot < iSquared)
		++iRoot;
	if (int64_t(iRoot) * iRoot > iSquared)
		--iRoot;
	return iRoot;
}
35
36 // Angle between points (iX1, iY1) and (iX2, iY2) with range [0, 360), angle = 0 means vertically upward and increasing angles in clockwise direction.
Angle(int32_t iX1,int32_t iY1,int32_t iX2,int32_t iY2,int32_t iPrec)37 int32_t Angle(int32_t iX1, int32_t iY1, int32_t iX2, int32_t iY2, int32_t iPrec)
38 {
39 int32_t iAngle;
40 int32_t dx = iX2 - iX1, dy = iY2 - iY1;
41 if (!dx)
42 {
43 if (dy > 0) return 180 * iPrec;
44 else return 0;
45 }
46 if (!dy)
47 {
48 if (dx > 0) return 90 * iPrec;
49 else return 270 * iPrec;
50 }
51
52 iAngle = static_cast<int32_t>(180.0 * iPrec * atan2(static_cast<double>(Abs(dy)), static_cast<double>(Abs(dx))) / M_PI);
53
54 if (iX2 > iX1)
55 {
56 if (iY2 < iY1) iAngle = (90 * iPrec) - iAngle;
57 else iAngle = (90 * iPrec) + iAngle;
58 }
59 else
60 {
61 if (iY2 < iY1) iAngle = (270 * iPrec) + iAngle;
62 else iAngle = (270 * iPrec) - iAngle;
63 }
64
65 return iAngle;
66 }
67
/* Fast integer exponentiation (square-and-multiply). Negative exponents yield 0. */
int Pow(int base, int exponent)
{
	if (exponent < 0)
		return 0;

	// The lowest exponent bit is handled up front, so the loop below only
	// squares the base when there actually are higher bits left.
	int result = (exponent & 1) ? base : 1;
	for (exponent >>= 1; exponent != 0; exponent >>= 1)
	{
		base *= base;
		if (exponent & 1)
			result *= base;
	}
	return result;
}
87
88 //--------------------------------- Characters ------------------------------------------
89
IsIdentifier(char cChar)90 bool IsIdentifier(char cChar)
91 {
92 if (Inside(cChar,'A','Z')) return true;
93 if (Inside(cChar,'a','z')) return true;
94 if (Inside(cChar,'0','9')) return true;
95 if (cChar=='_') return true;
96 if (cChar=='~') return true;
97 if (cChar=='+') return true;
98 if (cChar=='-') return true;
99 return false;
100 }
101
// Whether c is a valid digit in the given base. Digits beyond 9 are the
// letters a-z or A-Z (case-insensitive), standing for the values 10 and up.
static bool IsNumber(char c, int base)
{
	if (c >= '0' && c <= '9')
		return c < '0' + base;
	if (c >= 'a' && c <= 'z')
		return c < 'a' + base - 10;
	if (c >= 'A' && c <= 'Z')
		return c < 'A' + base - 10;
	return false;
}
108
// Numeric value of a digit character: '0'-'9' map to 0-9, letters of either
// case map to 10-35. Any other character trips an assertion and yields 0.
static int ToNumber(char c)
{
	const bool fDecimal = (c >= '0' && c <= '9');
	const bool fLower = (c >= 'a' && c <= 'z');
	const bool fUpper = (c >= 'A' && c <= 'Z');

	if (fDecimal)
		return c - '0';
	if (fLower)
		return 10 + c - 'a';
	if (fUpper)
		return 10 + c - 'A';

	assert(false);
	return 0;
}
117
118 //------------------------------- Strings ------------------------------------------------
119
StrToI32(const char * str,int base,const char ** scan_end)120 int32_t StrToI32(const char *str, int base, const char **scan_end)
121 {
122 const char *s = str;
123 int sign = 1;
124 int32_t result = 0;
125 if (*s == '-')
126 {
127 sign = -1;
128 s++;
129 }
130 else if (*s == '+')
131 {
132 s++;
133 }
134 if (!*s)
135 {
136 // Abort if there are no digits to parse
137 if (scan_end) *scan_end = str;
138 return 0;
139 }
140 while (IsNumber(*s,base))
141 {
142 int value = ToNumber(*s++);
143 assert (value < base && value >= 0);
144 result *= base;
145 result += value;
146 }
147 if (scan_end != nullptr) *scan_end = s;
148 result *= sign;
149 return result;
150 }
151
// Copy at most iMaxL characters of szSource to sTarget and terminate the
// result (so sTarget needs room for iMaxL + 1 bytes).
// A null target is ignored; a null source leaves an empty string in sTarget.
void SCopy(const char *szSource, char *sTarget, size_t iMaxL)
{
	// Copying a string onto itself changes nothing
	if (szSource == sTarget)
		return;
	if (!sTarget)
		return;
	// The target ends up terminated even if there is no source
	*sTarget = 0;
	if (!szSource)
		return;
	for (size_t iRemaining = iMaxL; *szSource && (iRemaining > 0); --iRemaining)
		*sTarget++ = *szSource++;
	*sTarget = 0;
}
160
// Copy all of szSource (including the terminator) to sTarget; the caller
// must provide a large enough buffer.
// A null target is ignored; a null source leaves an empty string in sTarget.
void SCopy(const char *szSource, char *sTarget)
{
	// Copying a string onto itself changes nothing
	if (szSource == sTarget)
		return;
	if (!sTarget)
		return;
	*sTarget = 0;
	if (!szSource)
		return;
	// Length plus one, so the terminating zero travels along
	memcpy(sTarget, szSource, strlen(szSource) + 1);
}
167
// Copy szSource to sTarget up to (not including) a stop character cUntil.
// The first iIndex occurrences of cUntil are copied along instead of
// stopping the copy. At most iMaxL characters are copied; a negative iMaxL
// never reaches zero and thus imposes no limit. The result is terminated.
// A null target is ignored; a null source leaves an empty string in sTarget.
void SCopyUntil(const char *szSource, char *sTarget, char cUntil, int iMaxL, int iIndex)
{
	if (szSource == sTarget)
		return;
	if (!sTarget)
		return;
	*sTarget = 0;
	if (!szSource)
		return;
	for (; *szSource && (iMaxL != 0); ++szSource, ++sTarget, --iMaxL)
	{
		const bool fIsStopChar = (*szSource == cUntil);
		// Stop characters only end the copy once the skip budget is used up
		if (fIsStopChar && iIndex <= 0)
			break;
		*sTarget = *szSource;
		if (fIsStopChar)
			iIndex--;
	}
	*sTarget = 0;
}
176
// Copy szSource to sTarget up to (not including) the first character that
// occurs in the character set sUntil.
// iMaxL is the size of the target buffer including the terminator, i.e. at
// most iMaxL - 1 characters are copied. With iMaxL == 0 or a null target
// nothing is written; a null source leaves an empty string in sTarget; a
// null sUntil is treated as an empty set of stop characters.
void SCopyUntil(const char *szSource, char *sTarget, const char * sUntil, size_t iMaxL)
{
	// No room even for the terminator: bail out, since iMaxL - 1 below would
	// wrap around to the largest size_t and lift the limit entirely
	if (!sTarget || iMaxL == 0) return;
	if (!szSource) { *sTarget = 0; return; }
	const size_t iSpan = sUntil ? strcspn(szSource, sUntil) : strlen(szSource);
	const size_t n = std::min(iSpan, iMaxL - 1);
	// n never exceeds the source length, so a plain block move is enough
	memmove(sTarget, szSource, n);
	sTarget[n] = 0;
}
183
// Compare two strings up to the first occurrence of cWild in either of them.
// Returns true if the strings are identical or agree in everything before
// the wildcard character; null strings never compare equal.
bool SEqualUntil(const char *szStr1, const char *szStr2, char cWild)
{
	if (!szStr1 || !szStr2)
		return false;
	for (; *szStr1 || *szStr2; ++szStr1, ++szStr2)
	{
		// Everything up to here matched, the remainder does not matter
		if ((*szStr1 == cWild) || (*szStr2 == cWild))
			return true;
		if (*szStr1 != *szStr2)
			return false;
	}
	return true;
}
195
// Prefix test: the beginning of string 1 needs to match all of string 2.
// Null strings never match.
bool SEqual2(const char *szStr1, const char *szStr2)
{
	if (!szStr1 || !szStr2)
		return false;
	// Walk string 2 to its end; a premature end of string 1 shows up as a mismatch
	for (; *szStr2; ++szStr1, ++szStr2)
		if (*szStr1 != *szStr2)
			return false;
	return true;
}
206
SEqualNoCase(const char * szStr1,const char * szStr2,int iLen)207 bool SEqualNoCase(const char *szStr1, const char *szStr2, int iLen)
208 {
209 if (!szStr1 || !szStr2) return false;
210 if (iLen==0) return true;
211 while (*szStr1 && *szStr2)
212 {
213 if ( CharCapital(*szStr1++) != CharCapital(*szStr2++)) return false;
214 if (iLen>0) { iLen--; if (iLen==0) return true; }
215 }
216 if (*szStr1 || *szStr2) return false;
217 return true;
218 }
219
SEqual2NoCase(const char * szStr1,const char * szStr2,int iLen)220 bool SEqual2NoCase(const char *szStr1, const char *szStr2, int iLen)
221 {
222 if (!szStr1 || !szStr2) return false;
223 if (iLen==0) return true;
224 while (*szStr1 && *szStr2)
225 {
226 if ( CharCapital(*szStr1++) != CharCapital(*szStr2++)) return false;
227 if (iLen>0) { iLen--; if (iLen==0) return true; }
228 }
229 if (*szStr2) return false; // Str1 is shorter
230 return true;
231 }
232
// Index of the occurrence number iIndex (counting from 0) of cTarget in
// szInStr, or -1 if the string is null or has too few occurrences.
int SCharPos(char cTarget, const char *szInStr, int iIndex)
{
	if (!szInStr)
		return -1;
	for (int iPos = 0; szInStr[iPos]; ++iPos)
	{
		if (szInStr[iPos] != cTarget)
			continue;
		if (iIndex == 0)
			return iPos;
		// Not the requested occurrence yet
		--iIndex;
	}
	return -1;
}
246
// Index of the last occurrence of cTarget in szInStr, or -1 if the string
// is null or does not contain the character.
int SCharLastPos(char cTarget, const char *szInStr)
{
	if (!szInStr)
		return -1;
	int iLastHit = -1;
	for (int iPos = 0; szInStr[iPos]; ++iPos)
		if (szInStr[iPos] == cTarget)
			iLastHit = iPos;
	return iLastHit;
}
256
SAppend(const char * szSource,char * szTarget,int iMaxL)257 void SAppend(const char *szSource, char *szTarget, int iMaxL)
258 {
259 if (iMaxL == -1)
260 SCopy(szSource, szTarget + SLen(szTarget));
261 else
262 SCopy(szSource, szTarget + SLen(szTarget), iMaxL - SLen(szTarget));
263 }
264
// Append a single character to szStr. The buffer must have room for one
// more character; a null string is ignored.
void SAppendChar(char cChar, char *szStr)
{
	if (!szStr)
		return;
	char *pEnd = szStr + strlen(szStr);
	// Overwrite the old terminator and place a new one behind it
	pEnd[0] = cChar;
	pEnd[1] = 0;
}
272
SCopySegment(const char * szString,int iSegment,char * sTarget,char cSeparator,int iMaxL,bool fSkipWhitespace)273 bool SCopySegment(const char *szString, int iSegment, char *sTarget,
274 char cSeparator, int iMaxL, bool fSkipWhitespace)
275 {
276 // Advance to indexed segment
277 while (iSegment>0)
278 {
279 if (SCharPos(cSeparator,szString) == -1)
280 { sTarget[0]=0; return false; }
281 szString += SCharPos(cSeparator,szString)+1;
282 iSegment--;
283 }
284 // Advance whitespace
285 if (fSkipWhitespace)
286 szString = SAdvanceSpace(szString);
287 // Copy segment contents
288 SCopyUntil(szString,sTarget,cSeparator,iMaxL);
289 return true;
290 }
291
SCopySegmentEx(const char * szString,int iSegment,char * sTarget,char cSep1,char cSep2,int iMaxL,bool fSkipWhitespace)292 bool SCopySegmentEx(const char *szString, int iSegment, char *sTarget,
293 char cSep1, char cSep2, int iMaxL, bool fSkipWhitespace)
294 {
295 // Advance to indexed segment
296 while (iSegment>0)
297 {
298 // use the separator that's closer
299 int iPos1 = SCharPos(cSep1,szString), iPos2 = SCharPos(cSep2,szString);
300 if (iPos1 == -1)
301 if (iPos2 == -1)
302 { sTarget[0]=0; return false; }
303 else
304 iPos1=iPos2;
305 else if (iPos2 != -1 && iPos2 < iPos1)
306 iPos1 = iPos2;
307 szString += iPos1+1;
308 iSegment--;
309 }
310 // Advance whitespace
311 if (fSkipWhitespace)
312 szString = SAdvanceSpace(szString);
313 // Copy segment contents; use separator that's closer
314 int iPos1 = SCharPos(cSep1,szString), iPos2 = SCharPos(cSep2,szString);
315 if (iPos2 != -1 && (iPos2 < iPos1 || iPos1 == -1)) cSep1 = cSep2;
316 SCopyUntil(szString,sTarget,cSep1,iMaxL);
317 return true;
318 }
319
// Count the occurrences of cTarget in szInStr. If cpUntil points into the
// string, counting stops there (the character at cpUntil is not included);
// otherwise the whole string is scanned. A null string contains nothing.
unsigned int SCharCount(char cTarget, const char *szInStr, const char *cpUntil)
{
	// Same null handling as the other scanning helpers
	if (!szInStr) return 0;
	unsigned int iResult = 0;
	for (const char *pScan = szInStr; *pScan && pScan != cpUntil; ++pScan)
		if (*pScan == cTarget)
			iResult++;
	return iResult;
}
336
SCharCountEx(const char * szString,const char * szCharList)337 unsigned int SCharCountEx(const char *szString, const char *szCharList)
338 {
339 unsigned int iResult = 0;
340 while ( *szCharList )
341 {
342 iResult += SCharCount( *szCharList, szString );
343 szCharList++;
344 }
345 return iResult;
346 }
347
// Replace every occurrence of fc in str by tc, in place. Null is ignored.
void SReplaceChar(char *str, char fc, char tc)
{
	if (!str)
		return;
	for (char *pCur = str; *pCur; ++pCur)
		if (*pCur == fc)
			*pCur = tc;
}
353
SCapitalize(char * str)354 void SCapitalize(char *str)
355 {
356 while (str && *str)
357 {
358 *str=CharCapital(*str);
359 str++;
360 }
361 }
362
// Find the first occurrence of szIndex in szString.
// Returns a pointer to the position directly behind the match, or nullptr
// if there is no match or either argument is null.
const char *SSearch(const char *szString, const char *szIndex)
{
	if (!szString || !szIndex) return nullptr;
	const size_t indexlen = strlen(szIndex);
	// Behaviour for an empty search term, kept as it always was: step over
	// one character of a non-empty string, fail on an empty string
	if (!indexlen) return *szString ? szString + 1 : nullptr;
	// A real substring search: a failed partial match must not swallow the
	// start of the actual match (e.g. "ab" within "aab")
	const char *pMatch = strstr(szString, szIndex);
	return pMatch ? pMatch + indexlen : nullptr;
}
377
SSearchNoCase(const char * szString,const char * szIndex)378 const char *SSearchNoCase(const char *szString, const char *szIndex)
379 {
380 const char *cscr;
381 size_t indexlen,match=0;
382 if (!szString || !szIndex) return nullptr;
383 indexlen=SLen(szIndex);
384 for (cscr=szString; cscr && *cscr; cscr++)
385 {
386 if (CharCapital(*cscr)==CharCapital(szIndex[match])) match++;
387 else match=0;
388 if (match>=indexlen) return cscr+1;
389 }
390 return nullptr;
391 }
392
// Break szText into lines in place: whenever the running line length has
// reached iMaxLine, the most recently seen cSpace is replaced by cSepa.
// Existing cSepa characters reset the running line length. Text without
// any cSpace is left untouched; a null text is ignored.
void SWordWrap(char *szText, char cSpace, char cSepa, int iMaxLine)
{
	if (!szText)
		return;
	char *pBreakCandidate = nullptr;
	int iRun = 0;
	char *pCur = szText;
	while (*pCur)
	{
		// Remember where a break could go
		if (*pCur == cSpace)
			pBreakCandidate = pCur;
		// An existing separator starts a new line
		if (*pCur == cSepa)
			iRun = 0;
		// Line is full: break at the remembered space, if there is one
		if (iRun >= iMaxLine && pBreakCandidate)
		{
			*pBreakCandidate = cSepa;
			iRun = pCur - pBreakCandidate;
		}
		++iRun;
		++pCur;
	}
}
413
SAdvanceSpace(const char * szSPos)414 const char *SAdvanceSpace(const char *szSPos)
415 {
416 if (!szSPos) return nullptr;
417 while (IsWhiteSpace(*szSPos)) szSPos++;
418 return szSPos;
419 }
420
SRewindSpace(const char * szSPos,const char * pBegin)421 const char *SRewindSpace(const char *szSPos, const char *pBegin)
422 {
423 if (!szSPos || !pBegin) return nullptr;
424 while (IsWhiteSpace(*szSPos))
425 {
426 szSPos--;
427 if (szSPos<pBegin) return nullptr;
428 }
429 return szSPos;
430 }
431
// Position directly behind the first occurrence of cPast, or the end of
// the string if cPast does not occur. Null stays null.
const char *SAdvancePast(const char *szSPos, char cPast)
{
	if (!szSPos)
		return nullptr;
	for (; *szSPos; ++szSPos)
		if (*szSPos == cPast)
			return szSPos + 1;
	return szSPos;
}
442
SCopyIdentifier(const char * szSource,char * sTarget,int iMaxL)443 void SCopyIdentifier(const char *szSource, char *sTarget, int iMaxL)
444 {
445 if (!szSource || !sTarget) return;
446 while (IsIdentifier(*szSource))
447 {
448 if (iMaxL==1) { *sTarget++ = *szSource++; break; }
449 iMaxL--;
450 *sTarget++ = *szSource++;
451 }
452 *sTarget=0;
453 }
454
// Remove all leading and trailing occurrences of cClear from szString, in
// place. Returns the number of characters removed; null yields 0.
int SClearFrontBack(char *szString, char cClear)
{
	if (!szString) return 0;
	int cleared = 0;
	// Skip the leading run
	const char *pFirstKept = szString;
	while (*pFirstKept && (*pFirstKept == cClear)) { ++pFirstKept; ++cleared; }
	size_t len = strlen(pFirstKept);
	// Source and destination overlap, so this has to be memmove
	if (pFirstKept != szString) memmove(szString, pFirstKept, len + 1);
	// Cut the trailing run. Working with the length instead of a pointer
	// avoids stepping in front of the buffer for an empty string. Index 0
	// needs no check: after the front pass it holds either a character
	// other than cClear or the terminator.
	while (len > 1 && szString[len - 1] == cClear)
	{
		szString[--len] = 0x00;
		++cleared;
	}
	return cleared;
}
467
SNewSegment(char * szStr,const char * szSepa)468 void SNewSegment(char *szStr, const char *szSepa)
469 {
470 if (szStr[0]) SAppend(szSepa,szStr);
471 }
472
// 1-based line number of cpPosition within szText: one plus the number of
// line feeds (0x0A) in front of cpPosition. Returns 0 for null arguments.
int SGetLine(const char *szText, const char *cpPosition)
{
	if (!szText || !cpPosition)
		return 0;
	int iLine = 1;
	for (const char *pScan = szText; *pScan && (pScan < cpPosition); ++pScan)
		if (*pScan == 0x0A)
			++iLine;
	return iLine;
}
484
// 1-based column of cpPosition within its line of szText. Line feeds
// (0x0A) restart at column 1, tabs advance to the next tab stop.
// Returns 0 for null arguments.
int SLineGetCharacters(const char *szText, const char *cpPosition)
{
	if (!szText || !cpPosition)
		return 0;
	int iColumn = 1;
	for (const char *pScan = szText; *pScan && (pScan < cpPosition); ++pScan)
	{
		switch (*pScan)
		{
		case 0x0A:
			iColumn = 1;
			break;
		case '\t':
			// assume a tab stop every 8 characters
			iColumn = ((iColumn - 1 + 8) & ~7) + 1;
			break;
		default:
			++iColumn;
			break;
		}
	}
	return iColumn;
}
502
// Insert szInsert into szString in front of the character at iPosition.
// A non-negative iMaxLen is the maximum length the resulting string may
// have; if the insertion would exceed it, nothing is done. The caller has
// to provide a buffer large enough for the result.
// Null or empty arguments and positions outside the string are ignored.
void SInsert(char *szString, const char *szInsert, int iPosition, int iMaxLen)
{
	// Safety
	if (!szString || !szInsert || !szInsert[0]) return;
	const size_t oldlen = strlen(szString);
	const size_t insertlen = strlen(szInsert);
	// Inserting anywhere but inside the string (or at its end) would
	// read and write outside of it
	if (iPosition < 0 || static_cast<size_t>(iPosition) > oldlen) return;
	if (iMaxLen >= 0 && oldlen + insertlen > static_cast<size_t>(iMaxLen)) return;
	char *pGap = szString + iPosition;
	// Move up string remainder, including the terminator
	memmove(pGap + insertlen, pGap, oldlen - iPosition + 1);
	// Copy insertion
	memmove(pGap, szInsert, insertlen);
}
514
// Delete iLen characters from szString, starting at iPosition, in place.
// The range is clipped to the string: a length reaching beyond the end
// truncates the string at iPosition. Null strings, non-positive lengths and
// positions outside the string are ignored.
void SDelete(char *szString, int iLen, int iPosition)
{
	// Safety
	if (!szString || iLen <= 0 || iPosition < 0) return;
	const size_t len = strlen(szString);
	const size_t start = static_cast<size_t>(iPosition);
	if (start >= len) return;
	// Never look beyond the terminator
	const size_t count = std::min(static_cast<size_t>(iLen), len - start);
	// Move down string remainder, including the terminator; the ranges
	// overlap, so this has to be memmove
	memmove(szString + start, szString + start + count, len - start - count + 1);
}
522
SCopyEnclosed(const char * szSource,char cOpen,char cClose,char * sTarget,int iSize)523 bool SCopyEnclosed(const char *szSource, char cOpen, char cClose, char *sTarget, int iSize)
524 {
525 int iPos,iLen;
526 if (!szSource || !sTarget) return false;
527 if ((iPos = SCharPos(cOpen,szSource)) < 0) return false;
528 if ((iLen = SCharPos(cClose,szSource+iPos+1)) < 0) return false;
529 SCopy(szSource+iPos+1,sTarget,std::min(iLen,iSize));
530 return true;
531 }
532
SGetModule(const char * szList,int iIndex,char * sTarget,int iSize)533 bool SGetModule(const char *szList, int iIndex, char *sTarget, int iSize)
534 {
535 if (!szList || !sTarget) return false;
536 if (!SCopySegment(szList,iIndex,sTarget,';',iSize)) return false;
537 SClearFrontBack(sTarget);
538 return true;
539 }
540
SIsModule(const char * szList,const char * szString,int * ipIndex,bool fCaseSensitive)541 bool SIsModule(const char *szList, const char *szString, int *ipIndex, bool fCaseSensitive)
542 {
543 char szModule[1024+1];
544 // Compare all modules
545 for (int iMod=0; SGetModule(szList,iMod,szModule,1024); iMod++)
546 if (fCaseSensitive ? SEqual(szString,szModule) : SEqualNoCase(szString,szModule))
547 {
548 // Provide index if desired
549 if (ipIndex) *ipIndex = iMod;
550 // Found
551 return true;
552 }
553 // Not found
554 return false;
555 }
556
SAddModule(char * szList,const char * szModule,bool fCaseSensitive)557 bool SAddModule(char *szList, const char *szModule, bool fCaseSensitive)
558 {
559 // Safety / no empties
560 if (!szList || !szModule || !szModule[0]) return false;
561 // Already a module?
562 if (SIsModule(szList,szModule,nullptr,fCaseSensitive)) return false;
563 // New segment, add string
564 SNewSegment(szList);
565 SAppend(szModule,szList);
566 // Success
567 return true;
568 }
569
SAddModules(char * szList,const char * szModules,bool fCaseSensitive)570 bool SAddModules(char *szList, const char *szModules, bool fCaseSensitive)
571 {
572 // Safety / no empties
573 if (!szList || !szModules || !szModules[0]) return false;
574 // Add modules
575 char szModule[1024+1]; // limited
576 for (int cnt=0; SGetModule(szModules,cnt,szModule,1024); cnt++)
577 SAddModule(szList,szModule,fCaseSensitive);
578 // Success
579 return true;
580 }
581
SRemoveModule(char * szList,const char * szModule,bool fCaseSensitive)582 bool SRemoveModule(char *szList, const char *szModule, bool fCaseSensitive)
583 {
584 int iMod,iPos,iLen;
585 // Not a module
586 if (!SIsModule(szList,szModule,&iMod,fCaseSensitive)) return false;
587 // Get module start
588 iPos = 0;
589 if (iMod > 0) iPos = SCharPos(';',szList,iMod-1)+1;
590 // Get module length
591 iLen = SCharPos(';',szList+iPos);
592 if (iLen<0) iLen=SLen(szList); else iLen+=1;
593 // Delete module
594 SDelete(szList,iLen,iPos);
595 // Success
596 return true;
597 }
598
SRemoveModules(char * szList,const char * szModules,bool fCaseSensitive)599 bool SRemoveModules(char *szList, const char *szModules, bool fCaseSensitive)
600 {
601 // Safety / no empties
602 if (!szList || !szModules || !szModules[0]) return false;
603 // Remove modules
604 char szModule[1024+1]; // limited
605 for (int cnt=0; SGetModule(szModules,cnt,szModule,1024); cnt++)
606 SRemoveModule(szList,szModule,fCaseSensitive);
607 // Success
608 return true;
609 }
610
// Number of non-empty entries in a ';'-separated module list. Entries
// consisting of spaces only do not count; null yields 0.
int SModuleCount(const char *szList)
{
	if (!szList)
		return 0;
	int iModules = 0;
	// True until the first real character of the current entry was seen
	bool fAwaitingContent = true;
	for (const char *pScan = szList; *pScan; ++pScan)
	{
		if (*pScan == ' ')
			continue;
		if (*pScan == ';')
		{
			fAwaitingContent = true;
			continue;
		}
		if (fAwaitingContent)
			++iModules;
		fAwaitingContent = false;
	}
	return iModules;
}
628
// Match szString against the pattern szWildcard, where '*' stands for any
// (possibly empty) run of characters and '?' for exactly one character.
// The whole string has to be covered by the pattern. Null never matches.
bool SWildcardMatchEx(const char *szString, const char *szWildcard)
{
	// safety
	if (!szString || !szWildcard)
		return false;
	const char *pPattern = szWildcard;
	const char *pText = szString;
	// Where to resume after a mismatch: directly behind the last '*' seen,
	// and the text position that '*' is currently assumed to extend to
	const char *pRetryPattern = nullptr;
	const char *pRetryText = nullptr;
	while (*pPattern || pRetryPattern)
	{
		if (*pPattern == '*')
		{
			// string wildcard: remember it as the new backtracking point
			++pPattern;
			pRetryPattern = pPattern;
			pRetryText = pText;
			continue;
		}
		// nothing left to match?
		if (!*pText)
			break;
		if (*pPattern == '?' || *pPattern == *pText)
		{
			// equal or one-character-wildcard: proceed
			++pPattern;
			++pText;
			continue;
		}
		// mismatch without any '*' to fall back on: match failed
		if (!pRetryText)
			return false;
		// backtrack: let the last '*' swallow one more character
		pPattern = pRetryPattern;
		pText = ++pRetryText;
	}
	// match complete if both strings are fully matched
	return !*pPattern && !*pText;
}
655
// UTF-8 conformance checking
namespace
{
	// Indexed by byte value: how many continuation bytes must follow a byte
	// with this value? Negative values mean that this byte can never start
	// a valid UTF-8 sequence:
	//  - 0x80..0xBF are the entries of value -1 in the middle of the table.
	//  - 0xC0 and 0xC1 can never occur in UTF-8 because they would mark the
	//    beginning of an overlong encoding of characters below 0x80.
	//  - 0xF5..0xFD are invalid because they can only be used to encode
	//    characters beyond the Unicode range. (While the encoding scheme
	//    allows more than three trailing bytes in principle, it is not
	//    actually allowed for UTF-8.)
	//  - 0xFE and 0xFF are marked invalid as well.
	constexpr int utf8_continuation_byte_table[256] =
	{
		// 0x00..0x7F
		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x00..0x0F
		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x10..0x1F
		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x20..0x2F
		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x30..0x3F
		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x40..0x4F
		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x50..0x5F
		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x60..0x6F
		 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // 0x70..0x7F
		// 0x80..0xBF
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x80..0x8F
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x90..0x9F
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xA0..0xAF
		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0xB0..0xBF
		// 0xC0..0xFF
		-1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, // 0xC0..0xCF
		 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, // 0xD0..0xDF
		 2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, // 0xE0..0xEF
		 3,  3,  3,  3,  3, -3, -3, -3, -4, -4, -4, -4, -5, -5, -1, -1  // 0xF0..0xFF
	};

	// Indexed by the number of continuation bytes: which is the lowest
	// character value that may be encoded using this many continuation
	// bytes? (Anything lower would be an overlong encoding.)
	constexpr uint32_t utf8_min_char_value[4] =
	{
		0, 0x80, 0x800, 0x10000
	};
}
695
IsValidUtf8(const char * text,int length)696 bool IsValidUtf8(const char *text, int length)
697 {
698 // Intentionally using a C-style cast to always get a uint8_t* from char*;
699 // reinterpret_cast would fail here on platforms that have unsigned char,
700 // while static_cast would fail on platforms with a signed char type
701 const uint8_t *input = (const uint8_t*)(text);
702
703 for (const uint8_t *cursor = input; length < 0 ? *cursor != 0 : cursor - input < length; ++cursor)
704 {
705 int continuation_bytes = utf8_continuation_byte_table[*cursor];
706 if (continuation_bytes < 0)
707 return false;
708 else if (continuation_bytes == 0)
709 {
710 // Standard 7-bit ASCII value (i.e., 1 byte codepoint)
711 continue;
712 }
713 else if (length >= 0 && cursor - input + continuation_bytes >= length)
714 {
715 // Too few remaining bytes
716 return false;
717 }
718
719 // Compute character value, so we can detect overlong sequences
720 assert((*cursor & 0xC0) == 0xC0);
721 uint32_t value = *cursor;
722 // strip length bits off the start byte
723 value &= (0xFF >> (continuation_bytes + 1));
724 for (int byte = 0; byte < continuation_bytes; ++byte)
725 {
726 // check that this is actually a continuation byte
727 if ((cursor[byte + 1] & 0xC0) != 0x80)
728 return false;
729 // merge continuation byte into value
730 value <<= 6;
731 value |= cursor[byte + 1] & 0x3F;
732 }
733 // make sure this is not overlong
734 if (value < utf8_min_char_value[continuation_bytes])
735 return false;
736 // and also not beyond 0x10FFFF
737 if (value > 0x10FFFF)
738 return false;
739 // and also not a wrongly encoded UTF-16 surrogate half
740 if (value >= 0xD800 && value <= 0xDFFF)
741 return false;
742 cursor += continuation_bytes;
743 }
744 // Looks fine
745 return true;
746 }
747
// UTF-8 iteration
// Decode the multi-byte UTF-8 sequence at *pszString and advance
// *pszString behind it. The byte at *pszString is expected to have its
// highest bit set.
// Returns the decoded value, or U+FFFD (replacement character) if the byte
// cannot start a sequence or a continuation byte is missing. In the latter
// case *pszString is left at the offending byte, so neither the string
// terminator nor the start of the next character is skipped.
uint32_t GetNextUTF8Character(const char **pszString)
{
	// assume the current character is UTF8 already (i.e., highest bit set)
	const uint32_t REPLACEMENT_CHARACTER = 0xFFFDu;
	const char *szString = *pszString;
	const unsigned char c = *szString++;
	assert(c>127);
	// The start byte determines the number of continuation bytes and
	// contributes the topmost bits of the value
	int iContinuation = 0;
	uint32_t dwResult = REPLACEMENT_CHARACTER;
	if (c>191 && c<224)
	{
		// two char code
		iContinuation = 1; dwResult = c & 31;
	}
	else if (c >= 224 && c <= 239)
	{
		// three char code
		iContinuation = 2; dwResult = c & 15;
	}
	else if (c >= 240 && c <= 247)
	{
		// four char code
		iContinuation = 3; dwResult = c & 7;
	}
	for (; iContinuation > 0; --iContinuation)
	{
		const unsigned char cNext = *szString;
		if ((cNext & 192) != 128)
		{
			// Not a continuation byte: give up on this sequence, but do not
			// consume the byte - it may well be the terminating zero
			dwResult = REPLACEMENT_CHARACTER;
			break;
		}
		dwResult = (dwResult << 6) | (cNext & 63);
		++szString;
	}
	*pszString = szString;
	return dwResult;
}
784
// Number of characters in the UTF-8 string s.
// Every start byte counts as one character together with the continuation
// bytes that actually follow it (at most as many as the start byte
// announces). Bytes that cannot start a sequence - including stray
// continuation bytes - count as one character each. The scan never moves
// beyond the terminating zero; null yields 0.
int GetCharacterCount(const char * s)
{
	if (!s) return 0;
	int l = 0;
	while (*s)
	{
		const unsigned char c = static_cast<unsigned char>(*s++);
		// How many continuation bytes does this start byte announce?
		int iContinuation = 0;
		if (c > 191 && c < 224) iContinuation = 1;
		else if (c >= 224 && c <= 239) iContinuation = 2;
		else if (c >= 240 && c <= 247) iContinuation = 3;
		// Skip only those that are really there; a cut-off sequence must
		// not carry the scan past the end of the string
		while (iContinuation > 0 && (static_cast<unsigned char>(*s) & 0xC0) == 0x80)
		{
			++s;
			--iContinuation;
		}
		++l;
	}
	return l;
}
815
// printf-style formatting into a std::string, taking a va_list.
// args itself is consumed by the formatting; the caller still has to call
// va_end on it. Throws std::invalid_argument if vsnprintf reports an error.
std::string vstrprintf(const char *format, va_list args)
{
	// First pass on a copy of the arguments, just to learn the length
	va_list argcopy;
	va_copy(argcopy, args);
	const int size = vsnprintf(nullptr, 0, format, argcopy);
	// Release the copy before any early exit: every va_copy needs its va_end
	va_end(argcopy);
	if (size < 0)
		throw std::invalid_argument("invalid argument to strprintf");
	std::string s;
	// One extra character for the terminating zero written by vsnprintf
	s.resize(size + 1);
	const int written = vsnprintf(&s[0], s.size(), format, args);
	// Do not resize to a negative length when assertions are compiled out
	if (written < 0)
		throw std::invalid_argument("invalid argument to strprintf");
	s.resize(written);
	return s;
}
831
strprintf(const char * format,...)832 std::string strprintf(const char *format, ...)
833 {
834 va_list args;
835 va_start(args, format);
836 std::string s = vstrprintf(format, args);
837 va_end(args);
838 return s;
839 }
840