1 /* 2 ============================================================================== 3 4 This file is part of the Water library. 5 Copyright (c) 2016 ROLI Ltd. 6 Copyright (C) 2017 Filipe Coelho <falktx@falktx.com> 7 8 Permission is granted to use this software under the terms of the ISC license 9 http://www.isc.org/downloads/software-support-policy/isc-license/ 10 11 Permission to use, copy, modify, and/or distribute this software for any 12 purpose with or without fee is hereby granted, provided that the above 13 copyright notice and this permission notice appear in all copies. 14 15 THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD 16 TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 18 OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 19 USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 20 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 21 OF THIS SOFTWARE. 22 23 ============================================================================== 24 */ 25 26 #ifndef WATER_CHARACTERFUNCTIONS_H_INCLUDED 27 #define WATER_CHARACTERFUNCTIONS_H_INCLUDED 28 29 #include "../memory/Memory.h" 30 31 #include <algorithm> 32 #include <limits> 33 34 namespace water { 35 36 /** A platform-independent 32-bit unicode character type. */ 37 typedef uint32 water_uchar; 38 39 //============================================================================== 40 /** 41 A collection of functions for manipulating characters and character strings. 42 43 Most of these methods are designed for internal use by the String and CharPointer 44 classes, but some of them may be useful to call directly. 45 46 @see String, CharPointer_UTF8, CharPointer_UTF16, CharPointer_UTF32 47 */ 48 class CharacterFunctions 49 { 50 public: 51 //============================================================================== 52 /** Converts a character to upper-case. */ 53 static water_uchar toUpperCase (water_uchar character) noexcept; 54 /** Converts a character to lower-case. */ 55 static water_uchar toLowerCase (water_uchar character) noexcept; 56 57 /** Checks whether a unicode character is upper-case. */ 58 static bool isUpperCase (water_uchar character) noexcept; 59 /** Checks whether a unicode character is lower-case. */ 60 static bool isLowerCase (water_uchar character) noexcept; 61 62 /** Checks whether a character is whitespace. */ 63 static bool isWhitespace (char character) noexcept; 64 /** Checks whether a character is whitespace. */ 65 static bool isWhitespace (water_uchar character) noexcept; 66 67 /** Checks whether a character is a digit. */ 68 static bool isDigit (char character) noexcept; 69 /** Checks whether a character is a digit. */ 70 static bool isDigit (water_uchar character) noexcept; 71 72 /** Checks whether a character is alphabetic. */ 73 static bool isLetter (char character) noexcept; 74 /** Checks whether a character is alphabetic. */ 75 static bool isLetter (water_uchar character) noexcept; 76 77 /** Checks whether a character is alphabetic or numeric. */ 78 static bool isLetterOrDigit (char character) noexcept; 79 /** Checks whether a character is alphabetic or numeric. */ 80 static bool isLetterOrDigit (water_uchar character) noexcept; 81 82 /** Checks whether a character is a printable character, i.e. alphabetic, numeric, 83 a punctuation character or a space. 84 */ 85 static bool isPrintable (char character) noexcept; 86 87 /** Checks whether a character is a printable character, i.e. alphabetic, numeric, 88 a punctuation character or a space. 89 */ 90 static bool isPrintable (water_uchar character) noexcept; 91 92 /** Returns 0 to 16 for '0' to 'F", or -1 for characters that aren't a legal hex digit. */ 93 static int getHexDigitValue (water_uchar digit) noexcept; 94 95 /** Converts a byte of Windows 1252 codepage to unicode. */ 96 static water_uchar getUnicodeCharFromWindows1252Codepage (uint8 windows1252Char) noexcept; 97 98 //============================================================================== 99 /** Parses a character string to read a floating-point number. 100 Note that this will advance the pointer that is passed in, leaving it at 101 the end of the number. 102 */ 103 template <typename CharPointerType> readDoubleValue(CharPointerType & text)104 static double readDoubleValue (CharPointerType& text) noexcept 105 { 106 double result[3] = { 0 }, accumulator[2] = { 0 }; 107 int exponentAdjustment[2] = { 0 }, exponentAccumulator[2] = { -1, -1 }; 108 int exponent = 0, decPointIndex = 0, digit = 0; 109 int lastDigit = 0, numSignificantDigits = 0; 110 bool isNegative = false, digitsFound = false; 111 const int maxSignificantDigits = 15 + 2; 112 113 text = text.findEndOfWhitespace(); 114 water_uchar c = *text; 115 116 switch (c) 117 { 118 case '-': isNegative = true; // fall-through.. 119 case '+': c = *++text; 120 } 121 122 switch (c) 123 { 124 case 'n': 125 case 'N': 126 if ((text[1] == 'a' || text[1] == 'A') && (text[2] == 'n' || text[2] == 'N')) 127 return std::numeric_limits<double>::quiet_NaN(); 128 break; 129 130 case 'i': 131 case 'I': 132 if ((text[1] == 'n' || text[1] == 'N') && (text[2] == 'f' || text[2] == 'F')) 133 return std::numeric_limits<double>::infinity(); 134 break; 135 } 136 137 for (;;) 138 { 139 if (text.isDigit()) 140 { 141 lastDigit = digit; 142 digit = (int) text.getAndAdvance() - '0'; 143 digitsFound = true; 144 145 if (decPointIndex != 0) 146 exponentAdjustment[1]++; 147 148 if (numSignificantDigits == 0 && digit == 0) 149 continue; 150 151 if (++numSignificantDigits > maxSignificantDigits) 152 { 153 if (digit > 5) 154 ++accumulator [decPointIndex]; 155 else if (digit == 5 && (lastDigit & 1) != 0) 156 ++accumulator [decPointIndex]; 157 158 if (decPointIndex > 0) 159 exponentAdjustment[1]--; 160 else 161 exponentAdjustment[0]++; 162 163 while (text.isDigit()) 164 { 165 ++text; 166 if (decPointIndex == 0) 167 exponentAdjustment[0]++; 168 } 169 } 170 else 171 { 172 const double maxAccumulatorValue = (double) ((std::numeric_limits<unsigned int>::max() - 9) / 10); 173 if (accumulator [decPointIndex] > maxAccumulatorValue) 174 { 175 result [decPointIndex] = mulexp10 (result [decPointIndex], exponentAccumulator [decPointIndex]) 176 + accumulator [decPointIndex]; 177 accumulator [decPointIndex] = 0; 178 exponentAccumulator [decPointIndex] = 0; 179 } 180 181 accumulator [decPointIndex] = accumulator[decPointIndex] * 10 + digit; 182 exponentAccumulator [decPointIndex]++; 183 } 184 } 185 else if (decPointIndex == 0 && *text == '.') 186 { 187 ++text; 188 decPointIndex = 1; 189 190 if (numSignificantDigits > maxSignificantDigits) 191 { 192 while (text.isDigit()) 193 ++text; 194 break; 195 } 196 } 197 else 198 { 199 break; 200 } 201 } 202 203 result[0] = mulexp10 (result[0], exponentAccumulator[0]) + accumulator[0]; 204 205 if (decPointIndex != 0) 206 result[1] = mulexp10 (result[1], exponentAccumulator[1]) + accumulator[1]; 207 208 c = *text; 209 if ((c == 'e' || c == 'E') && digitsFound) 210 { 211 bool negativeExponent = false; 212 213 switch (*++text) 214 { 215 case '-': negativeExponent = true; // fall-through.. 216 case '+': ++text; 217 } 218 219 while (text.isDigit()) 220 exponent = (exponent * 10) + ((int) text.getAndAdvance() - '0'); 221 222 if (negativeExponent) 223 exponent = -exponent; 224 } 225 226 double r = mulexp10 (result[0], exponent + exponentAdjustment[0]); 227 if (decPointIndex != 0) 228 r += mulexp10 (result[1], exponent - exponentAdjustment[1]); 229 230 return isNegative ? -r : r; 231 } 232 233 /** Parses a character string, to read a floating-point value. */ 234 template <typename CharPointerType> getDoubleValue(CharPointerType text)235 static double getDoubleValue (CharPointerType text) noexcept 236 { 237 return readDoubleValue (text); 238 } 239 240 //============================================================================== 241 /** Parses a character string, to read an integer value. */ 242 template <typename IntType, typename CharPointerType> getIntValue(const CharPointerType text)243 static IntType getIntValue (const CharPointerType text) noexcept 244 { 245 IntType v = 0; 246 CharPointerType s (text.findEndOfWhitespace()); 247 248 const bool isNeg = *s == '-'; 249 if (isNeg) 250 ++s; 251 252 for (;;) 253 { 254 const water_uchar c = s.getAndAdvance(); 255 256 if (c >= '0' && c <= '9') 257 v = v * 10 + (IntType) (c - '0'); 258 else 259 break; 260 } 261 262 return isNeg ? -v : v; 263 } 264 265 template <typename ResultType> 266 struct HexParser 267 { 268 template <typename CharPointerType> parseHexParser269 static ResultType parse (CharPointerType t) noexcept 270 { 271 ResultType result = 0; 272 273 while (! t.isEmpty()) 274 { 275 const int hexValue = CharacterFunctions::getHexDigitValue (t.getAndAdvance()); 276 277 if (hexValue >= 0) 278 result = (result << 4) | hexValue; 279 } 280 281 return result; 282 } 283 }; 284 285 //============================================================================== 286 /** Counts the number of characters in a given string, stopping if the count exceeds 287 a specified limit. */ 288 template <typename CharPointerType> lengthUpTo(CharPointerType text,const size_t maxCharsToCount)289 static size_t lengthUpTo (CharPointerType text, const size_t maxCharsToCount) noexcept 290 { 291 size_t len = 0; 292 293 while (len < maxCharsToCount && text.getAndAdvance() != 0) 294 ++len; 295 296 return len; 297 } 298 299 /** Counts the number of characters in a given string, stopping if the count exceeds 300 a specified end-pointer. */ 301 template <typename CharPointerType> lengthUpTo(CharPointerType start,const CharPointerType end)302 static size_t lengthUpTo (CharPointerType start, const CharPointerType end) noexcept 303 { 304 size_t len = 0; 305 306 while (start < end && start.getAndAdvance() != 0) 307 ++len; 308 309 return len; 310 } 311 312 /** Copies null-terminated characters from one string to another. */ 313 template <typename DestCharPointerType, typename SrcCharPointerType> copyAll(DestCharPointerType & dest,SrcCharPointerType src)314 static void copyAll (DestCharPointerType& dest, SrcCharPointerType src) noexcept 315 { 316 while (water_uchar c = src.getAndAdvance()) 317 dest.write (c); 318 319 dest.writeNull(); 320 } 321 322 /** Copies characters from one string to another, up to a null terminator 323 or a given byte size limit. */ 324 template <typename DestCharPointerType, typename SrcCharPointerType> copyWithDestByteLimit(DestCharPointerType & dest,SrcCharPointerType src,size_t maxBytesToWrite)325 static size_t copyWithDestByteLimit (DestCharPointerType& dest, SrcCharPointerType src, size_t maxBytesToWrite) noexcept 326 { 327 typename DestCharPointerType::CharType const* const startAddress = dest.getAddress(); 328 ssize_t maxBytes = (ssize_t) maxBytesToWrite; 329 maxBytes -= sizeof (typename DestCharPointerType::CharType); // (allow for a terminating null) 330 331 for (;;) 332 { 333 const water_uchar c = src.getAndAdvance(); 334 const size_t bytesNeeded = DestCharPointerType::getBytesRequiredFor (c); 335 336 maxBytes -= bytesNeeded; 337 if (c == 0 || maxBytes < 0) 338 break; 339 340 dest.write (c); 341 } 342 343 dest.writeNull(); 344 345 return (size_t) getAddressDifference (dest.getAddress(), startAddress) 346 + sizeof (typename DestCharPointerType::CharType); 347 } 348 349 /** Copies characters from one string to another, up to a null terminator 350 or a given maximum number of characters. */ 351 template <typename DestCharPointerType, typename SrcCharPointerType> copyWithCharLimit(DestCharPointerType & dest,SrcCharPointerType src,int maxChars)352 static void copyWithCharLimit (DestCharPointerType& dest, SrcCharPointerType src, int maxChars) noexcept 353 { 354 while (--maxChars > 0) 355 { 356 const water_uchar c = src.getAndAdvance(); 357 if (c == 0) 358 break; 359 360 dest.write (c); 361 } 362 363 dest.writeNull(); 364 } 365 366 /** Compares two characters. */ compare(water_uchar char1,water_uchar char2)367 static inline int compare (water_uchar char1, water_uchar char2) noexcept 368 { 369 if (int diff = static_cast<int> (char1) - static_cast<int> (char2)) 370 return diff < 0 ? -1 : 1; 371 372 return 0; 373 } 374 375 /** Compares two null-terminated character strings. */ 376 template <typename CharPointerType1, typename CharPointerType2> compare(CharPointerType1 s1,CharPointerType2 s2)377 static int compare (CharPointerType1 s1, CharPointerType2 s2) noexcept 378 { 379 for (;;) 380 { 381 const water_uchar c1 = s1.getAndAdvance(); 382 383 if (int diff = compare (c1, s2.getAndAdvance())) 384 return diff; 385 386 if (c1 == 0) 387 break; 388 } 389 390 return 0; 391 } 392 393 /** Compares two null-terminated character strings, up to a given number of characters. */ 394 template <typename CharPointerType1, typename CharPointerType2> compareUpTo(CharPointerType1 s1,CharPointerType2 s2,int maxChars)395 static int compareUpTo (CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept 396 { 397 while (--maxChars >= 0) 398 { 399 const water_uchar c1 = s1.getAndAdvance(); 400 401 if (int diff = compare (c1, s2.getAndAdvance())) 402 return diff; 403 404 if (c1 == 0) 405 break; 406 } 407 408 return 0; 409 } 410 411 /** Compares two characters, using a case-independant match. */ compareIgnoreCase(water_uchar char1,water_uchar char2)412 static inline int compareIgnoreCase (water_uchar char1, water_uchar char2) noexcept 413 { 414 return char1 != char2 ? compare (toUpperCase (char1), toUpperCase (char2)) : 0; 415 } 416 417 /** Compares two null-terminated character strings, using a case-independant match. */ 418 template <typename CharPointerType1, typename CharPointerType2> compareIgnoreCase(CharPointerType1 s1,CharPointerType2 s2)419 static int compareIgnoreCase (CharPointerType1 s1, CharPointerType2 s2) noexcept 420 { 421 for (;;) 422 { 423 const water_uchar c1 = s1.getAndAdvance(); 424 425 if (int diff = compareIgnoreCase (c1, s2.getAndAdvance())) 426 return diff; 427 428 if (c1 == 0) 429 break; 430 } 431 432 return 0; 433 } 434 435 /** Compares two null-terminated character strings, using a case-independent match. */ 436 template <typename CharPointerType1, typename CharPointerType2> compareIgnoreCaseUpTo(CharPointerType1 s1,CharPointerType2 s2,int maxChars)437 static int compareIgnoreCaseUpTo (CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept 438 { 439 while (--maxChars >= 0) 440 { 441 const water_uchar c1 = s1.getAndAdvance(); 442 443 if (int diff = compareIgnoreCase (c1, s2.getAndAdvance())) 444 return diff; 445 446 if (c1 == 0) 447 break; 448 } 449 450 return 0; 451 } 452 453 /** Finds the character index of a given substring in another string. 454 Returns -1 if the substring is not found. 455 */ 456 template <typename CharPointerType1, typename CharPointerType2> indexOf(CharPointerType1 textToSearch,const CharPointerType2 substringToLookFor)457 static int indexOf (CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept 458 { 459 int index = 0; 460 const int substringLength = (int) substringToLookFor.length(); 461 462 for (;;) 463 { 464 if (textToSearch.compareUpTo (substringToLookFor, substringLength) == 0) 465 return index; 466 467 if (textToSearch.getAndAdvance() == 0) 468 return -1; 469 470 ++index; 471 } 472 } 473 474 /** Returns a pointer to the first occurrence of a substring in a string. 475 If the substring is not found, this will return a pointer to the string's 476 null terminator. 477 */ 478 template <typename CharPointerType1, typename CharPointerType2> find(CharPointerType1 textToSearch,const CharPointerType2 substringToLookFor)479 static CharPointerType1 find (CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept 480 { 481 const int substringLength = (int) substringToLookFor.length(); 482 483 while (textToSearch.compareUpTo (substringToLookFor, substringLength) != 0 484 && ! textToSearch.isEmpty()) 485 ++textToSearch; 486 487 return textToSearch; 488 } 489 490 /** Returns a pointer to the first occurrence of a substring in a string. 491 If the substring is not found, this will return a pointer to the string's 492 null terminator. 493 */ 494 template <typename CharPointerType> find(CharPointerType textToSearch,const water_uchar charToLookFor)495 static CharPointerType find (CharPointerType textToSearch, const water_uchar charToLookFor) noexcept 496 { 497 for (;; ++textToSearch) 498 { 499 const water_uchar c = *textToSearch; 500 501 if (c == charToLookFor || c == 0) 502 break; 503 } 504 505 return textToSearch; 506 } 507 508 /** Finds the character index of a given substring in another string, using 509 a case-independent match. 510 Returns -1 if the substring is not found. 511 */ 512 template <typename CharPointerType1, typename CharPointerType2> indexOfIgnoreCase(CharPointerType1 haystack,const CharPointerType2 needle)513 static int indexOfIgnoreCase (CharPointerType1 haystack, const CharPointerType2 needle) noexcept 514 { 515 int index = 0; 516 const int needleLength = (int) needle.length(); 517 518 for (;;) 519 { 520 if (haystack.compareIgnoreCaseUpTo (needle, needleLength) == 0) 521 return index; 522 523 if (haystack.getAndAdvance() == 0) 524 return -1; 525 526 ++index; 527 } 528 } 529 530 /** Finds the character index of a given character in another string. 531 Returns -1 if the character is not found. 532 */ 533 template <typename Type> indexOfChar(Type text,const water_uchar charToFind)534 static int indexOfChar (Type text, const water_uchar charToFind) noexcept 535 { 536 int i = 0; 537 538 while (! text.isEmpty()) 539 { 540 if (text.getAndAdvance() == charToFind) 541 return i; 542 543 ++i; 544 } 545 546 return -1; 547 } 548 549 /** Finds the character index of a given character in another string, using 550 a case-independent match. 551 Returns -1 if the character is not found. 552 */ 553 template <typename Type> indexOfCharIgnoreCase(Type text,water_uchar charToFind)554 static int indexOfCharIgnoreCase (Type text, water_uchar charToFind) noexcept 555 { 556 charToFind = CharacterFunctions::toLowerCase (charToFind); 557 int i = 0; 558 559 while (! text.isEmpty()) 560 { 561 if (text.toLowerCase() == charToFind) 562 return i; 563 564 ++text; 565 ++i; 566 } 567 568 return -1; 569 } 570 571 /** Returns a pointer to the first non-whitespace character in a string. 572 If the string contains only whitespace, this will return a pointer 573 to its null terminator. 574 */ 575 template <typename Type> findEndOfWhitespace(Type text)576 static Type findEndOfWhitespace (Type text) noexcept 577 { 578 while (text.isWhitespace()) 579 ++text; 580 581 return text; 582 } 583 584 /** Returns a pointer to the first character in the string which is found in 585 the breakCharacters string. 586 */ 587 template <typename Type, typename BreakType> findEndOfToken(Type text,const BreakType breakCharacters,const Type quoteCharacters)588 static Type findEndOfToken (Type text, const BreakType breakCharacters, const Type quoteCharacters) 589 { 590 water_uchar currentQuoteChar = 0; 591 592 while (! text.isEmpty()) 593 { 594 const water_uchar c = text.getAndAdvance(); 595 596 if (currentQuoteChar == 0 && breakCharacters.indexOf (c) >= 0) 597 { 598 --text; 599 break; 600 } 601 602 if (quoteCharacters.indexOf (c) >= 0) 603 { 604 if (currentQuoteChar == 0) 605 currentQuoteChar = c; 606 else if (currentQuoteChar == c) 607 currentQuoteChar = 0; 608 } 609 } 610 611 return text; 612 } 613 614 private: 615 static double mulexp10 (const double value, int exponent) noexcept; 616 }; 617 618 } 619 620 #endif // WATER_CHARACTERFUNCTIONS_H_INCLUDED 621