1 /* 2 ============================================================================== 3 4 This file is part of the Water library. 5 Copyright (c) 2016 ROLI Ltd. 6 Copyright (C) 2017-2018 Filipe Coelho <falktx@falktx.com> 7 8 Permission is granted to use this software under the terms of the ISC license 9 http://www.isc.org/downloads/software-support-policy/isc-license/ 10 11 Permission to use, copy, modify, and/or distribute this software for any 12 purpose with or without fee is hereby granted, provided that the above 13 copyright notice and this permission notice appear in all copies. 14 15 THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD 16 TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND 17 FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, 18 OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 19 USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 20 TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 21 OF THIS SOFTWARE. 22 23 ============================================================================== 24 */ 25 26 #ifndef WATER_STRING_H_INCLUDED 27 #define WATER_STRING_H_INCLUDED 28 29 #include "CharPointer_UTF8.h" 30 #include "../memory/Memory.h" 31 32 #include <limits> 33 #include <string> 34 35 namespace water { 36 37 //============================================================================== 38 /** 39 The Water String class! 40 41 Using a reference-counted internal representation, these strings are fast 42 and efficient, and there are methods to do just about any operation you'll ever 43 dream of. 44 45 @see StringArray, StringPairArray 46 */ 47 class String 48 { 49 public: 50 //============================================================================== 51 /** Creates an empty string. 52 @see empty 53 */ 54 String() noexcept; 55 56 /** Creates a copy of another string. 
*/ 57 String (const String& other) noexcept; 58 59 #if WATER_COMPILER_SUPPORTS_MOVE_SEMANTICS 60 String (String&& other) noexcept; 61 #endif 62 63 /** Creates a string from a zero-terminated ascii text string. 64 65 The string passed-in must not contain any characters with a value above 127, because 66 these can't be converted to unicode without knowing the original encoding that was 67 used to create the string. If you attempt to pass-in values above 127, you'll get an 68 assertion. 69 70 To create strings with extended characters from UTF-8, you should explicitly call 71 String (CharPointer_UTF8 ("my utf8 string..")). It's *highly* recommended that you 72 use UTF-8 with escape characters in your source code to represent extended characters, 73 because there's no other way to represent unicode strings in a way that isn't dependent 74 on the compiler, source code editor and platform. 75 */ 76 String (const char* text); 77 78 /** Creates a string from a string of 8-bit ascii characters. 79 80 The string passed-in must not contain any characters with a value above 127, because 81 these can't be converted to unicode without knowing the original encoding that was 82 used to create the string. If you attempt to pass-in values above 127, you'll get an 83 assertion. 84 85 To create strings with extended characters from UTF-8, you should explicitly call 86 String (CharPointer_UTF8 ("my utf8 string..")). It's *highly* recommended that you 87 use UTF-8 with escape characters in your source code to represent extended characters, 88 because there's no other way to represent unicode strings in a way that isn't dependent 89 on the compiler, source code editor and platform. 90 91 This will use up to the first maxChars characters of the string (or less if the string 92 is actually shorter). 
93 */ 94 String (const char* text, size_t maxChars); 95 96 //============================================================================== 97 /** Creates a string from a UTF-8 character string */ 98 String (const CharPointer_UTF8 text); 99 100 /** Creates a string from a UTF-8 character string */ 101 String (const CharPointer_UTF8 text, size_t maxChars); 102 103 /** Creates a string from a UTF-8 character string */ 104 String (const CharPointer_UTF8 start, const CharPointer_UTF8 end); 105 106 //============================================================================== 107 /** Creates a string from a UTF-8 encoded std::string. */ 108 String (const std::string&); 109 110 /** Creates a string from a StringRef */ 111 String (StringRef); 112 113 //============================================================================== 114 /** Creates a string from a single character. */ 115 static String charToString (water_uchar character); 116 117 /** Destructor. */ 118 ~String() noexcept; 119 120 /** This is the character encoding type used internally to store the string. */ 121 typedef CharPointer_UTF8 CharPointerType; 122 123 //============================================================================== 124 /** Generates a probably-unique 32-bit hashcode from this string. */ 125 int hashCode() const noexcept; 126 127 /** Generates a probably-unique 64-bit hashcode from this string. */ 128 int64 hashCode64() const noexcept; 129 130 /** Generates a probably-unique hashcode from this string. */ 131 size_t hash() const noexcept; 132 133 /** Returns the number of characters in the string. */ 134 int length() const noexcept; 135 136 //============================================================================== 137 // Assignment and concatenation operators.. 138 139 /** Replaces this string's contents with another string. 
*/ 140 String& operator= (const String& other) noexcept; 141 142 #if WATER_COMPILER_SUPPORTS_MOVE_SEMANTICS 143 String& operator= (String&& other) noexcept; 144 #endif 145 146 /** Appends another string at the end of this one. */ 147 String& operator+= (const String& stringToAppend); 148 /** Appends another string at the end of this one. */ 149 String& operator+= (const char* textToAppend); 150 /** Appends another string at the end of this one. */ 151 String& operator+= (StringRef textToAppend); 152 /** Appends a decimal number at the end of this string. */ 153 String& operator+= (int numberToAppend); 154 /** Appends a decimal number at the end of this string. */ 155 String& operator+= (long numberToAppend); 156 /** Appends a decimal number at the end of this string. */ 157 String& operator+= (int64 numberToAppend); 158 /** Appends a decimal number at the end of this string. */ 159 String& operator+= (uint64 numberToAppend); 160 /** Appends a character at the end of this string. */ 161 String& operator+= (char characterToAppend); 162 /** Appends a character at the end of this string. */ 163 String& operator+= (water_uchar characterToAppend); 164 165 /** Appends a string to the end of this one. 166 167 @param textToAppend the string to add 168 @param maxCharsToTake the maximum number of characters to take from the string passed in 169 */ 170 void append (const String& textToAppend, size_t maxCharsToTake); 171 172 /** Appends a string to the end of this one. 173 174 @param startOfTextToAppend the start of the string to add. This must not be a nullptr 175 @param endOfTextToAppend the end of the string to add. This must not be a nullptr 176 */ 177 void appendCharPointer (const CharPointerType startOfTextToAppend, 178 const CharPointerType endOfTextToAppend); 179 180 /** Appends a string to the end of this one. 181 182 @param startOfTextToAppend the start of the string to add. This must not be a nullptr 183 @param endOfTextToAppend the end of the string to add. 
This must not be a nullptr 184 */ 185 template <class CharPointer> appendCharPointer(const CharPointer startOfTextToAppend,const CharPointer endOfTextToAppend)186 void appendCharPointer (const CharPointer startOfTextToAppend, 187 const CharPointer endOfTextToAppend) 188 { 189 wassert (startOfTextToAppend.getAddress() != nullptr && endOfTextToAppend.getAddress() != nullptr); 190 191 size_t extraBytesNeeded = 0, numChars = 1; 192 193 for (CharPointer t (startOfTextToAppend); t != endOfTextToAppend && ! t.isEmpty(); ++numChars) 194 extraBytesNeeded += CharPointerType::getBytesRequiredFor (t.getAndAdvance()); 195 196 if (extraBytesNeeded > 0) 197 { 198 const size_t byteOffsetOfNull = getByteOffsetOfEnd(); 199 200 preallocateBytes (byteOffsetOfNull + extraBytesNeeded); 201 CharPointerType (addBytesToPointer (text.getAddress(), (int) byteOffsetOfNull)) 202 .writeWithCharLimit (startOfTextToAppend, (int) numChars); 203 } 204 } 205 206 /** Appends a string to the end of this one. */ 207 void appendCharPointer (const CharPointerType textToAppend); 208 209 /** Appends a string to the end of this one. 210 211 @param textToAppend the string to add 212 @param maxCharsToTake the maximum number of characters to take from the string passed in 213 */ 214 template <class CharPointer> appendCharPointer(const CharPointer textToAppend,size_t maxCharsToTake)215 void appendCharPointer (const CharPointer textToAppend, size_t maxCharsToTake) 216 { 217 if (textToAppend.getAddress() != nullptr) 218 { 219 size_t extraBytesNeeded = 0, numChars = 1; 220 221 for (CharPointer t (textToAppend); numChars <= maxCharsToTake && ! 
t.isEmpty(); ++numChars) 222 extraBytesNeeded += CharPointerType::getBytesRequiredFor (t.getAndAdvance()); 223 224 if (extraBytesNeeded > 0) 225 { 226 const size_t byteOffsetOfNull = getByteOffsetOfEnd(); 227 228 preallocateBytes (byteOffsetOfNull + extraBytesNeeded); 229 CharPointerType (addBytesToPointer (text.getAddress(), (int) byteOffsetOfNull)) 230 .writeWithCharLimit (textToAppend, (int) numChars); 231 } 232 } 233 } 234 235 /** Appends a string to the end of this one. */ 236 template <class CharPointer> appendCharPointer(const CharPointer textToAppend)237 void appendCharPointer (const CharPointer textToAppend) 238 { 239 appendCharPointer (textToAppend, std::numeric_limits<size_t>::max()); 240 } 241 242 //============================================================================== 243 // Comparison methods.. 244 245 /** Returns true if the string contains no characters. 246 Note that there's also an isNotEmpty() method to help write readable code. 247 @see containsNonWhitespaceChars() 248 */ isEmpty()249 inline bool isEmpty() const noexcept { return text.isEmpty(); } 250 251 /** Returns true if the string contains at least one character. 252 Note that there's also an isEmpty() method to help write readable code. 253 @see containsNonWhitespaceChars() 254 */ isNotEmpty()255 inline bool isNotEmpty() const noexcept { return ! text.isEmpty(); } 256 257 /** Resets this string to be empty. */ 258 void clear() noexcept; 259 260 /** Case-insensitive comparison with another string. */ 261 bool equalsIgnoreCase (const String& other) const noexcept; 262 263 /** Case-insensitive comparison with another string. */ 264 bool equalsIgnoreCase (StringRef other) const noexcept; 265 266 /** Case-insensitive comparison with another string. */ 267 bool equalsIgnoreCase (const char* other) const noexcept; 268 269 /** Case-sensitive comparison with another string. 
270 @returns 0 if the two strings are identical; negative if this string comes before 271 the other one alphabetically, or positive if it comes after it. 272 */ 273 int compare (const String& other) const noexcept; 274 275 /** Case-sensitive comparison with another string. 276 @returns 0 if the two strings are identical; negative if this string comes before 277 the other one alphabetically, or positive if it comes after it. 278 */ 279 int compare (const char* other) const noexcept; 280 281 /** Case-insensitive comparison with another string. 282 @returns 0 if the two strings are identical; negative if this string comes before 283 the other one alphabetically, or positive if it comes after it. 284 */ 285 int compareIgnoreCase (const String& other) const noexcept; 286 287 /** Compares two strings, taking into account textual characteristics like numbers and spaces. 288 289 This comparison is case-insensitive and can detect words and embedded numbers in the 290 strings, making it good for sorting human-readable lists of things like filenames. 291 292 @returns 0 if the two strings are identical; negative if this string comes before 293 the other one alphabetically, or positive if it comes after it. 294 */ 295 int compareNatural (StringRef other, bool isCaseSensitive = false) const noexcept; 296 297 /** Tests whether the string begins with another string. 298 If the parameter is an empty string, this will always return true. 299 Uses a case-sensitive comparison. 300 */ 301 bool startsWith (StringRef text) const noexcept; 302 303 /** Tests whether the string begins with a particular character. 304 If the character is 0, this will always return false. 305 Uses a case-sensitive comparison. 306 */ 307 bool startsWithChar (water_uchar character) const noexcept; 308 309 /** Tests whether the string begins with another string. 310 If the parameter is an empty string, this will always return true. 311 Uses a case-insensitive comparison. 
312 */ 313 bool startsWithIgnoreCase (StringRef text) const noexcept; 314 315 /** Tests whether the string ends with another string. 316 If the parameter is an empty string, this will always return true. 317 Uses a case-sensitive comparison. 318 */ 319 bool endsWith (StringRef text) const noexcept; 320 321 /** Tests whether the string ends with a particular character. 322 If the character is 0, this will always return false. 323 Uses a case-sensitive comparison. 324 */ 325 bool endsWithChar (water_uchar character) const noexcept; 326 327 /** Tests whether the string ends with another string. 328 If the parameter is an empty string, this will always return true. 329 Uses a case-insensitive comparison. 330 */ 331 bool endsWithIgnoreCase (StringRef text) const noexcept; 332 333 /** Tests whether the string contains another substring. 334 If the parameter is an empty string, this will always return true. 335 Uses a case-sensitive comparison. 336 */ 337 bool contains (StringRef text) const noexcept; 338 339 /** Tests whether the string contains a particular character. 340 Uses a case-sensitive comparison. 341 */ 342 bool containsChar (water_uchar character) const noexcept; 343 344 /** Tests whether the string contains another substring. 345 Uses a case-insensitive comparison. 346 */ 347 bool containsIgnoreCase (StringRef text) const noexcept; 348 349 /** Tests whether the string contains another substring as a distinct word. 350 351 @returns true if the string contains this word, surrounded by 352 non-alphanumeric characters 353 @see indexOfWholeWord, containsWholeWordIgnoreCase 354 */ 355 bool containsWholeWord (StringRef wordToLookFor) const noexcept; 356 357 /** Tests whether the string contains another substring as a distinct word. 
358 359 @returns true if the string contains this word, surrounded by 360 non-alphanumeric characters 361 @see indexOfWholeWordIgnoreCase, containsWholeWord 362 */ 363 bool containsWholeWordIgnoreCase (StringRef wordToLookFor) const noexcept; 364 365 /** Finds an instance of another substring if it exists as a distinct word. 366 367 @returns if the string contains this word, surrounded by non-alphanumeric characters, 368 then this will return the index of the start of the substring. If it isn't 369 found, then it will return -1 370 @see indexOfWholeWordIgnoreCase, containsWholeWord 371 */ 372 int indexOfWholeWord (StringRef wordToLookFor) const noexcept; 373 374 /** Finds an instance of another substring if it exists as a distinct word. 375 376 @returns if the string contains this word, surrounded by non-alphanumeric characters, 377 then this will return the index of the start of the substring. If it isn't 378 found, then it will return -1 379 @see indexOfWholeWord, containsWholeWordIgnoreCase 380 */ 381 int indexOfWholeWordIgnoreCase (StringRef wordToLookFor) const noexcept; 382 383 /** Looks for any of a set of characters in the string. 384 Uses a case-sensitive comparison. 385 386 @returns true if the string contains any of the characters from 387 the string that is passed in. 388 */ 389 bool containsAnyOf (StringRef charactersItMightContain) const noexcept; 390 391 /** Looks for a set of characters in the string. 392 Uses a case-sensitive comparison. 393 394 @returns Returns false if any of the characters in this string do not occur in 395 the parameter string. If this string is empty, the return value will 396 always be true. 397 */ 398 bool containsOnly (StringRef charactersItMightContain) const noexcept; 399 400 /** Returns true if this string contains any non-whitespace characters. 401 402 This will return false if the string contains only whitespace characters, or 403 if it's empty. 404 405 It is equivalent to calling "myString.trim().isNotEmpty()". 
406 */ 407 bool containsNonWhitespaceChars() const noexcept; 408 409 /** Returns true if the string matches this simple wildcard expression. 410 411 So for example String ("abcdef").matchesWildcard ("*DEF", true) would return true. 412 413 This isn't a full-blown regex though! The only wildcard characters supported 414 are "*" and "?". It's mainly intended for filename pattern matching. 415 */ 416 bool matchesWildcard (StringRef wildcard, bool ignoreCase) const noexcept; 417 418 //============================================================================== 419 // Substring location methods.. 420 421 /** Searches for a character inside this string. 422 Uses a case-sensitive comparison. 423 @returns the index of the first occurrence of the character in this 424 string, or -1 if it's not found. 425 */ 426 int indexOfChar (water_uchar characterToLookFor) const noexcept; 427 428 /** Searches for a character inside this string. 429 Uses a case-sensitive comparison. 430 @param startIndex the index from which the search should proceed 431 @param characterToLookFor the character to look for 432 @returns the index of the first occurrence of the character in this 433 string, or -1 if it's not found. 434 */ 435 int indexOfChar (int startIndex, water_uchar characterToLookFor) const noexcept; 436 437 /** Returns the index of the first character that matches one of the characters 438 passed-in to this method. 439 440 This scans the string, beginning from the startIndex supplied, and if it finds 441 a character that appears in the string charactersToLookFor, it returns its index. 442 443 If none of these characters are found, it returns -1. 444 445 If ignoreCase is true, the comparison will be case-insensitive. 446 447 @see indexOfChar, lastIndexOfAnyOf 448 */ 449 int indexOfAnyOf (StringRef charactersToLookFor, 450 int startIndex = 0, 451 bool ignoreCase = false) const noexcept; 452 453 /** Searches for a substring within this string. 454 Uses a case-sensitive comparison. 
455 @returns the index of the first occurrence of this substring, or -1 if it's not found. 456 If textToLookFor is an empty string, this will always return 0. 457 */ 458 int indexOf (StringRef textToLookFor) const noexcept; 459 460 /** Searches for a substring within this string. 461 Uses a case-sensitive comparison. 462 @param startIndex the index from which the search should proceed 463 @param textToLookFor the string to search for 464 @returns the index of the first occurrence of this substring, or -1 if it's not found. 465 If textToLookFor is an empty string, this will always return -1. 466 */ 467 int indexOf (int startIndex, StringRef textToLookFor) const noexcept; 468 469 /** Searches for a substring within this string. 470 Uses a case-insensitive comparison. 471 @returns the index of the first occurrence of this substring, or -1 if it's not found. 472 If textToLookFor is an empty string, this will always return 0. 473 */ 474 int indexOfIgnoreCase (StringRef textToLookFor) const noexcept; 475 476 /** Searches for a substring within this string. 477 Uses a case-insensitive comparison. 478 @param startIndex the index from which the search should proceed 479 @param textToLookFor the string to search for 480 @returns the index of the first occurrence of this substring, or -1 if it's not found. 481 If textToLookFor is an empty string, this will always return -1. 482 */ 483 int indexOfIgnoreCase (int startIndex, StringRef textToLookFor) const noexcept; 484 485 /** Searches for a character inside this string (working backwards from the end of the string). 486 Uses a case-sensitive comparison. 487 @returns the index of the last occurrence of the character in this string, or -1 if it's not found. 488 */ 489 int lastIndexOfChar (water_uchar character) const noexcept; 490 491 /** Searches for a substring inside this string (working backwards from the end of the string). 492 Uses a case-sensitive comparison. 
493 @returns the index of the start of the last occurrence of the substring within this string, 494 or -1 if it's not found. If textToLookFor is an empty string, this will always return -1. 495 */ 496 int lastIndexOf (StringRef textToLookFor) const noexcept; 497 498 /** Searches for a substring inside this string (working backwards from the end of the string). 499 Uses a case-insensitive comparison. 500 @returns the index of the start of the last occurrence of the substring within this string, or -1 501 if it's not found. If textToLookFor is an empty string, this will always return -1. 502 */ 503 int lastIndexOfIgnoreCase (StringRef textToLookFor) const noexcept; 504 505 /** Returns the index of the last character in this string that matches one of the 506 characters passed-in to this method. 507 508 This scans the string backwards, starting from its end, and if it finds 509 a character that appears in the string charactersToLookFor, it returns its index. 510 511 If none of these characters are found, it returns -1. 512 513 If ignoreCase is true, the comparison will be case-insensitive. 514 515 @see lastIndexOf, indexOfAnyOf 516 */ 517 int lastIndexOfAnyOf (StringRef charactersToLookFor, 518 bool ignoreCase = false) const noexcept; 519 520 521 //============================================================================== 522 // Substring extraction and manipulation methods.. 523 524 /** Returns the character at this index in the string. 525 In a release build, no checks are made to see if the index is within a valid range, so be 526 careful! In a debug build, the index is checked and an assertion fires if it's out-of-range. 527 528 Also beware that depending on the encoding format that the string is using internally, this 529 method may execute in either O(1) or O(n) time, so be careful when using it in your algorithms. 
530 If you're scanning through a string to inspect its characters, you should never use this operator 531 for random access, it's far more efficient to call getCharPointer() to return a pointer, and 532 then to use that to iterate the string. 533 @see getCharPointer 534 */ 535 water_uchar operator[] (int index) const noexcept; 536 537 /** Returns the final character of the string. 538 If the string is empty this will return 0. 539 */ 540 water_uchar getLastCharacter() const noexcept; 541 542 //============================================================================== 543 /** Returns a subsection of the string. 544 545 If the range specified is beyond the limits of the string, as much as 546 possible is returned. 547 548 @param startIndex the index of the start of the substring needed 549 @param endIndex all characters from startIndex up to (but not including) 550 this index are returned 551 @see fromFirstOccurrenceOf, dropLastCharacters, getLastCharacters, upToFirstOccurrenceOf 552 */ 553 String substring (int startIndex, int endIndex) const; 554 555 /** Returns a section of the string, starting from a given position. 556 557 @param startIndex the first character to include. If this is beyond the end 558 of the string, an empty string is returned. If it is zero or 559 less, the whole string is returned. 560 @returns the substring from startIndex up to the end of the string 561 @see dropLastCharacters, getLastCharacters, fromFirstOccurrenceOf, upToFirstOccurrenceOf, fromLastOccurrenceOf 562 */ 563 String substring (int startIndex) const; 564 565 /** Returns a version of this string with a number of characters removed 566 from the end. 567 568 @param numberToDrop the number of characters to drop from the end of the 569 string. If this is greater than the length of the string, 570 an empty string will be returned. If zero or less, the 571 original string will be returned. 
572 @see substring, fromFirstOccurrenceOf, upToFirstOccurrenceOf, fromLastOccurrenceOf, getLastCharacter 573 */ 574 String dropLastCharacters (int numberToDrop) const; 575 576 /** Returns a number of characters from the end of the string. 577 578 This returns the last numCharacters characters from the end of the string. If the 579 string is shorter than numCharacters, the whole string is returned. 580 581 @see substring, dropLastCharacters, getLastCharacter 582 */ 583 String getLastCharacters (int numCharacters) const; 584 585 //============================================================================== 586 /** Returns a section of the string starting from a given substring. 587 588 This will search for the first occurrence of the given substring, and 589 return the section of the string starting from the point where this is 590 found (optionally not including the substring itself). 591 592 e.g. for the string "123456", fromFirstOccurrenceOf ("34", true) would return "3456", and 593 fromFirstOccurrenceOf ("34", false) would return "56". 594 595 If the substring isn't found, the method will return an empty string. 596 597 If ignoreCase is true, the comparison will be case-insensitive. 598 599 @see upToFirstOccurrenceOf, fromLastOccurrenceOf 600 */ 601 String fromFirstOccurrenceOf (StringRef substringToStartFrom, 602 bool includeSubStringInResult, 603 bool ignoreCase) const; 604 605 /** Returns a section of the string starting from the last occurrence of a given substring. 606 607 Similar to fromFirstOccurrenceOf(), but using the last occurrence of the substring, and 608 unlike fromFirstOccurrenceOf(), if the substring isn't found, this method will 609 return the whole of the original string. 610 611 @see fromFirstOccurrenceOf, upToLastOccurrenceOf 612 */ 613 String fromLastOccurrenceOf (StringRef substringToFind, 614 bool includeSubStringInResult, 615 bool ignoreCase) const; 616 617 /** Returns the start of this string, up to the first occurrence of a substring. 
618 619 This will search for the first occurrence of a given substring, and then 620 return a copy of the string, up to the position of this substring, 621 optionally including or excluding the substring itself in the result. 622 623 e.g. for the string "123456", upTo ("34", false) would return "12", and 624 upTo ("34", true) would return "1234". 625 626 If the substring isn't found, this will return the whole of the original string. 627 628 @see upToLastOccurrenceOf, fromFirstOccurrenceOf 629 */ 630 String upToFirstOccurrenceOf (StringRef substringToEndWith, 631 bool includeSubStringInResult, 632 bool ignoreCase) const; 633 634 /** Returns the start of this string, up to the last occurrence of a substring. 635 636 Similar to upToFirstOccurrenceOf(), but this finds the last occurrence rather than the first. 637 If the substring isn't found, this will return the whole of the original string. 638 639 @see upToFirstOccurrenceOf, fromFirstOccurrenceOf 640 */ 641 String upToLastOccurrenceOf (StringRef substringToFind, 642 bool includeSubStringInResult, 643 bool ignoreCase) const; 644 645 //============================================================================== 646 /** Returns a copy of this string with any whitespace characters removed from the start and end. */ 647 String trim() const; 648 649 /** Returns a copy of this string with any whitespace characters removed from the start. */ 650 String trimStart() const; 651 652 /** Returns a copy of this string with any whitespace characters removed from the end. */ 653 String trimEnd() const; 654 655 /** Returns a copy of this string, having removed a specified set of characters from its start. 656 Characters are removed from the start of the string until it finds one that is not in the 657 specified set, and then it stops. 658 @param charactersToTrim the set of characters to remove. 
659 @see trim, trimStart, trimCharactersAtEnd 660 */ 661 String trimCharactersAtStart (StringRef charactersToTrim) const; 662 663 /** Returns a copy of this string, having removed a specified set of characters from its end. 664 Characters are removed from the end of the string until it finds one that is not in the 665 specified set, and then it stops. 666 @param charactersToTrim the set of characters to remove. 667 @see trim, trimEnd, trimCharactersAtStart 668 */ 669 String trimCharactersAtEnd (StringRef charactersToTrim) const; 670 671 //============================================================================== 672 /** Returns an upper-case version of this string. */ 673 String toUpperCase() const; 674 675 /** Returns an lower-case version of this string. */ 676 String toLowerCase() const; 677 678 //============================================================================== 679 /** Replaces a sub-section of the string with another string. 680 681 This will return a copy of this string, with a set of characters 682 from startIndex to startIndex + numCharsToReplace removed, and with 683 a new string inserted in their place. 684 685 Note that this is a const method, and won't alter the string itself. 686 687 @param startIndex the first character to remove. If this is beyond the bounds of the string, 688 it will be constrained to a valid range. 689 @param numCharactersToReplace the number of characters to remove. If zero or less, no 690 characters will be taken out. 691 @param stringToInsert the new string to insert at startIndex after the characters have been 692 removed. 693 */ 694 String replaceSection (int startIndex, 695 int numCharactersToReplace, 696 StringRef stringToInsert) const; 697 698 /** Replaces all occurrences of a substring with another string. 699 700 Returns a copy of this string, with any occurrences of stringToReplace 701 swapped for stringToInsertInstead. 702 703 Note that this is a const method, and won't alter the string itself. 
704 */ 705 String replace (StringRef stringToReplace, 706 StringRef stringToInsertInstead, 707 bool ignoreCase = false) const; 708 709 /** Returns a string with all occurrences of a character replaced with a different one. */ 710 String replaceCharacter (water_uchar characterToReplace, 711 water_uchar characterToInsertInstead) const; 712 713 /** Replaces a set of characters with another set. 714 715 Returns a string in which each character from charactersToReplace has been replaced 716 by the character at the equivalent position in newCharacters (so the two strings 717 passed in must be the same length). 718 719 e.g. replaceCharacters ("abc", "def") replaces 'a' with 'd', 'b' with 'e', etc. 720 721 Note that this is a const method, and won't affect the string itself. 722 */ 723 String replaceCharacters (StringRef charactersToReplace, 724 StringRef charactersToInsertInstead) const; 725 726 /** Returns a version of this string that only retains a fixed set of characters. 727 728 This will return a copy of this string, omitting any characters which are not 729 found in the string passed-in. 730 731 e.g. for "1122334455", retainCharacters ("432") would return "223344" 732 733 Note that this is a const method, and won't alter the string itself. 734 */ 735 String retainCharacters (StringRef charactersToRetain) const; 736 737 /** Returns a version of this string with a set of characters removed. 738 739 This will return a copy of this string, omitting any characters which are 740 found in the string passed-in. 741 742 e.g. for "1122334455", removeCharacters ("432") would return "1155" 743 744 Note that this is a const method, and won't alter the string itself. 745 */ 746 String removeCharacters (StringRef charactersToRemove) const; 747 748 /** Returns a section from the start of the string that only contains a certain set of characters. 
749 750 This returns the leftmost section of the string, up to (and not including) the 751 first character that doesn't appear in the string passed in. 752 */ 753 String initialSectionContainingOnly (StringRef permittedCharacters) const; 754 755 /** Returns a section from the start of the string that doesn't contain any of a certain set of characters. 756 757 This returns the leftmost section of the string, up to (and not including) the 758 first character that occurs in the string passed in. (If none of the specified 759 characters are found in the string, the return value will just be the original string). 760 */ 761 String initialSectionNotContaining (StringRef charactersToStopAt) const; 762 763 //============================================================================== 764 /** Checks whether the string might be in quotation marks. 765 766 @returns true if the string begins with a quote character (either a double or single quote). 767 It is also true if there is whitespace before the quote, but it doesn't check the end of the string. 768 @see unquoted, quoted 769 */ 770 bool isQuotedString() const; 771 772 /** Removes quotation marks from around the string (if there are any). 773 774 Returns a copy of this string with any quotes removed from its ends. Quotes that aren't 775 at the ends of the string are not affected. If there aren't any quotes, the original string 776 is returned. 777 778 Note that this is a const method, and won't alter the string itself. 779 780 @see isQuotedString, quoted 781 */ 782 String unquoted() const; 783 784 /** Adds quotation marks around a string. 785 This will return a copy of the string with a quote at the start and end, (but won't 786 add the quote if there's already one there, so it's safe to call this on strings that 787 may already have quotes around them). 788 Note that this is a const method, and won't alter the string itself. 
789 @param quoteCharacter the character to add at the start and end 790 @see isQuotedString, unquoted 791 */ 792 String quoted (water_uchar quoteCharacter = '"') const; 793 794 //============================================================================== 795 /** Creates a string which is a version of a string repeated and joined together. 796 797 @param stringToRepeat the string to repeat 798 @param numberOfTimesToRepeat how many times to repeat it 799 */ 800 static String repeatedString (StringRef stringToRepeat, 801 int numberOfTimesToRepeat); 802 803 /** Returns a copy of this string with the specified character repeatedly added to its 804 beginning until the total length is at least the minimum length specified. @param padCharacter the character to add at the beginning @param minimumLength the minimum total length of the returned string @see paddedRight 805 */ 806 String paddedLeft (water_uchar padCharacter, int minimumLength) const; 807 808 /** Returns a copy of this string with the specified character repeatedly added to its 809 end until the total length is at least the minimum length specified. @param padCharacter the character to add at the end @param minimumLength the minimum total length of the returned string @see paddedLeft 810 */ 811 String paddedRight (water_uchar padCharacter, int minimumLength) const; 812 813 /** Creates a string from data in an unknown format. 814 815 This looks at some binary data and tries to guess whether it's Unicode 816 or 8-bit characters, then returns a string that represents it correctly. 817 818 Should be able to handle Unicode endianness correctly, by looking at 819 the first two bytes. @param data the binary data to interpret @param size the size of the data (presumably in bytes - confirm against the implementation) 820 */ 821 static String createStringFromData (const void* data, int size); 822 823 /** Creates a String from a printf-style parameter list. 824 825 I don't like this method. I don't use it myself, and I recommend avoiding it and 826 using the operator<< methods or pretty much anything else instead. It's only provided 827 here because of the popular unrest that was stirred-up when I tried to remove it... 828 829 If you're really determined to use it, at least make sure that you never, ever, 830 pass any String objects to it as parameters. NOTE(review): formatString is taken by value rather than by reference, presumably because a reference cannot portably be the last named parameter before the variable arguments - confirm before changing the signature. 831 */ 832 static String formatted (const String formatString, ... 
); 833 834 //============================================================================== 835 // Numeric conversions.. 836 837 /** Creates a string containing this signed 32-bit integer as a decimal number. 838 @see getIntValue, getFloatValue, getDoubleValue, toHexString 839 */ 840 explicit String (int decimalInteger); 841 842 /** Creates a string containing this unsigned 32-bit integer as a decimal number. 843 @see getIntValue, getFloatValue, getDoubleValue, toHexString 844 */ 845 explicit String (unsigned int decimalInteger); 846 847 /** Creates a string containing this signed 16-bit integer as a decimal number. 848 @see getIntValue, getFloatValue, getDoubleValue, toHexString 849 */ 850 explicit String (short decimalInteger); 851 852 /** Creates a string containing this unsigned 16-bit integer as a decimal number. 853 @see getIntValue, getFloatValue, getDoubleValue, toHexString 854 */ 855 explicit String (unsigned short decimalInteger); 856 857 /** Creates a string containing this signed 64-bit integer as a decimal number. 858 @see getLargeIntValue, getFloatValue, getDoubleValue, toHexString 859 */ 860 explicit String (int64 largeIntegerValue); 861 862 /** Creates a string containing this unsigned 64-bit integer as a decimal number. 863 @see getLargeIntValue, getFloatValue, getDoubleValue, toHexString 864 */ 865 explicit String (uint64 largeIntegerValue); 866 867 /** Creates a string containing this signed long integer as a decimal number. 868 @see getIntValue, getFloatValue, getDoubleValue, toHexString 869 */ 870 explicit String (long decimalInteger); 871 872 /** Creates a string containing this unsigned long integer as a decimal number. 873 @see getIntValue, getFloatValue, getDoubleValue, toHexString 874 */ 875 explicit String (unsigned long decimalInteger); 876 877 /** Creates a string representing this floating-point number. 
878 @param floatValue the value to convert to a string 879 @see getDoubleValue, getIntValue 880 */ 881 explicit String (float floatValue); 882 883 /** Creates a string representing this floating-point number. 884 @param doubleValue the value to convert to a string 885 @see getFloatValue, getIntValue 886 */ 887 explicit String (double doubleValue); 888 889 /** Creates a string representing this floating-point number. 890 @param floatValue the value to convert to a string 891 @param numberOfDecimalPlaces if this is > 0, it will format the number using that many 892 decimal places, and will not use exponent notation. If 0 or 893 less, it will use exponent notation if necessary. 894 @see getDoubleValue, getIntValue 895 */ 896 String (float floatValue, int numberOfDecimalPlaces); 897 898 /** Creates a string representing this floating-point number. 899 @param doubleValue the value to convert to a string 900 @param numberOfDecimalPlaces if this is > 0, it will format the number using that many 901 decimal places, and will not use exponent notation. If 0 or 902 less, it will use exponent notation if necessary. 903 @see getFloatValue, getIntValue 904 */ 905 String (double doubleValue, int numberOfDecimalPlaces); 906 907 /** Reads the value of the string as a decimal number (up to 32 bits in size). 908 909 @returns the value of the string as a 32-bit signed base-10 integer. 910 @see getLargeIntValue, getTrailingIntValue, getHexValue32, getHexValue64 911 */ 912 int getIntValue() const noexcept; 913 914 /** Reads the value of the string as a decimal number (up to 64 bits in size). 915 @returns the value of the string as a 64-bit signed base-10 integer. @see getIntValue, getHexValue64 916 */ 917 int64 getLargeIntValue() const noexcept; 918 919 /** Parses a decimal number from the end of the string. 920 921 This will look for a value at the end of the string. 922 e.g. for "321 xyz654" it will return 654; for "2 3 4" it'll return 4. 923 924 Negative numbers are not handled, so "xyz-5" returns 5. 
925 926 @see getIntValue 927 */ 928 int getTrailingIntValue() const noexcept; 929 930 /** Parses this string as a floating point number. 931 932 @returns the value of the string as a 32-bit floating point value. 933 @see getDoubleValue 934 */ 935 float getFloatValue() const noexcept; 936 937 /** Parses this string as a floating point number. 938 939 @returns the value of the string as a 64-bit floating point value. 940 @see getFloatValue 941 */ 942 double getDoubleValue() const noexcept; 943 944 /** Parses the string as a hexadecimal number. 945 946 Non-hexadecimal characters in the string are ignored. 947 948 If the string contains too many characters, then the least significant 949 digits are returned, e.g. "ffff12345678" would produce 0x12345678. 950 951 @returns a 32-bit number which is the value of the string in hex. @see getHexValue64, toHexString 952 */ 953 int getHexValue32() const noexcept; 954 955 /** Parses the string as a hexadecimal number. 956 957 Non-hexadecimal characters in the string are ignored. 958 959 If the string contains too many characters, then the least significant 960 digits are returned, e.g. "ffff1234567812345678" would produce 0x1234567812345678. 961 962 @returns a 64-bit number which is the value of the string in hex. @see getHexValue32, toHexString 963 */ 964 int64 getHexValue64() const noexcept; 965 966 /** Creates a string representing this 32-bit value in hexadecimal. @see getHexValue32 */ 967 static String toHexString (int number); 968 969 /** Creates a string representing this 64-bit value in hexadecimal. @see getHexValue64 */ 970 static String toHexString (int64 number); 971 972 /** Creates a string representing this 16-bit value in hexadecimal. */ 973 static String toHexString (short number); 974 975 /** Creates a string containing a hex dump of a block of binary data. 976 977 @param data the binary data to use as input 978 @param size how many bytes of data to use 979 @param groupSize how many bytes are grouped together before inserting a 980 space into the output. e.g. 
group size 0 has no spaces, 981 group size 1 looks like: "be a1 c2 ff", group size 2 looks 982 like "bea1 c2ff". 983 */ 984 static String toHexString (const void* data, int size, int groupSize = 1); 985 986 //============================================================================== 987 /** Returns the character pointer currently being used to store this string. 988 989 Because it returns a reference to the string's internal data, the pointer 990 that is returned must not be stored anywhere, as it can be deleted whenever the 991 string changes. @returns the string's internal text pointer, unmodified. @see toUTF8, toRawUTF8 992 */ 993 inline CharPointerType getCharPointer() const noexcept { return text; } 994 995 /** Returns a pointer to a UTF-8 version of this string. 996 997 Because it returns a reference to the string's internal data, the pointer 998 that is returned must not be stored anywhere, as it can be deleted whenever the 999 string changes. 1000 1001 To find out how many bytes you need to store this string as UTF-8, you can call 1002 CharPointer_UTF8::getBytesRequiredFor (myString.getCharPointer()) 1003 1004 @see toRawUTF8, getCharPointer, toUTF16, toUTF32 1005 */ 1006 CharPointer_UTF8 toUTF8() const; 1007 1008 /** Returns a raw const char* pointer to a UTF-8 version of this string. 1009 1010 Because it returns a reference to the string's internal data, the pointer 1011 that is returned must not be stored anywhere, as it can be deleted whenever the 1012 string changes. 1013 1014 To find out how many bytes you need to store this string as UTF-8, you can call 1015 CharPointer_UTF8::getBytesRequiredFor (myString.getCharPointer()) 1016 1017 @see getCharPointer, toUTF8, toUTF16, toUTF32 1018 */ 1019 const char* toRawUTF8() const; 1020 1021 /** Returns the contents of this string as a std::string, returned by value. NOTE(review): presumably UTF-8 encoded, like toRawUTF8 - confirm against the implementation. @see toRawUTF8, toUTF8 */ 1022 std::string toStdString() const; 1023 1024 //============================================================================== 1025 /** Creates a String from a UTF-8 encoded buffer. 1026 If the size is < 0, it'll keep reading until it hits a zero. 
1027 */ 1028 static String fromUTF8 (const char* utf8buffer, int bufferSizeBytes = -1); 1029 1030 /** Returns the number of bytes required to represent this string as UTF-8. 1031 The number returned does NOT include the trailing zero. 1032 @see toUTF8, copyToUTF8 1033 */ 1034 size_t getNumBytesAsUTF8() const noexcept; 1035 1036 //============================================================================== 1037 /** Copies the string to a buffer as UTF-8 characters. 1038 1039 Returns the number of bytes copied to the buffer, including the terminating null 1040 character. 1041 1042 To find out how many bytes you need to store this string as UTF-8, you can call 1043 CharPointer_UTF8::getBytesRequiredFor (myString.getCharPointer()) 1044 1045 @param destBuffer the place to copy it to; if this is a null pointer, the method just 1046 returns the number of bytes required (including the terminating null character). 1047 @param maxBufferSizeBytes the size of the destination buffer, in bytes. If the string won't fit, it'll 1048 put in as many as it can while still allowing for a terminating null char at the 1049 end, and will return the number of bytes that were actually used. 1050 @see CharPointer_UTF8::writeWithDestByteLimit, getNumBytesAsUTF8 1051 */ 1052 size_t copyToUTF8 (CharPointer_UTF8::CharType* destBuffer, size_t maxBufferSizeBytes) const noexcept; 1053 1054 //============================================================================== 1055 /** Increases the string's internally allocated storage. 1056 1057 Although the string's contents won't be affected by this call, it will 1058 increase the amount of memory allocated internally for the string to grow into. 1059 1060 If you're about to make a large number of calls to methods such 1061 as += or <<, it's more efficient to preallocate enough extra space 1062 beforehand, so that these methods won't have to keep resizing the string 1063 to append the extra characters. 
1064 1065 @param numBytesNeeded the number of bytes to allocate storage for. If this 1066 value is less than the currently allocated size, it will 1067 have no effect. 1068 */ 1069 void preallocateBytes (size_t numBytesNeeded); 1070 1071 /** Swaps the contents of this string with another one. 1072 This is a very fast operation, as no allocation or copying needs to be done. 1073 */ 1074 void swapWith (String& other) noexcept; 1075 1076 //============================================================================== 1077 #if 0 //def CARLA_OS_MAC 1078 /** OSX ONLY - Creates a String from an OSX CFString. (Currently compiled out by the enclosing #if 0.) */ 1079 static String fromCFString (CFStringRef cfString); 1080 1081 /** OSX ONLY - Converts this string to a CFString. 1082 Remember that you must use CFRelease() to free the returned string when you're 1083 finished with it. (Currently compiled out by the enclosing #if 0.) 1084 */ 1085 CFStringRef toCFString() const; 1086 #endif 1087 1088 #ifdef CARLA_OS_MAC 1089 /** OSX ONLY - Returns a copy of this string in which any decomposed unicode characters have 1090 been converted to their precomposed equivalents. */ 1091 String convertToPrecomposedUnicode() const; 1092 #endif 1093 1094 /** Returns the number of String objects which are currently sharing the same internal 1095 data as this one. 1096 */ 1097 int getReferenceCount() const noexcept; 1098 1099 private: 1100 //============================================================================== 1101 /** The character pointer holding this string's data; this is the value that getCharPointer() returns. */ CharPointerType text; 1102 1103 //============================================================================== 1104 /** Holds a byte count (numBytes); it is the argument type of the private preallocating String constructor declared below. */ struct PreallocationBytes 1105 { 1106 explicit PreallocationBytes (size_t) noexcept; 1107 size_t numBytes; 1108 }; 1109 1110 explicit String (const PreallocationBytes&); // This constructor preallocates a certain amount of memory 1111 size_t getByteOffsetOfEnd() const noexcept; 1112 }; 1113 1114 //============================================================================== 1115 /** Concatenates two strings. 
*/ 1116 String operator+ (const char* string1, const String& string2); 1117 /** Concatenates a character and a string, with the character first. */ 1118 String operator+ (char string1, const String& string2); 1119 /** Concatenates a character and a string, with the character first. */ 1120 String operator+ (water_uchar string1, const String& string2); 1121 1122 /** Concatenates two strings. */ 1123 String operator+ (String string1, const String& string2); 1124 /** Concatenates two strings. */ 1125 String operator+ (String string1, const char* string2); 1126 /** Concatenates a string and a character, with the character last. */ 1127 String operator+ (String string1, char characterToAppend); 1128 /** Concatenates a string and a character, with the character last. */ 1129 String operator+ (String string1, water_uchar characterToAppend); 1130 1131 //============================================================================== 1132 /** Appends a character at the end of a string. */ 1133 String& operator<< (String& string1, char characterToAppend); 1134 /** Appends a character at the end of a string. */ 1135 String& operator<< (String& string1, water_uchar characterToAppend); 1136 1137 /** Appends a string to the end of the first one. */ 1138 String& operator<< (String& string1, const char* string2); 1139 /** Appends a string to the end of the first one. */ 1140 String& operator<< (String& string1, const String& string2); 1141 /** Appends a string to the end of the first one. */ 1142 String& operator<< (String& string1, StringRef string2); 1143 1144 /** Appends a decimal number at the end of a string. */ 1145 String& operator<< (String& string1, short number); 1146 /** Appends a decimal number at the end of a string. */ 1147 String& operator<< (String& string1, int number); 1148 /** Appends a decimal number at the end of a string. */ 1149 String& operator<< (String& string1, long number); 1150 /** Appends a decimal number at the end of a string. */ 1151 String& operator<< (String& string1, int64 number); 1152 /** Appends a decimal number at the end of a string. 
*/ 1153 String& operator<< (String& string1, uint64 number); 1154 /** Appends a decimal number at the end of a string. */ 1155 String& operator<< (String& string1, float number); 1156 /** Appends a decimal number at the end of a string. */ 1157 String& operator<< (String& string1, double number); 1158 1159 //============================================================================== 1160 /** Case-sensitive equality comparison of two strings. */ 1161 bool operator== (const String& string1, const String& string2) noexcept; 1162 /** Case-sensitive equality comparison of a string with a C string. */ 1163 bool operator== (const String& string1, const char* string2) noexcept; 1164 /** Case-sensitive equality comparison of a string with a UTF-8 character pointer. */ 1165 bool operator== (const String& string1, const CharPointer_UTF8 string2) noexcept; 1166 1167 /** Case-sensitive inequality comparison of two strings. */ 1168 bool operator!= (const String& string1, const String& string2) noexcept; 1169 /** Case-sensitive inequality comparison of a string with a C string. */ 1170 bool operator!= (const String& string1, const char* string2) noexcept; 1171 /** Case-sensitive inequality comparison of a string with a UTF-8 character pointer. */ 1172 bool operator!= (const String& string1, const CharPointer_UTF8 string2) noexcept; 1173 1174 /** Case-sensitive "greater than" comparison of two strings. */ 1175 bool operator> (const String& string1, const String& string2) noexcept; 1176 /** Case-sensitive "less than" comparison of two strings. */ 1177 bool operator< (const String& string1, const String& string2) noexcept; 1178 /** Case-sensitive "greater than or equal" comparison of two strings. */ 1179 bool operator>= (const String& string1, const String& string2) noexcept; 1180 /** Case-sensitive "less than or equal" comparison of two strings. */ 1181 bool operator<= (const String& string1, const String& string2) noexcept; 1182 1183 //============================================================================== 1184 /** This operator allows you to write a water String directly to std output streams. 1185 This is handy for writing strings to std::cout, std::cerr, etc. 
1186 */ 1187 template <class traits> 1188 std::basic_ostream <char, traits>& operator<< (std::basic_ostream <char, traits>& stream, const String& stringToWrite) 1189 { 1190 return stream << stringToWrite.toRawUTF8(); 1191 } 1192 1193 /** Writes a string to an OutputStream as UTF-8. */ 1194 OutputStream& operator<< (OutputStream& stream, const String& stringToWrite); 1195 1196 /** Writes a string to an OutputStream as UTF-8. */ 1197 OutputStream& operator<< (OutputStream& stream, StringRef stringToWrite); 1198 1199 //============================================================================== 1200 /** Holds a pair of character pointers, start and end, and converts implicitly to a String constructed from that pair via String (start, end). */ struct StartEndString { 1201 StartEndString (String::CharPointerType s, String::CharPointerType e) noexcept : start (s), end (e) {} 1202 operator String() const { return String (start, end); } 1203 String::CharPointerType start, end; 1204 }; 1205 1206 } 1207 1208 #include "StringRef.h" 1209 1210 #endif // WATER_STRING_H_INCLUDED 1211