1 /*
2   ==============================================================================
3 
4    This file is part of the Water library.
5    Copyright (c) 2016 ROLI Ltd.
6    Copyright (C) 2017-2018 Filipe Coelho <falktx@falktx.com>
7 
8    Permission is granted to use this software under the terms of the ISC license
9    http://www.isc.org/downloads/software-support-policy/isc-license/
10 
11    Permission to use, copy, modify, and/or distribute this software for any
12    purpose with or without fee is hereby granted, provided that the above
13    copyright notice and this permission notice appear in all copies.
14 
15    THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH REGARD
16    TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
17    FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT,
18    OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
19    USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
20    TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
21    OF THIS SOFTWARE.
22 
23   ==============================================================================
24 */
25 
26 #ifndef WATER_STRING_H_INCLUDED
27 #define WATER_STRING_H_INCLUDED
28 
29 #include "CharPointer_UTF8.h"
30 #include "../memory/Memory.h"
31 
32 #include <limits>
33 #include <string>
34 
35 namespace water {
36 
37 //==============================================================================
38 /**
39     The Water String class!
40 
41     Using a reference-counted internal representation, these strings are fast
42     and efficient, and there are methods to do just about any operation you'll ever
43     dream of.
44 
45     @see StringArray, StringPairArray
46 */
47 class String
48 {
49 public:
50     //==============================================================================
51     /** Creates an empty string.
52         @see empty
53     */
54     String() noexcept;
55 
56     /** Creates a copy of another string. */
57     String (const String& other) noexcept;
58 
59    #if WATER_COMPILER_SUPPORTS_MOVE_SEMANTICS
60     String (String&& other) noexcept;
61    #endif
62 
63     /** Creates a string from a zero-terminated ascii text string.
64 
65         The string passed-in must not contain any characters with a value above 127, because
66         these can't be converted to unicode without knowing the original encoding that was
67         used to create the string. If you attempt to pass-in values above 127, you'll get an
68         assertion.
69 
70         To create strings with extended characters from UTF-8, you should explicitly call
71         String (CharPointer_UTF8 ("my utf8 string..")). It's *highly* recommended that you
72         use UTF-8 with escape characters in your source code to represent extended characters,
73         because there's no other way to represent unicode strings in a way that isn't dependent
74         on the compiler, source code editor and platform.
75     */
76     String (const char* text);
77 
78     /** Creates a string from a string of 8-bit ascii characters.
79 
80         The string passed-in must not contain any characters with a value above 127, because
81         these can't be converted to unicode without knowing the original encoding that was
82         used to create the string. If you attempt to pass-in values above 127, you'll get an
83         assertion.
84 
85         To create strings with extended characters from UTF-8, you should explicitly call
86         String (CharPointer_UTF8 ("my utf8 string..")). It's *highly* recommended that you
87         use UTF-8 with escape characters in your source code to represent extended characters,
88         because there's no other way to represent unicode strings in a way that isn't dependent
89         on the compiler, source code editor and platform.
90 
91         This will use up to the first maxChars characters of the string (or less if the string
92         is actually shorter).
93     */
94     String (const char* text, size_t maxChars);
95 
96     //==============================================================================
97     /** Creates a string from a UTF-8 character string */
98     String (const CharPointer_UTF8 text);
99 
100     /** Creates a string from a UTF-8 character string */
101     String (const CharPointer_UTF8 text, size_t maxChars);
102 
103     /** Creates a string from a UTF-8 character string */
104     String (const CharPointer_UTF8 start, const CharPointer_UTF8 end);
105 
106     //==============================================================================
107     /** Creates a string from a UTF-8 encoded std::string. */
108     String (const std::string&);
109 
110     /** Creates a string from a StringRef */
111     String (StringRef);
112 
113     //==============================================================================
114     /** Creates a string from a single character. */
115     static String charToString (water_uchar character);
116 
117     /** Destructor. */
118     ~String() noexcept;
119 
120     /** This is the character encoding type used internally to store the string. */
121     typedef CharPointer_UTF8 CharPointerType;
122 
123     //==============================================================================
124     /** Generates a probably-unique 32-bit hashcode from this string. */
125     int hashCode() const noexcept;
126 
127     /** Generates a probably-unique 64-bit hashcode from this string. */
128     int64 hashCode64() const noexcept;
129 
130     /** Generates a probably-unique hashcode from this string. */
131     size_t hash() const noexcept;
132 
133     /** Returns the number of characters in the string. */
134     int length() const noexcept;
135 
136     //==============================================================================
137     // Assignment and concatenation operators..
138 
139     /** Replaces this string's contents with another string. */
140     String& operator= (const String& other) noexcept;
141 
142    #if WATER_COMPILER_SUPPORTS_MOVE_SEMANTICS
143     String& operator= (String&& other) noexcept;
144    #endif
145 
146     /** Appends another string at the end of this one. */
147     String& operator+= (const String& stringToAppend);
148     /** Appends another string at the end of this one. */
149     String& operator+= (const char* textToAppend);
150     /** Appends another string at the end of this one. */
151     String& operator+= (StringRef textToAppend);
152     /** Appends a decimal number at the end of this string. */
153     String& operator+= (int numberToAppend);
154     /** Appends a decimal number at the end of this string. */
155     String& operator+= (long numberToAppend);
156     /** Appends a decimal number at the end of this string. */
157     String& operator+= (int64 numberToAppend);
158     /** Appends a decimal number at the end of this string. */
159     String& operator+= (uint64 numberToAppend);
160     /** Appends a character at the end of this string. */
161     String& operator+= (char characterToAppend);
162     /** Appends a character at the end of this string. */
163     String& operator+= (water_uchar characterToAppend);
164 
165     /** Appends a string to the end of this one.
166 
167         @param textToAppend     the string to add
168         @param maxCharsToTake   the maximum number of characters to take from the string passed in
169     */
170     void append (const String& textToAppend, size_t maxCharsToTake);
171 
172     /** Appends a string to the end of this one.
173 
174         @param startOfTextToAppend  the start of the string to add. This must not be a nullptr
175         @param endOfTextToAppend    the end of the string to add. This must not be a nullptr
176     */
177     void appendCharPointer (const CharPointerType startOfTextToAppend,
178                             const CharPointerType endOfTextToAppend);
179 
180     /** Appends a string to the end of this one.
181 
182         @param startOfTextToAppend  the start of the string to add. This must not be a nullptr
183         @param endOfTextToAppend    the end of the string to add. This must not be a nullptr
184     */
185     template <class CharPointer>
appendCharPointer(const CharPointer startOfTextToAppend,const CharPointer endOfTextToAppend)186     void appendCharPointer (const CharPointer startOfTextToAppend,
187                             const CharPointer endOfTextToAppend)
188     {
189         wassert (startOfTextToAppend.getAddress() != nullptr && endOfTextToAppend.getAddress() != nullptr);
190 
191         size_t extraBytesNeeded = 0, numChars = 1;
192 
193         for (CharPointer t (startOfTextToAppend); t != endOfTextToAppend && ! t.isEmpty(); ++numChars)
194             extraBytesNeeded += CharPointerType::getBytesRequiredFor (t.getAndAdvance());
195 
196         if (extraBytesNeeded > 0)
197         {
198             const size_t byteOffsetOfNull = getByteOffsetOfEnd();
199 
200             preallocateBytes (byteOffsetOfNull + extraBytesNeeded);
201             CharPointerType (addBytesToPointer (text.getAddress(), (int) byteOffsetOfNull))
202                 .writeWithCharLimit (startOfTextToAppend, (int) numChars);
203         }
204     }
205 
206     /** Appends a string to the end of this one. */
207     void appendCharPointer (const CharPointerType textToAppend);
208 
209     /** Appends a string to the end of this one.
210 
211         @param textToAppend     the string to add
212         @param maxCharsToTake   the maximum number of characters to take from the string passed in
213     */
214     template <class CharPointer>
appendCharPointer(const CharPointer textToAppend,size_t maxCharsToTake)215     void appendCharPointer (const CharPointer textToAppend, size_t maxCharsToTake)
216     {
217         if (textToAppend.getAddress() != nullptr)
218         {
219             size_t extraBytesNeeded = 0, numChars = 1;
220 
221             for (CharPointer t (textToAppend); numChars <= maxCharsToTake && ! t.isEmpty(); ++numChars)
222                 extraBytesNeeded += CharPointerType::getBytesRequiredFor (t.getAndAdvance());
223 
224             if (extraBytesNeeded > 0)
225             {
226                 const size_t byteOffsetOfNull = getByteOffsetOfEnd();
227 
228                 preallocateBytes (byteOffsetOfNull + extraBytesNeeded);
229                 CharPointerType (addBytesToPointer (text.getAddress(), (int) byteOffsetOfNull))
230                     .writeWithCharLimit (textToAppend, (int) numChars);
231             }
232         }
233     }
234 
235     /** Appends a string to the end of this one. */
236     template <class CharPointer>
appendCharPointer(const CharPointer textToAppend)237     void appendCharPointer (const CharPointer textToAppend)
238     {
239         appendCharPointer (textToAppend, std::numeric_limits<size_t>::max());
240     }
241 
242     //==============================================================================
243     // Comparison methods..
244 
245     /** Returns true if the string contains no characters.
246         Note that there's also an isNotEmpty() method to help write readable code.
247         @see containsNonWhitespaceChars()
248     */
isEmpty()249     inline bool isEmpty() const noexcept                    { return text.isEmpty(); }
250 
251     /** Returns true if the string contains at least one character.
252         Note that there's also an isEmpty() method to help write readable code.
253         @see containsNonWhitespaceChars()
254     */
isNotEmpty()255     inline bool isNotEmpty() const noexcept                 { return ! text.isEmpty(); }
256 
257     /** Resets this string to be empty. */
258     void clear() noexcept;
259 
260     /** Case-insensitive comparison with another string. */
261     bool equalsIgnoreCase (const String& other) const noexcept;
262 
263     /** Case-insensitive comparison with another string. */
264     bool equalsIgnoreCase (StringRef other) const noexcept;
265 
266     /** Case-insensitive comparison with another string. */
267     bool equalsIgnoreCase (const char* other) const noexcept;
268 
269     /** Case-sensitive comparison with another string.
270         @returns     0 if the two strings are identical; negative if this string comes before
271                      the other one alphabetically, or positive if it comes after it.
272     */
273     int compare (const String& other) const noexcept;
274 
275     /** Case-sensitive comparison with another string.
276         @returns     0 if the two strings are identical; negative if this string comes before
277                      the other one alphabetically, or positive if it comes after it.
278     */
279     int compare (const char* other) const noexcept;
280 
281     /** Case-insensitive comparison with another string.
282         @returns     0 if the two strings are identical; negative if this string comes before
283                      the other one alphabetically, or positive if it comes after it.
284     */
285     int compareIgnoreCase (const String& other) const noexcept;
286 
287     /** Compares two strings, taking into account textual characteristics like numbers and spaces.
288 
289         This comparison is case-insensitive and can detect words and embedded numbers in the
290         strings, making it good for sorting human-readable lists of things like filenames.
291 
292         @returns     0 if the two strings are identical; negative if this string comes before
293                      the other one alphabetically, or positive if it comes after it.
294     */
295     int compareNatural (StringRef other, bool isCaseSensitive = false) const noexcept;
296 
297     /** Tests whether the string begins with another string.
298         If the parameter is an empty string, this will always return true.
299         Uses a case-sensitive comparison.
300     */
301     bool startsWith (StringRef text) const noexcept;
302 
303     /** Tests whether the string begins with a particular character.
304         If the character is 0, this will always return false.
305         Uses a case-sensitive comparison.
306     */
307     bool startsWithChar (water_uchar character) const noexcept;
308 
309     /** Tests whether the string begins with another string.
310         If the parameter is an empty string, this will always return true.
311         Uses a case-insensitive comparison.
312     */
313     bool startsWithIgnoreCase (StringRef text) const noexcept;
314 
315     /** Tests whether the string ends with another string.
316         If the parameter is an empty string, this will always return true.
317         Uses a case-sensitive comparison.
318     */
319     bool endsWith (StringRef text) const noexcept;
320 
321     /** Tests whether the string ends with a particular character.
322         If the character is 0, this will always return false.
323         Uses a case-sensitive comparison.
324     */
325     bool endsWithChar (water_uchar character) const noexcept;
326 
327     /** Tests whether the string ends with another string.
328         If the parameter is an empty string, this will always return true.
329         Uses a case-insensitive comparison.
330     */
331     bool endsWithIgnoreCase (StringRef text) const noexcept;
332 
333     /** Tests whether the string contains another substring.
334         If the parameter is an empty string, this will always return true.
335         Uses a case-sensitive comparison.
336     */
337     bool contains (StringRef text) const noexcept;
338 
339     /** Tests whether the string contains a particular character.
340         Uses a case-sensitive comparison.
341     */
342     bool containsChar (water_uchar character) const noexcept;
343 
344     /** Tests whether the string contains another substring.
345         Uses a case-insensitive comparison.
346     */
347     bool containsIgnoreCase (StringRef text) const noexcept;
348 
349     /** Tests whether the string contains another substring as a distinct word.
350 
351         @returns    true if the string contains this word, surrounded by
352                     non-alphanumeric characters
353         @see indexOfWholeWord, containsWholeWordIgnoreCase
354     */
355     bool containsWholeWord (StringRef wordToLookFor) const noexcept;
356 
357     /** Tests whether the string contains another substring as a distinct word.
358 
359         @returns    true if the string contains this word, surrounded by
360                     non-alphanumeric characters
361         @see indexOfWholeWordIgnoreCase, containsWholeWord
362     */
363     bool containsWholeWordIgnoreCase (StringRef wordToLookFor) const noexcept;
364 
365     /** Finds an instance of another substring if it exists as a distinct word.
366 
367         @returns    if the string contains this word, surrounded by non-alphanumeric characters,
368                     then this will return the index of the start of the substring. If it isn't
369                     found, then it will return -1
370         @see indexOfWholeWordIgnoreCase, containsWholeWord
371     */
372     int indexOfWholeWord (StringRef wordToLookFor) const noexcept;
373 
374     /** Finds an instance of another substring if it exists as a distinct word.
375 
376         @returns    if the string contains this word, surrounded by non-alphanumeric characters,
377                     then this will return the index of the start of the substring. If it isn't
378                     found, then it will return -1
379         @see indexOfWholeWord, containsWholeWordIgnoreCase
380     */
381     int indexOfWholeWordIgnoreCase (StringRef wordToLookFor) const noexcept;
382 
383     /** Looks for any of a set of characters in the string.
384         Uses a case-sensitive comparison.
385 
386         @returns    true if the string contains any of the characters from
387                     the string that is passed in.
388     */
389     bool containsAnyOf (StringRef charactersItMightContain) const noexcept;
390 
391     /** Looks for a set of characters in the string.
392         Uses a case-sensitive comparison.
393 
394         @returns    Returns false if any of the characters in this string do not occur in
395                     the parameter string. If this string is empty, the return value will
396                     always be true.
397     */
398     bool containsOnly (StringRef charactersItMightContain) const noexcept;
399 
400     /** Returns true if this string contains any non-whitespace characters.
401 
402         This will return false if the string contains only whitespace characters, or
403         if it's empty.
404 
405         It is equivalent to calling "myString.trim().isNotEmpty()".
406     */
407     bool containsNonWhitespaceChars() const noexcept;
408 
409     /** Returns true if the string matches this simple wildcard expression.
410 
411         So for example String ("abcdef").matchesWildcard ("*DEF", true) would return true.
412 
413         This isn't a full-blown regex though! The only wildcard characters supported
414         are "*" and "?". It's mainly intended for filename pattern matching.
415     */
416     bool matchesWildcard (StringRef wildcard, bool ignoreCase) const noexcept;
417 
418     //==============================================================================
419     // Substring location methods..
420 
421     /** Searches for a character inside this string.
422         Uses a case-sensitive comparison.
423         @returns    the index of the first occurrence of the character in this
424                     string, or -1 if it's not found.
425     */
426     int indexOfChar (water_uchar characterToLookFor) const noexcept;
427 
428     /** Searches for a character inside this string.
429         Uses a case-sensitive comparison.
430         @param startIndex           the index from which the search should proceed
431         @param characterToLookFor   the character to look for
432         @returns            the index of the first occurrence of the character in this
433                             string, or -1 if it's not found.
434     */
435     int indexOfChar (int startIndex, water_uchar characterToLookFor) const noexcept;
436 
437     /** Returns the index of the first character that matches one of the characters
438         passed-in to this method.
439 
440         This scans the string, beginning from the startIndex supplied, and if it finds
441         a character that appears in the string charactersToLookFor, it returns its index.
442 
443         If none of these characters are found, it returns -1.
444 
445         If ignoreCase is true, the comparison will be case-insensitive.
446 
447         @see indexOfChar, lastIndexOfAnyOf
448     */
449     int indexOfAnyOf (StringRef charactersToLookFor,
450                       int startIndex = 0,
451                       bool ignoreCase = false) const noexcept;
452 
453     /** Searches for a substring within this string.
454         Uses a case-sensitive comparison.
455         @returns    the index of the first occurrence of this substring, or -1 if it's not found.
456                     If textToLookFor is an empty string, this will always return 0.
457     */
458     int indexOf (StringRef textToLookFor) const noexcept;
459 
460     /** Searches for a substring within this string.
461         Uses a case-sensitive comparison.
462         @param startIndex       the index from which the search should proceed
463         @param textToLookFor    the string to search for
464         @returns                the index of the first occurrence of this substring, or -1 if it's not found.
465                                 If textToLookFor is an empty string, this will always return -1.
466     */
467     int indexOf (int startIndex, StringRef textToLookFor) const noexcept;
468 
469     /** Searches for a substring within this string.
470         Uses a case-insensitive comparison.
471         @returns    the index of the first occurrence of this substring, or -1 if it's not found.
472                     If textToLookFor is an empty string, this will always return 0.
473     */
474     int indexOfIgnoreCase (StringRef textToLookFor) const noexcept;
475 
476     /** Searches for a substring within this string.
477         Uses a case-insensitive comparison.
478         @param startIndex       the index from which the search should proceed
479         @param textToLookFor    the string to search for
480         @returns                the index of the first occurrence of this substring, or -1 if it's not found.
481                                 If textToLookFor is an empty string, this will always return -1.
482     */
483     int indexOfIgnoreCase (int startIndex, StringRef textToLookFor) const noexcept;
484 
485     /** Searches for a character inside this string (working backwards from the end of the string).
486         Uses a case-sensitive comparison.
487         @returns    the index of the last occurrence of the character in this string, or -1 if it's not found.
488     */
489     int lastIndexOfChar (water_uchar character) const noexcept;
490 
491     /** Searches for a substring inside this string (working backwards from the end of the string).
492         Uses a case-sensitive comparison.
493         @returns    the index of the start of the last occurrence of the substring within this string,
494                     or -1 if it's not found. If textToLookFor is an empty string, this will always return -1.
495     */
496     int lastIndexOf (StringRef textToLookFor) const noexcept;
497 
498     /** Searches for a substring inside this string (working backwards from the end of the string).
499         Uses a case-insensitive comparison.
500         @returns    the index of the start of the last occurrence of the substring within this string, or -1
501                     if it's not found. If textToLookFor is an empty string, this will always return -1.
502     */
503     int lastIndexOfIgnoreCase (StringRef textToLookFor) const noexcept;
504 
505     /** Returns the index of the last character in this string that matches one of the
506         characters passed-in to this method.
507 
508         This scans the string backwards, starting from its end, and if it finds
509         a character that appears in the string charactersToLookFor, it returns its index.
510 
511         If none of these characters are found, it returns -1.
512 
513         If ignoreCase is true, the comparison will be case-insensitive.
514 
515         @see lastIndexOf, indexOfAnyOf
516     */
517     int lastIndexOfAnyOf (StringRef charactersToLookFor,
518                           bool ignoreCase = false) const noexcept;
519 
520 
521     //==============================================================================
522     // Substring extraction and manipulation methods..
523 
524     /** Returns the character at this index in the string.
525         In a release build, no checks are made to see if the index is within a valid range, so be
526         careful! In a debug build, the index is checked and an assertion fires if it's out-of-range.
527 
528         Also beware that depending on the encoding format that the string is using internally, this
529         method may execute in either O(1) or O(n) time, so be careful when using it in your algorithms.
530         If you're scanning through a string to inspect its characters, you should never use this operator
531         for random access, it's far more efficient to call getCharPointer() to return a pointer, and
532         then to use that to iterate the string.
533         @see getCharPointer
534     */
535     water_uchar operator[] (int index) const noexcept;
536 
537     /** Returns the final character of the string.
538         If the string is empty this will return 0.
539     */
540     water_uchar getLastCharacter() const noexcept;
541 
542     //==============================================================================
543     /** Returns a subsection of the string.
544 
545         If the range specified is beyond the limits of the string, as much as
546         possible is returned.
547 
548         @param startIndex   the index of the start of the substring needed
549         @param endIndex     all characters from startIndex up to (but not including)
550                             this index are returned
551         @see fromFirstOccurrenceOf, dropLastCharacters, getLastCharacters, upToFirstOccurrenceOf
552     */
553     String substring (int startIndex, int endIndex) const;
554 
555     /** Returns a section of the string, starting from a given position.
556 
557         @param startIndex   the first character to include. If this is beyond the end
558                             of the string, an empty string is returned. If it is zero or
559                             less, the whole string is returned.
560         @returns            the substring from startIndex up to the end of the string
561         @see dropLastCharacters, getLastCharacters, fromFirstOccurrenceOf, upToFirstOccurrenceOf, fromLastOccurrenceOf
562     */
563     String substring (int startIndex) const;
564 
565     /** Returns a version of this string with a number of characters removed
566         from the end.
567 
568         @param numberToDrop     the number of characters to drop from the end of the
569                                 string. If this is greater than the length of the string,
570                                 an empty string will be returned. If zero or less, the
571                                 original string will be returned.
572         @see substring, fromFirstOccurrenceOf, upToFirstOccurrenceOf, fromLastOccurrenceOf, getLastCharacter
573     */
574     String dropLastCharacters (int numberToDrop) const;
575 
576     /** Returns a number of characters from the end of the string.
577 
578         This returns the last numCharacters characters from the end of the string. If the
579         string is shorter than numCharacters, the whole string is returned.
580 
581         @see substring, dropLastCharacters, getLastCharacter
582     */
583     String getLastCharacters (int numCharacters) const;
584 
585     //==============================================================================
586     /** Returns a section of the string starting from a given substring.
587 
588         This will search for the first occurrence of the given substring, and
589         return the section of the string starting from the point where this is
590         found (optionally not including the substring itself).
591 
592         e.g. for the string "123456", fromFirstOccurrenceOf ("34", true) would return "3456", and
593                                       fromFirstOccurrenceOf ("34", false) would return "56".
594 
595         If the substring isn't found, the method will return an empty string.
596 
597         If ignoreCase is true, the comparison will be case-insensitive.
598 
599         @see upToFirstOccurrenceOf, fromLastOccurrenceOf
600     */
601     String fromFirstOccurrenceOf (StringRef substringToStartFrom,
602                                   bool includeSubStringInResult,
603                                   bool ignoreCase) const;
604 
605     /** Returns a section of the string starting from the last occurrence of a given substring.
606 
607         Similar to fromFirstOccurrenceOf(), but using the last occurrence of the substring, and
608         unlike fromFirstOccurrenceOf(), if the substring isn't found, this method will
609         return the whole of the original string.
610 
611         @see fromFirstOccurrenceOf, upToLastOccurrenceOf
612     */
613     String fromLastOccurrenceOf (StringRef substringToFind,
614                                  bool includeSubStringInResult,
615                                  bool ignoreCase) const;
616 
617     /** Returns the start of this string, up to the first occurrence of a substring.
618 
619         This will search for the first occurrence of a given substring, and then
620         return a copy of the string, up to the position of this substring,
621         optionally including or excluding the substring itself in the result.
622 
        e.g. for the string "123456", upToFirstOccurrenceOf ("34", false) would return "12", and
                                      upToFirstOccurrenceOf ("34", true) would return "1234".
625 
626         If the substring isn't found, this will return the whole of the original string.
627 
628         @see upToLastOccurrenceOf, fromFirstOccurrenceOf
629     */
630     String upToFirstOccurrenceOf (StringRef substringToEndWith,
631                                   bool includeSubStringInResult,
632                                   bool ignoreCase) const;
633 
634     /** Returns the start of this string, up to the last occurrence of a substring.
635 
636         Similar to upToFirstOccurrenceOf(), but this finds the last occurrence rather than the first.
637         If the substring isn't found, this will return the whole of the original string.
638 
639         @see upToFirstOccurrenceOf, fromFirstOccurrenceOf
640     */
641     String upToLastOccurrenceOf (StringRef substringToFind,
642                                  bool includeSubStringInResult,
643                                  bool ignoreCase) const;
644 
645     //==============================================================================
646     /** Returns a copy of this string with any whitespace characters removed from the start and end. */
647     String trim() const;
648 
649     /** Returns a copy of this string with any whitespace characters removed from the start. */
650     String trimStart() const;
651 
652     /** Returns a copy of this string with any whitespace characters removed from the end. */
653     String trimEnd() const;
654 
655     /** Returns a copy of this string, having removed a specified set of characters from its start.
656         Characters are removed from the start of the string until it finds one that is not in the
657         specified set, and then it stops.
658         @param charactersToTrim     the set of characters to remove.
659         @see trim, trimStart, trimCharactersAtEnd
660     */
661     String trimCharactersAtStart (StringRef charactersToTrim) const;
662 
663     /** Returns a copy of this string, having removed a specified set of characters from its end.
664         Characters are removed from the end of the string until it finds one that is not in the
665         specified set, and then it stops.
666         @param charactersToTrim     the set of characters to remove.
667         @see trim, trimEnd, trimCharactersAtStart
668     */
669     String trimCharactersAtEnd (StringRef charactersToTrim) const;
670 
671     //==============================================================================
672     /** Returns an upper-case version of this string. */
673     String toUpperCase() const;
674 
    /** Returns a lower-case version of this string. */
676     String toLowerCase() const;
677 
678     //==============================================================================
679     /** Replaces a sub-section of the string with another string.
680 
681         This will return a copy of this string, with a set of characters
        from startIndex to startIndex + numCharactersToReplace removed, and with
683         a new string inserted in their place.
684 
685         Note that this is a const method, and won't alter the string itself.
686 
687         @param startIndex               the first character to remove. If this is beyond the bounds of the string,
688                                         it will be constrained to a valid range.
689         @param numCharactersToReplace   the number of characters to remove. If zero or less, no
690                                         characters will be taken out.
691         @param stringToInsert           the new string to insert at startIndex after the characters have been
692                                         removed.
693     */
694     String replaceSection (int startIndex,
695                            int numCharactersToReplace,
696                            StringRef stringToInsert) const;
697 
698     /** Replaces all occurrences of a substring with another string.
699 
700         Returns a copy of this string, with any occurrences of stringToReplace
701         swapped for stringToInsertInstead.
702 
703         Note that this is a const method, and won't alter the string itself.
704     */
705     String replace (StringRef stringToReplace,
706                     StringRef stringToInsertInstead,
707                     bool ignoreCase = false) const;
708 
709     /** Returns a string with all occurrences of a character replaced with a different one. */
710     String replaceCharacter (water_uchar characterToReplace,
711                              water_uchar characterToInsertInstead) const;
712 
713     /** Replaces a set of characters with another set.
714 
715         Returns a string in which each character from charactersToReplace has been replaced
        by the character at the equivalent position in charactersToInsertInstead (so the two strings
717         passed in must be the same length).
718 
719         e.g. replaceCharacters ("abc", "def") replaces 'a' with 'd', 'b' with 'e', etc.
720 
721         Note that this is a const method, and won't affect the string itself.
722     */
723     String replaceCharacters (StringRef charactersToReplace,
724                               StringRef charactersToInsertInstead) const;
725 
726     /** Returns a version of this string that only retains a fixed set of characters.
727 
728         This will return a copy of this string, omitting any characters which are not
729         found in the string passed-in.
730 
731         e.g. for "1122334455", retainCharacters ("432") would return "223344"
732 
733         Note that this is a const method, and won't alter the string itself.
734     */
735     String retainCharacters (StringRef charactersToRetain) const;
736 
737     /** Returns a version of this string with a set of characters removed.
738 
739         This will return a copy of this string, omitting any characters which are
740         found in the string passed-in.
741 
742         e.g. for "1122334455", removeCharacters ("432") would return "1155"
743 
744         Note that this is a const method, and won't alter the string itself.
745     */
746     String removeCharacters (StringRef charactersToRemove) const;
747 
748     /** Returns a section from the start of the string that only contains a certain set of characters.
749 
750         This returns the leftmost section of the string, up to (and not including) the
751         first character that doesn't appear in the string passed in.
752     */
753     String initialSectionContainingOnly (StringRef permittedCharacters) const;
754 
    /** Returns a section from the start of the string that contains none of a certain set of characters.
756 
757         This returns the leftmost section of the string, up to (and not including) the
758         first character that occurs in the string passed in. (If none of the specified
759         characters are found in the string, the return value will just be the original string).
760     */
761     String initialSectionNotContaining (StringRef charactersToStopAt) const;
762 
763     //==============================================================================
764     /** Checks whether the string might be in quotation marks.
765 
766         @returns    true if the string begins with a quote character (either a double or single quote).
767                     It is also true if there is whitespace before the quote, but it doesn't check the end of the string.
768         @see unquoted, quoted
769     */
770     bool isQuotedString() const;
771 
772     /** Removes quotation marks from around the string, (if there are any).
773 
774         Returns a copy of this string with any quotes removed from its ends. Quotes that aren't
775         at the ends of the string are not affected. If there aren't any quotes, the original string
776         is returned.
777 
778         Note that this is a const method, and won't alter the string itself.
779 
780         @see isQuotedString, quoted
781     */
782     String unquoted() const;
783 
784     /** Adds quotation marks around a string.
785         This will return a copy of the string with a quote at the start and end, (but won't
786         add the quote if there's already one there, so it's safe to call this on strings that
787         may already have quotes around them).
788         Note that this is a const method, and won't alter the string itself.
789         @param quoteCharacter   the character to add at the start and end
790         @see isQuotedString, unquoted
791     */
792     String quoted (water_uchar quoteCharacter = '"') const;
793 
794     //==============================================================================
795     /** Creates a string which is a version of a string repeated and joined together.
796 
797         @param stringToRepeat         the string to repeat
798         @param numberOfTimesToRepeat  how many times to repeat it
799     */
800     static String repeatedString (StringRef stringToRepeat,
801                                   int numberOfTimesToRepeat);
802 
803     /** Returns a copy of this string with the specified character repeatedly added to its
804         beginning until the total length is at least the minimum length specified.
805     */
806     String paddedLeft (water_uchar padCharacter, int minimumLength) const;
807 
808     /** Returns a copy of this string with the specified character repeatedly added to its
809         end until the total length is at least the minimum length specified.
810     */
811     String paddedRight (water_uchar padCharacter, int minimumLength) const;
812 
813     /** Creates a string from data in an unknown format.
814 
815         This looks at some binary data and tries to guess whether it's Unicode
816         or 8-bit characters, then returns a string that represents it correctly.
817 
818         Should be able to handle Unicode endianness correctly, by looking at
819         the first two bytes.
820     */
821     static String createStringFromData (const void* data, int size);
822 
823     /** Creates a String from a printf-style parameter list.
824 
825         I don't like this method. I don't use it myself, and I recommend avoiding it and
826         using the operator<< methods or pretty much anything else instead. It's only provided
827         here because of the popular unrest that was stirred-up when I tried to remove it...
828 
829         If you're really determined to use it, at least make sure that you never, ever,
830         pass any String objects to it as parameters.
831     */
832     static String formatted (const String formatString, ... );
833 
834     //==============================================================================
835     // Numeric conversions..
836 
837     /** Creates a string containing this signed 32-bit integer as a decimal number.
838         @see getIntValue, getFloatValue, getDoubleValue, toHexString
839     */
840     explicit String (int decimalInteger);
841 
842     /** Creates a string containing this unsigned 32-bit integer as a decimal number.
843         @see getIntValue, getFloatValue, getDoubleValue, toHexString
844     */
845     explicit String (unsigned int decimalInteger);
846 
847     /** Creates a string containing this signed 16-bit integer as a decimal number.
848         @see getIntValue, getFloatValue, getDoubleValue, toHexString
849     */
850     explicit String (short decimalInteger);
851 
852     /** Creates a string containing this unsigned 16-bit integer as a decimal number.
853         @see getIntValue, getFloatValue, getDoubleValue, toHexString
854     */
855     explicit String (unsigned short decimalInteger);
856 
857     /** Creates a string containing this signed 64-bit integer as a decimal number.
858         @see getLargeIntValue, getFloatValue, getDoubleValue, toHexString
859     */
860     explicit String (int64 largeIntegerValue);
861 
862     /** Creates a string containing this unsigned 64-bit integer as a decimal number.
863         @see getLargeIntValue, getFloatValue, getDoubleValue, toHexString
864     */
865     explicit String (uint64 largeIntegerValue);
866 
867     /** Creates a string containing this signed long integer as a decimal number.
868         @see getIntValue, getFloatValue, getDoubleValue, toHexString
869     */
870     explicit String (long decimalInteger);
871 
872     /** Creates a string containing this unsigned long integer as a decimal number.
873         @see getIntValue, getFloatValue, getDoubleValue, toHexString
874     */
875     explicit String (unsigned long decimalInteger);
876 
877     /** Creates a string representing this floating-point number.
878         @param floatValue               the value to convert to a string
879         @see getDoubleValue, getIntValue
880     */
881     explicit String (float floatValue);
882 
883     /** Creates a string representing this floating-point number.
884         @param doubleValue              the value to convert to a string
885         @see getFloatValue, getIntValue
886     */
887     explicit String (double doubleValue);
888 
889     /** Creates a string representing this floating-point number.
890         @param floatValue               the value to convert to a string
891         @param numberOfDecimalPlaces    if this is > 0, it will format the number using that many
892                                         decimal places, and will not use exponent notation. If 0 or
893                                         less, it will use exponent notation if necessary.
894         @see getDoubleValue, getIntValue
895     */
896     String (float floatValue, int numberOfDecimalPlaces);
897 
898     /** Creates a string representing this floating-point number.
899         @param doubleValue              the value to convert to a string
900         @param numberOfDecimalPlaces    if this is > 0, it will format the number using that many
901                                         decimal places, and will not use exponent notation. If 0 or
902                                         less, it will use exponent notation if necessary.
903         @see getFloatValue, getIntValue
904     */
905     String (double doubleValue, int numberOfDecimalPlaces);
906 
907     /** Reads the value of the string as a decimal number (up to 32 bits in size).
908 
909         @returns the value of the string as a 32 bit signed base-10 integer.
910         @see getTrailingIntValue, getHexValue32, getHexValue64
911     */
912     int getIntValue() const noexcept;
913 
914     /** Reads the value of the string as a decimal number (up to 64 bits in size).
915         @returns the value of the string as a 64 bit signed base-10 integer.
916     */
917     int64 getLargeIntValue() const noexcept;
918 
919     /** Parses a decimal number from the end of the string.
920 
921         This will look for a value at the end of the string.
922         e.g. for "321 xyz654" it will return 654; for "2 3 4" it'll return 4.
923 
924         Negative numbers are not handled, so "xyz-5" returns 5.
925 
926         @see getIntValue
927     */
928     int getTrailingIntValue() const noexcept;
929 
930     /** Parses this string as a floating point number.
931 
932         @returns    the value of the string as a 32-bit floating point value.
933         @see getDoubleValue
934     */
935     float getFloatValue() const noexcept;
936 
937     /** Parses this string as a floating point number.
938 
939         @returns    the value of the string as a 64-bit floating point value.
940         @see getFloatValue
941     */
942     double getDoubleValue() const noexcept;
943 
944     /** Parses the string as a hexadecimal number.
945 
946         Non-hexadecimal characters in the string are ignored.
947 
948         If the string contains too many characters, then the lowest significant
949         digits are returned, e.g. "ffff12345678" would produce 0x12345678.
950 
951         @returns    a 32-bit number which is the value of the string in hex.
952     */
953     int getHexValue32() const noexcept;
954 
955     /** Parses the string as a hexadecimal number.
956 
957         Non-hexadecimal characters in the string are ignored.
958 
959         If the string contains too many characters, then the lowest significant
960         digits are returned, e.g. "ffff1234567812345678" would produce 0x1234567812345678.
961 
962         @returns    a 64-bit number which is the value of the string in hex.
963     */
964     int64 getHexValue64() const noexcept;
965 
966     /** Creates a string representing this 32-bit value in hexadecimal. */
967     static String toHexString (int number);
968 
969     /** Creates a string representing this 64-bit value in hexadecimal. */
970     static String toHexString (int64 number);
971 
972     /** Creates a string representing this 16-bit value in hexadecimal. */
973     static String toHexString (short number);
974 
975     /** Creates a string containing a hex dump of a block of binary data.
976 
977         @param data         the binary data to use as input
978         @param size         how many bytes of data to use
979         @param groupSize    how many bytes are grouped together before inserting a
980                             space into the output. e.g. group size 0 has no spaces,
981                             group size 1 looks like: "be a1 c2 ff", group size 2 looks
982                             like "bea1 c2ff".
983     */
984     static String toHexString (const void* data, int size, int groupSize = 1);
985 
986     //==============================================================================
987     /** Returns the character pointer currently being used to store this string.
988 
989         Because it returns a reference to the string's internal data, the pointer
990         that is returned must not be stored anywhere, as it can be deleted whenever the
991         string changes.
992     */
getCharPointer()993     inline CharPointerType getCharPointer() const noexcept      { return text; }
994 
995     /** Returns a pointer to a UTF-8 version of this string.
996 
997         Because it returns a reference to the string's internal data, the pointer
998         that is returned must not be stored anywhere, as it can be deleted whenever the
999         string changes.
1000 
1001         To find out how many bytes you need to store this string as UTF-8, you can call
1002         CharPointer_UTF8::getBytesRequiredFor (myString.getCharPointer())
1003 
1004         @see toRawUTF8, getCharPointer, toUTF16, toUTF32
1005     */
1006     CharPointer_UTF8 toUTF8() const;
1007 
1008     /** Returns a pointer to a UTF-8 version of this string.
1009 
1010         Because it returns a reference to the string's internal data, the pointer
1011         that is returned must not be stored anywhere, as it can be deleted whenever the
1012         string changes.
1013 
1014         To find out how many bytes you need to store this string as UTF-8, you can call
1015         CharPointer_UTF8::getBytesRequiredFor (myString.getCharPointer())
1016 
1017         @see getCharPointer, toUTF8, toUTF16, toUTF32
1018     */
1019     const char* toRawUTF8() const;
1020 
    /** Returns a std::string copy of this string. */
1022     std::string toStdString() const;
1023 
1024     //==============================================================================
1025     /** Creates a String from a UTF-8 encoded buffer.
1026         If the size is < 0, it'll keep reading until it hits a zero.
1027     */
1028     static String fromUTF8 (const char* utf8buffer, int bufferSizeBytes = -1);
1029 
1030     /** Returns the number of bytes required to represent this string as UTF8.
1031         The number returned does NOT include the trailing zero.
1032         @see toUTF8, copyToUTF8
1033     */
1034     size_t getNumBytesAsUTF8() const noexcept;
1035 
1036     //==============================================================================
1037     /** Copies the string to a buffer as UTF-8 characters.
1038 
1039         Returns the number of bytes copied to the buffer, including the terminating null
1040         character.
1041 
1042         To find out how many bytes you need to store this string as UTF-8, you can call
1043         CharPointer_UTF8::getBytesRequiredFor (myString.getCharPointer())
1044 
1045         @param destBuffer       the place to copy it to; if this is a null pointer, the method just
1046                                 returns the number of bytes required (including the terminating null character).
1047         @param maxBufferSizeBytes  the size of the destination buffer, in bytes. If the string won't fit, it'll
1048                                 put in as many as it can while still allowing for a terminating null char at the
1049                                 end, and will return the number of bytes that were actually used.
1050         @see CharPointer_UTF8::writeWithDestByteLimit
1051     */
1052     size_t copyToUTF8 (CharPointer_UTF8::CharType* destBuffer, size_t maxBufferSizeBytes) const noexcept;
1053 
1054     //==============================================================================
1055     /** Increases the string's internally allocated storage.
1056 
1057         Although the string's contents won't be affected by this call, it will
1058         increase the amount of memory allocated internally for the string to grow into.
1059 
1060         If you're about to make a large number of calls to methods such
1061         as += or <<, it's more efficient to preallocate enough extra space
1062         beforehand, so that these methods won't have to keep resizing the string
1063         to append the extra characters.
1064 
1065         @param numBytesNeeded   the number of bytes to allocate storage for. If this
1066                                 value is less than the currently allocated size, it will
1067                                 have no effect.
1068     */
1069     void preallocateBytes (size_t numBytesNeeded);
1070 
1071     /** Swaps the contents of this string with another one.
1072         This is a very fast operation, as no allocation or copying needs to be done.
1073     */
1074     void swapWith (String& other) noexcept;
1075 
1076     //==============================================================================
1077    #if 0 //def CARLA_OS_MAC
1078     /** OSX ONLY - Creates a String from an OSX CFString. */
1079     static String fromCFString (CFStringRef cfString);
1080 
1081     /** OSX ONLY - Converts this string to a CFString.
1082         Remember that you must use CFRelease() to free the returned string when you're
1083         finished with it.
1084     */
1085     CFStringRef toCFString() const;
1086    #endif
1087 
1088    #ifdef CARLA_OS_MAC
1089     /** OSX ONLY - Returns a copy of this string in which any decomposed unicode characters have
1090         been converted to their precomposed equivalents. */
1091     String convertToPrecomposedUnicode() const;
1092    #endif
1093 
1094     /** Returns the number of String objects which are currently sharing the same internal
1095         data as this one.
1096     */
1097     int getReferenceCount() const noexcept;
1098 
1099 private:
1100     //==============================================================================
1101     CharPointerType text;
1102 
1103     //==============================================================================
1104     struct PreallocationBytes
1105     {
1106         explicit PreallocationBytes (size_t) noexcept;
1107         size_t numBytes;
1108     };
1109 
1110     explicit String (const PreallocationBytes&); // This constructor preallocates a certain amount of memory
1111     size_t getByteOffsetOfEnd() const noexcept;
1112 };
1113 
1114 //==============================================================================
1115 /** Concatenates two strings. */
1116 String operator+ (const char* string1, const String& string2);
/** Concatenates a character and a string. */
1118 String operator+ (char string1, const String& string2);
1119 /** Concatenates two strings. */
1120 String operator+ (water_uchar string1, const String& string2);
1121 
1122 /** Concatenates two strings. */
1123 String operator+ (String string1, const String& string2);
1124 /** Concatenates two strings. */
1125 String operator+ (String string1, const char* string2);
/** Concatenates a string and a character. */
1127 String operator+ (String string1, char characterToAppend);
1128 /** Concatenates two strings. */
1129 String operator+ (String string1, water_uchar characterToAppend);
1130 
1131 //==============================================================================
1132 /** Appends a character at the end of a string. */
1133 String& operator<< (String& string1, char characterToAppend);
1134 /** Appends a character at the end of a string. */
1135 String& operator<< (String& string1, water_uchar characterToAppend);
1136 
1137 /** Appends a string to the end of the first one. */
1138 String& operator<< (String& string1, const char* string2);
1139 /** Appends a string to the end of the first one. */
1140 String& operator<< (String& string1, const String& string2);
1141 /** Appends a string to the end of the first one. */
1142 String& operator<< (String& string1, StringRef string2);
1143 
1144 /** Appends a decimal number at the end of a string. */
1145 String& operator<< (String& string1, short number);
1146 /** Appends a decimal number at the end of a string. */
1147 String& operator<< (String& string1, int number);
1148 /** Appends a decimal number at the end of a string. */
1149 String& operator<< (String& string1, long number);
1150 /** Appends a decimal number at the end of a string. */
1151 String& operator<< (String& string1, int64 number);
1152 /** Appends a decimal number at the end of a string. */
1153 String& operator<< (String& string1, uint64 number);
1154 /** Appends a decimal number at the end of a string. */
1155 String& operator<< (String& string1, float number);
1156 /** Appends a decimal number at the end of a string. */
1157 String& operator<< (String& string1, double number);
1158 
1159 //==============================================================================
1160 /** Case-sensitive comparison of two strings. */
1161 bool operator== (const String& string1, const String& string2) noexcept;
1162 /** Case-sensitive comparison of two strings. */
1163 bool operator== (const String& string1, const char* string2) noexcept;
1164 /** Case-sensitive comparison of two strings. */
1165 bool operator== (const String& string1, const CharPointer_UTF8 string2) noexcept;
1166 
1167 /** Case-sensitive comparison of two strings. */
1168 bool operator!= (const String& string1, const String& string2) noexcept;
1169 /** Case-sensitive comparison of two strings. */
1170 bool operator!= (const String& string1, const char* string2) noexcept;
1171 /** Case-sensitive comparison of two strings. */
1172 bool operator!= (const String& string1, const CharPointer_UTF8 string2) noexcept;
1173 
1174 /** Case-sensitive comparison of two strings. */
1175 bool operator>  (const String& string1, const String& string2) noexcept;
1176 /** Case-sensitive comparison of two strings. */
1177 bool operator<  (const String& string1, const String& string2) noexcept;
1178 /** Case-sensitive comparison of two strings. */
1179 bool operator>= (const String& string1, const String& string2) noexcept;
1180 /** Case-sensitive comparison of two strings. */
1181 bool operator<= (const String& string1, const String& string2) noexcept;
1182 
1183 //==============================================================================
1184 /** This operator allows you to write a water String directly to std output streams.
1185     This is handy for writing strings to std::cout, std::cerr, etc.
1186 */
1187 template <class traits>
1188 std::basic_ostream <char, traits>& operator<< (std::basic_ostream <char, traits>& stream, const String& stringToWrite)
1189 {
1190     return stream << stringToWrite.toRawUTF8();
1191 }
1192 
1193 /** Writes a string to an OutputStream as UTF8. */
1194 OutputStream& operator<< (OutputStream& stream, const String& stringToWrite);
1195 
1196 /** Writes a string to an OutputStream as UTF8. */
1197 OutputStream& operator<< (OutputStream& stream, StringRef stringToWrite);
1198 
1199 //==============================================================================
1200 struct StartEndString {
StartEndStringStartEndString1201     StartEndString (String::CharPointerType s, String::CharPointerType e) noexcept : start (s), end (e) {}
StringStartEndString1202     operator String() const   { return String (start, end); }
1203     String::CharPointerType start, end;
1204 };
1205 
1206 }
1207 
1208 #include "StringRef.h"
1209 
1210 #endif // WATER_STRING_H_INCLUDED
1211