1 /*
2   ==============================================================================
3 
4    This file is part of the JUCE library.
5    Copyright (c) 2020 - Raw Material Software Limited
6 
7    JUCE is an open source library subject to commercial or open-source
8    licensing.
9 
10    The code included in this file is provided under the terms of the ISC license
11    http://www.isc.org/downloads/software-support-policy/isc-license. Permission
12    To use, copy, modify, and/or distribute this software for any purpose with or
13    without fee is hereby granted provided that the above copyright notice and
14    this permission notice appear in all copies.
15 
16    JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
17    EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
18    DISCLAIMED.
19 
20   ==============================================================================
21 */
22 
23 namespace juce
24 {
25 
26 //==============================================================================
27 /**
28     Wraps a pointer to a null-terminated UTF-16 character string, and provides
29     various methods to operate on the data.
30     @see CharPointer_UTF8, CharPointer_UTF32
31 
32     @tags{Core}
33 */
34 class CharPointer_UTF16  final
35 {
36 public:
37    #if JUCE_NATIVE_WCHAR_IS_UTF16
38     using CharType = wchar_t;
39    #else
40     using CharType = int16;
41    #endif
42 
CharPointer_UTF16(const CharType * rawPointer)43     inline explicit CharPointer_UTF16 (const CharType* rawPointer) noexcept
44         : data (const_cast<CharType*> (rawPointer))
45     {
46     }
47 
48     inline CharPointer_UTF16 (const CharPointer_UTF16& other) = default;
49 
50     inline CharPointer_UTF16 operator= (CharPointer_UTF16 other) noexcept
51     {
52         data = other.data;
53         return *this;
54     }
55 
56     inline CharPointer_UTF16 operator= (const CharType* text) noexcept
57     {
58         data = const_cast<CharType*> (text);
59         return *this;
60     }
61 
62     /** This is a pointer comparison, it doesn't compare the actual text. */
63     inline bool operator== (CharPointer_UTF16 other) const noexcept     { return data == other.data; }
64     inline bool operator!= (CharPointer_UTF16 other) const noexcept     { return data != other.data; }
65     inline bool operator<= (CharPointer_UTF16 other) const noexcept     { return data <= other.data; }
66     inline bool operator<  (CharPointer_UTF16 other) const noexcept     { return data <  other.data; }
67     inline bool operator>= (CharPointer_UTF16 other) const noexcept     { return data >= other.data; }
68     inline bool operator>  (CharPointer_UTF16 other) const noexcept     { return data >  other.data; }
69 
70     /** Returns the address that this pointer is pointing to. */
getAddress()71     inline CharType* getAddress() const noexcept        { return data; }
72 
73     /** Returns the address that this pointer is pointing to. */
74     inline operator const CharType*() const noexcept    { return data; }
75 
76     /** Returns true if this pointer is pointing to a null character. */
isEmpty()77     inline bool isEmpty() const noexcept                { return *data == 0; }
78 
79     /** Returns true if this pointer is not pointing to a null character. */
isNotEmpty()80     inline bool isNotEmpty() const noexcept             { return *data != 0; }
81 
82     /** Returns the unicode character that this pointer is pointing to. */
83     juce_wchar operator*() const noexcept
84     {
85         auto n = (uint32) (uint16) *data;
86 
87         if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) data[1]) >= 0xdc00)
88             n = 0x10000 + (((n - 0xd800) << 10) | (((uint32) (uint16) data[1]) - 0xdc00));
89 
90         return (juce_wchar) n;
91     }
92 
93     /** Moves this pointer along to the next character in the string. */
94     CharPointer_UTF16 operator++() noexcept
95     {
96         auto n = (uint32) (uint16) *data++;
97 
98         if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) *data) >= 0xdc00)
99             ++data;
100 
101         return *this;
102     }
103 
104     /** Moves this pointer back to the previous character in the string. */
105     CharPointer_UTF16 operator--() noexcept
106     {
107         auto n = (uint32) (uint16) (*--data);
108 
109         if (n >= 0xdc00 && n <= 0xdfff)
110             --data;
111 
112         return *this;
113     }
114 
115     /** Returns the character that this pointer is currently pointing to, and then
116         advances the pointer to point to the next character. */
getAndAdvance()117     juce_wchar getAndAdvance() noexcept
118     {
119         auto n = (uint32) (uint16) *data++;
120 
121         if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) *data) >= 0xdc00)
122             n = 0x10000 + ((((n - 0xd800) << 10) | (((uint32) (uint16) *data++) - 0xdc00)));
123 
124         return (juce_wchar) n;
125     }
126 
127     /** Moves this pointer along to the next character in the string. */
128     CharPointer_UTF16 operator++ (int) noexcept
129     {
130         auto temp (*this);
131         ++*this;
132         return temp;
133     }
134 
135     /** Moves this pointer forwards by the specified number of characters. */
136     void operator+= (int numToSkip) noexcept
137     {
138         if (numToSkip < 0)
139         {
140             while (++numToSkip <= 0)
141                 --*this;
142         }
143         else
144         {
145             while (--numToSkip >= 0)
146                 ++*this;
147         }
148     }
149 
150     /** Moves this pointer backwards by the specified number of characters. */
151     void operator-= (int numToSkip) noexcept
152     {
153         operator+= (-numToSkip);
154     }
155 
156     /** Returns the character at a given character index from the start of the string. */
157     juce_wchar operator[] (int characterIndex) const noexcept
158     {
159         auto p (*this);
160         p += characterIndex;
161         return *p;
162     }
163 
164     /** Returns a pointer which is moved forwards from this one by the specified number of characters. */
165     CharPointer_UTF16 operator+ (int numToSkip) const noexcept
166     {
167         auto p (*this);
168         p += numToSkip;
169         return p;
170     }
171 
172     /** Returns a pointer which is moved backwards from this one by the specified number of characters. */
173     CharPointer_UTF16 operator- (int numToSkip) const noexcept
174     {
175         auto p (*this);
176         p += -numToSkip;
177         return p;
178     }
179 
180     /** Writes a unicode character to this string, and advances this pointer to point to the next position. */
write(juce_wchar charToWrite)181     void write (juce_wchar charToWrite) noexcept
182     {
183         if (charToWrite >= 0x10000)
184         {
185             charToWrite -= 0x10000;
186             *data++ = (CharType) (0xd800 + (charToWrite >> 10));
187             *data++ = (CharType) (0xdc00 + (charToWrite & 0x3ff));
188         }
189         else
190         {
191             *data++ = (CharType) charToWrite;
192         }
193     }
194 
195     /** Writes a null character to this string (leaving the pointer's position unchanged). */
writeNull()196     inline void writeNull() const noexcept
197     {
198         *data = 0;
199     }
200 
201     /** Returns the number of characters in this string. */
length()202     size_t length() const noexcept
203     {
204         auto* d = data;
205         size_t count = 0;
206 
207         for (;;)
208         {
209             auto n = (uint32) (uint16) *d++;
210 
211             if (n >= 0xd800 && n <= 0xdfff)
212             {
213                 if (*d++ == 0)
214                     break;
215             }
216             else if (n == 0)
217                 break;
218 
219             ++count;
220         }
221 
222         return count;
223     }
224 
225     /** Returns the number of characters in this string, or the given value, whichever is lower. */
lengthUpTo(size_t maxCharsToCount)226     size_t lengthUpTo (size_t maxCharsToCount) const noexcept
227     {
228         return CharacterFunctions::lengthUpTo (*this, maxCharsToCount);
229     }
230 
231     /** Returns the number of characters in this string, or up to the given end pointer, whichever is lower. */
lengthUpTo(CharPointer_UTF16 end)232     size_t lengthUpTo (CharPointer_UTF16 end) const noexcept
233     {
234         return CharacterFunctions::lengthUpTo (*this, end);
235     }
236 
237     /** Returns the number of bytes that are used to represent this string.
238         This includes the terminating null character.
239     */
sizeInBytes()240     size_t sizeInBytes() const noexcept
241     {
242         return sizeof (CharType) * (findNullIndex (data) + 1);
243     }
244 
245     /** Returns the number of bytes that would be needed to represent the given
246         unicode character in this encoding format.
247     */
getBytesRequiredFor(juce_wchar charToWrite)248     static size_t getBytesRequiredFor (juce_wchar charToWrite) noexcept
249     {
250         return (charToWrite >= 0x10000) ? (sizeof (CharType) * 2) : sizeof (CharType);
251     }
252 
253     /** Returns the number of bytes that would be needed to represent the given
254         string in this encoding format.
255         The value returned does NOT include the terminating null character.
256     */
257     template <class CharPointer>
getBytesRequiredFor(CharPointer text)258     static size_t getBytesRequiredFor (CharPointer text) noexcept
259     {
260         size_t count = 0;
261         juce_wchar n;
262 
263         while ((n = text.getAndAdvance()) != 0)
264             count += getBytesRequiredFor (n);
265 
266         return count;
267     }
268 
269     /** Returns a pointer to the null character that terminates this string. */
findTerminatingNull()270     CharPointer_UTF16 findTerminatingNull() const noexcept
271     {
272         auto* t = data;
273 
274         while (*t != 0)
275             ++t;
276 
277         return CharPointer_UTF16 (t);
278     }
279 
280     /** Copies a source string to this pointer, advancing this pointer as it goes. */
281     template <typename CharPointer>
writeAll(CharPointer src)282     void writeAll (CharPointer src) noexcept
283     {
284         CharacterFunctions::copyAll (*this, src);
285     }
286 
287     /** Copies a source string to this pointer, advancing this pointer as it goes. */
writeAll(CharPointer_UTF16 src)288     void writeAll (CharPointer_UTF16 src) noexcept
289     {
290         auto* s = src.data;
291 
292         while ((*data = *s) != 0)
293         {
294             ++data;
295             ++s;
296         }
297     }
298 
299     /** Copies a source string to this pointer, advancing this pointer as it goes.
300         The maxDestBytes parameter specifies the maximum number of bytes that can be written
301         to the destination buffer before stopping.
302     */
303     template <typename CharPointer>
writeWithDestByteLimit(CharPointer src,size_t maxDestBytes)304     size_t writeWithDestByteLimit (CharPointer src, size_t maxDestBytes) noexcept
305     {
306         return CharacterFunctions::copyWithDestByteLimit (*this, src, maxDestBytes);
307     }
308 
309     /** Copies a source string to this pointer, advancing this pointer as it goes.
310         The maxChars parameter specifies the maximum number of characters that can be
311         written to the destination buffer before stopping (including the terminating null).
312     */
313     template <typename CharPointer>
writeWithCharLimit(CharPointer src,int maxChars)314     void writeWithCharLimit (CharPointer src, int maxChars) noexcept
315     {
316         CharacterFunctions::copyWithCharLimit (*this, src, maxChars);
317     }
318 
319     /** Compares this string with another one. */
320     template <typename CharPointer>
compare(CharPointer other)321     int compare (CharPointer other) const noexcept
322     {
323         return CharacterFunctions::compare (*this, other);
324     }
325 
326     /** Compares this string with another one, up to a specified number of characters. */
327     template <typename CharPointer>
compareUpTo(CharPointer other,int maxChars)328     int compareUpTo (CharPointer other, int maxChars) const noexcept
329     {
330         return CharacterFunctions::compareUpTo (*this, other, maxChars);
331     }
332 
333     /** Compares this string with another one. */
334     template <typename CharPointer>
compareIgnoreCase(CharPointer other)335     int compareIgnoreCase (CharPointer other) const noexcept
336     {
337         return CharacterFunctions::compareIgnoreCase (*this, other);
338     }
339 
340     /** Compares this string with another one, up to a specified number of characters. */
341     template <typename CharPointer>
compareIgnoreCaseUpTo(CharPointer other,int maxChars)342     int compareIgnoreCaseUpTo (CharPointer other, int maxChars) const noexcept
343     {
344         return CharacterFunctions::compareIgnoreCaseUpTo (*this, other, maxChars);
345     }
346 
347    #if JUCE_MSVC && ! DOXYGEN
compareIgnoreCase(CharPointer_UTF16 other)348     int compareIgnoreCase (CharPointer_UTF16 other) const noexcept
349     {
350         return _wcsicmp (data, other.data);
351     }
352 
compareIgnoreCaseUpTo(CharPointer_UTF16 other,int maxChars)353     int compareIgnoreCaseUpTo (CharPointer_UTF16 other, int maxChars) const noexcept
354     {
355         return _wcsnicmp (data, other.data, (size_t) maxChars);
356     }
357 
indexOf(CharPointer_UTF16 stringToFind)358     int indexOf (CharPointer_UTF16 stringToFind) const noexcept
359     {
360         const CharType* const t = wcsstr (data, stringToFind.getAddress());
361         return t == nullptr ? -1 : (int) (t - data);
362     }
363    #endif
364 
365     /** Returns the character index of a substring, or -1 if it isn't found. */
366     template <typename CharPointer>
indexOf(CharPointer stringToFind)367     int indexOf (CharPointer stringToFind) const noexcept
368     {
369         return CharacterFunctions::indexOf (*this, stringToFind);
370     }
371 
372     /** Returns the character index of a unicode character, or -1 if it isn't found. */
indexOf(juce_wchar charToFind)373     int indexOf (juce_wchar charToFind) const noexcept
374     {
375         return CharacterFunctions::indexOfChar (*this, charToFind);
376     }
377 
378     /** Returns the character index of a unicode character, or -1 if it isn't found. */
indexOf(juce_wchar charToFind,bool ignoreCase)379     int indexOf (juce_wchar charToFind, bool ignoreCase) const noexcept
380     {
381         return ignoreCase ? CharacterFunctions::indexOfCharIgnoreCase (*this, charToFind)
382                           : CharacterFunctions::indexOfChar (*this, charToFind);
383     }
384 
385     /** Returns true if the first character of this string is whitespace. */
isWhitespace()386     bool isWhitespace() const noexcept          { return CharacterFunctions::isWhitespace (operator*()) != 0; }
387     /** Returns true if the first character of this string is a digit. */
isDigit()388     bool isDigit() const noexcept               { return CharacterFunctions::isDigit (operator*()) != 0; }
389     /** Returns true if the first character of this string is a letter. */
isLetter()390     bool isLetter() const noexcept              { return CharacterFunctions::isLetter (operator*()) != 0; }
391     /** Returns true if the first character of this string is a letter or digit. */
isLetterOrDigit()392     bool isLetterOrDigit() const noexcept       { return CharacterFunctions::isLetterOrDigit (operator*()) != 0; }
393     /** Returns true if the first character of this string is upper-case. */
isUpperCase()394     bool isUpperCase() const noexcept           { return CharacterFunctions::isUpperCase (operator*()) != 0; }
395     /** Returns true if the first character of this string is lower-case. */
isLowerCase()396     bool isLowerCase() const noexcept           { return CharacterFunctions::isLowerCase (operator*()) != 0; }
397 
398     /** Returns an upper-case version of the first character of this string. */
toUpperCase()399     juce_wchar toUpperCase() const noexcept     { return CharacterFunctions::toUpperCase (operator*()); }
400     /** Returns a lower-case version of the first character of this string. */
toLowerCase()401     juce_wchar toLowerCase() const noexcept     { return CharacterFunctions::toLowerCase (operator*()); }
402 
403     /** Parses this string as a 32-bit integer. */
getIntValue32()404     int getIntValue32() const noexcept
405     {
406        #if JUCE_MSVC
407         return _wtoi (data);
408        #else
409         return CharacterFunctions::getIntValue<int, CharPointer_UTF16> (*this);
410        #endif
411     }
412 
413     /** Parses this string as a 64-bit integer. */
getIntValue64()414     int64 getIntValue64() const noexcept
415     {
416        #if JUCE_MSVC
417         return _wtoi64 (data);
418        #else
419         return CharacterFunctions::getIntValue<int64, CharPointer_UTF16> (*this);
420        #endif
421     }
422 
423     /** Parses this string as a floating point double. */
getDoubleValue()424     double getDoubleValue() const noexcept                      { return CharacterFunctions::getDoubleValue (*this); }
425 
426     /** Returns the first non-whitespace character in the string. */
findEndOfWhitespace()427     CharPointer_UTF16 findEndOfWhitespace() const noexcept      { return CharacterFunctions::findEndOfWhitespace (*this); }
428 
429     /** Returns true if the given unicode character can be represented in this encoding. */
canRepresent(juce_wchar character)430     static bool canRepresent (juce_wchar character) noexcept
431     {
432         auto n = (uint32) character;
433         return n < 0x10ffff && (n < 0xd800 || n > 0xdfff);
434     }
435 
436     /** Returns true if this data contains a valid string in this encoding. */
isValidString(const CharType * dataToTest,int maxBytesToRead)437     static bool isValidString (const CharType* dataToTest, int maxBytesToRead)
438     {
439         maxBytesToRead /= (int) sizeof (CharType);
440 
441         while (--maxBytesToRead >= 0 && *dataToTest != 0)
442         {
443             auto n = (uint32) (uint16) *dataToTest++;
444 
445             if (n >= 0xd800)
446             {
447                 if (n > 0x10ffff)
448                     return false;
449 
450                 if (n <= 0xdfff)
451                 {
452                     if (n > 0xdc00)
453                         return false;
454 
455                     auto nextChar = (uint32) (uint16) *dataToTest++;
456 
457                     if (nextChar < 0xdc00 || nextChar > 0xdfff)
458                         return false;
459                 }
460             }
461         }
462 
463         return true;
464     }
465 
466     /** Atomically swaps this pointer for a new value, returning the previous value. */
atomicSwap(CharPointer_UTF16 newValue)467     CharPointer_UTF16 atomicSwap (CharPointer_UTF16 newValue)
468     {
469         return CharPointer_UTF16 (reinterpret_cast<Atomic<CharType*>&> (data).exchange (newValue.data));
470     }
471 
472     /** These values are the byte-order-mark (BOM) values for a UTF-16 stream. */
473     enum
474     {
475         byteOrderMarkBE1 = 0xfe,
476         byteOrderMarkBE2 = 0xff,
477         byteOrderMarkLE1 = 0xff,
478         byteOrderMarkLE2 = 0xfe
479     };
480 
481     /** Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian).
482         The pointer must not be null, and must contain at least two valid bytes.
483     */
isByteOrderMarkBigEndian(const void * possibleByteOrder)484     static bool isByteOrderMarkBigEndian (const void* possibleByteOrder) noexcept
485     {
486         jassert (possibleByteOrder != nullptr);
487         auto c = static_cast<const uint8*> (possibleByteOrder);
488 
489         return c[0] == (uint8) byteOrderMarkBE1
490             && c[1] == (uint8) byteOrderMarkBE2;
491     }
492 
493     /** Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian).
494         The pointer must not be null, and must contain at least two valid bytes.
495     */
isByteOrderMarkLittleEndian(const void * possibleByteOrder)496     static bool isByteOrderMarkLittleEndian (const void* possibleByteOrder) noexcept
497     {
498         jassert (possibleByteOrder != nullptr);
499         auto c = static_cast<const uint8*> (possibleByteOrder);
500 
501         return c[0] == (uint8) byteOrderMarkLE1
502             && c[1] == (uint8) byteOrderMarkLE2;
503     }
504 
505 private:
506     CharType* data;
507 
findNullIndex(const CharType * t)508     static unsigned int findNullIndex (const CharType* t) noexcept
509     {
510         unsigned int n = 0;
511 
512         while (t[n] != 0)
513             ++n;
514 
515         return n;
516     }
517 };
518 
519 } // namespace juce
520