1 /* 2 ============================================================================== 3 4 This file is part of the JUCE library. 5 Copyright (c) 2020 - Raw Material Software Limited 6 7 JUCE is an open source library subject to commercial or open-source 8 licensing. 9 10 The code included in this file is provided under the terms of the ISC license 11 http://www.isc.org/downloads/software-support-policy/isc-license. Permission 12 To use, copy, modify, and/or distribute this software for any purpose with or 13 without fee is hereby granted provided that the above copyright notice and 14 this permission notice appear in all copies. 15 16 JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER 17 EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE 18 DISCLAIMED. 19 20 ============================================================================== 21 */ 22 23 namespace juce 24 { 25 26 //============================================================================== 27 /** 28 Wraps a pointer to a null-terminated UTF-16 character string, and provides 29 various methods to operate on the data. 30 @see CharPointer_UTF8, CharPointer_UTF32 31 32 @tags{Core} 33 */ 34 class CharPointer_UTF16 final 35 { 36 public: 37 #if JUCE_NATIVE_WCHAR_IS_UTF16 38 using CharType = wchar_t; 39 #else 40 using CharType = int16; 41 #endif 42 CharPointer_UTF16(const CharType * rawPointer)43 inline explicit CharPointer_UTF16 (const CharType* rawPointer) noexcept 44 : data (const_cast<CharType*> (rawPointer)) 45 { 46 } 47 48 inline CharPointer_UTF16 (const CharPointer_UTF16& other) = default; 49 50 inline CharPointer_UTF16 operator= (CharPointer_UTF16 other) noexcept 51 { 52 data = other.data; 53 return *this; 54 } 55 56 inline CharPointer_UTF16 operator= (const CharType* text) noexcept 57 { 58 data = const_cast<CharType*> (text); 59 return *this; 60 } 61 62 /** This is a pointer comparison, it doesn't compare the actual text. */ 63 inline bool operator== (CharPointer_UTF16 other) const noexcept { return data == other.data; } 64 inline bool operator!= (CharPointer_UTF16 other) const noexcept { return data != other.data; } 65 inline bool operator<= (CharPointer_UTF16 other) const noexcept { return data <= other.data; } 66 inline bool operator< (CharPointer_UTF16 other) const noexcept { return data < other.data; } 67 inline bool operator>= (CharPointer_UTF16 other) const noexcept { return data >= other.data; } 68 inline bool operator> (CharPointer_UTF16 other) const noexcept { return data > other.data; } 69 70 /** Returns the address that this pointer is pointing to. */ getAddress()71 inline CharType* getAddress() const noexcept { return data; } 72 73 /** Returns the address that this pointer is pointing to. */ 74 inline operator const CharType*() const noexcept { return data; } 75 76 /** Returns true if this pointer is pointing to a null character. */ isEmpty()77 inline bool isEmpty() const noexcept { return *data == 0; } 78 79 /** Returns true if this pointer is not pointing to a null character. */ isNotEmpty()80 inline bool isNotEmpty() const noexcept { return *data != 0; } 81 82 /** Returns the unicode character that this pointer is pointing to. */ 83 juce_wchar operator*() const noexcept 84 { 85 auto n = (uint32) (uint16) *data; 86 87 if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) data[1]) >= 0xdc00) 88 n = 0x10000 + (((n - 0xd800) << 10) | (((uint32) (uint16) data[1]) - 0xdc00)); 89 90 return (juce_wchar) n; 91 } 92 93 /** Moves this pointer along to the next character in the string. */ 94 CharPointer_UTF16 operator++() noexcept 95 { 96 auto n = (uint32) (uint16) *data++; 97 98 if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) *data) >= 0xdc00) 99 ++data; 100 101 return *this; 102 } 103 104 /** Moves this pointer back to the previous character in the string. */ 105 CharPointer_UTF16 operator--() noexcept 106 { 107 auto n = (uint32) (uint16) (*--data); 108 109 if (n >= 0xdc00 && n <= 0xdfff) 110 --data; 111 112 return *this; 113 } 114 115 /** Returns the character that this pointer is currently pointing to, and then 116 advances the pointer to point to the next character. */ getAndAdvance()117 juce_wchar getAndAdvance() noexcept 118 { 119 auto n = (uint32) (uint16) *data++; 120 121 if (n >= 0xd800 && n <= 0xdfff && ((uint32) (uint16) *data) >= 0xdc00) 122 n = 0x10000 + ((((n - 0xd800) << 10) | (((uint32) (uint16) *data++) - 0xdc00))); 123 124 return (juce_wchar) n; 125 } 126 127 /** Moves this pointer along to the next character in the string. */ 128 CharPointer_UTF16 operator++ (int) noexcept 129 { 130 auto temp (*this); 131 ++*this; 132 return temp; 133 } 134 135 /** Moves this pointer forwards by the specified number of characters. */ 136 void operator+= (int numToSkip) noexcept 137 { 138 if (numToSkip < 0) 139 { 140 while (++numToSkip <= 0) 141 --*this; 142 } 143 else 144 { 145 while (--numToSkip >= 0) 146 ++*this; 147 } 148 } 149 150 /** Moves this pointer backwards by the specified number of characters. */ 151 void operator-= (int numToSkip) noexcept 152 { 153 operator+= (-numToSkip); 154 } 155 156 /** Returns the character at a given character index from the start of the string. */ 157 juce_wchar operator[] (int characterIndex) const noexcept 158 { 159 auto p (*this); 160 p += characterIndex; 161 return *p; 162 } 163 164 /** Returns a pointer which is moved forwards from this one by the specified number of characters. */ 165 CharPointer_UTF16 operator+ (int numToSkip) const noexcept 166 { 167 auto p (*this); 168 p += numToSkip; 169 return p; 170 } 171 172 /** Returns a pointer which is moved backwards from this one by the specified number of characters. */ 173 CharPointer_UTF16 operator- (int numToSkip) const noexcept 174 { 175 auto p (*this); 176 p += -numToSkip; 177 return p; 178 } 179 180 /** Writes a unicode character to this string, and advances this pointer to point to the next position. */ write(juce_wchar charToWrite)181 void write (juce_wchar charToWrite) noexcept 182 { 183 if (charToWrite >= 0x10000) 184 { 185 charToWrite -= 0x10000; 186 *data++ = (CharType) (0xd800 + (charToWrite >> 10)); 187 *data++ = (CharType) (0xdc00 + (charToWrite & 0x3ff)); 188 } 189 else 190 { 191 *data++ = (CharType) charToWrite; 192 } 193 } 194 195 /** Writes a null character to this string (leaving the pointer's position unchanged). */ writeNull()196 inline void writeNull() const noexcept 197 { 198 *data = 0; 199 } 200 201 /** Returns the number of characters in this string. */ length()202 size_t length() const noexcept 203 { 204 auto* d = data; 205 size_t count = 0; 206 207 for (;;) 208 { 209 auto n = (uint32) (uint16) *d++; 210 211 if (n >= 0xd800 && n <= 0xdfff) 212 { 213 if (*d++ == 0) 214 break; 215 } 216 else if (n == 0) 217 break; 218 219 ++count; 220 } 221 222 return count; 223 } 224 225 /** Returns the number of characters in this string, or the given value, whichever is lower. */ lengthUpTo(size_t maxCharsToCount)226 size_t lengthUpTo (size_t maxCharsToCount) const noexcept 227 { 228 return CharacterFunctions::lengthUpTo (*this, maxCharsToCount); 229 } 230 231 /** Returns the number of characters in this string, or up to the given end pointer, whichever is lower. */ lengthUpTo(CharPointer_UTF16 end)232 size_t lengthUpTo (CharPointer_UTF16 end) const noexcept 233 { 234 return CharacterFunctions::lengthUpTo (*this, end); 235 } 236 237 /** Returns the number of bytes that are used to represent this string. 238 This includes the terminating null character. 239 */ sizeInBytes()240 size_t sizeInBytes() const noexcept 241 { 242 return sizeof (CharType) * (findNullIndex (data) + 1); 243 } 244 245 /** Returns the number of bytes that would be needed to represent the given 246 unicode character in this encoding format. 247 */ getBytesRequiredFor(juce_wchar charToWrite)248 static size_t getBytesRequiredFor (juce_wchar charToWrite) noexcept 249 { 250 return (charToWrite >= 0x10000) ? (sizeof (CharType) * 2) : sizeof (CharType); 251 } 252 253 /** Returns the number of bytes that would be needed to represent the given 254 string in this encoding format. 255 The value returned does NOT include the terminating null character. 256 */ 257 template <class CharPointer> getBytesRequiredFor(CharPointer text)258 static size_t getBytesRequiredFor (CharPointer text) noexcept 259 { 260 size_t count = 0; 261 juce_wchar n; 262 263 while ((n = text.getAndAdvance()) != 0) 264 count += getBytesRequiredFor (n); 265 266 return count; 267 } 268 269 /** Returns a pointer to the null character that terminates this string. */ findTerminatingNull()270 CharPointer_UTF16 findTerminatingNull() const noexcept 271 { 272 auto* t = data; 273 274 while (*t != 0) 275 ++t; 276 277 return CharPointer_UTF16 (t); 278 } 279 280 /** Copies a source string to this pointer, advancing this pointer as it goes. */ 281 template <typename CharPointer> writeAll(CharPointer src)282 void writeAll (CharPointer src) noexcept 283 { 284 CharacterFunctions::copyAll (*this, src); 285 } 286 287 /** Copies a source string to this pointer, advancing this pointer as it goes. */ writeAll(CharPointer_UTF16 src)288 void writeAll (CharPointer_UTF16 src) noexcept 289 { 290 auto* s = src.data; 291 292 while ((*data = *s) != 0) 293 { 294 ++data; 295 ++s; 296 } 297 } 298 299 /** Copies a source string to this pointer, advancing this pointer as it goes. 300 The maxDestBytes parameter specifies the maximum number of bytes that can be written 301 to the destination buffer before stopping. 302 */ 303 template <typename CharPointer> writeWithDestByteLimit(CharPointer src,size_t maxDestBytes)304 size_t writeWithDestByteLimit (CharPointer src, size_t maxDestBytes) noexcept 305 { 306 return CharacterFunctions::copyWithDestByteLimit (*this, src, maxDestBytes); 307 } 308 309 /** Copies a source string to this pointer, advancing this pointer as it goes. 310 The maxChars parameter specifies the maximum number of characters that can be 311 written to the destination buffer before stopping (including the terminating null). 312 */ 313 template <typename CharPointer> writeWithCharLimit(CharPointer src,int maxChars)314 void writeWithCharLimit (CharPointer src, int maxChars) noexcept 315 { 316 CharacterFunctions::copyWithCharLimit (*this, src, maxChars); 317 } 318 319 /** Compares this string with another one. */ 320 template <typename CharPointer> compare(CharPointer other)321 int compare (CharPointer other) const noexcept 322 { 323 return CharacterFunctions::compare (*this, other); 324 } 325 326 /** Compares this string with another one, up to a specified number of characters. */ 327 template <typename CharPointer> compareUpTo(CharPointer other,int maxChars)328 int compareUpTo (CharPointer other, int maxChars) const noexcept 329 { 330 return CharacterFunctions::compareUpTo (*this, other, maxChars); 331 } 332 333 /** Compares this string with another one. */ 334 template <typename CharPointer> compareIgnoreCase(CharPointer other)335 int compareIgnoreCase (CharPointer other) const noexcept 336 { 337 return CharacterFunctions::compareIgnoreCase (*this, other); 338 } 339 340 /** Compares this string with another one, up to a specified number of characters. */ 341 template <typename CharPointer> compareIgnoreCaseUpTo(CharPointer other,int maxChars)342 int compareIgnoreCaseUpTo (CharPointer other, int maxChars) const noexcept 343 { 344 return CharacterFunctions::compareIgnoreCaseUpTo (*this, other, maxChars); 345 } 346 347 #if JUCE_MSVC && ! DOXYGEN compareIgnoreCase(CharPointer_UTF16 other)348 int compareIgnoreCase (CharPointer_UTF16 other) const noexcept 349 { 350 return _wcsicmp (data, other.data); 351 } 352 compareIgnoreCaseUpTo(CharPointer_UTF16 other,int maxChars)353 int compareIgnoreCaseUpTo (CharPointer_UTF16 other, int maxChars) const noexcept 354 { 355 return _wcsnicmp (data, other.data, (size_t) maxChars); 356 } 357 indexOf(CharPointer_UTF16 stringToFind)358 int indexOf (CharPointer_UTF16 stringToFind) const noexcept 359 { 360 const CharType* const t = wcsstr (data, stringToFind.getAddress()); 361 return t == nullptr ? -1 : (int) (t - data); 362 } 363 #endif 364 365 /** Returns the character index of a substring, or -1 if it isn't found. */ 366 template <typename CharPointer> indexOf(CharPointer stringToFind)367 int indexOf (CharPointer stringToFind) const noexcept 368 { 369 return CharacterFunctions::indexOf (*this, stringToFind); 370 } 371 372 /** Returns the character index of a unicode character, or -1 if it isn't found. */ indexOf(juce_wchar charToFind)373 int indexOf (juce_wchar charToFind) const noexcept 374 { 375 return CharacterFunctions::indexOfChar (*this, charToFind); 376 } 377 378 /** Returns the character index of a unicode character, or -1 if it isn't found. */ indexOf(juce_wchar charToFind,bool ignoreCase)379 int indexOf (juce_wchar charToFind, bool ignoreCase) const noexcept 380 { 381 return ignoreCase ? CharacterFunctions::indexOfCharIgnoreCase (*this, charToFind) 382 : CharacterFunctions::indexOfChar (*this, charToFind); 383 } 384 385 /** Returns true if the first character of this string is whitespace. */ isWhitespace()386 bool isWhitespace() const noexcept { return CharacterFunctions::isWhitespace (operator*()) != 0; } 387 /** Returns true if the first character of this string is a digit. */ isDigit()388 bool isDigit() const noexcept { return CharacterFunctions::isDigit (operator*()) != 0; } 389 /** Returns true if the first character of this string is a letter. */ isLetter()390 bool isLetter() const noexcept { return CharacterFunctions::isLetter (operator*()) != 0; } 391 /** Returns true if the first character of this string is a letter or digit. */ isLetterOrDigit()392 bool isLetterOrDigit() const noexcept { return CharacterFunctions::isLetterOrDigit (operator*()) != 0; } 393 /** Returns true if the first character of this string is upper-case. */ isUpperCase()394 bool isUpperCase() const noexcept { return CharacterFunctions::isUpperCase (operator*()) != 0; } 395 /** Returns true if the first character of this string is lower-case. */ isLowerCase()396 bool isLowerCase() const noexcept { return CharacterFunctions::isLowerCase (operator*()) != 0; } 397 398 /** Returns an upper-case version of the first character of this string. */ toUpperCase()399 juce_wchar toUpperCase() const noexcept { return CharacterFunctions::toUpperCase (operator*()); } 400 /** Returns a lower-case version of the first character of this string. */ toLowerCase()401 juce_wchar toLowerCase() const noexcept { return CharacterFunctions::toLowerCase (operator*()); } 402 403 /** Parses this string as a 32-bit integer. */ getIntValue32()404 int getIntValue32() const noexcept 405 { 406 #if JUCE_MSVC 407 return _wtoi (data); 408 #else 409 return CharacterFunctions::getIntValue<int, CharPointer_UTF16> (*this); 410 #endif 411 } 412 413 /** Parses this string as a 64-bit integer. */ getIntValue64()414 int64 getIntValue64() const noexcept 415 { 416 #if JUCE_MSVC 417 return _wtoi64 (data); 418 #else 419 return CharacterFunctions::getIntValue<int64, CharPointer_UTF16> (*this); 420 #endif 421 } 422 423 /** Parses this string as a floating point double. */ getDoubleValue()424 double getDoubleValue() const noexcept { return CharacterFunctions::getDoubleValue (*this); } 425 426 /** Returns the first non-whitespace character in the string. */ findEndOfWhitespace()427 CharPointer_UTF16 findEndOfWhitespace() const noexcept { return CharacterFunctions::findEndOfWhitespace (*this); } 428 429 /** Returns true if the given unicode character can be represented in this encoding. */ canRepresent(juce_wchar character)430 static bool canRepresent (juce_wchar character) noexcept 431 { 432 auto n = (uint32) character; 433 return n < 0x10ffff && (n < 0xd800 || n > 0xdfff); 434 } 435 436 /** Returns true if this data contains a valid string in this encoding. */ isValidString(const CharType * dataToTest,int maxBytesToRead)437 static bool isValidString (const CharType* dataToTest, int maxBytesToRead) 438 { 439 maxBytesToRead /= (int) sizeof (CharType); 440 441 while (--maxBytesToRead >= 0 && *dataToTest != 0) 442 { 443 auto n = (uint32) (uint16) *dataToTest++; 444 445 if (n >= 0xd800) 446 { 447 if (n > 0x10ffff) 448 return false; 449 450 if (n <= 0xdfff) 451 { 452 if (n > 0xdc00) 453 return false; 454 455 auto nextChar = (uint32) (uint16) *dataToTest++; 456 457 if (nextChar < 0xdc00 || nextChar > 0xdfff) 458 return false; 459 } 460 } 461 } 462 463 return true; 464 } 465 466 /** Atomically swaps this pointer for a new value, returning the previous value. */ atomicSwap(CharPointer_UTF16 newValue)467 CharPointer_UTF16 atomicSwap (CharPointer_UTF16 newValue) 468 { 469 return CharPointer_UTF16 (reinterpret_cast<Atomic<CharType*>&> (data).exchange (newValue.data)); 470 } 471 472 /** These values are the byte-order-mark (BOM) values for a UTF-16 stream. */ 473 enum 474 { 475 byteOrderMarkBE1 = 0xfe, 476 byteOrderMarkBE2 = 0xff, 477 byteOrderMarkLE1 = 0xff, 478 byteOrderMarkLE2 = 0xfe 479 }; 480 481 /** Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian). 482 The pointer must not be null, and must contain at least two valid bytes. 483 */ isByteOrderMarkBigEndian(const void * possibleByteOrder)484 static bool isByteOrderMarkBigEndian (const void* possibleByteOrder) noexcept 485 { 486 jassert (possibleByteOrder != nullptr); 487 auto c = static_cast<const uint8*> (possibleByteOrder); 488 489 return c[0] == (uint8) byteOrderMarkBE1 490 && c[1] == (uint8) byteOrderMarkBE2; 491 } 492 493 /** Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian). 494 The pointer must not be null, and must contain at least two valid bytes. 495 */ isByteOrderMarkLittleEndian(const void * possibleByteOrder)496 static bool isByteOrderMarkLittleEndian (const void* possibleByteOrder) noexcept 497 { 498 jassert (possibleByteOrder != nullptr); 499 auto c = static_cast<const uint8*> (possibleByteOrder); 500 501 return c[0] == (uint8) byteOrderMarkLE1 502 && c[1] == (uint8) byteOrderMarkLE2; 503 } 504 505 private: 506 CharType* data; 507 findNullIndex(const CharType * t)508 static unsigned int findNullIndex (const CharType* t) noexcept 509 { 510 unsigned int n = 0; 511 512 while (t[n] != 0) 513 ++n; 514 515 return n; 516 } 517 }; 518 519 } // namespace juce 520