1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #include "nsReadableUtils.h" 8 #include "nsReadableUtilsImpl.h" 9 10 #include <algorithm> 11 12 #include "mozilla/CheckedInt.h" 13 14 #include "nscore.h" 15 #include "nsMemory.h" 16 #include "nsString.h" 17 #include "nsTArray.h" 18 #include "nsUTF8Utils.h" 19 20 using mozilla::IsASCII; 21 22 /** 23 * Fallback implementation for finding the first non-ASCII character in a 24 * UTF-16 string. 25 */ 26 static inline int32_t 27 FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd) 28 { 29 typedef mozilla::NonASCIIParameters<sizeof(size_t)> p; 30 const size_t kMask = p::mask(); 31 const uintptr_t kAlignMask = p::alignMask(); 32 const size_t kNumUnicharsPerWord = p::numUnicharsPerWord(); 33 34 const char16_t* idx = aBegin; 35 36 // Align ourselves to a word boundary. 37 for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) { 38 if (!IsASCII(*idx)) { 39 return idx - aBegin; 40 } 41 } 42 43 // Check one word at a time. 44 const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask); 45 for (; idx != wordWalkEnd; idx += kNumUnicharsPerWord) { 46 const size_t word = *reinterpret_cast<const size_t*>(idx); 47 if (word & kMask) { 48 return idx - aBegin; 49 } 50 } 51 52 // Take care of the remainder one character at a time. 53 for (; idx != aEnd; idx++) { 54 if (!IsASCII(*idx)) { 55 return idx - aBegin; 56 } 57 } 58 59 return -1; 60 } 61 62 /* 63 * This function returns -1 if all characters in str are ASCII characters. 64 * Otherwise, it returns a value less than or equal to the index of the first 65 * ASCII character in str. For example, if first non-ASCII character is at 66 * position 25, it may return 25, 24, or 16. But it guarantees 67 * there are only ASCII characters before returned value. 68 */ 69 static inline int32_t 70 FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd) 71 { 72 #ifdef MOZILLA_MAY_SUPPORT_SSE2 73 if (mozilla::supports_sse2()) { 74 return mozilla::SSE2::FirstNonASCII(aBegin, aEnd); 75 } 76 #endif 77 78 return FirstNonASCIIUnvectorized(aBegin, aEnd); 79 } 80 81 void 82 LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest) 83 { 84 aDest.Truncate(); 85 LossyAppendUTF16toASCII(aSource, aDest); 86 } 87 88 void 89 CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest) 90 { 91 aDest.Truncate(); 92 AppendASCIItoUTF16(aSource, aDest); 93 } 94 95 void 96 LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest) 97 { 98 aDest.Truncate(); 99 if (aSource) { 100 LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); 101 } 102 } 103 104 void 105 CopyASCIItoUTF16(const char* aSource, nsAString& aDest) 106 { 107 aDest.Truncate(); 108 if (aSource) { 109 AppendASCIItoUTF16(nsDependentCString(aSource), aDest); 110 } 111 } 112 113 void 114 CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest) 115 { 116 if (!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible)) { 117 // Note that this may wildly underestimate the allocation that failed, as 118 // we report the length of aSource as UTF-16 instead of UTF-8. 119 aDest.AllocFailed(aDest.Length() + aSource.Length()); 120 } 121 } 122 123 bool 124 CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest, 125 const mozilla::fallible_t& aFallible) 126 { 127 aDest.Truncate(); 128 if (!AppendUTF16toUTF8(aSource, aDest, aFallible)) { 129 return false; 130 } 131 return true; 132 } 133 134 void 135 CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest) 136 { 137 aDest.Truncate(); 138 AppendUTF8toUTF16(aSource, aDest); 139 } 140 141 void 142 CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest) 143 { 144 aDest.Truncate(); 145 AppendUTF16toUTF8(aSource, aDest); 146 } 147 148 void 149 CopyUTF8toUTF16(const char* aSource, nsAString& aDest) 150 { 151 aDest.Truncate(); 152 AppendUTF8toUTF16(aSource, aDest); 153 } 154 155 void 156 LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest) 157 { 158 uint32_t old_dest_length = aDest.Length(); 159 aDest.SetLength(old_dest_length + aSource.Length()); 160 161 nsAString::const_iterator fromBegin, fromEnd; 162 163 nsACString::iterator dest; 164 aDest.BeginWriting(dest); 165 166 dest.advance(old_dest_length); 167 168 // right now, this won't work on multi-fragment destinations 169 LossyConvertEncoding16to8 converter(dest.get()); 170 171 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), 172 converter); 173 } 174 175 void 176 AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest) 177 { 178 if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible)) { 179 aDest.AllocFailed(aDest.Length() + aSource.Length()); 180 } 181 } 182 183 bool 184 AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest, 185 const mozilla::fallible_t& aFallible) 186 { 187 uint32_t old_dest_length = aDest.Length(); 188 if (!aDest.SetLength(old_dest_length + aSource.Length(), 189 aFallible)) { 190 return false; 191 } 192 193 nsACString::const_iterator fromBegin, fromEnd; 194 195 nsAString::iterator dest; 196 aDest.BeginWriting(dest); 197 198 dest.advance(old_dest_length); 199 200 // right now, this won't work on multi-fragment destinations 201 LossyConvertEncoding8to16 converter(dest.get()); 202 203 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), 204 converter); 205 return true; 206 } 207 208 void 209 LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest) 210 { 211 if (aSource) { 212 LossyAppendUTF16toASCII(nsDependentString(aSource), aDest); 213 } 214 } 215 216 bool 217 AppendASCIItoUTF16(const char* aSource, nsAString& aDest, const mozilla::fallible_t& aFallible) 218 { 219 if (aSource) { 220 return AppendASCIItoUTF16(nsDependentCString(aSource), aDest, aFallible); 221 } 222 223 return true; 224 } 225 226 void 227 AppendASCIItoUTF16(const char* aSource, nsAString& aDest) 228 { 229 if (aSource) { 230 AppendASCIItoUTF16(nsDependentCString(aSource), aDest); 231 } 232 } 233 234 void 235 AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest) 236 { 237 if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible)) { 238 // Note that this may wildly underestimate the allocation that failed, as 239 // we report the length of aSource as UTF-16 instead of UTF-8. 240 aDest.AllocFailed(aDest.Length() + aSource.Length()); 241 } 242 } 243 244 bool 245 AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest, 246 const mozilla::fallible_t& aFallible) 247 { 248 // At 16 characters analysis showed better performance of both the all ASCII 249 // and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of 250 // that length. 251 const nsAString::size_type kFastPathMinLength = 16; 252 253 int32_t firstNonASCII = 0; 254 if (aSource.Length() >= kFastPathMinLength) { 255 firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading()); 256 } 257 258 if (firstNonASCII == -1) { 259 // This is all ASCII, we can use the more efficient lossy append. 260 mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length()); 261 new_length += aDest.Length(); 262 263 if (!new_length.isValid() || 264 !aDest.SetCapacity(new_length.value(), aFallible)) { 265 return false; 266 } 267 268 LossyAppendUTF16toASCII(aSource, aDest); 269 return true; 270 } 271 272 nsAString::const_iterator source_start, source_end; 273 CalculateUTF8Size calculator; 274 aSource.BeginReading(source_start); 275 aSource.EndReading(source_end); 276 277 // Skip the characters that we know are single byte. 278 source_start.advance(firstNonASCII); 279 280 copy_string(source_start, 281 source_end, calculator); 282 283 // Include the ASCII characters that were skipped in the count. 284 size_t count = calculator.Size() + firstNonASCII; 285 286 if (count) { 287 auto old_dest_length = aDest.Length(); 288 // Grow the buffer if we need to. 289 mozilla::CheckedInt<nsACString::size_type> new_length(count); 290 new_length += old_dest_length; 291 292 if (!new_length.isValid() || 293 !aDest.SetLength(new_length.value(), aFallible)) { 294 return false; 295 } 296 297 // All ready? Time to convert 298 299 nsAString::const_iterator ascii_end; 300 aSource.BeginReading(ascii_end); 301 302 if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) { 303 // Use the more efficient lossy converter for the ASCII portion. 304 LossyConvertEncoding16to8 lossy_converter( 305 aDest.BeginWriting() + old_dest_length); 306 nsAString::const_iterator ascii_start; 307 aSource.BeginReading(ascii_start); 308 ascii_end.advance(firstNonASCII); 309 310 copy_string(ascii_start, ascii_end, lossy_converter); 311 } else { 312 // Not using the lossy shortcut, we need to include the leading ASCII 313 // chars. 314 firstNonASCII = 0; 315 } 316 317 ConvertUTF16toUTF8 converter( 318 aDest.BeginWriting() + old_dest_length + firstNonASCII); 319 copy_string(ascii_end, 320 aSource.EndReading(source_end), converter); 321 322 NS_ASSERTION(converter.Size() == count - firstNonASCII, 323 "Unexpected disparity between CalculateUTF8Size and " 324 "ConvertUTF16toUTF8"); 325 } 326 327 return true; 328 } 329 330 void 331 AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest) 332 { 333 if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible)) { 334 aDest.AllocFailed(aDest.Length() + aSource.Length()); 335 } 336 } 337 338 bool 339 AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest, 340 const mozilla::fallible_t& aFallible) 341 { 342 nsACString::const_iterator source_start, source_end; 343 CalculateUTF8Length calculator; 344 copy_string(aSource.BeginReading(source_start), 345 aSource.EndReading(source_end), calculator); 346 347 uint32_t count = calculator.Length(); 348 349 // Avoid making the string mutable if we're appending an empty string 350 if (count) { 351 uint32_t old_dest_length = aDest.Length(); 352 353 // Grow the buffer if we need to. 354 if (!aDest.SetLength(old_dest_length + count, aFallible)) { 355 return false; 356 } 357 358 // All ready? Time to convert 359 360 ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length); 361 copy_string(aSource.BeginReading(source_start), 362 aSource.EndReading(source_end), converter); 363 364 NS_ASSERTION(converter.ErrorEncountered() || 365 converter.Length() == count, 366 "CalculateUTF8Length produced the wrong length"); 367 368 if (converter.ErrorEncountered()) { 369 NS_ERROR("Input wasn't UTF8 or incorrect length was calculated"); 370 aDest.SetLength(old_dest_length); 371 } 372 } 373 374 return true; 375 } 376 377 void 378 AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest) 379 { 380 if (aSource) { 381 AppendUTF16toUTF8(nsDependentString(aSource), aDest); 382 } 383 } 384 385 void 386 AppendUTF8toUTF16(const char* aSource, nsAString& aDest) 387 { 388 if (aSource) { 389 AppendUTF8toUTF16(nsDependentCString(aSource), aDest); 390 } 391 } 392 393 394 /** 395 * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator). 396 * 397 * @param aSource an string you will eventually be making a copy of 398 * @return a new buffer (of the type specified by the second parameter) which you must free with |free|. 399 * 400 */ 401 template <class FromStringT, class ToCharT> 402 inline 403 ToCharT* 404 AllocateStringCopy(const FromStringT& aSource, ToCharT*) 405 { 406 return static_cast<ToCharT*>(moz_xmalloc( 407 (aSource.Length() + 1) * sizeof(ToCharT))); 408 } 409 410 411 char* 412 ToNewCString(const nsAString& aSource) 413 { 414 char* result = AllocateStringCopy(aSource, (char*)0); 415 if (!result) { 416 return nullptr; 417 } 418 419 nsAString::const_iterator fromBegin, fromEnd; 420 LossyConvertEncoding16to8 converter(result); 421 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), 422 converter).write_terminator(); 423 return result; 424 } 425 426 char* 427 ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count) 428 { 429 nsAString::const_iterator start, end; 430 CalculateUTF8Size calculator; 431 copy_string(aSource.BeginReading(start), aSource.EndReading(end), 432 calculator); 433 434 if (aUTF8Count) { 435 *aUTF8Count = calculator.Size(); 436 } 437 438 char* result = static_cast<char*> 439 (moz_xmalloc(calculator.Size() + 1)); 440 if (!result) { 441 return nullptr; 442 } 443 444 ConvertUTF16toUTF8 converter(result); 445 copy_string(aSource.BeginReading(start), aSource.EndReading(end), 446 converter).write_terminator(); 447 NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch"); 448 449 return result; 450 } 451 452 char* 453 ToNewCString(const nsACString& aSource) 454 { 455 // no conversion needed, just allocate a buffer of the correct length and copy into it 456 457 char* result = AllocateStringCopy(aSource, (char*)0); 458 if (!result) { 459 return nullptr; 460 } 461 462 nsACString::const_iterator fromBegin, fromEnd; 463 char* toBegin = result; 464 *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), 465 toBegin) = char(0); 466 return result; 467 } 468 469 char16_t* 470 ToNewUnicode(const nsAString& aSource) 471 { 472 // no conversion needed, just allocate a buffer of the correct length and copy into it 473 474 char16_t* result = AllocateStringCopy(aSource, (char16_t*)0); 475 if (!result) { 476 return nullptr; 477 } 478 479 nsAString::const_iterator fromBegin, fromEnd; 480 char16_t* toBegin = result; 481 *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), 482 toBegin) = char16_t(0); 483 return result; 484 } 485 486 char16_t* 487 ToNewUnicode(const nsACString& aSource) 488 { 489 char16_t* result = AllocateStringCopy(aSource, (char16_t*)0); 490 if (!result) { 491 return nullptr; 492 } 493 494 nsACString::const_iterator fromBegin, fromEnd; 495 LossyConvertEncoding8to16 converter(result); 496 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), 497 converter).write_terminator(); 498 return result; 499 } 500 501 uint32_t 502 CalcUTF8ToUnicodeLength(const nsACString& aSource) 503 { 504 nsACString::const_iterator start, end; 505 CalculateUTF8Length calculator; 506 copy_string(aSource.BeginReading(start), aSource.EndReading(end), 507 calculator); 508 return calculator.Length(); 509 } 510 511 char16_t* 512 UTF8ToUnicodeBuffer(const nsACString& aSource, char16_t* aBuffer, 513 uint32_t* aUTF16Count) 514 { 515 nsACString::const_iterator start, end; 516 ConvertUTF8toUTF16 converter(aBuffer); 517 copy_string(aSource.BeginReading(start), 518 aSource.EndReading(end), 519 converter).write_terminator(); 520 if (aUTF16Count) { 521 *aUTF16Count = converter.Length(); 522 } 523 return aBuffer; 524 } 525 526 char16_t* 527 UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count) 528 { 529 const uint32_t length = CalcUTF8ToUnicodeLength(aSource); 530 const size_t buffer_size = (length + 1) * sizeof(char16_t); 531 char16_t* buffer = static_cast<char16_t*>(moz_xmalloc(buffer_size)); 532 if (!buffer) { 533 return nullptr; 534 } 535 536 uint32_t copied; 537 UTF8ToUnicodeBuffer(aSource, buffer, &copied); 538 NS_ASSERTION(length == copied, "length mismatch"); 539 540 if (aUTF16Count) { 541 *aUTF16Count = copied; 542 } 543 return buffer; 544 } 545 546 char16_t* 547 CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest, 548 uint32_t aLength) 549 { 550 nsAString::const_iterator fromBegin, fromEnd; 551 char16_t* toBegin = aDest; 552 copy_string(aSource.BeginReading(fromBegin).advance(int32_t(aSrcOffset)), 553 aSource.BeginReading(fromEnd).advance(int32_t(aSrcOffset + aLength)), 554 toBegin); 555 return aDest; 556 } 557 558 void 559 CopyUnicodeTo(const nsAString::const_iterator& aSrcStart, 560 const nsAString::const_iterator& aSrcEnd, 561 nsAString& aDest) 562 { 563 aDest.SetLength(Distance(aSrcStart, aSrcEnd)); 564 565 nsAString::char_iterator dest = aDest.BeginWriting(); 566 nsAString::const_iterator fromBegin(aSrcStart); 567 568 copy_string(fromBegin, aSrcEnd, dest); 569 } 570 571 void 572 AppendUnicodeTo(const nsAString::const_iterator& aSrcStart, 573 const nsAString::const_iterator& aSrcEnd, 574 nsAString& aDest) 575 { 576 uint32_t oldLength = aDest.Length(); 577 aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd)); 578 579 nsAString::char_iterator dest = aDest.BeginWriting() + oldLength; 580 nsAString::const_iterator fromBegin(aSrcStart); 581 582 copy_string(fromBegin, aSrcEnd, dest); 583 } 584 585 bool 586 IsASCII(const nsAString& aString) 587 { 588 static const char16_t NOT_ASCII = char16_t(~0x007F); 589 590 591 // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character 592 593 nsAString::const_iterator iter, done_reading; 594 aString.BeginReading(iter); 595 aString.EndReading(done_reading); 596 597 const char16_t* c = iter.get(); 598 const char16_t* end = done_reading.get(); 599 600 while (c < end) { 601 if (*c++ & NOT_ASCII) { 602 return false; 603 } 604 } 605 606 return true; 607 } 608 609 bool 610 IsASCII(const nsACString& aString) 611 { 612 static const char NOT_ASCII = char(~0x7F); 613 614 615 // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character 616 617 nsACString::const_iterator iter, done_reading; 618 aString.BeginReading(iter); 619 aString.EndReading(done_reading); 620 621 const char* c = iter.get(); 622 const char* end = done_reading.get(); 623 624 while (c < end) { 625 if (*c++ & NOT_ASCII) { 626 return false; 627 } 628 } 629 630 return true; 631 } 632 633 bool 634 IsUTF8(const nsACString& aString, bool aRejectNonChar) 635 { 636 nsReadingIterator<char> done_reading; 637 aString.EndReading(done_reading); 638 639 int32_t state = 0; 640 bool overlong = false; 641 bool surrogate = false; 642 bool nonchar = false; 643 uint16_t olupper = 0; // overlong byte upper bound. 644 uint16_t slower = 0; // surrogate byte lower bound. 645 646 nsReadingIterator<char> iter; 647 aString.BeginReading(iter); 648 649 const char* ptr = iter.get(); 650 const char* end = done_reading.get(); 651 while (ptr < end) { 652 uint8_t c; 653 654 if (0 == state) { 655 c = *ptr++; 656 657 if (UTF8traits::isASCII(c)) { 658 continue; 659 } 660 661 if (c <= 0xC1) { // [80-BF] where not expected, [C0-C1] for overlong. 662 return false; 663 } else if (UTF8traits::is2byte(c)) { 664 state = 1; 665 } else if (UTF8traits::is3byte(c)) { 666 state = 2; 667 if (c == 0xE0) { // to exclude E0[80-9F][80-BF] 668 overlong = true; 669 olupper = 0x9F; 670 } else if (c == 0xED) { // ED[A0-BF][80-BF] : surrogate codepoint 671 surrogate = true; 672 slower = 0xA0; 673 } else if (c == 0xEF) { // EF BF [BE-BF] : non-character 674 nonchar = true; 675 } 676 } else if (c <= 0xF4) { // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090) 677 state = 3; 678 nonchar = true; 679 if (c == 0xF0) { // to exclude F0[80-8F][80-BF]{2} 680 overlong = true; 681 olupper = 0x8F; 682 } else if (c == 0xF4) { // to exclude F4[90-BF][80-BF] 683 // actually not surrogates but codepoints beyond 0x10FFFF 684 surrogate = true; 685 slower = 0x90; 686 } 687 } else { 688 return false; // Not UTF-8 string 689 } 690 } 691 692 if (nonchar && !aRejectNonChar) { 693 nonchar = false; 694 } 695 696 while (ptr < end && state) { 697 c = *ptr++; 698 --state; 699 700 // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF] 701 if (nonchar && 702 ((!state && c < 0xBE) || 703 (state == 1 && c != 0xBF) || 704 (state == 2 && 0x0F != (0x0F & c)))) { 705 nonchar = false; 706 } 707 708 if (!UTF8traits::isInSeq(c) || (overlong && c <= olupper) || 709 (surrogate && slower <= c) || (nonchar && !state)) { 710 return false; // Not UTF-8 string 711 } 712 713 overlong = surrogate = false; 714 } 715 } 716 return !state; // state != 0 at the end indicates an invalid UTF-8 seq. 717 } 718 719 /** 720 * A character sink for in-place case conversion. 721 */ 722 class ConvertToUpperCase 723 { 724 public: 725 typedef char value_type; 726 727 uint32_t 728 write(const char* aSource, uint32_t aSourceLength) 729 { 730 char* cp = const_cast<char*>(aSource); 731 const char* end = aSource + aSourceLength; 732 while (cp != end) { 733 char ch = *cp; 734 if (ch >= 'a' && ch <= 'z') { 735 *cp = ch - ('a' - 'A'); 736 } 737 ++cp; 738 } 739 return aSourceLength; 740 } 741 }; 742 743 void 744 ToUpperCase(nsCSubstring& aCString) 745 { 746 ConvertToUpperCase converter; 747 char* start; 748 converter.write(aCString.BeginWriting(start), aCString.Length()); 749 } 750 751 /** 752 * A character sink for copying with case conversion. 753 */ 754 class CopyToUpperCase 755 { 756 public: 757 typedef char value_type; 758 759 explicit CopyToUpperCase(nsACString::iterator& aDestIter, 760 const nsACString::iterator& aEndIter) 761 : mIter(aDestIter) 762 , mEnd(aEndIter) 763 { 764 } 765 766 uint32_t 767 write(const char* aSource, uint32_t aSourceLength) 768 { 769 uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength); 770 char* cp = mIter.get(); 771 const char* end = aSource + len; 772 while (aSource != end) { 773 char ch = *aSource; 774 if ((ch >= 'a') && (ch <= 'z')) { 775 *cp = ch - ('a' - 'A'); 776 } else { 777 *cp = ch; 778 } 779 ++aSource; 780 ++cp; 781 } 782 mIter.advance(len); 783 return len; 784 } 785 786 protected: 787 nsACString::iterator& mIter; 788 const nsACString::iterator& mEnd; 789 }; 790 791 void 792 ToUpperCase(const nsACString& aSource, nsACString& aDest) 793 { 794 nsACString::const_iterator fromBegin, fromEnd; 795 nsACString::iterator toBegin, toEnd; 796 aDest.SetLength(aSource.Length()); 797 798 CopyToUpperCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd)); 799 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), 800 converter); 801 } 802 803 /** 804 * A character sink for case conversion. 805 */ 806 class ConvertToLowerCase 807 { 808 public: 809 typedef char value_type; 810 811 uint32_t 812 write(const char* aSource, uint32_t aSourceLength) 813 { 814 char* cp = const_cast<char*>(aSource); 815 const char* end = aSource + aSourceLength; 816 while (cp != end) { 817 char ch = *cp; 818 if ((ch >= 'A') && (ch <= 'Z')) { 819 *cp = ch + ('a' - 'A'); 820 } 821 ++cp; 822 } 823 return aSourceLength; 824 } 825 }; 826 827 void 828 ToLowerCase(nsCSubstring& aCString) 829 { 830 ConvertToLowerCase converter; 831 char* start; 832 converter.write(aCString.BeginWriting(start), aCString.Length()); 833 } 834 835 /** 836 * A character sink for copying with case conversion. 837 */ 838 class CopyToLowerCase 839 { 840 public: 841 typedef char value_type; 842 843 explicit CopyToLowerCase(nsACString::iterator& aDestIter, 844 const nsACString::iterator& aEndIter) 845 : mIter(aDestIter) 846 , mEnd(aEndIter) 847 { 848 } 849 850 uint32_t 851 write(const char* aSource, uint32_t aSourceLength) 852 { 853 uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength); 854 char* cp = mIter.get(); 855 const char* end = aSource + len; 856 while (aSource != end) { 857 char ch = *aSource; 858 if ((ch >= 'A') && (ch <= 'Z')) { 859 *cp = ch + ('a' - 'A'); 860 } else { 861 *cp = ch; 862 } 863 ++aSource; 864 ++cp; 865 } 866 mIter.advance(len); 867 return len; 868 } 869 870 protected: 871 nsACString::iterator& mIter; 872 const nsACString::iterator& mEnd; 873 }; 874 875 void 876 ToLowerCase(const nsACString& aSource, nsACString& aDest) 877 { 878 nsACString::const_iterator fromBegin, fromEnd; 879 nsACString::iterator toBegin, toEnd; 880 aDest.SetLength(aSource.Length()); 881 882 CopyToLowerCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd)); 883 copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd), 884 converter); 885 } 886 887 bool 888 ParseString(const nsACString& aSource, char aDelimiter, 889 nsTArray<nsCString>& aArray) 890 { 891 nsACString::const_iterator start, end; 892 aSource.BeginReading(start); 893 aSource.EndReading(end); 894 895 uint32_t oldLength = aArray.Length(); 896 897 for (;;) { 898 nsACString::const_iterator delimiter = start; 899 FindCharInReadable(aDelimiter, delimiter, end); 900 901 if (delimiter != start) { 902 if (!aArray.AppendElement(Substring(start, delimiter))) { 903 aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength); 904 return false; 905 } 906 } 907 908 if (delimiter == end) { 909 break; 910 } 911 start = ++delimiter; 912 if (start == end) { 913 break; 914 } 915 } 916 917 return true; 918 } 919 920 template <class StringT, class IteratorT, class Comparator> 921 bool 922 FindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart, 923 IteratorT& aSearchEnd, const Comparator& aCompare) 924 { 925 bool found_it = false; 926 927 // only bother searching at all if we're given a non-empty range to search 928 if (aSearchStart != aSearchEnd) { 929 IteratorT aPatternStart, aPatternEnd; 930 aPattern.BeginReading(aPatternStart); 931 aPattern.EndReading(aPatternEnd); 932 933 // outer loop keeps searching till we find it or run out of string to search 934 while (!found_it) { 935 // fast inner loop (that's what it's called, not what it is) looks for a potential match 936 while (aSearchStart != aSearchEnd && 937 aCompare(aPatternStart.get(), aSearchStart.get(), 1, 1)) { 938 ++aSearchStart; 939 } 940 941 // if we broke out of the `fast' loop because we're out of string ... we're done: no match 942 if (aSearchStart == aSearchEnd) { 943 break; 944 } 945 946 // otherwise, we're at a potential match, let's see if we really hit one 947 IteratorT testPattern(aPatternStart); 948 IteratorT testSearch(aSearchStart); 949 950 // slow inner loop verifies the potential match (found by the `fast' loop) at the current position 951 for (;;) { 952 // we already compared the first character in the outer loop, 953 // so we'll advance before the next comparison 954 ++testPattern; 955 ++testSearch; 956 957 // if we verified all the way to the end of the pattern, then we found it! 958 if (testPattern == aPatternEnd) { 959 found_it = true; 960 aSearchEnd = testSearch; // return the exact found range through the parameters 961 break; 962 } 963 964 // if we got to end of the string we're searching before we hit the end of the 965 // pattern, we'll never find what we're looking for 966 if (testSearch == aSearchEnd) { 967 aSearchStart = aSearchEnd; 968 break; 969 } 970 971 // else if we mismatched ... it's time to advance to the next search position 972 // and get back into the `fast' loop 973 if (aCompare(testPattern.get(), testSearch.get(), 1, 1)) { 974 ++aSearchStart; 975 break; 976 } 977 } 978 } 979 } 980 981 return found_it; 982 } 983 984 /** 985 * This searches the entire string from right to left, and returns the first match found, if any. 986 */ 987 template <class StringT, class IteratorT, class Comparator> 988 bool 989 RFindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart, 990 IteratorT& aSearchEnd, const Comparator& aCompare) 991 { 992 IteratorT patternStart, patternEnd, searchEnd = aSearchEnd; 993 aPattern.BeginReading(patternStart); 994 aPattern.EndReading(patternEnd); 995 996 // Point to the last character in the pattern 997 --patternEnd; 998 // outer loop keeps searching till we run out of string to search 999 while (aSearchStart != searchEnd) { 1000 // Point to the end position of the next possible match 1001 --searchEnd; 1002 1003 // Check last character, if a match, explore further from here 1004 if (aCompare(patternEnd.get(), searchEnd.get(), 1, 1) == 0) { 1005 // We're at a potential match, let's see if we really hit one 1006 IteratorT testPattern(patternEnd); 1007 IteratorT testSearch(searchEnd); 1008 1009 // inner loop verifies the potential match at the current position 1010 do { 1011 // if we verified all the way to the end of the pattern, then we found it! 1012 if (testPattern == patternStart) { 1013 aSearchStart = testSearch; // point to start of match 1014 aSearchEnd = ++searchEnd; // point to end of match 1015 return true; 1016 } 1017 1018 // if we got to end of the string we're searching before we hit the end of the 1019 // pattern, we'll never find what we're looking for 1020 if (testSearch == aSearchStart) { 1021 aSearchStart = aSearchEnd; 1022 return false; 1023 } 1024 1025 // test previous character for a match 1026 --testPattern; 1027 --testSearch; 1028 } while (aCompare(testPattern.get(), testSearch.get(), 1, 1) == 0); 1029 } 1030 } 1031 1032 aSearchStart = aSearchEnd; 1033 return false; 1034 } 1035 1036 bool 1037 FindInReadable(const nsAString& aPattern, 1038 nsAString::const_iterator& aSearchStart, 1039 nsAString::const_iterator& aSearchEnd, 1040 const nsStringComparator& aComparator) 1041 { 1042 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); 1043 } 1044 1045 bool 1046 FindInReadable(const nsACString& aPattern, 1047 nsACString::const_iterator& aSearchStart, 1048 nsACString::const_iterator& aSearchEnd, 1049 const nsCStringComparator& aComparator) 1050 { 1051 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); 1052 } 1053 1054 bool 1055 CaseInsensitiveFindInReadable(const nsACString& aPattern, 1056 nsACString::const_iterator& aSearchStart, 1057 nsACString::const_iterator& aSearchEnd) 1058 { 1059 return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, 1060 nsCaseInsensitiveCStringComparator()); 1061 } 1062 1063 bool 1064 RFindInReadable(const nsAString& aPattern, 1065 nsAString::const_iterator& aSearchStart, 1066 nsAString::const_iterator& aSearchEnd, 1067 const nsStringComparator& aComparator) 1068 { 1069 return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); 1070 } 1071 1072 bool 1073 RFindInReadable(const nsACString& aPattern, 1074 nsACString::const_iterator& aSearchStart, 1075 nsACString::const_iterator& aSearchEnd, 1076 const nsCStringComparator& aComparator) 1077 { 1078 return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator); 1079 } 1080 1081 bool 1082 FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart, 1083 const nsAString::const_iterator& aSearchEnd) 1084 { 1085 int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); 1086 1087 const char16_t* charFoundAt = 1088 nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar); 1089 if (charFoundAt) { 1090 aSearchStart.advance(charFoundAt - aSearchStart.get()); 1091 return true; 1092 } 1093 1094 aSearchStart.advance(fragmentLength); 1095 return false; 1096 } 1097 1098 bool 1099 FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart, 1100 const nsACString::const_iterator& aSearchEnd) 1101 { 1102 int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get(); 1103 1104 const char* charFoundAt = 1105 nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar); 1106 if (charFoundAt) { 1107 aSearchStart.advance(charFoundAt - aSearchStart.get()); 1108 return true; 1109 } 1110 1111 aSearchStart.advance(fragmentLength); 1112 return false; 1113 } 1114 1115 uint32_t 1116 CountCharInReadable(const nsAString& aStr, char16_t aChar) 1117 { 1118 uint32_t count = 0; 1119 nsAString::const_iterator begin, end; 1120 1121 aStr.BeginReading(begin); 1122 aStr.EndReading(end); 1123 1124 while (begin != end) { 1125 if (*begin == aChar) { 1126 ++count; 1127 } 1128 ++begin; 1129 } 1130 1131 return count; 1132 } 1133 1134 uint32_t 1135 CountCharInReadable(const nsACString& aStr, char aChar) 1136 { 1137 uint32_t count = 0; 1138 nsACString::const_iterator begin, end; 1139 1140 aStr.BeginReading(begin); 1141 aStr.EndReading(end); 1142 1143 while (begin != end) { 1144 if (*begin == aChar) { 1145 ++count; 1146 } 1147 ++begin; 1148 } 1149 1150 return count; 1151 } 1152 1153 bool 1154 StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring) 1155 { 1156 nsAString::size_type src_len = aSource.Length(), 1157 sub_len = aSubstring.Length(); 1158 if (sub_len > src_len) { 1159 return false; 1160 } 1161 return Substring(aSource, 0, sub_len).Equals(aSubstring); 1162 } 1163 1164 bool 1165 StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring, 1166 const nsStringComparator& aComparator) 1167 { 1168 nsAString::size_type src_len = aSource.Length(), 1169 sub_len = aSubstring.Length(); 1170 if (sub_len > src_len) { 1171 return false; 1172 } 1173 return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); 1174 } 1175 1176 bool 1177 StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring) 1178 { 1179 nsACString::size_type src_len = aSource.Length(), 1180 sub_len = aSubstring.Length(); 1181 if (sub_len > src_len) { 1182 return false; 1183 } 1184 return Substring(aSource, 0, sub_len).Equals(aSubstring); 1185 } 1186 1187 bool 1188 StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring, 1189 const nsCStringComparator& aComparator) 1190 { 1191 nsACString::size_type src_len = aSource.Length(), 1192 sub_len = aSubstring.Length(); 1193 if (sub_len > src_len) { 1194 return false; 1195 } 1196 return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator); 1197 } 1198 1199 bool 1200 StringEndsWith(const nsAString& aSource, const nsAString& aSubstring) 1201 { 1202 nsAString::size_type src_len = aSource.Length(), 1203 sub_len = aSubstring.Length(); 1204 if (sub_len > src_len) { 1205 return false; 1206 } 1207 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); 1208 } 1209 1210 bool 1211 StringEndsWith(const nsAString& aSource, const nsAString& aSubstring, 1212 const nsStringComparator& aComparator) 1213 { 1214 nsAString::size_type src_len = aSource.Length(), 1215 sub_len = aSubstring.Length(); 1216 if (sub_len > src_len) { 1217 return false; 1218 } 1219 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, 1220 aComparator); 1221 } 1222 1223 bool 1224 StringEndsWith(const nsACString& aSource, const nsACString& aSubstring) 1225 { 1226 nsACString::size_type src_len = aSource.Length(), 1227 sub_len = aSubstring.Length(); 1228 if (sub_len > src_len) { 1229 return false; 1230 } 1231 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring); 1232 } 1233 1234 bool 1235 StringEndsWith(const nsACString& aSource, const nsACString& aSubstring, 1236 const nsCStringComparator& aComparator) 1237 { 1238 nsACString::size_type src_len = aSource.Length(), 1239 sub_len = aSubstring.Length(); 1240 if (sub_len > src_len) { 1241 return false; 1242 } 1243 return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring, 1244 aComparator); 1245 } 1246 1247 1248 1249 static const char16_t empty_buffer[1] = { '\0' }; 1250 1251 const nsAFlatString& 1252 EmptyString() 1253 { 1254 static const nsDependentString sEmpty(empty_buffer); 1255 1256 return sEmpty; 1257 } 1258 1259 const nsAFlatCString& 1260 EmptyCString() 1261 { 1262 static const nsDependentCString sEmpty((const char*)empty_buffer); 1263 1264 return sEmpty; 1265 } 1266 1267 const nsAFlatString& 1268 NullString() 1269 { 1270 static const nsXPIDLString sNull; 1271 1272 return sNull; 1273 } 1274 1275 const nsAFlatCString& 1276 NullCString() 1277 { 1278 static const nsXPIDLCString sNull; 1279 1280 return sNull; 1281 } 1282 1283 int32_t 1284 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String, 1285 const nsASingleFragmentString& aUTF16String) 1286 { 1287 static const uint32_t NOT_ASCII = uint32_t(~0x7F); 1288 1289 const char* u8; 1290 const char* u8end; 1291 aUTF8String.BeginReading(u8); 1292 aUTF8String.EndReading(u8end); 1293 1294 const char16_t* u16; 1295 const char16_t* u16end; 1296 aUTF16String.BeginReading(u16); 1297 aUTF16String.EndReading(u16end); 1298 1299 while (u8 != u8end && u16 != u16end) { 1300 // Cast away the signedness of *u8 to prevent signextension when 1301 // converting to uint32_t 1302 uint32_t c8_32 = (uint8_t)*u8; 1303 1304 if (c8_32 & NOT_ASCII) { 1305 bool err; 1306 c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err); 1307 if (err) { 1308 return INT32_MIN; 1309 } 1310 1311 uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end); 1312 // The above UTF16CharEnumerator::NextChar() calls can 1313 // fail, but if it does for anything other than no data to 1314 // look at (which can't happen here), it returns the 1315 // Unicode replacement character 0xFFFD for the invalid 1316 // data they were fed. Ignore that error and treat invalid 1317 // UTF16 as 0xFFFD. 1318 // 1319 // This matches what our UTF16 to UTF8 conversion code 1320 // does, and thus a UTF8 string that came from an invalid 1321 // UTF16 string will compare equal to the invalid UTF16 1322 // string it came from. Same is true for any other UTF16 1323 // string differs only in the invalid part of the string. 1324 1325 if (c8_32 != c16_32) { 1326 return c8_32 < c16_32 ? -1 : 1; 1327 } 1328 } else { 1329 if (c8_32 != *u16) { 1330 return c8_32 > *u16 ? 1 : -1; 1331 } 1332 1333 ++u8; 1334 ++u16; 1335 } 1336 } 1337 1338 if (u8 != u8end) { 1339 // We get to the end of the UTF16 string, but no to the end of 1340 // the UTF8 string. The UTF8 string is longer than the UTF16 1341 // string 1342 1343 return 1; 1344 } 1345 1346 if (u16 != u16end) { 1347 // We get to the end of the UTF8 string, but no to the end of 1348 // the UTF16 string. The UTF16 string is longer than the UTF8 1349 // string 1350 1351 return -1; 1352 } 1353 1354 // The two strings match. 1355 1356 return 0; 1357 } 1358 1359 void 1360 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest) 1361 { 1362 NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char"); 1363 if (IS_IN_BMP(aSource)) { 1364 aDest.Append(char16_t(aSource)); 1365 } else { 1366 aDest.Append(H_SURROGATE(aSource)); 1367 aDest.Append(L_SURROGATE(aSource)); 1368 } 1369 } 1370 1371 extern "C" { 1372 1373 void Gecko_AppendUTF16toCString(nsACString* aThis, const nsAString* aOther) 1374 { 1375 AppendUTF16toUTF8(*aOther, *aThis); 1376 } 1377 1378 void Gecko_AppendUTF8toString(nsAString* aThis, const nsACString* aOther) 1379 { 1380 AppendUTF8toUTF16(*aOther, *aThis); 1381 } 1382 1383 } 1384