1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "nsReadableUtils.h"
8 #include "nsReadableUtilsImpl.h"
9 
10 #include <algorithm>
11 
12 #include "mozilla/CheckedInt.h"
13 
14 #include "nscore.h"
15 #include "nsMemory.h"
16 #include "nsString.h"
17 #include "nsTArray.h"
18 #include "nsUTF8Utils.h"
19 
20 using mozilla::IsASCII;
21 
22 /**
23  * Fallback implementation for finding the first non-ASCII character in a
24  * UTF-16 string.
25  */
26 static inline int32_t
27 FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd)
28 {
29   typedef mozilla::NonASCIIParameters<sizeof(size_t)> p;
30   const size_t kMask = p::mask();
31   const uintptr_t kAlignMask = p::alignMask();
32   const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
33 
34   const char16_t* idx = aBegin;
35 
36   // Align ourselves to a word boundary.
37   for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) {
38     if (!IsASCII(*idx)) {
39       return idx - aBegin;
40     }
41   }
42 
43   // Check one word at a time.
44   const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask);
45   for (; idx != wordWalkEnd; idx += kNumUnicharsPerWord) {
46     const size_t word = *reinterpret_cast<const size_t*>(idx);
47     if (word & kMask) {
48       return idx - aBegin;
49     }
50   }
51 
52   // Take care of the remainder one character at a time.
53   for (; idx != aEnd; idx++) {
54     if (!IsASCII(*idx)) {
55       return idx - aBegin;
56     }
57   }
58 
59   return -1;
60 }
61 
62 /*
63  * This function returns -1 if all characters in str are ASCII characters.
64  * Otherwise, it returns a value less than or equal to the index of the first
65  * ASCII character in str. For example, if first non-ASCII character is at
66  * position 25, it may return 25, 24, or 16. But it guarantees
67  * there are only ASCII characters before returned value.
68  */
69 static inline int32_t
70 FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
71 {
72 #ifdef MOZILLA_MAY_SUPPORT_SSE2
73   if (mozilla::supports_sse2()) {
74     return mozilla::SSE2::FirstNonASCII(aBegin, aEnd);
75   }
76 #endif
77 
78   return FirstNonASCIIUnvectorized(aBegin, aEnd);
79 }
80 
81 void
82 LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
83 {
84   aDest.Truncate();
85   LossyAppendUTF16toASCII(aSource, aDest);
86 }
87 
88 void
89 CopyASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
90 {
91   aDest.Truncate();
92   AppendASCIItoUTF16(aSource, aDest);
93 }
94 
95 void
96 LossyCopyUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
97 {
98   aDest.Truncate();
99   if (aSource) {
100     LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
101   }
102 }
103 
104 void
105 CopyASCIItoUTF16(const char* aSource, nsAString& aDest)
106 {
107   aDest.Truncate();
108   if (aSource) {
109     AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
110   }
111 }
112 
113 void
114 CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
115 {
116   if (!CopyUTF16toUTF8(aSource, aDest, mozilla::fallible)) {
117     // Note that this may wildly underestimate the allocation that failed, as
118     // we report the length of aSource as UTF-16 instead of UTF-8.
119     aDest.AllocFailed(aDest.Length() + aSource.Length());
120   }
121 }
122 
123 bool
124 CopyUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
125                 const mozilla::fallible_t& aFallible)
126 {
127   aDest.Truncate();
128   if (!AppendUTF16toUTF8(aSource, aDest, aFallible)) {
129     return false;
130   }
131   return true;
132 }
133 
134 void
135 CopyUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
136 {
137   aDest.Truncate();
138   AppendUTF8toUTF16(aSource, aDest);
139 }
140 
141 void
142 CopyUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest)
143 {
144   aDest.Truncate();
145   AppendUTF16toUTF8(aSource, aDest);
146 }
147 
148 void
149 CopyUTF8toUTF16(const char* aSource, nsAString& aDest)
150 {
151   aDest.Truncate();
152   AppendUTF8toUTF16(aSource, aDest);
153 }
154 
155 void
156 LossyAppendUTF16toASCII(const nsAString& aSource, nsACString& aDest)
157 {
158   uint32_t old_dest_length = aDest.Length();
159   aDest.SetLength(old_dest_length + aSource.Length());
160 
161   nsAString::const_iterator fromBegin, fromEnd;
162 
163   nsACString::iterator dest;
164   aDest.BeginWriting(dest);
165 
166   dest.advance(old_dest_length);
167 
168   // right now, this won't work on multi-fragment destinations
169   LossyConvertEncoding16to8 converter(dest.get());
170 
171   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
172               converter);
173 }
174 
175 void
176 AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest)
177 {
178   if (!AppendASCIItoUTF16(aSource, aDest, mozilla::fallible)) {
179     aDest.AllocFailed(aDest.Length() + aSource.Length());
180   }
181 }
182 
183 bool
184 AppendASCIItoUTF16(const nsACString& aSource, nsAString& aDest,
185                    const mozilla::fallible_t& aFallible)
186 {
187   uint32_t old_dest_length = aDest.Length();
188   if (!aDest.SetLength(old_dest_length + aSource.Length(),
189                        aFallible)) {
190     return false;
191   }
192 
193   nsACString::const_iterator fromBegin, fromEnd;
194 
195   nsAString::iterator dest;
196   aDest.BeginWriting(dest);
197 
198   dest.advance(old_dest_length);
199 
200   // right now, this won't work on multi-fragment destinations
201   LossyConvertEncoding8to16 converter(dest.get());
202 
203   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
204               converter);
205   return true;
206 }
207 
208 void
209 LossyAppendUTF16toASCII(const char16ptr_t aSource, nsACString& aDest)
210 {
211   if (aSource) {
212     LossyAppendUTF16toASCII(nsDependentString(aSource), aDest);
213   }
214 }
215 
216 bool
217 AppendASCIItoUTF16(const char* aSource, nsAString& aDest, const mozilla::fallible_t& aFallible)
218 {
219   if (aSource) {
220     return AppendASCIItoUTF16(nsDependentCString(aSource), aDest, aFallible);
221   }
222 
223   return true;
224 }
225 
226 void
227 AppendASCIItoUTF16(const char* aSource, nsAString& aDest)
228 {
229   if (aSource) {
230     AppendASCIItoUTF16(nsDependentCString(aSource), aDest);
231   }
232 }
233 
234 void
235 AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest)
236 {
237   if (!AppendUTF16toUTF8(aSource, aDest, mozilla::fallible)) {
238     // Note that this may wildly underestimate the allocation that failed, as
239     // we report the length of aSource as UTF-16 instead of UTF-8.
240     aDest.AllocFailed(aDest.Length() + aSource.Length());
241   }
242 }
243 
244 bool
245 AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
246                   const mozilla::fallible_t& aFallible)
247 {
248   // At 16 characters analysis showed better performance of both the all ASCII
249   // and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of
250   // that length.
251   const nsAString::size_type kFastPathMinLength = 16;
252 
253   int32_t firstNonASCII = 0;
254   if (aSource.Length() >= kFastPathMinLength) {
255     firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading());
256   }
257 
258   if (firstNonASCII == -1) {
259     // This is all ASCII, we can use the more efficient lossy append.
260     mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length());
261     new_length += aDest.Length();
262 
263     if (!new_length.isValid() ||
264         !aDest.SetCapacity(new_length.value(), aFallible)) {
265       return false;
266     }
267 
268     LossyAppendUTF16toASCII(aSource, aDest);
269     return true;
270   }
271 
272   nsAString::const_iterator source_start, source_end;
273   CalculateUTF8Size calculator;
274   aSource.BeginReading(source_start);
275   aSource.EndReading(source_end);
276 
277   // Skip the characters that we know are single byte.
278   source_start.advance(firstNonASCII);
279 
280   copy_string(source_start,
281               source_end, calculator);
282 
283   // Include the ASCII characters that were skipped in the count.
284   size_t count = calculator.Size() + firstNonASCII;
285 
286   if (count) {
287     auto old_dest_length = aDest.Length();
288     // Grow the buffer if we need to.
289     mozilla::CheckedInt<nsACString::size_type> new_length(count);
290     new_length += old_dest_length;
291 
292     if (!new_length.isValid() ||
293         !aDest.SetLength(new_length.value(), aFallible)) {
294       return false;
295     }
296 
297     // All ready? Time to convert
298 
299     nsAString::const_iterator ascii_end;
300     aSource.BeginReading(ascii_end);
301 
302     if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) {
303       // Use the more efficient lossy converter for the ASCII portion.
304       LossyConvertEncoding16to8 lossy_converter(
305           aDest.BeginWriting() + old_dest_length);
306       nsAString::const_iterator ascii_start;
307       aSource.BeginReading(ascii_start);
308       ascii_end.advance(firstNonASCII);
309 
310       copy_string(ascii_start, ascii_end, lossy_converter);
311     } else {
312       // Not using the lossy shortcut, we need to include the leading ASCII
313       // chars.
314       firstNonASCII = 0;
315     }
316 
317     ConvertUTF16toUTF8 converter(
318         aDest.BeginWriting() + old_dest_length + firstNonASCII);
319     copy_string(ascii_end,
320                 aSource.EndReading(source_end), converter);
321 
322     NS_ASSERTION(converter.Size() == count - firstNonASCII,
323                  "Unexpected disparity between CalculateUTF8Size and "
324                  "ConvertUTF16toUTF8");
325   }
326 
327   return true;
328 }
329 
330 void
331 AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest)
332 {
333   if (!AppendUTF8toUTF16(aSource, aDest, mozilla::fallible)) {
334     aDest.AllocFailed(aDest.Length() + aSource.Length());
335   }
336 }
337 
338 bool
339 AppendUTF8toUTF16(const nsACString& aSource, nsAString& aDest,
340                   const mozilla::fallible_t& aFallible)
341 {
342   nsACString::const_iterator source_start, source_end;
343   CalculateUTF8Length calculator;
344   copy_string(aSource.BeginReading(source_start),
345               aSource.EndReading(source_end), calculator);
346 
347   uint32_t count = calculator.Length();
348 
349   // Avoid making the string mutable if we're appending an empty string
350   if (count) {
351     uint32_t old_dest_length = aDest.Length();
352 
353     // Grow the buffer if we need to.
354     if (!aDest.SetLength(old_dest_length + count, aFallible)) {
355       return false;
356     }
357 
358     // All ready? Time to convert
359 
360     ConvertUTF8toUTF16 converter(aDest.BeginWriting() + old_dest_length);
361     copy_string(aSource.BeginReading(source_start),
362                 aSource.EndReading(source_end), converter);
363 
364     NS_ASSERTION(converter.ErrorEncountered() ||
365                  converter.Length() == count,
366                  "CalculateUTF8Length produced the wrong length");
367 
368     if (converter.ErrorEncountered()) {
369       NS_ERROR("Input wasn't UTF8 or incorrect length was calculated");
370       aDest.SetLength(old_dest_length);
371     }
372   }
373 
374   return true;
375 }
376 
377 void
378 AppendUTF16toUTF8(const char16ptr_t aSource, nsACString& aDest)
379 {
380   if (aSource) {
381     AppendUTF16toUTF8(nsDependentString(aSource), aDest);
382   }
383 }
384 
385 void
386 AppendUTF8toUTF16(const char* aSource, nsAString& aDest)
387 {
388   if (aSource) {
389     AppendUTF8toUTF16(nsDependentCString(aSource), aDest);
390   }
391 }
392 
393 
394 /**
395  * A helper function that allocates a buffer of the desired character type big enough to hold a copy of the supplied string (plus a zero terminator).
396  *
397  * @param aSource an string you will eventually be making a copy of
398  * @return a new buffer (of the type specified by the second parameter) which you must free with |free|.
399  *
400  */
401 template <class FromStringT, class ToCharT>
402 inline
403 ToCharT*
404 AllocateStringCopy(const FromStringT& aSource, ToCharT*)
405 {
406   return static_cast<ToCharT*>(moz_xmalloc(
407     (aSource.Length() + 1) * sizeof(ToCharT)));
408 }
409 
410 
411 char*
412 ToNewCString(const nsAString& aSource)
413 {
414   char* result = AllocateStringCopy(aSource, (char*)0);
415   if (!result) {
416     return nullptr;
417   }
418 
419   nsAString::const_iterator fromBegin, fromEnd;
420   LossyConvertEncoding16to8 converter(result);
421   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
422               converter).write_terminator();
423   return result;
424 }
425 
426 char*
427 ToNewUTF8String(const nsAString& aSource, uint32_t* aUTF8Count)
428 {
429   nsAString::const_iterator start, end;
430   CalculateUTF8Size calculator;
431   copy_string(aSource.BeginReading(start), aSource.EndReading(end),
432               calculator);
433 
434   if (aUTF8Count) {
435     *aUTF8Count = calculator.Size();
436   }
437 
438   char* result = static_cast<char*>
439                  (moz_xmalloc(calculator.Size() + 1));
440   if (!result) {
441     return nullptr;
442   }
443 
444   ConvertUTF16toUTF8 converter(result);
445   copy_string(aSource.BeginReading(start), aSource.EndReading(end),
446               converter).write_terminator();
447   NS_ASSERTION(calculator.Size() == converter.Size(), "length mismatch");
448 
449   return result;
450 }
451 
452 char*
453 ToNewCString(const nsACString& aSource)
454 {
455   // no conversion needed, just allocate a buffer of the correct length and copy into it
456 
457   char* result = AllocateStringCopy(aSource, (char*)0);
458   if (!result) {
459     return nullptr;
460   }
461 
462   nsACString::const_iterator fromBegin, fromEnd;
463   char* toBegin = result;
464   *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
465                toBegin) = char(0);
466   return result;
467 }
468 
469 char16_t*
470 ToNewUnicode(const nsAString& aSource)
471 {
472   // no conversion needed, just allocate a buffer of the correct length and copy into it
473 
474   char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
475   if (!result) {
476     return nullptr;
477   }
478 
479   nsAString::const_iterator fromBegin, fromEnd;
480   char16_t* toBegin = result;
481   *copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
482                toBegin) = char16_t(0);
483   return result;
484 }
485 
486 char16_t*
487 ToNewUnicode(const nsACString& aSource)
488 {
489   char16_t* result = AllocateStringCopy(aSource, (char16_t*)0);
490   if (!result) {
491     return nullptr;
492   }
493 
494   nsACString::const_iterator fromBegin, fromEnd;
495   LossyConvertEncoding8to16 converter(result);
496   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
497               converter).write_terminator();
498   return result;
499 }
500 
501 uint32_t
502 CalcUTF8ToUnicodeLength(const nsACString& aSource)
503 {
504   nsACString::const_iterator start, end;
505   CalculateUTF8Length calculator;
506   copy_string(aSource.BeginReading(start), aSource.EndReading(end),
507               calculator);
508   return calculator.Length();
509 }
510 
511 char16_t*
512 UTF8ToUnicodeBuffer(const nsACString& aSource, char16_t* aBuffer,
513                     uint32_t* aUTF16Count)
514 {
515   nsACString::const_iterator start, end;
516   ConvertUTF8toUTF16 converter(aBuffer);
517   copy_string(aSource.BeginReading(start),
518               aSource.EndReading(end),
519               converter).write_terminator();
520   if (aUTF16Count) {
521     *aUTF16Count = converter.Length();
522   }
523   return aBuffer;
524 }
525 
526 char16_t*
527 UTF8ToNewUnicode(const nsACString& aSource, uint32_t* aUTF16Count)
528 {
529   const uint32_t length = CalcUTF8ToUnicodeLength(aSource);
530   const size_t buffer_size = (length + 1) * sizeof(char16_t);
531   char16_t* buffer = static_cast<char16_t*>(moz_xmalloc(buffer_size));
532   if (!buffer) {
533     return nullptr;
534   }
535 
536   uint32_t copied;
537   UTF8ToUnicodeBuffer(aSource, buffer, &copied);
538   NS_ASSERTION(length == copied, "length mismatch");
539 
540   if (aUTF16Count) {
541     *aUTF16Count = copied;
542   }
543   return buffer;
544 }
545 
546 char16_t*
547 CopyUnicodeTo(const nsAString& aSource, uint32_t aSrcOffset, char16_t* aDest,
548               uint32_t aLength)
549 {
550   nsAString::const_iterator fromBegin, fromEnd;
551   char16_t* toBegin = aDest;
552   copy_string(aSource.BeginReading(fromBegin).advance(int32_t(aSrcOffset)),
553               aSource.BeginReading(fromEnd).advance(int32_t(aSrcOffset + aLength)),
554               toBegin);
555   return aDest;
556 }
557 
558 void
559 CopyUnicodeTo(const nsAString::const_iterator& aSrcStart,
560               const nsAString::const_iterator& aSrcEnd,
561               nsAString& aDest)
562 {
563   aDest.SetLength(Distance(aSrcStart, aSrcEnd));
564 
565   nsAString::char_iterator dest = aDest.BeginWriting();
566   nsAString::const_iterator fromBegin(aSrcStart);
567 
568   copy_string(fromBegin, aSrcEnd, dest);
569 }
570 
571 void
572 AppendUnicodeTo(const nsAString::const_iterator& aSrcStart,
573                 const nsAString::const_iterator& aSrcEnd,
574                 nsAString& aDest)
575 {
576   uint32_t oldLength = aDest.Length();
577   aDest.SetLength(oldLength + Distance(aSrcStart, aSrcEnd));
578 
579   nsAString::char_iterator dest = aDest.BeginWriting() + oldLength;
580   nsAString::const_iterator fromBegin(aSrcStart);
581 
582   copy_string(fromBegin, aSrcEnd, dest);
583 }
584 
585 bool
586 IsASCII(const nsAString& aString)
587 {
588   static const char16_t NOT_ASCII = char16_t(~0x007F);
589 
590 
591   // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
592 
593   nsAString::const_iterator iter, done_reading;
594   aString.BeginReading(iter);
595   aString.EndReading(done_reading);
596 
597   const char16_t* c = iter.get();
598   const char16_t* end = done_reading.get();
599 
600   while (c < end) {
601     if (*c++ & NOT_ASCII) {
602       return false;
603     }
604   }
605 
606   return true;
607 }
608 
609 bool
610 IsASCII(const nsACString& aString)
611 {
612   static const char NOT_ASCII = char(~0x7F);
613 
614 
615   // Don't want to use |copy_string| for this task, since we can stop at the first non-ASCII character
616 
617   nsACString::const_iterator iter, done_reading;
618   aString.BeginReading(iter);
619   aString.EndReading(done_reading);
620 
621   const char* c = iter.get();
622   const char* end = done_reading.get();
623 
624   while (c < end) {
625     if (*c++ & NOT_ASCII) {
626       return false;
627     }
628   }
629 
630   return true;
631 }
632 
633 bool
634 IsUTF8(const nsACString& aString, bool aRejectNonChar)
635 {
636   nsReadingIterator<char> done_reading;
637   aString.EndReading(done_reading);
638 
639   int32_t state = 0;
640   bool overlong = false;
641   bool surrogate = false;
642   bool nonchar = false;
643   uint16_t olupper = 0; // overlong byte upper bound.
644   uint16_t slower = 0;  // surrogate byte lower bound.
645 
646   nsReadingIterator<char> iter;
647   aString.BeginReading(iter);
648 
649   const char* ptr = iter.get();
650   const char* end = done_reading.get();
651   while (ptr < end) {
652     uint8_t c;
653 
654     if (0 == state) {
655       c = *ptr++;
656 
657       if (UTF8traits::isASCII(c)) {
658         continue;
659       }
660 
661       if (c <= 0xC1) { // [80-BF] where not expected, [C0-C1] for overlong.
662         return false;
663       } else if (UTF8traits::is2byte(c)) {
664         state = 1;
665       } else if (UTF8traits::is3byte(c)) {
666         state = 2;
667         if (c == 0xE0) { // to exclude E0[80-9F][80-BF]
668           overlong = true;
669           olupper = 0x9F;
670         } else if (c == 0xED) { // ED[A0-BF][80-BF] : surrogate codepoint
671           surrogate = true;
672           slower = 0xA0;
673         } else if (c == 0xEF) { // EF BF [BE-BF] : non-character
674           nonchar = true;
675         }
676       } else if (c <= 0xF4) { // XXX replace /w UTF8traits::is4byte when it's updated to exclude [F5-F7].(bug 199090)
677         state = 3;
678         nonchar = true;
679         if (c == 0xF0) { // to exclude F0[80-8F][80-BF]{2}
680           overlong = true;
681           olupper = 0x8F;
682         } else if (c == 0xF4) { // to exclude F4[90-BF][80-BF]
683           // actually not surrogates but codepoints beyond 0x10FFFF
684           surrogate = true;
685           slower = 0x90;
686         }
687       } else {
688         return false;  // Not UTF-8 string
689       }
690     }
691 
692     if (nonchar && !aRejectNonChar) {
693       nonchar = false;
694     }
695 
696     while (ptr < end && state) {
697       c = *ptr++;
698       --state;
699 
700       // non-character : EF BF [BE-BF] or F[0-7] [89AB]F BF [BE-BF]
701       if (nonchar &&
702           ((!state && c < 0xBE) ||
703            (state == 1 && c != 0xBF)  ||
704            (state == 2 && 0x0F != (0x0F & c)))) {
705         nonchar = false;
706       }
707 
708       if (!UTF8traits::isInSeq(c) || (overlong && c <= olupper) ||
709           (surrogate && slower <= c) || (nonchar && !state)) {
710         return false;  // Not UTF-8 string
711       }
712 
713       overlong = surrogate = false;
714     }
715   }
716   return !state; // state != 0 at the end indicates an invalid UTF-8 seq.
717 }
718 
/**
 * A character sink for in-place case conversion: characters in 'a'..'z' are
 * replaced by their 'A'..'Z' counterparts, everything else is left alone.
 */
class ConvertToUpperCase
{
public:
  typedef char value_type;

  /**
   * Upper-cases |aSourceLength| characters starting at |aSource|, writing
   * through the pointer (constness is cast away). Returns |aSourceLength|.
   */
  uint32_t
  write(const char* aSource, uint32_t aSourceLength)
  {
    char* const chars = const_cast<char*>(aSource);
    for (uint32_t i = 0; i < aSourceLength; ++i) {
      const char ch = chars[i];
      if ('a' <= ch && ch <= 'z') {
        chars[i] = ch - ('a' - 'A');
      }
    }
    return aSourceLength;
  }
};
742 
743 void
744 ToUpperCase(nsCSubstring& aCString)
745 {
746   ConvertToUpperCase converter;
747   char* start;
748   converter.write(aCString.BeginWriting(start), aCString.Length());
749 }
750 
751 /**
752  * A character sink for copying with case conversion.
753  */
754 class CopyToUpperCase
755 {
756 public:
757   typedef char value_type;
758 
759   explicit CopyToUpperCase(nsACString::iterator& aDestIter,
760                            const nsACString::iterator& aEndIter)
761     : mIter(aDestIter)
762     , mEnd(aEndIter)
763   {
764   }
765 
766   uint32_t
767   write(const char* aSource, uint32_t aSourceLength)
768   {
769     uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength);
770     char* cp = mIter.get();
771     const char* end = aSource + len;
772     while (aSource != end) {
773       char ch = *aSource;
774       if ((ch >= 'a') && (ch <= 'z')) {
775         *cp = ch - ('a' - 'A');
776       } else {
777         *cp = ch;
778       }
779       ++aSource;
780       ++cp;
781     }
782     mIter.advance(len);
783     return len;
784   }
785 
786 protected:
787   nsACString::iterator& mIter;
788   const nsACString::iterator& mEnd;
789 };
790 
791 void
792 ToUpperCase(const nsACString& aSource, nsACString& aDest)
793 {
794   nsACString::const_iterator fromBegin, fromEnd;
795   nsACString::iterator toBegin, toEnd;
796   aDest.SetLength(aSource.Length());
797 
798   CopyToUpperCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd));
799   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
800               converter);
801 }
802 
/**
 * A character sink for in-place case conversion: characters in 'A'..'Z' are
 * replaced by their 'a'..'z' counterparts, everything else is left alone.
 */
class ConvertToLowerCase
{
public:
  typedef char value_type;

  /**
   * Lower-cases |aSourceLength| characters starting at |aSource|, writing
   * through the pointer (constness is cast away). Returns |aSourceLength|.
   */
  uint32_t
  write(const char* aSource, uint32_t aSourceLength)
  {
    char* const chars = const_cast<char*>(aSource);
    for (uint32_t i = 0; i < aSourceLength; ++i) {
      const char ch = chars[i];
      if ('A' <= ch && ch <= 'Z') {
        chars[i] = ch + ('a' - 'A');
      }
    }
    return aSourceLength;
  }
};
826 
827 void
828 ToLowerCase(nsCSubstring& aCString)
829 {
830   ConvertToLowerCase converter;
831   char* start;
832   converter.write(aCString.BeginWriting(start), aCString.Length());
833 }
834 
835 /**
836  * A character sink for copying with case conversion.
837  */
838 class CopyToLowerCase
839 {
840 public:
841   typedef char value_type;
842 
843   explicit CopyToLowerCase(nsACString::iterator& aDestIter,
844                            const nsACString::iterator& aEndIter)
845     : mIter(aDestIter)
846     , mEnd(aEndIter)
847   {
848   }
849 
850   uint32_t
851   write(const char* aSource, uint32_t aSourceLength)
852   {
853     uint32_t len = XPCOM_MIN(uint32_t(mEnd - mIter), aSourceLength);
854     char* cp = mIter.get();
855     const char* end = aSource + len;
856     while (aSource != end) {
857       char ch = *aSource;
858       if ((ch >= 'A') && (ch <= 'Z')) {
859         *cp = ch + ('a' - 'A');
860       } else {
861         *cp = ch;
862       }
863       ++aSource;
864       ++cp;
865     }
866     mIter.advance(len);
867     return len;
868   }
869 
870 protected:
871   nsACString::iterator& mIter;
872   const nsACString::iterator& mEnd;
873 };
874 
875 void
876 ToLowerCase(const nsACString& aSource, nsACString& aDest)
877 {
878   nsACString::const_iterator fromBegin, fromEnd;
879   nsACString::iterator toBegin, toEnd;
880   aDest.SetLength(aSource.Length());
881 
882   CopyToLowerCase converter(aDest.BeginWriting(toBegin), aDest.EndWriting(toEnd));
883   copy_string(aSource.BeginReading(fromBegin), aSource.EndReading(fromEnd),
884               converter);
885 }
886 
887 bool
888 ParseString(const nsACString& aSource, char aDelimiter,
889             nsTArray<nsCString>& aArray)
890 {
891   nsACString::const_iterator start, end;
892   aSource.BeginReading(start);
893   aSource.EndReading(end);
894 
895   uint32_t oldLength = aArray.Length();
896 
897   for (;;) {
898     nsACString::const_iterator delimiter = start;
899     FindCharInReadable(aDelimiter, delimiter, end);
900 
901     if (delimiter != start) {
902       if (!aArray.AppendElement(Substring(start, delimiter))) {
903         aArray.RemoveElementsAt(oldLength, aArray.Length() - oldLength);
904         return false;
905       }
906     }
907 
908     if (delimiter == end) {
909       break;
910     }
911     start = ++delimiter;
912     if (start == end) {
913       break;
914     }
915   }
916 
917   return true;
918 }
919 
/**
 * Searches [aSearchStart, aSearchEnd) from left to right for |aPattern|,
 * comparing one character at a time with |aCompare| (a zero result means the
 * characters match).
 *
 * On success, |aSearchStart| and |aSearchEnd| are updated to delimit the
 * match and true is returned. On failure false is returned and, if the range
 * was not empty to begin with, |aSearchStart| is left equal to |aSearchEnd|.
 *
 * NOTE(review): the first pattern character is read unconditionally, so this
 * appears to assume a non-empty pattern - confirm against callers.
 */
template <class StringT, class IteratorT, class Comparator>
bool
FindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                    IteratorT& aSearchEnd, const Comparator& aCompare)
{
  // only bother searching at all if we're given a non-empty range to search
  if (aSearchStart == aSearchEnd) {
    return false;
  }

  IteratorT patternBegin, patternEnd;
  aPattern.BeginReading(patternBegin);
  aPattern.EndReading(patternEnd);

  // keep searching till we find it or run out of string to search
  for (;;) {
    // skip ahead to the next position whose character matches the first
    // character of the pattern
    while (aSearchStart != aSearchEnd &&
           aCompare(patternBegin.get(), aSearchStart.get(), 1, 1) != 0) {
      ++aSearchStart;
    }

    // ran out of string without a candidate: no match
    if (aSearchStart == aSearchEnd) {
      return false;
    }

    // we're at a potential match; verify the rest of the pattern here
    IteratorT patternPos(patternBegin);
    IteratorT searchPos(aSearchStart);

    for (;;) {
      // the first character was already compared above, so advance before
      // the next comparison
      ++patternPos;
      ++searchPos;

      // verified all the way to the end of the pattern: found it
      if (patternPos == patternEnd) {
        aSearchEnd = searchPos; // return the exact found range through the parameters
        return true;
      }

      // hit the end of the searched string before the end of the pattern:
      // the pattern cannot fit anywhere further on either
      if (searchPos == aSearchEnd) {
        aSearchStart = aSearchEnd;
        break;
      }

      // mismatch: move on to the next search position
      if (aCompare(patternPos.get(), searchPos.get(), 1, 1) != 0) {
        ++aSearchStart;
        break;
      }
    }
  }
}
983 
/**
 * This searches the entire string from right to left, and returns the first
 * match found, if any.
 *
 * Characters are compared one at a time with |aCompare| (a zero result means
 * the characters match). On success, |aSearchStart| and |aSearchEnd| are
 * updated to delimit the match and true is returned. On failure,
 * |aSearchStart| is set equal to |aSearchEnd| and false is returned.
 *
 * NOTE(review): the pattern's end iterator is decremented unconditionally,
 * so this appears to assume a non-empty pattern - confirm against callers.
 */
template <class StringT, class IteratorT, class Comparator>
bool
RFindInReadable_Impl(const StringT& aPattern, IteratorT& aSearchStart,
                     IteratorT& aSearchEnd, const Comparator& aCompare)
{
  IteratorT patternFirst, patternLast, candidateEnd = aSearchEnd;
  aPattern.BeginReading(patternFirst);
  aPattern.EndReading(patternLast);

  // Point to the last character in the pattern
  --patternLast;

  // keep searching till we run out of string to search
  while (aSearchStart != candidateEnd) {
    // Point to the end position of the next possible match
    --candidateEnd;

    // Only positions whose character matches the last pattern character are
    // worth exploring.
    if (aCompare(patternLast.get(), candidateEnd.get(), 1, 1) != 0) {
      continue;
    }

    // We're at a potential match; walk backwards through pattern and string.
    IteratorT patternPos(patternLast);
    IteratorT searchPos(candidateEnd);

    do {
      // reached the first pattern character with everything matching
      if (patternPos == patternFirst) {
        aSearchStart = searchPos;      // point to start of match
        aSearchEnd = ++candidateEnd;   // point to end of match
        return true;
      }

      // reached the start of the searched string before the start of the
      // pattern: no earlier position can hold the pattern either
      if (searchPos == aSearchStart) {
        aSearchStart = aSearchEnd;
        return false;
      }

      // test previous character for a match
      --patternPos;
      --searchPos;
    } while (aCompare(patternPos.get(), searchPos.get(), 1, 1) == 0);
  }

  aSearchStart = aSearchEnd;
  return false;
}
1035 
1036 bool
1037 FindInReadable(const nsAString& aPattern,
1038                nsAString::const_iterator& aSearchStart,
1039                nsAString::const_iterator& aSearchEnd,
1040                const nsStringComparator& aComparator)
1041 {
1042   return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
1043 }
1044 
1045 bool
1046 FindInReadable(const nsACString& aPattern,
1047                nsACString::const_iterator& aSearchStart,
1048                nsACString::const_iterator& aSearchEnd,
1049                const nsCStringComparator& aComparator)
1050 {
1051   return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
1052 }
1053 
1054 bool
1055 CaseInsensitiveFindInReadable(const nsACString& aPattern,
1056                               nsACString::const_iterator& aSearchStart,
1057                               nsACString::const_iterator& aSearchEnd)
1058 {
1059   return FindInReadable_Impl(aPattern, aSearchStart, aSearchEnd,
1060                              nsCaseInsensitiveCStringComparator());
1061 }
1062 
1063 bool
1064 RFindInReadable(const nsAString& aPattern,
1065                 nsAString::const_iterator& aSearchStart,
1066                 nsAString::const_iterator& aSearchEnd,
1067                 const nsStringComparator& aComparator)
1068 {
1069   return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
1070 }
1071 
1072 bool
1073 RFindInReadable(const nsACString& aPattern,
1074                 nsACString::const_iterator& aSearchStart,
1075                 nsACString::const_iterator& aSearchEnd,
1076                 const nsCStringComparator& aComparator)
1077 {
1078   return RFindInReadable_Impl(aPattern, aSearchStart, aSearchEnd, aComparator);
1079 }
1080 
1081 bool
1082 FindCharInReadable(char16_t aChar, nsAString::const_iterator& aSearchStart,
1083                    const nsAString::const_iterator& aSearchEnd)
1084 {
1085   int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
1086 
1087   const char16_t* charFoundAt =
1088     nsCharTraits<char16_t>::find(aSearchStart.get(), fragmentLength, aChar);
1089   if (charFoundAt) {
1090     aSearchStart.advance(charFoundAt - aSearchStart.get());
1091     return true;
1092   }
1093 
1094   aSearchStart.advance(fragmentLength);
1095   return false;
1096 }
1097 
1098 bool
1099 FindCharInReadable(char aChar, nsACString::const_iterator& aSearchStart,
1100                    const nsACString::const_iterator& aSearchEnd)
1101 {
1102   int32_t fragmentLength = aSearchEnd.get() - aSearchStart.get();
1103 
1104   const char* charFoundAt =
1105     nsCharTraits<char>::find(aSearchStart.get(), fragmentLength, aChar);
1106   if (charFoundAt) {
1107     aSearchStart.advance(charFoundAt - aSearchStart.get());
1108     return true;
1109   }
1110 
1111   aSearchStart.advance(fragmentLength);
1112   return false;
1113 }
1114 
1115 uint32_t
1116 CountCharInReadable(const nsAString& aStr, char16_t aChar)
1117 {
1118   uint32_t count = 0;
1119   nsAString::const_iterator begin, end;
1120 
1121   aStr.BeginReading(begin);
1122   aStr.EndReading(end);
1123 
1124   while (begin != end) {
1125     if (*begin == aChar) {
1126       ++count;
1127     }
1128     ++begin;
1129   }
1130 
1131   return count;
1132 }
1133 
1134 uint32_t
1135 CountCharInReadable(const nsACString& aStr, char aChar)
1136 {
1137   uint32_t count = 0;
1138   nsACString::const_iterator begin, end;
1139 
1140   aStr.BeginReading(begin);
1141   aStr.EndReading(end);
1142 
1143   while (begin != end) {
1144     if (*begin == aChar) {
1145       ++count;
1146     }
1147     ++begin;
1148   }
1149 
1150   return count;
1151 }
1152 
1153 bool
1154 StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring)
1155 {
1156   nsAString::size_type src_len = aSource.Length(),
1157                        sub_len = aSubstring.Length();
1158   if (sub_len > src_len) {
1159     return false;
1160   }
1161   return Substring(aSource, 0, sub_len).Equals(aSubstring);
1162 }
1163 
1164 bool
1165 StringBeginsWith(const nsAString& aSource, const nsAString& aSubstring,
1166                  const nsStringComparator& aComparator)
1167 {
1168   nsAString::size_type src_len = aSource.Length(),
1169                        sub_len = aSubstring.Length();
1170   if (sub_len > src_len) {
1171     return false;
1172   }
1173   return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1174 }
1175 
1176 bool
1177 StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring)
1178 {
1179   nsACString::size_type src_len = aSource.Length(),
1180                         sub_len = aSubstring.Length();
1181   if (sub_len > src_len) {
1182     return false;
1183   }
1184   return Substring(aSource, 0, sub_len).Equals(aSubstring);
1185 }
1186 
1187 bool
1188 StringBeginsWith(const nsACString& aSource, const nsACString& aSubstring,
1189                  const nsCStringComparator& aComparator)
1190 {
1191   nsACString::size_type src_len = aSource.Length(),
1192                         sub_len = aSubstring.Length();
1193   if (sub_len > src_len) {
1194     return false;
1195   }
1196   return Substring(aSource, 0, sub_len).Equals(aSubstring, aComparator);
1197 }
1198 
1199 bool
1200 StringEndsWith(const nsAString& aSource, const nsAString& aSubstring)
1201 {
1202   nsAString::size_type src_len = aSource.Length(),
1203                        sub_len = aSubstring.Length();
1204   if (sub_len > src_len) {
1205     return false;
1206   }
1207   return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
1208 }
1209 
1210 bool
1211 StringEndsWith(const nsAString& aSource, const nsAString& aSubstring,
1212                const nsStringComparator& aComparator)
1213 {
1214   nsAString::size_type src_len = aSource.Length(),
1215                        sub_len = aSubstring.Length();
1216   if (sub_len > src_len) {
1217     return false;
1218   }
1219   return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1220                                                                aComparator);
1221 }
1222 
1223 bool
1224 StringEndsWith(const nsACString& aSource, const nsACString& aSubstring)
1225 {
1226   nsACString::size_type src_len = aSource.Length(),
1227                         sub_len = aSubstring.Length();
1228   if (sub_len > src_len) {
1229     return false;
1230   }
1231   return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring);
1232 }
1233 
1234 bool
1235 StringEndsWith(const nsACString& aSource, const nsACString& aSubstring,
1236                const nsCStringComparator& aComparator)
1237 {
1238   nsACString::size_type src_len = aSource.Length(),
1239                         sub_len = aSubstring.Length();
1240   if (sub_len > src_len) {
1241     return false;
1242   }
1243   return Substring(aSource, src_len - sub_len, sub_len).Equals(aSubstring,
1244                                                                aComparator);
1245 }
1246 
1247 
1248 
// A single zero code unit. EmptyString() wraps it directly and EmptyCString()
// reinterprets the same storage as a char buffer, so both empty strings point
// at this one terminator.
static const char16_t empty_buffer[1] = { '\0' };
1250 
1251 const nsAFlatString&
1252 EmptyString()
1253 {
1254   static const nsDependentString sEmpty(empty_buffer);
1255 
1256   return sEmpty;
1257 }
1258 
1259 const nsAFlatCString&
1260 EmptyCString()
1261 {
1262   static const nsDependentCString sEmpty((const char*)empty_buffer);
1263 
1264   return sEmpty;
1265 }
1266 
1267 const nsAFlatString&
1268 NullString()
1269 {
1270   static const nsXPIDLString sNull;
1271 
1272   return sNull;
1273 }
1274 
1275 const nsAFlatCString&
1276 NullCString()
1277 {
1278   static const nsXPIDLCString sNull;
1279 
1280   return sNull;
1281 }
1282 
1283 int32_t
1284 CompareUTF8toUTF16(const nsASingleFragmentCString& aUTF8String,
1285                    const nsASingleFragmentString& aUTF16String)
1286 {
1287   static const uint32_t NOT_ASCII = uint32_t(~0x7F);
1288 
1289   const char* u8;
1290   const char* u8end;
1291   aUTF8String.BeginReading(u8);
1292   aUTF8String.EndReading(u8end);
1293 
1294   const char16_t* u16;
1295   const char16_t* u16end;
1296   aUTF16String.BeginReading(u16);
1297   aUTF16String.EndReading(u16end);
1298 
1299   while (u8 != u8end && u16 != u16end) {
1300     // Cast away the signedness of *u8 to prevent signextension when
1301     // converting to uint32_t
1302     uint32_t c8_32 = (uint8_t)*u8;
1303 
1304     if (c8_32 & NOT_ASCII) {
1305       bool err;
1306       c8_32 = UTF8CharEnumerator::NextChar(&u8, u8end, &err);
1307       if (err) {
1308         return INT32_MIN;
1309       }
1310 
1311       uint32_t c16_32 = UTF16CharEnumerator::NextChar(&u16, u16end);
1312       // The above UTF16CharEnumerator::NextChar() calls can
1313       // fail, but if it does for anything other than no data to
1314       // look at (which can't happen here), it returns the
1315       // Unicode replacement character 0xFFFD for the invalid
1316       // data they were fed. Ignore that error and treat invalid
1317       // UTF16 as 0xFFFD.
1318       //
1319       // This matches what our UTF16 to UTF8 conversion code
1320       // does, and thus a UTF8 string that came from an invalid
1321       // UTF16 string will compare equal to the invalid UTF16
1322       // string it came from. Same is true for any other UTF16
1323       // string differs only in the invalid part of the string.
1324 
1325       if (c8_32 != c16_32) {
1326         return c8_32 < c16_32 ? -1 : 1;
1327       }
1328     } else {
1329       if (c8_32 != *u16) {
1330         return c8_32 > *u16 ? 1 : -1;
1331       }
1332 
1333       ++u8;
1334       ++u16;
1335     }
1336   }
1337 
1338   if (u8 != u8end) {
1339     // We get to the end of the UTF16 string, but no to the end of
1340     // the UTF8 string. The UTF8 string is longer than the UTF16
1341     // string
1342 
1343     return 1;
1344   }
1345 
1346   if (u16 != u16end) {
1347     // We get to the end of the UTF8 string, but no to the end of
1348     // the UTF16 string. The UTF16 string is longer than the UTF8
1349     // string
1350 
1351     return -1;
1352   }
1353 
1354   // The two strings match.
1355 
1356   return 0;
1357 }
1358 
1359 void
1360 AppendUCS4ToUTF16(const uint32_t aSource, nsAString& aDest)
1361 {
1362   NS_ASSERTION(IS_VALID_CHAR(aSource), "Invalid UCS4 char");
1363   if (IS_IN_BMP(aSource)) {
1364     aDest.Append(char16_t(aSource));
1365   } else {
1366     aDest.Append(H_SURROGATE(aSource));
1367     aDest.Append(L_SURROGATE(aSource));
1368   }
1369 }
1370 
1371 extern "C" {
1372 
1373 void Gecko_AppendUTF16toCString(nsACString* aThis, const nsAString* aOther)
1374 {
1375   AppendUTF16toUTF8(*aOther, *aThis);
1376 }
1377 
1378 void Gecko_AppendUTF8toString(nsAString* aThis, const nsACString* aOther)
1379 {
1380   AppendUTF8toUTF16(*aOther, *aThis);
1381 }
1382 
1383 }
1384