1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "builtin/String.h"
8 
9 #include "mozilla/Attributes.h"
10 #include "mozilla/CheckedInt.h"
11 #include "mozilla/FloatingPoint.h"
12 #if JS_HAS_INTL_API
13 #  include "mozilla/intl/String.h"
14 #endif
15 #include "mozilla/PodOperations.h"
16 #include "mozilla/Range.h"
17 #include "mozilla/TextUtils.h"
18 
19 #include <algorithm>
20 #include <limits>
21 #include <string.h>
22 #include <type_traits>
23 
24 #include "jsnum.h"
25 #include "jstypes.h"
26 
27 #include "builtin/Array.h"
28 #include "builtin/Boolean.h"
29 #if JS_HAS_INTL_API
30 #  include "builtin/intl/CommonFunctions.h"
31 #  include "builtin/intl/FormatBuffer.h"
32 #endif
33 #include "builtin/RegExp.h"
34 #include "jit/InlinableNatives.h"
35 #include "js/Conversions.h"
36 #include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
37 #include "js/friend/StackLimits.h"    // js::AutoCheckRecursionLimit
38 #if !JS_HAS_INTL_API
39 #  include "js/LocaleSensitive.h"
40 #endif
41 #include "js/PropertyAndElement.h"  // JS_DefineFunctions
42 #include "js/PropertySpec.h"
43 #include "js/StableStringChars.h"
44 #include "js/UniquePtr.h"
45 #include "util/StringBuffer.h"
46 #include "util/Unicode.h"
47 #include "vm/GlobalObject.h"
48 #include "vm/Interpreter.h"
49 #include "vm/JSAtom.h"
50 #include "vm/JSContext.h"
51 #include "vm/JSObject.h"
52 #include "vm/Opcodes.h"
53 #include "vm/Printer.h"
54 #include "vm/RegExpObject.h"
55 #include "vm/RegExpStatics.h"
56 #include "vm/SelfHosting.h"
57 #include "vm/StaticStrings.h"
58 #include "vm/ToSource.h"       // js::ValueToSource
59 #include "vm/WellKnownAtom.h"  // js_*_str
60 
61 #include "vm/InlineCharBuffer-inl.h"
62 #include "vm/Interpreter-inl.h"
63 #include "vm/StringObject-inl.h"
64 #include "vm/StringType-inl.h"
65 
66 using namespace js;
67 
68 using JS::Symbol;
69 using JS::SymbolCode;
70 
71 using mozilla::AsciiAlphanumericToNumber;
72 using mozilla::CheckedInt;
73 using mozilla::IsAsciiHexDigit;
74 using mozilla::IsNaN;
75 using mozilla::PodCopy;
76 using mozilla::RangedPtr;
77 
78 using JS::AutoCheckCannotGC;
79 using JS::AutoStableStringChars;
80 
ArgToLinearString(JSContext * cx,const CallArgs & args,unsigned argno)81 static JSLinearString* ArgToLinearString(JSContext* cx, const CallArgs& args,
82                                          unsigned argno) {
83   if (argno >= args.length()) {
84     return cx->names().undefined;
85   }
86 
87   JSString* str = ToString<CanGC>(cx, args[argno]);
88   if (!str) {
89     return nullptr;
90   }
91 
92   return str->ensureLinear(cx);
93 }
94 
95 /*
96  * Forward declarations for URI encode/decode and helper routines
97  */
98 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
99 
100 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
101 
102 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
103 
104 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
105 
106 /*
107  * Global string methods
108  */
109 
110 /* ES5 B.2.1 */
111 template <typename CharT>
Escape(JSContext * cx,const CharT * chars,uint32_t length,InlineCharBuffer<Latin1Char> & newChars,uint32_t * newLengthOut)112 static bool Escape(JSContext* cx, const CharT* chars, uint32_t length,
113                    InlineCharBuffer<Latin1Char>& newChars,
114                    uint32_t* newLengthOut) {
115   // clang-format off
116     static const uint8_t shouldPassThrough[128] = {
117          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
118          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
119          0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,       /*    !"#$%&'()*+,-./  */
120          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,       /*   0123456789:;<=>?  */
121          1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   @ABCDEFGHIJKLMNO  */
122          1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /*   PQRSTUVWXYZ[\]^_  */
123          0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   `abcdefghijklmno  */
124          1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,       /*   pqrstuvwxyz{\}~  DEL */
125     };
126   // clang-format on
127 
128   /* Take a first pass and see how big the result string will need to be. */
129   uint32_t newLength = length;
130   for (size_t i = 0; i < length; i++) {
131     char16_t ch = chars[i];
132     if (ch < 128 && shouldPassThrough[ch]) {
133       continue;
134     }
135 
136     /*
137      * newlength is incremented below by at most 5 and at this point it must
138      * be a valid string length, so this should never overflow uint32_t.
139      */
140     static_assert(JSString::MAX_LENGTH < UINT32_MAX - 5,
141                   "Adding 5 to valid string length should not overflow");
142 
143     MOZ_ASSERT(newLength <= JSString::MAX_LENGTH);
144 
145     /* The character will be encoded as %XX or %uXXXX. */
146     newLength += (ch < 256) ? 2 : 5;
147 
148     if (MOZ_UNLIKELY(newLength > JSString::MAX_LENGTH)) {
149       ReportAllocationOverflow(cx);
150       return false;
151     }
152   }
153 
154   if (newLength == length) {
155     *newLengthOut = newLength;
156     return true;
157   }
158 
159   if (!newChars.maybeAlloc(cx, newLength)) {
160     return false;
161   }
162 
163   static const char digits[] = "0123456789ABCDEF";
164 
165   Latin1Char* rawNewChars = newChars.get();
166   size_t i, ni;
167   for (i = 0, ni = 0; i < length; i++) {
168     char16_t ch = chars[i];
169     if (ch < 128 && shouldPassThrough[ch]) {
170       rawNewChars[ni++] = ch;
171     } else if (ch < 256) {
172       rawNewChars[ni++] = '%';
173       rawNewChars[ni++] = digits[ch >> 4];
174       rawNewChars[ni++] = digits[ch & 0xF];
175     } else {
176       rawNewChars[ni++] = '%';
177       rawNewChars[ni++] = 'u';
178       rawNewChars[ni++] = digits[ch >> 12];
179       rawNewChars[ni++] = digits[(ch & 0xF00) >> 8];
180       rawNewChars[ni++] = digits[(ch & 0xF0) >> 4];
181       rawNewChars[ni++] = digits[ch & 0xF];
182     }
183   }
184   MOZ_ASSERT(ni == newLength);
185 
186   *newLengthOut = newLength;
187   return true;
188 }
189 
str_escape(JSContext * cx,unsigned argc,Value * vp)190 static bool str_escape(JSContext* cx, unsigned argc, Value* vp) {
191   CallArgs args = CallArgsFromVp(argc, vp);
192 
193   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
194   if (!str) {
195     return false;
196   }
197 
198   InlineCharBuffer<Latin1Char> newChars;
199   uint32_t newLength = 0;  // initialize to silence GCC warning
200   if (str->hasLatin1Chars()) {
201     AutoCheckCannotGC nogc;
202     if (!Escape(cx, str->latin1Chars(nogc), str->length(), newChars,
203                 &newLength)) {
204       return false;
205     }
206   } else {
207     AutoCheckCannotGC nogc;
208     if (!Escape(cx, str->twoByteChars(nogc), str->length(), newChars,
209                 &newLength)) {
210       return false;
211     }
212   }
213 
214   // Return input if no characters need to be escaped.
215   if (newLength == str->length()) {
216     args.rval().setString(str);
217     return true;
218   }
219 
220   JSString* res = newChars.toString(cx, newLength);
221   if (!res) {
222     return false;
223   }
224 
225   args.rval().setString(res);
226   return true;
227 }
228 
229 template <typename CharT>
Unhex4(const RangedPtr<const CharT> chars,char16_t * result)230 static inline bool Unhex4(const RangedPtr<const CharT> chars,
231                           char16_t* result) {
232   CharT a = chars[0], b = chars[1], c = chars[2], d = chars[3];
233 
234   if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b) && IsAsciiHexDigit(c) &&
235         IsAsciiHexDigit(d))) {
236     return false;
237   }
238 
239   char16_t unhex = AsciiAlphanumericToNumber(a);
240   unhex = (unhex << 4) + AsciiAlphanumericToNumber(b);
241   unhex = (unhex << 4) + AsciiAlphanumericToNumber(c);
242   unhex = (unhex << 4) + AsciiAlphanumericToNumber(d);
243   *result = unhex;
244   return true;
245 }
246 
247 template <typename CharT>
Unhex2(const RangedPtr<const CharT> chars,char16_t * result)248 static inline bool Unhex2(const RangedPtr<const CharT> chars,
249                           char16_t* result) {
250   CharT a = chars[0], b = chars[1];
251 
252   if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b))) {
253     return false;
254   }
255 
256   *result = (AsciiAlphanumericToNumber(a) << 4) + AsciiAlphanumericToNumber(b);
257   return true;
258 }
259 
260 template <typename CharT>
Unescape(StringBuffer & sb,const mozilla::Range<const CharT> chars)261 static bool Unescape(StringBuffer& sb,
262                      const mozilla::Range<const CharT> chars) {
263   // Step 2.
264   uint32_t length = chars.length();
265 
266   /*
267    * Note that the spec algorithm has been optimized to avoid building
268    * a string in the case where no escapes are present.
269    */
270   bool building = false;
271 
272 #define ENSURE_BUILDING                            \
273   do {                                             \
274     if (!building) {                               \
275       building = true;                             \
276       if (!sb.reserve(length)) return false;       \
277       sb.infallibleAppend(chars.begin().get(), k); \
278     }                                              \
279   } while (false);
280 
281   // Step 4.
282   uint32_t k = 0;
283 
284   // Step 5.
285   while (k < length) {
286     // Step 5.a.
287     char16_t c = chars[k];
288 
289     // Step 5.b.
290     if (c == '%') {
291       static_assert(JSString::MAX_LENGTH < UINT32_MAX - 6,
292                     "String length is not near UINT32_MAX");
293 
294       // Steps 5.b.i-ii.
295       if (k + 6 <= length && chars[k + 1] == 'u') {
296         if (Unhex4(chars.begin() + k + 2, &c)) {
297           ENSURE_BUILDING
298           k += 5;
299         }
300       } else if (k + 3 <= length) {
301         if (Unhex2(chars.begin() + k + 1, &c)) {
302           ENSURE_BUILDING
303           k += 2;
304         }
305       }
306     }
307 
308     // Step 5.c.
309     if (building && !sb.append(c)) {
310       return false;
311     }
312 
313     // Step 5.d.
314     k += 1;
315   }
316 
317   return true;
318 #undef ENSURE_BUILDING
319 }
320 
321 // ES2018 draft rev f83aa38282c2a60c6916ebc410bfdf105a0f6a54
322 // B.2.1.2 unescape ( string )
str_unescape(JSContext * cx,unsigned argc,Value * vp)323 static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) {
324   CallArgs args = CallArgsFromVp(argc, vp);
325 
326   // Step 1.
327   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
328   if (!str) {
329     return false;
330   }
331 
332   // Step 3.
333   JSStringBuilder sb(cx);
334   if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) {
335     return false;
336   }
337 
338   // Steps 2, 4-5.
339   if (str->hasLatin1Chars()) {
340     AutoCheckCannotGC nogc;
341     if (!Unescape(sb, str->latin1Range(nogc))) {
342       return false;
343     }
344   } else {
345     AutoCheckCannotGC nogc;
346     if (!Unescape(sb, str->twoByteRange(nogc))) {
347       return false;
348     }
349   }
350 
351   // Step 6.
352   JSLinearString* result;
353   if (!sb.empty()) {
354     result = sb.finishString();
355     if (!result) {
356       return false;
357     }
358   } else {
359     result = str;
360   }
361 
362   args.rval().setString(result);
363   return true;
364 }
365 
str_uneval(JSContext * cx,unsigned argc,Value * vp)366 static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) {
367   CallArgs args = CallArgsFromVp(argc, vp);
368   JSString* str = ValueToSource(cx, args.get(0));
369   if (!str) {
370     return false;
371   }
372 
373   args.rval().setString(str);
374   return true;
375 }
376 
377 static const JSFunctionSpec string_functions[] = {
378     JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING),
379     JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING),
380     JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING),
381     JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING),
382     JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING),
383     JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,
384           JSPROP_RESOLVING),
385     JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,
386           JSPROP_RESOLVING),
387 
388     JS_FS_END};
389 
390 static const unsigned STRING_ELEMENT_ATTRS =
391     JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
392 
str_enumerate(JSContext * cx,HandleObject obj)393 static bool str_enumerate(JSContext* cx, HandleObject obj) {
394   RootedString str(cx, obj->as<StringObject>().unbox());
395   js::StaticStrings& staticStrings = cx->staticStrings();
396 
397   RootedValue value(cx);
398   for (size_t i = 0, length = str->length(); i < length; i++) {
399     JSString* str1 = staticStrings.getUnitStringForElement(cx, str, i);
400     if (!str1) {
401       return false;
402     }
403     value.setString(str1);
404     if (!DefineDataElement(cx, obj, i, value,
405                            STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
406       return false;
407     }
408   }
409 
410   return true;
411 }
412 
str_mayResolve(const JSAtomState &,jsid id,JSObject *)413 static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) {
414   // str_resolve ignores non-integer ids.
415   return id.isInt();
416 }
417 
str_resolve(JSContext * cx,HandleObject obj,HandleId id,bool * resolvedp)418 static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id,
419                         bool* resolvedp) {
420   if (!id.isInt()) {
421     return true;
422   }
423 
424   RootedString str(cx, obj->as<StringObject>().unbox());
425 
426   int32_t slot = id.toInt();
427   if ((size_t)slot < str->length()) {
428     JSString* str1 =
429         cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
430     if (!str1) {
431       return false;
432     }
433     RootedValue value(cx, StringValue(str1));
434     if (!DefineDataElement(cx, obj, uint32_t(slot), value,
435                            STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
436       return false;
437     }
438     *resolvedp = true;
439   }
440   return true;
441 }
442 
443 static const JSClassOps StringObjectClassOps = {
444     nullptr,         // addProperty
445     nullptr,         // delProperty
446     str_enumerate,   // enumerate
447     nullptr,         // newEnumerate
448     str_resolve,     // resolve
449     str_mayResolve,  // mayResolve
450     nullptr,         // finalize
451     nullptr,         // call
452     nullptr,         // hasInstance
453     nullptr,         // construct
454     nullptr,         // trace
455 };
456 
457 const JSClass StringObject::class_ = {
458     js_String_str,
459     JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
460         JSCLASS_HAS_CACHED_PROTO(JSProto_String),
461     &StringObjectClassOps, &StringObject::classSpec_};
462 
463 /*
464  * Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)|
465  * from nearly all String.prototype.* functions.
466  */
ToStringForStringFunction(JSContext * cx,const char * funName,HandleValue thisv)467 static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction(
468     JSContext* cx, const char* funName, HandleValue thisv) {
469   AutoCheckRecursionLimit recursion(cx);
470   if (!recursion.check(cx)) {
471     return nullptr;
472   }
473 
474   if (thisv.isString()) {
475     return thisv.toString();
476   }
477 
478   if (thisv.isObject()) {
479     RootedObject obj(cx, &thisv.toObject());
480     if (obj->is<StringObject>()) {
481       StringObject* nobj = &obj->as<StringObject>();
482       // We have to make sure that the ToPrimitive call from ToString
483       // would be unobservable.
484       if (HasNoToPrimitiveMethodPure(nobj, cx) &&
485           HasNativeMethodPure(nobj, cx->names().toString, str_toString, cx)) {
486         return nobj->unbox();
487       }
488     }
489   } else if (thisv.isNullOrUndefined()) {
490     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
491                               JSMSG_INCOMPATIBLE_PROTO, "String", funName,
492                               thisv.isNull() ? "null" : "undefined");
493     return nullptr;
494   }
495 
496   return ToStringSlow<CanGC>(cx, thisv);
497 }
498 
IsString(HandleValue v)499 MOZ_ALWAYS_INLINE bool IsString(HandleValue v) {
500   return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
501 }
502 
str_toSource_impl(JSContext * cx,const CallArgs & args)503 MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) {
504   MOZ_ASSERT(IsString(args.thisv()));
505 
506   JSString* str = ToString<CanGC>(cx, args.thisv());
507   if (!str) {
508     return false;
509   }
510 
511   UniqueChars quoted = QuoteString(cx, str, '"');
512   if (!quoted) {
513     return false;
514   }
515 
516   JSStringBuilder sb(cx);
517   if (!sb.append("(new String(") ||
518       !sb.append(quoted.get(), strlen(quoted.get())) || !sb.append("))")) {
519     return false;
520   }
521 
522   JSString* result = sb.finishString();
523   if (!result) {
524     return false;
525   }
526   args.rval().setString(result);
527   return true;
528 }
529 
str_toSource(JSContext * cx,unsigned argc,Value * vp)530 static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) {
531   CallArgs args = CallArgsFromVp(argc, vp);
532   return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
533 }
534 
str_toString_impl(JSContext * cx,const CallArgs & args)535 MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) {
536   MOZ_ASSERT(IsString(args.thisv()));
537 
538   args.rval().setString(
539       args.thisv().isString()
540           ? args.thisv().toString()
541           : args.thisv().toObject().as<StringObject>().unbox());
542   return true;
543 }
544 
str_toString(JSContext * cx,unsigned argc,Value * vp)545 bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) {
546   CallArgs args = CallArgsFromVp(argc, vp);
547   return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
548 }
549 
550 /*
551  * Java-like string native methods.
552  */
553 
SubstringKernel(JSContext * cx,HandleString str,int32_t beginInt,int32_t lengthInt)554 JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
555                               int32_t lengthInt) {
556   MOZ_ASSERT(0 <= beginInt);
557   MOZ_ASSERT(0 <= lengthInt);
558   MOZ_ASSERT(uint32_t(beginInt) <= str->length());
559   MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
560 
561   uint32_t begin = beginInt;
562   uint32_t len = lengthInt;
563 
564   /*
565    * Optimization for one level deep ropes.
566    * This is common for the following pattern:
567    *
568    * while() {
569    *   text = text.substr(0, x) + "bla" + text.substr(x)
570    *   test.charCodeAt(x + 1)
571    * }
572    */
573   if (str->isRope()) {
574     JSRope* rope = &str->asRope();
575 
576     /* Substring is totally in leftChild of rope. */
577     if (begin + len <= rope->leftChild()->length()) {
578       return NewDependentString(cx, rope->leftChild(), begin, len);
579     }
580 
581     /* Substring is totally in rightChild of rope. */
582     if (begin >= rope->leftChild()->length()) {
583       begin -= rope->leftChild()->length();
584       return NewDependentString(cx, rope->rightChild(), begin, len);
585     }
586 
587     /*
588      * Requested substring is partly in the left and partly in right child.
589      * Create a rope of substrings for both childs.
590      */
591     MOZ_ASSERT(begin < rope->leftChild()->length() &&
592                begin + len > rope->leftChild()->length());
593 
594     size_t lhsLength = rope->leftChild()->length() - begin;
595     size_t rhsLength = begin + len - rope->leftChild()->length();
596 
597     Rooted<JSRope*> ropeRoot(cx, rope);
598     RootedString lhs(
599         cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
600     if (!lhs) {
601       return nullptr;
602     }
603 
604     RootedString rhs(
605         cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
606     if (!rhs) {
607       return nullptr;
608     }
609 
610     return JSRope::new_<CanGC>(cx, lhs, rhs, len);
611   }
612 
613   return NewDependentString(cx, str, begin, len);
614 }
615 
616 /**
617  * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
618  * depending on its context:
619  * When it's preceded by a cased character and not followed by another cased
620  * character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
621  * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
622  *
623  * Unicode 9.0, §3.13 Default Case Algorithms
624  */
Final_Sigma(const char16_t * chars,size_t length,size_t index)625 static char16_t Final_Sigma(const char16_t* chars, size_t length,
626                             size_t index) {
627   MOZ_ASSERT(index < length);
628   MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
629   MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
630              unicode::GREEK_SMALL_LETTER_SIGMA);
631 
632 #if JS_HAS_INTL_API
633   // Tell the analysis the BinaryProperty.contains function pointer called by
634   // mozilla::intl::String::Is{CaseIgnorable, Cased} cannot GC.
635   JS::AutoSuppressGCAnalysis nogc;
636 
637   bool precededByCased = false;
638   for (size_t i = index; i > 0;) {
639     char16_t c = chars[--i];
640     uint32_t codePoint = c;
641     if (unicode::IsTrailSurrogate(c) && i > 0) {
642       char16_t lead = chars[i - 1];
643       if (unicode::IsLeadSurrogate(lead)) {
644         codePoint = unicode::UTF16Decode(lead, c);
645         i--;
646       }
647     }
648 
649     // Ignore any characters with the property Case_Ignorable.
650     // NB: We need to skip over all Case_Ignorable characters, even when
651     // they also have the Cased binary property.
652     if (mozilla::intl::String::IsCaseIgnorable(codePoint)) {
653       continue;
654     }
655 
656     precededByCased = mozilla::intl::String::IsCased(codePoint);
657     break;
658   }
659   if (!precededByCased) {
660     return unicode::GREEK_SMALL_LETTER_SIGMA;
661   }
662 
663   bool followedByCased = false;
664   for (size_t i = index + 1; i < length;) {
665     char16_t c = chars[i++];
666     uint32_t codePoint = c;
667     if (unicode::IsLeadSurrogate(c) && i < length) {
668       char16_t trail = chars[i];
669       if (unicode::IsTrailSurrogate(trail)) {
670         codePoint = unicode::UTF16Decode(c, trail);
671         i++;
672       }
673     }
674 
675     // Ignore any characters with the property Case_Ignorable.
676     // NB: We need to skip over all Case_Ignorable characters, even when
677     // they also have the Cased binary property.
678     if (mozilla::intl::String::IsCaseIgnorable(codePoint)) {
679       continue;
680     }
681 
682     followedByCased = mozilla::intl::String::IsCased(codePoint);
683     break;
684   }
685   if (!followedByCased) {
686     return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
687   }
688 #endif
689 
690   return unicode::GREEK_SMALL_LETTER_SIGMA;
691 }
692 
693 // If |srcLength == destLength| is true, the destination buffer was allocated
694 // with the same size as the source buffer. When we append characters which
695 // have special casing mappings, we test |srcLength == destLength| to decide
696 // if we need to back out and reallocate a sufficiently large destination
697 // buffer. Otherwise the destination buffer was allocated with the correct
698 // size to hold all lower case mapped characters, i.e.
699 // |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
700 template <typename CharT>
ToLowerCaseImpl(CharT * destChars,const CharT * srcChars,size_t startIndex,size_t srcLength,size_t destLength)701 static size_t ToLowerCaseImpl(CharT* destChars, const CharT* srcChars,
702                               size_t startIndex, size_t srcLength,
703                               size_t destLength) {
704   MOZ_ASSERT(startIndex < srcLength);
705   MOZ_ASSERT(srcLength <= destLength);
706   if constexpr (std::is_same_v<CharT, Latin1Char>) {
707     MOZ_ASSERT(srcLength == destLength);
708   }
709 
710   size_t j = startIndex;
711   for (size_t i = startIndex; i < srcLength; i++) {
712     CharT c = srcChars[i];
713     if constexpr (!std::is_same_v<CharT, Latin1Char>) {
714       if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
715         char16_t trail = srcChars[i + 1];
716         if (unicode::IsTrailSurrogate(trail)) {
717           trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
718           destChars[j++] = c;
719           destChars[j++] = trail;
720           i++;
721           continue;
722         }
723       }
724 
725       // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
726       // lowercases to <U+0069 U+0307>.
727       if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
728         // Return if the output buffer is too small.
729         if (srcLength == destLength) {
730           return i;
731         }
732 
733         destChars[j++] = CharT('i');
734         destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
735         continue;
736       }
737 
738       // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
739       // one of two codepoints depending on context.
740       if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
741         destChars[j++] = Final_Sigma(srcChars, srcLength, i);
742         continue;
743       }
744     }
745 
746     c = unicode::ToLowerCase(c);
747     destChars[j++] = c;
748   }
749 
750   MOZ_ASSERT(j == destLength);
751   return srcLength;
752 }
753 
ToLowerCaseLength(const char16_t * chars,size_t startIndex,size_t length)754 static size_t ToLowerCaseLength(const char16_t* chars, size_t startIndex,
755                                 size_t length) {
756   size_t lowerLength = length;
757   for (size_t i = startIndex; i < length; i++) {
758     char16_t c = chars[i];
759 
760     // U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
761     if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
762       lowerLength += 1;
763     }
764   }
765   return lowerLength;
766 }
767 
768 template <typename CharT>
ToLowerCase(JSContext * cx,JSLinearString * str)769 static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) {
770   // Unlike toUpperCase, toLowerCase has the nice invariant that if the
771   // input is a Latin-1 string, the output is also a Latin-1 string.
772 
773   InlineCharBuffer<CharT> newChars;
774 
775   const size_t length = str->length();
776   size_t resultLength;
777   {
778     AutoCheckCannotGC nogc;
779     const CharT* chars = str->chars<CharT>(nogc);
780 
781     // We don't need extra special casing checks in the loop below,
782     // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
783     // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
784     MOZ_ASSERT(unicode::ChangesWhenLowerCased(
785                    unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
786                "U+0130 has a simple lower case mapping");
787     MOZ_ASSERT(
788         unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA),
789         "U+03A3 has a simple lower case mapping");
790 
791     // One element Latin-1 strings can be directly retrieved from the
792     // static strings cache.
793     if constexpr (std::is_same_v<CharT, Latin1Char>) {
794       if (length == 1) {
795         CharT lower = unicode::ToLowerCase(chars[0]);
796         MOZ_ASSERT(StaticStrings::hasUnit(lower));
797 
798         return cx->staticStrings().getUnit(lower);
799       }
800     }
801 
802     // Look for the first character that changes when lowercased.
803     size_t i = 0;
804     for (; i < length; i++) {
805       CharT c = chars[i];
806       if constexpr (!std::is_same_v<CharT, Latin1Char>) {
807         if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
808           CharT trail = chars[i + 1];
809           if (unicode::IsTrailSurrogate(trail)) {
810             if (unicode::ChangesWhenLowerCasedNonBMP(c, trail)) {
811               break;
812             }
813 
814             i++;
815             continue;
816           }
817         }
818       }
819       if (unicode::ChangesWhenLowerCased(c)) {
820         break;
821       }
822     }
823 
824     // If no character needs to change, return the input string.
825     if (i == length) {
826       return str;
827     }
828 
829     resultLength = length;
830     if (!newChars.maybeAlloc(cx, resultLength)) {
831       return nullptr;
832     }
833 
834     PodCopy(newChars.get(), chars, i);
835 
836     size_t readChars =
837         ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
838     if constexpr (!std::is_same_v<CharT, Latin1Char>) {
839       if (readChars < length) {
840         resultLength = ToLowerCaseLength(chars, readChars, length);
841 
842         if (!newChars.maybeRealloc(cx, length, resultLength)) {
843           return nullptr;
844         }
845 
846         MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.get(), chars,
847                                                   readChars, length,
848                                                   resultLength));
849       }
850     } else {
851       MOZ_ASSERT(readChars == length,
852                  "Latin-1 strings don't have special lower case mappings");
853     }
854   }
855 
856   return newChars.toStringDontDeflate(cx, resultLength);
857 }
858 
StringToLowerCase(JSContext * cx,HandleString string)859 JSString* js::StringToLowerCase(JSContext* cx, HandleString string) {
860   JSLinearString* linear = string->ensureLinear(cx);
861   if (!linear) {
862     return nullptr;
863   }
864 
865   if (linear->hasLatin1Chars()) {
866     return ToLowerCase<Latin1Char>(cx, linear);
867   }
868   return ToLowerCase<char16_t>(cx, linear);
869 }
870 
str_toLowerCase(JSContext * cx,unsigned argc,Value * vp)871 static bool str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) {
872   CallArgs args = CallArgsFromVp(argc, vp);
873 
874   RootedString str(cx,
875                    ToStringForStringFunction(cx, "toLowerCase", args.thisv()));
876   if (!str) {
877     return false;
878   }
879 
880   JSString* result = StringToLowerCase(cx, str);
881   if (!result) {
882     return false;
883   }
884 
885   args.rval().setString(result);
886   return true;
887 }
888 
889 #if JS_HAS_INTL_API
890 // String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
891 // with core functionality performed by the intrinsic below.
892 
CaseMappingLocale(JSContext * cx,JSString * str)893 static const char* CaseMappingLocale(JSContext* cx, JSString* str) {
894   JSLinearString* locale = str->ensureLinear(cx);
895   if (!locale) {
896     return nullptr;
897   }
898 
899   MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");
900 
901   // Lithuanian, Turkish, and Azeri have language dependent case mappings.
902   static const char languagesWithSpecialCasing[][3] = {"lt", "tr", "az"};
903 
904   // All strings in |languagesWithSpecialCasing| are of length two, so we
905   // only need to compare the first two characters to find a matching locale.
906   // ES2017 Intl, §9.2.2 BestAvailableLocale
907   if (locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-') {
908     for (const auto& language : languagesWithSpecialCasing) {
909       if (locale->latin1OrTwoByteChar(0) == language[0] &&
910           locale->latin1OrTwoByteChar(1) == language[1]) {
911         return language;
912       }
913     }
914   }
915 
916   return "";  // ICU root locale
917 }
918 
intl_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)919 bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
920   CallArgs args = CallArgsFromVp(argc, vp);
921   MOZ_ASSERT(args.length() == 2);
922   MOZ_ASSERT(args[0].isString());
923   MOZ_ASSERT(args[1].isString());
924 
925   RootedString string(cx, args[0].toString());
926 
927   const char* locale = CaseMappingLocale(cx, args[1].toString());
928   if (!locale) {
929     return false;
930   }
931 
932   // Call String.prototype.toLowerCase() for language independent casing.
933   if (intl::StringsAreEqual(locale, "")) {
934     JSString* str = StringToLowerCase(cx, string);
935     if (!str) {
936       return false;
937     }
938 
939     args.rval().setString(str);
940     return true;
941   }
942 
943   AutoStableStringChars inputChars(cx);
944   if (!inputChars.initTwoByte(cx, string)) {
945     return false;
946   }
947   mozilla::Range<const char16_t> input = inputChars.twoByteRange();
948 
949   // Note: maximum case mapping length is three characters, so the result
950   // length might be > INT32_MAX. ICU will fail in this case.
951   static_assert(JSString::MAX_LENGTH <= INT32_MAX,
952                 "String length must fit in int32_t for ICU");
953 
954   static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
955 
956   intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx);
957 
958   auto ok = mozilla::intl::String::ToLocaleLowerCase(locale, input, buffer);
959   if (ok.isErr()) {
960     intl::ReportInternalError(cx, ok.unwrapErr());
961     return false;
962   }
963 
964   JSString* result = buffer.toString(cx);
965   if (!result) {
966     return false;
967   }
968 
969   args.rval().setString(result);
970   return true;
971 }
972 
973 #else
974 
975 // When the Intl API is not exposed, String.prototype.toLowerCase is implemented
976 // in C++.
str_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)977 static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
978   CallArgs args = CallArgsFromVp(argc, vp);
979 
980   RootedString str(
981       cx, ToStringForStringFunction(cx, "toLocaleLowerCase", args.thisv()));
982   if (!str) {
983     return false;
984   }
985 
986   /*
987    * Forcefully ignore the first (or any) argument and return toLowerCase(),
988    * ECMA has reserved that argument, presumably for defining the locale.
989    */
990   if (cx->runtime()->localeCallbacks &&
991       cx->runtime()->localeCallbacks->localeToLowerCase) {
992     RootedValue result(cx);
993     if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) {
994       return false;
995     }
996 
997     args.rval().set(result);
998     return true;
999   }
1000 
1001   RootedLinearString linear(cx, str->ensureLinear(cx));
1002   if (!linear) {
1003     return false;
1004   }
1005 
1006   JSString* result = StringToLowerCase(cx, linear);
1007   if (!result) {
1008     return false;
1009   }
1010 
1011   args.rval().setString(result);
1012   return true;
1013 }
1014 
1015 #endif  // JS_HAS_INTL_API
1016 
ToUpperCaseHasSpecialCasing(Latin1Char charCode)1017 static inline bool ToUpperCaseHasSpecialCasing(Latin1Char charCode) {
1018   // U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
1019   // special casing rules, so detect it inline.
1020   bool hasUpperCaseSpecialCasing =
1021       charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
1022   MOZ_ASSERT(hasUpperCaseSpecialCasing ==
1023              unicode::ChangesWhenUpperCasedSpecialCasing(charCode));
1024 
1025   return hasUpperCaseSpecialCasing;
1026 }
1027 
ToUpperCaseHasSpecialCasing(char16_t charCode)1028 static inline bool ToUpperCaseHasSpecialCasing(char16_t charCode) {
1029   return unicode::ChangesWhenUpperCasedSpecialCasing(charCode);
1030 }
1031 
ToUpperCaseLengthSpecialCasing(Latin1Char charCode)1032 static inline size_t ToUpperCaseLengthSpecialCasing(Latin1Char charCode) {
1033   // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1034   MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1035 
1036   return 2;
1037 }
1038 
ToUpperCaseLengthSpecialCasing(char16_t charCode)1039 static inline size_t ToUpperCaseLengthSpecialCasing(char16_t charCode) {
1040   MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode));
1041 
1042   return unicode::LengthUpperCaseSpecialCasing(charCode);
1043 }
1044 
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,Latin1Char * elements,size_t * index)1045 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1046                                                            Latin1Char* elements,
1047                                                            size_t* index) {
1048   // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1049   MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1050   static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
1051 
1052   elements[(*index)++] = 'S';
1053   elements[(*index)++] = 'S';
1054 }
1055 
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,char16_t * elements,size_t * index)1056 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1057                                                            char16_t* elements,
1058                                                            size_t* index) {
1059   unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
1060 }
1061 
1062 // See ToLowerCaseImpl for an explanation of the parameters.
1063 template <typename DestChar, typename SrcChar>
ToUpperCaseImpl(DestChar * destChars,const SrcChar * srcChars,size_t startIndex,size_t srcLength,size_t destLength)1064 static size_t ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars,
1065                               size_t startIndex, size_t srcLength,
1066                               size_t destLength) {
1067   static_assert(std::is_same_v<SrcChar, Latin1Char> ||
1068                     !std::is_same_v<DestChar, Latin1Char>,
1069                 "cannot write non-Latin-1 characters into Latin-1 string");
1070   MOZ_ASSERT(startIndex < srcLength);
1071   MOZ_ASSERT(srcLength <= destLength);
1072 
1073   size_t j = startIndex;
1074   for (size_t i = startIndex; i < srcLength; i++) {
1075     char16_t c = srcChars[i];
1076     if constexpr (!std::is_same_v<DestChar, Latin1Char>) {
1077       if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
1078         char16_t trail = srcChars[i + 1];
1079         if (unicode::IsTrailSurrogate(trail)) {
1080           trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
1081           destChars[j++] = c;
1082           destChars[j++] = trail;
1083           i++;
1084           continue;
1085         }
1086       }
1087     }
1088 
1089     if (MOZ_UNLIKELY(c > 0x7f &&
1090                      ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) {
1091       // Return if the output buffer is too small.
1092       if (srcLength == destLength) {
1093         return i;
1094       }
1095 
1096       ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j);
1097       continue;
1098     }
1099 
1100     c = unicode::ToUpperCase(c);
1101     if constexpr (std::is_same_v<DestChar, Latin1Char>) {
1102       MOZ_ASSERT(c <= JSString::MAX_LATIN1_CHAR);
1103     }
1104     destChars[j++] = c;
1105   }
1106 
1107   MOZ_ASSERT(j == destLength);
1108   return srcLength;
1109 }
1110 
1111 template <typename CharT>
ToUpperCaseLength(const CharT * chars,size_t startIndex,size_t length)1112 static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex,
1113                                 size_t length) {
1114   size_t upperLength = length;
1115   for (size_t i = startIndex; i < length; i++) {
1116     char16_t c = chars[i];
1117 
1118     if (c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<CharT>(c))) {
1119       upperLength += ToUpperCaseLengthSpecialCasing(static_cast<CharT>(c)) - 1;
1120     }
1121   }
1122   return upperLength;
1123 }
1124 
1125 template <typename DestChar, typename SrcChar>
CopyChars(DestChar * destChars,const SrcChar * srcChars,size_t length)1126 static inline void CopyChars(DestChar* destChars, const SrcChar* srcChars,
1127                              size_t length) {
1128   static_assert(!std::is_same_v<DestChar, SrcChar>,
1129                 "PodCopy is used for the same type case");
1130   for (size_t i = 0; i < length; i++) {
1131     destChars[i] = srcChars[i];
1132   }
1133 }
1134 
1135 template <typename CharT>
CopyChars(CharT * destChars,const CharT * srcChars,size_t length)1136 static inline void CopyChars(CharT* destChars, const CharT* srcChars,
1137                              size_t length) {
1138   PodCopy(destChars, srcChars, length);
1139 }
1140 
1141 template <typename DestChar, typename SrcChar>
ToUpperCase(JSContext * cx,InlineCharBuffer<DestChar> & newChars,const SrcChar * chars,size_t startIndex,size_t length,size_t * resultLength)1142 static inline bool ToUpperCase(JSContext* cx,
1143                                InlineCharBuffer<DestChar>& newChars,
1144                                const SrcChar* chars, size_t startIndex,
1145                                size_t length, size_t* resultLength) {
1146   MOZ_ASSERT(startIndex < length);
1147 
1148   *resultLength = length;
1149   if (!newChars.maybeAlloc(cx, length)) {
1150     return false;
1151   }
1152 
1153   CopyChars(newChars.get(), chars, startIndex);
1154 
1155   size_t readChars =
1156       ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
1157   if (readChars < length) {
1158     size_t actualLength = ToUpperCaseLength(chars, readChars, length);
1159 
1160     *resultLength = actualLength;
1161     if (!newChars.maybeRealloc(cx, length, actualLength)) {
1162       return false;
1163     }
1164 
1165     MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.get(), chars, readChars,
1166                                               length, actualLength));
1167   }
1168 
1169   return true;
1170 }
1171 
1172 template <typename CharT>
ToUpperCase(JSContext * cx,JSLinearString * str)1173 static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) {
1174   using Latin1Buffer = InlineCharBuffer<Latin1Char>;
1175   using TwoByteBuffer = InlineCharBuffer<char16_t>;
1176 
1177   mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
1178   const size_t length = str->length();
1179   size_t resultLength;
1180   {
1181     AutoCheckCannotGC nogc;
1182     const CharT* chars = str->chars<CharT>(nogc);
1183 
1184     // Most one element Latin-1 strings can be directly retrieved from the
1185     // static strings cache.
1186     if constexpr (std::is_same_v<CharT, Latin1Char>) {
1187       if (length == 1) {
1188         Latin1Char c = chars[0];
1189         if (c != unicode::MICRO_SIGN &&
1190             c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
1191             c != unicode::LATIN_SMALL_LETTER_SHARP_S) {
1192           char16_t upper = unicode::ToUpperCase(c);
1193           MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
1194           MOZ_ASSERT(StaticStrings::hasUnit(upper));
1195 
1196           return cx->staticStrings().getUnit(upper);
1197         }
1198 
1199         MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
1200                    ToUpperCaseHasSpecialCasing(c));
1201       }
1202     }
1203 
1204     // Look for the first character that changes when uppercased.
1205     size_t i = 0;
1206     for (; i < length; i++) {
1207       CharT c = chars[i];
1208       if constexpr (!std::is_same_v<CharT, Latin1Char>) {
1209         if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
1210           CharT trail = chars[i + 1];
1211           if (unicode::IsTrailSurrogate(trail)) {
1212             if (unicode::ChangesWhenUpperCasedNonBMP(c, trail)) {
1213               break;
1214             }
1215 
1216             i++;
1217             continue;
1218           }
1219         }
1220       }
1221       if (unicode::ChangesWhenUpperCased(c)) {
1222         break;
1223       }
1224       if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c))) {
1225         break;
1226       }
1227     }
1228 
1229     // If no character needs to change, return the input string.
1230     if (i == length) {
1231       return str;
1232     }
1233 
1234     // The string changes when uppercased, so we must create a new string.
1235     // Can it be Latin-1?
1236     //
1237     // If the original string is Latin-1, it can -- unless the string
1238     // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
1239     // the only Latin-1 codepoints that don't uppercase within Latin-1.
1240     // Search for those codepoints to decide whether the new string can be
1241     // Latin-1.
1242     // If the original string is a two-byte string, its uppercase form is
1243     // so rarely Latin-1 that we don't even consider creating a new
1244     // Latin-1 string.
1245     if constexpr (std::is_same_v<CharT, Latin1Char>) {
1246       bool resultIsLatin1 = true;
1247       for (size_t j = i; j < length; j++) {
1248         Latin1Char c = chars[j];
1249         if (c == unicode::MICRO_SIGN ||
1250             c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
1251           MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
1252           resultIsLatin1 = false;
1253           break;
1254         } else {
1255           MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
1256         }
1257       }
1258 
1259       if (resultIsLatin1) {
1260         newChars.construct<Latin1Buffer>();
1261 
1262         if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length,
1263                          &resultLength)) {
1264           return nullptr;
1265         }
1266       } else {
1267         newChars.construct<TwoByteBuffer>();
1268 
1269         if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1270                          &resultLength)) {
1271           return nullptr;
1272         }
1273       }
1274     } else {
1275       newChars.construct<TwoByteBuffer>();
1276 
1277       if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1278                        &resultLength)) {
1279         return nullptr;
1280       }
1281     }
1282   }
1283 
1284   return newChars.constructed<Latin1Buffer>()
1285              ? newChars.ref<Latin1Buffer>().toStringDontDeflate(cx,
1286                                                                 resultLength)
1287              : newChars.ref<TwoByteBuffer>().toStringDontDeflate(cx,
1288                                                                  resultLength);
1289 }
1290 
StringToUpperCase(JSContext * cx,HandleString string)1291 JSString* js::StringToUpperCase(JSContext* cx, HandleString string) {
1292   JSLinearString* linear = string->ensureLinear(cx);
1293   if (!linear) {
1294     return nullptr;
1295   }
1296 
1297   if (linear->hasLatin1Chars()) {
1298     return ToUpperCase<Latin1Char>(cx, linear);
1299   }
1300   return ToUpperCase<char16_t>(cx, linear);
1301 }
1302 
str_toUpperCase(JSContext * cx,unsigned argc,Value * vp)1303 static bool str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1304   CallArgs args = CallArgsFromVp(argc, vp);
1305 
1306   RootedString str(cx,
1307                    ToStringForStringFunction(cx, "toUpperCase", args.thisv()));
1308   if (!str) {
1309     return false;
1310   }
1311 
1312   JSString* result = StringToUpperCase(cx, str);
1313   if (!result) {
1314     return false;
1315   }
1316 
1317   args.rval().setString(result);
1318   return true;
1319 }
1320 
1321 #if JS_HAS_INTL_API
1322 // String.prototype.toLocaleUpperCase is self-hosted when Intl is exposed,
1323 // with core functionality performed by the intrinsic below.
1324 
intl_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1325 bool js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1326   CallArgs args = CallArgsFromVp(argc, vp);
1327   MOZ_ASSERT(args.length() == 2);
1328   MOZ_ASSERT(args[0].isString());
1329   MOZ_ASSERT(args[1].isString());
1330 
1331   RootedString string(cx, args[0].toString());
1332 
1333   const char* locale = CaseMappingLocale(cx, args[1].toString());
1334   if (!locale) {
1335     return false;
1336   }
1337 
1338   // Call String.prototype.toUpperCase() for language independent casing.
1339   if (intl::StringsAreEqual(locale, "")) {
1340     JSString* str = js::StringToUpperCase(cx, string);
1341     if (!str) {
1342       return false;
1343     }
1344 
1345     args.rval().setString(str);
1346     return true;
1347   }
1348 
1349   AutoStableStringChars inputChars(cx);
1350   if (!inputChars.initTwoByte(cx, string)) {
1351     return false;
1352   }
1353   mozilla::Range<const char16_t> input = inputChars.twoByteRange();
1354 
1355   // Note: maximum case mapping length is three characters, so the result
1356   // length might be > INT32_MAX. ICU will fail in this case.
1357   static_assert(JSString::MAX_LENGTH <= INT32_MAX,
1358                 "String length must fit in int32_t for ICU");
1359 
1360   static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1361 
1362   intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx);
1363 
1364   auto ok = mozilla::intl::String::ToLocaleUpperCase(locale, input, buffer);
1365   if (ok.isErr()) {
1366     intl::ReportInternalError(cx, ok.unwrapErr());
1367     return false;
1368   }
1369 
1370   JSString* result = buffer.toString(cx);
1371   if (!result) {
1372     return false;
1373   }
1374 
1375   args.rval().setString(result);
1376   return true;
1377 }
1378 
1379 #else
1380 
1381 // When the Intl API is not exposed, String.prototype.toUpperCase is implemented
1382 // in C++.
str_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1383 static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1384   CallArgs args = CallArgsFromVp(argc, vp);
1385 
1386   RootedString str(
1387       cx, ToStringForStringFunction(cx, "toLocaleUpperCase", args.thisv()));
1388   if (!str) {
1389     return false;
1390   }
1391 
1392   /*
1393    * Forcefully ignore the first (or any) argument and return toUpperCase(),
1394    * ECMA has reserved that argument, presumably for defining the locale.
1395    */
1396   if (cx->runtime()->localeCallbacks &&
1397       cx->runtime()->localeCallbacks->localeToUpperCase) {
1398     RootedValue result(cx);
1399     if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) {
1400       return false;
1401     }
1402 
1403     args.rval().set(result);
1404     return true;
1405   }
1406 
1407   RootedLinearString linear(cx, str->ensureLinear(cx));
1408   if (!linear) {
1409     return false;
1410   }
1411 
1412   JSString* result = StringToUpperCase(cx, linear);
1413   if (!result) {
1414     return false;
1415   }
1416 
1417   args.rval().setString(result);
1418   return true;
1419 }
1420 
1421 #endif  // JS_HAS_INTL_API
1422 
1423 #if JS_HAS_INTL_API
1424 
1425 // String.prototype.localeCompare is self-hosted when Intl functionality is
1426 // exposed, and the only intrinsics it requires are provided in the
1427 // implementation of Intl.Collator.
1428 
1429 #else
1430 
1431 // String.prototype.localeCompare is implemented in C++ (delegating to
1432 // JSLocaleCallbacks) when Intl functionality is not exposed.
str_localeCompare(JSContext * cx,unsigned argc,Value * vp)1433 static bool str_localeCompare(JSContext* cx, unsigned argc, Value* vp) {
1434   CallArgs args = CallArgsFromVp(argc, vp);
1435   RootedString str(
1436       cx, ToStringForStringFunction(cx, "localeCompare", args.thisv()));
1437   if (!str) {
1438     return false;
1439   }
1440 
1441   RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
1442   if (!thatStr) {
1443     return false;
1444   }
1445 
1446   if (cx->runtime()->localeCallbacks &&
1447       cx->runtime()->localeCallbacks->localeCompare) {
1448     RootedValue result(cx);
1449     if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr,
1450                                                        &result)) {
1451       return false;
1452     }
1453 
1454     args.rval().set(result);
1455     return true;
1456   }
1457 
1458   int32_t result;
1459   if (!CompareStrings(cx, str, thatStr, &result)) {
1460     return false;
1461   }
1462 
1463   args.rval().setInt32(result);
1464   return true;
1465 }
1466 
1467 #endif  // JS_HAS_INTL_API
1468 
1469 #if JS_HAS_INTL_API
1470 
1471 // ES2017 draft rev 45e890512fd77add72cc0ee742785f9f6f6482de
1472 // 21.1.3.12 String.prototype.normalize ( [ form ] )
1473 //
1474 // String.prototype.normalize is only implementable if ICU's normalization
1475 // functionality is available.
str_normalize(JSContext * cx,unsigned argc,Value * vp)1476 static bool str_normalize(JSContext* cx, unsigned argc, Value* vp) {
1477   CallArgs args = CallArgsFromVp(argc, vp);
1478 
1479   // Steps 1-2.
1480   RootedString str(cx,
1481                    ToStringForStringFunction(cx, "normalize", args.thisv()));
1482   if (!str) {
1483     return false;
1484   }
1485 
1486   using NormalizationForm = mozilla::intl::String::NormalizationForm;
1487 
1488   NormalizationForm form;
1489   if (!args.hasDefined(0)) {
1490     // Step 3.
1491     form = NormalizationForm::NFC;
1492   } else {
1493     // Step 4.
1494     JSLinearString* formStr = ArgToLinearString(cx, args, 0);
1495     if (!formStr) {
1496       return false;
1497     }
1498 
1499     // Step 5.
1500     if (EqualStrings(formStr, cx->names().NFC)) {
1501       form = NormalizationForm::NFC;
1502     } else if (EqualStrings(formStr, cx->names().NFD)) {
1503       form = NormalizationForm::NFD;
1504     } else if (EqualStrings(formStr, cx->names().NFKC)) {
1505       form = NormalizationForm::NFKC;
1506     } else if (EqualStrings(formStr, cx->names().NFKD)) {
1507       form = NormalizationForm::NFKD;
1508     } else {
1509       JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1510                                 JSMSG_INVALID_NORMALIZE_FORM);
1511       return false;
1512     }
1513   }
1514 
1515   // Latin-1 strings are already in Normalization Form C.
1516   if (form == NormalizationForm::NFC && str->hasLatin1Chars()) {
1517     // Step 7.
1518     args.rval().setString(str);
1519     return true;
1520   }
1521 
1522   // Step 6.
1523   AutoStableStringChars stableChars(cx);
1524   if (!stableChars.initTwoByte(cx, str)) {
1525     return false;
1526   }
1527 
1528   mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();
1529 
1530   static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1531 
1532   intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx);
1533 
1534   auto alreadyNormalized =
1535       mozilla::intl::String::Normalize(form, srcChars, buffer);
1536   if (alreadyNormalized.isErr()) {
1537     intl::ReportInternalError(cx, alreadyNormalized.unwrapErr());
1538     return false;
1539   }
1540 
1541   using AlreadyNormalized = mozilla::intl::String::AlreadyNormalized;
1542 
1543   // Return if the input string is already normalized.
1544   if (alreadyNormalized.unwrap() == AlreadyNormalized::Yes) {
1545     // Step 7.
1546     args.rval().setString(str);
1547     return true;
1548   }
1549 
1550   JSString* ns = buffer.toString(cx);
1551   if (!ns) {
1552     return false;
1553   }
1554 
1555   // Step 7.
1556   args.rval().setString(ns);
1557   return true;
1558 }
1559 
1560 #endif  // JS_HAS_INTL_API
1561 
str_charAt(JSContext * cx,unsigned argc,Value * vp)1562 static bool str_charAt(JSContext* cx, unsigned argc, Value* vp) {
1563   CallArgs args = CallArgsFromVp(argc, vp);
1564 
1565   RootedString str(cx);
1566   size_t i;
1567   if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
1568     str = args.thisv().toString();
1569     i = size_t(args[0].toInt32());
1570     if (i >= str->length()) {
1571       goto out_of_range;
1572     }
1573   } else {
1574     str = ToStringForStringFunction(cx, "charAt", args.thisv());
1575     if (!str) {
1576       return false;
1577     }
1578 
1579     double d = 0.0;
1580     if (args.length() > 0 && !ToInteger(cx, args[0], &d)) {
1581       return false;
1582     }
1583 
1584     if (d < 0 || str->length() <= d) {
1585       goto out_of_range;
1586     }
1587     i = size_t(d);
1588   }
1589 
1590   str = cx->staticStrings().getUnitStringForElement(cx, str, i);
1591   if (!str) {
1592     return false;
1593   }
1594   args.rval().setString(str);
1595   return true;
1596 
1597 out_of_range:
1598   args.rval().setString(cx->runtime()->emptyString);
1599   return true;
1600 }
1601 
str_charCodeAt_impl(JSContext * cx,HandleString string,HandleValue index,MutableHandleValue res)1602 bool js::str_charCodeAt_impl(JSContext* cx, HandleString string,
1603                              HandleValue index, MutableHandleValue res) {
1604   size_t i;
1605   if (index.isInt32()) {
1606     i = index.toInt32();
1607     if (i >= string->length()) {
1608       goto out_of_range;
1609     }
1610   } else {
1611     double d = 0.0;
1612     if (!ToInteger(cx, index, &d)) {
1613       return false;
1614     }
1615     // check whether d is negative as size_t is unsigned
1616     if (d < 0 || string->length() <= d) {
1617       goto out_of_range;
1618     }
1619     i = size_t(d);
1620   }
1621   char16_t c;
1622   if (!string->getChar(cx, i, &c)) {
1623     return false;
1624   }
1625   res.setInt32(c);
1626   return true;
1627 
1628 out_of_range:
1629   res.setNaN();
1630   return true;
1631 }
1632 
str_charCodeAt(JSContext * cx,unsigned argc,Value * vp)1633 bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) {
1634   CallArgs args = CallArgsFromVp(argc, vp);
1635   RootedString str(cx);
1636   RootedValue index(cx);
1637   if (args.thisv().isString()) {
1638     str = args.thisv().toString();
1639   } else {
1640     str = ToStringForStringFunction(cx, "charCodeAt", args.thisv());
1641     if (!str) {
1642       return false;
1643     }
1644   }
1645   if (args.length() != 0) {
1646     index = args[0];
1647   } else {
1648     index.setInt32(0);
1649   }
1650 
1651   return js::str_charCodeAt_impl(cx, str, index, args.rval());
1652 }
1653 
1654 /*
1655  * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1656  * The patlen argument must be positive and no greater than sBMHPatLenMax.
1657  *
1658  * Return the index of pat in text, or -1 if not found.
1659  */
1660 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1661 static const uint32_t sBMHPatLenMax = 255;   /* skip table element is uint8_t */
1662 static const int sBMHBadPattern =
1663     -2; /* return value if pat is not ISO-Latin-1 */
1664 
1665 template <typename TextChar, typename PatChar>
BoyerMooreHorspool(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1666 static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen,
1667                               const PatChar* pat, uint32_t patLen) {
1668   MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1669 
1670   uint8_t skip[sBMHCharSetSize];
1671   for (uint32_t i = 0; i < sBMHCharSetSize; i++) {
1672     skip[i] = uint8_t(patLen);
1673   }
1674 
1675   uint32_t patLast = patLen - 1;
1676   for (uint32_t i = 0; i < patLast; i++) {
1677     char16_t c = pat[i];
1678     if (c >= sBMHCharSetSize) {
1679       return sBMHBadPattern;
1680     }
1681     skip[c] = uint8_t(patLast - i);
1682   }
1683 
1684   for (uint32_t k = patLast; k < textLen;) {
1685     for (uint32_t i = k, j = patLast;; i--, j--) {
1686       if (text[i] != pat[j]) {
1687         break;
1688       }
1689       if (j == 0) {
1690         return static_cast<int>(i); /* safe: max string size */
1691       }
1692     }
1693 
1694     char16_t c = text[k];
1695     k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1696   }
1697   return -1;
1698 }
1699 
1700 template <typename TextChar, typename PatChar>
1701 struct MemCmp {
1702   using Extent = uint32_t;
computeExtentMemCmp1703   static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
1704                                                 uint32_t patLen) {
1705     return (patLen - 1) * sizeof(PatChar);
1706   }
matchMemCmp1707   static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1708                                       Extent extent) {
1709     MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1710     return memcmp(p, t, extent) == 0;
1711   }
1712 };
1713 
1714 template <typename TextChar, typename PatChar>
1715 struct ManualCmp {
1716   using Extent = const PatChar*;
computeExtentManualCmp1717   static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat,
1718                                                 uint32_t patLen) {
1719     return pat + patLen;
1720   }
matchManualCmp1721   static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1722                                       Extent extent) {
1723     for (; p != extent; ++p, ++t) {
1724       if (*p != *t) {
1725         return false;
1726       }
1727     }
1728     return true;
1729   }
1730 };
1731 
1732 template <typename TextChar, typename PatChar>
FirstCharMatcherUnrolled(const TextChar * text,uint32_t n,const PatChar pat)1733 static const TextChar* FirstCharMatcherUnrolled(const TextChar* text,
1734                                                 uint32_t n, const PatChar pat) {
1735   const TextChar* textend = text + n;
1736   const TextChar* t = text;
1737 
1738   switch ((textend - t) & 7) {
1739     case 0:
1740       if (*t++ == pat) return t - 1;
1741       [[fallthrough]];
1742     case 7:
1743       if (*t++ == pat) return t - 1;
1744       [[fallthrough]];
1745     case 6:
1746       if (*t++ == pat) return t - 1;
1747       [[fallthrough]];
1748     case 5:
1749       if (*t++ == pat) return t - 1;
1750       [[fallthrough]];
1751     case 4:
1752       if (*t++ == pat) return t - 1;
1753       [[fallthrough]];
1754     case 3:
1755       if (*t++ == pat) return t - 1;
1756       [[fallthrough]];
1757     case 2:
1758       if (*t++ == pat) return t - 1;
1759       [[fallthrough]];
1760     case 1:
1761       if (*t++ == pat) return t - 1;
1762   }
1763   while (textend != t) {
1764     if (t[0] == pat) return t;
1765     if (t[1] == pat) return t + 1;
1766     if (t[2] == pat) return t + 2;
1767     if (t[3] == pat) return t + 3;
1768     if (t[4] == pat) return t + 4;
1769     if (t[5] == pat) return t + 5;
1770     if (t[6] == pat) return t + 6;
1771     if (t[7] == pat) return t + 7;
1772     t += 8;
1773   }
1774   return nullptr;
1775 }
1776 
FirstCharMatcher8bit(const char * text,uint32_t n,const char pat)1777 static const char* FirstCharMatcher8bit(const char* text, uint32_t n,
1778                                         const char pat) {
1779   return reinterpret_cast<const char*>(memchr(text, pat, n));
1780 }
1781 
1782 template <class InnerMatch, typename TextChar, typename PatChar>
Matcher(const TextChar * text,uint32_t textlen,const PatChar * pat,uint32_t patlen)1783 static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
1784                    uint32_t patlen) {
1785   MOZ_ASSERT(patlen > 0);
1786 
1787   if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) {
1788     return -1;
1789   }
1790 
1791   const typename InnerMatch::Extent extent =
1792       InnerMatch::computeExtent(pat, patlen);
1793 
1794   uint32_t i = 0;
1795   uint32_t n = textlen - patlen + 1;
1796   while (i < n) {
1797     const TextChar* pos;
1798 
1799     if (sizeof(TextChar) == 1) {
1800       MOZ_ASSERT(pat[0] <= 0xff);
1801       pos = (TextChar*)FirstCharMatcher8bit((char*)text + i, n - i, pat[0]);
1802     } else {
1803       pos = FirstCharMatcherUnrolled(text + i, n - i, char16_t(pat[0]));
1804     }
1805 
1806     if (pos == nullptr) {
1807       return -1;
1808     }
1809 
1810     i = static_cast<uint32_t>(pos - text);
1811     if (InnerMatch::match(pat + 1, text + i + 1, extent)) {
1812       return i;
1813     }
1814 
1815     i += 1;
1816   }
1817   return -1;
1818 }
1819 
1820 template <typename TextChar, typename PatChar>
StringMatch(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1821 static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen,
1822                                          const PatChar* pat, uint32_t patLen) {
1823   if (patLen == 0) {
1824     return 0;
1825   }
1826   if (textLen < patLen) {
1827     return -1;
1828   }
1829 
1830 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1831   /*
1832    * Given enough registers, the unrolled loop below is faster than the
1833    * following loop. 32-bit x86 does not have enough registers.
1834    */
1835   if (patLen == 1) {
1836     const PatChar p0 = *pat;
1837     const TextChar* end = text + textLen;
1838     for (const TextChar* c = text; c != end; ++c) {
1839       if (*c == p0) {
1840         return c - text;
1841       }
1842     }
1843     return -1;
1844   }
1845 #endif
1846 
1847   /*
1848    * If the text or pattern string is short, BMH will be more expensive than
1849    * the basic linear scan due to initialization cost and a more complex loop
1850    * body. While the correct threshold is input-dependent, we can make a few
1851    * conservative observations:
1852    *  - When |textLen| is "big enough", the initialization time will be
1853    *    proportionally small, so the worst-case slowdown is minimized.
1854    *  - When |patLen| is "too small", even the best case for BMH will be
1855    *    slower than a simple scan for large |textLen| due to the more complex
1856    *    loop body of BMH.
1857    * From this, the values for "big enough" and "too small" are determined
1858    * empirically. See bug 526348.
1859    */
1860   if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
1861     int index = BoyerMooreHorspool(text, textLen, pat, patLen);
1862     if (index != sBMHBadPattern) {
1863       return index;
1864     }
1865   }
1866 
1867   /*
1868    * For big patterns with large potential overlap we want the SIMD-optimized
1869    * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1870    * use memcmp if one of the strings is TwoByte and the other is Latin-1.
1871    */
1872   return (patLen > 128 && std::is_same_v<TextChar, PatChar>)
1873              ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(
1874                    text, textLen, pat, patLen)
1875              : Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(
1876                    text, textLen, pat, patLen);
1877 }
1878 
StringMatch(JSLinearString * text,JSLinearString * pat,uint32_t start=0)1879 static int32_t StringMatch(JSLinearString* text, JSLinearString* pat,
1880                            uint32_t start = 0) {
1881   MOZ_ASSERT(start <= text->length());
1882   uint32_t textLen = text->length() - start;
1883   uint32_t patLen = pat->length();
1884 
1885   int match;
1886   AutoCheckCannotGC nogc;
1887   if (text->hasLatin1Chars()) {
1888     const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1889     if (pat->hasLatin1Chars()) {
1890       match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1891     } else {
1892       match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1893     }
1894   } else {
1895     const char16_t* textChars = text->twoByteChars(nogc) + start;
1896     if (pat->hasLatin1Chars()) {
1897       match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1898     } else {
1899       match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1900     }
1901   }
1902 
1903   return (match == -1) ? -1 : start + match;
1904 }
1905 
1906 static const size_t sRopeMatchThresholdRatioLog2 = 4;
1907 
StringFindPattern(JSLinearString * text,JSLinearString * pat,size_t start)1908 int js::StringFindPattern(JSLinearString* text, JSLinearString* pat,
1909                           size_t start) {
1910   return StringMatch(text, pat, start);
1911 }
1912 
1913 // When an algorithm does not need a string represented as a single linear
1914 // array of characters, this range utility may be used to traverse the string a
1915 // sequence of linear arrays of characters. This avoids flattening ropes.
1916 class StringSegmentRange {
1917   // If malloc() shows up in any profiles from this vector, we can add a new
1918   // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1919   using StackVector = JS::GCVector<JSString*, 16>;
1920   Rooted<StackVector> stack;
1921   RootedLinearString cur;
1922 
settle(JSString * str)1923   bool settle(JSString* str) {
1924     while (str->isRope()) {
1925       JSRope& rope = str->asRope();
1926       if (!stack.append(rope.rightChild())) {
1927         return false;
1928       }
1929       str = rope.leftChild();
1930     }
1931     cur = &str->asLinear();
1932     return true;
1933   }
1934 
1935  public:
StringSegmentRange(JSContext * cx)1936   explicit StringSegmentRange(JSContext* cx)
1937       : stack(cx, StackVector(cx)), cur(cx) {}
1938 
init(JSString * str)1939   [[nodiscard]] bool init(JSString* str) {
1940     MOZ_ASSERT(stack.empty());
1941     return settle(str);
1942   }
1943 
empty() const1944   bool empty() const { return cur == nullptr; }
1945 
front() const1946   JSLinearString* front() const {
1947     MOZ_ASSERT(!cur->isRope());
1948     return cur;
1949   }
1950 
popFront()1951   [[nodiscard]] bool popFront() {
1952     MOZ_ASSERT(!empty());
1953     if (stack.empty()) {
1954       cur = nullptr;
1955       return true;
1956     }
1957     return settle(stack.popCopy());
1958   }
1959 };
1960 
1961 typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
1962 
1963 template <typename TextChar, typename PatChar>
RopeMatchImpl(const AutoCheckCannotGC & nogc,LinearStringVector & strings,const PatChar * pat,size_t patLen)1964 static int RopeMatchImpl(const AutoCheckCannotGC& nogc,
1965                          LinearStringVector& strings, const PatChar* pat,
1966                          size_t patLen) {
1967   /* Absolute offset from the beginning of the logical text string. */
1968   int pos = 0;
1969 
1970   for (JSLinearString** outerp = strings.begin(); outerp != strings.end();
1971        ++outerp) {
1972     /* Try to find a match within 'outer'. */
1973     JSLinearString* outer = *outerp;
1974     const TextChar* chars = outer->chars<TextChar>(nogc);
1975     size_t len = outer->length();
1976     int matchResult = StringMatch(chars, len, pat, patLen);
1977     if (matchResult != -1) {
1978       /* Matched! */
1979       return pos + matchResult;
1980     }
1981 
1982     /* Try to find a match starting in 'outer' and running into other nodes. */
1983     const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
1984     const TextChar* const textend = chars + len;
1985     const PatChar p0 = *pat;
1986     const PatChar* const p1 = pat + 1;
1987     const PatChar* const patend = pat + patLen;
1988     for (const TextChar* t = text; t != textend;) {
1989       if (*t++ != p0) {
1990         continue;
1991       }
1992 
1993       JSLinearString** innerp = outerp;
1994       const TextChar* ttend = textend;
1995       const TextChar* tt = t;
1996       for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) {
1997         while (tt == ttend) {
1998           if (++innerp == strings.end()) {
1999             return -1;
2000           }
2001 
2002           JSLinearString* inner = *innerp;
2003           tt = inner->chars<TextChar>(nogc);
2004           ttend = tt + inner->length();
2005         }
2006         if (*pp != *tt) {
2007           goto break_continue;
2008         }
2009       }
2010 
2011       /* Matched! */
2012       return pos + (t - chars) - 1; /* -1 because of *t++ above */
2013 
2014     break_continue:;
2015     }
2016 
2017     pos += len;
2018   }
2019 
2020   return -1;
2021 }
2022 
2023 /*
2024  * RopeMatch takes the text to search and the pattern to search for in the text.
2025  * RopeMatch returns false on OOM and otherwise returns the match index through
2026  * the 'match' outparam (-1 for not found).
2027  */
RopeMatch(JSContext * cx,JSRope * text,JSLinearString * pat,int * match)2028 static bool RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat,
2029                       int* match) {
2030   uint32_t patLen = pat->length();
2031   if (patLen == 0) {
2032     *match = 0;
2033     return true;
2034   }
2035   if (text->length() < patLen) {
2036     *match = -1;
2037     return true;
2038   }
2039 
2040   /*
2041    * List of leaf nodes in the rope. If we run out of memory when trying to
2042    * append to this list, we can still fall back to StringMatch, so use the
2043    * system allocator so we don't report OOM in that case.
2044    */
2045   LinearStringVector strings;
2046 
2047   /*
2048    * We don't want to do rope matching if there is a poor node-to-char ratio,
2049    * since this means spending a lot of time in the match loop below. We also
2050    * need to build the list of leaf nodes. Do both here: iterate over the
2051    * nodes so long as there are not too many.
2052    *
2053    * We also don't use rope matching if the rope contains both Latin-1 and
2054    * TwoByte nodes, to simplify the match algorithm.
2055    */
2056   {
2057     size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
2058     StringSegmentRange r(cx);
2059     if (!r.init(text)) {
2060       return false;
2061     }
2062 
2063     bool textIsLatin1 = text->hasLatin1Chars();
2064     while (!r.empty()) {
2065       if (threshold-- == 0 || r.front()->hasLatin1Chars() != textIsLatin1 ||
2066           !strings.append(r.front())) {
2067         JSLinearString* linear = text->ensureLinear(cx);
2068         if (!linear) {
2069           return false;
2070         }
2071 
2072         *match = StringMatch(linear, pat);
2073         return true;
2074       }
2075       if (!r.popFront()) {
2076         return false;
2077       }
2078     }
2079   }
2080 
2081   AutoCheckCannotGC nogc;
2082   if (text->hasLatin1Chars()) {
2083     if (pat->hasLatin1Chars()) {
2084       *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc),
2085                                          patLen);
2086     } else {
2087       *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc),
2088                                          patLen);
2089     }
2090   } else {
2091     if (pat->hasLatin1Chars()) {
2092       *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc),
2093                                        patLen);
2094     } else {
2095       *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc),
2096                                        patLen);
2097     }
2098   }
2099 
2100   return true;
2101 }
2102 
ReportErrorIfFirstArgIsRegExp(JSContext * cx,const CallArgs & args)2103 static MOZ_ALWAYS_INLINE bool ReportErrorIfFirstArgIsRegExp(
2104     JSContext* cx, const CallArgs& args) {
2105   // Only call IsRegExp if the first argument is definitely an object, so we
2106   // don't pay the cost of an additional function call in the common case.
2107   if (args.length() == 0 || !args[0].isObject()) {
2108     return true;
2109   }
2110 
2111   bool isRegExp;
2112   if (!IsRegExp(cx, args[0], &isRegExp)) {
2113     return false;
2114   }
2115 
2116   if (isRegExp) {
2117     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
2118                               JSMSG_INVALID_ARG_TYPE, "first", "",
2119                               "Regular Expression");
2120     return false;
2121   }
2122   return true;
2123 }
2124 
2125 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2126 // 21.1.3.7 String.prototype.includes ( searchString [ , position ] )
str_includes(JSContext * cx,unsigned argc,Value * vp)2127 bool js::str_includes(JSContext* cx, unsigned argc, Value* vp) {
2128   CallArgs args = CallArgsFromVp(argc, vp);
2129 
2130   // Steps 1-2.
2131   RootedString str(cx, ToStringForStringFunction(cx, "includes", args.thisv()));
2132   if (!str) {
2133     return false;
2134   }
2135 
2136   // Steps 3-4.
2137   if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2138     return false;
2139   }
2140 
2141   // Step 5.
2142   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2143   if (!searchStr) {
2144     return false;
2145   }
2146 
2147   // Step 6.
2148   uint32_t pos = 0;
2149   if (args.hasDefined(1)) {
2150     if (args[1].isInt32()) {
2151       int i = args[1].toInt32();
2152       pos = (i < 0) ? 0U : uint32_t(i);
2153     } else {
2154       double d;
2155       if (!ToInteger(cx, args[1], &d)) {
2156         return false;
2157       }
2158       pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2159     }
2160   }
2161 
2162   // Step 7.
2163   uint32_t textLen = str->length();
2164 
2165   // Step 8.
2166   uint32_t start = std::min(pos, textLen);
2167 
2168   // Steps 9-10.
2169   JSLinearString* text = str->ensureLinear(cx);
2170   if (!text) {
2171     return false;
2172   }
2173 
2174   args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
2175   return true;
2176 }
2177 
2178 /* ES6 20120927 draft 15.5.4.7. */
str_indexOf(JSContext * cx,unsigned argc,Value * vp)2179 bool js::str_indexOf(JSContext* cx, unsigned argc, Value* vp) {
2180   CallArgs args = CallArgsFromVp(argc, vp);
2181 
2182   // Steps 1, 2, and 3
2183   RootedString str(cx, ToStringForStringFunction(cx, "indexOf", args.thisv()));
2184   if (!str) {
2185     return false;
2186   }
2187 
2188   // Steps 4 and 5
2189   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2190   if (!searchStr) {
2191     return false;
2192   }
2193 
2194   // Steps 6 and 7
2195   uint32_t pos = 0;
2196   if (args.hasDefined(1)) {
2197     if (args[1].isInt32()) {
2198       int i = args[1].toInt32();
2199       pos = (i < 0) ? 0U : uint32_t(i);
2200     } else {
2201       double d;
2202       if (!ToInteger(cx, args[1], &d)) {
2203         return false;
2204       }
2205       pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2206     }
2207   }
2208 
2209   // Step 8
2210   uint32_t textLen = str->length();
2211 
2212   // Step 9
2213   uint32_t start = std::min(pos, textLen);
2214 
2215   if (str == searchStr) {
2216     // AngularJS often invokes "false".indexOf("false"). This check should
2217     // be cheap enough to not hurt anything else.
2218     args.rval().setInt32(start == 0 ? 0 : -1);
2219     return true;
2220   }
2221 
2222   // Steps 10 and 11
2223   JSLinearString* text = str->ensureLinear(cx);
2224   if (!text) {
2225     return false;
2226   }
2227 
2228   args.rval().setInt32(StringMatch(text, searchStr, start));
2229   return true;
2230 }
2231 
2232 template <typename TextChar, typename PatChar>
LastIndexOfImpl(const TextChar * text,size_t textLen,const PatChar * pat,size_t patLen,size_t start)2233 static int32_t LastIndexOfImpl(const TextChar* text, size_t textLen,
2234                                const PatChar* pat, size_t patLen,
2235                                size_t start) {
2236   MOZ_ASSERT(patLen > 0);
2237   MOZ_ASSERT(patLen <= textLen);
2238   MOZ_ASSERT(start <= textLen - patLen);
2239 
2240   const PatChar p0 = *pat;
2241   const PatChar* patNext = pat + 1;
2242   const PatChar* patEnd = pat + patLen;
2243 
2244   for (const TextChar* t = text + start; t >= text; --t) {
2245     if (*t == p0) {
2246       const TextChar* t1 = t + 1;
2247       for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) {
2248         if (*t1 != *p1) {
2249           goto break_continue;
2250         }
2251       }
2252 
2253       return static_cast<int32_t>(t - text);
2254     }
2255   break_continue:;
2256   }
2257 
2258   return -1;
2259 }
2260 
2261 // ES2017 draft rev 6859bb9ccaea9c6ede81d71e5320e3833b92cb3e
2262 // 21.1.3.9 String.prototype.lastIndexOf ( searchString [ , position ] )
str_lastIndexOf(JSContext * cx,unsigned argc,Value * vp)2263 static bool str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp) {
2264   CallArgs args = CallArgsFromVp(argc, vp);
2265 
2266   // Steps 1-2.
2267   RootedString str(cx,
2268                    ToStringForStringFunction(cx, "lastIndexOf", args.thisv()));
2269   if (!str) {
2270     return false;
2271   }
2272 
2273   // Step 3.
2274   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2275   if (!searchStr) {
2276     return false;
2277   }
2278 
2279   // Step 6.
2280   size_t len = str->length();
2281 
2282   // Step 8.
2283   size_t searchLen = searchStr->length();
2284 
2285   // Steps 4-5, 7.
2286   int start = len - searchLen;  // Start searching here
2287   if (args.hasDefined(1)) {
2288     if (args[1].isInt32()) {
2289       int i = args[1].toInt32();
2290       if (i <= 0) {
2291         start = 0;
2292       } else if (i < start) {
2293         start = i;
2294       }
2295     } else {
2296       double d;
2297       if (!ToNumber(cx, args[1], &d)) {
2298         return false;
2299       }
2300       if (!IsNaN(d)) {
2301         d = JS::ToInteger(d);
2302         if (d <= 0) {
2303           start = 0;
2304         } else if (d < start) {
2305           start = int(d);
2306         }
2307       }
2308     }
2309   }
2310 
2311   if (str == searchStr) {
2312     args.rval().setInt32(0);
2313     return true;
2314   }
2315 
2316   if (searchLen > len) {
2317     args.rval().setInt32(-1);
2318     return true;
2319   }
2320 
2321   if (searchLen == 0) {
2322     args.rval().setInt32(start);
2323     return true;
2324   }
2325   MOZ_ASSERT(0 <= start && size_t(start) < len);
2326 
2327   JSLinearString* text = str->ensureLinear(cx);
2328   if (!text) {
2329     return false;
2330   }
2331 
2332   // Step 9.
2333   int32_t res;
2334   AutoCheckCannotGC nogc;
2335   if (text->hasLatin1Chars()) {
2336     const Latin1Char* textChars = text->latin1Chars(nogc);
2337     if (searchStr->hasLatin1Chars()) {
2338       res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2339                             searchLen, start);
2340     } else {
2341       res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2342                             searchLen, start);
2343     }
2344   } else {
2345     const char16_t* textChars = text->twoByteChars(nogc);
2346     if (searchStr->hasLatin1Chars()) {
2347       res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2348                             searchLen, start);
2349     } else {
2350       res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2351                             searchLen, start);
2352     }
2353   }
2354 
2355   args.rval().setInt32(res);
2356   return true;
2357 }
2358 
2359 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2360 // 21.1.3.20 String.prototype.startsWith ( searchString [ , position ] )
str_startsWith(JSContext * cx,unsigned argc,Value * vp)2361 bool js::str_startsWith(JSContext* cx, unsigned argc, Value* vp) {
2362   CallArgs args = CallArgsFromVp(argc, vp);
2363 
2364   // Steps 1-2.
2365   RootedString str(cx,
2366                    ToStringForStringFunction(cx, "startsWith", args.thisv()));
2367   if (!str) {
2368     return false;
2369   }
2370 
2371   // Steps 3-4.
2372   if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2373     return false;
2374   }
2375 
2376   // Step 5.
2377   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2378   if (!searchStr) {
2379     return false;
2380   }
2381 
2382   // Step 6.
2383   uint32_t pos = 0;
2384   if (args.hasDefined(1)) {
2385     if (args[1].isInt32()) {
2386       int i = args[1].toInt32();
2387       pos = (i < 0) ? 0U : uint32_t(i);
2388     } else {
2389       double d;
2390       if (!ToInteger(cx, args[1], &d)) {
2391         return false;
2392       }
2393       pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2394     }
2395   }
2396 
2397   // Step 7.
2398   uint32_t textLen = str->length();
2399 
2400   // Step 8.
2401   uint32_t start = std::min(pos, textLen);
2402 
2403   // Step 9.
2404   uint32_t searchLen = searchStr->length();
2405 
2406   // Step 10.
2407   if (searchLen + start < searchLen || searchLen + start > textLen) {
2408     args.rval().setBoolean(false);
2409     return true;
2410   }
2411 
2412   // Steps 11-12.
2413   JSLinearString* text = str->ensureLinear(cx);
2414   if (!text) {
2415     return false;
2416   }
2417 
2418   args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2419   return true;
2420 }
2421 
2422 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2423 // 21.1.3.6 String.prototype.endsWith ( searchString [ , endPosition ] )
str_endsWith(JSContext * cx,unsigned argc,Value * vp)2424 bool js::str_endsWith(JSContext* cx, unsigned argc, Value* vp) {
2425   CallArgs args = CallArgsFromVp(argc, vp);
2426 
2427   // Steps 1-2.
2428   RootedString str(cx, ToStringForStringFunction(cx, "endsWith", args.thisv()));
2429   if (!str) {
2430     return false;
2431   }
2432 
2433   // Steps 3-4.
2434   if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2435     return false;
2436   }
2437 
2438   // Step 5.
2439   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2440   if (!searchStr) {
2441     return false;
2442   }
2443 
2444   // Step 6.
2445   uint32_t textLen = str->length();
2446 
2447   // Step 7.
2448   uint32_t pos = textLen;
2449   if (args.hasDefined(1)) {
2450     if (args[1].isInt32()) {
2451       int i = args[1].toInt32();
2452       pos = (i < 0) ? 0U : uint32_t(i);
2453     } else {
2454       double d;
2455       if (!ToInteger(cx, args[1], &d)) {
2456         return false;
2457       }
2458       pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2459     }
2460   }
2461 
2462   // Step 8.
2463   uint32_t end = std::min(pos, textLen);
2464 
2465   // Step 9.
2466   uint32_t searchLen = searchStr->length();
2467 
2468   // Step 11 (reordered).
2469   if (searchLen > end) {
2470     args.rval().setBoolean(false);
2471     return true;
2472   }
2473 
2474   // Step 10.
2475   uint32_t start = end - searchLen;
2476 
2477   // Steps 12-13.
2478   JSLinearString* text = str->ensureLinear(cx);
2479   if (!text) {
2480     return false;
2481   }
2482 
2483   args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2484   return true;
2485 }
2486 
2487 template <typename CharT>
TrimString(const CharT * chars,bool trimStart,bool trimEnd,size_t length,size_t * pBegin,size_t * pEnd)2488 static void TrimString(const CharT* chars, bool trimStart, bool trimEnd,
2489                        size_t length, size_t* pBegin, size_t* pEnd) {
2490   size_t begin = 0, end = length;
2491 
2492   if (trimStart) {
2493     while (begin < length && unicode::IsSpace(chars[begin])) {
2494       ++begin;
2495     }
2496   }
2497 
2498   if (trimEnd) {
2499     while (end > begin && unicode::IsSpace(chars[end - 1])) {
2500       --end;
2501     }
2502   }
2503 
2504   *pBegin = begin;
2505   *pEnd = end;
2506 }
2507 
TrimString(JSContext * cx,const CallArgs & args,const char * funName,bool trimStart,bool trimEnd)2508 static bool TrimString(JSContext* cx, const CallArgs& args, const char* funName,
2509                        bool trimStart, bool trimEnd) {
2510   JSString* str = ToStringForStringFunction(cx, funName, args.thisv());
2511   if (!str) {
2512     return false;
2513   }
2514 
2515   JSLinearString* linear = str->ensureLinear(cx);
2516   if (!linear) {
2517     return false;
2518   }
2519 
2520   size_t length = linear->length();
2521   size_t begin, end;
2522   if (linear->hasLatin1Chars()) {
2523     AutoCheckCannotGC nogc;
2524     TrimString(linear->latin1Chars(nogc), trimStart, trimEnd, length, &begin,
2525                &end);
2526   } else {
2527     AutoCheckCannotGC nogc;
2528     TrimString(linear->twoByteChars(nogc), trimStart, trimEnd, length, &begin,
2529                &end);
2530   }
2531 
2532   JSLinearString* result = NewDependentString(cx, linear, begin, end - begin);
2533   if (!result) {
2534     return false;
2535   }
2536 
2537   args.rval().setString(result);
2538   return true;
2539 }
2540 
str_trim(JSContext * cx,unsigned argc,Value * vp)2541 static bool str_trim(JSContext* cx, unsigned argc, Value* vp) {
2542   CallArgs args = CallArgsFromVp(argc, vp);
2543   return TrimString(cx, args, "trim", true, true);
2544 }
2545 
str_trimStart(JSContext * cx,unsigned argc,Value * vp)2546 static bool str_trimStart(JSContext* cx, unsigned argc, Value* vp) {
2547   CallArgs args = CallArgsFromVp(argc, vp);
2548   return TrimString(cx, args, "trimStart", true, false);
2549 }
2550 
str_trimEnd(JSContext * cx,unsigned argc,Value * vp)2551 static bool str_trimEnd(JSContext* cx, unsigned argc, Value* vp) {
2552   CallArgs args = CallArgsFromVp(argc, vp);
2553   return TrimString(cx, args, "trimEnd", false, true);
2554 }
2555 
2556 // Utility for building a rope (lazy concatenation) of strings.
2557 class RopeBuilder {
2558   JSContext* cx;
2559   RootedString res;
2560 
2561   RopeBuilder(const RopeBuilder& other) = delete;
2562   void operator=(const RopeBuilder& other) = delete;
2563 
2564  public:
RopeBuilder(JSContext * cx)2565   explicit RopeBuilder(JSContext* cx)
2566       : cx(cx), res(cx, cx->runtime()->emptyString) {}
2567 
append(HandleString str)2568   inline bool append(HandleString str) {
2569     res = ConcatStrings<CanGC>(cx, res, str);
2570     return !!res;
2571   }
2572 
result()2573   inline JSString* result() { return res; }
2574 };
2575 
2576 namespace {
2577 
2578 template <typename CharT>
FindDollarIndex(const CharT * chars,size_t length)2579 static uint32_t FindDollarIndex(const CharT* chars, size_t length) {
2580   if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
2581     uint32_t dollarIndex = p - chars;
2582     MOZ_ASSERT(dollarIndex < length);
2583     return dollarIndex;
2584   }
2585   return UINT32_MAX;
2586 }
2587 
2588 } /* anonymous namespace */
2589 
2590 /*
2591  * Constructs a result string that looks like:
2592  *
2593  *      newstring = string[:matchStart] + repstr + string[matchEnd:]
2594  */
BuildFlatReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t matchStart,size_t patternLength)2595 static JSString* BuildFlatReplacement(JSContext* cx, HandleString textstr,
2596                                       HandleLinearString repstr,
2597                                       size_t matchStart, size_t patternLength) {
2598   size_t matchEnd = matchStart + patternLength;
2599 
2600   RootedString resultStr(cx, NewDependentString(cx, textstr, 0, matchStart));
2601   if (!resultStr) {
2602     return nullptr;
2603   }
2604 
2605   resultStr = ConcatStrings<CanGC>(cx, resultStr, repstr);
2606   if (!resultStr) {
2607     return nullptr;
2608   }
2609 
2610   MOZ_ASSERT(textstr->length() >= matchEnd);
2611   RootedString rest(cx, NewDependentString(cx, textstr, matchEnd,
2612                                            textstr->length() - matchEnd));
2613   if (!rest) {
2614     return nullptr;
2615   }
2616 
2617   return ConcatStrings<CanGC>(cx, resultStr, rest);
2618 }
2619 
BuildFlatRopeReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t match,size_t patternLength)2620 static JSString* BuildFlatRopeReplacement(JSContext* cx, HandleString textstr,
2621                                           HandleLinearString repstr,
2622                                           size_t match, size_t patternLength) {
2623   MOZ_ASSERT(textstr->isRope());
2624 
2625   size_t matchEnd = match + patternLength;
2626 
2627   /*
2628    * If we are replacing over a rope, avoid flattening it by iterating
2629    * through it, building a new rope.
2630    */
2631   StringSegmentRange r(cx);
2632   if (!r.init(textstr)) {
2633     return nullptr;
2634   }
2635 
2636   RopeBuilder builder(cx);
2637 
2638   /*
2639    * Special case when the pattern string is '', which matches to the
2640    * head of the string and doesn't overlap with any component of the rope.
2641    */
2642   if (patternLength == 0) {
2643     MOZ_ASSERT(match == 0);
2644     if (!builder.append(repstr)) {
2645       return nullptr;
2646     }
2647   }
2648 
2649   size_t pos = 0;
2650   while (!r.empty()) {
2651     RootedString str(cx, r.front());
2652     size_t len = str->length();
2653     size_t strEnd = pos + len;
2654     if (pos < matchEnd && strEnd > match) {
2655       /*
2656        * We need to special-case any part of the rope that overlaps
2657        * with the replacement string.
2658        */
2659       if (match >= pos) {
2660         /*
2661          * If this part of the rope overlaps with the left side of
2662          * the pattern, then it must be the only one to overlap with
2663          * the first character in the pattern, so we include the
2664          * replacement string here.
2665          */
2666         RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
2667         if (!leftSide || !builder.append(leftSide) || !builder.append(repstr)) {
2668           return nullptr;
2669         }
2670       }
2671 
2672       /*
2673        * If str runs off the end of the matched string, append the
2674        * last part of str.
2675        */
2676       if (strEnd > matchEnd) {
2677         RootedString rightSide(
2678             cx, NewDependentString(cx, str, matchEnd - pos, strEnd - matchEnd));
2679         if (!rightSide || !builder.append(rightSide)) {
2680           return nullptr;
2681         }
2682       }
2683     } else {
2684       if (!builder.append(str)) {
2685         return nullptr;
2686       }
2687     }
2688     pos += str->length();
2689     if (!r.popFront()) {
2690       return nullptr;
2691     }
2692   }
2693 
2694   return builder.result();
2695 }
2696 
2697 template <typename CharT>
AppendDollarReplacement(StringBuffer & newReplaceChars,size_t firstDollarIndex,size_t matchStart,size_t matchLimit,JSLinearString * text,const CharT * repChars,size_t repLength)2698 static bool AppendDollarReplacement(StringBuffer& newReplaceChars,
2699                                     size_t firstDollarIndex, size_t matchStart,
2700                                     size_t matchLimit, JSLinearString* text,
2701                                     const CharT* repChars, size_t repLength) {
2702   MOZ_ASSERT(firstDollarIndex < repLength);
2703   MOZ_ASSERT(matchStart <= matchLimit);
2704   MOZ_ASSERT(matchLimit <= text->length());
2705 
2706   // Move the pre-dollar chunk in bulk.
2707   if (!newReplaceChars.append(repChars, firstDollarIndex)) {
2708     return false;
2709   }
2710 
2711   // Move the rest char-by-char, interpreting dollars as we encounter them.
2712   const CharT* repLimit = repChars + repLength;
2713   for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
2714     if (*it != '$' || it == repLimit - 1) {
2715       if (!newReplaceChars.append(*it)) {
2716         return false;
2717       }
2718       continue;
2719     }
2720 
2721     switch (*(it + 1)) {
2722       case '$':
2723         // Eat one of the dollars.
2724         if (!newReplaceChars.append(*it)) {
2725           return false;
2726         }
2727         break;
2728       case '&':
2729         if (!newReplaceChars.appendSubstring(text, matchStart,
2730                                              matchLimit - matchStart)) {
2731           return false;
2732         }
2733         break;
2734       case '`':
2735         if (!newReplaceChars.appendSubstring(text, 0, matchStart)) {
2736           return false;
2737         }
2738         break;
2739       case '\'':
2740         if (!newReplaceChars.appendSubstring(text, matchLimit,
2741                                              text->length() - matchLimit)) {
2742           return false;
2743         }
2744         break;
2745       default:
2746         // The dollar we saw was not special (no matter what its mother told
2747         // it).
2748         if (!newReplaceChars.append(*it)) {
2749           return false;
2750         }
2751         continue;
2752     }
2753     ++it;  // We always eat an extra char in the above switch.
2754   }
2755 
2756   return true;
2757 }
2758 
2759 /*
2760  * Perform a linear-scan dollar substitution on the replacement text.
2761  */
InterpretDollarReplacement(JSContext * cx,HandleString textstrArg,HandleLinearString repstr,uint32_t firstDollarIndex,size_t matchStart,size_t patternLength)2762 static JSLinearString* InterpretDollarReplacement(
2763     JSContext* cx, HandleString textstrArg, HandleLinearString repstr,
2764     uint32_t firstDollarIndex, size_t matchStart, size_t patternLength) {
2765   RootedLinearString textstr(cx, textstrArg->ensureLinear(cx));
2766   if (!textstr) {
2767     return nullptr;
2768   }
2769 
2770   size_t matchLimit = matchStart + patternLength;
2771 
2772   /*
2773    * Most probably:
2774    *
2775    *      len(newstr) >= len(orig) - len(match) + len(replacement)
2776    *
2777    * Note that dollar vars _could_ make the resulting text smaller than this.
2778    */
2779   JSStringBuilder newReplaceChars(cx);
2780   if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars()) {
2781     return nullptr;
2782   }
2783 
2784   if (!newReplaceChars.reserve(textstr->length() - patternLength +
2785                                repstr->length())) {
2786     return nullptr;
2787   }
2788 
2789   bool res;
2790   if (repstr->hasLatin1Chars()) {
2791     AutoCheckCannotGC nogc;
2792     res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2793                                   matchLimit, textstr,
2794                                   repstr->latin1Chars(nogc), repstr->length());
2795   } else {
2796     AutoCheckCannotGC nogc;
2797     res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2798                                   matchLimit, textstr,
2799                                   repstr->twoByteChars(nogc), repstr->length());
2800   }
2801   if (!res) {
2802     return nullptr;
2803   }
2804 
2805   return newReplaceChars.finishString();
2806 }
2807 
2808 template <typename StrChar, typename RepChar>
StrFlatReplaceGlobal(JSContext * cx,JSLinearString * str,JSLinearString * pat,JSLinearString * rep,StringBuffer & sb)2809 static bool StrFlatReplaceGlobal(JSContext* cx, JSLinearString* str,
2810                                  JSLinearString* pat, JSLinearString* rep,
2811                                  StringBuffer& sb) {
2812   MOZ_ASSERT(str->length() > 0);
2813 
2814   AutoCheckCannotGC nogc;
2815   const StrChar* strChars = str->chars<StrChar>(nogc);
2816   const RepChar* repChars = rep->chars<RepChar>(nogc);
2817 
2818   // The pattern is empty, so we interleave the replacement string in-between
2819   // each character.
2820   if (!pat->length()) {
2821     CheckedInt<uint32_t> strLength(str->length());
2822     CheckedInt<uint32_t> repLength(rep->length());
2823     CheckedInt<uint32_t> length = repLength * (strLength - 1) + strLength;
2824     if (!length.isValid()) {
2825       ReportAllocationOverflow(cx);
2826       return false;
2827     }
2828 
2829     if (!sb.reserve(length.value())) {
2830       return false;
2831     }
2832 
2833     for (unsigned i = 0; i < str->length() - 1; ++i, ++strChars) {
2834       sb.infallibleAppend(*strChars);
2835       sb.infallibleAppend(repChars, rep->length());
2836     }
2837     sb.infallibleAppend(*strChars);
2838     return true;
2839   }
2840 
2841   // If it's true, we are sure that the result's length is, at least, the same
2842   // length as |str->length()|.
2843   if (rep->length() >= pat->length()) {
2844     if (!sb.reserve(str->length())) {
2845       return false;
2846     }
2847   }
2848 
2849   uint32_t start = 0;
2850   for (;;) {
2851     int match = StringMatch(str, pat, start);
2852     if (match < 0) {
2853       break;
2854     }
2855     if (!sb.append(strChars + start, match - start)) {
2856       return false;
2857     }
2858     if (!sb.append(repChars, rep->length())) {
2859       return false;
2860     }
2861     start = match + pat->length();
2862   }
2863 
2864   if (!sb.append(strChars + start, str->length() - start)) {
2865     return false;
2866   }
2867 
2868   return true;
2869 }
2870 
2871 // This is identical to "str.split(pattern).join(replacement)" except that we
2872 // do some deforestation optimization in Ion.
StringFlatReplaceString(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2873 JSString* js::StringFlatReplaceString(JSContext* cx, HandleString string,
2874                                       HandleString pattern,
2875                                       HandleString replacement) {
2876   MOZ_ASSERT(string);
2877   MOZ_ASSERT(pattern);
2878   MOZ_ASSERT(replacement);
2879 
2880   if (!string->length()) {
2881     return string;
2882   }
2883 
2884   RootedLinearString linearRepl(cx, replacement->ensureLinear(cx));
2885   if (!linearRepl) {
2886     return nullptr;
2887   }
2888 
2889   RootedLinearString linearPat(cx, pattern->ensureLinear(cx));
2890   if (!linearPat) {
2891     return nullptr;
2892   }
2893 
2894   RootedLinearString linearStr(cx, string->ensureLinear(cx));
2895   if (!linearStr) {
2896     return nullptr;
2897   }
2898 
2899   JSStringBuilder sb(cx);
2900   if (linearStr->hasTwoByteChars()) {
2901     if (!sb.ensureTwoByteChars()) {
2902       return nullptr;
2903     }
2904     if (linearRepl->hasTwoByteChars()) {
2905       if (!StrFlatReplaceGlobal<char16_t, char16_t>(cx, linearStr, linearPat,
2906                                                     linearRepl, sb)) {
2907         return nullptr;
2908       }
2909     } else {
2910       if (!StrFlatReplaceGlobal<char16_t, Latin1Char>(cx, linearStr, linearPat,
2911                                                       linearRepl, sb)) {
2912         return nullptr;
2913       }
2914     }
2915   } else {
2916     if (linearRepl->hasTwoByteChars()) {
2917       if (!sb.ensureTwoByteChars()) {
2918         return nullptr;
2919       }
2920       if (!StrFlatReplaceGlobal<Latin1Char, char16_t>(cx, linearStr, linearPat,
2921                                                       linearRepl, sb)) {
2922         return nullptr;
2923       }
2924     } else {
2925       if (!StrFlatReplaceGlobal<Latin1Char, Latin1Char>(
2926               cx, linearStr, linearPat, linearRepl, sb)) {
2927         return nullptr;
2928       }
2929     }
2930   }
2931 
2932   return sb.finishString();
2933 }
2934 
str_replace_string_raw(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2935 JSString* js::str_replace_string_raw(JSContext* cx, HandleString string,
2936                                      HandleString pattern,
2937                                      HandleString replacement) {
2938   RootedLinearString repl(cx, replacement->ensureLinear(cx));
2939   if (!repl) {
2940     return nullptr;
2941   }
2942 
2943   RootedLinearString pat(cx, pattern->ensureLinear(cx));
2944   if (!pat) {
2945     return nullptr;
2946   }
2947 
2948   size_t patternLength = pat->length();
2949   int32_t match;
2950   uint32_t dollarIndex;
2951 
2952   {
2953     AutoCheckCannotGC nogc;
2954     dollarIndex =
2955         repl->hasLatin1Chars()
2956             ? FindDollarIndex(repl->latin1Chars(nogc), repl->length())
2957             : FindDollarIndex(repl->twoByteChars(nogc), repl->length());
2958   }
2959 
2960   /*
2961    * |string| could be a rope, so we want to avoid flattening it for as
2962    * long as possible.
2963    */
2964   if (string->isRope()) {
2965     if (!RopeMatch(cx, &string->asRope(), pat, &match)) {
2966       return nullptr;
2967     }
2968   } else {
2969     match = StringMatch(&string->asLinear(), pat, 0);
2970   }
2971 
2972   if (match < 0) {
2973     return string;
2974   }
2975 
2976   if (dollarIndex != UINT32_MAX) {
2977     repl = InterpretDollarReplacement(cx, string, repl, dollarIndex, match,
2978                                       patternLength);
2979     if (!repl) {
2980       return nullptr;
2981     }
2982   } else if (string->isRope()) {
2983     return BuildFlatRopeReplacement(cx, string, repl, match, patternLength);
2984   }
2985   return BuildFlatReplacement(cx, string, repl, match, patternLength);
2986 }
2987 
2988 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall
2989 // Steps 7-16 when functionalReplace is false and searchString is not empty.
2990 //
2991 // The steps are quite different, for performance. Loops in steps 11 and 14
2992 // are fused. GetSubstitution is optimized away when possible.
2993 template <typename StrChar, typename RepChar>
ReplaceAll(JSContext * cx,JSLinearString * string,JSLinearString * searchString,JSLinearString * replaceString)2994 static JSString* ReplaceAll(JSContext* cx, JSLinearString* string,
2995                             JSLinearString* searchString,
2996                             JSLinearString* replaceString) {
2997   // Step 7.
2998   const size_t stringLength = string->length();
2999   const size_t searchLength = searchString->length();
3000   const size_t replaceLength = replaceString->length();
3001 
3002   MOZ_ASSERT(stringLength > 0);
3003   MOZ_ASSERT(searchLength > 0);
3004   MOZ_ASSERT(stringLength >= searchLength);
3005 
3006   // Step 8 (advanceBy is equal to searchLength when searchLength > 0).
3007 
3008   // Step 9 (not needed in this implementation).
3009 
3010   // Step 10.
3011   // Find the first match.
3012   int32_t position = StringMatch(string, searchString, 0);
3013 
3014   // Nothing to replace, so return early.
3015   if (position < 0) {
3016     return string;
3017   }
3018 
3019   // Step 11 (moved below).
3020 
3021   // Step 12.
3022   uint32_t endOfLastMatch = 0;
3023 
3024   // Step 13.
3025   JSStringBuilder result(cx);
3026   if constexpr (std::is_same_v<StrChar, char16_t> ||
3027                 std::is_same_v<RepChar, char16_t>) {
3028     if (!result.ensureTwoByteChars()) {
3029       return nullptr;
3030     }
3031   }
3032 
3033   {
3034     AutoCheckCannotGC nogc;
3035     const StrChar* strChars = string->chars<StrChar>(nogc);
3036     const RepChar* repChars = replaceString->chars<RepChar>(nogc);
3037 
3038     uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength);
3039 
3040     // If it's true, we are sure that the result's length is, at least, the same
3041     // length as |str->length()|.
3042     if (replaceLength >= searchLength) {
3043       if (!result.reserve(stringLength)) {
3044         return nullptr;
3045       }
3046     }
3047 
3048     do {
3049       // Step 14.c.
3050       // Append the substring before the current match.
3051       if (!result.append(strChars + endOfLastMatch,
3052                          position - endOfLastMatch)) {
3053         return nullptr;
3054       }
3055 
3056       // Steps 14.a-b and 14.d.
3057       // Append the replacement.
3058       if (dollarIndex != UINT32_MAX) {
3059         size_t matchLimit = position + searchLength;
3060         if (!AppendDollarReplacement(result, dollarIndex, position, matchLimit,
3061                                      string, repChars, replaceLength)) {
3062           return nullptr;
3063         }
3064       } else {
3065         if (!result.append(repChars, replaceLength)) {
3066           return nullptr;
3067         }
3068       }
3069 
3070       // Step 14.e.
3071       endOfLastMatch = position + searchLength;
3072 
3073       // Step 11.
3074       // Find the next match.
3075       position = StringMatch(string, searchString, endOfLastMatch);
3076     } while (position >= 0);
3077 
3078     // Step 15.
3079     // Append the substring after the last match.
3080     if (!result.append(strChars + endOfLastMatch,
3081                        stringLength - endOfLastMatch)) {
3082       return nullptr;
3083     }
3084   }
3085 
3086   // Step 16.
3087   return result.finishString();
3088 }
3089 
3090 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall
3091 // Steps 7-16 when functionalReplace is false and searchString is the empty
3092 // string.
3093 //
3094 // The steps are quite different, for performance. Loops in steps 11 and 14
3095 // are fused. GetSubstitution is optimized away when possible.
3096 template <typename StrChar, typename RepChar>
ReplaceAllInterleave(JSContext * cx,JSLinearString * string,JSLinearString * replaceString)3097 static JSString* ReplaceAllInterleave(JSContext* cx, JSLinearString* string,
3098                                       JSLinearString* replaceString) {
3099   // Step 7.
3100   const size_t stringLength = string->length();
3101   const size_t replaceLength = replaceString->length();
3102 
3103   // Step 8 (advanceBy is 1 when searchString is the empty string).
3104 
3105   // Steps 9-12 (trivial when searchString is the empty string).
3106 
3107   // Step 13.
3108   JSStringBuilder result(cx);
3109   if constexpr (std::is_same_v<StrChar, char16_t> ||
3110                 std::is_same_v<RepChar, char16_t>) {
3111     if (!result.ensureTwoByteChars()) {
3112       return nullptr;
3113     }
3114   }
3115 
3116   {
3117     AutoCheckCannotGC nogc;
3118     const StrChar* strChars = string->chars<StrChar>(nogc);
3119     const RepChar* repChars = replaceString->chars<RepChar>(nogc);
3120 
3121     uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength);
3122 
3123     if (dollarIndex != UINT32_MAX) {
3124       if (!result.reserve(stringLength)) {
3125         return nullptr;
3126       }
3127     } else {
3128       // Compute the exact result length when no substitutions take place.
3129       CheckedInt<uint32_t> strLength(stringLength);
3130       CheckedInt<uint32_t> repLength(replaceLength);
3131       CheckedInt<uint32_t> length = strLength + (strLength + 1) * repLength;
3132       if (!length.isValid()) {
3133         ReportAllocationOverflow(cx);
3134         return nullptr;
3135       }
3136 
3137       if (!result.reserve(length.value())) {
3138         return nullptr;
3139       }
3140     }
3141 
3142     auto appendReplacement = [&](size_t match) {
3143       if (dollarIndex != UINT32_MAX) {
3144         return AppendDollarReplacement(result, dollarIndex, match, match,
3145                                        string, repChars, replaceLength);
3146       }
3147       return result.append(repChars, replaceLength);
3148     };
3149 
3150     for (size_t index = 0; index < stringLength; index++) {
3151       // Steps 11, 14.a-b and 14.d.
3152       // The empty string matches before each character.
3153       if (!appendReplacement(index)) {
3154         return nullptr;
3155       }
3156 
3157       // Step 14.c.
3158       if (!result.append(strChars[index])) {
3159         return nullptr;
3160       }
3161     }
3162 
3163     // Steps 11, 14.a-b and 14.d.
3164     // The empty string also matches at the end of the string.
3165     if (!appendReplacement(stringLength)) {
3166       return nullptr;
3167     }
3168 
3169     // Step 15 (not applicable when searchString is the empty string).
3170   }
3171 
3172   // Step 16.
3173   return result.finishString();
3174 }
3175 
3176 // String.prototype.replaceAll (Stage 3 proposal)
3177 // https://tc39.es/proposal-string-replaceall/
3178 //
3179 // String.prototype.replaceAll ( searchValue, replaceValue )
3180 //
3181 // Steps 7-16 when functionalReplace is false.
str_replaceAll_string_raw(JSContext * cx,HandleString string,HandleString searchString,HandleString replaceString)3182 JSString* js::str_replaceAll_string_raw(JSContext* cx, HandleString string,
3183                                         HandleString searchString,
3184                                         HandleString replaceString) {
3185   const size_t stringLength = string->length();
3186   const size_t searchLength = searchString->length();
3187 
3188   // Directly return when we're guaranteed to find no match.
3189   if (searchLength > stringLength) {
3190     return string;
3191   }
3192 
3193   RootedLinearString str(cx, string->ensureLinear(cx));
3194   if (!str) {
3195     return nullptr;
3196   }
3197 
3198   RootedLinearString repl(cx, replaceString->ensureLinear(cx));
3199   if (!repl) {
3200     return nullptr;
3201   }
3202 
3203   RootedLinearString search(cx, searchString->ensureLinear(cx));
3204   if (!search) {
3205     return nullptr;
3206   }
3207 
3208   // The pattern is empty, so we interleave the replacement string in-between
3209   // each character.
3210   if (searchLength == 0) {
3211     if (str->hasTwoByteChars()) {
3212       if (repl->hasTwoByteChars()) {
3213         return ReplaceAllInterleave<char16_t, char16_t>(cx, str, repl);
3214       }
3215       return ReplaceAllInterleave<char16_t, Latin1Char>(cx, str, repl);
3216     }
3217     if (repl->hasTwoByteChars()) {
3218       return ReplaceAllInterleave<Latin1Char, char16_t>(cx, str, repl);
3219     }
3220     return ReplaceAllInterleave<Latin1Char, Latin1Char>(cx, str, repl);
3221   }
3222 
3223   MOZ_ASSERT(stringLength > 0);
3224 
3225   if (str->hasTwoByteChars()) {
3226     if (repl->hasTwoByteChars()) {
3227       return ReplaceAll<char16_t, char16_t>(cx, str, search, repl);
3228     }
3229     return ReplaceAll<char16_t, Latin1Char>(cx, str, search, repl);
3230   }
3231   if (repl->hasTwoByteChars()) {
3232     return ReplaceAll<Latin1Char, char16_t>(cx, str, search, repl);
3233   }
3234   return ReplaceAll<Latin1Char, Latin1Char>(cx, str, search, repl);
3235 }
3236 
SingleElementStringArray(JSContext * cx,HandleLinearString str)3237 static ArrayObject* SingleElementStringArray(JSContext* cx,
3238                                              HandleLinearString str) {
3239   ArrayObject* array = NewDenseFullyAllocatedArray(cx, 1);
3240   if (!array) {
3241     return nullptr;
3242   }
3243   array->setDenseInitializedLength(1);
3244   array->initDenseElement(0, StringValue(str));
3245   return array;
3246 }
3247 
3248 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,HandleLinearString sep)3249 static ArrayObject* SplitHelper(JSContext* cx, HandleLinearString str,
3250                                 uint32_t limit, HandleLinearString sep) {
3251   size_t strLength = str->length();
3252   size_t sepLength = sep->length();
3253   MOZ_ASSERT(sepLength != 0);
3254 
3255   // Step 12.
3256   if (strLength == 0) {
3257     // Step 12.a.
3258     int match = StringMatch(str, sep, 0);
3259 
3260     // Step 12.b.
3261     if (match != -1) {
3262       return NewDenseEmptyArray(cx);
3263     }
3264 
3265     // Steps 12.c-e.
3266     return SingleElementStringArray(cx, str);
3267   }
3268 
3269   // Step 3 (reordered).
3270   RootedValueVector splits(cx);
3271 
3272   // Step 8 (reordered).
3273   size_t lastEndIndex = 0;
3274 
3275   // Step 13.
3276   size_t index = 0;
3277 
3278   // Step 14.
3279   while (index != strLength) {
3280     // Step 14.a.
3281     int match = StringMatch(str, sep, index);
3282 
3283     // Step 14.b.
3284     //
3285     // Our match algorithm differs from the spec in that it returns the
3286     // next index at which a match happens.  If no match happens we're
3287     // done.
3288     //
3289     // But what if the match is at the end of the string (and the string is
3290     // not empty)?  Per 14.c.i this shouldn't be a match, so we have to
3291     // specially exclude it.  Thus this case should hold:
3292     //
3293     //   var a = "abc".split(/\b/);
3294     //   assertEq(a.length, 1);
3295     //   assertEq(a[0], "abc");
3296     if (match == -1) {
3297       break;
3298     }
3299 
3300     // Step 14.c.
3301     size_t endIndex = match + sepLength;
3302 
3303     // Step 14.c.i.
3304     if (endIndex == lastEndIndex) {
3305       index++;
3306       continue;
3307     }
3308 
3309     // Step 14.c.ii.
3310     MOZ_ASSERT(lastEndIndex < endIndex);
3311     MOZ_ASSERT(sepLength <= strLength);
3312     MOZ_ASSERT(lastEndIndex + sepLength <= endIndex);
3313 
3314     // Step 14.c.ii.1.
3315     size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
3316     JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3317 
3318     // Steps 14.c.ii.2-4.
3319     if (!sub || !splits.append(StringValue(sub))) {
3320       return nullptr;
3321     }
3322 
3323     // Step 14.c.ii.5.
3324     if (splits.length() == limit) {
3325       return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3326     }
3327 
3328     // Step 14.c.ii.6.
3329     index = endIndex;
3330 
3331     // Step 14.c.ii.7.
3332     lastEndIndex = index;
3333   }
3334 
3335   // Step 15.
3336   JSString* sub =
3337       NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
3338 
3339   // Steps 16-17.
3340   if (!sub || !splits.append(StringValue(sub))) {
3341     return nullptr;
3342   }
3343 
3344   // Step 18.
3345   return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3346 }
3347 
3348 // Fast-path for splitting a string into a character array via split("").
CharSplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit)3349 static ArrayObject* CharSplitHelper(JSContext* cx, HandleLinearString str,
3350                                     uint32_t limit) {
3351   size_t strLength = str->length();
3352   if (strLength == 0) {
3353     return NewDenseEmptyArray(cx);
3354   }
3355 
3356   js::StaticStrings& staticStrings = cx->staticStrings();
3357   uint32_t resultlen = (limit < strLength ? limit : strLength);
3358   MOZ_ASSERT(limit > 0 && resultlen > 0,
3359              "Neither limit nor strLength is zero, so resultlen is greater "
3360              "than zero.");
3361 
3362   RootedArrayObject splits(cx, NewDenseFullyAllocatedArray(cx, resultlen));
3363   if (!splits) {
3364     return nullptr;
3365   }
3366 
3367   if (str->hasLatin1Chars()) {
3368     splits->setDenseInitializedLength(resultlen);
3369 
3370     JS::AutoCheckCannotGC nogc;
3371     const Latin1Char* latin1Chars = str->latin1Chars(nogc);
3372     for (size_t i = 0; i < resultlen; ++i) {
3373       Latin1Char c = latin1Chars[i];
3374       MOZ_ASSERT(staticStrings.hasUnit(c));
3375       splits->initDenseElement(i, StringValue(staticStrings.getUnit(c)));
3376     }
3377   } else {
3378     splits->ensureDenseInitializedLength(0, resultlen);
3379 
3380     for (size_t i = 0; i < resultlen; ++i) {
3381       JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
3382       if (!sub) {
3383         return nullptr;
3384       }
3385       splits->initDenseElement(i, StringValue(sub));
3386     }
3387   }
3388 
3389   return splits;
3390 }
3391 
3392 template <typename TextChar>
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,const TextChar * text,uint32_t textLen,char16_t patCh)3393 static MOZ_ALWAYS_INLINE ArrayObject* SplitSingleCharHelper(
3394     JSContext* cx, HandleLinearString str, const TextChar* text,
3395     uint32_t textLen, char16_t patCh) {
3396   // Count the number of occurrences of patCh within text.
3397   uint32_t count = 0;
3398   for (size_t index = 0; index < textLen; index++) {
3399     if (static_cast<char16_t>(text[index]) == patCh) {
3400       count++;
3401     }
3402   }
3403 
3404   // Handle zero-occurrence case - return input string in an array.
3405   if (count == 0) {
3406     return SingleElementStringArray(cx, str);
3407   }
3408 
3409   // Create the result array for the substring values.
3410   RootedArrayObject splits(cx, NewDenseFullyAllocatedArray(cx, count + 1));
3411   if (!splits) {
3412     return nullptr;
3413   }
3414   splits->ensureDenseInitializedLength(0, count + 1);
3415 
3416   // Add substrings.
3417   uint32_t splitsIndex = 0;
3418   size_t lastEndIndex = 0;
3419   for (size_t index = 0; index < textLen; index++) {
3420     if (static_cast<char16_t>(text[index]) == patCh) {
3421       size_t subLength = size_t(index - lastEndIndex);
3422       JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3423       if (!sub) {
3424         return nullptr;
3425       }
3426       splits->initDenseElement(splitsIndex++, StringValue(sub));
3427       lastEndIndex = index + 1;
3428     }
3429   }
3430 
3431   // Add substring for tail of string (after last match).
3432   JSString* sub =
3433       NewDependentString(cx, str, lastEndIndex, textLen - lastEndIndex);
3434   if (!sub) {
3435     return nullptr;
3436   }
3437   splits->initDenseElement(splitsIndex++, StringValue(sub));
3438 
3439   return splits;
3440 }
3441 
3442 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,char16_t ch)3443 static ArrayObject* SplitSingleCharHelper(JSContext* cx, HandleLinearString str,
3444                                           char16_t ch) {
3445   // Step 12.
3446   size_t strLength = str->length();
3447 
3448   AutoStableStringChars linearChars(cx);
3449   if (!linearChars.init(cx, str)) {
3450     return nullptr;
3451   }
3452 
3453   if (linearChars.isLatin1()) {
3454     return SplitSingleCharHelper(cx, str, linearChars.latin1Chars(), strLength,
3455                                  ch);
3456   }
3457 
3458   return SplitSingleCharHelper(cx, str, linearChars.twoByteChars(), strLength,
3459                                ch);
3460 }
3461 
3462 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
StringSplitString(JSContext * cx,HandleString str,HandleString sep,uint32_t limit)3463 ArrayObject* js::StringSplitString(JSContext* cx, HandleString str,
3464                                    HandleString sep, uint32_t limit) {
3465   MOZ_ASSERT(limit > 0, "Only called for strictly positive limit.");
3466 
3467   RootedLinearString linearStr(cx, str->ensureLinear(cx));
3468   if (!linearStr) {
3469     return nullptr;
3470   }
3471 
3472   RootedLinearString linearSep(cx, sep->ensureLinear(cx));
3473   if (!linearSep) {
3474     return nullptr;
3475   }
3476 
3477   if (linearSep->length() == 0) {
3478     return CharSplitHelper(cx, linearStr, limit);
3479   }
3480 
3481   if (linearSep->length() == 1 && limit >= static_cast<uint32_t>(INT32_MAX)) {
3482     char16_t ch = linearSep->latin1OrTwoByteChar(0);
3483     return SplitSingleCharHelper(cx, linearStr, ch);
3484   }
3485 
3486   return SplitHelper(cx, linearStr, limit, linearSep);
3487 }
3488 
3489 static const JSFunctionSpec string_methods[] = {
3490     JS_FN(js_toSource_str, str_toSource, 0, 0),
3491 
3492     /* Java-like methods. */
3493     JS_INLINABLE_FN(js_toString_str, str_toString, 0, 0, StringToString),
3494     JS_INLINABLE_FN(js_valueOf_str, str_toString, 0, 0, StringValueOf),
3495     JS_INLINABLE_FN("toLowerCase", str_toLowerCase, 0, 0, StringToLowerCase),
3496     JS_INLINABLE_FN("toUpperCase", str_toUpperCase, 0, 0, StringToUpperCase),
3497     JS_INLINABLE_FN("charAt", str_charAt, 1, 0, StringCharAt),
3498     JS_INLINABLE_FN("charCodeAt", str_charCodeAt, 1, 0, StringCharCodeAt),
3499     JS_SELF_HOSTED_FN("substring", "String_substring", 2, 0),
3500     JS_SELF_HOSTED_FN("padStart", "String_pad_start", 2, 0),
3501     JS_SELF_HOSTED_FN("padEnd", "String_pad_end", 2, 0),
3502     JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1, 0),
3503     JS_FN("includes", str_includes, 1, 0), JS_FN("indexOf", str_indexOf, 1, 0),
3504     JS_FN("lastIndexOf", str_lastIndexOf, 1, 0),
3505     JS_FN("startsWith", str_startsWith, 1, 0),
3506     JS_FN("endsWith", str_endsWith, 1, 0), JS_FN("trim", str_trim, 0, 0),
3507     JS_FN("trimStart", str_trimStart, 0, 0),
3508     JS_FN("trimEnd", str_trimEnd, 0, 0),
3509 #if JS_HAS_INTL_API
3510     JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_toLocaleLowerCase", 0, 0),
3511     JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_toLocaleUpperCase", 0, 0),
3512     JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1, 0),
3513 #else
3514     JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0, 0),
3515     JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0, 0),
3516     JS_FN("localeCompare", str_localeCompare, 1, 0),
3517 #endif
3518     JS_SELF_HOSTED_FN("repeat", "String_repeat", 1, 0),
3519 #if JS_HAS_INTL_API
3520     JS_FN("normalize", str_normalize, 0, 0),
3521 #endif
3522 
3523     /* Perl-ish methods (search is actually Python-esque). */
3524     JS_SELF_HOSTED_FN("match", "String_match", 1, 0),
3525     JS_SELF_HOSTED_FN("matchAll", "String_matchAll", 1, 0),
3526     JS_SELF_HOSTED_FN("search", "String_search", 1, 0),
3527     JS_SELF_HOSTED_FN("replace", "String_replace", 2, 0),
3528     JS_SELF_HOSTED_FN("replaceAll", "String_replaceAll", 2, 0),
3529     JS_SELF_HOSTED_FN("split", "String_split", 2, 0),
3530     JS_SELF_HOSTED_FN("substr", "String_substr", 2, 0),
3531 
3532     /* Python-esque sequence methods. */
3533     JS_SELF_HOSTED_FN("concat", "String_concat", 1, 0),
3534     JS_SELF_HOSTED_FN("slice", "String_slice", 2, 0),
3535 
3536     JS_SELF_HOSTED_FN("at", "String_at", 1, 0),
3537 
3538     /* HTML string methods. */
3539     JS_SELF_HOSTED_FN("bold", "String_bold", 0, 0),
3540     JS_SELF_HOSTED_FN("italics", "String_italics", 0, 0),
3541     JS_SELF_HOSTED_FN("fixed", "String_fixed", 0, 0),
3542     JS_SELF_HOSTED_FN("strike", "String_strike", 0, 0),
3543     JS_SELF_HOSTED_FN("small", "String_small", 0, 0),
3544     JS_SELF_HOSTED_FN("big", "String_big", 0, 0),
3545     JS_SELF_HOSTED_FN("blink", "String_blink", 0, 0),
3546     JS_SELF_HOSTED_FN("sup", "String_sup", 0, 0),
3547     JS_SELF_HOSTED_FN("sub", "String_sub", 0, 0),
3548     JS_SELF_HOSTED_FN("anchor", "String_anchor", 1, 0),
3549     JS_SELF_HOSTED_FN("link", "String_link", 1, 0),
3550     JS_SELF_HOSTED_FN("fontcolor", "String_fontcolor", 1, 0),
3551     JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1, 0),
3552 
3553     JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0, 0), JS_FS_END};
3554 
3555 // ES6 rev 27 (2014 Aug 24) 21.1.1
StringConstructor(JSContext * cx,unsigned argc,Value * vp)3556 bool js::StringConstructor(JSContext* cx, unsigned argc, Value* vp) {
3557   CallArgs args = CallArgsFromVp(argc, vp);
3558 
3559   RootedString str(cx);
3560   if (args.length() > 0) {
3561     if (!args.isConstructing() && args[0].isSymbol()) {
3562       return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
3563     }
3564 
3565     str = ToString<CanGC>(cx, args[0]);
3566     if (!str) {
3567       return false;
3568     }
3569   } else {
3570     str = cx->runtime()->emptyString;
3571   }
3572 
3573   if (args.isConstructing()) {
3574     RootedObject proto(cx);
3575     if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_String, &proto)) {
3576       return false;
3577     }
3578 
3579     StringObject* strobj = StringObject::create(cx, str, proto);
3580     if (!strobj) {
3581       return false;
3582     }
3583     args.rval().setObject(*strobj);
3584     return true;
3585   }
3586 
3587   args.rval().setString(str);
3588   return true;
3589 }
3590 
str_fromCharCode(JSContext * cx,unsigned argc,Value * vp)3591 bool js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) {
3592   CallArgs args = CallArgsFromVp(argc, vp);
3593 
3594   MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);
3595 
3596   // Optimize the single-char case.
3597   if (args.length() == 1) {
3598     return str_fromCharCode_one_arg(cx, args[0], args.rval());
3599   }
3600 
3601   // Optimize the case where the result will definitely fit in an inline
3602   // string (thin or fat) and so we don't need to malloc the chars. (We could
3603   // cover some cases where args.length() goes up to
3604   // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
3605   // all Latin-1, but it doesn't seem worth the effort.)
3606   InlineCharBuffer<char16_t> chars;
3607   if (!chars.maybeAlloc(cx, args.length())) {
3608     return false;
3609   }
3610 
3611   char16_t* rawChars = chars.get();
3612   for (unsigned i = 0; i < args.length(); i++) {
3613     uint16_t code;
3614     if (!ToUint16(cx, args[i], &code)) {
3615       return false;
3616     }
3617 
3618     rawChars[i] = char16_t(code);
3619   }
3620 
3621   JSString* str = chars.toString(cx, args.length());
3622   if (!str) {
3623     return false;
3624   }
3625 
3626   args.rval().setString(str);
3627   return true;
3628 }
3629 
CodeUnitToString(JSContext * cx,uint16_t ucode,MutableHandleValue rval)3630 static inline bool CodeUnitToString(JSContext* cx, uint16_t ucode,
3631                                     MutableHandleValue rval) {
3632   if (StaticStrings::hasUnit(ucode)) {
3633     rval.setString(cx->staticStrings().getUnit(ucode));
3634     return true;
3635   }
3636 
3637   char16_t c = char16_t(ucode);
3638   JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, &c, 1);
3639   if (!str) {
3640     return false;
3641   }
3642 
3643   rval.setString(str);
3644   return true;
3645 }
3646 
str_fromCharCode_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3647 bool js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code,
3648                                   MutableHandleValue rval) {
3649   uint16_t ucode;
3650 
3651   if (!ToUint16(cx, code, &ucode)) {
3652     return false;
3653   }
3654 
3655   return CodeUnitToString(cx, ucode, rval);
3656 }
3657 
ToCodePoint(JSContext * cx,HandleValue code,uint32_t * codePoint)3658 static MOZ_ALWAYS_INLINE bool ToCodePoint(JSContext* cx, HandleValue code,
3659                                           uint32_t* codePoint) {
3660   // String.fromCodePoint, Steps 5.a-b.
3661 
3662   // Fast path for the common case - the input is already an int32.
3663   if (code.isInt32()) {
3664     int32_t nextCP = code.toInt32();
3665     if (nextCP >= 0 && nextCP <= int32_t(unicode::NonBMPMax)) {
3666       *codePoint = uint32_t(nextCP);
3667       return true;
3668     }
3669   }
3670 
3671   double nextCP;
3672   if (!ToNumber(cx, code, &nextCP)) {
3673     return false;
3674   }
3675 
3676   // String.fromCodePoint, Steps 5.c-d.
3677   if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 ||
3678       nextCP > unicode::NonBMPMax) {
3679     ToCStringBuf cbuf;
3680     if (const char* numStr = NumberToCString(cx, &cbuf, nextCP)) {
3681       JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
3682                                 JSMSG_NOT_A_CODEPOINT, numStr);
3683     }
3684     return false;
3685   }
3686 
3687   *codePoint = uint32_t(nextCP);
3688   return true;
3689 }
3690 
str_fromCodePoint_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3691 bool js::str_fromCodePoint_one_arg(JSContext* cx, HandleValue code,
3692                                    MutableHandleValue rval) {
3693   // Steps 1-4 (omitted).
3694 
3695   // Steps 5.a-d.
3696   uint32_t codePoint;
3697   if (!ToCodePoint(cx, code, &codePoint)) {
3698     return false;
3699   }
3700 
3701   // Steps 5.e, 6.
3702   if (!unicode::IsSupplementary(codePoint)) {
3703     return CodeUnitToString(cx, uint16_t(codePoint), rval);
3704   }
3705 
3706   char16_t chars[] = {unicode::LeadSurrogate(codePoint),
3707                       unicode::TrailSurrogate(codePoint)};
3708   JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, chars, 2);
3709   if (!str) {
3710     return false;
3711   }
3712 
3713   rval.setString(str);
3714   return true;
3715 }
3716 
str_fromCodePoint_few_args(JSContext * cx,const CallArgs & args)3717 static bool str_fromCodePoint_few_args(JSContext* cx, const CallArgs& args) {
3718   MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2);
3719 
3720   // Steps 1-2 (omitted).
3721 
3722   // Step 3.
3723   char16_t elements[JSFatInlineString::MAX_LENGTH_TWO_BYTE];
3724 
3725   // Steps 4-5.
3726   unsigned length = 0;
3727   for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3728     // Steps 5.a-d.
3729     uint32_t codePoint;
3730     if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
3731       return false;
3732     }
3733 
3734     // Step 5.e.
3735     unicode::UTF16Encode(codePoint, elements, &length);
3736   }
3737 
3738   // Step 6.
3739   JSString* str = NewStringCopyN<CanGC>(cx, elements, length);
3740   if (!str) {
3741     return false;
3742   }
3743 
3744   args.rval().setString(str);
3745   return true;
3746 }
3747 
3748 // ES2017 draft rev 40edb3a95a475c1b251141ac681b8793129d9a6d
3749 // 21.1.2.2 String.fromCodePoint(...codePoints)
str_fromCodePoint(JSContext * cx,unsigned argc,Value * vp)3750 bool js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) {
3751   CallArgs args = CallArgsFromVp(argc, vp);
3752 
3753   // Optimize the single code-point case.
3754   if (args.length() == 1) {
3755     return str_fromCodePoint_one_arg(cx, args[0], args.rval());
3756   }
3757 
3758   // Optimize the case where the result will definitely fit in an inline
3759   // string (thin or fat) and so we don't need to malloc the chars. (We could
3760   // cover some cases where |args.length()| goes up to
3761   // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
3762   // are all Latin-1, but it doesn't seem worth the effort.)
3763   if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2) {
3764     return str_fromCodePoint_few_args(cx, args);
3765   }
3766 
3767   // Steps 1-2 (omitted).
3768 
3769   // Step 3.
3770   static_assert(
3771       ARGS_LENGTH_MAX < std::numeric_limits<decltype(args.length())>::max() / 2,
3772       "|args.length() * 2| does not overflow");
3773   auto elements = cx->make_pod_arena_array<char16_t>(js::StringBufferArena,
3774                                                      args.length() * 2);
3775   if (!elements) {
3776     return false;
3777   }
3778 
3779   // Steps 4-5.
3780   unsigned length = 0;
3781   for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3782     // Steps 5.a-d.
3783     uint32_t codePoint;
3784     if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
3785       return false;
3786     }
3787 
3788     // Step 5.e.
3789     unicode::UTF16Encode(codePoint, elements.get(), &length);
3790   }
3791 
3792   // Step 6.
3793   JSString* str = NewString<CanGC>(cx, std::move(elements), length);
3794   if (!str) {
3795     return false;
3796   }
3797 
3798   args.rval().setString(str);
3799   return true;
3800 }
3801 
3802 static const JSFunctionSpec string_static_methods[] = {
3803     JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0,
3804                     StringFromCharCode),
3805     JS_INLINABLE_FN("fromCodePoint", js::str_fromCodePoint, 1, 0,
3806                     StringFromCodePoint),
3807 
3808     JS_SELF_HOSTED_FN("raw", "String_static_raw", 1, 0), JS_FS_END};
3809 
3810 /* static */
assignInitialShape(JSContext * cx,Handle<StringObject * > obj)3811 Shape* StringObject::assignInitialShape(JSContext* cx,
3812                                         Handle<StringObject*> obj) {
3813   MOZ_ASSERT(obj->empty());
3814 
3815   if (!NativeObject::addPropertyInReservedSlot(cx, obj, cx->names().length,
3816                                                LENGTH_SLOT, {})) {
3817     return nullptr;
3818   }
3819 
3820   return obj->shape();
3821 }
3822 
createPrototype(JSContext * cx,JSProtoKey key)3823 JSObject* StringObject::createPrototype(JSContext* cx, JSProtoKey key) {
3824   Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
3825   Rooted<StringObject*> proto(
3826       cx, GlobalObject::createBlankPrototype<StringObject>(cx, cx->global()));
3827   if (!proto) {
3828     return nullptr;
3829   }
3830   if (!StringObject::init(cx, proto, empty)) {
3831     return nullptr;
3832   }
3833   return proto;
3834 }
3835 
StringClassFinish(JSContext * cx,HandleObject ctor,HandleObject proto)3836 static bool StringClassFinish(JSContext* cx, HandleObject ctor,
3837                               HandleObject proto) {
3838   HandleNativeObject nativeProto = proto.as<NativeObject>();
3839 
3840   // Create "trimLeft" as an alias for "trimStart".
3841   RootedValue trimFn(cx);
3842   RootedId trimId(cx, NameToId(cx->names().trimStart));
3843   RootedId trimAliasId(cx, NameToId(cx->names().trimLeft));
3844   if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
3845       !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
3846     return false;
3847   }
3848 
3849   // Create "trimRight" as an alias for "trimEnd".
3850   trimId = NameToId(cx->names().trimEnd);
3851   trimAliasId = NameToId(cx->names().trimRight);
3852   if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
3853       !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
3854     return false;
3855   }
3856 
3857   /*
3858    * Define escape/unescape, the URI encode/decode functions, and maybe
3859    * uneval on the global object.
3860    */
3861   if (!JS_DefineFunctions(cx, cx->global(), string_functions)) {
3862     return false;
3863   }
3864 
3865   return true;
3866 }
3867 
3868 const ClassSpec StringObject::classSpec_ = {
3869     GenericCreateConstructor<StringConstructor, 1, gc::AllocKind::FUNCTION,
3870                              &jit::JitInfo_String>,
3871     StringObject::createPrototype,
3872     string_static_methods,
3873     nullptr,
3874     string_methods,
3875     nullptr,
3876     StringClassFinish};
3877 
3878 #define ____ false
3879 
3880 /*
3881  * Uri reserved chars + #:
3882  * - 35: #
3883  * - 36: $
3884  * - 38: &
3885  * - 43: +
3886  * - 44: ,
3887  * - 47: /
3888  * - 58: :
3889  * - 59: ;
3890  * - 61: =
3891  * - 63: ?
3892  * - 64: @
3893  */
3894 static const bool js_isUriReservedPlusPound[] = {
3895     // clang-format off
3896 /*       0     1     2     3     4     5     6     7     8     9  */
3897 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3898 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3899 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3900 /*  3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
3901 /*  4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
3902 /*  5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
3903 /*  6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
3904 /*  7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3905 /*  8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3906 /*  9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3907 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3908 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3909 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
3910     // clang-format on
3911 };
3912 
3913 /*
3914  * Uri unescaped chars:
3915  * -      33: !
3916  * -      39: '
3917  * -      40: (
3918  * -      41: )
3919  * -      42: *
3920  * -      45: -
3921  * -      46: .
3922  * -  48..57: 0-9
3923  * -  65..90: A-Z
3924  * -      95: _
3925  * - 97..122: a-z
3926  * -     126: ~
3927  */
3928 static const bool js_isUriUnescaped[] = {
3929     // clang-format off
3930 /*       0     1     2     3     4     5     6     7     8     9  */
3931 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3932 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3933 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3934 /*  3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
3935 /*  4 */ true, true, true, ____, ____, true, true, ____, true, true,
3936 /*  5 */ true, true, true, true, true, true, true, true, ____, ____,
3937 /*  6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
3938 /*  7 */ true, true, true, true, true, true, true, true, true, true,
3939 /*  8 */ true, true, true, true, true, true, true, true, true, true,
3940 /*  9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
3941 /* 10 */ true, true, true, true, true, true, true, true, true, true,
3942 /* 11 */ true, true, true, true, true, true, true, true, true, true,
3943 /* 12 */ true, true, true, ____, ____, ____, true, ____
3944     // clang-format on
3945 };
3946 
3947 #undef ____
3948 
TransferBufferToString(JSStringBuilder & sb,JSString * str,MutableHandleValue rval)3949 static inline bool TransferBufferToString(JSStringBuilder& sb, JSString* str,
3950                                           MutableHandleValue rval) {
3951   if (!sb.empty()) {
3952     str = sb.finishString();
3953     if (!str) {
3954       return false;
3955     }
3956   }
3957   rval.setString(str);
3958   return true;
3959 }
3960 
3961 /*
3962  * ECMA 3, 15.1.3 URI Handling Function Properties
3963  *
3964  * The following are implementations of the algorithms
3965  * given in the ECMA specification for the hidden functions
3966  * 'Encode' and 'Decode'.
3967  */
3968 enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };
3969 
3970 // Bug 1403318: GCC sometimes inlines this Encode function rather than the
3971 // caller Encode function. Annotate both functions with MOZ_NEVER_INLINE resp.
3972 // MOZ_ALWAYS_INLINE to ensure we get the desired inlining behavior.
3973 template <typename CharT>
Encode(StringBuffer & sb,const CharT * chars,size_t length,const bool * unescapedSet)3974 static MOZ_NEVER_INLINE EncodeResult Encode(StringBuffer& sb,
3975                                             const CharT* chars, size_t length,
3976                                             const bool* unescapedSet) {
3977   Latin1Char hexBuf[3];
3978   hexBuf[0] = '%';
3979 
3980   auto appendEncoded = [&sb, &hexBuf](Latin1Char c) {
3981     static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
3982 
3983     hexBuf[1] = HexDigits[c >> 4];
3984     hexBuf[2] = HexDigits[c & 0xf];
3985     return sb.append(hexBuf, 3);
3986   };
3987 
3988   auto appendRange = [&sb, chars, length](size_t start, size_t end) {
3989     MOZ_ASSERT(start <= end);
3990 
3991     if (start < end) {
3992       if (start == 0) {
3993         if (!sb.reserve(length)) {
3994           return false;
3995         }
3996       }
3997       return sb.append(chars + start, chars + end);
3998     }
3999     return true;
4000   };
4001 
4002   size_t startAppend = 0;
4003   for (size_t k = 0; k < length; k++) {
4004     CharT c = chars[k];
4005     if (c < 128 &&
4006         (js_isUriUnescaped[c] || (unescapedSet && unescapedSet[c]))) {
4007       continue;
4008     } else {
4009       if (!appendRange(startAppend, k)) {
4010         return Encode_Failure;
4011       }
4012 
4013       if constexpr (std::is_same_v<CharT, Latin1Char>) {
4014         if (c < 0x80) {
4015           if (!appendEncoded(c)) {
4016             return Encode_Failure;
4017           }
4018         } else {
4019           if (!appendEncoded(0xC0 | (c >> 6)) ||
4020               !appendEncoded(0x80 | (c & 0x3F))) {
4021             return Encode_Failure;
4022           }
4023         }
4024       } else {
4025         if (unicode::IsTrailSurrogate(c)) {
4026           return Encode_BadUri;
4027         }
4028 
4029         uint32_t v;
4030         if (!unicode::IsLeadSurrogate(c)) {
4031           v = c;
4032         } else {
4033           k++;
4034           if (k == length) {
4035             return Encode_BadUri;
4036           }
4037 
4038           char16_t c2 = chars[k];
4039           if (!unicode::IsTrailSurrogate(c2)) {
4040             return Encode_BadUri;
4041           }
4042 
4043           v = unicode::UTF16Decode(c, c2);
4044         }
4045 
4046         uint8_t utf8buf[4];
4047         size_t L = OneUcs4ToUtf8Char(utf8buf, v);
4048         for (size_t j = 0; j < L; j++) {
4049           if (!appendEncoded(utf8buf[j])) {
4050             return Encode_Failure;
4051           }
4052         }
4053       }
4054 
4055       startAppend = k + 1;
4056     }
4057   }
4058 
4059   if (startAppend > 0) {
4060     if (!appendRange(startAppend, length)) {
4061       return Encode_Failure;
4062     }
4063   }
4064 
4065   return Encode_Success;
4066 }
4067 
Encode(JSContext * cx,HandleLinearString str,const bool * unescapedSet,MutableHandleValue rval)4068 static MOZ_ALWAYS_INLINE bool Encode(JSContext* cx, HandleLinearString str,
4069                                      const bool* unescapedSet,
4070                                      MutableHandleValue rval) {
4071   size_t length = str->length();
4072   if (length == 0) {
4073     rval.setString(cx->runtime()->emptyString);
4074     return true;
4075   }
4076 
4077   JSStringBuilder sb(cx);
4078 
4079   EncodeResult res;
4080   if (str->hasLatin1Chars()) {
4081     AutoCheckCannotGC nogc;
4082     res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet);
4083   } else {
4084     AutoCheckCannotGC nogc;
4085     res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet);
4086   }
4087 
4088   if (res == Encode_Failure) {
4089     return false;
4090   }
4091 
4092   if (res == Encode_BadUri) {
4093     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4094     return false;
4095   }
4096 
4097   MOZ_ASSERT(res == Encode_Success);
4098   return TransferBufferToString(sb, str, rval);
4099 }
4100 
4101 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success };
4102 
4103 template <typename CharT>
Decode(StringBuffer & sb,const CharT * chars,size_t length,const bool * reservedSet)4104 static DecodeResult Decode(StringBuffer& sb, const CharT* chars, size_t length,
4105                            const bool* reservedSet) {
4106   auto appendRange = [&sb, chars](size_t start, size_t end) {
4107     MOZ_ASSERT(start <= end);
4108 
4109     if (start < end) {
4110       return sb.append(chars + start, chars + end);
4111     }
4112     return true;
4113   };
4114 
4115   size_t startAppend = 0;
4116   for (size_t k = 0; k < length; k++) {
4117     CharT c = chars[k];
4118     if (c == '%') {
4119       size_t start = k;
4120       if ((k + 2) >= length) {
4121         return Decode_BadUri;
4122       }
4123 
4124       if (!IsAsciiHexDigit(chars[k + 1]) || !IsAsciiHexDigit(chars[k + 2])) {
4125         return Decode_BadUri;
4126       }
4127 
4128       uint32_t B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 +
4129                    AsciiAlphanumericToNumber(chars[k + 2]);
4130       k += 2;
4131       if (B < 128) {
4132         Latin1Char ch = Latin1Char(B);
4133         if (reservedSet && reservedSet[ch]) {
4134           continue;
4135         }
4136 
4137         if (!appendRange(startAppend, start)) {
4138           return Decode_Failure;
4139         }
4140         if (!sb.append(ch)) {
4141           return Decode_Failure;
4142         }
4143       } else {
4144         int n = 1;
4145         while (B & (0x80 >> n)) {
4146           n++;
4147         }
4148 
4149         if (n == 1 || n > 4) {
4150           return Decode_BadUri;
4151         }
4152 
4153         uint8_t octets[4];
4154         octets[0] = (uint8_t)B;
4155         if (k + 3 * (n - 1) >= length) {
4156           return Decode_BadUri;
4157         }
4158 
4159         for (int j = 1; j < n; j++) {
4160           k++;
4161           if (chars[k] != '%') {
4162             return Decode_BadUri;
4163           }
4164 
4165           if (!IsAsciiHexDigit(chars[k + 1]) ||
4166               !IsAsciiHexDigit(chars[k + 2])) {
4167             return Decode_BadUri;
4168           }
4169 
4170           B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 +
4171               AsciiAlphanumericToNumber(chars[k + 2]);
4172           if ((B & 0xC0) != 0x80) {
4173             return Decode_BadUri;
4174           }
4175 
4176           k += 2;
4177           octets[j] = char(B);
4178         }
4179 
4180         if (!appendRange(startAppend, start)) {
4181           return Decode_Failure;
4182         }
4183 
4184         uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
4185         MOZ_ASSERT(v >= 128);
4186         if (v >= unicode::NonBMPMin) {
4187           if (v > unicode::NonBMPMax) {
4188             return Decode_BadUri;
4189           }
4190 
4191           if (!sb.append(unicode::LeadSurrogate(v))) {
4192             return Decode_Failure;
4193           }
4194           if (!sb.append(unicode::TrailSurrogate(v))) {
4195             return Decode_Failure;
4196           }
4197         } else {
4198           if (!sb.append(char16_t(v))) {
4199             return Decode_Failure;
4200           }
4201         }
4202       }
4203 
4204       startAppend = k + 1;
4205     }
4206   }
4207 
4208   if (startAppend > 0) {
4209     if (!appendRange(startAppend, length)) {
4210       return Decode_Failure;
4211     }
4212   }
4213 
4214   return Decode_Success;
4215 }
4216 
Decode(JSContext * cx,HandleLinearString str,const bool * reservedSet,MutableHandleValue rval)4217 static bool Decode(JSContext* cx, HandleLinearString str,
4218                    const bool* reservedSet, MutableHandleValue rval) {
4219   size_t length = str->length();
4220   if (length == 0) {
4221     rval.setString(cx->runtime()->emptyString);
4222     return true;
4223   }
4224 
4225   JSStringBuilder sb(cx);
4226 
4227   DecodeResult res;
4228   if (str->hasLatin1Chars()) {
4229     AutoCheckCannotGC nogc;
4230     res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
4231   } else {
4232     AutoCheckCannotGC nogc;
4233     res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
4234   }
4235 
4236   if (res == Decode_Failure) {
4237     return false;
4238   }
4239 
4240   if (res == Decode_BadUri) {
4241     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4242     return false;
4243   }
4244 
4245   MOZ_ASSERT(res == Decode_Success);
4246   return TransferBufferToString(sb, str, rval);
4247 }
4248 
str_decodeURI(JSContext * cx,unsigned argc,Value * vp)4249 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp) {
4250   CallArgs args = CallArgsFromVp(argc, vp);
4251   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4252   if (!str) {
4253     return false;
4254   }
4255 
4256   return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
4257 }
4258 
str_decodeURI_Component(JSContext * cx,unsigned argc,Value * vp)4259 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
4260   CallArgs args = CallArgsFromVp(argc, vp);
4261   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4262   if (!str) {
4263     return false;
4264   }
4265 
4266   return Decode(cx, str, nullptr, args.rval());
4267 }
4268 
str_encodeURI(JSContext * cx,unsigned argc,Value * vp)4269 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp) {
4270   CallArgs args = CallArgsFromVp(argc, vp);
4271   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4272   if (!str) {
4273     return false;
4274   }
4275 
4276   return Encode(cx, str, js_isUriReservedPlusPound, args.rval());
4277 }
4278 
str_encodeURI_Component(JSContext * cx,unsigned argc,Value * vp)4279 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
4280   CallArgs args = CallArgsFromVp(argc, vp);
4281   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4282   if (!str) {
4283     return false;
4284   }
4285 
4286   return Encode(cx, str, nullptr, args.rval());
4287 }
4288 
EncodeURI(JSContext * cx,const char * chars,size_t length)4289 JSString* js::EncodeURI(JSContext* cx, const char* chars, size_t length) {
4290   JSStringBuilder sb(cx);
4291   EncodeResult result = Encode(sb, reinterpret_cast<const Latin1Char*>(chars),
4292                                length, js_isUriReservedPlusPound);
4293   if (result == EncodeResult::Encode_Failure) {
4294     return nullptr;
4295   }
4296   if (result == EncodeResult::Encode_BadUri) {
4297     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4298     return nullptr;
4299   }
4300   if (sb.empty()) {
4301     return NewStringCopyN<CanGC>(cx, chars, length);
4302   }
4303   return sb.finishString();
4304 }
4305 
FlatStringMatchHelper(JSContext * cx,HandleString str,HandleString pattern,bool * isFlat,int32_t * match)4306 static bool FlatStringMatchHelper(JSContext* cx, HandleString str,
4307                                   HandleString pattern, bool* isFlat,
4308                                   int32_t* match) {
4309   RootedLinearString linearPattern(cx, pattern->ensureLinear(cx));
4310   if (!linearPattern) {
4311     return false;
4312   }
4313 
4314   static const size_t MAX_FLAT_PAT_LEN = 256;
4315   if (linearPattern->length() > MAX_FLAT_PAT_LEN ||
4316       StringHasRegExpMetaChars(linearPattern)) {
4317     *isFlat = false;
4318     return true;
4319   }
4320 
4321   *isFlat = true;
4322   if (str->isRope()) {
4323     if (!RopeMatch(cx, &str->asRope(), linearPattern, match)) {
4324       return false;
4325     }
4326   } else {
4327     *match = StringMatch(&str->asLinear(), linearPattern);
4328   }
4329 
4330   return true;
4331 }
4332 
BuildFlatMatchArray(JSContext * cx,HandleString str,HandleString pattern,int32_t match,MutableHandleValue rval)4333 static bool BuildFlatMatchArray(JSContext* cx, HandleString str,
4334                                 HandleString pattern, int32_t match,
4335                                 MutableHandleValue rval) {
4336   if (match < 0) {
4337     rval.setNull();
4338     return true;
4339   }
4340 
4341   // Get the templateObject that defines the shape and type of the output
4342   // object.
4343   ArrayObject* templateObject =
4344       cx->realm()->regExps.getOrCreateMatchResultTemplateObject(cx);
4345   if (!templateObject) {
4346     return false;
4347   }
4348 
4349   RootedArrayObject arr(
4350       cx, NewDenseFullyAllocatedArrayWithTemplate(cx, 1, templateObject));
4351   if (!arr) {
4352     return false;
4353   }
4354 
4355   // Store a Value for each pair.
4356   arr->setDenseInitializedLength(1);
4357   arr->initDenseElement(0, StringValue(pattern));
4358 
4359   // Set the |index| property. (TemplateObject positions it in slot 0).
4360   arr->setSlot(0, Int32Value(match));
4361 
4362   // Set the |input| property. (TemplateObject positions it in slot 1).
4363   arr->setSlot(1, StringValue(str));
4364 
4365 #ifdef DEBUG
4366   RootedValue test(cx);
4367   RootedId id(cx, NameToId(cx->names().index));
4368   if (!NativeGetProperty(cx, arr, id, &test)) {
4369     return false;
4370   }
4371   MOZ_ASSERT(test == arr->getSlot(0));
4372   id = NameToId(cx->names().input);
4373   if (!NativeGetProperty(cx, arr, id, &test)) {
4374     return false;
4375   }
4376   MOZ_ASSERT(test == arr->getSlot(1));
4377 #endif
4378 
4379   rval.setObject(*arr);
4380   return true;
4381 }
4382 
4383 #ifdef DEBUG
CallIsStringOptimizable(JSContext * cx,const char * name,bool * result)4384 static bool CallIsStringOptimizable(JSContext* cx, const char* name,
4385                                     bool* result) {
4386   FixedInvokeArgs<0> args(cx);
4387 
4388   RootedValue rval(cx);
4389   if (!CallSelfHostedFunction(cx, name, UndefinedHandleValue, args, &rval)) {
4390     return false;
4391   }
4392 
4393   *result = rval.toBoolean();
4394   return true;
4395 }
4396 #endif
4397 
FlatStringMatch(JSContext * cx,unsigned argc,Value * vp)4398 bool js::FlatStringMatch(JSContext* cx, unsigned argc, Value* vp) {
4399   CallArgs args = CallArgsFromVp(argc, vp);
4400   MOZ_ASSERT(args.length() == 2);
4401   MOZ_ASSERT(args[0].isString());
4402   MOZ_ASSERT(args[1].isString());
4403 #ifdef DEBUG
4404   bool isOptimizable = false;
4405   if (!CallIsStringOptimizable(cx, "IsStringMatchOptimizable",
4406                                &isOptimizable)) {
4407     return false;
4408   }
4409   MOZ_ASSERT(isOptimizable);
4410 #endif
4411 
4412   RootedString str(cx, args[0].toString());
4413   RootedString pattern(cx, args[1].toString());
4414 
4415   bool isFlat = false;
4416   int32_t match = 0;
4417   if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) {
4418     return false;
4419   }
4420 
4421   if (!isFlat) {
4422     args.rval().setUndefined();
4423     return true;
4424   }
4425 
4426   return BuildFlatMatchArray(cx, str, pattern, match, args.rval());
4427 }
4428 
FlatStringSearch(JSContext * cx,unsigned argc,Value * vp)4429 bool js::FlatStringSearch(JSContext* cx, unsigned argc, Value* vp) {
4430   CallArgs args = CallArgsFromVp(argc, vp);
4431   MOZ_ASSERT(args.length() == 2);
4432   MOZ_ASSERT(args[0].isString());
4433   MOZ_ASSERT(args[1].isString());
4434 #ifdef DEBUG
4435   bool isOptimizable = false;
4436   if (!CallIsStringOptimizable(cx, "IsStringSearchOptimizable",
4437                                &isOptimizable)) {
4438     return false;
4439   }
4440   MOZ_ASSERT(isOptimizable);
4441 #endif
4442 
4443   RootedString str(cx, args[0].toString());
4444   RootedString pattern(cx, args[1].toString());
4445 
4446   bool isFlat = false;
4447   int32_t match = 0;
4448   if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) {
4449     return false;
4450   }
4451 
4452   if (!isFlat) {
4453     args.rval().setInt32(-2);
4454     return true;
4455   }
4456 
4457   args.rval().setInt32(match);
4458   return true;
4459 }
4460