1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "builtin/String.h"
8 
9 #include "mozilla/Attributes.h"
10 #include "mozilla/CheckedInt.h"
11 #include "mozilla/FloatingPoint.h"
12 #include "mozilla/PodOperations.h"
13 #include "mozilla/Range.h"
14 #include "mozilla/TextUtils.h"
15 
16 #include <algorithm>
17 #include <limits>
18 #include <string.h>
19 #include <type_traits>
20 
21 #include "jsapi.h"
22 #include "jsnum.h"
23 #include "jstypes.h"
24 
25 #include "builtin/Array.h"
26 #include "builtin/Boolean.h"
27 #if JS_HAS_INTL_API
28 #  include "builtin/intl/CommonFunctions.h"
29 #endif
30 #include "builtin/RegExp.h"
31 #include "jit/InlinableNatives.h"
32 #include "js/Conversions.h"
33 #include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
34 #include "js/friend/StackLimits.h"    // js::AutoCheckRecursionLimit
35 #if !JS_HAS_INTL_API
36 #  include "js/LocaleSensitive.h"
37 #endif
38 #include "js/PropertySpec.h"
39 #include "js/StableStringChars.h"
40 #include "js/UniquePtr.h"
41 #if JS_HAS_INTL_API
42 #  include "unicode/uchar.h"
43 #  include "unicode/unorm2.h"
44 #  include "unicode/ustring.h"
45 #  include "unicode/utypes.h"
46 #endif
47 #include "util/StringBuffer.h"
48 #include "util/Unicode.h"
49 #include "vm/BytecodeUtil.h"
50 #include "vm/GlobalObject.h"
51 #include "vm/Interpreter.h"
52 #include "vm/JSAtom.h"
53 #include "vm/JSContext.h"
54 #include "vm/JSObject.h"
55 #include "vm/Opcodes.h"
56 #include "vm/Printer.h"
57 #include "vm/RegExpObject.h"
58 #include "vm/RegExpStatics.h"
59 #include "vm/SelfHosting.h"
60 #include "vm/ToSource.h"       // js::ValueToSource
61 #include "vm/WellKnownAtom.h"  // js_*_str
62 
63 #include "vm/InlineCharBuffer-inl.h"
64 #include "vm/Interpreter-inl.h"
65 #include "vm/StringObject-inl.h"
66 #include "vm/StringType-inl.h"
67 
68 using namespace js;
69 
70 using JS::Symbol;
71 using JS::SymbolCode;
72 
73 using mozilla::AsciiAlphanumericToNumber;
74 using mozilla::CheckedInt;
75 using mozilla::IsAsciiHexDigit;
76 using mozilla::IsNaN;
77 using mozilla::PodCopy;
78 using mozilla::RangedPtr;
79 
80 using JS::AutoCheckCannotGC;
81 using JS::AutoStableStringChars;
82 
ArgToLinearString(JSContext * cx,const CallArgs & args,unsigned argno)83 static JSLinearString* ArgToLinearString(JSContext* cx, const CallArgs& args,
84                                          unsigned argno) {
85   if (argno >= args.length()) {
86     return cx->names().undefined;
87   }
88 
89   JSString* str = ToString<CanGC>(cx, args[argno]);
90   if (!str) {
91     return nullptr;
92   }
93 
94   return str->ensureLinear(cx);
95 }
96 
97 /*
98  * Forward declarations for URI encode/decode and helper routines
99  */
100 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
101 
102 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
103 
104 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
105 
106 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
107 
108 /*
109  * Global string methods
110  */
111 
112 /* ES5 B.2.1 */
113 template <typename CharT>
Escape(JSContext * cx,const CharT * chars,uint32_t length,InlineCharBuffer<Latin1Char> & newChars,uint32_t * newLengthOut)114 static bool Escape(JSContext* cx, const CharT* chars, uint32_t length,
115                    InlineCharBuffer<Latin1Char>& newChars,
116                    uint32_t* newLengthOut) {
117   // clang-format off
118     static const uint8_t shouldPassThrough[128] = {
119          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
120          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
121          0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,       /*    !"#$%&'()*+,-./  */
122          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,       /*   0123456789:;<=>?  */
123          1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   @ABCDEFGHIJKLMNO  */
124          1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /*   PQRSTUVWXYZ[\]^_  */
125          0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   `abcdefghijklmno  */
126          1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,       /*   pqrstuvwxyz{\}~  DEL */
127     };
128   // clang-format on
129 
130   /* Take a first pass and see how big the result string will need to be. */
131   uint32_t newLength = length;
132   for (size_t i = 0; i < length; i++) {
133     char16_t ch = chars[i];
134     if (ch < 128 && shouldPassThrough[ch]) {
135       continue;
136     }
137 
138     /*
139      * newlength is incremented below by at most 5 and at this point it must
140      * be a valid string length, so this should never overflow uint32_t.
141      */
142     static_assert(JSString::MAX_LENGTH < UINT32_MAX - 5,
143                   "Adding 5 to valid string length should not overflow");
144 
145     MOZ_ASSERT(newLength <= JSString::MAX_LENGTH);
146 
147     /* The character will be encoded as %XX or %uXXXX. */
148     newLength += (ch < 256) ? 2 : 5;
149 
150     if (MOZ_UNLIKELY(newLength > JSString::MAX_LENGTH)) {
151       ReportAllocationOverflow(cx);
152       return false;
153     }
154   }
155 
156   if (newLength == length) {
157     *newLengthOut = newLength;
158     return true;
159   }
160 
161   if (!newChars.maybeAlloc(cx, newLength)) {
162     return false;
163   }
164 
165   static const char digits[] = "0123456789ABCDEF";
166 
167   Latin1Char* rawNewChars = newChars.get();
168   size_t i, ni;
169   for (i = 0, ni = 0; i < length; i++) {
170     char16_t ch = chars[i];
171     if (ch < 128 && shouldPassThrough[ch]) {
172       rawNewChars[ni++] = ch;
173     } else if (ch < 256) {
174       rawNewChars[ni++] = '%';
175       rawNewChars[ni++] = digits[ch >> 4];
176       rawNewChars[ni++] = digits[ch & 0xF];
177     } else {
178       rawNewChars[ni++] = '%';
179       rawNewChars[ni++] = 'u';
180       rawNewChars[ni++] = digits[ch >> 12];
181       rawNewChars[ni++] = digits[(ch & 0xF00) >> 8];
182       rawNewChars[ni++] = digits[(ch & 0xF0) >> 4];
183       rawNewChars[ni++] = digits[ch & 0xF];
184     }
185   }
186   MOZ_ASSERT(ni == newLength);
187 
188   *newLengthOut = newLength;
189   return true;
190 }
191 
str_escape(JSContext * cx,unsigned argc,Value * vp)192 static bool str_escape(JSContext* cx, unsigned argc, Value* vp) {
193   CallArgs args = CallArgsFromVp(argc, vp);
194 
195   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
196   if (!str) {
197     return false;
198   }
199 
200   InlineCharBuffer<Latin1Char> newChars;
201   uint32_t newLength = 0;  // initialize to silence GCC warning
202   if (str->hasLatin1Chars()) {
203     AutoCheckCannotGC nogc;
204     if (!Escape(cx, str->latin1Chars(nogc), str->length(), newChars,
205                 &newLength)) {
206       return false;
207     }
208   } else {
209     AutoCheckCannotGC nogc;
210     if (!Escape(cx, str->twoByteChars(nogc), str->length(), newChars,
211                 &newLength)) {
212       return false;
213     }
214   }
215 
216   // Return input if no characters need to be escaped.
217   if (newLength == str->length()) {
218     args.rval().setString(str);
219     return true;
220   }
221 
222   JSString* res = newChars.toString(cx, newLength);
223   if (!res) {
224     return false;
225   }
226 
227   args.rval().setString(res);
228   return true;
229 }
230 
231 template <typename CharT>
Unhex4(const RangedPtr<const CharT> chars,char16_t * result)232 static inline bool Unhex4(const RangedPtr<const CharT> chars,
233                           char16_t* result) {
234   CharT a = chars[0], b = chars[1], c = chars[2], d = chars[3];
235 
236   if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b) && IsAsciiHexDigit(c) &&
237         IsAsciiHexDigit(d))) {
238     return false;
239   }
240 
241   char16_t unhex = AsciiAlphanumericToNumber(a);
242   unhex = (unhex << 4) + AsciiAlphanumericToNumber(b);
243   unhex = (unhex << 4) + AsciiAlphanumericToNumber(c);
244   unhex = (unhex << 4) + AsciiAlphanumericToNumber(d);
245   *result = unhex;
246   return true;
247 }
248 
249 template <typename CharT>
Unhex2(const RangedPtr<const CharT> chars,char16_t * result)250 static inline bool Unhex2(const RangedPtr<const CharT> chars,
251                           char16_t* result) {
252   CharT a = chars[0], b = chars[1];
253 
254   if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b))) {
255     return false;
256   }
257 
258   *result = (AsciiAlphanumericToNumber(a) << 4) + AsciiAlphanumericToNumber(b);
259   return true;
260 }
261 
262 template <typename CharT>
Unescape(StringBuffer & sb,const mozilla::Range<const CharT> chars)263 static bool Unescape(StringBuffer& sb,
264                      const mozilla::Range<const CharT> chars) {
265   // Step 2.
266   uint32_t length = chars.length();
267 
268   /*
269    * Note that the spec algorithm has been optimized to avoid building
270    * a string in the case where no escapes are present.
271    */
272   bool building = false;
273 
274 #define ENSURE_BUILDING                            \
275   do {                                             \
276     if (!building) {                               \
277       building = true;                             \
278       if (!sb.reserve(length)) return false;       \
279       sb.infallibleAppend(chars.begin().get(), k); \
280     }                                              \
281   } while (false);
282 
283   // Step 4.
284   uint32_t k = 0;
285 
286   // Step 5.
287   while (k < length) {
288     // Step 5.a.
289     char16_t c = chars[k];
290 
291     // Step 5.b.
292     if (c == '%') {
293       static_assert(JSString::MAX_LENGTH < UINT32_MAX - 6,
294                     "String length is not near UINT32_MAX");
295 
296       // Steps 5.b.i-ii.
297       if (k + 6 <= length && chars[k + 1] == 'u') {
298         if (Unhex4(chars.begin() + k + 2, &c)) {
299           ENSURE_BUILDING
300           k += 5;
301         }
302       } else if (k + 3 <= length) {
303         if (Unhex2(chars.begin() + k + 1, &c)) {
304           ENSURE_BUILDING
305           k += 2;
306         }
307       }
308     }
309 
310     // Step 5.c.
311     if (building && !sb.append(c)) {
312       return false;
313     }
314 
315     // Step 5.d.
316     k += 1;
317   }
318 
319   return true;
320 #undef ENSURE_BUILDING
321 }
322 
323 // ES2018 draft rev f83aa38282c2a60c6916ebc410bfdf105a0f6a54
324 // B.2.1.2 unescape ( string )
str_unescape(JSContext * cx,unsigned argc,Value * vp)325 static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) {
326   CallArgs args = CallArgsFromVp(argc, vp);
327 
328   // Step 1.
329   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
330   if (!str) {
331     return false;
332   }
333 
334   // Step 3.
335   JSStringBuilder sb(cx);
336   if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) {
337     return false;
338   }
339 
340   // Steps 2, 4-5.
341   if (str->hasLatin1Chars()) {
342     AutoCheckCannotGC nogc;
343     if (!Unescape(sb, str->latin1Range(nogc))) {
344       return false;
345     }
346   } else {
347     AutoCheckCannotGC nogc;
348     if (!Unescape(sb, str->twoByteRange(nogc))) {
349       return false;
350     }
351   }
352 
353   // Step 6.
354   JSLinearString* result;
355   if (!sb.empty()) {
356     result = sb.finishString();
357     if (!result) {
358       return false;
359     }
360   } else {
361     result = str;
362   }
363 
364   args.rval().setString(result);
365   return true;
366 }
367 
str_uneval(JSContext * cx,unsigned argc,Value * vp)368 static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) {
369   CallArgs args = CallArgsFromVp(argc, vp);
370   JSString* str = ValueToSource(cx, args.get(0));
371   if (!str) {
372     return false;
373   }
374 
375   args.rval().setString(str);
376   return true;
377 }
378 
379 static const JSFunctionSpec string_functions[] = {
380     JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING),
381     JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING),
382     JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING),
383     JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING),
384     JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING),
385     JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,
386           JSPROP_RESOLVING),
387     JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,
388           JSPROP_RESOLVING),
389 
390     JS_FS_END};
391 
392 static const unsigned STRING_ELEMENT_ATTRS =
393     JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
394 
str_enumerate(JSContext * cx,HandleObject obj)395 static bool str_enumerate(JSContext* cx, HandleObject obj) {
396   RootedString str(cx, obj->as<StringObject>().unbox());
397   js::StaticStrings& staticStrings = cx->staticStrings();
398 
399   RootedValue value(cx);
400   for (size_t i = 0, length = str->length(); i < length; i++) {
401     JSString* str1 = staticStrings.getUnitStringForElement(cx, str, i);
402     if (!str1) {
403       return false;
404     }
405     value.setString(str1);
406     if (!DefineDataElement(cx, obj, i, value,
407                            STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
408       return false;
409     }
410   }
411 
412   return true;
413 }
414 
str_mayResolve(const JSAtomState &,jsid id,JSObject *)415 static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) {
416   // str_resolve ignores non-integer ids.
417   return JSID_IS_INT(id);
418 }
419 
str_resolve(JSContext * cx,HandleObject obj,HandleId id,bool * resolvedp)420 static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id,
421                         bool* resolvedp) {
422   if (!JSID_IS_INT(id)) {
423     return true;
424   }
425 
426   RootedString str(cx, obj->as<StringObject>().unbox());
427 
428   int32_t slot = JSID_TO_INT(id);
429   if ((size_t)slot < str->length()) {
430     JSString* str1 =
431         cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
432     if (!str1) {
433       return false;
434     }
435     RootedValue value(cx, StringValue(str1));
436     if (!DefineDataElement(cx, obj, uint32_t(slot), value,
437                            STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
438       return false;
439     }
440     *resolvedp = true;
441   }
442   return true;
443 }
444 
445 static const JSClassOps StringObjectClassOps = {
446     nullptr,         // addProperty
447     nullptr,         // delProperty
448     str_enumerate,   // enumerate
449     nullptr,         // newEnumerate
450     str_resolve,     // resolve
451     str_mayResolve,  // mayResolve
452     nullptr,         // finalize
453     nullptr,         // call
454     nullptr,         // hasInstance
455     nullptr,         // construct
456     nullptr,         // trace
457 };
458 
459 const JSClass StringObject::class_ = {
460     js_String_str,
461     JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
462         JSCLASS_HAS_CACHED_PROTO(JSProto_String),
463     &StringObjectClassOps, &StringObject::classSpec_};
464 
465 /*
466  * Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)|
467  * from nearly all String.prototype.* functions.
468  */
ToStringForStringFunction(JSContext * cx,const char * funName,HandleValue thisv)469 static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction(
470     JSContext* cx, const char* funName, HandleValue thisv) {
471   AutoCheckRecursionLimit recursion(cx);
472   if (!recursion.check(cx)) {
473     return nullptr;
474   }
475 
476   if (thisv.isString()) {
477     return thisv.toString();
478   }
479 
480   if (thisv.isObject()) {
481     RootedObject obj(cx, &thisv.toObject());
482     if (obj->is<StringObject>()) {
483       StringObject* nobj = &obj->as<StringObject>();
484       // We have to make sure that the ToPrimitive call from ToString
485       // would be unobservable.
486       if (HasNoToPrimitiveMethodPure(nobj, cx) &&
487           HasNativeMethodPure(nobj, cx->names().toString, str_toString, cx)) {
488         return nobj->unbox();
489       }
490     }
491   } else if (thisv.isNullOrUndefined()) {
492     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
493                               JSMSG_INCOMPATIBLE_PROTO, "String", funName,
494                               thisv.isNull() ? "null" : "undefined");
495     return nullptr;
496   }
497 
498   return ToStringSlow<CanGC>(cx, thisv);
499 }
500 
IsString(HandleValue v)501 MOZ_ALWAYS_INLINE bool IsString(HandleValue v) {
502   return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
503 }
504 
str_toSource_impl(JSContext * cx,const CallArgs & args)505 MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) {
506   MOZ_ASSERT(IsString(args.thisv()));
507 
508   JSString* str = ToString<CanGC>(cx, args.thisv());
509   if (!str) {
510     return false;
511   }
512 
513   UniqueChars quoted = QuoteString(cx, str, '"');
514   if (!quoted) {
515     return false;
516   }
517 
518   JSStringBuilder sb(cx);
519   if (!sb.append("(new String(") ||
520       !sb.append(quoted.get(), strlen(quoted.get())) || !sb.append("))")) {
521     return false;
522   }
523 
524   JSString* result = sb.finishString();
525   if (!result) {
526     return false;
527   }
528   args.rval().setString(result);
529   return true;
530 }
531 
str_toSource(JSContext * cx,unsigned argc,Value * vp)532 static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) {
533   CallArgs args = CallArgsFromVp(argc, vp);
534   return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
535 }
536 
str_toString_impl(JSContext * cx,const CallArgs & args)537 MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) {
538   MOZ_ASSERT(IsString(args.thisv()));
539 
540   args.rval().setString(
541       args.thisv().isString()
542           ? args.thisv().toString()
543           : args.thisv().toObject().as<StringObject>().unbox());
544   return true;
545 }
546 
str_toString(JSContext * cx,unsigned argc,Value * vp)547 bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) {
548   CallArgs args = CallArgsFromVp(argc, vp);
549   return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
550 }
551 
552 /*
553  * Java-like string native methods.
554  */
555 
SubstringKernel(JSContext * cx,HandleString str,int32_t beginInt,int32_t lengthInt)556 JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
557                               int32_t lengthInt) {
558   MOZ_ASSERT(0 <= beginInt);
559   MOZ_ASSERT(0 <= lengthInt);
560   MOZ_ASSERT(uint32_t(beginInt) <= str->length());
561   MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
562 
563   uint32_t begin = beginInt;
564   uint32_t len = lengthInt;
565 
566   /*
567    * Optimization for one level deep ropes.
568    * This is common for the following pattern:
569    *
570    * while() {
571    *   text = text.substr(0, x) + "bla" + text.substr(x)
572    *   test.charCodeAt(x + 1)
573    * }
574    */
575   if (str->isRope()) {
576     JSRope* rope = &str->asRope();
577 
578     /* Substring is totally in leftChild of rope. */
579     if (begin + len <= rope->leftChild()->length()) {
580       return NewDependentString(cx, rope->leftChild(), begin, len);
581     }
582 
583     /* Substring is totally in rightChild of rope. */
584     if (begin >= rope->leftChild()->length()) {
585       begin -= rope->leftChild()->length();
586       return NewDependentString(cx, rope->rightChild(), begin, len);
587     }
588 
589     /*
590      * Requested substring is partly in the left and partly in right child.
591      * Create a rope of substrings for both childs.
592      */
593     MOZ_ASSERT(begin < rope->leftChild()->length() &&
594                begin + len > rope->leftChild()->length());
595 
596     size_t lhsLength = rope->leftChild()->length() - begin;
597     size_t rhsLength = begin + len - rope->leftChild()->length();
598 
599     Rooted<JSRope*> ropeRoot(cx, rope);
600     RootedString lhs(
601         cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
602     if (!lhs) {
603       return nullptr;
604     }
605 
606     RootedString rhs(
607         cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
608     if (!rhs) {
609       return nullptr;
610     }
611 
612     return JSRope::new_<CanGC>(cx, lhs, rhs, len);
613   }
614 
615   return NewDependentString(cx, str, begin, len);
616 }
617 
618 /**
619  * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
620  * depending on its context:
621  * When it's preceded by a cased character and not followed by another cased
622  * character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
623  * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
624  *
625  * Unicode 9.0, §3.13 Default Case Algorithms
626  */
Final_Sigma(const char16_t * chars,size_t length,size_t index)627 static char16_t Final_Sigma(const char16_t* chars, size_t length,
628                             size_t index) {
629   MOZ_ASSERT(index < length);
630   MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
631   MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
632              unicode::GREEK_SMALL_LETTER_SIGMA);
633 
634 #if JS_HAS_INTL_API
635   // Tell the analysis the BinaryProperty.contains function pointer called by
636   // u_hasBinaryProperty cannot GC.
637   JS::AutoSuppressGCAnalysis nogc;
638 
639   bool precededByCased = false;
640   for (size_t i = index; i > 0;) {
641     char16_t c = chars[--i];
642     uint32_t codePoint = c;
643     if (unicode::IsTrailSurrogate(c) && i > 0) {
644       char16_t lead = chars[i - 1];
645       if (unicode::IsLeadSurrogate(lead)) {
646         codePoint = unicode::UTF16Decode(lead, c);
647         i--;
648       }
649     }
650 
651     // Ignore any characters with the property Case_Ignorable.
652     // NB: We need to skip over all Case_Ignorable characters, even when
653     // they also have the Cased binary property.
654     if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) {
655       continue;
656     }
657 
658     precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
659     break;
660   }
661   if (!precededByCased) {
662     return unicode::GREEK_SMALL_LETTER_SIGMA;
663   }
664 
665   bool followedByCased = false;
666   for (size_t i = index + 1; i < length;) {
667     char16_t c = chars[i++];
668     uint32_t codePoint = c;
669     if (unicode::IsLeadSurrogate(c) && i < length) {
670       char16_t trail = chars[i];
671       if (unicode::IsTrailSurrogate(trail)) {
672         codePoint = unicode::UTF16Decode(c, trail);
673         i++;
674       }
675     }
676 
677     // Ignore any characters with the property Case_Ignorable.
678     // NB: We need to skip over all Case_Ignorable characters, even when
679     // they also have the Cased binary property.
680     if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) {
681       continue;
682     }
683 
684     followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
685     break;
686   }
687   if (!followedByCased) {
688     return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
689   }
690 #endif
691 
692   return unicode::GREEK_SMALL_LETTER_SIGMA;
693 }
694 
695 // If |srcLength == destLength| is true, the destination buffer was allocated
696 // with the same size as the source buffer. When we append characters which
697 // have special casing mappings, we test |srcLength == destLength| to decide
698 // if we need to back out and reallocate a sufficiently large destination
699 // buffer. Otherwise the destination buffer was allocated with the correct
700 // size to hold all lower case mapped characters, i.e.
701 // |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
702 template <typename CharT>
ToLowerCaseImpl(CharT * destChars,const CharT * srcChars,size_t startIndex,size_t srcLength,size_t destLength)703 static size_t ToLowerCaseImpl(CharT* destChars, const CharT* srcChars,
704                               size_t startIndex, size_t srcLength,
705                               size_t destLength) {
706   MOZ_ASSERT(startIndex < srcLength);
707   MOZ_ASSERT(srcLength <= destLength);
708   if constexpr (std::is_same_v<CharT, Latin1Char>) {
709     MOZ_ASSERT(srcLength == destLength);
710   }
711 
712   size_t j = startIndex;
713   for (size_t i = startIndex; i < srcLength; i++) {
714     CharT c = srcChars[i];
715     if constexpr (!std::is_same_v<CharT, Latin1Char>) {
716       if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
717         char16_t trail = srcChars[i + 1];
718         if (unicode::IsTrailSurrogate(trail)) {
719           trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
720           destChars[j++] = c;
721           destChars[j++] = trail;
722           i++;
723           continue;
724         }
725       }
726 
727       // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
728       // lowercases to <U+0069 U+0307>.
729       if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
730         // Return if the output buffer is too small.
731         if (srcLength == destLength) {
732           return i;
733         }
734 
735         destChars[j++] = CharT('i');
736         destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
737         continue;
738       }
739 
740       // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
741       // one of two codepoints depending on context.
742       if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
743         destChars[j++] = Final_Sigma(srcChars, srcLength, i);
744         continue;
745       }
746     }
747 
748     c = unicode::ToLowerCase(c);
749     destChars[j++] = c;
750   }
751 
752   MOZ_ASSERT(j == destLength);
753   return srcLength;
754 }
755 
ToLowerCaseLength(const char16_t * chars,size_t startIndex,size_t length)756 static size_t ToLowerCaseLength(const char16_t* chars, size_t startIndex,
757                                 size_t length) {
758   size_t lowerLength = length;
759   for (size_t i = startIndex; i < length; i++) {
760     char16_t c = chars[i];
761 
762     // U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
763     if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
764       lowerLength += 1;
765     }
766   }
767   return lowerLength;
768 }
769 
770 template <typename CharT>
ToLowerCase(JSContext * cx,JSLinearString * str)771 static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) {
772   // Unlike toUpperCase, toLowerCase has the nice invariant that if the
773   // input is a Latin-1 string, the output is also a Latin-1 string.
774 
775   InlineCharBuffer<CharT> newChars;
776 
777   const size_t length = str->length();
778   size_t resultLength;
779   {
780     AutoCheckCannotGC nogc;
781     const CharT* chars = str->chars<CharT>(nogc);
782 
783     // We don't need extra special casing checks in the loop below,
784     // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
785     // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
786     MOZ_ASSERT(unicode::ChangesWhenLowerCased(
787                    unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
788                "U+0130 has a simple lower case mapping");
789     MOZ_ASSERT(
790         unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA),
791         "U+03A3 has a simple lower case mapping");
792 
793     // One element Latin-1 strings can be directly retrieved from the
794     // static strings cache.
795     if constexpr (std::is_same_v<CharT, Latin1Char>) {
796       if (length == 1) {
797         CharT lower = unicode::ToLowerCase(chars[0]);
798         MOZ_ASSERT(StaticStrings::hasUnit(lower));
799 
800         return cx->staticStrings().getUnit(lower);
801       }
802     }
803 
804     // Look for the first character that changes when lowercased.
805     size_t i = 0;
806     for (; i < length; i++) {
807       CharT c = chars[i];
808       if constexpr (!std::is_same_v<CharT, Latin1Char>) {
809         if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
810           CharT trail = chars[i + 1];
811           if (unicode::IsTrailSurrogate(trail)) {
812             if (unicode::ChangesWhenLowerCasedNonBMP(c, trail)) {
813               break;
814             }
815 
816             i++;
817             continue;
818           }
819         }
820       }
821       if (unicode::ChangesWhenLowerCased(c)) {
822         break;
823       }
824     }
825 
826     // If no character needs to change, return the input string.
827     if (i == length) {
828       return str;
829     }
830 
831     resultLength = length;
832     if (!newChars.maybeAlloc(cx, resultLength)) {
833       return nullptr;
834     }
835 
836     PodCopy(newChars.get(), chars, i);
837 
838     size_t readChars =
839         ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
840     if constexpr (!std::is_same_v<CharT, Latin1Char>) {
841       if (readChars < length) {
842         resultLength = ToLowerCaseLength(chars, readChars, length);
843 
844         if (!newChars.maybeRealloc(cx, length, resultLength)) {
845           return nullptr;
846         }
847 
848         MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.get(), chars,
849                                                   readChars, length,
850                                                   resultLength));
851       }
852     } else {
853       MOZ_ASSERT(readChars == length,
854                  "Latin-1 strings don't have special lower case mappings");
855     }
856   }
857 
858   return newChars.toStringDontDeflate(cx, resultLength);
859 }
860 
StringToLowerCase(JSContext * cx,HandleString string)861 JSString* js::StringToLowerCase(JSContext* cx, HandleString string) {
862   JSLinearString* linear = string->ensureLinear(cx);
863   if (!linear) {
864     return nullptr;
865   }
866 
867   if (linear->hasLatin1Chars()) {
868     return ToLowerCase<Latin1Char>(cx, linear);
869   }
870   return ToLowerCase<char16_t>(cx, linear);
871 }
872 
str_toLowerCase(JSContext * cx,unsigned argc,Value * vp)873 static bool str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) {
874   CallArgs args = CallArgsFromVp(argc, vp);
875 
876   RootedString str(cx,
877                    ToStringForStringFunction(cx, "toLowerCase", args.thisv()));
878   if (!str) {
879     return false;
880   }
881 
882   JSString* result = StringToLowerCase(cx, str);
883   if (!result) {
884     return false;
885   }
886 
887   args.rval().setString(result);
888   return true;
889 }
890 
891 #if JS_HAS_INTL_API
892 // String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
893 // with core functionality performed by the intrinsic below.
894 
CaseMappingLocale(JSContext * cx,JSString * str)895 static const char* CaseMappingLocale(JSContext* cx, JSString* str) {
896   JSLinearString* locale = str->ensureLinear(cx);
897   if (!locale) {
898     return nullptr;
899   }
900 
901   MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");
902 
903   // Lithuanian, Turkish, and Azeri have language dependent case mappings.
904   static const char languagesWithSpecialCasing[][3] = {"lt", "tr", "az"};
905 
906   // All strings in |languagesWithSpecialCasing| are of length two, so we
907   // only need to compare the first two characters to find a matching locale.
908   // ES2017 Intl, §9.2.2 BestAvailableLocale
909   if (locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-') {
910     for (const auto& language : languagesWithSpecialCasing) {
911       if (locale->latin1OrTwoByteChar(0) == language[0] &&
912           locale->latin1OrTwoByteChar(1) == language[1]) {
913         return language;
914       }
915     }
916   }
917 
918   return "";  // ICU root locale
919 }
920 
intl_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)921 bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
922   CallArgs args = CallArgsFromVp(argc, vp);
923   MOZ_ASSERT(args.length() == 2);
924   MOZ_ASSERT(args[0].isString());
925   MOZ_ASSERT(args[1].isString());
926 
927   RootedString string(cx, args[0].toString());
928 
929   const char* locale = CaseMappingLocale(cx, args[1].toString());
930   if (!locale) {
931     return false;
932   }
933 
934   // Call String.prototype.toLowerCase() for language independent casing.
935   if (intl::StringsAreEqual(locale, "")) {
936     JSString* str = StringToLowerCase(cx, string);
937     if (!str) {
938       return false;
939     }
940 
941     args.rval().setString(str);
942     return true;
943   }
944 
945   AutoStableStringChars inputChars(cx);
946   if (!inputChars.initTwoByte(cx, string)) {
947     return false;
948   }
949   mozilla::Range<const char16_t> input = inputChars.twoByteRange();
950 
951   // Note: maximum case mapping length is three characters, so the result
952   // length might be > INT32_MAX. ICU will fail in this case.
953   static_assert(JSString::MAX_LENGTH <= INT32_MAX,
954                 "String length must fit in int32_t for ICU");
955 
956   static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
957 
958   Vector<char16_t, INLINE_CAPACITY> chars(cx);
959   if (!chars.resize(std::max(INLINE_CAPACITY, input.length()))) {
960     return false;
961   }
962 
963   int32_t size = intl::CallICU(
964       cx,
965       [&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
966         return u_strToLower(chars, size, input.begin().get(), input.length(),
967                             locale, status);
968       },
969       chars);
970   if (size < 0) {
971     return false;
972   }
973 
974   JSString* result = NewStringCopyN<CanGC>(cx, chars.begin(), size);
975   if (!result) {
976     return false;
977   }
978 
979   args.rval().setString(result);
980   return true;
981 }
982 
983 #else
984 
985 // When the Intl API is not exposed, String.prototype.toLowerCase is implemented
986 // in C++.
str_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)987 static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
988   CallArgs args = CallArgsFromVp(argc, vp);
989 
990   RootedString str(
991       cx, ToStringForStringFunction(cx, "toLocaleLowerCase", args.thisv()));
992   if (!str) {
993     return false;
994   }
995 
996   /*
997    * Forcefully ignore the first (or any) argument and return toLowerCase(),
998    * ECMA has reserved that argument, presumably for defining the locale.
999    */
1000   if (cx->runtime()->localeCallbacks &&
1001       cx->runtime()->localeCallbacks->localeToLowerCase) {
1002     RootedValue result(cx);
1003     if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) {
1004       return false;
1005     }
1006 
1007     args.rval().set(result);
1008     return true;
1009   }
1010 
1011   RootedLinearString linear(cx, str->ensureLinear(cx));
1012   if (!linear) {
1013     return false;
1014   }
1015 
1016   JSString* result = StringToLowerCase(cx, linear);
1017   if (!result) {
1018     return false;
1019   }
1020 
1021   args.rval().setString(result);
1022   return true;
1023 }
1024 
1025 #endif  // JS_HAS_INTL_API
1026 
ToUpperCaseHasSpecialCasing(Latin1Char charCode)1027 static inline bool ToUpperCaseHasSpecialCasing(Latin1Char charCode) {
1028   // U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
1029   // special casing rules, so detect it inline.
1030   bool hasUpperCaseSpecialCasing =
1031       charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
1032   MOZ_ASSERT(hasUpperCaseSpecialCasing ==
1033              unicode::ChangesWhenUpperCasedSpecialCasing(charCode));
1034 
1035   return hasUpperCaseSpecialCasing;
1036 }
1037 
ToUpperCaseHasSpecialCasing(char16_t charCode)1038 static inline bool ToUpperCaseHasSpecialCasing(char16_t charCode) {
1039   return unicode::ChangesWhenUpperCasedSpecialCasing(charCode);
1040 }
1041 
ToUpperCaseLengthSpecialCasing(Latin1Char charCode)1042 static inline size_t ToUpperCaseLengthSpecialCasing(Latin1Char charCode) {
1043   // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1044   MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1045 
1046   return 2;
1047 }
1048 
ToUpperCaseLengthSpecialCasing(char16_t charCode)1049 static inline size_t ToUpperCaseLengthSpecialCasing(char16_t charCode) {
1050   MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode));
1051 
1052   return unicode::LengthUpperCaseSpecialCasing(charCode);
1053 }
1054 
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,Latin1Char * elements,size_t * index)1055 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1056                                                            Latin1Char* elements,
1057                                                            size_t* index) {
1058   // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1059   MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1060   static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
1061 
1062   elements[(*index)++] = 'S';
1063   elements[(*index)++] = 'S';
1064 }
1065 
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,char16_t * elements,size_t * index)1066 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1067                                                            char16_t* elements,
1068                                                            size_t* index) {
1069   unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
1070 }
1071 
1072 // See ToLowerCaseImpl for an explanation of the parameters.
1073 template <typename DestChar, typename SrcChar>
ToUpperCaseImpl(DestChar * destChars,const SrcChar * srcChars,size_t startIndex,size_t srcLength,size_t destLength)1074 static size_t ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars,
1075                               size_t startIndex, size_t srcLength,
1076                               size_t destLength) {
1077   static_assert(std::is_same_v<SrcChar, Latin1Char> ||
1078                     !std::is_same_v<DestChar, Latin1Char>,
1079                 "cannot write non-Latin-1 characters into Latin-1 string");
1080   MOZ_ASSERT(startIndex < srcLength);
1081   MOZ_ASSERT(srcLength <= destLength);
1082 
1083   size_t j = startIndex;
1084   for (size_t i = startIndex; i < srcLength; i++) {
1085     char16_t c = srcChars[i];
1086     if constexpr (!std::is_same_v<DestChar, Latin1Char>) {
1087       if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
1088         char16_t trail = srcChars[i + 1];
1089         if (unicode::IsTrailSurrogate(trail)) {
1090           trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
1091           destChars[j++] = c;
1092           destChars[j++] = trail;
1093           i++;
1094           continue;
1095         }
1096       }
1097     }
1098 
1099     if (MOZ_UNLIKELY(c > 0x7f &&
1100                      ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) {
1101       // Return if the output buffer is too small.
1102       if (srcLength == destLength) {
1103         return i;
1104       }
1105 
1106       ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j);
1107       continue;
1108     }
1109 
1110     c = unicode::ToUpperCase(c);
1111     if constexpr (std::is_same_v<DestChar, Latin1Char>) {
1112       MOZ_ASSERT(c <= JSString::MAX_LATIN1_CHAR);
1113     }
1114     destChars[j++] = c;
1115   }
1116 
1117   MOZ_ASSERT(j == destLength);
1118   return srcLength;
1119 }
1120 
1121 template <typename CharT>
ToUpperCaseLength(const CharT * chars,size_t startIndex,size_t length)1122 static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex,
1123                                 size_t length) {
1124   size_t upperLength = length;
1125   for (size_t i = startIndex; i < length; i++) {
1126     char16_t c = chars[i];
1127 
1128     if (c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<CharT>(c))) {
1129       upperLength += ToUpperCaseLengthSpecialCasing(static_cast<CharT>(c)) - 1;
1130     }
1131   }
1132   return upperLength;
1133 }
1134 
1135 template <typename DestChar, typename SrcChar>
CopyChars(DestChar * destChars,const SrcChar * srcChars,size_t length)1136 static inline void CopyChars(DestChar* destChars, const SrcChar* srcChars,
1137                              size_t length) {
1138   static_assert(!std::is_same_v<DestChar, SrcChar>,
1139                 "PodCopy is used for the same type case");
1140   for (size_t i = 0; i < length; i++) {
1141     destChars[i] = srcChars[i];
1142   }
1143 }
1144 
1145 template <typename CharT>
CopyChars(CharT * destChars,const CharT * srcChars,size_t length)1146 static inline void CopyChars(CharT* destChars, const CharT* srcChars,
1147                              size_t length) {
1148   PodCopy(destChars, srcChars, length);
1149 }
1150 
1151 template <typename DestChar, typename SrcChar>
ToUpperCase(JSContext * cx,InlineCharBuffer<DestChar> & newChars,const SrcChar * chars,size_t startIndex,size_t length,size_t * resultLength)1152 static inline bool ToUpperCase(JSContext* cx,
1153                                InlineCharBuffer<DestChar>& newChars,
1154                                const SrcChar* chars, size_t startIndex,
1155                                size_t length, size_t* resultLength) {
1156   MOZ_ASSERT(startIndex < length);
1157 
1158   *resultLength = length;
1159   if (!newChars.maybeAlloc(cx, length)) {
1160     return false;
1161   }
1162 
1163   CopyChars(newChars.get(), chars, startIndex);
1164 
1165   size_t readChars =
1166       ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
1167   if (readChars < length) {
1168     size_t actualLength = ToUpperCaseLength(chars, readChars, length);
1169 
1170     *resultLength = actualLength;
1171     if (!newChars.maybeRealloc(cx, length, actualLength)) {
1172       return false;
1173     }
1174 
1175     MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.get(), chars, readChars,
1176                                               length, actualLength));
1177   }
1178 
1179   return true;
1180 }
1181 
1182 template <typename CharT>
ToUpperCase(JSContext * cx,JSLinearString * str)1183 static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) {
1184   using Latin1Buffer = InlineCharBuffer<Latin1Char>;
1185   using TwoByteBuffer = InlineCharBuffer<char16_t>;
1186 
1187   mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
1188   const size_t length = str->length();
1189   size_t resultLength;
1190   {
1191     AutoCheckCannotGC nogc;
1192     const CharT* chars = str->chars<CharT>(nogc);
1193 
1194     // Most one element Latin-1 strings can be directly retrieved from the
1195     // static strings cache.
1196     if constexpr (std::is_same_v<CharT, Latin1Char>) {
1197       if (length == 1) {
1198         Latin1Char c = chars[0];
1199         if (c != unicode::MICRO_SIGN &&
1200             c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
1201             c != unicode::LATIN_SMALL_LETTER_SHARP_S) {
1202           char16_t upper = unicode::ToUpperCase(c);
1203           MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
1204           MOZ_ASSERT(StaticStrings::hasUnit(upper));
1205 
1206           return cx->staticStrings().getUnit(upper);
1207         }
1208 
1209         MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
1210                    ToUpperCaseHasSpecialCasing(c));
1211       }
1212     }
1213 
1214     // Look for the first character that changes when uppercased.
1215     size_t i = 0;
1216     for (; i < length; i++) {
1217       CharT c = chars[i];
1218       if constexpr (!std::is_same_v<CharT, Latin1Char>) {
1219         if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
1220           CharT trail = chars[i + 1];
1221           if (unicode::IsTrailSurrogate(trail)) {
1222             if (unicode::ChangesWhenUpperCasedNonBMP(c, trail)) {
1223               break;
1224             }
1225 
1226             i++;
1227             continue;
1228           }
1229         }
1230       }
1231       if (unicode::ChangesWhenUpperCased(c)) {
1232         break;
1233       }
1234       if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c))) {
1235         break;
1236       }
1237     }
1238 
1239     // If no character needs to change, return the input string.
1240     if (i == length) {
1241       return str;
1242     }
1243 
1244     // The string changes when uppercased, so we must create a new string.
1245     // Can it be Latin-1?
1246     //
1247     // If the original string is Latin-1, it can -- unless the string
1248     // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
1249     // the only Latin-1 codepoints that don't uppercase within Latin-1.
1250     // Search for those codepoints to decide whether the new string can be
1251     // Latin-1.
1252     // If the original string is a two-byte string, its uppercase form is
1253     // so rarely Latin-1 that we don't even consider creating a new
1254     // Latin-1 string.
1255     if constexpr (std::is_same_v<CharT, Latin1Char>) {
1256       bool resultIsLatin1 = true;
1257       for (size_t j = i; j < length; j++) {
1258         Latin1Char c = chars[j];
1259         if (c == unicode::MICRO_SIGN ||
1260             c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
1261           MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
1262           resultIsLatin1 = false;
1263           break;
1264         } else {
1265           MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
1266         }
1267       }
1268 
1269       if (resultIsLatin1) {
1270         newChars.construct<Latin1Buffer>();
1271 
1272         if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length,
1273                          &resultLength)) {
1274           return nullptr;
1275         }
1276       } else {
1277         newChars.construct<TwoByteBuffer>();
1278 
1279         if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1280                          &resultLength)) {
1281           return nullptr;
1282         }
1283       }
1284     } else {
1285       newChars.construct<TwoByteBuffer>();
1286 
1287       if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1288                        &resultLength)) {
1289         return nullptr;
1290       }
1291     }
1292   }
1293 
1294   return newChars.constructed<Latin1Buffer>()
1295              ? newChars.ref<Latin1Buffer>().toStringDontDeflate(cx,
1296                                                                 resultLength)
1297              : newChars.ref<TwoByteBuffer>().toStringDontDeflate(cx,
1298                                                                  resultLength);
1299 }
1300 
StringToUpperCase(JSContext * cx,HandleString string)1301 JSString* js::StringToUpperCase(JSContext* cx, HandleString string) {
1302   JSLinearString* linear = string->ensureLinear(cx);
1303   if (!linear) {
1304     return nullptr;
1305   }
1306 
1307   if (linear->hasLatin1Chars()) {
1308     return ToUpperCase<Latin1Char>(cx, linear);
1309   }
1310   return ToUpperCase<char16_t>(cx, linear);
1311 }
1312 
str_toUpperCase(JSContext * cx,unsigned argc,Value * vp)1313 static bool str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1314   CallArgs args = CallArgsFromVp(argc, vp);
1315 
1316   RootedString str(cx,
1317                    ToStringForStringFunction(cx, "toUpperCase", args.thisv()));
1318   if (!str) {
1319     return false;
1320   }
1321 
1322   JSString* result = StringToUpperCase(cx, str);
1323   if (!result) {
1324     return false;
1325   }
1326 
1327   args.rval().setString(result);
1328   return true;
1329 }
1330 
1331 #if JS_HAS_INTL_API
1332 // String.prototype.toLocaleUpperCase is self-hosted when Intl is exposed,
1333 // with core functionality performed by the intrinsic below.
1334 
intl_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1335 bool js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1336   CallArgs args = CallArgsFromVp(argc, vp);
1337   MOZ_ASSERT(args.length() == 2);
1338   MOZ_ASSERT(args[0].isString());
1339   MOZ_ASSERT(args[1].isString());
1340 
1341   RootedString string(cx, args[0].toString());
1342 
1343   const char* locale = CaseMappingLocale(cx, args[1].toString());
1344   if (!locale) {
1345     return false;
1346   }
1347 
1348   // Call String.prototype.toUpperCase() for language independent casing.
1349   if (intl::StringsAreEqual(locale, "")) {
1350     JSString* str = js::StringToUpperCase(cx, string);
1351     if (!str) {
1352       return false;
1353     }
1354 
1355     args.rval().setString(str);
1356     return true;
1357   }
1358 
1359   AutoStableStringChars inputChars(cx);
1360   if (!inputChars.initTwoByte(cx, string)) {
1361     return false;
1362   }
1363   mozilla::Range<const char16_t> input = inputChars.twoByteRange();
1364 
1365   // Note: maximum case mapping length is three characters, so the result
1366   // length might be > INT32_MAX. ICU will fail in this case.
1367   static_assert(JSString::MAX_LENGTH <= INT32_MAX,
1368                 "String length must fit in int32_t for ICU");
1369 
1370   static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1371 
1372   Vector<char16_t, INLINE_CAPACITY> chars(cx);
1373   if (!chars.resize(std::max(INLINE_CAPACITY, input.length()))) {
1374     return false;
1375   }
1376 
1377   int32_t size = intl::CallICU(
1378       cx,
1379       [&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
1380         return u_strToUpper(chars, size, input.begin().get(), input.length(),
1381                             locale, status);
1382       },
1383       chars);
1384   if (size < 0) {
1385     return false;
1386   }
1387 
1388   JSString* result = NewStringCopyN<CanGC>(cx, chars.begin(), size);
1389   if (!result) {
1390     return false;
1391   }
1392 
1393   args.rval().setString(result);
1394   return true;
1395 }
1396 
1397 #else
1398 
1399 // When the Intl API is not exposed, String.prototype.toUpperCase is implemented
1400 // in C++.
str_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1401 static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1402   CallArgs args = CallArgsFromVp(argc, vp);
1403 
1404   RootedString str(
1405       cx, ToStringForStringFunction(cx, "toLocaleUpperCase", args.thisv()));
1406   if (!str) {
1407     return false;
1408   }
1409 
1410   /*
1411    * Forcefully ignore the first (or any) argument and return toUpperCase(),
1412    * ECMA has reserved that argument, presumably for defining the locale.
1413    */
1414   if (cx->runtime()->localeCallbacks &&
1415       cx->runtime()->localeCallbacks->localeToUpperCase) {
1416     RootedValue result(cx);
1417     if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) {
1418       return false;
1419     }
1420 
1421     args.rval().set(result);
1422     return true;
1423   }
1424 
1425   RootedLinearString linear(cx, str->ensureLinear(cx));
1426   if (!linear) {
1427     return false;
1428   }
1429 
1430   JSString* result = StringToUpperCase(cx, linear);
1431   if (!result) {
1432     return false;
1433   }
1434 
1435   args.rval().setString(result);
1436   return true;
1437 }
1438 
1439 #endif  // JS_HAS_INTL_API
1440 
1441 #if JS_HAS_INTL_API
1442 
1443 // String.prototype.localeCompare is self-hosted when Intl functionality is
1444 // exposed, and the only intrinsics it requires are provided in the
1445 // implementation of Intl.Collator.
1446 
1447 #else
1448 
1449 // String.prototype.localeCompare is implemented in C++ (delegating to
1450 // JSLocaleCallbacks) when Intl functionality is not exposed.
str_localeCompare(JSContext * cx,unsigned argc,Value * vp)1451 static bool str_localeCompare(JSContext* cx, unsigned argc, Value* vp) {
1452   CallArgs args = CallArgsFromVp(argc, vp);
1453   RootedString str(
1454       cx, ToStringForStringFunction(cx, "localeCompare", args.thisv()));
1455   if (!str) {
1456     return false;
1457   }
1458 
1459   RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
1460   if (!thatStr) {
1461     return false;
1462   }
1463 
1464   if (cx->runtime()->localeCallbacks &&
1465       cx->runtime()->localeCallbacks->localeCompare) {
1466     RootedValue result(cx);
1467     if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr,
1468                                                        &result)) {
1469       return false;
1470     }
1471 
1472     args.rval().set(result);
1473     return true;
1474   }
1475 
1476   int32_t result;
1477   if (!CompareStrings(cx, str, thatStr, &result)) {
1478     return false;
1479   }
1480 
1481   args.rval().setInt32(result);
1482   return true;
1483 }
1484 
1485 #endif  // JS_HAS_INTL_API
1486 
1487 #if JS_HAS_INTL_API
1488 
1489 // ES2017 draft rev 45e890512fd77add72cc0ee742785f9f6f6482de
1490 // 21.1.3.12 String.prototype.normalize ( [ form ] )
1491 //
1492 // String.prototype.normalize is only implementable if ICU's normalization
1493 // functionality is available.
str_normalize(JSContext * cx,unsigned argc,Value * vp)1494 static bool str_normalize(JSContext* cx, unsigned argc, Value* vp) {
1495   CallArgs args = CallArgsFromVp(argc, vp);
1496 
1497   // Steps 1-2.
1498   RootedString str(cx,
1499                    ToStringForStringFunction(cx, "normalize", args.thisv()));
1500   if (!str) {
1501     return false;
1502   }
1503 
1504   enum NormalizationForm { NFC, NFD, NFKC, NFKD };
1505 
1506   NormalizationForm form;
1507   if (!args.hasDefined(0)) {
1508     // Step 3.
1509     form = NFC;
1510   } else {
1511     // Step 4.
1512     JSLinearString* formStr = ArgToLinearString(cx, args, 0);
1513     if (!formStr) {
1514       return false;
1515     }
1516 
1517     // Step 5.
1518     if (EqualStrings(formStr, cx->names().NFC)) {
1519       form = NFC;
1520     } else if (EqualStrings(formStr, cx->names().NFD)) {
1521       form = NFD;
1522     } else if (EqualStrings(formStr, cx->names().NFKC)) {
1523       form = NFKC;
1524     } else if (EqualStrings(formStr, cx->names().NFKD)) {
1525       form = NFKD;
1526     } else {
1527       JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1528                                 JSMSG_INVALID_NORMALIZE_FORM);
1529       return false;
1530     }
1531   }
1532 
1533   // Latin-1 strings are already in Normalization Form C.
1534   if (form == NFC && str->hasLatin1Chars()) {
1535     // Step 7.
1536     args.rval().setString(str);
1537     return true;
1538   }
1539 
1540   // Step 6.
1541   AutoStableStringChars stableChars(cx);
1542   if (!stableChars.initTwoByte(cx, str)) {
1543     return false;
1544   }
1545 
1546   mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();
1547 
1548   // The unorm2_getXXXInstance() methods return a shared instance which must
1549   // not be deleted.
1550   UErrorCode status = U_ZERO_ERROR;
1551   const UNormalizer2* normalizer;
1552   if (form == NFC) {
1553     normalizer = unorm2_getNFCInstance(&status);
1554   } else if (form == NFD) {
1555     normalizer = unorm2_getNFDInstance(&status);
1556   } else if (form == NFKC) {
1557     normalizer = unorm2_getNFKCInstance(&status);
1558   } else {
1559     MOZ_ASSERT(form == NFKD);
1560     normalizer = unorm2_getNFKDInstance(&status);
1561   }
1562   if (U_FAILURE(status)) {
1563     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1564                               JSMSG_INTERNAL_INTL_ERROR);
1565     return false;
1566   }
1567 
1568   int32_t spanLengthInt = unorm2_spanQuickCheckYes(
1569       normalizer, srcChars.begin().get(), srcChars.length(), &status);
1570   if (U_FAILURE(status)) {
1571     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1572                               JSMSG_INTERNAL_INTL_ERROR);
1573     return false;
1574   }
1575   MOZ_ASSERT(0 <= spanLengthInt && size_t(spanLengthInt) <= srcChars.length());
1576   size_t spanLength = size_t(spanLengthInt);
1577 
1578   // Return if the input string is already normalized.
1579   if (spanLength == srcChars.length()) {
1580     // Step 7.
1581     args.rval().setString(str);
1582     return true;
1583   }
1584 
1585   static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1586 
1587   Vector<char16_t, INLINE_CAPACITY> chars(cx);
1588   if (!chars.resize(std::max(INLINE_CAPACITY, srcChars.length()))) {
1589     return false;
1590   }
1591 
1592   // Copy the already normalized prefix.
1593   if (spanLength > 0) {
1594     PodCopy(chars.begin(), srcChars.begin().get(), spanLength);
1595   }
1596 
1597   int32_t size = intl::CallICU(
1598       cx,
1599       [normalizer, &srcChars, spanLength](UChar* chars, uint32_t size,
1600                                           UErrorCode* status) {
1601         mozilla::RangedPtr<const char16_t> remainingStart =
1602             srcChars.begin() + spanLength;
1603         size_t remainingLength = srcChars.length() - spanLength;
1604 
1605         return unorm2_normalizeSecondAndAppend(normalizer, chars, spanLength,
1606                                                size, remainingStart.get(),
1607                                                remainingLength, status);
1608       },
1609       chars);
1610   if (size < 0) {
1611     return false;
1612   }
1613 
1614   JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
1615   if (!ns) {
1616     return false;
1617   }
1618 
1619   // Step 7.
1620   args.rval().setString(ns);
1621   return true;
1622 }
1623 
1624 #endif  // JS_HAS_INTL_API
1625 
str_charAt(JSContext * cx,unsigned argc,Value * vp)1626 static bool str_charAt(JSContext* cx, unsigned argc, Value* vp) {
1627   CallArgs args = CallArgsFromVp(argc, vp);
1628 
1629   RootedString str(cx);
1630   size_t i;
1631   if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
1632     str = args.thisv().toString();
1633     i = size_t(args[0].toInt32());
1634     if (i >= str->length()) {
1635       goto out_of_range;
1636     }
1637   } else {
1638     str = ToStringForStringFunction(cx, "charAt", args.thisv());
1639     if (!str) {
1640       return false;
1641     }
1642 
1643     double d = 0.0;
1644     if (args.length() > 0 && !ToInteger(cx, args[0], &d)) {
1645       return false;
1646     }
1647 
1648     if (d < 0 || str->length() <= d) {
1649       goto out_of_range;
1650     }
1651     i = size_t(d);
1652   }
1653 
1654   str = cx->staticStrings().getUnitStringForElement(cx, str, i);
1655   if (!str) {
1656     return false;
1657   }
1658   args.rval().setString(str);
1659   return true;
1660 
1661 out_of_range:
1662   args.rval().setString(cx->runtime()->emptyString);
1663   return true;
1664 }
1665 
str_charCodeAt_impl(JSContext * cx,HandleString string,HandleValue index,MutableHandleValue res)1666 bool js::str_charCodeAt_impl(JSContext* cx, HandleString string,
1667                              HandleValue index, MutableHandleValue res) {
1668   size_t i;
1669   if (index.isInt32()) {
1670     i = index.toInt32();
1671     if (i >= string->length()) {
1672       goto out_of_range;
1673     }
1674   } else {
1675     double d = 0.0;
1676     if (!ToInteger(cx, index, &d)) {
1677       return false;
1678     }
1679     // check whether d is negative as size_t is unsigned
1680     if (d < 0 || string->length() <= d) {
1681       goto out_of_range;
1682     }
1683     i = size_t(d);
1684   }
1685   char16_t c;
1686   if (!string->getChar(cx, i, &c)) {
1687     return false;
1688   }
1689   res.setInt32(c);
1690   return true;
1691 
1692 out_of_range:
1693   res.setNaN();
1694   return true;
1695 }
1696 
str_charCodeAt(JSContext * cx,unsigned argc,Value * vp)1697 bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) {
1698   CallArgs args = CallArgsFromVp(argc, vp);
1699   RootedString str(cx);
1700   RootedValue index(cx);
1701   if (args.thisv().isString()) {
1702     str = args.thisv().toString();
1703   } else {
1704     str = ToStringForStringFunction(cx, "charCodeAt", args.thisv());
1705     if (!str) {
1706       return false;
1707     }
1708   }
1709   if (args.length() != 0) {
1710     index = args[0];
1711   } else {
1712     index.setInt32(0);
1713   }
1714 
1715   return js::str_charCodeAt_impl(cx, str, index, args.rval());
1716 }
1717 
1718 /*
1719  * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1720  * The patlen argument must be positive and no greater than sBMHPatLenMax.
1721  *
1722  * Return the index of pat in text, or -1 if not found.
1723  */
1724 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1725 static const uint32_t sBMHPatLenMax = 255;   /* skip table element is uint8_t */
1726 static const int sBMHBadPattern =
1727     -2; /* return value if pat is not ISO-Latin-1 */
1728 
1729 template <typename TextChar, typename PatChar>
BoyerMooreHorspool(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1730 static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen,
1731                               const PatChar* pat, uint32_t patLen) {
1732   MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1733 
1734   uint8_t skip[sBMHCharSetSize];
1735   for (uint32_t i = 0; i < sBMHCharSetSize; i++) {
1736     skip[i] = uint8_t(patLen);
1737   }
1738 
1739   uint32_t patLast = patLen - 1;
1740   for (uint32_t i = 0; i < patLast; i++) {
1741     char16_t c = pat[i];
1742     if (c >= sBMHCharSetSize) {
1743       return sBMHBadPattern;
1744     }
1745     skip[c] = uint8_t(patLast - i);
1746   }
1747 
1748   for (uint32_t k = patLast; k < textLen;) {
1749     for (uint32_t i = k, j = patLast;; i--, j--) {
1750       if (text[i] != pat[j]) {
1751         break;
1752       }
1753       if (j == 0) {
1754         return static_cast<int>(i); /* safe: max string size */
1755       }
1756     }
1757 
1758     char16_t c = text[k];
1759     k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1760   }
1761   return -1;
1762 }
1763 
1764 template <typename TextChar, typename PatChar>
1765 struct MemCmp {
1766   using Extent = uint32_t;
computeExtentMemCmp1767   static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
1768                                                 uint32_t patLen) {
1769     return (patLen - 1) * sizeof(PatChar);
1770   }
matchMemCmp1771   static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1772                                       Extent extent) {
1773     MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1774     return memcmp(p, t, extent) == 0;
1775   }
1776 };
1777 
1778 template <typename TextChar, typename PatChar>
1779 struct ManualCmp {
1780   using Extent = const PatChar*;
computeExtentManualCmp1781   static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat,
1782                                                 uint32_t patLen) {
1783     return pat + patLen;
1784   }
matchManualCmp1785   static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1786                                       Extent extent) {
1787     for (; p != extent; ++p, ++t) {
1788       if (*p != *t) {
1789         return false;
1790       }
1791     }
1792     return true;
1793   }
1794 };
1795 
1796 template <typename TextChar, typename PatChar>
FirstCharMatcherUnrolled(const TextChar * text,uint32_t n,const PatChar pat)1797 static const TextChar* FirstCharMatcherUnrolled(const TextChar* text,
1798                                                 uint32_t n, const PatChar pat) {
1799   const TextChar* textend = text + n;
1800   const TextChar* t = text;
1801 
1802   switch ((textend - t) & 7) {
1803     case 0:
1804       if (*t++ == pat) return t - 1;
1805       [[fallthrough]];
1806     case 7:
1807       if (*t++ == pat) return t - 1;
1808       [[fallthrough]];
1809     case 6:
1810       if (*t++ == pat) return t - 1;
1811       [[fallthrough]];
1812     case 5:
1813       if (*t++ == pat) return t - 1;
1814       [[fallthrough]];
1815     case 4:
1816       if (*t++ == pat) return t - 1;
1817       [[fallthrough]];
1818     case 3:
1819       if (*t++ == pat) return t - 1;
1820       [[fallthrough]];
1821     case 2:
1822       if (*t++ == pat) return t - 1;
1823       [[fallthrough]];
1824     case 1:
1825       if (*t++ == pat) return t - 1;
1826   }
1827   while (textend != t) {
1828     if (t[0] == pat) return t;
1829     if (t[1] == pat) return t + 1;
1830     if (t[2] == pat) return t + 2;
1831     if (t[3] == pat) return t + 3;
1832     if (t[4] == pat) return t + 4;
1833     if (t[5] == pat) return t + 5;
1834     if (t[6] == pat) return t + 6;
1835     if (t[7] == pat) return t + 7;
1836     t += 8;
1837   }
1838   return nullptr;
1839 }
1840 
FirstCharMatcher8bit(const char * text,uint32_t n,const char pat)1841 static const char* FirstCharMatcher8bit(const char* text, uint32_t n,
1842                                         const char pat) {
1843   return reinterpret_cast<const char*>(memchr(text, pat, n));
1844 }
1845 
1846 template <class InnerMatch, typename TextChar, typename PatChar>
Matcher(const TextChar * text,uint32_t textlen,const PatChar * pat,uint32_t patlen)1847 static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
1848                    uint32_t patlen) {
1849   MOZ_ASSERT(patlen > 0);
1850 
1851   if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) {
1852     return -1;
1853   }
1854 
1855   const typename InnerMatch::Extent extent =
1856       InnerMatch::computeExtent(pat, patlen);
1857 
1858   uint32_t i = 0;
1859   uint32_t n = textlen - patlen + 1;
1860   while (i < n) {
1861     const TextChar* pos;
1862 
1863     if (sizeof(TextChar) == 1) {
1864       MOZ_ASSERT(pat[0] <= 0xff);
1865       pos = (TextChar*)FirstCharMatcher8bit((char*)text + i, n - i, pat[0]);
1866     } else {
1867       pos = FirstCharMatcherUnrolled(text + i, n - i, char16_t(pat[0]));
1868     }
1869 
1870     if (pos == nullptr) {
1871       return -1;
1872     }
1873 
1874     i = static_cast<uint32_t>(pos - text);
1875     if (InnerMatch::match(pat + 1, text + i + 1, extent)) {
1876       return i;
1877     }
1878 
1879     i += 1;
1880   }
1881   return -1;
1882 }
1883 
1884 template <typename TextChar, typename PatChar>
StringMatch(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1885 static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen,
1886                                          const PatChar* pat, uint32_t patLen) {
1887   if (patLen == 0) {
1888     return 0;
1889   }
1890   if (textLen < patLen) {
1891     return -1;
1892   }
1893 
1894 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1895   /*
1896    * Given enough registers, the unrolled loop below is faster than the
1897    * following loop. 32-bit x86 does not have enough registers.
1898    */
1899   if (patLen == 1) {
1900     const PatChar p0 = *pat;
1901     const TextChar* end = text + textLen;
1902     for (const TextChar* c = text; c != end; ++c) {
1903       if (*c == p0) {
1904         return c - text;
1905       }
1906     }
1907     return -1;
1908   }
1909 #endif
1910 
1911   /*
1912    * If the text or pattern string is short, BMH will be more expensive than
1913    * the basic linear scan due to initialization cost and a more complex loop
1914    * body. While the correct threshold is input-dependent, we can make a few
1915    * conservative observations:
1916    *  - When |textLen| is "big enough", the initialization time will be
1917    *    proportionally small, so the worst-case slowdown is minimized.
1918    *  - When |patLen| is "too small", even the best case for BMH will be
1919    *    slower than a simple scan for large |textLen| due to the more complex
1920    *    loop body of BMH.
1921    * From this, the values for "big enough" and "too small" are determined
1922    * empirically. See bug 526348.
1923    */
1924   if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
1925     int index = BoyerMooreHorspool(text, textLen, pat, patLen);
1926     if (index != sBMHBadPattern) {
1927       return index;
1928     }
1929   }
1930 
1931   /*
1932    * For big patterns with large potential overlap we want the SIMD-optimized
1933    * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1934    * use memcmp if one of the strings is TwoByte and the other is Latin-1.
1935    */
1936   return (patLen > 128 && std::is_same_v<TextChar, PatChar>)
1937              ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(
1938                    text, textLen, pat, patLen)
1939              : Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(
1940                    text, textLen, pat, patLen);
1941 }
1942 
StringMatch(JSLinearString * text,JSLinearString * pat,uint32_t start=0)1943 static int32_t StringMatch(JSLinearString* text, JSLinearString* pat,
1944                            uint32_t start = 0) {
1945   MOZ_ASSERT(start <= text->length());
1946   uint32_t textLen = text->length() - start;
1947   uint32_t patLen = pat->length();
1948 
1949   int match;
1950   AutoCheckCannotGC nogc;
1951   if (text->hasLatin1Chars()) {
1952     const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1953     if (pat->hasLatin1Chars()) {
1954       match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1955     } else {
1956       match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1957     }
1958   } else {
1959     const char16_t* textChars = text->twoByteChars(nogc) + start;
1960     if (pat->hasLatin1Chars()) {
1961       match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1962     } else {
1963       match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1964     }
1965   }
1966 
1967   return (match == -1) ? -1 : start + match;
1968 }
1969 
1970 static const size_t sRopeMatchThresholdRatioLog2 = 4;
1971 
StringFindPattern(JSLinearString * text,JSLinearString * pat,size_t start)1972 int js::StringFindPattern(JSLinearString* text, JSLinearString* pat,
1973                           size_t start) {
1974   return StringMatch(text, pat, start);
1975 }
1976 
1977 // When an algorithm does not need a string represented as a single linear
1978 // array of characters, this range utility may be used to traverse the string a
1979 // sequence of linear arrays of characters. This avoids flattening ropes.
1980 class StringSegmentRange {
1981   // If malloc() shows up in any profiles from this vector, we can add a new
1982   // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1983   using StackVector = JS::GCVector<JSString*, 16>;
1984   Rooted<StackVector> stack;
1985   RootedLinearString cur;
1986 
settle(JSString * str)1987   bool settle(JSString* str) {
1988     while (str->isRope()) {
1989       JSRope& rope = str->asRope();
1990       if (!stack.append(rope.rightChild())) {
1991         return false;
1992       }
1993       str = rope.leftChild();
1994     }
1995     cur = &str->asLinear();
1996     return true;
1997   }
1998 
1999  public:
StringSegmentRange(JSContext * cx)2000   explicit StringSegmentRange(JSContext* cx)
2001       : stack(cx, StackVector(cx)), cur(cx) {}
2002 
init(JSString * str)2003   [[nodiscard]] bool init(JSString* str) {
2004     MOZ_ASSERT(stack.empty());
2005     return settle(str);
2006   }
2007 
empty() const2008   bool empty() const { return cur == nullptr; }
2009 
front() const2010   JSLinearString* front() const {
2011     MOZ_ASSERT(!cur->isRope());
2012     return cur;
2013   }
2014 
popFront()2015   [[nodiscard]] bool popFront() {
2016     MOZ_ASSERT(!empty());
2017     if (stack.empty()) {
2018       cur = nullptr;
2019       return true;
2020     }
2021     return settle(stack.popCopy());
2022   }
2023 };
2024 
2025 typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
2026 
2027 template <typename TextChar, typename PatChar>
RopeMatchImpl(const AutoCheckCannotGC & nogc,LinearStringVector & strings,const PatChar * pat,size_t patLen)2028 static int RopeMatchImpl(const AutoCheckCannotGC& nogc,
2029                          LinearStringVector& strings, const PatChar* pat,
2030                          size_t patLen) {
2031   /* Absolute offset from the beginning of the logical text string. */
2032   int pos = 0;
2033 
2034   for (JSLinearString** outerp = strings.begin(); outerp != strings.end();
2035        ++outerp) {
2036     /* Try to find a match within 'outer'. */
2037     JSLinearString* outer = *outerp;
2038     const TextChar* chars = outer->chars<TextChar>(nogc);
2039     size_t len = outer->length();
2040     int matchResult = StringMatch(chars, len, pat, patLen);
2041     if (matchResult != -1) {
2042       /* Matched! */
2043       return pos + matchResult;
2044     }
2045 
2046     /* Try to find a match starting in 'outer' and running into other nodes. */
2047     const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
2048     const TextChar* const textend = chars + len;
2049     const PatChar p0 = *pat;
2050     const PatChar* const p1 = pat + 1;
2051     const PatChar* const patend = pat + patLen;
2052     for (const TextChar* t = text; t != textend;) {
2053       if (*t++ != p0) {
2054         continue;
2055       }
2056 
2057       JSLinearString** innerp = outerp;
2058       const TextChar* ttend = textend;
2059       const TextChar* tt = t;
2060       for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) {
2061         while (tt == ttend) {
2062           if (++innerp == strings.end()) {
2063             return -1;
2064           }
2065 
2066           JSLinearString* inner = *innerp;
2067           tt = inner->chars<TextChar>(nogc);
2068           ttend = tt + inner->length();
2069         }
2070         if (*pp != *tt) {
2071           goto break_continue;
2072         }
2073       }
2074 
2075       /* Matched! */
2076       return pos + (t - chars) - 1; /* -1 because of *t++ above */
2077 
2078     break_continue:;
2079     }
2080 
2081     pos += len;
2082   }
2083 
2084   return -1;
2085 }
2086 
2087 /*
2088  * RopeMatch takes the text to search and the pattern to search for in the text.
2089  * RopeMatch returns false on OOM and otherwise returns the match index through
2090  * the 'match' outparam (-1 for not found).
2091  */
RopeMatch(JSContext * cx,JSRope * text,JSLinearString * pat,int * match)2092 static bool RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat,
2093                       int* match) {
2094   uint32_t patLen = pat->length();
2095   if (patLen == 0) {
2096     *match = 0;
2097     return true;
2098   }
2099   if (text->length() < patLen) {
2100     *match = -1;
2101     return true;
2102   }
2103 
2104   /*
2105    * List of leaf nodes in the rope. If we run out of memory when trying to
2106    * append to this list, we can still fall back to StringMatch, so use the
2107    * system allocator so we don't report OOM in that case.
2108    */
2109   LinearStringVector strings;
2110 
2111   /*
2112    * We don't want to do rope matching if there is a poor node-to-char ratio,
2113    * since this means spending a lot of time in the match loop below. We also
2114    * need to build the list of leaf nodes. Do both here: iterate over the
2115    * nodes so long as there are not too many.
2116    *
2117    * We also don't use rope matching if the rope contains both Latin-1 and
2118    * TwoByte nodes, to simplify the match algorithm.
2119    */
2120   {
2121     size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
2122     StringSegmentRange r(cx);
2123     if (!r.init(text)) {
2124       return false;
2125     }
2126 
2127     bool textIsLatin1 = text->hasLatin1Chars();
2128     while (!r.empty()) {
2129       if (threshold-- == 0 || r.front()->hasLatin1Chars() != textIsLatin1 ||
2130           !strings.append(r.front())) {
2131         JSLinearString* linear = text->ensureLinear(cx);
2132         if (!linear) {
2133           return false;
2134         }
2135 
2136         *match = StringMatch(linear, pat);
2137         return true;
2138       }
2139       if (!r.popFront()) {
2140         return false;
2141       }
2142     }
2143   }
2144 
2145   AutoCheckCannotGC nogc;
2146   if (text->hasLatin1Chars()) {
2147     if (pat->hasLatin1Chars()) {
2148       *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc),
2149                                          patLen);
2150     } else {
2151       *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc),
2152                                          patLen);
2153     }
2154   } else {
2155     if (pat->hasLatin1Chars()) {
2156       *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc),
2157                                        patLen);
2158     } else {
2159       *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc),
2160                                        patLen);
2161     }
2162   }
2163 
2164   return true;
2165 }
2166 
ReportErrorIfFirstArgIsRegExp(JSContext * cx,const CallArgs & args)2167 static MOZ_ALWAYS_INLINE bool ReportErrorIfFirstArgIsRegExp(
2168     JSContext* cx, const CallArgs& args) {
2169   // Only call IsRegExp if the first argument is definitely an object, so we
2170   // don't pay the cost of an additional function call in the common case.
2171   if (args.length() == 0 || !args[0].isObject()) {
2172     return true;
2173   }
2174 
2175   bool isRegExp;
2176   if (!IsRegExp(cx, args[0], &isRegExp)) {
2177     return false;
2178   }
2179 
2180   if (isRegExp) {
2181     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
2182                               JSMSG_INVALID_ARG_TYPE, "first", "",
2183                               "Regular Expression");
2184     return false;
2185   }
2186   return true;
2187 }
2188 
2189 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2190 // 21.1.3.7 String.prototype.includes ( searchString [ , position ] )
str_includes(JSContext * cx,unsigned argc,Value * vp)2191 bool js::str_includes(JSContext* cx, unsigned argc, Value* vp) {
2192   CallArgs args = CallArgsFromVp(argc, vp);
2193 
2194   // Steps 1-2.
2195   RootedString str(cx, ToStringForStringFunction(cx, "includes", args.thisv()));
2196   if (!str) {
2197     return false;
2198   }
2199 
2200   // Steps 3-4.
2201   if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2202     return false;
2203   }
2204 
2205   // Step 5.
2206   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2207   if (!searchStr) {
2208     return false;
2209   }
2210 
2211   // Step 6.
2212   uint32_t pos = 0;
2213   if (args.hasDefined(1)) {
2214     if (args[1].isInt32()) {
2215       int i = args[1].toInt32();
2216       pos = (i < 0) ? 0U : uint32_t(i);
2217     } else {
2218       double d;
2219       if (!ToInteger(cx, args[1], &d)) {
2220         return false;
2221       }
2222       pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2223     }
2224   }
2225 
2226   // Step 7.
2227   uint32_t textLen = str->length();
2228 
2229   // Step 8.
2230   uint32_t start = std::min(pos, textLen);
2231 
2232   // Steps 9-10.
2233   JSLinearString* text = str->ensureLinear(cx);
2234   if (!text) {
2235     return false;
2236   }
2237 
2238   args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
2239   return true;
2240 }
2241 
2242 /* ES6 20120927 draft 15.5.4.7. */
str_indexOf(JSContext * cx,unsigned argc,Value * vp)2243 bool js::str_indexOf(JSContext* cx, unsigned argc, Value* vp) {
2244   CallArgs args = CallArgsFromVp(argc, vp);
2245 
2246   // Steps 1, 2, and 3
2247   RootedString str(cx, ToStringForStringFunction(cx, "indexOf", args.thisv()));
2248   if (!str) {
2249     return false;
2250   }
2251 
2252   // Steps 4 and 5
2253   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2254   if (!searchStr) {
2255     return false;
2256   }
2257 
2258   // Steps 6 and 7
2259   uint32_t pos = 0;
2260   if (args.hasDefined(1)) {
2261     if (args[1].isInt32()) {
2262       int i = args[1].toInt32();
2263       pos = (i < 0) ? 0U : uint32_t(i);
2264     } else {
2265       double d;
2266       if (!ToInteger(cx, args[1], &d)) {
2267         return false;
2268       }
2269       pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2270     }
2271   }
2272 
2273   // Step 8
2274   uint32_t textLen = str->length();
2275 
2276   // Step 9
2277   uint32_t start = std::min(pos, textLen);
2278 
2279   if (str == searchStr) {
2280     // AngularJS often invokes "false".indexOf("false"). This check should
2281     // be cheap enough to not hurt anything else.
2282     args.rval().setInt32(start == 0 ? 0 : -1);
2283     return true;
2284   }
2285 
2286   // Steps 10 and 11
2287   JSLinearString* text = str->ensureLinear(cx);
2288   if (!text) {
2289     return false;
2290   }
2291 
2292   args.rval().setInt32(StringMatch(text, searchStr, start));
2293   return true;
2294 }
2295 
2296 template <typename TextChar, typename PatChar>
LastIndexOfImpl(const TextChar * text,size_t textLen,const PatChar * pat,size_t patLen,size_t start)2297 static int32_t LastIndexOfImpl(const TextChar* text, size_t textLen,
2298                                const PatChar* pat, size_t patLen,
2299                                size_t start) {
2300   MOZ_ASSERT(patLen > 0);
2301   MOZ_ASSERT(patLen <= textLen);
2302   MOZ_ASSERT(start <= textLen - patLen);
2303 
2304   const PatChar p0 = *pat;
2305   const PatChar* patNext = pat + 1;
2306   const PatChar* patEnd = pat + patLen;
2307 
2308   for (const TextChar* t = text + start; t >= text; --t) {
2309     if (*t == p0) {
2310       const TextChar* t1 = t + 1;
2311       for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) {
2312         if (*t1 != *p1) {
2313           goto break_continue;
2314         }
2315       }
2316 
2317       return static_cast<int32_t>(t - text);
2318     }
2319   break_continue:;
2320   }
2321 
2322   return -1;
2323 }
2324 
2325 // ES2017 draft rev 6859bb9ccaea9c6ede81d71e5320e3833b92cb3e
2326 // 21.1.3.9 String.prototype.lastIndexOf ( searchString [ , position ] )
str_lastIndexOf(JSContext * cx,unsigned argc,Value * vp)2327 static bool str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp) {
2328   CallArgs args = CallArgsFromVp(argc, vp);
2329 
2330   // Steps 1-2.
2331   RootedString str(cx,
2332                    ToStringForStringFunction(cx, "lastIndexOf", args.thisv()));
2333   if (!str) {
2334     return false;
2335   }
2336 
2337   // Step 3.
2338   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2339   if (!searchStr) {
2340     return false;
2341   }
2342 
2343   // Step 6.
2344   size_t len = str->length();
2345 
2346   // Step 8.
2347   size_t searchLen = searchStr->length();
2348 
2349   // Steps 4-5, 7.
2350   int start = len - searchLen;  // Start searching here
2351   if (args.hasDefined(1)) {
2352     if (args[1].isInt32()) {
2353       int i = args[1].toInt32();
2354       if (i <= 0) {
2355         start = 0;
2356       } else if (i < start) {
2357         start = i;
2358       }
2359     } else {
2360       double d;
2361       if (!ToNumber(cx, args[1], &d)) {
2362         return false;
2363       }
2364       if (!IsNaN(d)) {
2365         d = JS::ToInteger(d);
2366         if (d <= 0) {
2367           start = 0;
2368         } else if (d < start) {
2369           start = int(d);
2370         }
2371       }
2372     }
2373   }
2374 
2375   if (str == searchStr) {
2376     args.rval().setInt32(0);
2377     return true;
2378   }
2379 
2380   if (searchLen > len) {
2381     args.rval().setInt32(-1);
2382     return true;
2383   }
2384 
2385   if (searchLen == 0) {
2386     args.rval().setInt32(start);
2387     return true;
2388   }
2389   MOZ_ASSERT(0 <= start && size_t(start) < len);
2390 
2391   JSLinearString* text = str->ensureLinear(cx);
2392   if (!text) {
2393     return false;
2394   }
2395 
2396   // Step 9.
2397   int32_t res;
2398   AutoCheckCannotGC nogc;
2399   if (text->hasLatin1Chars()) {
2400     const Latin1Char* textChars = text->latin1Chars(nogc);
2401     if (searchStr->hasLatin1Chars()) {
2402       res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2403                             searchLen, start);
2404     } else {
2405       res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2406                             searchLen, start);
2407     }
2408   } else {
2409     const char16_t* textChars = text->twoByteChars(nogc);
2410     if (searchStr->hasLatin1Chars()) {
2411       res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2412                             searchLen, start);
2413     } else {
2414       res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2415                             searchLen, start);
2416     }
2417   }
2418 
2419   args.rval().setInt32(res);
2420   return true;
2421 }
2422 
2423 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2424 // 21.1.3.20 String.prototype.startsWith ( searchString [ , position ] )
str_startsWith(JSContext * cx,unsigned argc,Value * vp)2425 bool js::str_startsWith(JSContext* cx, unsigned argc, Value* vp) {
2426   CallArgs args = CallArgsFromVp(argc, vp);
2427 
2428   // Steps 1-2.
2429   RootedString str(cx,
2430                    ToStringForStringFunction(cx, "startsWith", args.thisv()));
2431   if (!str) {
2432     return false;
2433   }
2434 
2435   // Steps 3-4.
2436   if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2437     return false;
2438   }
2439 
2440   // Step 5.
2441   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2442   if (!searchStr) {
2443     return false;
2444   }
2445 
2446   // Step 6.
2447   uint32_t pos = 0;
2448   if (args.hasDefined(1)) {
2449     if (args[1].isInt32()) {
2450       int i = args[1].toInt32();
2451       pos = (i < 0) ? 0U : uint32_t(i);
2452     } else {
2453       double d;
2454       if (!ToInteger(cx, args[1], &d)) {
2455         return false;
2456       }
2457       pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2458     }
2459   }
2460 
2461   // Step 7.
2462   uint32_t textLen = str->length();
2463 
2464   // Step 8.
2465   uint32_t start = std::min(pos, textLen);
2466 
2467   // Step 9.
2468   uint32_t searchLen = searchStr->length();
2469 
2470   // Step 10.
2471   if (searchLen + start < searchLen || searchLen + start > textLen) {
2472     args.rval().setBoolean(false);
2473     return true;
2474   }
2475 
2476   // Steps 11-12.
2477   JSLinearString* text = str->ensureLinear(cx);
2478   if (!text) {
2479     return false;
2480   }
2481 
2482   args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2483   return true;
2484 }
2485 
2486 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2487 // 21.1.3.6 String.prototype.endsWith ( searchString [ , endPosition ] )
str_endsWith(JSContext * cx,unsigned argc,Value * vp)2488 bool js::str_endsWith(JSContext* cx, unsigned argc, Value* vp) {
2489   CallArgs args = CallArgsFromVp(argc, vp);
2490 
2491   // Steps 1-2.
2492   RootedString str(cx, ToStringForStringFunction(cx, "endsWith", args.thisv()));
2493   if (!str) {
2494     return false;
2495   }
2496 
2497   // Steps 3-4.
2498   if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2499     return false;
2500   }
2501 
2502   // Step 5.
2503   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2504   if (!searchStr) {
2505     return false;
2506   }
2507 
2508   // Step 6.
2509   uint32_t textLen = str->length();
2510 
2511   // Step 7.
2512   uint32_t pos = textLen;
2513   if (args.hasDefined(1)) {
2514     if (args[1].isInt32()) {
2515       int i = args[1].toInt32();
2516       pos = (i < 0) ? 0U : uint32_t(i);
2517     } else {
2518       double d;
2519       if (!ToInteger(cx, args[1], &d)) {
2520         return false;
2521       }
2522       pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2523     }
2524   }
2525 
2526   // Step 8.
2527   uint32_t end = std::min(pos, textLen);
2528 
2529   // Step 9.
2530   uint32_t searchLen = searchStr->length();
2531 
2532   // Step 11 (reordered).
2533   if (searchLen > end) {
2534     args.rval().setBoolean(false);
2535     return true;
2536   }
2537 
2538   // Step 10.
2539   uint32_t start = end - searchLen;
2540 
2541   // Steps 12-13.
2542   JSLinearString* text = str->ensureLinear(cx);
2543   if (!text) {
2544     return false;
2545   }
2546 
2547   args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2548   return true;
2549 }
2550 
2551 template <typename CharT>
TrimString(const CharT * chars,bool trimStart,bool trimEnd,size_t length,size_t * pBegin,size_t * pEnd)2552 static void TrimString(const CharT* chars, bool trimStart, bool trimEnd,
2553                        size_t length, size_t* pBegin, size_t* pEnd) {
2554   size_t begin = 0, end = length;
2555 
2556   if (trimStart) {
2557     while (begin < length && unicode::IsSpace(chars[begin])) {
2558       ++begin;
2559     }
2560   }
2561 
2562   if (trimEnd) {
2563     while (end > begin && unicode::IsSpace(chars[end - 1])) {
2564       --end;
2565     }
2566   }
2567 
2568   *pBegin = begin;
2569   *pEnd = end;
2570 }
2571 
TrimString(JSContext * cx,const CallArgs & args,const char * funName,bool trimStart,bool trimEnd)2572 static bool TrimString(JSContext* cx, const CallArgs& args, const char* funName,
2573                        bool trimStart, bool trimEnd) {
2574   JSString* str = ToStringForStringFunction(cx, funName, args.thisv());
2575   if (!str) {
2576     return false;
2577   }
2578 
2579   JSLinearString* linear = str->ensureLinear(cx);
2580   if (!linear) {
2581     return false;
2582   }
2583 
2584   size_t length = linear->length();
2585   size_t begin, end;
2586   if (linear->hasLatin1Chars()) {
2587     AutoCheckCannotGC nogc;
2588     TrimString(linear->latin1Chars(nogc), trimStart, trimEnd, length, &begin,
2589                &end);
2590   } else {
2591     AutoCheckCannotGC nogc;
2592     TrimString(linear->twoByteChars(nogc), trimStart, trimEnd, length, &begin,
2593                &end);
2594   }
2595 
2596   JSLinearString* result = NewDependentString(cx, linear, begin, end - begin);
2597   if (!result) {
2598     return false;
2599   }
2600 
2601   args.rval().setString(result);
2602   return true;
2603 }
2604 
str_trim(JSContext * cx,unsigned argc,Value * vp)2605 static bool str_trim(JSContext* cx, unsigned argc, Value* vp) {
2606   CallArgs args = CallArgsFromVp(argc, vp);
2607   return TrimString(cx, args, "trim", true, true);
2608 }
2609 
str_trimStart(JSContext * cx,unsigned argc,Value * vp)2610 static bool str_trimStart(JSContext* cx, unsigned argc, Value* vp) {
2611   CallArgs args = CallArgsFromVp(argc, vp);
2612   return TrimString(cx, args, "trimStart", true, false);
2613 }
2614 
str_trimEnd(JSContext * cx,unsigned argc,Value * vp)2615 static bool str_trimEnd(JSContext* cx, unsigned argc, Value* vp) {
2616   CallArgs args = CallArgsFromVp(argc, vp);
2617   return TrimString(cx, args, "trimEnd", false, true);
2618 }
2619 
2620 // Utility for building a rope (lazy concatenation) of strings.
2621 class RopeBuilder {
2622   JSContext* cx;
2623   RootedString res;
2624 
2625   RopeBuilder(const RopeBuilder& other) = delete;
2626   void operator=(const RopeBuilder& other) = delete;
2627 
2628  public:
RopeBuilder(JSContext * cx)2629   explicit RopeBuilder(JSContext* cx)
2630       : cx(cx), res(cx, cx->runtime()->emptyString) {}
2631 
append(HandleString str)2632   inline bool append(HandleString str) {
2633     res = ConcatStrings<CanGC>(cx, res, str);
2634     return !!res;
2635   }
2636 
result()2637   inline JSString* result() { return res; }
2638 };
2639 
2640 namespace {
2641 
2642 template <typename CharT>
FindDollarIndex(const CharT * chars,size_t length)2643 static uint32_t FindDollarIndex(const CharT* chars, size_t length) {
2644   if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
2645     uint32_t dollarIndex = p - chars;
2646     MOZ_ASSERT(dollarIndex < length);
2647     return dollarIndex;
2648   }
2649   return UINT32_MAX;
2650 }
2651 
2652 } /* anonymous namespace */
2653 
2654 /*
2655  * Constructs a result string that looks like:
2656  *
2657  *      newstring = string[:matchStart] + repstr + string[matchEnd:]
2658  */
BuildFlatReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t matchStart,size_t patternLength)2659 static JSString* BuildFlatReplacement(JSContext* cx, HandleString textstr,
2660                                       HandleLinearString repstr,
2661                                       size_t matchStart, size_t patternLength) {
2662   size_t matchEnd = matchStart + patternLength;
2663 
2664   RootedString resultStr(cx, NewDependentString(cx, textstr, 0, matchStart));
2665   if (!resultStr) {
2666     return nullptr;
2667   }
2668 
2669   resultStr = ConcatStrings<CanGC>(cx, resultStr, repstr);
2670   if (!resultStr) {
2671     return nullptr;
2672   }
2673 
2674   MOZ_ASSERT(textstr->length() >= matchEnd);
2675   RootedString rest(cx, NewDependentString(cx, textstr, matchEnd,
2676                                            textstr->length() - matchEnd));
2677   if (!rest) {
2678     return nullptr;
2679   }
2680 
2681   return ConcatStrings<CanGC>(cx, resultStr, rest);
2682 }
2683 
BuildFlatRopeReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t match,size_t patternLength)2684 static JSString* BuildFlatRopeReplacement(JSContext* cx, HandleString textstr,
2685                                           HandleLinearString repstr,
2686                                           size_t match, size_t patternLength) {
2687   MOZ_ASSERT(textstr->isRope());
2688 
2689   size_t matchEnd = match + patternLength;
2690 
2691   /*
2692    * If we are replacing over a rope, avoid flattening it by iterating
2693    * through it, building a new rope.
2694    */
2695   StringSegmentRange r(cx);
2696   if (!r.init(textstr)) {
2697     return nullptr;
2698   }
2699 
2700   RopeBuilder builder(cx);
2701 
2702   /*
2703    * Special case when the pattern string is '', which matches to the
2704    * head of the string and doesn't overlap with any component of the rope.
2705    */
2706   if (patternLength == 0) {
2707     MOZ_ASSERT(match == 0);
2708     if (!builder.append(repstr)) {
2709       return nullptr;
2710     }
2711   }
2712 
2713   size_t pos = 0;
2714   while (!r.empty()) {
2715     RootedString str(cx, r.front());
2716     size_t len = str->length();
2717     size_t strEnd = pos + len;
2718     if (pos < matchEnd && strEnd > match) {
2719       /*
2720        * We need to special-case any part of the rope that overlaps
2721        * with the replacement string.
2722        */
2723       if (match >= pos) {
2724         /*
2725          * If this part of the rope overlaps with the left side of
2726          * the pattern, then it must be the only one to overlap with
2727          * the first character in the pattern, so we include the
2728          * replacement string here.
2729          */
2730         RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
2731         if (!leftSide || !builder.append(leftSide) || !builder.append(repstr)) {
2732           return nullptr;
2733         }
2734       }
2735 
2736       /*
2737        * If str runs off the end of the matched string, append the
2738        * last part of str.
2739        */
2740       if (strEnd > matchEnd) {
2741         RootedString rightSide(
2742             cx, NewDependentString(cx, str, matchEnd - pos, strEnd - matchEnd));
2743         if (!rightSide || !builder.append(rightSide)) {
2744           return nullptr;
2745         }
2746       }
2747     } else {
2748       if (!builder.append(str)) {
2749         return nullptr;
2750       }
2751     }
2752     pos += str->length();
2753     if (!r.popFront()) {
2754       return nullptr;
2755     }
2756   }
2757 
2758   return builder.result();
2759 }
2760 
2761 template <typename CharT>
AppendDollarReplacement(StringBuffer & newReplaceChars,size_t firstDollarIndex,size_t matchStart,size_t matchLimit,JSLinearString * text,const CharT * repChars,size_t repLength)2762 static bool AppendDollarReplacement(StringBuffer& newReplaceChars,
2763                                     size_t firstDollarIndex, size_t matchStart,
2764                                     size_t matchLimit, JSLinearString* text,
2765                                     const CharT* repChars, size_t repLength) {
2766   MOZ_ASSERT(firstDollarIndex < repLength);
2767   MOZ_ASSERT(matchStart <= matchLimit);
2768   MOZ_ASSERT(matchLimit <= text->length());
2769 
2770   // Move the pre-dollar chunk in bulk.
2771   if (!newReplaceChars.append(repChars, firstDollarIndex)) {
2772     return false;
2773   }
2774 
2775   // Move the rest char-by-char, interpreting dollars as we encounter them.
2776   const CharT* repLimit = repChars + repLength;
2777   for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
2778     if (*it != '$' || it == repLimit - 1) {
2779       if (!newReplaceChars.append(*it)) {
2780         return false;
2781       }
2782       continue;
2783     }
2784 
2785     switch (*(it + 1)) {
2786       case '$':
2787         // Eat one of the dollars.
2788         if (!newReplaceChars.append(*it)) {
2789           return false;
2790         }
2791         break;
2792       case '&':
2793         if (!newReplaceChars.appendSubstring(text, matchStart,
2794                                              matchLimit - matchStart)) {
2795           return false;
2796         }
2797         break;
2798       case '`':
2799         if (!newReplaceChars.appendSubstring(text, 0, matchStart)) {
2800           return false;
2801         }
2802         break;
2803       case '\'':
2804         if (!newReplaceChars.appendSubstring(text, matchLimit,
2805                                              text->length() - matchLimit)) {
2806           return false;
2807         }
2808         break;
2809       default:
2810         // The dollar we saw was not special (no matter what its mother told
2811         // it).
2812         if (!newReplaceChars.append(*it)) {
2813           return false;
2814         }
2815         continue;
2816     }
2817     ++it;  // We always eat an extra char in the above switch.
2818   }
2819 
2820   return true;
2821 }
2822 
2823 /*
2824  * Perform a linear-scan dollar substitution on the replacement text.
2825  */
InterpretDollarReplacement(JSContext * cx,HandleString textstrArg,HandleLinearString repstr,uint32_t firstDollarIndex,size_t matchStart,size_t patternLength)2826 static JSLinearString* InterpretDollarReplacement(
2827     JSContext* cx, HandleString textstrArg, HandleLinearString repstr,
2828     uint32_t firstDollarIndex, size_t matchStart, size_t patternLength) {
2829   RootedLinearString textstr(cx, textstrArg->ensureLinear(cx));
2830   if (!textstr) {
2831     return nullptr;
2832   }
2833 
2834   size_t matchLimit = matchStart + patternLength;
2835 
2836   /*
2837    * Most probably:
2838    *
2839    *      len(newstr) >= len(orig) - len(match) + len(replacement)
2840    *
2841    * Note that dollar vars _could_ make the resulting text smaller than this.
2842    */
2843   JSStringBuilder newReplaceChars(cx);
2844   if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars()) {
2845     return nullptr;
2846   }
2847 
2848   if (!newReplaceChars.reserve(textstr->length() - patternLength +
2849                                repstr->length())) {
2850     return nullptr;
2851   }
2852 
2853   bool res;
2854   if (repstr->hasLatin1Chars()) {
2855     AutoCheckCannotGC nogc;
2856     res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2857                                   matchLimit, textstr,
2858                                   repstr->latin1Chars(nogc), repstr->length());
2859   } else {
2860     AutoCheckCannotGC nogc;
2861     res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2862                                   matchLimit, textstr,
2863                                   repstr->twoByteChars(nogc), repstr->length());
2864   }
2865   if (!res) {
2866     return nullptr;
2867   }
2868 
2869   return newReplaceChars.finishString();
2870 }
2871 
2872 template <typename StrChar, typename RepChar>
StrFlatReplaceGlobal(JSContext * cx,JSLinearString * str,JSLinearString * pat,JSLinearString * rep,StringBuffer & sb)2873 static bool StrFlatReplaceGlobal(JSContext* cx, JSLinearString* str,
2874                                  JSLinearString* pat, JSLinearString* rep,
2875                                  StringBuffer& sb) {
2876   MOZ_ASSERT(str->length() > 0);
2877 
2878   AutoCheckCannotGC nogc;
2879   const StrChar* strChars = str->chars<StrChar>(nogc);
2880   const RepChar* repChars = rep->chars<RepChar>(nogc);
2881 
2882   // The pattern is empty, so we interleave the replacement string in-between
2883   // each character.
2884   if (!pat->length()) {
2885     CheckedInt<uint32_t> strLength(str->length());
2886     CheckedInt<uint32_t> repLength(rep->length());
2887     CheckedInt<uint32_t> length = repLength * (strLength - 1) + strLength;
2888     if (!length.isValid()) {
2889       ReportAllocationOverflow(cx);
2890       return false;
2891     }
2892 
2893     if (!sb.reserve(length.value())) {
2894       return false;
2895     }
2896 
2897     for (unsigned i = 0; i < str->length() - 1; ++i, ++strChars) {
2898       sb.infallibleAppend(*strChars);
2899       sb.infallibleAppend(repChars, rep->length());
2900     }
2901     sb.infallibleAppend(*strChars);
2902     return true;
2903   }
2904 
2905   // If it's true, we are sure that the result's length is, at least, the same
2906   // length as |str->length()|.
2907   if (rep->length() >= pat->length()) {
2908     if (!sb.reserve(str->length())) {
2909       return false;
2910     }
2911   }
2912 
2913   uint32_t start = 0;
2914   for (;;) {
2915     int match = StringMatch(str, pat, start);
2916     if (match < 0) {
2917       break;
2918     }
2919     if (!sb.append(strChars + start, match - start)) {
2920       return false;
2921     }
2922     if (!sb.append(repChars, rep->length())) {
2923       return false;
2924     }
2925     start = match + pat->length();
2926   }
2927 
2928   if (!sb.append(strChars + start, str->length() - start)) {
2929     return false;
2930   }
2931 
2932   return true;
2933 }
2934 
2935 // This is identical to "str.split(pattern).join(replacement)" except that we
2936 // do some deforestation optimization in Ion.
StringFlatReplaceString(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2937 JSString* js::StringFlatReplaceString(JSContext* cx, HandleString string,
2938                                       HandleString pattern,
2939                                       HandleString replacement) {
2940   MOZ_ASSERT(string);
2941   MOZ_ASSERT(pattern);
2942   MOZ_ASSERT(replacement);
2943 
2944   if (!string->length()) {
2945     return string;
2946   }
2947 
2948   RootedLinearString linearRepl(cx, replacement->ensureLinear(cx));
2949   if (!linearRepl) {
2950     return nullptr;
2951   }
2952 
2953   RootedLinearString linearPat(cx, pattern->ensureLinear(cx));
2954   if (!linearPat) {
2955     return nullptr;
2956   }
2957 
2958   RootedLinearString linearStr(cx, string->ensureLinear(cx));
2959   if (!linearStr) {
2960     return nullptr;
2961   }
2962 
2963   JSStringBuilder sb(cx);
2964   if (linearStr->hasTwoByteChars()) {
2965     if (!sb.ensureTwoByteChars()) {
2966       return nullptr;
2967     }
2968     if (linearRepl->hasTwoByteChars()) {
2969       if (!StrFlatReplaceGlobal<char16_t, char16_t>(cx, linearStr, linearPat,
2970                                                     linearRepl, sb)) {
2971         return nullptr;
2972       }
2973     } else {
2974       if (!StrFlatReplaceGlobal<char16_t, Latin1Char>(cx, linearStr, linearPat,
2975                                                       linearRepl, sb)) {
2976         return nullptr;
2977       }
2978     }
2979   } else {
2980     if (linearRepl->hasTwoByteChars()) {
2981       if (!sb.ensureTwoByteChars()) {
2982         return nullptr;
2983       }
2984       if (!StrFlatReplaceGlobal<Latin1Char, char16_t>(cx, linearStr, linearPat,
2985                                                       linearRepl, sb)) {
2986         return nullptr;
2987       }
2988     } else {
2989       if (!StrFlatReplaceGlobal<Latin1Char, Latin1Char>(
2990               cx, linearStr, linearPat, linearRepl, sb)) {
2991         return nullptr;
2992       }
2993     }
2994   }
2995 
2996   return sb.finishString();
2997 }
2998 
str_replace_string_raw(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2999 JSString* js::str_replace_string_raw(JSContext* cx, HandleString string,
3000                                      HandleString pattern,
3001                                      HandleString replacement) {
3002   RootedLinearString repl(cx, replacement->ensureLinear(cx));
3003   if (!repl) {
3004     return nullptr;
3005   }
3006 
3007   RootedLinearString pat(cx, pattern->ensureLinear(cx));
3008   if (!pat) {
3009     return nullptr;
3010   }
3011 
3012   size_t patternLength = pat->length();
3013   int32_t match;
3014   uint32_t dollarIndex;
3015 
3016   {
3017     AutoCheckCannotGC nogc;
3018     dollarIndex =
3019         repl->hasLatin1Chars()
3020             ? FindDollarIndex(repl->latin1Chars(nogc), repl->length())
3021             : FindDollarIndex(repl->twoByteChars(nogc), repl->length());
3022   }
3023 
3024   /*
3025    * |string| could be a rope, so we want to avoid flattening it for as
3026    * long as possible.
3027    */
3028   if (string->isRope()) {
3029     if (!RopeMatch(cx, &string->asRope(), pat, &match)) {
3030       return nullptr;
3031     }
3032   } else {
3033     match = StringMatch(&string->asLinear(), pat, 0);
3034   }
3035 
3036   if (match < 0) {
3037     return string;
3038   }
3039 
3040   if (dollarIndex != UINT32_MAX) {
3041     repl = InterpretDollarReplacement(cx, string, repl, dollarIndex, match,
3042                                       patternLength);
3043     if (!repl) {
3044       return nullptr;
3045     }
3046   } else if (string->isRope()) {
3047     return BuildFlatRopeReplacement(cx, string, repl, match, patternLength);
3048   }
3049   return BuildFlatReplacement(cx, string, repl, match, patternLength);
3050 }
3051 
3052 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall
3053 // Steps 7-16 when functionalReplace is false and searchString is not empty.
3054 //
3055 // The steps are quite different, for performance. Loops in steps 11 and 14
3056 // are fused. GetSubstitution is optimized away when possible.
3057 template <typename StrChar, typename RepChar>
ReplaceAll(JSContext * cx,JSLinearString * string,JSLinearString * searchString,JSLinearString * replaceString)3058 static JSString* ReplaceAll(JSContext* cx, JSLinearString* string,
3059                             JSLinearString* searchString,
3060                             JSLinearString* replaceString) {
3061   // Step 7.
3062   const size_t stringLength = string->length();
3063   const size_t searchLength = searchString->length();
3064   const size_t replaceLength = replaceString->length();
3065 
3066   MOZ_ASSERT(stringLength > 0);
3067   MOZ_ASSERT(searchLength > 0);
3068   MOZ_ASSERT(stringLength >= searchLength);
3069 
3070   // Step 8 (advanceBy is equal to searchLength when searchLength > 0).
3071 
3072   // Step 9 (not needed in this implementation).
3073 
3074   // Step 10.
3075   // Find the first match.
3076   int32_t position = StringMatch(string, searchString, 0);
3077 
3078   // Nothing to replace, so return early.
3079   if (position < 0) {
3080     return string;
3081   }
3082 
3083   // Step 11 (moved below).
3084 
3085   // Step 12.
3086   uint32_t endOfLastMatch = 0;
3087 
3088   // Step 13.
3089   JSStringBuilder result(cx);
3090   if constexpr (std::is_same_v<StrChar, char16_t> ||
3091                 std::is_same_v<RepChar, char16_t>) {
3092     if (!result.ensureTwoByteChars()) {
3093       return nullptr;
3094     }
3095   }
3096 
3097   {
3098     AutoCheckCannotGC nogc;
3099     const StrChar* strChars = string->chars<StrChar>(nogc);
3100     const RepChar* repChars = replaceString->chars<RepChar>(nogc);
3101 
3102     uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength);
3103 
3104     // If it's true, we are sure that the result's length is, at least, the same
3105     // length as |str->length()|.
3106     if (replaceLength >= searchLength) {
3107       if (!result.reserve(stringLength)) {
3108         return nullptr;
3109       }
3110     }
3111 
3112     do {
3113       // Step 14.c.
3114       // Append the substring before the current match.
3115       if (!result.append(strChars + endOfLastMatch,
3116                          position - endOfLastMatch)) {
3117         return nullptr;
3118       }
3119 
3120       // Steps 14.a-b and 14.d.
3121       // Append the replacement.
3122       if (dollarIndex != UINT32_MAX) {
3123         size_t matchLimit = position + searchLength;
3124         if (!AppendDollarReplacement(result, dollarIndex, position, matchLimit,
3125                                      string, repChars, replaceLength)) {
3126           return nullptr;
3127         }
3128       } else {
3129         if (!result.append(repChars, replaceLength)) {
3130           return nullptr;
3131         }
3132       }
3133 
3134       // Step 14.e.
3135       endOfLastMatch = position + searchLength;
3136 
3137       // Step 11.
3138       // Find the next match.
3139       position = StringMatch(string, searchString, endOfLastMatch);
3140     } while (position >= 0);
3141 
3142     // Step 15.
3143     // Append the substring after the last match.
3144     if (!result.append(strChars + endOfLastMatch,
3145                        stringLength - endOfLastMatch)) {
3146       return nullptr;
3147     }
3148   }
3149 
3150   // Step 16.
3151   return result.finishString();
3152 }
3153 
3154 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall
3155 // Steps 7-16 when functionalReplace is false and searchString is the empty
3156 // string.
3157 //
3158 // The steps are quite different, for performance. Loops in steps 11 and 14
3159 // are fused. GetSubstitution is optimized away when possible.
3160 template <typename StrChar, typename RepChar>
ReplaceAllInterleave(JSContext * cx,JSLinearString * string,JSLinearString * replaceString)3161 static JSString* ReplaceAllInterleave(JSContext* cx, JSLinearString* string,
3162                                       JSLinearString* replaceString) {
3163   // Step 7.
3164   const size_t stringLength = string->length();
3165   const size_t replaceLength = replaceString->length();
3166 
3167   // Step 8 (advanceBy is 1 when searchString is the empty string).
3168 
3169   // Steps 9-12 (trivial when searchString is the empty string).
3170 
3171   // Step 13.
3172   JSStringBuilder result(cx);
3173   if constexpr (std::is_same_v<StrChar, char16_t> ||
3174                 std::is_same_v<RepChar, char16_t>) {
3175     if (!result.ensureTwoByteChars()) {
3176       return nullptr;
3177     }
3178   }
3179 
3180   {
3181     AutoCheckCannotGC nogc;
3182     const StrChar* strChars = string->chars<StrChar>(nogc);
3183     const RepChar* repChars = replaceString->chars<RepChar>(nogc);
3184 
3185     uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength);
3186 
3187     if (dollarIndex != UINT32_MAX) {
3188       if (!result.reserve(stringLength)) {
3189         return nullptr;
3190       }
3191     } else {
3192       // Compute the exact result length when no substitutions take place.
3193       CheckedInt<uint32_t> strLength(stringLength);
3194       CheckedInt<uint32_t> repLength(replaceLength);
3195       CheckedInt<uint32_t> length = strLength + (strLength + 1) * repLength;
3196       if (!length.isValid()) {
3197         ReportAllocationOverflow(cx);
3198         return nullptr;
3199       }
3200 
3201       if (!result.reserve(length.value())) {
3202         return nullptr;
3203       }
3204     }
3205 
3206     auto appendReplacement = [&](size_t match) {
3207       if (dollarIndex != UINT32_MAX) {
3208         return AppendDollarReplacement(result, dollarIndex, match, match,
3209                                        string, repChars, replaceLength);
3210       }
3211       return result.append(repChars, replaceLength);
3212     };
3213 
3214     for (size_t index = 0; index < stringLength; index++) {
3215       // Steps 11, 14.a-b and 14.d.
3216       // The empty string matches before each character.
3217       if (!appendReplacement(index)) {
3218         return nullptr;
3219       }
3220 
3221       // Step 14.c.
3222       if (!result.append(strChars[index])) {
3223         return nullptr;
3224       }
3225     }
3226 
3227     // Steps 11, 14.a-b and 14.d.
3228     // The empty string also matches at the end of the string.
3229     if (!appendReplacement(stringLength)) {
3230       return nullptr;
3231     }
3232 
3233     // Step 15 (not applicable when searchString is the empty string).
3234   }
3235 
3236   // Step 16.
3237   return result.finishString();
3238 }
3239 
3240 // String.prototype.replaceAll (Stage 3 proposal)
3241 // https://tc39.es/proposal-string-replaceall/
3242 //
3243 // String.prototype.replaceAll ( searchValue, replaceValue )
3244 //
3245 // Steps 7-16 when functionalReplace is false.
str_replaceAll_string_raw(JSContext * cx,HandleString string,HandleString searchString,HandleString replaceString)3246 JSString* js::str_replaceAll_string_raw(JSContext* cx, HandleString string,
3247                                         HandleString searchString,
3248                                         HandleString replaceString) {
3249   const size_t stringLength = string->length();
3250   const size_t searchLength = searchString->length();
3251 
3252   // Directly return when we're guaranteed to find no match.
3253   if (searchLength > stringLength) {
3254     return string;
3255   }
3256 
3257   RootedLinearString str(cx, string->ensureLinear(cx));
3258   if (!str) {
3259     return nullptr;
3260   }
3261 
3262   RootedLinearString repl(cx, replaceString->ensureLinear(cx));
3263   if (!repl) {
3264     return nullptr;
3265   }
3266 
3267   RootedLinearString search(cx, searchString->ensureLinear(cx));
3268   if (!search) {
3269     return nullptr;
3270   }
3271 
3272   // The pattern is empty, so we interleave the replacement string in-between
3273   // each character.
3274   if (searchLength == 0) {
3275     if (str->hasTwoByteChars()) {
3276       if (repl->hasTwoByteChars()) {
3277         return ReplaceAllInterleave<char16_t, char16_t>(cx, str, repl);
3278       }
3279       return ReplaceAllInterleave<char16_t, Latin1Char>(cx, str, repl);
3280     }
3281     if (repl->hasTwoByteChars()) {
3282       return ReplaceAllInterleave<Latin1Char, char16_t>(cx, str, repl);
3283     }
3284     return ReplaceAllInterleave<Latin1Char, Latin1Char>(cx, str, repl);
3285   }
3286 
3287   MOZ_ASSERT(stringLength > 0);
3288 
3289   if (str->hasTwoByteChars()) {
3290     if (repl->hasTwoByteChars()) {
3291       return ReplaceAll<char16_t, char16_t>(cx, str, search, repl);
3292     }
3293     return ReplaceAll<char16_t, Latin1Char>(cx, str, search, repl);
3294   }
3295   if (repl->hasTwoByteChars()) {
3296     return ReplaceAll<Latin1Char, char16_t>(cx, str, search, repl);
3297   }
3298   return ReplaceAll<Latin1Char, Latin1Char>(cx, str, search, repl);
3299 }
3300 
SingleElementStringArray(JSContext * cx,HandleLinearString str)3301 static ArrayObject* SingleElementStringArray(JSContext* cx,
3302                                              HandleLinearString str) {
3303   ArrayObject* array = NewDenseFullyAllocatedArray(cx, 1);
3304   if (!array) {
3305     return nullptr;
3306   }
3307   array->setDenseInitializedLength(1);
3308   array->initDenseElement(0, StringValue(str));
3309   return array;
3310 }
3311 
3312 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,HandleLinearString sep)3313 static ArrayObject* SplitHelper(JSContext* cx, HandleLinearString str,
3314                                 uint32_t limit, HandleLinearString sep) {
3315   size_t strLength = str->length();
3316   size_t sepLength = sep->length();
3317   MOZ_ASSERT(sepLength != 0);
3318 
3319   // Step 12.
3320   if (strLength == 0) {
3321     // Step 12.a.
3322     int match = StringMatch(str, sep, 0);
3323 
3324     // Step 12.b.
3325     if (match != -1) {
3326       return NewDenseEmptyArray(cx);
3327     }
3328 
3329     // Steps 12.c-e.
3330     return SingleElementStringArray(cx, str);
3331   }
3332 
3333   // Step 3 (reordered).
3334   RootedValueVector splits(cx);
3335 
3336   // Step 8 (reordered).
3337   size_t lastEndIndex = 0;
3338 
3339   // Step 13.
3340   size_t index = 0;
3341 
3342   // Step 14.
3343   while (index != strLength) {
3344     // Step 14.a.
3345     int match = StringMatch(str, sep, index);
3346 
3347     // Step 14.b.
3348     //
3349     // Our match algorithm differs from the spec in that it returns the
3350     // next index at which a match happens.  If no match happens we're
3351     // done.
3352     //
3353     // But what if the match is at the end of the string (and the string is
3354     // not empty)?  Per 14.c.i this shouldn't be a match, so we have to
3355     // specially exclude it.  Thus this case should hold:
3356     //
3357     //   var a = "abc".split(/\b/);
3358     //   assertEq(a.length, 1);
3359     //   assertEq(a[0], "abc");
3360     if (match == -1) {
3361       break;
3362     }
3363 
3364     // Step 14.c.
3365     size_t endIndex = match + sepLength;
3366 
3367     // Step 14.c.i.
3368     if (endIndex == lastEndIndex) {
3369       index++;
3370       continue;
3371     }
3372 
3373     // Step 14.c.ii.
3374     MOZ_ASSERT(lastEndIndex < endIndex);
3375     MOZ_ASSERT(sepLength <= strLength);
3376     MOZ_ASSERT(lastEndIndex + sepLength <= endIndex);
3377 
3378     // Step 14.c.ii.1.
3379     size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
3380     JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3381 
3382     // Steps 14.c.ii.2-4.
3383     if (!sub || !splits.append(StringValue(sub))) {
3384       return nullptr;
3385     }
3386 
3387     // Step 14.c.ii.5.
3388     if (splits.length() == limit) {
3389       return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3390     }
3391 
3392     // Step 14.c.ii.6.
3393     index = endIndex;
3394 
3395     // Step 14.c.ii.7.
3396     lastEndIndex = index;
3397   }
3398 
3399   // Step 15.
3400   JSString* sub =
3401       NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
3402 
3403   // Steps 16-17.
3404   if (!sub || !splits.append(StringValue(sub))) {
3405     return nullptr;
3406   }
3407 
3408   // Step 18.
3409   return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3410 }
3411 
3412 // Fast-path for splitting a string into a character array via split("").
CharSplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit)3413 static ArrayObject* CharSplitHelper(JSContext* cx, HandleLinearString str,
3414                                     uint32_t limit) {
3415   size_t strLength = str->length();
3416   if (strLength == 0) {
3417     return NewDenseEmptyArray(cx);
3418   }
3419 
3420   js::StaticStrings& staticStrings = cx->staticStrings();
3421   uint32_t resultlen = (limit < strLength ? limit : strLength);
3422   MOZ_ASSERT(limit > 0 && resultlen > 0,
3423              "Neither limit nor strLength is zero, so resultlen is greater "
3424              "than zero.");
3425 
3426   RootedArrayObject splits(cx, NewDenseFullyAllocatedArray(cx, resultlen));
3427   if (!splits) {
3428     return nullptr;
3429   }
3430 
3431   if (str->hasLatin1Chars()) {
3432     splits->setDenseInitializedLength(resultlen);
3433 
3434     JS::AutoCheckCannotGC nogc;
3435     const Latin1Char* latin1Chars = str->latin1Chars(nogc);
3436     for (size_t i = 0; i < resultlen; ++i) {
3437       Latin1Char c = latin1Chars[i];
3438       MOZ_ASSERT(staticStrings.hasUnit(c));
3439       splits->initDenseElement(i, StringValue(staticStrings.getUnit(c)));
3440     }
3441   } else {
3442     splits->ensureDenseInitializedLength(0, resultlen);
3443 
3444     for (size_t i = 0; i < resultlen; ++i) {
3445       JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
3446       if (!sub) {
3447         return nullptr;
3448       }
3449       splits->initDenseElement(i, StringValue(sub));
3450     }
3451   }
3452 
3453   return splits;
3454 }
3455 
3456 template <typename TextChar>
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,const TextChar * text,uint32_t textLen,char16_t patCh)3457 static MOZ_ALWAYS_INLINE ArrayObject* SplitSingleCharHelper(
3458     JSContext* cx, HandleLinearString str, const TextChar* text,
3459     uint32_t textLen, char16_t patCh) {
3460   // Count the number of occurrences of patCh within text.
3461   uint32_t count = 0;
3462   for (size_t index = 0; index < textLen; index++) {
3463     if (static_cast<char16_t>(text[index]) == patCh) {
3464       count++;
3465     }
3466   }
3467 
3468   // Handle zero-occurrence case - return input string in an array.
3469   if (count == 0) {
3470     return SingleElementStringArray(cx, str);
3471   }
3472 
3473   // Create the result array for the substring values.
3474   RootedArrayObject splits(cx, NewDenseFullyAllocatedArray(cx, count + 1));
3475   if (!splits) {
3476     return nullptr;
3477   }
3478   splits->ensureDenseInitializedLength(0, count + 1);
3479 
3480   // Add substrings.
3481   uint32_t splitsIndex = 0;
3482   size_t lastEndIndex = 0;
3483   for (size_t index = 0; index < textLen; index++) {
3484     if (static_cast<char16_t>(text[index]) == patCh) {
3485       size_t subLength = size_t(index - lastEndIndex);
3486       JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3487       if (!sub) {
3488         return nullptr;
3489       }
3490       splits->initDenseElement(splitsIndex++, StringValue(sub));
3491       lastEndIndex = index + 1;
3492     }
3493   }
3494 
3495   // Add substring for tail of string (after last match).
3496   JSString* sub =
3497       NewDependentString(cx, str, lastEndIndex, textLen - lastEndIndex);
3498   if (!sub) {
3499     return nullptr;
3500   }
3501   splits->initDenseElement(splitsIndex++, StringValue(sub));
3502 
3503   return splits;
3504 }
3505 
3506 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,char16_t ch)3507 static ArrayObject* SplitSingleCharHelper(JSContext* cx, HandleLinearString str,
3508                                           char16_t ch) {
3509   // Step 12.
3510   size_t strLength = str->length();
3511 
3512   AutoStableStringChars linearChars(cx);
3513   if (!linearChars.init(cx, str)) {
3514     return nullptr;
3515   }
3516 
3517   if (linearChars.isLatin1()) {
3518     return SplitSingleCharHelper(cx, str, linearChars.latin1Chars(), strLength,
3519                                  ch);
3520   }
3521 
3522   return SplitSingleCharHelper(cx, str, linearChars.twoByteChars(), strLength,
3523                                ch);
3524 }
3525 
3526 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
StringSplitString(JSContext * cx,HandleString str,HandleString sep,uint32_t limit)3527 ArrayObject* js::StringSplitString(JSContext* cx, HandleString str,
3528                                    HandleString sep, uint32_t limit) {
3529   MOZ_ASSERT(limit > 0, "Only called for strictly positive limit.");
3530 
3531   RootedLinearString linearStr(cx, str->ensureLinear(cx));
3532   if (!linearStr) {
3533     return nullptr;
3534   }
3535 
3536   RootedLinearString linearSep(cx, sep->ensureLinear(cx));
3537   if (!linearSep) {
3538     return nullptr;
3539   }
3540 
3541   if (linearSep->length() == 0) {
3542     return CharSplitHelper(cx, linearStr, limit);
3543   }
3544 
3545   if (linearSep->length() == 1 && limit >= static_cast<uint32_t>(INT32_MAX)) {
3546     char16_t ch = linearSep->latin1OrTwoByteChar(0);
3547     return SplitSingleCharHelper(cx, linearStr, ch);
3548   }
3549 
3550   return SplitHelper(cx, linearStr, limit, linearSep);
3551 }
3552 
3553 static const JSFunctionSpec string_methods[] = {
3554     JS_FN(js_toSource_str, str_toSource, 0, 0),
3555 
3556     /* Java-like methods. */
3557     JS_INLINABLE_FN(js_toString_str, str_toString, 0, 0, StringToString),
3558     JS_INLINABLE_FN(js_valueOf_str, str_toString, 0, 0, StringValueOf),
3559     JS_INLINABLE_FN("toLowerCase", str_toLowerCase, 0, 0, StringToLowerCase),
3560     JS_INLINABLE_FN("toUpperCase", str_toUpperCase, 0, 0, StringToUpperCase),
3561     JS_INLINABLE_FN("charAt", str_charAt, 1, 0, StringCharAt),
3562     JS_INLINABLE_FN("charCodeAt", str_charCodeAt, 1, 0, StringCharCodeAt),
3563     JS_SELF_HOSTED_FN("substring", "String_substring", 2, 0),
3564     JS_SELF_HOSTED_FN("padStart", "String_pad_start", 2, 0),
3565     JS_SELF_HOSTED_FN("padEnd", "String_pad_end", 2, 0),
3566     JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1, 0),
3567     JS_FN("includes", str_includes, 1, 0), JS_FN("indexOf", str_indexOf, 1, 0),
3568     JS_FN("lastIndexOf", str_lastIndexOf, 1, 0),
3569     JS_FN("startsWith", str_startsWith, 1, 0),
3570     JS_FN("endsWith", str_endsWith, 1, 0), JS_FN("trim", str_trim, 0, 0),
3571     JS_FN("trimStart", str_trimStart, 0, 0),
3572     JS_FN("trimEnd", str_trimEnd, 0, 0),
3573 #if JS_HAS_INTL_API
3574     JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_toLocaleLowerCase", 0, 0),
3575     JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_toLocaleUpperCase", 0, 0),
3576     JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1, 0),
3577 #else
3578     JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0, 0),
3579     JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0, 0),
3580     JS_FN("localeCompare", str_localeCompare, 1, 0),
3581 #endif
3582     JS_SELF_HOSTED_FN("repeat", "String_repeat", 1, 0),
3583 #if JS_HAS_INTL_API
3584     JS_FN("normalize", str_normalize, 0, 0),
3585 #endif
3586 
3587     /* Perl-ish methods (search is actually Python-esque). */
3588     JS_SELF_HOSTED_FN("match", "String_match", 1, 0),
3589     JS_SELF_HOSTED_FN("matchAll", "String_matchAll", 1, 0),
3590     JS_SELF_HOSTED_FN("search", "String_search", 1, 0),
3591     JS_SELF_HOSTED_FN("replace", "String_replace", 2, 0),
3592     JS_SELF_HOSTED_FN("replaceAll", "String_replaceAll", 2, 0),
3593     JS_SELF_HOSTED_FN("split", "String_split", 2, 0),
3594     JS_SELF_HOSTED_FN("substr", "String_substr", 2, 0),
3595 
3596     /* Python-esque sequence methods. */
3597     JS_SELF_HOSTED_FN("concat", "String_concat", 1, 0),
3598     JS_SELF_HOSTED_FN("slice", "String_slice", 2, 0),
3599 
3600     JS_SELF_HOSTED_FN("at", "String_at", 1, 0),
3601 
3602     /* HTML string methods. */
3603     JS_SELF_HOSTED_FN("bold", "String_bold", 0, 0),
3604     JS_SELF_HOSTED_FN("italics", "String_italics", 0, 0),
3605     JS_SELF_HOSTED_FN("fixed", "String_fixed", 0, 0),
3606     JS_SELF_HOSTED_FN("strike", "String_strike", 0, 0),
3607     JS_SELF_HOSTED_FN("small", "String_small", 0, 0),
3608     JS_SELF_HOSTED_FN("big", "String_big", 0, 0),
3609     JS_SELF_HOSTED_FN("blink", "String_blink", 0, 0),
3610     JS_SELF_HOSTED_FN("sup", "String_sup", 0, 0),
3611     JS_SELF_HOSTED_FN("sub", "String_sub", 0, 0),
3612     JS_SELF_HOSTED_FN("anchor", "String_anchor", 1, 0),
3613     JS_SELF_HOSTED_FN("link", "String_link", 1, 0),
3614     JS_SELF_HOSTED_FN("fontcolor", "String_fontcolor", 1, 0),
3615     JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1, 0),
3616 
3617     JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0, 0), JS_FS_END};
3618 
3619 // ES6 rev 27 (2014 Aug 24) 21.1.1
StringConstructor(JSContext * cx,unsigned argc,Value * vp)3620 bool js::StringConstructor(JSContext* cx, unsigned argc, Value* vp) {
3621   CallArgs args = CallArgsFromVp(argc, vp);
3622 
3623   RootedString str(cx);
3624   if (args.length() > 0) {
3625     if (!args.isConstructing() && args[0].isSymbol()) {
3626       return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
3627     }
3628 
3629     str = ToString<CanGC>(cx, args[0]);
3630     if (!str) {
3631       return false;
3632     }
3633   } else {
3634     str = cx->runtime()->emptyString;
3635   }
3636 
3637   if (args.isConstructing()) {
3638     RootedObject proto(cx);
3639     if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_String, &proto)) {
3640       return false;
3641     }
3642 
3643     StringObject* strobj = StringObject::create(cx, str, proto);
3644     if (!strobj) {
3645       return false;
3646     }
3647     args.rval().setObject(*strobj);
3648     return true;
3649   }
3650 
3651   args.rval().setString(str);
3652   return true;
3653 }
3654 
str_fromCharCode(JSContext * cx,unsigned argc,Value * vp)3655 bool js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) {
3656   CallArgs args = CallArgsFromVp(argc, vp);
3657 
3658   MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);
3659 
3660   // Optimize the single-char case.
3661   if (args.length() == 1) {
3662     return str_fromCharCode_one_arg(cx, args[0], args.rval());
3663   }
3664 
3665   // Optimize the case where the result will definitely fit in an inline
3666   // string (thin or fat) and so we don't need to malloc the chars. (We could
3667   // cover some cases where args.length() goes up to
3668   // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
3669   // all Latin-1, but it doesn't seem worth the effort.)
3670   InlineCharBuffer<char16_t> chars;
3671   if (!chars.maybeAlloc(cx, args.length())) {
3672     return false;
3673   }
3674 
3675   char16_t* rawChars = chars.get();
3676   for (unsigned i = 0; i < args.length(); i++) {
3677     uint16_t code;
3678     if (!ToUint16(cx, args[i], &code)) {
3679       return false;
3680     }
3681 
3682     rawChars[i] = char16_t(code);
3683   }
3684 
3685   JSString* str = chars.toString(cx, args.length());
3686   if (!str) {
3687     return false;
3688   }
3689 
3690   args.rval().setString(str);
3691   return true;
3692 }
3693 
CodeUnitToString(JSContext * cx,uint16_t ucode,MutableHandleValue rval)3694 static inline bool CodeUnitToString(JSContext* cx, uint16_t ucode,
3695                                     MutableHandleValue rval) {
3696   if (StaticStrings::hasUnit(ucode)) {
3697     rval.setString(cx->staticStrings().getUnit(ucode));
3698     return true;
3699   }
3700 
3701   char16_t c = char16_t(ucode);
3702   JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, &c, 1);
3703   if (!str) {
3704     return false;
3705   }
3706 
3707   rval.setString(str);
3708   return true;
3709 }
3710 
str_fromCharCode_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3711 bool js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code,
3712                                   MutableHandleValue rval) {
3713   uint16_t ucode;
3714 
3715   if (!ToUint16(cx, code, &ucode)) {
3716     return false;
3717   }
3718 
3719   return CodeUnitToString(cx, ucode, rval);
3720 }
3721 
ToCodePoint(JSContext * cx,HandleValue code,uint32_t * codePoint)3722 static MOZ_ALWAYS_INLINE bool ToCodePoint(JSContext* cx, HandleValue code,
3723                                           uint32_t* codePoint) {
3724   // String.fromCodePoint, Steps 5.a-b.
3725 
3726   // Fast path for the common case - the input is already an int32.
3727   if (code.isInt32()) {
3728     int32_t nextCP = code.toInt32();
3729     if (nextCP >= 0 && nextCP <= int32_t(unicode::NonBMPMax)) {
3730       *codePoint = uint32_t(nextCP);
3731       return true;
3732     }
3733   }
3734 
3735   double nextCP;
3736   if (!ToNumber(cx, code, &nextCP)) {
3737     return false;
3738   }
3739 
3740   // String.fromCodePoint, Steps 5.c-d.
3741   if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 ||
3742       nextCP > unicode::NonBMPMax) {
3743     ToCStringBuf cbuf;
3744     if (const char* numStr = NumberToCString(cx, &cbuf, nextCP)) {
3745       JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
3746                                 JSMSG_NOT_A_CODEPOINT, numStr);
3747     }
3748     return false;
3749   }
3750 
3751   *codePoint = uint32_t(nextCP);
3752   return true;
3753 }
3754 
str_fromCodePoint_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3755 bool js::str_fromCodePoint_one_arg(JSContext* cx, HandleValue code,
3756                                    MutableHandleValue rval) {
3757   // Steps 1-4 (omitted).
3758 
3759   // Steps 5.a-d.
3760   uint32_t codePoint;
3761   if (!ToCodePoint(cx, code, &codePoint)) {
3762     return false;
3763   }
3764 
3765   // Steps 5.e, 6.
3766   if (!unicode::IsSupplementary(codePoint)) {
3767     return CodeUnitToString(cx, uint16_t(codePoint), rval);
3768   }
3769 
3770   char16_t chars[] = {unicode::LeadSurrogate(codePoint),
3771                       unicode::TrailSurrogate(codePoint)};
3772   JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, chars, 2);
3773   if (!str) {
3774     return false;
3775   }
3776 
3777   rval.setString(str);
3778   return true;
3779 }
3780 
str_fromCodePoint_few_args(JSContext * cx,const CallArgs & args)3781 static bool str_fromCodePoint_few_args(JSContext* cx, const CallArgs& args) {
3782   MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2);
3783 
3784   // Steps 1-2 (omitted).
3785 
3786   // Step 3.
3787   char16_t elements[JSFatInlineString::MAX_LENGTH_TWO_BYTE];
3788 
3789   // Steps 4-5.
3790   unsigned length = 0;
3791   for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3792     // Steps 5.a-d.
3793     uint32_t codePoint;
3794     if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
3795       return false;
3796     }
3797 
3798     // Step 5.e.
3799     unicode::UTF16Encode(codePoint, elements, &length);
3800   }
3801 
3802   // Step 6.
3803   JSString* str = NewStringCopyN<CanGC>(cx, elements, length);
3804   if (!str) {
3805     return false;
3806   }
3807 
3808   args.rval().setString(str);
3809   return true;
3810 }
3811 
3812 // ES2017 draft rev 40edb3a95a475c1b251141ac681b8793129d9a6d
3813 // 21.1.2.2 String.fromCodePoint(...codePoints)
str_fromCodePoint(JSContext * cx,unsigned argc,Value * vp)3814 bool js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) {
3815   CallArgs args = CallArgsFromVp(argc, vp);
3816 
3817   // Optimize the single code-point case.
3818   if (args.length() == 1) {
3819     return str_fromCodePoint_one_arg(cx, args[0], args.rval());
3820   }
3821 
3822   // Optimize the case where the result will definitely fit in an inline
3823   // string (thin or fat) and so we don't need to malloc the chars. (We could
3824   // cover some cases where |args.length()| goes up to
3825   // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
3826   // are all Latin-1, but it doesn't seem worth the effort.)
3827   if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2) {
3828     return str_fromCodePoint_few_args(cx, args);
3829   }
3830 
3831   // Steps 1-2 (omitted).
3832 
3833   // Step 3.
3834   static_assert(
3835       ARGS_LENGTH_MAX < std::numeric_limits<decltype(args.length())>::max() / 2,
3836       "|args.length() * 2| does not overflow");
3837   auto elements = cx->make_pod_arena_array<char16_t>(js::StringBufferArena,
3838                                                      args.length() * 2);
3839   if (!elements) {
3840     return false;
3841   }
3842 
3843   // Steps 4-5.
3844   unsigned length = 0;
3845   for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3846     // Steps 5.a-d.
3847     uint32_t codePoint;
3848     if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
3849       return false;
3850     }
3851 
3852     // Step 5.e.
3853     unicode::UTF16Encode(codePoint, elements.get(), &length);
3854   }
3855 
3856   // Step 6.
3857   JSString* str = NewString<CanGC>(cx, std::move(elements), length);
3858   if (!str) {
3859     return false;
3860   }
3861 
3862   args.rval().setString(str);
3863   return true;
3864 }
3865 
3866 static const JSFunctionSpec string_static_methods[] = {
3867     JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0,
3868                     StringFromCharCode),
3869     JS_INLINABLE_FN("fromCodePoint", js::str_fromCodePoint, 1, 0,
3870                     StringFromCodePoint),
3871 
3872     JS_SELF_HOSTED_FN("raw", "String_static_raw", 1, 0), JS_FS_END};
3873 
3874 /* static */
assignInitialShape(JSContext * cx,Handle<StringObject * > obj)3875 Shape* StringObject::assignInitialShape(JSContext* cx,
3876                                         Handle<StringObject*> obj) {
3877   MOZ_ASSERT(obj->empty());
3878 
3879   if (!NativeObject::addPropertyInReservedSlot(cx, obj, cx->names().length,
3880                                                LENGTH_SLOT, {})) {
3881     return nullptr;
3882   }
3883 
3884   return obj->shape();
3885 }
3886 
createPrototype(JSContext * cx,JSProtoKey key)3887 JSObject* StringObject::createPrototype(JSContext* cx, JSProtoKey key) {
3888   Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
3889   Rooted<StringObject*> proto(
3890       cx, GlobalObject::createBlankPrototype<StringObject>(cx, cx->global()));
3891   if (!proto) {
3892     return nullptr;
3893   }
3894   if (!StringObject::init(cx, proto, empty)) {
3895     return nullptr;
3896   }
3897   return proto;
3898 }
3899 
StringClassFinish(JSContext * cx,HandleObject ctor,HandleObject proto)3900 static bool StringClassFinish(JSContext* cx, HandleObject ctor,
3901                               HandleObject proto) {
3902   HandleNativeObject nativeProto = proto.as<NativeObject>();
3903 
3904   // Create "trimLeft" as an alias for "trimStart".
3905   RootedValue trimFn(cx);
3906   RootedId trimId(cx, NameToId(cx->names().trimStart));
3907   RootedId trimAliasId(cx, NameToId(cx->names().trimLeft));
3908   if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
3909       !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
3910     return false;
3911   }
3912 
3913   // Create "trimRight" as an alias for "trimEnd".
3914   trimId = NameToId(cx->names().trimEnd);
3915   trimAliasId = NameToId(cx->names().trimRight);
3916   if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
3917       !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
3918     return false;
3919   }
3920 
3921   /*
3922    * Define escape/unescape, the URI encode/decode functions, and maybe
3923    * uneval on the global object.
3924    */
3925   if (!JS_DefineFunctions(cx, cx->global(), string_functions)) {
3926     return false;
3927   }
3928 
3929   return true;
3930 }
3931 
3932 const ClassSpec StringObject::classSpec_ = {
3933     GenericCreateConstructor<StringConstructor, 1, gc::AllocKind::FUNCTION,
3934                              &jit::JitInfo_String>,
3935     StringObject::createPrototype,
3936     string_static_methods,
3937     nullptr,
3938     string_methods,
3939     nullptr,
3940     StringClassFinish};
3941 
3942 #define ____ false
3943 
3944 /*
3945  * Uri reserved chars + #:
3946  * - 35: #
3947  * - 36: $
3948  * - 38: &
3949  * - 43: +
3950  * - 44: ,
3951  * - 47: /
3952  * - 58: :
3953  * - 59: ;
3954  * - 61: =
3955  * - 63: ?
3956  * - 64: @
3957  */
3958 static const bool js_isUriReservedPlusPound[] = {
3959     // clang-format off
3960 /*       0     1     2     3     4     5     6     7     8     9  */
3961 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3962 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3963 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3964 /*  3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
3965 /*  4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
3966 /*  5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
3967 /*  6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
3968 /*  7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3969 /*  8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3970 /*  9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3971 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3972 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3973 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
3974     // clang-format on
3975 };
3976 
3977 /*
3978  * Uri unescaped chars:
3979  * -      33: !
3980  * -      39: '
3981  * -      40: (
3982  * -      41: )
3983  * -      42: *
3984  * -      45: -
3985  * -      46: .
3986  * -  48..57: 0-9
3987  * -  65..90: A-Z
3988  * -      95: _
3989  * - 97..122: a-z
3990  * -     126: ~
3991  */
3992 static const bool js_isUriUnescaped[] = {
3993     // clang-format off
3994 /*       0     1     2     3     4     5     6     7     8     9  */
3995 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3996 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3997 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3998 /*  3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
3999 /*  4 */ true, true, true, ____, ____, true, true, ____, true, true,
4000 /*  5 */ true, true, true, true, true, true, true, true, ____, ____,
4001 /*  6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4002 /*  7 */ true, true, true, true, true, true, true, true, true, true,
4003 /*  8 */ true, true, true, true, true, true, true, true, true, true,
4004 /*  9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4005 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4006 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4007 /* 12 */ true, true, true, ____, ____, ____, true, ____
4008     // clang-format on
4009 };
4010 
4011 #undef ____
4012 
TransferBufferToString(JSStringBuilder & sb,JSString * str,MutableHandleValue rval)4013 static inline bool TransferBufferToString(JSStringBuilder& sb, JSString* str,
4014                                           MutableHandleValue rval) {
4015   if (!sb.empty()) {
4016     str = sb.finishString();
4017     if (!str) {
4018       return false;
4019     }
4020   }
4021   rval.setString(str);
4022   return true;
4023 }
4024 
4025 /*
4026  * ECMA 3, 15.1.3 URI Handling Function Properties
4027  *
4028  * The following are implementations of the algorithms
4029  * given in the ECMA specification for the hidden functions
4030  * 'Encode' and 'Decode'.
4031  */
4032 enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };
4033 
4034 // Bug 1403318: GCC sometimes inlines this Encode function rather than the
4035 // caller Encode function. Annotate both functions with MOZ_NEVER_INLINE resp.
4036 // MOZ_ALWAYS_INLINE to ensure we get the desired inlining behavior.
4037 template <typename CharT>
Encode(StringBuffer & sb,const CharT * chars,size_t length,const bool * unescapedSet)4038 static MOZ_NEVER_INLINE EncodeResult Encode(StringBuffer& sb,
4039                                             const CharT* chars, size_t length,
4040                                             const bool* unescapedSet) {
4041   Latin1Char hexBuf[3];
4042   hexBuf[0] = '%';
4043 
4044   auto appendEncoded = [&sb, &hexBuf](Latin1Char c) {
4045     static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
4046 
4047     hexBuf[1] = HexDigits[c >> 4];
4048     hexBuf[2] = HexDigits[c & 0xf];
4049     return sb.append(hexBuf, 3);
4050   };
4051 
4052   auto appendRange = [&sb, chars, length](size_t start, size_t end) {
4053     MOZ_ASSERT(start <= end);
4054 
4055     if (start < end) {
4056       if (start == 0) {
4057         if (!sb.reserve(length)) {
4058           return false;
4059         }
4060       }
4061       return sb.append(chars + start, chars + end);
4062     }
4063     return true;
4064   };
4065 
4066   size_t startAppend = 0;
4067   for (size_t k = 0; k < length; k++) {
4068     CharT c = chars[k];
4069     if (c < 128 &&
4070         (js_isUriUnescaped[c] || (unescapedSet && unescapedSet[c]))) {
4071       continue;
4072     } else {
4073       if (!appendRange(startAppend, k)) {
4074         return Encode_Failure;
4075       }
4076 
4077       if constexpr (std::is_same_v<CharT, Latin1Char>) {
4078         if (c < 0x80) {
4079           if (!appendEncoded(c)) {
4080             return Encode_Failure;
4081           }
4082         } else {
4083           if (!appendEncoded(0xC0 | (c >> 6)) ||
4084               !appendEncoded(0x80 | (c & 0x3F))) {
4085             return Encode_Failure;
4086           }
4087         }
4088       } else {
4089         if (unicode::IsTrailSurrogate(c)) {
4090           return Encode_BadUri;
4091         }
4092 
4093         uint32_t v;
4094         if (!unicode::IsLeadSurrogate(c)) {
4095           v = c;
4096         } else {
4097           k++;
4098           if (k == length) {
4099             return Encode_BadUri;
4100           }
4101 
4102           char16_t c2 = chars[k];
4103           if (!unicode::IsTrailSurrogate(c2)) {
4104             return Encode_BadUri;
4105           }
4106 
4107           v = unicode::UTF16Decode(c, c2);
4108         }
4109 
4110         uint8_t utf8buf[4];
4111         size_t L = OneUcs4ToUtf8Char(utf8buf, v);
4112         for (size_t j = 0; j < L; j++) {
4113           if (!appendEncoded(utf8buf[j])) {
4114             return Encode_Failure;
4115           }
4116         }
4117       }
4118 
4119       startAppend = k + 1;
4120     }
4121   }
4122 
4123   if (startAppend > 0) {
4124     if (!appendRange(startAppend, length)) {
4125       return Encode_Failure;
4126     }
4127   }
4128 
4129   return Encode_Success;
4130 }
4131 
Encode(JSContext * cx,HandleLinearString str,const bool * unescapedSet,MutableHandleValue rval)4132 static MOZ_ALWAYS_INLINE bool Encode(JSContext* cx, HandleLinearString str,
4133                                      const bool* unescapedSet,
4134                                      MutableHandleValue rval) {
4135   size_t length = str->length();
4136   if (length == 0) {
4137     rval.setString(cx->runtime()->emptyString);
4138     return true;
4139   }
4140 
4141   JSStringBuilder sb(cx);
4142 
4143   EncodeResult res;
4144   if (str->hasLatin1Chars()) {
4145     AutoCheckCannotGC nogc;
4146     res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet);
4147   } else {
4148     AutoCheckCannotGC nogc;
4149     res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet);
4150   }
4151 
4152   if (res == Encode_Failure) {
4153     return false;
4154   }
4155 
4156   if (res == Encode_BadUri) {
4157     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4158     return false;
4159   }
4160 
4161   MOZ_ASSERT(res == Encode_Success);
4162   return TransferBufferToString(sb, str, rval);
4163 }
4164 
4165 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success };
4166 
4167 template <typename CharT>
Decode(StringBuffer & sb,const CharT * chars,size_t length,const bool * reservedSet)4168 static DecodeResult Decode(StringBuffer& sb, const CharT* chars, size_t length,
4169                            const bool* reservedSet) {
4170   auto appendRange = [&sb, chars](size_t start, size_t end) {
4171     MOZ_ASSERT(start <= end);
4172 
4173     if (start < end) {
4174       return sb.append(chars + start, chars + end);
4175     }
4176     return true;
4177   };
4178 
4179   size_t startAppend = 0;
4180   for (size_t k = 0; k < length; k++) {
4181     CharT c = chars[k];
4182     if (c == '%') {
4183       size_t start = k;
4184       if ((k + 2) >= length) {
4185         return Decode_BadUri;
4186       }
4187 
4188       if (!IsAsciiHexDigit(chars[k + 1]) || !IsAsciiHexDigit(chars[k + 2])) {
4189         return Decode_BadUri;
4190       }
4191 
4192       uint32_t B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 +
4193                    AsciiAlphanumericToNumber(chars[k + 2]);
4194       k += 2;
4195       if (B < 128) {
4196         Latin1Char ch = Latin1Char(B);
4197         if (reservedSet && reservedSet[ch]) {
4198           continue;
4199         }
4200 
4201         if (!appendRange(startAppend, start)) {
4202           return Decode_Failure;
4203         }
4204         if (!sb.append(ch)) {
4205           return Decode_Failure;
4206         }
4207       } else {
4208         int n = 1;
4209         while (B & (0x80 >> n)) {
4210           n++;
4211         }
4212 
4213         if (n == 1 || n > 4) {
4214           return Decode_BadUri;
4215         }
4216 
4217         uint8_t octets[4];
4218         octets[0] = (uint8_t)B;
4219         if (k + 3 * (n - 1) >= length) {
4220           return Decode_BadUri;
4221         }
4222 
4223         for (int j = 1; j < n; j++) {
4224           k++;
4225           if (chars[k] != '%') {
4226             return Decode_BadUri;
4227           }
4228 
4229           if (!IsAsciiHexDigit(chars[k + 1]) ||
4230               !IsAsciiHexDigit(chars[k + 2])) {
4231             return Decode_BadUri;
4232           }
4233 
4234           B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 +
4235               AsciiAlphanumericToNumber(chars[k + 2]);
4236           if ((B & 0xC0) != 0x80) {
4237             return Decode_BadUri;
4238           }
4239 
4240           k += 2;
4241           octets[j] = char(B);
4242         }
4243 
4244         if (!appendRange(startAppend, start)) {
4245           return Decode_Failure;
4246         }
4247 
4248         uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
4249         MOZ_ASSERT(v >= 128);
4250         if (v >= unicode::NonBMPMin) {
4251           if (v > unicode::NonBMPMax) {
4252             return Decode_BadUri;
4253           }
4254 
4255           if (!sb.append(unicode::LeadSurrogate(v))) {
4256             return Decode_Failure;
4257           }
4258           if (!sb.append(unicode::TrailSurrogate(v))) {
4259             return Decode_Failure;
4260           }
4261         } else {
4262           if (!sb.append(char16_t(v))) {
4263             return Decode_Failure;
4264           }
4265         }
4266       }
4267 
4268       startAppend = k + 1;
4269     }
4270   }
4271 
4272   if (startAppend > 0) {
4273     if (!appendRange(startAppend, length)) {
4274       return Decode_Failure;
4275     }
4276   }
4277 
4278   return Decode_Success;
4279 }
4280 
Decode(JSContext * cx,HandleLinearString str,const bool * reservedSet,MutableHandleValue rval)4281 static bool Decode(JSContext* cx, HandleLinearString str,
4282                    const bool* reservedSet, MutableHandleValue rval) {
4283   size_t length = str->length();
4284   if (length == 0) {
4285     rval.setString(cx->runtime()->emptyString);
4286     return true;
4287   }
4288 
4289   JSStringBuilder sb(cx);
4290 
4291   DecodeResult res;
4292   if (str->hasLatin1Chars()) {
4293     AutoCheckCannotGC nogc;
4294     res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
4295   } else {
4296     AutoCheckCannotGC nogc;
4297     res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
4298   }
4299 
4300   if (res == Decode_Failure) {
4301     return false;
4302   }
4303 
4304   if (res == Decode_BadUri) {
4305     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4306     return false;
4307   }
4308 
4309   MOZ_ASSERT(res == Decode_Success);
4310   return TransferBufferToString(sb, str, rval);
4311 }
4312 
str_decodeURI(JSContext * cx,unsigned argc,Value * vp)4313 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp) {
4314   CallArgs args = CallArgsFromVp(argc, vp);
4315   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4316   if (!str) {
4317     return false;
4318   }
4319 
4320   return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
4321 }
4322 
str_decodeURI_Component(JSContext * cx,unsigned argc,Value * vp)4323 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
4324   CallArgs args = CallArgsFromVp(argc, vp);
4325   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4326   if (!str) {
4327     return false;
4328   }
4329 
4330   return Decode(cx, str, nullptr, args.rval());
4331 }
4332 
str_encodeURI(JSContext * cx,unsigned argc,Value * vp)4333 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp) {
4334   CallArgs args = CallArgsFromVp(argc, vp);
4335   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4336   if (!str) {
4337     return false;
4338   }
4339 
4340   return Encode(cx, str, js_isUriReservedPlusPound, args.rval());
4341 }
4342 
str_encodeURI_Component(JSContext * cx,unsigned argc,Value * vp)4343 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
4344   CallArgs args = CallArgsFromVp(argc, vp);
4345   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4346   if (!str) {
4347     return false;
4348   }
4349 
4350   return Encode(cx, str, nullptr, args.rval());
4351 }
4352 
EncodeURI(JSContext * cx,const char * chars,size_t length)4353 JSString* js::EncodeURI(JSContext* cx, const char* chars, size_t length) {
4354   JSStringBuilder sb(cx);
4355   EncodeResult result = Encode(sb, reinterpret_cast<const Latin1Char*>(chars),
4356                                length, js_isUriReservedPlusPound);
4357   if (result == EncodeResult::Encode_Failure) {
4358     return nullptr;
4359   }
4360   if (result == EncodeResult::Encode_BadUri) {
4361     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4362     return nullptr;
4363   }
4364   if (sb.empty()) {
4365     return NewStringCopyN<CanGC>(cx, chars, length);
4366   }
4367   return sb.finishString();
4368 }
4369 
FlatStringMatchHelper(JSContext * cx,HandleString str,HandleString pattern,bool * isFlat,int32_t * match)4370 static bool FlatStringMatchHelper(JSContext* cx, HandleString str,
4371                                   HandleString pattern, bool* isFlat,
4372                                   int32_t* match) {
4373   RootedLinearString linearPattern(cx, pattern->ensureLinear(cx));
4374   if (!linearPattern) {
4375     return false;
4376   }
4377 
4378   static const size_t MAX_FLAT_PAT_LEN = 256;
4379   if (linearPattern->length() > MAX_FLAT_PAT_LEN ||
4380       StringHasRegExpMetaChars(linearPattern)) {
4381     *isFlat = false;
4382     return true;
4383   }
4384 
4385   *isFlat = true;
4386   if (str->isRope()) {
4387     if (!RopeMatch(cx, &str->asRope(), linearPattern, match)) {
4388       return false;
4389     }
4390   } else {
4391     *match = StringMatch(&str->asLinear(), linearPattern);
4392   }
4393 
4394   return true;
4395 }
4396 
BuildFlatMatchArray(JSContext * cx,HandleString str,HandleString pattern,int32_t match,MutableHandleValue rval)4397 static bool BuildFlatMatchArray(JSContext* cx, HandleString str,
4398                                 HandleString pattern, int32_t match,
4399                                 MutableHandleValue rval) {
4400   if (match < 0) {
4401     rval.setNull();
4402     return true;
4403   }
4404 
4405   // Get the templateObject that defines the shape and type of the output
4406   // object.
4407   ArrayObject* templateObject =
4408       cx->realm()->regExps.getOrCreateMatchResultTemplateObject(cx);
4409   if (!templateObject) {
4410     return false;
4411   }
4412 
4413   RootedArrayObject arr(
4414       cx, NewDenseFullyAllocatedArrayWithTemplate(cx, 1, templateObject));
4415   if (!arr) {
4416     return false;
4417   }
4418 
4419   // Store a Value for each pair.
4420   arr->setDenseInitializedLength(1);
4421   arr->initDenseElement(0, StringValue(pattern));
4422 
4423   // Set the |index| property. (TemplateObject positions it in slot 0).
4424   arr->setSlot(0, Int32Value(match));
4425 
4426   // Set the |input| property. (TemplateObject positions it in slot 1).
4427   arr->setSlot(1, StringValue(str));
4428 
4429 #ifdef DEBUG
4430   RootedValue test(cx);
4431   RootedId id(cx, NameToId(cx->names().index));
4432   if (!NativeGetProperty(cx, arr, id, &test)) {
4433     return false;
4434   }
4435   MOZ_ASSERT(test == arr->getSlot(0));
4436   id = NameToId(cx->names().input);
4437   if (!NativeGetProperty(cx, arr, id, &test)) {
4438     return false;
4439   }
4440   MOZ_ASSERT(test == arr->getSlot(1));
4441 #endif
4442 
4443   rval.setObject(*arr);
4444   return true;
4445 }
4446 
4447 #ifdef DEBUG
CallIsStringOptimizable(JSContext * cx,const char * name,bool * result)4448 static bool CallIsStringOptimizable(JSContext* cx, const char* name,
4449                                     bool* result) {
4450   FixedInvokeArgs<0> args(cx);
4451 
4452   RootedValue rval(cx);
4453   if (!CallSelfHostedFunction(cx, name, UndefinedHandleValue, args, &rval)) {
4454     return false;
4455   }
4456 
4457   *result = rval.toBoolean();
4458   return true;
4459 }
4460 #endif
4461 
FlatStringMatch(JSContext * cx,unsigned argc,Value * vp)4462 bool js::FlatStringMatch(JSContext* cx, unsigned argc, Value* vp) {
4463   CallArgs args = CallArgsFromVp(argc, vp);
4464   MOZ_ASSERT(args.length() == 2);
4465   MOZ_ASSERT(args[0].isString());
4466   MOZ_ASSERT(args[1].isString());
4467 #ifdef DEBUG
4468   bool isOptimizable = false;
4469   if (!CallIsStringOptimizable(cx, "IsStringMatchOptimizable",
4470                                &isOptimizable)) {
4471     return false;
4472   }
4473   MOZ_ASSERT(isOptimizable);
4474 #endif
4475 
4476   RootedString str(cx, args[0].toString());
4477   RootedString pattern(cx, args[1].toString());
4478 
4479   bool isFlat = false;
4480   int32_t match = 0;
4481   if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) {
4482     return false;
4483   }
4484 
4485   if (!isFlat) {
4486     args.rval().setUndefined();
4487     return true;
4488   }
4489 
4490   return BuildFlatMatchArray(cx, str, pattern, match, args.rval());
4491 }
4492 
FlatStringSearch(JSContext * cx,unsigned argc,Value * vp)4493 bool js::FlatStringSearch(JSContext* cx, unsigned argc, Value* vp) {
4494   CallArgs args = CallArgsFromVp(argc, vp);
4495   MOZ_ASSERT(args.length() == 2);
4496   MOZ_ASSERT(args[0].isString());
4497   MOZ_ASSERT(args[1].isString());
4498 #ifdef DEBUG
4499   bool isOptimizable = false;
4500   if (!CallIsStringOptimizable(cx, "IsStringSearchOptimizable",
4501                                &isOptimizable)) {
4502     return false;
4503   }
4504   MOZ_ASSERT(isOptimizable);
4505 #endif
4506 
4507   RootedString str(cx, args[0].toString());
4508   RootedString pattern(cx, args[1].toString());
4509 
4510   bool isFlat = false;
4511   int32_t match = 0;
4512   if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) {
4513     return false;
4514   }
4515 
4516   if (!isFlat) {
4517     args.rval().setInt32(-2);
4518     return true;
4519   }
4520 
4521   args.rval().setInt32(match);
4522   return true;
4523 }
4524