1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "builtin/String.h"
8 
9 #include "mozilla/ArrayUtils.h"
10 #include "mozilla/Attributes.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/FloatingPoint.h"
13 #include "mozilla/PodOperations.h"
14 #include "mozilla/Range.h"
15 #include "mozilla/TypeTraits.h"
16 #include "mozilla/Unused.h"
17 
18 #include <ctype.h>
19 #include <limits>
20 #include <string.h>
21 
22 #include "jsapi.h"
23 #include "jsarray.h"
24 #include "jsbool.h"
25 #include "jsnum.h"
26 #include "jstypes.h"
27 #include "jsutil.h"
28 
29 #include "builtin/intl/CommonFunctions.h"
30 #include "builtin/intl/ICUStubs.h"
31 #include "builtin/RegExp.h"
32 #include "jit/InlinableNatives.h"
33 #include "js/Conversions.h"
34 #include "js/UniquePtr.h"
35 #if ENABLE_INTL_API
36 #include "unicode/uchar.h"
37 #include "unicode/unorm2.h"
38 #endif
39 #include "util/StringBuffer.h"
40 #include "util/Unicode.h"
41 #include "vm/BytecodeUtil.h"
42 #include "vm/GlobalObject.h"
43 #include "vm/Interpreter.h"
44 #include "vm/JSAtom.h"
45 #include "vm/JSContext.h"
46 #include "vm/JSObject.h"
47 #include "vm/Opcodes.h"
48 #include "vm/Printer.h"
49 #include "vm/RegExpObject.h"
50 #include "vm/RegExpStatics.h"
51 #include "vm/SelfHosting.h"
52 
53 #include "vm/Interpreter-inl.h"
54 #include "vm/StringObject-inl.h"
55 #include "vm/StringType-inl.h"
56 #include "vm/TypeInference-inl.h"
57 
58 using namespace js;
59 using namespace js::gc;
60 
61 using JS::Symbol;
62 using JS::SymbolCode;
63 
64 using mozilla::CheckedInt;
65 using mozilla::IsNaN;
66 using mozilla::IsSame;
67 using mozilla::PodCopy;
68 using mozilla::RangedPtr;
69 
70 using JS::AutoCheckCannotGC;
71 
ArgToLinearString(JSContext * cx,const CallArgs & args,unsigned argno)72 static JSLinearString* ArgToLinearString(JSContext* cx, const CallArgs& args,
73                                          unsigned argno) {
74   if (argno >= args.length()) return cx->names().undefined;
75 
76   JSString* str = ToString<CanGC>(cx, args[argno]);
77   if (!str) return nullptr;
78 
79   return str->ensureLinear(cx);
80 }
81 
82 template <typename CharT>
83 struct MaximumInlineLength;
84 
85 template <>
86 struct MaximumInlineLength<Latin1Char> {
87   static constexpr size_t value = JSFatInlineString::MAX_LENGTH_LATIN1;
88 };
89 
90 template <>
91 struct MaximumInlineLength<char16_t> {
92   static constexpr size_t value = JSFatInlineString::MAX_LENGTH_TWO_BYTE;
93 };
94 
95 // Character buffer class used for ToLowerCase and ToUpperCase operations, as
96 // well as other string operations where the final string length is known in
97 // advance.
98 //
99 // Case conversion operations normally return a string with the same length as
100 // the input string. To avoid over-allocation, we optimistically allocate an
101 // array with same size as the input string and only when we detect special
102 // casing characters, which can change the output string length, we reallocate
103 // the output buffer to the final string length.
104 //
105 // As a further mean to improve runtime performance, the character buffer
106 // contains an inline storage, so we don't need to heap-allocate an array when
107 // a JSInlineString will be used for the output string.
108 //
109 // Why not use mozilla::Vector instead? mozilla::Vector doesn't provide enough
110 // fine-grained control to avoid over-allocation when (re)allocating for exact
111 // buffer sizes. This led to visible performance regressions in µ-benchmarks.
112 template <typename CharT>
113 class MOZ_NON_PARAM InlineCharBuffer {
114   static constexpr size_t InlineCapacity = MaximumInlineLength<CharT>::value;
115 
116   CharT inlineStorage[InlineCapacity];
117   UniquePtr<CharT[], JS::FreePolicy> heapStorage;
118 
119 #ifdef DEBUG
120   // In debug mode, we keep track of the requested string lengths to ensure
121   // all character buffer methods are called in the correct order and with
122   // the expected argument values.
123   size_t lastRequestedLength = 0;
124 
assertValidRequest(size_t expectedLastLength,size_t length)125   void assertValidRequest(size_t expectedLastLength, size_t length) {
126     MOZ_ASSERT(length >= expectedLastLength, "cannot shrink requested length");
127     MOZ_ASSERT(lastRequestedLength == expectedLastLength);
128     lastRequestedLength = length;
129   }
130 #else
assertValidRequest(size_t expectedLastLength,size_t length)131   void assertValidRequest(size_t expectedLastLength, size_t length) {}
132 #endif
133 
134  public:
get()135   CharT* get() { return heapStorage ? heapStorage.get() : inlineStorage; }
136 
maybeAlloc(JSContext * cx,size_t length)137   bool maybeAlloc(JSContext* cx, size_t length) {
138     assertValidRequest(0, length);
139 
140     if (length <= InlineCapacity) return true;
141 
142     MOZ_ASSERT(!heapStorage, "heap storage already allocated");
143     heapStorage = cx->make_pod_array<CharT>(length + 1);
144     return !!heapStorage;
145   }
146 
maybeRealloc(JSContext * cx,size_t oldLength,size_t newLength)147   bool maybeRealloc(JSContext* cx, size_t oldLength, size_t newLength) {
148     assertValidRequest(oldLength, newLength);
149 
150     if (newLength <= InlineCapacity) return true;
151 
152     if (!heapStorage) {
153       heapStorage = cx->make_pod_array<CharT>(newLength + 1);
154       if (!heapStorage) return false;
155 
156       MOZ_ASSERT(oldLength <= InlineCapacity);
157       PodCopy(heapStorage.get(), inlineStorage, oldLength);
158       return true;
159     }
160 
161     CharT* oldChars = heapStorage.release();
162     CharT* newChars = cx->pod_realloc(oldChars, oldLength + 1, newLength + 1);
163     if (!newChars) {
164       js_free(oldChars);
165       return false;
166     }
167 
168     heapStorage.reset(newChars);
169     return true;
170   }
171 
toStringDontDeflate(JSContext * cx,size_t length)172   JSString* toStringDontDeflate(JSContext* cx, size_t length) {
173     MOZ_ASSERT(length == lastRequestedLength);
174 
175     if (JSInlineString::lengthFits<CharT>(length)) {
176       MOZ_ASSERT(
177           !heapStorage,
178           "expected only inline storage when length fits in inline string");
179 
180       return NewStringCopyNDontDeflate<CanGC>(cx, inlineStorage, length);
181     }
182 
183     MOZ_ASSERT(heapStorage,
184                "heap storage was not allocated for non-inline string");
185 
186     heapStorage.get()[length] = '\0';  // Null-terminate
187     JSString* res = NewStringDontDeflate<CanGC>(cx, heapStorage.get(), length);
188     if (!res) return nullptr;
189 
190     mozilla::Unused << heapStorage.release();
191     return res;
192   }
193 
toString(JSContext * cx,size_t length)194   JSString* toString(JSContext* cx, size_t length) {
195     MOZ_ASSERT(length == lastRequestedLength);
196 
197     if (JSInlineString::lengthFits<CharT>(length)) {
198       MOZ_ASSERT(
199           !heapStorage,
200           "expected only inline storage when length fits in inline string");
201 
202       return NewStringCopyN<CanGC>(cx, inlineStorage, length);
203     }
204 
205     MOZ_ASSERT(heapStorage,
206                "heap storage was not allocated for non-inline string");
207 
208     heapStorage.get()[length] = '\0';  // Null-terminate
209     JSString* res = NewString<CanGC>(cx, heapStorage.get(), length);
210     if (!res) return nullptr;
211 
212     mozilla::Unused << heapStorage.release();
213     return res;
214   }
215 };
216 
217 /*
218  * Forward declarations for URI encode/decode and helper routines
219  */
220 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
221 
222 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
223 
224 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
225 
226 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
227 
228 /*
229  * Global string methods
230  */
231 
232 /* ES5 B.2.1 */
233 template <typename CharT>
Escape(JSContext * cx,const CharT * chars,uint32_t length,InlineCharBuffer<Latin1Char> & newChars,uint32_t * newLengthOut)234 static bool Escape(JSContext* cx, const CharT* chars, uint32_t length,
235                    InlineCharBuffer<Latin1Char>& newChars,
236                    uint32_t* newLengthOut) {
237   // clang-format off
238     static const uint8_t shouldPassThrough[128] = {
239          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
240          0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
241          0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,       /*    !"#$%&'()*+,-./  */
242          1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,       /*   0123456789:;<=>?  */
243          1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   @ABCDEFGHIJKLMNO  */
244          1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,       /*   PQRSTUVWXYZ[\]^_  */
245          0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,       /*   `abcdefghijklmno  */
246          1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,       /*   pqrstuvwxyz{\}~  DEL */
247     };
248   // clang-format on
249 
250   /* Take a first pass and see how big the result string will need to be. */
251   uint32_t newLength = length;
252   for (size_t i = 0; i < length; i++) {
253     char16_t ch = chars[i];
254     if (ch < 128 && shouldPassThrough[ch]) continue;
255 
256     /* The character will be encoded as %XX or %uXXXX. */
257     newLength += (ch < 256) ? 2 : 5;
258 
259     /*
260      * newlength is incremented by at most 5 on each iteration, so worst
261      * case newlength == length * 6. This can't overflow.
262      */
263     static_assert(JSString::MAX_LENGTH < UINT32_MAX / 6,
264                   "newlength must not overflow");
265   }
266 
267   if (newLength == length) {
268     *newLengthOut = newLength;
269     return true;
270   }
271 
272   if (!newChars.maybeAlloc(cx, newLength)) return false;
273 
274   static const char digits[] = "0123456789ABCDEF";
275 
276   Latin1Char* rawNewChars = newChars.get();
277   size_t i, ni;
278   for (i = 0, ni = 0; i < length; i++) {
279     char16_t ch = chars[i];
280     if (ch < 128 && shouldPassThrough[ch]) {
281       rawNewChars[ni++] = ch;
282     } else if (ch < 256) {
283       rawNewChars[ni++] = '%';
284       rawNewChars[ni++] = digits[ch >> 4];
285       rawNewChars[ni++] = digits[ch & 0xF];
286     } else {
287       rawNewChars[ni++] = '%';
288       rawNewChars[ni++] = 'u';
289       rawNewChars[ni++] = digits[ch >> 12];
290       rawNewChars[ni++] = digits[(ch & 0xF00) >> 8];
291       rawNewChars[ni++] = digits[(ch & 0xF0) >> 4];
292       rawNewChars[ni++] = digits[ch & 0xF];
293     }
294   }
295   MOZ_ASSERT(ni == newLength);
296 
297   *newLengthOut = newLength;
298   return true;
299 }
300 
str_escape(JSContext * cx,unsigned argc,Value * vp)301 static bool str_escape(JSContext* cx, unsigned argc, Value* vp) {
302   CallArgs args = CallArgsFromVp(argc, vp);
303 
304   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
305   if (!str) return false;
306 
307   InlineCharBuffer<Latin1Char> newChars;
308   uint32_t newLength = 0;  // initialize to silence GCC warning
309   if (str->hasLatin1Chars()) {
310     AutoCheckCannotGC nogc;
311     if (!Escape(cx, str->latin1Chars(nogc), str->length(), newChars,
312                 &newLength))
313       return false;
314   } else {
315     AutoCheckCannotGC nogc;
316     if (!Escape(cx, str->twoByteChars(nogc), str->length(), newChars,
317                 &newLength))
318       return false;
319   }
320 
321   // Return input if no characters need to be escaped.
322   if (newLength == str->length()) {
323     args.rval().setString(str);
324     return true;
325   }
326 
327   JSString* res = newChars.toString(cx, newLength);
328   if (!res) return false;
329 
330   args.rval().setString(res);
331   return true;
332 }
333 
334 template <typename CharT>
Unhex4(const RangedPtr<const CharT> chars,char16_t * result)335 static inline bool Unhex4(const RangedPtr<const CharT> chars,
336                           char16_t* result) {
337   char16_t a = chars[0], b = chars[1], c = chars[2], d = chars[3];
338 
339   if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
340     return false;
341 
342   *result =
343       (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) +
344       JS7_UNHEX(d);
345   return true;
346 }
347 
348 template <typename CharT>
Unhex2(const RangedPtr<const CharT> chars,char16_t * result)349 static inline bool Unhex2(const RangedPtr<const CharT> chars,
350                           char16_t* result) {
351   char16_t a = chars[0], b = chars[1];
352 
353   if (!(JS7_ISHEX(a) && JS7_ISHEX(b))) return false;
354 
355   *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
356   return true;
357 }
358 
359 template <typename CharT>
Unescape(StringBuffer & sb,const mozilla::Range<const CharT> chars)360 static bool Unescape(StringBuffer& sb,
361                      const mozilla::Range<const CharT> chars) {
362   // Step 2.
363   uint32_t length = chars.length();
364 
365   /*
366    * Note that the spec algorithm has been optimized to avoid building
367    * a string in the case where no escapes are present.
368    */
369   bool building = false;
370 
371 #define ENSURE_BUILDING                            \
372   do {                                             \
373     if (!building) {                               \
374       building = true;                             \
375       if (!sb.reserve(length)) return false;       \
376       sb.infallibleAppend(chars.begin().get(), k); \
377     }                                              \
378   } while (false);
379 
380   // Step 4.
381   uint32_t k = 0;
382 
383   // Step 5.
384   while (k < length) {
385     // Step 5.a.
386     char16_t c = chars[k];
387 
388     // Step 5.b.
389     if (c == '%') {
390       static_assert(JSString::MAX_LENGTH < UINT32_MAX - 6,
391                     "String length is not near UINT32_MAX");
392 
393       // Steps 5.b.i-ii.
394       if (k + 6 <= length && chars[k + 1] == 'u') {
395         if (Unhex4(chars.begin() + k + 2, &c)) {
396           ENSURE_BUILDING
397           k += 5;
398         }
399       } else if (k + 3 <= length) {
400         if (Unhex2(chars.begin() + k + 1, &c)) {
401           ENSURE_BUILDING
402           k += 2;
403         }
404       }
405     }
406 
407     // Step 5.c.
408     if (building && !sb.append(c)) return false;
409 
410     // Step 5.d.
411     k += 1;
412   }
413 
414   return true;
415 #undef ENSURE_BUILDING
416 }
417 
418 // ES2018 draft rev f83aa38282c2a60c6916ebc410bfdf105a0f6a54
419 // B.2.1.2 unescape ( string )
str_unescape(JSContext * cx,unsigned argc,Value * vp)420 static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) {
421   CallArgs args = CallArgsFromVp(argc, vp);
422 
423   // Step 1.
424   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
425   if (!str) return false;
426 
427   // Step 3.
428   StringBuffer sb(cx);
429   if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) return false;
430 
431   // Steps 2, 4-5.
432   if (str->hasLatin1Chars()) {
433     AutoCheckCannotGC nogc;
434     if (!Unescape(sb, str->latin1Range(nogc))) return false;
435   } else {
436     AutoCheckCannotGC nogc;
437     if (!Unescape(sb, str->twoByteRange(nogc))) return false;
438   }
439 
440   // Step 6.
441   JSLinearString* result;
442   if (!sb.empty()) {
443     result = sb.finishString();
444     if (!result) return false;
445   } else {
446     result = str;
447   }
448 
449   args.rval().setString(result);
450   return true;
451 }
452 
str_uneval(JSContext * cx,unsigned argc,Value * vp)453 static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) {
454   CallArgs args = CallArgsFromVp(argc, vp);
455   JSString* str = ValueToSource(cx, args.get(0));
456   if (!str) return false;
457 
458   args.rval().setString(str);
459   return true;
460 }
461 
462 static const JSFunctionSpec string_functions[] = {
463     JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING),
464     JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING),
465     JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING),
466     JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING),
467     JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING),
468     JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,
469           JSPROP_RESOLVING),
470     JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,
471           JSPROP_RESOLVING),
472 
473     JS_FS_END};
474 
475 static const unsigned STRING_ELEMENT_ATTRS =
476     JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
477 
str_enumerate(JSContext * cx,HandleObject obj)478 static bool str_enumerate(JSContext* cx, HandleObject obj) {
479   RootedString str(cx, obj->as<StringObject>().unbox());
480   js::StaticStrings& staticStrings = cx->staticStrings();
481 
482   RootedValue value(cx);
483   for (size_t i = 0, length = str->length(); i < length; i++) {
484     JSString* str1 = staticStrings.getUnitStringForElement(cx, str, i);
485     if (!str1) return false;
486     value.setString(str1);
487     if (!DefineDataElement(cx, obj, i, value,
488                            STRING_ELEMENT_ATTRS | JSPROP_RESOLVING))
489       return false;
490   }
491 
492   return true;
493 }
494 
str_mayResolve(const JSAtomState &,jsid id,JSObject *)495 static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) {
496   // str_resolve ignores non-integer ids.
497   return JSID_IS_INT(id);
498 }
499 
str_resolve(JSContext * cx,HandleObject obj,HandleId id,bool * resolvedp)500 static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id,
501                         bool* resolvedp) {
502   if (!JSID_IS_INT(id)) return true;
503 
504   RootedString str(cx, obj->as<StringObject>().unbox());
505 
506   int32_t slot = JSID_TO_INT(id);
507   if ((size_t)slot < str->length()) {
508     JSString* str1 =
509         cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
510     if (!str1) return false;
511     RootedValue value(cx, StringValue(str1));
512     if (!DefineDataElement(cx, obj, uint32_t(slot), value,
513                            STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
514       return false;
515     }
516     *resolvedp = true;
517   }
518   return true;
519 }
520 
521 static const ClassOps StringObjectClassOps = {
522     nullptr,                /* addProperty */
523     nullptr,                /* delProperty */
524     str_enumerate, nullptr, /* newEnumerate */
525     str_resolve,   str_mayResolve};
526 
527 const Class StringObject::class_ = {
528     js_String_str,
529     JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
530         JSCLASS_HAS_CACHED_PROTO(JSProto_String),
531     &StringObjectClassOps};
532 
533 /*
534  * Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)|
535  * from nearly all String.prototype.* functions.
536  */
ToStringForStringFunction(JSContext * cx,HandleValue thisv)537 static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction(
538     JSContext* cx, HandleValue thisv) {
539   if (!CheckRecursionLimit(cx)) return nullptr;
540 
541   if (thisv.isString()) return thisv.toString();
542 
543   if (thisv.isObject()) {
544     RootedObject obj(cx, &thisv.toObject());
545     if (obj->is<StringObject>()) {
546       StringObject* nobj = &obj->as<StringObject>();
547       // We have to make sure that the ToPrimitive call from ToString
548       // would be unobservable.
549       if (HasNoToPrimitiveMethodPure(nobj, cx) &&
550           HasNativeMethodPure(nobj, cx->names().toString, str_toString, cx)) {
551         return nobj->unbox();
552       }
553     }
554   } else if (thisv.isNullOrUndefined()) {
555     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
556                               JSMSG_CANT_CONVERT_TO,
557                               thisv.isNull() ? "null" : "undefined", "object");
558     return nullptr;
559   }
560 
561   return ToStringSlow<CanGC>(cx, thisv);
562 }
563 
IsString(HandleValue v)564 MOZ_ALWAYS_INLINE bool IsString(HandleValue v) {
565   return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
566 }
567 
str_toSource_impl(JSContext * cx,const CallArgs & args)568 MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) {
569   MOZ_ASSERT(IsString(args.thisv()));
570 
571   Rooted<JSString*> str(cx, ToString<CanGC>(cx, args.thisv()));
572   if (!str) return false;
573 
574   str = QuoteString(cx, str, '"');
575   if (!str) return false;
576 
577   StringBuffer sb(cx);
578   if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
579     return false;
580 
581   str = sb.finishString();
582   if (!str) return false;
583   args.rval().setString(str);
584   return true;
585 }
586 
str_toSource(JSContext * cx,unsigned argc,Value * vp)587 static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) {
588   CallArgs args = CallArgsFromVp(argc, vp);
589   return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
590 }
591 
str_toString_impl(JSContext * cx,const CallArgs & args)592 MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) {
593   MOZ_ASSERT(IsString(args.thisv()));
594 
595   args.rval().setString(
596       args.thisv().isString()
597           ? args.thisv().toString()
598           : args.thisv().toObject().as<StringObject>().unbox());
599   return true;
600 }
601 
str_toString(JSContext * cx,unsigned argc,Value * vp)602 bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) {
603   CallArgs args = CallArgsFromVp(argc, vp);
604   return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
605 }
606 
607 /*
608  * Java-like string native methods.
609  */
610 
SubstringKernel(JSContext * cx,HandleString str,int32_t beginInt,int32_t lengthInt)611 JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
612                               int32_t lengthInt) {
613   MOZ_ASSERT(0 <= beginInt);
614   MOZ_ASSERT(0 <= lengthInt);
615   MOZ_ASSERT(uint32_t(beginInt) <= str->length());
616   MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
617 
618   uint32_t begin = beginInt;
619   uint32_t len = lengthInt;
620 
621   /*
622    * Optimization for one level deep ropes.
623    * This is common for the following pattern:
624    *
625    * while() {
626    *   text = text.substr(0, x) + "bla" + text.substr(x)
627    *   test.charCodeAt(x + 1)
628    * }
629    */
630   if (str->isRope()) {
631     JSRope* rope = &str->asRope();
632 
633     /* Substring is totally in leftChild of rope. */
634     if (begin + len <= rope->leftChild()->length())
635       return NewDependentString(cx, rope->leftChild(), begin, len);
636 
637     /* Substring is totally in rightChild of rope. */
638     if (begin >= rope->leftChild()->length()) {
639       begin -= rope->leftChild()->length();
640       return NewDependentString(cx, rope->rightChild(), begin, len);
641     }
642 
643     /*
644      * Requested substring is partly in the left and partly in right child.
645      * Create a rope of substrings for both childs.
646      */
647     MOZ_ASSERT(begin < rope->leftChild()->length() &&
648                begin + len > rope->leftChild()->length());
649 
650     size_t lhsLength = rope->leftChild()->length() - begin;
651     size_t rhsLength = begin + len - rope->leftChild()->length();
652 
653     Rooted<JSRope*> ropeRoot(cx, rope);
654     RootedString lhs(
655         cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
656     if (!lhs) return nullptr;
657 
658     RootedString rhs(
659         cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
660     if (!rhs) return nullptr;
661 
662     return JSRope::new_<CanGC>(cx, lhs, rhs, len);
663   }
664 
665   return NewDependentString(cx, str, begin, len);
666 }
667 
668 /**
669  * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
670  * depending on its context:
671  * When it's preceded by a cased character and not followed by another cased
672  * character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
673  * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
674  *
675  * Unicode 9.0, §3.13 Default Case Algorithms
676  */
Final_Sigma(const char16_t * chars,size_t length,size_t index)677 static char16_t Final_Sigma(const char16_t* chars, size_t length,
678                             size_t index) {
679   MOZ_ASSERT(index < length);
680   MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
681   MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
682              unicode::GREEK_SMALL_LETTER_SIGMA);
683 
684 #if ENABLE_INTL_API
685   // Tell the analysis the BinaryProperty.contains function pointer called by
686   // u_hasBinaryProperty cannot GC.
687   JS::AutoSuppressGCAnalysis nogc;
688 
689   bool precededByCased = false;
690   for (size_t i = index; i > 0;) {
691     char16_t c = chars[--i];
692     uint32_t codePoint = c;
693     if (unicode::IsTrailSurrogate(c) && i > 0) {
694       char16_t lead = chars[i - 1];
695       if (unicode::IsLeadSurrogate(lead)) {
696         codePoint = unicode::UTF16Decode(lead, c);
697         i--;
698       }
699     }
700 
701     // Ignore any characters with the property Case_Ignorable.
702     // NB: We need to skip over all Case_Ignorable characters, even when
703     // they also have the Cased binary property.
704     if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) continue;
705 
706     precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
707     break;
708   }
709   if (!precededByCased) return unicode::GREEK_SMALL_LETTER_SIGMA;
710 
711   bool followedByCased = false;
712   for (size_t i = index + 1; i < length;) {
713     char16_t c = chars[i++];
714     uint32_t codePoint = c;
715     if (unicode::IsLeadSurrogate(c) && i < length) {
716       char16_t trail = chars[i];
717       if (unicode::IsTrailSurrogate(trail)) {
718         codePoint = unicode::UTF16Decode(c, trail);
719         i++;
720       }
721     }
722 
723     // Ignore any characters with the property Case_Ignorable.
724     // NB: We need to skip over all Case_Ignorable characters, even when
725     // they also have the Cased binary property.
726     if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) continue;
727 
728     followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
729     break;
730   }
731   if (!followedByCased) return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
732 #endif
733 
734   return unicode::GREEK_SMALL_LETTER_SIGMA;
735 }
736 
Final_Sigma(const Latin1Char * chars,size_t length,size_t index)737 static Latin1Char Final_Sigma(const Latin1Char* chars, size_t length,
738                               size_t index) {
739   MOZ_ASSERT_UNREACHABLE("U+03A3 is not a Latin-1 character");
740   return 0;
741 }
742 
743 // If |srcLength == destLength| is true, the destination buffer was allocated
744 // with the same size as the source buffer. When we append characters which
745 // have special casing mappings, we test |srcLength == destLength| to decide
746 // if we need to back out and reallocate a sufficiently large destination
747 // buffer. Otherwise the destination buffer was allocated with the correct
748 // size to hold all lower case mapped characters, i.e.
749 // |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
750 template <typename CharT>
ToLowerCaseImpl(CharT * destChars,const CharT * srcChars,size_t startIndex,size_t srcLength,size_t destLength)751 static size_t ToLowerCaseImpl(CharT* destChars, const CharT* srcChars,
752                               size_t startIndex, size_t srcLength,
753                               size_t destLength) {
754   MOZ_ASSERT(startIndex < srcLength);
755   MOZ_ASSERT(srcLength <= destLength);
756   MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), srcLength == destLength);
757 
758   size_t j = startIndex;
759   for (size_t i = startIndex; i < srcLength; i++) {
760     char16_t c = srcChars[i];
761     if (!IsSame<CharT, Latin1Char>::value) {
762       if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
763         char16_t trail = srcChars[i + 1];
764         if (unicode::IsTrailSurrogate(trail)) {
765           trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
766           destChars[j++] = c;
767           destChars[j++] = trail;
768           i++;
769           continue;
770         }
771       }
772 
773       // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
774       // lowercases to <U+0069 U+0307>.
775       if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
776         // Return if the output buffer is too small.
777         if (srcLength == destLength) return i;
778 
779         destChars[j++] = CharT('i');
780         destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
781         continue;
782       }
783 
784       // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
785       // one of two codepoints depending on context.
786       if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
787         destChars[j++] = Final_Sigma(srcChars, srcLength, i);
788         continue;
789       }
790     }
791 
792     c = unicode::ToLowerCase(c);
793     MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value),
794                   c <= JSString::MAX_LATIN1_CHAR);
795     destChars[j++] = c;
796   }
797 
798   MOZ_ASSERT(j == destLength);
799   return srcLength;
800 }
801 
ToLowerCaseLength(const char16_t * chars,size_t startIndex,size_t length)802 static size_t ToLowerCaseLength(const char16_t* chars, size_t startIndex,
803                                 size_t length) {
804   size_t lowerLength = length;
805   for (size_t i = startIndex; i < length; i++) {
806     char16_t c = chars[i];
807 
808     // U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
809     if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) lowerLength += 1;
810   }
811   return lowerLength;
812 }
813 
ToLowerCaseLength(const Latin1Char * chars,size_t startIndex,size_t length)814 static size_t ToLowerCaseLength(const Latin1Char* chars, size_t startIndex,
815                                 size_t length) {
816   MOZ_ASSERT_UNREACHABLE("never called for Latin-1 strings");
817   return 0;
818 }
819 
820 template <typename CharT>
ToLowerCase(JSContext * cx,JSLinearString * str)821 static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) {
822   // Unlike toUpperCase, toLowerCase has the nice invariant that if the
823   // input is a Latin-1 string, the output is also a Latin-1 string.
824 
825   InlineCharBuffer<CharT> newChars;
826 
827   const size_t length = str->length();
828   size_t resultLength;
829   {
830     AutoCheckCannotGC nogc;
831     const CharT* chars = str->chars<CharT>(nogc);
832 
833     // We don't need extra special casing checks in the loop below,
834     // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
835     // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
836     MOZ_ASSERT(
837         unicode::CanLowerCase(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
838         "U+0130 has a simple lower case mapping");
839     MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA),
840                "U+03A3 has a simple lower case mapping");
841 
842     // One element Latin-1 strings can be directly retrieved from the
843     // static strings cache.
844     if (IsSame<CharT, Latin1Char>::value) {
845       if (length == 1) {
846         char16_t lower = unicode::ToLowerCase(chars[0]);
847         MOZ_ASSERT(lower <= JSString::MAX_LATIN1_CHAR);
848         MOZ_ASSERT(StaticStrings::hasUnit(lower));
849 
850         return cx->staticStrings().getUnit(lower);
851       }
852     }
853 
854     // Look for the first character that changes when lowercased.
855     size_t i = 0;
856     for (; i < length; i++) {
857       CharT c = chars[i];
858       if (!IsSame<CharT, Latin1Char>::value) {
859         if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
860           CharT trail = chars[i + 1];
861           if (unicode::IsTrailSurrogate(trail)) {
862             if (unicode::CanLowerCaseNonBMP(c, trail)) break;
863 
864             i++;
865             continue;
866           }
867         }
868       }
869       if (unicode::CanLowerCase(c)) break;
870     }
871 
872     // If no character needs to change, return the input string.
873     if (i == length) return str;
874 
875     resultLength = length;
876     if (!newChars.maybeAlloc(cx, resultLength)) return nullptr;
877 
878     PodCopy(newChars.get(), chars, i);
879 
880     size_t readChars =
881         ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
882     if (readChars < length) {
883       MOZ_ASSERT((!IsSame<CharT, Latin1Char>::value),
884                  "Latin-1 strings don't have special lower case mappings");
885       resultLength = ToLowerCaseLength(chars, readChars, length);
886 
887       if (!newChars.maybeRealloc(cx, length, resultLength)) return nullptr;
888 
889       MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.get(), chars,
890                                                 readChars, length,
891                                                 resultLength));
892     }
893   }
894 
895   return newChars.toStringDontDeflate(cx, resultLength);
896 }
897 
StringToLowerCase(JSContext * cx,HandleString string)898 JSString* js::StringToLowerCase(JSContext* cx, HandleString string) {
899   JSLinearString* linear = string->ensureLinear(cx);
900   if (!linear) return nullptr;
901 
902   if (linear->hasLatin1Chars()) return ToLowerCase<Latin1Char>(cx, linear);
903   return ToLowerCase<char16_t>(cx, linear);
904 }
905 
str_toLowerCase(JSContext * cx,unsigned argc,Value * vp)906 bool js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) {
907   CallArgs args = CallArgsFromVp(argc, vp);
908 
909   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
910   if (!str) return false;
911 
912   JSString* result = StringToLowerCase(cx, str);
913   if (!result) return false;
914 
915   args.rval().setString(result);
916   return true;
917 }
918 
CaseMappingLocale(JSContext * cx,JSString * str)919 static const char* CaseMappingLocale(JSContext* cx, JSString* str) {
920   JSLinearString* locale = str->ensureLinear(cx);
921   if (!locale) return nullptr;
922 
923   MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");
924 
925   // Lithuanian, Turkish, and Azeri have language dependent case mappings.
926   static const char languagesWithSpecialCasing[][3] = {"lt", "tr", "az"};
927 
928   // All strings in |languagesWithSpecialCasing| are of length two, so we
929   // only need to compare the first two characters to find a matching locale.
930   // ES2017 Intl, §9.2.2 BestAvailableLocale
931   if (locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-') {
932     for (const auto& language : languagesWithSpecialCasing) {
933       if (locale->latin1OrTwoByteChar(0) == language[0] &&
934           locale->latin1OrTwoByteChar(1) == language[1]) {
935         return language;
936       }
937     }
938   }
939 
940   return "";  // ICU root locale
941 }
942 
intl_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)943 bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
944   CallArgs args = CallArgsFromVp(argc, vp);
945   MOZ_ASSERT(args.length() == 2);
946   MOZ_ASSERT(args[0].isString());
947   MOZ_ASSERT(args[1].isString());
948 
949   RootedString string(cx, args[0].toString());
950 
951   const char* locale = CaseMappingLocale(cx, args[1].toString());
952   if (!locale) return false;
953 
954   // Call String.prototype.toLowerCase() for language independent casing.
955   if (intl::StringsAreEqual(locale, "")) {
956     JSString* str = StringToLowerCase(cx, string);
957     if (!str) return false;
958 
959     args.rval().setString(str);
960     return true;
961   }
962 
963   AutoStableStringChars inputChars(cx);
964   if (!inputChars.initTwoByte(cx, string)) return false;
965   mozilla::Range<const char16_t> input = inputChars.twoByteRange();
966 
967   // Maximum case mapping length is three characters.
968   static_assert(JSString::MAX_LENGTH < INT32_MAX / 3,
969                 "Case conversion doesn't overflow int32_t indices");
970 
971   JSString* str = intl::CallICU(
972       cx, [&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
973         return u_strToLower(chars, size, input.begin().get(), input.length(),
974                             locale, status);
975       });
976   if (!str) return false;
977 
978   args.rval().setString(str);
979   return true;
980 }
981 
982 #if EXPOSE_INTL_API
983 
984   // String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
985   // with core functionality performed by the intrinsic above.
986 
987 #else
988 
str_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)989 bool js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
990   CallArgs args = CallArgsFromVp(argc, vp);
991 
992   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
993   if (!str) return false;
994 
995   /*
996    * Forcefully ignore the first (or any) argument and return toLowerCase(),
997    * ECMA has reserved that argument, presumably for defining the locale.
998    */
999   if (cx->runtime()->localeCallbacks &&
1000       cx->runtime()->localeCallbacks->localeToLowerCase) {
1001     RootedValue result(cx);
1002     if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
1003       return false;
1004 
1005     args.rval().set(result);
1006     return true;
1007   }
1008 
1009   RootedLinearString linear(cx, str->ensureLinear(cx));
1010   if (!linear) return false;
1011 
1012   JSString* result = StringToLowerCase(cx, linear);
1013   if (!result) return false;
1014 
1015   args.rval().setString(result);
1016   return true;
1017 }
1018 
1019 #endif  // EXPOSE_INTL_API
1020 
CanUpperCaseSpecialCasing(Latin1Char charCode)1021 static inline bool CanUpperCaseSpecialCasing(Latin1Char charCode) {
1022   // Handle U+00DF LATIN SMALL LETTER SHARP S inline, all other Latin-1
1023   // characters don't have special casing rules.
1024   MOZ_ASSERT_IF(charCode != unicode::LATIN_SMALL_LETTER_SHARP_S,
1025                 !unicode::CanUpperCaseSpecialCasing(charCode));
1026 
1027   return charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
1028 }
1029 
CanUpperCaseSpecialCasing(char16_t charCode)1030 static inline bool CanUpperCaseSpecialCasing(char16_t charCode) {
1031   return unicode::CanUpperCaseSpecialCasing(charCode);
1032 }
1033 
LengthUpperCaseSpecialCasing(Latin1Char charCode)1034 static inline size_t LengthUpperCaseSpecialCasing(Latin1Char charCode) {
1035   // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1036   MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1037 
1038   return 2;
1039 }
1040 
LengthUpperCaseSpecialCasing(char16_t charCode)1041 static inline size_t LengthUpperCaseSpecialCasing(char16_t charCode) {
1042   MOZ_ASSERT(CanUpperCaseSpecialCasing(charCode));
1043 
1044   return unicode::LengthUpperCaseSpecialCasing(charCode);
1045 }
1046 
AppendUpperCaseSpecialCasing(char16_t charCode,Latin1Char * elements,size_t * index)1047 static inline void AppendUpperCaseSpecialCasing(char16_t charCode,
1048                                                 Latin1Char* elements,
1049                                                 size_t* index) {
1050   // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1051   MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1052   static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
1053 
1054   elements[(*index)++] = 'S';
1055   elements[(*index)++] = 'S';
1056 }
1057 
AppendUpperCaseSpecialCasing(char16_t charCode,char16_t * elements,size_t * index)1058 static inline void AppendUpperCaseSpecialCasing(char16_t charCode,
1059                                                 char16_t* elements,
1060                                                 size_t* index) {
1061   unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
1062 }
1063 
1064 // See ToLowerCaseImpl for an explanation of the parameters.
1065 template <typename DestChar, typename SrcChar>
ToUpperCaseImpl(DestChar * destChars,const SrcChar * srcChars,size_t startIndex,size_t srcLength,size_t destLength)1066 static size_t ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars,
1067                               size_t startIndex, size_t srcLength,
1068                               size_t destLength) {
1069   static_assert(IsSame<SrcChar, Latin1Char>::value ||
1070                     !IsSame<DestChar, Latin1Char>::value,
1071                 "cannot write non-Latin-1 characters into Latin-1 string");
1072   MOZ_ASSERT(startIndex < srcLength);
1073   MOZ_ASSERT(srcLength <= destLength);
1074 
1075   size_t j = startIndex;
1076   for (size_t i = startIndex; i < srcLength; i++) {
1077     char16_t c = srcChars[i];
1078     if (!IsSame<DestChar, Latin1Char>::value) {
1079       if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
1080         char16_t trail = srcChars[i + 1];
1081         if (unicode::IsTrailSurrogate(trail)) {
1082           trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
1083           destChars[j++] = c;
1084           destChars[j++] = trail;
1085           i++;
1086           continue;
1087         }
1088       }
1089     }
1090 
1091     if (MOZ_UNLIKELY(c > 0x7f &&
1092                      CanUpperCaseSpecialCasing(static_cast<SrcChar>(c)))) {
1093       // Return if the output buffer is too small.
1094       if (srcLength == destLength) return i;
1095 
1096       AppendUpperCaseSpecialCasing(c, destChars, &j);
1097       continue;
1098     }
1099 
1100     c = unicode::ToUpperCase(c);
1101     MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value),
1102                   c <= JSString::MAX_LATIN1_CHAR);
1103     destChars[j++] = c;
1104   }
1105 
1106   MOZ_ASSERT(j == destLength);
1107   return srcLength;
1108 }
1109 
1110 // Explicit instantiation so we don't hit the static_assert from above.
ToUpperCaseImpl(Latin1Char * destChars,const char16_t * srcChars,size_t startIndex,size_t srcLength,size_t destLength)1111 static bool ToUpperCaseImpl(Latin1Char* destChars, const char16_t* srcChars,
1112                             size_t startIndex, size_t srcLength,
1113                             size_t destLength) {
1114   MOZ_ASSERT_UNREACHABLE(
1115       "cannot write non-Latin-1 characters into Latin-1 string");
1116   return false;
1117 }
1118 
1119 template <typename CharT>
ToUpperCaseLength(const CharT * chars,size_t startIndex,size_t length)1120 static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex,
1121                                 size_t length) {
1122   size_t upperLength = length;
1123   for (size_t i = startIndex; i < length; i++) {
1124     char16_t c = chars[i];
1125 
1126     if (c > 0x7f && CanUpperCaseSpecialCasing(static_cast<CharT>(c)))
1127       upperLength += LengthUpperCaseSpecialCasing(static_cast<CharT>(c)) - 1;
1128   }
1129   return upperLength;
1130 }
1131 
1132 template <typename DestChar, typename SrcChar>
CopyChars(DestChar * destChars,const SrcChar * srcChars,size_t length)1133 static inline void CopyChars(DestChar* destChars, const SrcChar* srcChars,
1134                              size_t length) {
1135   static_assert(!IsSame<DestChar, SrcChar>::value,
1136                 "PodCopy is used for the same type case");
1137   for (size_t i = 0; i < length; i++) destChars[i] = srcChars[i];
1138 }
1139 
1140 template <typename CharT>
CopyChars(CharT * destChars,const CharT * srcChars,size_t length)1141 static inline void CopyChars(CharT* destChars, const CharT* srcChars,
1142                              size_t length) {
1143   PodCopy(destChars, srcChars, length);
1144 }
1145 
1146 template <typename DestChar, typename SrcChar>
ToUpperCase(JSContext * cx,InlineCharBuffer<DestChar> & newChars,const SrcChar * chars,size_t startIndex,size_t length,size_t * resultLength)1147 static inline bool ToUpperCase(JSContext* cx,
1148                                InlineCharBuffer<DestChar>& newChars,
1149                                const SrcChar* chars, size_t startIndex,
1150                                size_t length, size_t* resultLength) {
1151   MOZ_ASSERT(startIndex < length);
1152 
1153   *resultLength = length;
1154   if (!newChars.maybeAlloc(cx, length)) return false;
1155 
1156   CopyChars(newChars.get(), chars, startIndex);
1157 
1158   size_t readChars =
1159       ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
1160   if (readChars < length) {
1161     size_t actualLength = ToUpperCaseLength(chars, readChars, length);
1162 
1163     *resultLength = actualLength;
1164     if (!newChars.maybeRealloc(cx, length, actualLength)) return false;
1165 
1166     MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.get(), chars, readChars,
1167                                               length, actualLength));
1168   }
1169 
1170   return true;
1171 }
1172 
1173 template <typename CharT>
ToUpperCase(JSContext * cx,JSLinearString * str)1174 static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) {
1175   using Latin1Buffer = InlineCharBuffer<Latin1Char>;
1176   using TwoByteBuffer = InlineCharBuffer<char16_t>;
1177 
1178   mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
1179   const size_t length = str->length();
1180   size_t resultLength;
1181   {
1182     AutoCheckCannotGC nogc;
1183     const CharT* chars = str->chars<CharT>(nogc);
1184 
1185     // Most one element Latin-1 strings can be directly retrieved from the
1186     // static strings cache.
1187     if (IsSame<CharT, Latin1Char>::value) {
1188       if (length == 1) {
1189         Latin1Char c = chars[0];
1190         if (c != unicode::MICRO_SIGN &&
1191             c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
1192             c != unicode::LATIN_SMALL_LETTER_SHARP_S) {
1193           char16_t upper = unicode::ToUpperCase(c);
1194           MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
1195           MOZ_ASSERT(StaticStrings::hasUnit(upper));
1196 
1197           return cx->staticStrings().getUnit(upper);
1198         }
1199 
1200         MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
1201                    CanUpperCaseSpecialCasing(c));
1202       }
1203     }
1204 
1205     // Look for the first character that changes when uppercased.
1206     size_t i = 0;
1207     for (; i < length; i++) {
1208       CharT c = chars[i];
1209       if (!IsSame<CharT, Latin1Char>::value) {
1210         if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
1211           CharT trail = chars[i + 1];
1212           if (unicode::IsTrailSurrogate(trail)) {
1213             if (unicode::CanUpperCaseNonBMP(c, trail)) break;
1214 
1215             i++;
1216             continue;
1217           }
1218         }
1219       }
1220       if (unicode::CanUpperCase(c)) break;
1221       if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(c))) break;
1222     }
1223 
1224     // If no character needs to change, return the input string.
1225     if (i == length) return str;
1226 
1227     // The string changes when uppercased, so we must create a new string.
1228     // Can it be Latin-1?
1229     //
1230     // If the original string is Latin-1, it can -- unless the string
1231     // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
1232     // the only Latin-1 codepoints that don't uppercase within Latin-1.
1233     // Search for those codepoints to decide whether the new string can be
1234     // Latin-1.
1235     // If the original string is a two-byte string, its uppercase form is
1236     // so rarely Latin-1 that we don't even consider creating a new
1237     // Latin-1 string.
1238     bool resultIsLatin1;
1239     if (IsSame<CharT, Latin1Char>::value) {
1240       resultIsLatin1 = true;
1241       for (size_t j = i; j < length; j++) {
1242         Latin1Char c = chars[j];
1243         if (c == unicode::MICRO_SIGN ||
1244             c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
1245           MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
1246           resultIsLatin1 = false;
1247           break;
1248         } else {
1249           MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
1250         }
1251       }
1252     } else {
1253       resultIsLatin1 = false;
1254     }
1255 
1256     if (resultIsLatin1) {
1257       newChars.construct<Latin1Buffer>();
1258 
1259       if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length,
1260                        &resultLength))
1261         return nullptr;
1262     } else {
1263       newChars.construct<TwoByteBuffer>();
1264 
1265       if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1266                        &resultLength))
1267         return nullptr;
1268     }
1269   }
1270 
1271   return newChars.constructed<Latin1Buffer>()
1272              ? newChars.ref<Latin1Buffer>().toStringDontDeflate(cx,
1273                                                                 resultLength)
1274              : newChars.ref<TwoByteBuffer>().toStringDontDeflate(cx,
1275                                                                  resultLength);
1276 }
1277 
StringToUpperCase(JSContext * cx,HandleString string)1278 JSString* js::StringToUpperCase(JSContext* cx, HandleString string) {
1279   JSLinearString* linear = string->ensureLinear(cx);
1280   if (!linear) return nullptr;
1281 
1282   if (linear->hasLatin1Chars()) return ToUpperCase<Latin1Char>(cx, linear);
1283   return ToUpperCase<char16_t>(cx, linear);
1284 }
1285 
str_toUpperCase(JSContext * cx,unsigned argc,Value * vp)1286 bool js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1287   CallArgs args = CallArgsFromVp(argc, vp);
1288 
1289   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1290   if (!str) return false;
1291 
1292   JSString* result = StringToUpperCase(cx, str);
1293   if (!result) return false;
1294 
1295   args.rval().setString(result);
1296   return true;
1297 }
1298 
intl_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1299 bool js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1300   CallArgs args = CallArgsFromVp(argc, vp);
1301   MOZ_ASSERT(args.length() == 2);
1302   MOZ_ASSERT(args[0].isString());
1303   MOZ_ASSERT(args[1].isString());
1304 
1305   RootedString string(cx, args[0].toString());
1306 
1307   const char* locale = CaseMappingLocale(cx, args[1].toString());
1308   if (!locale) return false;
1309 
1310   // Call String.prototype.toUpperCase() for language independent casing.
1311   if (intl::StringsAreEqual(locale, "")) {
1312     JSString* str = js::StringToUpperCase(cx, string);
1313     if (!str) return false;
1314 
1315     args.rval().setString(str);
1316     return true;
1317   }
1318 
1319   AutoStableStringChars inputChars(cx);
1320   if (!inputChars.initTwoByte(cx, string)) return false;
1321   mozilla::Range<const char16_t> input = inputChars.twoByteRange();
1322 
1323   // Maximum case mapping length is three characters.
1324   static_assert(JSString::MAX_LENGTH < INT32_MAX / 3,
1325                 "Case conversion doesn't overflow int32_t indices");
1326 
1327   JSString* str = intl::CallICU(
1328       cx, [&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
1329         return u_strToUpper(chars, size, input.begin().get(), input.length(),
1330                             locale, status);
1331       });
1332   if (!str) return false;
1333 
1334   args.rval().setString(str);
1335   return true;
1336 }
1337 
1338 #if EXPOSE_INTL_API
1339 
1340   // String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
1341   // with core functionality performed by the intrinsic above.
1342 
1343 #else
1344 
str_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1345 bool js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1346   CallArgs args = CallArgsFromVp(argc, vp);
1347 
1348   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1349   if (!str) return false;
1350 
1351   /*
1352    * Forcefully ignore the first (or any) argument and return toUpperCase(),
1353    * ECMA has reserved that argument, presumably for defining the locale.
1354    */
1355   if (cx->runtime()->localeCallbacks &&
1356       cx->runtime()->localeCallbacks->localeToUpperCase) {
1357     RootedValue result(cx);
1358     if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
1359       return false;
1360 
1361     args.rval().set(result);
1362     return true;
1363   }
1364 
1365   RootedLinearString linear(cx, str->ensureLinear(cx));
1366   if (!linear) return false;
1367 
1368   JSString* result = StringToUpperCase(cx, linear);
1369   if (!result) return false;
1370 
1371   args.rval().setString(result);
1372   return true;
1373 }
1374 
1375 #endif  // EXPOSE_INTL_API
1376 
1377 #if EXPOSE_INTL_API
1378 
1379   // String.prototype.localeCompare is self-hosted when Intl is exposed.
1380 
1381 #else
1382 
str_localeCompare(JSContext * cx,unsigned argc,Value * vp)1383 bool js::str_localeCompare(JSContext* cx, unsigned argc, Value* vp) {
1384   CallArgs args = CallArgsFromVp(argc, vp);
1385   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1386   if (!str) return false;
1387 
1388   RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
1389   if (!thatStr) return false;
1390 
1391   if (cx->runtime()->localeCallbacks &&
1392       cx->runtime()->localeCallbacks->localeCompare) {
1393     RootedValue result(cx);
1394     if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr,
1395                                                        &result))
1396       return false;
1397 
1398     args.rval().set(result);
1399     return true;
1400   }
1401 
1402   int32_t result;
1403   if (!CompareStrings(cx, str, thatStr, &result)) return false;
1404 
1405   args.rval().setInt32(result);
1406   return true;
1407 }
1408 
1409 #endif  // EXPOSE_INTL_API
1410 
1411 #if EXPOSE_INTL_API
1412 
1413 // ES2017 draft rev 45e890512fd77add72cc0ee742785f9f6f6482de
1414 // 21.1.3.12 String.prototype.normalize ( [ form ] )
str_normalize(JSContext * cx,unsigned argc,Value * vp)1415 bool js::str_normalize(JSContext* cx, unsigned argc, Value* vp) {
1416   CallArgs args = CallArgsFromVp(argc, vp);
1417 
1418   // Steps 1-2.
1419   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1420   if (!str) return false;
1421 
1422   enum NormalizationForm { NFC, NFD, NFKC, NFKD };
1423 
1424   NormalizationForm form;
1425   if (!args.hasDefined(0)) {
1426     // Step 3.
1427     form = NFC;
1428   } else {
1429     // Step 4.
1430     JSLinearString* formStr = ArgToLinearString(cx, args, 0);
1431     if (!formStr) return false;
1432 
1433     // Step 5.
1434     if (EqualStrings(formStr, cx->names().NFC)) {
1435       form = NFC;
1436     } else if (EqualStrings(formStr, cx->names().NFD)) {
1437       form = NFD;
1438     } else if (EqualStrings(formStr, cx->names().NFKC)) {
1439       form = NFKC;
1440     } else if (EqualStrings(formStr, cx->names().NFKD)) {
1441       form = NFKD;
1442     } else {
1443       JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1444                                 JSMSG_INVALID_NORMALIZE_FORM);
1445       return false;
1446     }
1447   }
1448 
1449   // Latin-1 strings are already in Normalization Form C.
1450   if (form == NFC && str->hasLatin1Chars()) {
1451     // Step 7.
1452     args.rval().setString(str);
1453     return true;
1454   }
1455 
1456   // Step 6.
1457   AutoStableStringChars stableChars(cx);
1458   if (!stableChars.initTwoByte(cx, str)) return false;
1459 
1460   mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();
1461 
1462   // The unorm2_getXXXInstance() methods return a shared instance which must
1463   // not be deleted.
1464   UErrorCode status = U_ZERO_ERROR;
1465   const UNormalizer2* normalizer;
1466   if (form == NFC) {
1467     normalizer = unorm2_getNFCInstance(&status);
1468   } else if (form == NFD) {
1469     normalizer = unorm2_getNFDInstance(&status);
1470   } else if (form == NFKC) {
1471     normalizer = unorm2_getNFKCInstance(&status);
1472   } else {
1473     MOZ_ASSERT(form == NFKD);
1474     normalizer = unorm2_getNFKDInstance(&status);
1475   }
1476   if (U_FAILURE(status)) {
1477     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1478                               JSMSG_INTERNAL_INTL_ERROR);
1479     return false;
1480   }
1481 
1482   int32_t spanLength = unorm2_spanQuickCheckYes(
1483       normalizer, srcChars.begin().get(), srcChars.length(), &status);
1484   if (U_FAILURE(status)) {
1485     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1486                               JSMSG_INTERNAL_INTL_ERROR);
1487     return false;
1488   }
1489   MOZ_ASSERT(0 <= spanLength && size_t(spanLength) <= srcChars.length());
1490 
1491   // Return if the input string is already normalized.
1492   if (size_t(spanLength) == srcChars.length()) {
1493     // Step 7.
1494     args.rval().setString(str);
1495     return true;
1496   }
1497 
1498   static const size_t INLINE_CAPACITY = 32;
1499 
1500   Vector<char16_t, INLINE_CAPACITY> chars(cx);
1501   if (!chars.resize(Max(INLINE_CAPACITY, srcChars.length()))) return false;
1502 
1503   // Copy the already normalized prefix.
1504   if (spanLength > 0)
1505     PodCopy(chars.begin(), srcChars.begin().get(), size_t(spanLength));
1506 
1507   mozilla::RangedPtr<const char16_t> remainingStart =
1508       srcChars.begin() + spanLength;
1509   size_t remainingLength = srcChars.length() - size_t(spanLength);
1510 
1511   int32_t size = unorm2_normalizeSecondAndAppend(
1512       normalizer, chars.begin(), spanLength, chars.length(),
1513       remainingStart.get(), remainingLength, &status);
1514   if (status == U_BUFFER_OVERFLOW_ERROR) {
1515     MOZ_ASSERT(size >= 0);
1516     if (!chars.resize(size)) return false;
1517     status = U_ZERO_ERROR;
1518 #ifdef DEBUG
1519     int32_t finalSize =
1520 #endif
1521         unorm2_normalizeSecondAndAppend(normalizer, chars.begin(), spanLength,
1522                                         chars.length(), remainingStart.get(),
1523                                         remainingLength, &status);
1524     MOZ_ASSERT_IF(!U_FAILURE(status), size == finalSize);
1525   }
1526   if (U_FAILURE(status)) {
1527     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1528                               JSMSG_INTERNAL_INTL_ERROR);
1529     return false;
1530   }
1531 
1532   MOZ_ASSERT(size >= 0);
1533   JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
1534   if (!ns) return false;
1535 
1536   // Step 7.
1537   args.rval().setString(ns);
1538   return true;
1539 }
1540 
1541 #endif  // EXPOSE_INTL_API
1542 
str_charAt(JSContext * cx,unsigned argc,Value * vp)1543 bool js::str_charAt(JSContext* cx, unsigned argc, Value* vp) {
1544   CallArgs args = CallArgsFromVp(argc, vp);
1545 
1546   RootedString str(cx);
1547   size_t i;
1548   if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
1549     str = args.thisv().toString();
1550     i = size_t(args[0].toInt32());
1551     if (i >= str->length()) goto out_of_range;
1552   } else {
1553     str = ToStringForStringFunction(cx, args.thisv());
1554     if (!str) return false;
1555 
1556     double d = 0.0;
1557     if (args.length() > 0 && !ToInteger(cx, args[0], &d)) return false;
1558 
1559     if (d < 0 || str->length() <= d) goto out_of_range;
1560     i = size_t(d);
1561   }
1562 
1563   str = cx->staticStrings().getUnitStringForElement(cx, str, i);
1564   if (!str) return false;
1565   args.rval().setString(str);
1566   return true;
1567 
1568 out_of_range:
1569   args.rval().setString(cx->runtime()->emptyString);
1570   return true;
1571 }
1572 
str_charCodeAt_impl(JSContext * cx,HandleString string,HandleValue index,MutableHandleValue res)1573 bool js::str_charCodeAt_impl(JSContext* cx, HandleString string,
1574                              HandleValue index, MutableHandleValue res) {
1575   size_t i;
1576   if (index.isInt32()) {
1577     i = index.toInt32();
1578     if (i >= string->length()) goto out_of_range;
1579   } else {
1580     double d = 0.0;
1581     if (!ToInteger(cx, index, &d)) return false;
1582     // check whether d is negative as size_t is unsigned
1583     if (d < 0 || string->length() <= d) goto out_of_range;
1584     i = size_t(d);
1585   }
1586   char16_t c;
1587   if (!string->getChar(cx, i, &c)) return false;
1588   res.setInt32(c);
1589   return true;
1590 
1591 out_of_range:
1592   res.setNaN();
1593   return true;
1594 }
1595 
str_charCodeAt(JSContext * cx,unsigned argc,Value * vp)1596 bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) {
1597   CallArgs args = CallArgsFromVp(argc, vp);
1598   RootedString str(cx);
1599   RootedValue index(cx);
1600   if (args.thisv().isString()) {
1601     str = args.thisv().toString();
1602   } else {
1603     str = ToStringForStringFunction(cx, args.thisv());
1604     if (!str) return false;
1605   }
1606   if (args.length() != 0)
1607     index = args[0];
1608   else
1609     index.setInt32(0);
1610 
1611   return js::str_charCodeAt_impl(cx, str, index, args.rval());
1612 }
1613 
1614 /*
1615  * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1616  * The patlen argument must be positive and no greater than sBMHPatLenMax.
1617  *
1618  * Return the index of pat in text, or -1 if not found.
1619  */
1620 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1621 static const uint32_t sBMHPatLenMax = 255;   /* skip table element is uint8_t */
1622 static const int sBMHBadPattern =
1623     -2; /* return value if pat is not ISO-Latin-1 */
1624 
1625 template <typename TextChar, typename PatChar>
BoyerMooreHorspool(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1626 static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen,
1627                               const PatChar* pat, uint32_t patLen) {
1628   MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1629 
1630   uint8_t skip[sBMHCharSetSize];
1631   for (uint32_t i = 0; i < sBMHCharSetSize; i++) skip[i] = uint8_t(patLen);
1632 
1633   uint32_t patLast = patLen - 1;
1634   for (uint32_t i = 0; i < patLast; i++) {
1635     char16_t c = pat[i];
1636     if (c >= sBMHCharSetSize) return sBMHBadPattern;
1637     skip[c] = uint8_t(patLast - i);
1638   }
1639 
1640   for (uint32_t k = patLast; k < textLen;) {
1641     for (uint32_t i = k, j = patLast;; i--, j--) {
1642       if (text[i] != pat[j]) break;
1643       if (j == 0) return static_cast<int>(i); /* safe: max string size */
1644     }
1645 
1646     char16_t c = text[k];
1647     k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1648   }
1649   return -1;
1650 }
1651 
1652 template <typename TextChar, typename PatChar>
1653 struct MemCmp {
1654   typedef uint32_t Extent;
computeExtentMemCmp1655   static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
1656                                                 uint32_t patLen) {
1657     return (patLen - 1) * sizeof(PatChar);
1658   }
matchMemCmp1659   static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1660                                       Extent extent) {
1661     MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1662     return memcmp(p, t, extent) == 0;
1663   }
1664 };
1665 
1666 template <typename TextChar, typename PatChar>
1667 struct ManualCmp {
1668   typedef const PatChar* Extent;
computeExtentManualCmp1669   static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat,
1670                                                 uint32_t patLen) {
1671     return pat + patLen;
1672   }
matchManualCmp1673   static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1674                                       Extent extent) {
1675     for (; p != extent; ++p, ++t) {
1676       if (*p != *t) return false;
1677     }
1678     return true;
1679   }
1680 };
1681 
1682 template <typename TextChar, typename PatChar>
FirstCharMatcherUnrolled(const TextChar * text,uint32_t n,const PatChar pat)1683 static const TextChar* FirstCharMatcherUnrolled(const TextChar* text,
1684                                                 uint32_t n, const PatChar pat) {
1685   const TextChar* textend = text + n;
1686   const TextChar* t = text;
1687 
1688   switch ((textend - t) & 7) {
1689     case 0:
1690       if (*t++ == pat) return t - 1;
1691       MOZ_FALLTHROUGH;
1692     case 7:
1693       if (*t++ == pat) return t - 1;
1694       MOZ_FALLTHROUGH;
1695     case 6:
1696       if (*t++ == pat) return t - 1;
1697       MOZ_FALLTHROUGH;
1698     case 5:
1699       if (*t++ == pat) return t - 1;
1700       MOZ_FALLTHROUGH;
1701     case 4:
1702       if (*t++ == pat) return t - 1;
1703       MOZ_FALLTHROUGH;
1704     case 3:
1705       if (*t++ == pat) return t - 1;
1706       MOZ_FALLTHROUGH;
1707     case 2:
1708       if (*t++ == pat) return t - 1;
1709       MOZ_FALLTHROUGH;
1710     case 1:
1711       if (*t++ == pat) return t - 1;
1712   }
1713   while (textend != t) {
1714     if (t[0] == pat) return t;
1715     if (t[1] == pat) return t + 1;
1716     if (t[2] == pat) return t + 2;
1717     if (t[3] == pat) return t + 3;
1718     if (t[4] == pat) return t + 4;
1719     if (t[5] == pat) return t + 5;
1720     if (t[6] == pat) return t + 6;
1721     if (t[7] == pat) return t + 7;
1722     t += 8;
1723   }
1724   return nullptr;
1725 }
1726 
FirstCharMatcher8bit(const char * text,uint32_t n,const char pat)1727 static const char* FirstCharMatcher8bit(const char* text, uint32_t n,
1728                                         const char pat) {
1729   return reinterpret_cast<const char*>(memchr(text, pat, n));
1730 }
1731 
1732 template <class InnerMatch, typename TextChar, typename PatChar>
Matcher(const TextChar * text,uint32_t textlen,const PatChar * pat,uint32_t patlen)1733 static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
1734                    uint32_t patlen) {
1735   MOZ_ASSERT(patlen > 0);
1736 
1737   if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) return -1;
1738 
1739   const typename InnerMatch::Extent extent =
1740       InnerMatch::computeExtent(pat, patlen);
1741 
1742   uint32_t i = 0;
1743   uint32_t n = textlen - patlen + 1;
1744   while (i < n) {
1745     const TextChar* pos;
1746 
1747     if (sizeof(TextChar) == 1) {
1748       MOZ_ASSERT(pat[0] <= 0xff);
1749       pos = (TextChar*)FirstCharMatcher8bit((char*)text + i, n - i, pat[0]);
1750     } else {
1751       pos = FirstCharMatcherUnrolled(text + i, n - i, char16_t(pat[0]));
1752     }
1753 
1754     if (pos == nullptr) return -1;
1755 
1756     i = static_cast<uint32_t>(pos - text);
1757     if (InnerMatch::match(pat + 1, text + i + 1, extent)) return i;
1758 
1759     i += 1;
1760   }
1761   return -1;
1762 }
1763 
1764 template <typename TextChar, typename PatChar>
StringMatch(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1765 static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen,
1766                                          const PatChar* pat, uint32_t patLen) {
1767   if (patLen == 0) return 0;
1768   if (textLen < patLen) return -1;
1769 
1770 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1771   /*
1772    * Given enough registers, the unrolled loop below is faster than the
1773    * following loop. 32-bit x86 does not have enough registers.
1774    */
1775   if (patLen == 1) {
1776     const PatChar p0 = *pat;
1777     const TextChar* end = text + textLen;
1778     for (const TextChar* c = text; c != end; ++c) {
1779       if (*c == p0) return c - text;
1780     }
1781     return -1;
1782   }
1783 #endif
1784 
1785   /*
1786    * If the text or pattern string is short, BMH will be more expensive than
1787    * the basic linear scan due to initialization cost and a more complex loop
1788    * body. While the correct threshold is input-dependent, we can make a few
1789    * conservative observations:
1790    *  - When |textLen| is "big enough", the initialization time will be
1791    *    proportionally small, so the worst-case slowdown is minimized.
1792    *  - When |patLen| is "too small", even the best case for BMH will be
1793    *    slower than a simple scan for large |textLen| due to the more complex
1794    *    loop body of BMH.
1795    * From this, the values for "big enough" and "too small" are determined
1796    * empirically. See bug 526348.
1797    */
1798   if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
1799     int index = BoyerMooreHorspool(text, textLen, pat, patLen);
1800     if (index != sBMHBadPattern) return index;
1801   }
1802 
1803   /*
1804    * For big patterns with large potential overlap we want the SIMD-optimized
1805    * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1806    * use memcmp if one of the strings is TwoByte and the other is Latin-1.
1807    */
1808   return (patLen > 128 && IsSame<TextChar, PatChar>::value)
1809              ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(
1810                    text, textLen, pat, patLen)
1811              : Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(
1812                    text, textLen, pat, patLen);
1813 }
1814 
StringMatch(JSLinearString * text,JSLinearString * pat,uint32_t start=0)1815 static int32_t StringMatch(JSLinearString* text, JSLinearString* pat,
1816                            uint32_t start = 0) {
1817   MOZ_ASSERT(start <= text->length());
1818   uint32_t textLen = text->length() - start;
1819   uint32_t patLen = pat->length();
1820 
1821   int match;
1822   AutoCheckCannotGC nogc;
1823   if (text->hasLatin1Chars()) {
1824     const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1825     if (pat->hasLatin1Chars())
1826       match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1827     else
1828       match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1829   } else {
1830     const char16_t* textChars = text->twoByteChars(nogc) + start;
1831     if (pat->hasLatin1Chars())
1832       match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1833     else
1834       match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1835   }
1836 
1837   return (match == -1) ? -1 : start + match;
1838 }
1839 
1840 static const size_t sRopeMatchThresholdRatioLog2 = 4;
1841 
StringFindPattern(JSLinearString * text,JSLinearString * pat,size_t start)1842 int js::StringFindPattern(JSLinearString* text, JSLinearString* pat,
1843                           size_t start) {
1844   return StringMatch(text, pat, start);
1845 }
1846 
1847 // When an algorithm does not need a string represented as a single linear
1848 // array of characters, this range utility may be used to traverse the string a
1849 // sequence of linear arrays of characters. This avoids flattening ropes.
1850 class StringSegmentRange {
1851   // If malloc() shows up in any profiles from this vector, we can add a new
1852   // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1853   using StackVector = JS::GCVector<JSString*, 16>;
1854   Rooted<StackVector> stack;
1855   RootedLinearString cur;
1856 
settle(JSString * str)1857   bool settle(JSString* str) {
1858     while (str->isRope()) {
1859       JSRope& rope = str->asRope();
1860       if (!stack.append(rope.rightChild())) return false;
1861       str = rope.leftChild();
1862     }
1863     cur = &str->asLinear();
1864     return true;
1865   }
1866 
1867  public:
StringSegmentRange(JSContext * cx)1868   explicit StringSegmentRange(JSContext* cx)
1869       : stack(cx, StackVector(cx)), cur(cx) {}
1870 
init(JSString * str)1871   MOZ_MUST_USE bool init(JSString* str) {
1872     MOZ_ASSERT(stack.empty());
1873     return settle(str);
1874   }
1875 
empty() const1876   bool empty() const { return cur == nullptr; }
1877 
front() const1878   JSLinearString* front() const {
1879     MOZ_ASSERT(!cur->isRope());
1880     return cur;
1881   }
1882 
popFront()1883   MOZ_MUST_USE bool popFront() {
1884     MOZ_ASSERT(!empty());
1885     if (stack.empty()) {
1886       cur = nullptr;
1887       return true;
1888     }
1889     return settle(stack.popCopy());
1890   }
1891 };
1892 
1893 typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
1894 
1895 template <typename TextChar, typename PatChar>
RopeMatchImpl(const AutoCheckCannotGC & nogc,LinearStringVector & strings,const PatChar * pat,size_t patLen)1896 static int RopeMatchImpl(const AutoCheckCannotGC& nogc,
1897                          LinearStringVector& strings, const PatChar* pat,
1898                          size_t patLen) {
1899   /* Absolute offset from the beginning of the logical text string. */
1900   int pos = 0;
1901 
1902   for (JSLinearString** outerp = strings.begin(); outerp != strings.end();
1903        ++outerp) {
1904     /* Try to find a match within 'outer'. */
1905     JSLinearString* outer = *outerp;
1906     const TextChar* chars = outer->chars<TextChar>(nogc);
1907     size_t len = outer->length();
1908     int matchResult = StringMatch(chars, len, pat, patLen);
1909     if (matchResult != -1) {
1910       /* Matched! */
1911       return pos + matchResult;
1912     }
1913 
1914     /* Try to find a match starting in 'outer' and running into other nodes. */
1915     const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
1916     const TextChar* const textend = chars + len;
1917     const PatChar p0 = *pat;
1918     const PatChar* const p1 = pat + 1;
1919     const PatChar* const patend = pat + patLen;
1920     for (const TextChar* t = text; t != textend;) {
1921       if (*t++ != p0) continue;
1922 
1923       JSLinearString** innerp = outerp;
1924       const TextChar* ttend = textend;
1925       const TextChar* tt = t;
1926       for (const PatChar *pp = p1; pp != patend; ++pp, ++tt) {
1927         while (tt == ttend) {
1928           if (++innerp == strings.end()) return -1;
1929 
1930           JSLinearString* inner = *innerp;
1931           tt = inner->chars<TextChar>(nogc);
1932           ttend = tt + inner->length();
1933         }
1934         if (*pp != *tt) goto break_continue;
1935       }
1936 
1937       /* Matched! */
1938       return pos + (t - chars) - 1; /* -1 because of *t++ above */
1939 
1940     break_continue:;
1941     }
1942 
1943     pos += len;
1944   }
1945 
1946   return -1;
1947 }
1948 
1949 /*
1950  * RopeMatch takes the text to search and the pattern to search for in the text.
1951  * RopeMatch returns false on OOM and otherwise returns the match index through
1952  * the 'match' outparam (-1 for not found).
1953  */
RopeMatch(JSContext * cx,JSRope * text,JSLinearString * pat,int * match)1954 static bool RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat,
1955                       int* match) {
1956   uint32_t patLen = pat->length();
1957   if (patLen == 0) {
1958     *match = 0;
1959     return true;
1960   }
1961   if (text->length() < patLen) {
1962     *match = -1;
1963     return true;
1964   }
1965 
1966   /*
1967    * List of leaf nodes in the rope. If we run out of memory when trying to
1968    * append to this list, we can still fall back to StringMatch, so use the
1969    * system allocator so we don't report OOM in that case.
1970    */
1971   LinearStringVector strings;
1972 
1973   /*
1974    * We don't want to do rope matching if there is a poor node-to-char ratio,
1975    * since this means spending a lot of time in the match loop below. We also
1976    * need to build the list of leaf nodes. Do both here: iterate over the
1977    * nodes so long as there are not too many.
1978    *
1979    * We also don't use rope matching if the rope contains both Latin-1 and
1980    * TwoByte nodes, to simplify the match algorithm.
1981    */
1982   {
1983     size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
1984     StringSegmentRange r(cx);
1985     if (!r.init(text)) return false;
1986 
1987     bool textIsLatin1 = text->hasLatin1Chars();
1988     while (!r.empty()) {
1989       if (threshold-- == 0 || r.front()->hasLatin1Chars() != textIsLatin1 ||
1990           !strings.append(r.front())) {
1991         JSLinearString* linear = text->ensureLinear(cx);
1992         if (!linear) return false;
1993 
1994         *match = StringMatch(linear, pat);
1995         return true;
1996       }
1997       if (!r.popFront()) return false;
1998     }
1999   }
2000 
2001   AutoCheckCannotGC nogc;
2002   if (text->hasLatin1Chars()) {
2003     if (pat->hasLatin1Chars())
2004       *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc),
2005                                          patLen);
2006     else
2007       *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc),
2008                                          patLen);
2009   } else {
2010     if (pat->hasLatin1Chars())
2011       *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc),
2012                                        patLen);
2013     else
2014       *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc),
2015                                        patLen);
2016   }
2017 
2018   return true;
2019 }
2020 
ReportErrorIfFirstArgIsRegExp(JSContext * cx,const CallArgs & args)2021 static MOZ_ALWAYS_INLINE bool ReportErrorIfFirstArgIsRegExp(
2022     JSContext* cx, const CallArgs& args) {
2023   // Only call IsRegExp if the first argument is definitely an object, so we
2024   // don't pay the cost of an additional function call in the common case.
2025   if (args.length() == 0 || !args[0].isObject()) return true;
2026 
2027   bool isRegExp;
2028   if (!IsRegExp(cx, args[0], &isRegExp)) return false;
2029 
2030   if (isRegExp) {
2031     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
2032                               JSMSG_INVALID_ARG_TYPE, "first", "",
2033                               "Regular Expression");
2034     return false;
2035   }
2036   return true;
2037 }
2038 
2039 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2040 // 21.1.3.7 String.prototype.includes ( searchString [ , position ] )
str_includes(JSContext * cx,unsigned argc,Value * vp)2041 bool js::str_includes(JSContext* cx, unsigned argc, Value* vp) {
2042   CallArgs args = CallArgsFromVp(argc, vp);
2043 
2044   // Steps 1-2.
2045   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
2046   if (!str) return false;
2047 
2048   // Steps 3-4.
2049   if (!ReportErrorIfFirstArgIsRegExp(cx, args)) return false;
2050 
2051   // Step 5.
2052   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2053   if (!searchStr) return false;
2054 
2055   // Step 6.
2056   uint32_t pos = 0;
2057   if (args.hasDefined(1)) {
2058     if (args[1].isInt32()) {
2059       int i = args[1].toInt32();
2060       pos = (i < 0) ? 0U : uint32_t(i);
2061     } else {
2062       double d;
2063       if (!ToInteger(cx, args[1], &d)) return false;
2064       pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
2065     }
2066   }
2067 
2068   // Step 7.
2069   uint32_t textLen = str->length();
2070 
2071   // Step 8.
2072   uint32_t start = Min(Max(pos, 0U), textLen);
2073 
2074   // Steps 9-10.
2075   JSLinearString* text = str->ensureLinear(cx);
2076   if (!text) return false;
2077 
2078   args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
2079   return true;
2080 }
2081 
2082 /* ES6 20120927 draft 15.5.4.7. */
str_indexOf(JSContext * cx,unsigned argc,Value * vp)2083 bool js::str_indexOf(JSContext* cx, unsigned argc, Value* vp) {
2084   CallArgs args = CallArgsFromVp(argc, vp);
2085 
2086   // Steps 1, 2, and 3
2087   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
2088   if (!str) return false;
2089 
2090   // Steps 4 and 5
2091   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2092   if (!searchStr) return false;
2093 
2094   // Steps 6 and 7
2095   uint32_t pos = 0;
2096   if (args.hasDefined(1)) {
2097     if (args[1].isInt32()) {
2098       int i = args[1].toInt32();
2099       pos = (i < 0) ? 0U : uint32_t(i);
2100     } else {
2101       double d;
2102       if (!ToInteger(cx, args[1], &d)) return false;
2103       pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
2104     }
2105   }
2106 
2107   // Step 8
2108   uint32_t textLen = str->length();
2109 
2110   // Step 9
2111   uint32_t start = Min(Max(pos, 0U), textLen);
2112 
2113   if (str == searchStr) {
2114     // AngularJS often invokes "false".indexOf("false"). This check should
2115     // be cheap enough to not hurt anything else.
2116     args.rval().setInt32(start == 0 ? 0 : -1);
2117     return true;
2118   }
2119 
2120   // Steps 10 and 11
2121   JSLinearString* text = str->ensureLinear(cx);
2122   if (!text) return false;
2123 
2124   args.rval().setInt32(StringMatch(text, searchStr, start));
2125   return true;
2126 }
2127 
2128 template <typename TextChar, typename PatChar>
LastIndexOfImpl(const TextChar * text,size_t textLen,const PatChar * pat,size_t patLen,size_t start)2129 static int32_t LastIndexOfImpl(const TextChar* text, size_t textLen,
2130                                const PatChar* pat, size_t patLen,
2131                                size_t start) {
2132   MOZ_ASSERT(patLen > 0);
2133   MOZ_ASSERT(patLen <= textLen);
2134   MOZ_ASSERT(start <= textLen - patLen);
2135 
2136   const PatChar p0 = *pat;
2137   const PatChar* patNext = pat + 1;
2138   const PatChar* patEnd = pat + patLen;
2139 
2140   for (const TextChar* t = text + start; t >= text; --t) {
2141     if (*t == p0) {
2142       const TextChar* t1 = t + 1;
2143       for (const PatChar *p1 = patNext; p1 < patEnd; ++p1, ++t1) {
2144         if (*t1 != *p1) goto break_continue;
2145       }
2146 
2147       return static_cast<int32_t>(t - text);
2148     }
2149   break_continue:;
2150   }
2151 
2152   return -1;
2153 }
2154 
2155 // ES2017 draft rev 6859bb9ccaea9c6ede81d71e5320e3833b92cb3e
2156 // 21.1.3.9 String.prototype.lastIndexOf ( searchString [ , position ] )
str_lastIndexOf(JSContext * cx,unsigned argc,Value * vp)2157 bool js::str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp) {
2158   CallArgs args = CallArgsFromVp(argc, vp);
2159 
2160   // Steps 1-2.
2161   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
2162   if (!str) return false;
2163 
2164   // Step 3.
2165   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2166   if (!searchStr) return false;
2167 
2168   // Step 6.
2169   size_t len = str->length();
2170 
2171   // Step 8.
2172   size_t searchLen = searchStr->length();
2173 
2174   // Steps 4-5, 7.
2175   int start = len - searchLen;  // Start searching here
2176   if (args.hasDefined(1)) {
2177     if (args[1].isInt32()) {
2178       int i = args[1].toInt32();
2179       if (i <= 0)
2180         start = 0;
2181       else if (i < start)
2182         start = i;
2183     } else {
2184       double d;
2185       if (!ToNumber(cx, args[1], &d)) return false;
2186       if (!IsNaN(d)) {
2187         d = JS::ToInteger(d);
2188         if (d <= 0)
2189           start = 0;
2190         else if (d < start)
2191           start = int(d);
2192       }
2193     }
2194   }
2195 
2196   if (str == searchStr) {
2197     args.rval().setInt32(0);
2198     return true;
2199   }
2200 
2201   if (searchLen > len) {
2202     args.rval().setInt32(-1);
2203     return true;
2204   }
2205 
2206   if (searchLen == 0) {
2207     args.rval().setInt32(start);
2208     return true;
2209   }
2210   MOZ_ASSERT(0 <= start && size_t(start) < len);
2211 
2212   JSLinearString* text = str->ensureLinear(cx);
2213   if (!text) return false;
2214 
2215   // Step 9.
2216   int32_t res;
2217   AutoCheckCannotGC nogc;
2218   if (text->hasLatin1Chars()) {
2219     const Latin1Char* textChars = text->latin1Chars(nogc);
2220     if (searchStr->hasLatin1Chars())
2221       res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2222                             searchLen, start);
2223     else
2224       res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2225                             searchLen, start);
2226   } else {
2227     const char16_t* textChars = text->twoByteChars(nogc);
2228     if (searchStr->hasLatin1Chars())
2229       res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2230                             searchLen, start);
2231     else
2232       res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2233                             searchLen, start);
2234   }
2235 
2236   args.rval().setInt32(res);
2237   return true;
2238 }
2239 
2240 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2241 // 21.1.3.20 String.prototype.startsWith ( searchString [ , position ] )
str_startsWith(JSContext * cx,unsigned argc,Value * vp)2242 bool js::str_startsWith(JSContext* cx, unsigned argc, Value* vp) {
2243   CallArgs args = CallArgsFromVp(argc, vp);
2244 
2245   // Steps 1-2.
2246   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
2247   if (!str) return false;
2248 
2249   // Steps 3-4.
2250   if (!ReportErrorIfFirstArgIsRegExp(cx, args)) return false;
2251 
2252   // Step 5.
2253   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2254   if (!searchStr) return false;
2255 
2256   // Step 6.
2257   uint32_t pos = 0;
2258   if (args.hasDefined(1)) {
2259     if (args[1].isInt32()) {
2260       int i = args[1].toInt32();
2261       pos = (i < 0) ? 0U : uint32_t(i);
2262     } else {
2263       double d;
2264       if (!ToInteger(cx, args[1], &d)) return false;
2265       pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
2266     }
2267   }
2268 
2269   // Step 7.
2270   uint32_t textLen = str->length();
2271 
2272   // Step 8.
2273   uint32_t start = Min(Max(pos, 0U), textLen);
2274 
2275   // Step 9.
2276   uint32_t searchLen = searchStr->length();
2277 
2278   // Step 10.
2279   if (searchLen + start < searchLen || searchLen + start > textLen) {
2280     args.rval().setBoolean(false);
2281     return true;
2282   }
2283 
2284   // Steps 11-12.
2285   JSLinearString* text = str->ensureLinear(cx);
2286   if (!text) return false;
2287 
2288   args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2289   return true;
2290 }
2291 
2292 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2293 // 21.1.3.6 String.prototype.endsWith ( searchString [ , endPosition ] )
str_endsWith(JSContext * cx,unsigned argc,Value * vp)2294 bool js::str_endsWith(JSContext* cx, unsigned argc, Value* vp) {
2295   CallArgs args = CallArgsFromVp(argc, vp);
2296 
2297   // Steps 1-2.
2298   RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
2299   if (!str) return false;
2300 
2301   // Steps 3-4.
2302   if (!ReportErrorIfFirstArgIsRegExp(cx, args)) return false;
2303 
2304   // Step 5.
2305   RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2306   if (!searchStr) return false;
2307 
2308   // Step 6.
2309   uint32_t textLen = str->length();
2310 
2311   // Step 7.
2312   uint32_t pos = textLen;
2313   if (args.hasDefined(1)) {
2314     if (args[1].isInt32()) {
2315       int i = args[1].toInt32();
2316       pos = (i < 0) ? 0U : uint32_t(i);
2317     } else {
2318       double d;
2319       if (!ToInteger(cx, args[1], &d)) return false;
2320       pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
2321     }
2322   }
2323 
2324   // Step 8.
2325   uint32_t end = Min(Max(pos, 0U), textLen);
2326 
2327   // Step 9.
2328   uint32_t searchLen = searchStr->length();
2329 
2330   // Step 11 (reordered).
2331   if (searchLen > end) {
2332     args.rval().setBoolean(false);
2333     return true;
2334   }
2335 
2336   // Step 10.
2337   uint32_t start = end - searchLen;
2338 
2339   // Steps 12-13.
2340   JSLinearString* text = str->ensureLinear(cx);
2341   if (!text) return false;
2342 
2343   args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2344   return true;
2345 }
2346 
2347 template <typename CharT>
TrimString(const CharT * chars,bool trimStart,bool trimEnd,size_t length,size_t * pBegin,size_t * pEnd)2348 static void TrimString(const CharT* chars, bool trimStart, bool trimEnd,
2349                        size_t length, size_t* pBegin, size_t* pEnd) {
2350   size_t begin = 0, end = length;
2351 
2352   if (trimStart) {
2353     while (begin < length && unicode::IsSpace(chars[begin])) ++begin;
2354   }
2355 
2356   if (trimEnd) {
2357     while (end > begin && unicode::IsSpace(chars[end - 1])) --end;
2358   }
2359 
2360   *pBegin = begin;
2361   *pEnd = end;
2362 }
2363 
TrimString(JSContext * cx,const CallArgs & args,bool trimStart,bool trimEnd)2364 static bool TrimString(JSContext* cx, const CallArgs& args, bool trimStart,
2365                        bool trimEnd) {
2366   JSString* str = ToStringForStringFunction(cx, args.thisv());
2367   if (!str) return false;
2368 
2369   JSLinearString* linear = str->ensureLinear(cx);
2370   if (!linear) return false;
2371 
2372   size_t length = linear->length();
2373   size_t begin, end;
2374   if (linear->hasLatin1Chars()) {
2375     AutoCheckCannotGC nogc;
2376     TrimString(linear->latin1Chars(nogc), trimStart, trimEnd, length, &begin,
2377                &end);
2378   } else {
2379     AutoCheckCannotGC nogc;
2380     TrimString(linear->twoByteChars(nogc), trimStart, trimEnd, length, &begin,
2381                &end);
2382   }
2383 
2384   JSLinearString* result = NewDependentString(cx, linear, begin, end - begin);
2385   if (!result) return false;
2386 
2387   args.rval().setString(result);
2388   return true;
2389 }
2390 
str_trim(JSContext * cx,unsigned argc,Value * vp)2391 bool js::str_trim(JSContext* cx, unsigned argc, Value* vp) {
2392   CallArgs args = CallArgsFromVp(argc, vp);
2393   return TrimString(cx, args, true, true);
2394 }
2395 
str_trimStart(JSContext * cx,unsigned argc,Value * vp)2396 bool js::str_trimStart(JSContext* cx, unsigned argc, Value* vp) {
2397   CallArgs args = CallArgsFromVp(argc, vp);
2398   return TrimString(cx, args, true, false);
2399 }
2400 
str_trimEnd(JSContext * cx,unsigned argc,Value * vp)2401 bool js::str_trimEnd(JSContext* cx, unsigned argc, Value* vp) {
2402   CallArgs args = CallArgsFromVp(argc, vp);
2403   return TrimString(cx, args, false, true);
2404 }
2405 
2406 // Utility for building a rope (lazy concatenation) of strings.
2407 class RopeBuilder {
2408   JSContext* cx;
2409   RootedString res;
2410 
2411   RopeBuilder(const RopeBuilder& other) = delete;
2412   void operator=(const RopeBuilder& other) = delete;
2413 
2414  public:
RopeBuilder(JSContext * cx)2415   explicit RopeBuilder(JSContext* cx)
2416       : cx(cx), res(cx, cx->runtime()->emptyString) {}
2417 
append(HandleString str)2418   inline bool append(HandleString str) {
2419     res = ConcatStrings<CanGC>(cx, res, str);
2420     return !!res;
2421   }
2422 
result()2423   inline JSString* result() { return res; }
2424 };
2425 
2426 namespace {
2427 
2428 template <typename CharT>
FindDollarIndex(const CharT * chars,size_t length)2429 static uint32_t FindDollarIndex(const CharT* chars, size_t length) {
2430   if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
2431     uint32_t dollarIndex = p - chars;
2432     MOZ_ASSERT(dollarIndex < length);
2433     return dollarIndex;
2434   }
2435   return UINT32_MAX;
2436 }
2437 
2438 } /* anonymous namespace */
2439 
2440 /*
2441  * Constructs a result string that looks like:
2442  *
2443  *      newstring = string[:matchStart] + repstr + string[matchEnd:]
2444  */
BuildFlatReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t matchStart,size_t patternLength)2445 static JSString* BuildFlatReplacement(JSContext* cx, HandleString textstr,
2446                                       HandleLinearString repstr,
2447                                       size_t matchStart, size_t patternLength) {
2448   size_t matchEnd = matchStart + patternLength;
2449 
2450   RootedString resultStr(cx, NewDependentString(cx, textstr, 0, matchStart));
2451   if (!resultStr) return nullptr;
2452 
2453   resultStr = ConcatStrings<CanGC>(cx, resultStr, repstr);
2454   if (!resultStr) return nullptr;
2455 
2456   MOZ_ASSERT(textstr->length() >= matchEnd);
2457   RootedString rest(cx, NewDependentString(cx, textstr, matchEnd,
2458                                            textstr->length() - matchEnd));
2459   if (!rest) return nullptr;
2460 
2461   return ConcatStrings<CanGC>(cx, resultStr, rest);
2462 }
2463 
BuildFlatRopeReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t match,size_t patternLength)2464 static JSString* BuildFlatRopeReplacement(JSContext* cx, HandleString textstr,
2465                                           HandleLinearString repstr,
2466                                           size_t match, size_t patternLength) {
2467   MOZ_ASSERT(textstr->isRope());
2468 
2469   size_t matchEnd = match + patternLength;
2470 
2471   /*
2472    * If we are replacing over a rope, avoid flattening it by iterating
2473    * through it, building a new rope.
2474    */
2475   StringSegmentRange r(cx);
2476   if (!r.init(textstr)) return nullptr;
2477 
2478   RopeBuilder builder(cx);
2479   size_t pos = 0;
2480   while (!r.empty()) {
2481     RootedString str(cx, r.front());
2482     size_t len = str->length();
2483     size_t strEnd = pos + len;
2484     if (pos < matchEnd && strEnd > match) {
2485       /*
2486        * We need to special-case any part of the rope that overlaps
2487        * with the replacement string.
2488        */
2489       if (match >= pos) {
2490         /*
2491          * If this part of the rope overlaps with the left side of
2492          * the pattern, then it must be the only one to overlap with
2493          * the first character in the pattern, so we include the
2494          * replacement string here.
2495          */
2496         RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
2497         if (!leftSide || !builder.append(leftSide) || !builder.append(repstr)) {
2498           return nullptr;
2499         }
2500       }
2501 
2502       /*
2503        * If str runs off the end of the matched string, append the
2504        * last part of str.
2505        */
2506       if (strEnd > matchEnd) {
2507         RootedString rightSide(
2508             cx, NewDependentString(cx, str, matchEnd - pos, strEnd - matchEnd));
2509         if (!rightSide || !builder.append(rightSide)) return nullptr;
2510       }
2511     } else {
2512       if (!builder.append(str)) return nullptr;
2513     }
2514     pos += str->length();
2515     if (!r.popFront()) return nullptr;
2516   }
2517 
2518   return builder.result();
2519 }
2520 
2521 template <typename CharT>
AppendDollarReplacement(StringBuffer & newReplaceChars,size_t firstDollarIndex,size_t matchStart,size_t matchLimit,JSLinearString * text,const CharT * repChars,size_t repLength)2522 static bool AppendDollarReplacement(StringBuffer& newReplaceChars,
2523                                     size_t firstDollarIndex, size_t matchStart,
2524                                     size_t matchLimit, JSLinearString* text,
2525                                     const CharT* repChars, size_t repLength) {
2526   MOZ_ASSERT(firstDollarIndex < repLength);
2527 
2528   /* Move the pre-dollar chunk in bulk. */
2529   newReplaceChars.infallibleAppend(repChars, firstDollarIndex);
2530 
2531   /* Move the rest char-by-char, interpreting dollars as we encounter them. */
2532   const CharT* repLimit = repChars + repLength;
2533   for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
2534     if (*it != '$' || it == repLimit - 1) {
2535       if (!newReplaceChars.append(*it)) return false;
2536       continue;
2537     }
2538 
2539     switch (*(it + 1)) {
2540       case '$': /* Eat one of the dollars. */
2541         if (!newReplaceChars.append(*it)) return false;
2542         break;
2543       case '&':
2544         if (!newReplaceChars.appendSubstring(text, matchStart,
2545                                              matchLimit - matchStart))
2546           return false;
2547         break;
2548       case '`':
2549         if (!newReplaceChars.appendSubstring(text, 0, matchStart)) return false;
2550         break;
2551       case '\'':
2552         if (!newReplaceChars.appendSubstring(text, matchLimit,
2553                                              text->length() - matchLimit))
2554           return false;
2555         break;
2556       default: /* The dollar we saw was not special (no matter what its mother
2557                   told it). */
2558         if (!newReplaceChars.append(*it)) return false;
2559         continue;
2560     }
2561     ++it; /* We always eat an extra char in the above switch. */
2562   }
2563 
2564   return true;
2565 }
2566 
2567 /*
2568  * Perform a linear-scan dollar substitution on the replacement text.
2569  */
InterpretDollarReplacement(JSContext * cx,HandleString textstrArg,HandleLinearString repstr,uint32_t firstDollarIndex,size_t matchStart,size_t patternLength)2570 static JSLinearString* InterpretDollarReplacement(
2571     JSContext* cx, HandleString textstrArg, HandleLinearString repstr,
2572     uint32_t firstDollarIndex, size_t matchStart, size_t patternLength) {
2573   RootedLinearString textstr(cx, textstrArg->ensureLinear(cx));
2574   if (!textstr) return nullptr;
2575 
2576   size_t matchLimit = matchStart + patternLength;
2577 
2578   /*
2579    * Most probably:
2580    *
2581    *      len(newstr) >= len(orig) - len(match) + len(replacement)
2582    *
2583    * Note that dollar vars _could_ make the resulting text smaller than this.
2584    */
2585   StringBuffer newReplaceChars(cx);
2586   if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars())
2587     return nullptr;
2588 
2589   if (!newReplaceChars.reserve(textstr->length() - patternLength +
2590                                repstr->length()))
2591     return nullptr;
2592 
2593   bool res;
2594   if (repstr->hasLatin1Chars()) {
2595     AutoCheckCannotGC nogc;
2596     res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2597                                   matchLimit, textstr,
2598                                   repstr->latin1Chars(nogc), repstr->length());
2599   } else {
2600     AutoCheckCannotGC nogc;
2601     res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2602                                   matchLimit, textstr,
2603                                   repstr->twoByteChars(nogc), repstr->length());
2604   }
2605   if (!res) return nullptr;
2606 
2607   return newReplaceChars.finishString();
2608 }
2609 
2610 template <typename StrChar, typename RepChar>
StrFlatReplaceGlobal(JSContext * cx,JSLinearString * str,JSLinearString * pat,JSLinearString * rep,StringBuffer & sb)2611 static bool StrFlatReplaceGlobal(JSContext* cx, JSLinearString* str,
2612                                  JSLinearString* pat, JSLinearString* rep,
2613                                  StringBuffer& sb) {
2614   MOZ_ASSERT(str->length() > 0);
2615 
2616   AutoCheckCannotGC nogc;
2617   const StrChar* strChars = str->chars<StrChar>(nogc);
2618   const RepChar* repChars = rep->chars<RepChar>(nogc);
2619 
2620   // The pattern is empty, so we interleave the replacement string in-between
2621   // each character.
2622   if (!pat->length()) {
2623     CheckedInt<uint32_t> strLength(str->length());
2624     CheckedInt<uint32_t> repLength(rep->length());
2625     CheckedInt<uint32_t> length = repLength * (strLength - 1) + strLength;
2626     if (!length.isValid()) {
2627       ReportAllocationOverflow(cx);
2628       return false;
2629     }
2630 
2631     if (!sb.reserve(length.value())) return false;
2632 
2633     for (unsigned i = 0; i < str->length() - 1; ++i, ++strChars) {
2634       sb.infallibleAppend(*strChars);
2635       sb.infallibleAppend(repChars, rep->length());
2636     }
2637     sb.infallibleAppend(*strChars);
2638     return true;
2639   }
2640 
2641   // If it's true, we are sure that the result's length is, at least, the same
2642   // length as |str->length()|.
2643   if (rep->length() >= pat->length()) {
2644     if (!sb.reserve(str->length())) return false;
2645   }
2646 
2647   uint32_t start = 0;
2648   for (;;) {
2649     int match = StringMatch(str, pat, start);
2650     if (match < 0) break;
2651     if (!sb.append(strChars + start, match - start)) return false;
2652     if (!sb.append(repChars, rep->length())) return false;
2653     start = match + pat->length();
2654   }
2655 
2656   if (!sb.append(strChars + start, str->length() - start)) return false;
2657 
2658   return true;
2659 }
2660 
2661 // This is identical to "str.split(pattern).join(replacement)" except that we
2662 // do some deforestation optimization in Ion.
str_flat_replace_string(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2663 JSString* js::str_flat_replace_string(JSContext* cx, HandleString string,
2664                                       HandleString pattern,
2665                                       HandleString replacement) {
2666   MOZ_ASSERT(string);
2667   MOZ_ASSERT(pattern);
2668   MOZ_ASSERT(replacement);
2669 
2670   if (!string->length()) return string;
2671 
2672   RootedLinearString linearRepl(cx, replacement->ensureLinear(cx));
2673   if (!linearRepl) return nullptr;
2674 
2675   RootedLinearString linearPat(cx, pattern->ensureLinear(cx));
2676   if (!linearPat) return nullptr;
2677 
2678   RootedLinearString linearStr(cx, string->ensureLinear(cx));
2679   if (!linearStr) return nullptr;
2680 
2681   StringBuffer sb(cx);
2682   if (linearStr->hasTwoByteChars()) {
2683     if (!sb.ensureTwoByteChars()) return nullptr;
2684     if (linearRepl->hasTwoByteChars()) {
2685       if (!StrFlatReplaceGlobal<char16_t, char16_t>(cx, linearStr, linearPat,
2686                                                     linearRepl, sb))
2687         return nullptr;
2688     } else {
2689       if (!StrFlatReplaceGlobal<char16_t, Latin1Char>(cx, linearStr, linearPat,
2690                                                       linearRepl, sb))
2691         return nullptr;
2692     }
2693   } else {
2694     if (linearRepl->hasTwoByteChars()) {
2695       if (!sb.ensureTwoByteChars()) return nullptr;
2696       if (!StrFlatReplaceGlobal<Latin1Char, char16_t>(cx, linearStr, linearPat,
2697                                                       linearRepl, sb))
2698         return nullptr;
2699     } else {
2700       if (!StrFlatReplaceGlobal<Latin1Char, Latin1Char>(
2701               cx, linearStr, linearPat, linearRepl, sb))
2702         return nullptr;
2703     }
2704   }
2705 
2706   return sb.finishString();
2707 }
2708 
str_replace_string_raw(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2709 JSString* js::str_replace_string_raw(JSContext* cx, HandleString string,
2710                                      HandleString pattern,
2711                                      HandleString replacement) {
2712   RootedLinearString repl(cx, replacement->ensureLinear(cx));
2713   if (!repl) return nullptr;
2714 
2715   RootedLinearString pat(cx, pattern->ensureLinear(cx));
2716   if (!pat) return nullptr;
2717 
2718   size_t patternLength = pat->length();
2719   int32_t match;
2720   uint32_t dollarIndex;
2721 
2722   {
2723     AutoCheckCannotGC nogc;
2724     dollarIndex =
2725         repl->hasLatin1Chars()
2726             ? FindDollarIndex(repl->latin1Chars(nogc), repl->length())
2727             : FindDollarIndex(repl->twoByteChars(nogc), repl->length());
2728   }
2729 
2730   /*
2731    * |string| could be a rope, so we want to avoid flattening it for as
2732    * long as possible.
2733    */
2734   if (string->isRope()) {
2735     if (!RopeMatch(cx, &string->asRope(), pat, &match)) return nullptr;
2736   } else {
2737     match = StringMatch(&string->asLinear(), pat, 0);
2738   }
2739 
2740   if (match < 0) return string;
2741 
2742   if (dollarIndex != UINT32_MAX) {
2743     repl = InterpretDollarReplacement(cx, string, repl, dollarIndex, match,
2744                                       patternLength);
2745     if (!repl) return nullptr;
2746   } else if (string->isRope()) {
2747     return BuildFlatRopeReplacement(cx, string, repl, match, patternLength);
2748   }
2749   return BuildFlatReplacement(cx, string, repl, match, patternLength);
2750 }
2751 
NewFullyAllocatedStringArray(JSContext * cx,HandleObjectGroup group,uint32_t length)2752 static ArrayObject* NewFullyAllocatedStringArray(JSContext* cx,
2753                                                  HandleObjectGroup group,
2754                                                  uint32_t length) {
2755   ArrayObject* array = NewFullyAllocatedArrayTryUseGroup(cx, group, length);
2756   if (!array) return nullptr;
2757 
2758   // Only string values will be added to this array. Inform TI early about
2759   // the element type, so we can directly initialize all elements using
2760   // NativeObject::initDenseElement() instead of the slightly more expensive
2761   // NativeObject::initDenseElementWithType() method.
2762   // Since this function is never called to create a zero-length array, it's
2763   // always necessary and correct to call AddTypePropertyId here.
2764   MOZ_ASSERT(length > 0);
2765   AddTypePropertyId(cx, array, JSID_VOID, TypeSet::StringType());
2766 
2767   return array;
2768 }
2769 
SingleElementStringArray(JSContext * cx,HandleObjectGroup group,HandleLinearString str)2770 static ArrayObject* SingleElementStringArray(JSContext* cx,
2771                                              HandleObjectGroup group,
2772                                              HandleLinearString str) {
2773   ArrayObject* array = NewFullyAllocatedStringArray(cx, group, 1);
2774   if (!array) return nullptr;
2775   array->setDenseInitializedLength(1);
2776   array->initDenseElement(0, StringValue(str));
2777   return array;
2778 }
2779 
2780 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,HandleLinearString sep,HandleObjectGroup group)2781 static ArrayObject* SplitHelper(JSContext* cx, HandleLinearString str,
2782                                 uint32_t limit, HandleLinearString sep,
2783                                 HandleObjectGroup group) {
2784   size_t strLength = str->length();
2785   size_t sepLength = sep->length();
2786   MOZ_ASSERT(sepLength != 0);
2787 
2788   // Step 12.
2789   if (strLength == 0) {
2790     // Step 12.a.
2791     int match = StringMatch(str, sep, 0);
2792 
2793     // Step 12.b.
2794     if (match != -1) return NewFullyAllocatedArrayTryUseGroup(cx, group, 0);
2795 
2796     // Steps 12.c-e.
2797     return SingleElementStringArray(cx, group, str);
2798   }
2799 
2800   // Step 3 (reordered).
2801   AutoValueVector splits(cx);
2802 
2803   // Step 8 (reordered).
2804   size_t lastEndIndex = 0;
2805 
2806   // Step 13.
2807   size_t index = 0;
2808 
2809   // Step 14.
2810   while (index != strLength) {
2811     // Step 14.a.
2812     int match = StringMatch(str, sep, index);
2813 
2814     // Step 14.b.
2815     //
2816     // Our match algorithm differs from the spec in that it returns the
2817     // next index at which a match happens.  If no match happens we're
2818     // done.
2819     //
2820     // But what if the match is at the end of the string (and the string is
2821     // not empty)?  Per 14.c.i this shouldn't be a match, so we have to
2822     // specially exclude it.  Thus this case should hold:
2823     //
2824     //   var a = "abc".split(/\b/);
2825     //   assertEq(a.length, 1);
2826     //   assertEq(a[0], "abc");
2827     if (match == -1) break;
2828 
2829     // Step 14.c.
2830     size_t endIndex = match + sepLength;
2831 
2832     // Step 14.c.i.
2833     if (endIndex == lastEndIndex) {
2834       index++;
2835       continue;
2836     }
2837 
2838     // Step 14.c.ii.
2839     MOZ_ASSERT(lastEndIndex < endIndex);
2840     MOZ_ASSERT(sepLength <= strLength);
2841     MOZ_ASSERT(lastEndIndex + sepLength <= endIndex);
2842 
2843     // Step 14.c.ii.1.
2844     size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
2845     JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
2846 
2847     // Steps 14.c.ii.2-4.
2848     if (!sub || !splits.append(StringValue(sub))) return nullptr;
2849 
2850     // Step 14.c.ii.5.
2851     if (splits.length() == limit)
2852       return NewCopiedArrayTryUseGroup(cx, group, splits.begin(),
2853                                        splits.length());
2854 
2855     // Step 14.c.ii.6.
2856     index = endIndex;
2857 
2858     // Step 14.c.ii.7.
2859     lastEndIndex = index;
2860   }
2861 
2862   // Step 15.
2863   JSString* sub =
2864       NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
2865 
2866   // Steps 16-17.
2867   if (!sub || !splits.append(StringValue(sub))) return nullptr;
2868 
2869   // Step 18.
2870   return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length());
2871 }
2872 
2873 // Fast-path for splitting a string into a character array via split("").
CharSplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,HandleObjectGroup group)2874 static ArrayObject* CharSplitHelper(JSContext* cx, HandleLinearString str,
2875                                     uint32_t limit, HandleObjectGroup group) {
2876   size_t strLength = str->length();
2877   if (strLength == 0) return NewFullyAllocatedArrayTryUseGroup(cx, group, 0);
2878 
2879   js::StaticStrings& staticStrings = cx->staticStrings();
2880   uint32_t resultlen = (limit < strLength ? limit : strLength);
2881   MOZ_ASSERT(limit > 0 && resultlen > 0,
2882              "Neither limit nor strLength is zero, so resultlen is greater "
2883              "than zero.");
2884 
2885   RootedArrayObject splits(cx,
2886                            NewFullyAllocatedStringArray(cx, group, resultlen));
2887   if (!splits) return nullptr;
2888   splits->ensureDenseInitializedLength(cx, 0, resultlen);
2889 
2890   for (size_t i = 0; i < resultlen; ++i) {
2891     JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
2892     if (!sub) return nullptr;
2893     splits->initDenseElement(i, StringValue(sub));
2894   }
2895 
2896   return splits;
2897 }
2898 
2899 template <typename TextChar>
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,const TextChar * text,uint32_t textLen,char16_t patCh,HandleObjectGroup group)2900 static MOZ_ALWAYS_INLINE ArrayObject* SplitSingleCharHelper(
2901     JSContext* cx, HandleLinearString str, const TextChar* text,
2902     uint32_t textLen, char16_t patCh, HandleObjectGroup group) {
2903   // Count the number of occurrences of patCh within text.
2904   uint32_t count = 0;
2905   for (size_t index = 0; index < textLen; index++) {
2906     if (static_cast<char16_t>(text[index]) == patCh) count++;
2907   }
2908 
2909   // Handle zero-occurrence case - return input string in an array.
2910   if (count == 0) return SingleElementStringArray(cx, group, str);
2911 
2912   // Create the result array for the substring values.
2913   RootedArrayObject splits(cx,
2914                            NewFullyAllocatedStringArray(cx, group, count + 1));
2915   if (!splits) return nullptr;
2916   splits->ensureDenseInitializedLength(cx, 0, count + 1);
2917 
2918   // Add substrings.
2919   uint32_t splitsIndex = 0;
2920   size_t lastEndIndex = 0;
2921   for (size_t index = 0; index < textLen; index++) {
2922     if (static_cast<char16_t>(text[index]) == patCh) {
2923       size_t subLength = size_t(index - lastEndIndex);
2924       JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
2925       if (!sub) return nullptr;
2926       splits->initDenseElement(splitsIndex++, StringValue(sub));
2927       lastEndIndex = index + 1;
2928     }
2929   }
2930 
2931   // Add substring for tail of string (after last match).
2932   JSString* sub =
2933       NewDependentString(cx, str, lastEndIndex, textLen - lastEndIndex);
2934   if (!sub) return nullptr;
2935   splits->initDenseElement(splitsIndex++, StringValue(sub));
2936 
2937   return splits;
2938 }
2939 
2940 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,char16_t ch,HandleObjectGroup group)2941 static ArrayObject* SplitSingleCharHelper(JSContext* cx, HandleLinearString str,
2942                                           char16_t ch,
2943                                           HandleObjectGroup group) {
2944   // Step 12.
2945   size_t strLength = str->length();
2946 
2947   AutoStableStringChars linearChars(cx);
2948   if (!linearChars.init(cx, str)) return nullptr;
2949 
2950   if (linearChars.isLatin1())
2951     return SplitSingleCharHelper(cx, str, linearChars.latin1Chars(), strLength,
2952                                  ch, group);
2953 
2954   return SplitSingleCharHelper(cx, str, linearChars.twoByteChars(), strLength,
2955                                ch, group);
2956 }
2957 
2958 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
str_split_string(JSContext * cx,HandleObjectGroup group,HandleString str,HandleString sep,uint32_t limit)2959 ArrayObject* js::str_split_string(JSContext* cx, HandleObjectGroup group,
2960                                   HandleString str, HandleString sep,
2961                                   uint32_t limit) {
2962   MOZ_ASSERT(limit > 0, "Only called for strictly positive limit.");
2963 
2964   RootedLinearString linearStr(cx, str->ensureLinear(cx));
2965   if (!linearStr) return nullptr;
2966 
2967   RootedLinearString linearSep(cx, sep->ensureLinear(cx));
2968   if (!linearSep) return nullptr;
2969 
2970   if (linearSep->length() == 0)
2971     return CharSplitHelper(cx, linearStr, limit, group);
2972 
2973   if (linearSep->length() == 1 && limit >= static_cast<uint32_t>(INT32_MAX)) {
2974     char16_t ch = linearSep->latin1OrTwoByteChar(0);
2975     return SplitSingleCharHelper(cx, linearStr, ch, group);
2976   }
2977 
2978   return SplitHelper(cx, linearStr, limit, linearSep, group);
2979 }
2980 
2981 /*
2982  * Python-esque sequence operations.
2983  */
str_concat(JSContext * cx,unsigned argc,Value * vp)2984 bool js::str_concat(JSContext* cx, unsigned argc, Value* vp) {
2985   CallArgs args = CallArgsFromVp(argc, vp);
2986   JSString* str = ToStringForStringFunction(cx, args.thisv());
2987   if (!str) return false;
2988 
2989   for (unsigned i = 0; i < args.length(); i++) {
2990     JSString* argStr = ToString<NoGC>(cx, args[i]);
2991     if (!argStr) {
2992       RootedString strRoot(cx, str);
2993       argStr = ToString<CanGC>(cx, args[i]);
2994       if (!argStr) return false;
2995       str = strRoot;
2996     }
2997 
2998     JSString* next = ConcatStrings<NoGC>(cx, str, argStr);
2999     if (next) {
3000       str = next;
3001     } else {
3002       RootedString strRoot(cx, str), argStrRoot(cx, argStr);
3003       str = ConcatStrings<CanGC>(cx, strRoot, argStrRoot);
3004       if (!str) return false;
3005     }
3006   }
3007 
3008   args.rval().setString(str);
3009   return true;
3010 }
3011 
3012 static const JSFunctionSpec string_methods[] = {
3013     JS_FN(js_toSource_str, str_toSource, 0, 0),
3014 
3015     /* Java-like methods. */
3016     JS_FN(js_toString_str, str_toString, 0, 0),
3017     JS_FN(js_valueOf_str, str_toString, 0, 0),
3018     JS_INLINABLE_FN("toLowerCase", str_toLowerCase, 0, 0, StringToLowerCase),
3019     JS_INLINABLE_FN("toUpperCase", str_toUpperCase, 0, 0, StringToUpperCase),
3020     JS_INLINABLE_FN("charAt", str_charAt, 1, 0, StringCharAt),
3021     JS_INLINABLE_FN("charCodeAt", str_charCodeAt, 1, 0, StringCharCodeAt),
3022     JS_SELF_HOSTED_FN("substring", "String_substring", 2, 0),
3023     JS_SELF_HOSTED_FN("padStart", "String_pad_start", 2, 0),
3024     JS_SELF_HOSTED_FN("padEnd", "String_pad_end", 2, 0),
3025     JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1, 0),
3026     JS_FN("includes", str_includes, 1, 0), JS_FN("indexOf", str_indexOf, 1, 0),
3027     JS_FN("lastIndexOf", str_lastIndexOf, 1, 0),
3028     JS_FN("startsWith", str_startsWith, 1, 0),
3029     JS_FN("endsWith", str_endsWith, 1, 0), JS_FN("trim", str_trim, 0, 0),
3030 #ifdef NIGHTLY_BUILD
3031     JS_FN("trimStart", str_trimStart, 0, 0),
3032     JS_FN("trimEnd", str_trimEnd, 0, 0),
3033 #else
3034     JS_FN("trimLeft", str_trimStart, 0, 0),
3035     JS_FN("trimRight", str_trimEnd, 0, 0),
3036 #endif
3037 #if EXPOSE_INTL_API
3038     JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_toLocaleLowerCase", 0, 0),
3039     JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_toLocaleUpperCase", 0, 0),
3040     JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1, 0),
3041 #else
3042     JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0, 0),
3043     JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0, 0),
3044     JS_FN("localeCompare", str_localeCompare, 1, 0),
3045 #endif
3046     JS_SELF_HOSTED_FN("repeat", "String_repeat", 1, 0),
3047 #if EXPOSE_INTL_API
3048     JS_FN("normalize", str_normalize, 0, 0),
3049 #endif
3050 
3051     /* Perl-ish methods (search is actually Python-esque). */
3052     JS_SELF_HOSTED_FN("match", "String_match", 1, 0),
3053     JS_SELF_HOSTED_FN("search", "String_search", 1, 0),
3054     JS_SELF_HOSTED_FN("replace", "String_replace", 2, 0),
3055     JS_SELF_HOSTED_FN("split", "String_split", 2, 0),
3056     JS_SELF_HOSTED_FN("substr", "String_substr", 2, 0),
3057 
3058     /* Python-esque sequence methods. */
3059     JS_FN("concat", str_concat, 1, 0),
3060     JS_SELF_HOSTED_FN("slice", "String_slice", 2, 0),
3061 
3062     /* HTML string methods. */
3063     JS_SELF_HOSTED_FN("bold", "String_bold", 0, 0),
3064     JS_SELF_HOSTED_FN("italics", "String_italics", 0, 0),
3065     JS_SELF_HOSTED_FN("fixed", "String_fixed", 0, 0),
3066     JS_SELF_HOSTED_FN("strike", "String_strike", 0, 0),
3067     JS_SELF_HOSTED_FN("small", "String_small", 0, 0),
3068     JS_SELF_HOSTED_FN("big", "String_big", 0, 0),
3069     JS_SELF_HOSTED_FN("blink", "String_blink", 0, 0),
3070     JS_SELF_HOSTED_FN("sup", "String_sup", 0, 0),
3071     JS_SELF_HOSTED_FN("sub", "String_sub", 0, 0),
3072     JS_SELF_HOSTED_FN("anchor", "String_anchor", 1, 0),
3073     JS_SELF_HOSTED_FN("link", "String_link", 1, 0),
3074     JS_SELF_HOSTED_FN("fontcolor", "String_fontcolor", 1, 0),
3075     JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1, 0),
3076 
3077     JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0, 0), JS_FS_END};
3078 
3079 // ES6 rev 27 (2014 Aug 24) 21.1.1
StringConstructor(JSContext * cx,unsigned argc,Value * vp)3080 bool js::StringConstructor(JSContext* cx, unsigned argc, Value* vp) {
3081   CallArgs args = CallArgsFromVp(argc, vp);
3082 
3083   RootedString str(cx);
3084   if (args.length() > 0) {
3085     if (!args.isConstructing() && args[0].isSymbol())
3086       return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
3087 
3088     str = ToString<CanGC>(cx, args[0]);
3089     if (!str) return false;
3090   } else {
3091     str = cx->runtime()->emptyString;
3092   }
3093 
3094   if (args.isConstructing()) {
3095     RootedObject proto(cx);
3096     if (!GetPrototypeFromBuiltinConstructor(cx, args, &proto)) return false;
3097 
3098     StringObject* strobj = StringObject::create(cx, str, proto);
3099     if (!strobj) return false;
3100     args.rval().setObject(*strobj);
3101     return true;
3102   }
3103 
3104   args.rval().setString(str);
3105   return true;
3106 }
3107 
str_fromCharCode(JSContext * cx,unsigned argc,Value * vp)3108 bool js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) {
3109   CallArgs args = CallArgsFromVp(argc, vp);
3110 
3111   MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);
3112 
3113   // Optimize the single-char case.
3114   if (args.length() == 1)
3115     return str_fromCharCode_one_arg(cx, args[0], args.rval());
3116 
3117   // Optimize the case where the result will definitely fit in an inline
3118   // string (thin or fat) and so we don't need to malloc the chars. (We could
3119   // cover some cases where args.length() goes up to
3120   // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
3121   // all Latin-1, but it doesn't seem worth the effort.)
3122   InlineCharBuffer<char16_t> chars;
3123   if (!chars.maybeAlloc(cx, args.length())) return false;
3124 
3125   char16_t* rawChars = chars.get();
3126   for (unsigned i = 0; i < args.length(); i++) {
3127     uint16_t code;
3128     if (!ToUint16(cx, args[i], &code)) return false;
3129 
3130     rawChars[i] = char16_t(code);
3131   }
3132 
3133   JSString* str = chars.toString(cx, args.length());
3134   if (!str) return false;
3135 
3136   args.rval().setString(str);
3137   return true;
3138 }
3139 
CodeUnitToString(JSContext * cx,uint16_t ucode,MutableHandleValue rval)3140 static inline bool CodeUnitToString(JSContext* cx, uint16_t ucode,
3141                                     MutableHandleValue rval) {
3142   if (StaticStrings::hasUnit(ucode)) {
3143     rval.setString(cx->staticStrings().getUnit(ucode));
3144     return true;
3145   }
3146 
3147   char16_t c = char16_t(ucode);
3148   JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, &c, 1);
3149   if (!str) return false;
3150 
3151   rval.setString(str);
3152   return true;
3153 }
3154 
str_fromCharCode_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3155 bool js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code,
3156                                   MutableHandleValue rval) {
3157   uint16_t ucode;
3158 
3159   if (!ToUint16(cx, code, &ucode)) return false;
3160 
3161   return CodeUnitToString(cx, ucode, rval);
3162 }
3163 
ToCodePoint(JSContext * cx,HandleValue code,uint32_t * codePoint)3164 static MOZ_ALWAYS_INLINE bool ToCodePoint(JSContext* cx, HandleValue code,
3165                                           uint32_t* codePoint) {
3166   // String.fromCodePoint, Steps 5.a-b.
3167 
3168   // Fast path for the common case - the input is already an int32.
3169   if (code.isInt32()) {
3170     int32_t nextCP = code.toInt32();
3171     if (nextCP >= 0 && nextCP <= int32_t(unicode::NonBMPMax)) {
3172       *codePoint = uint32_t(nextCP);
3173       return true;
3174     }
3175   }
3176 
3177   double nextCP;
3178   if (!ToNumber(cx, code, &nextCP)) return false;
3179 
3180   // String.fromCodePoint, Steps 5.c-d.
3181   if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 ||
3182       nextCP > unicode::NonBMPMax) {
3183     ToCStringBuf cbuf;
3184     if (const char* numStr = NumberToCString(cx, &cbuf, nextCP))
3185       JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
3186                                 JSMSG_NOT_A_CODEPOINT, numStr);
3187     return false;
3188   }
3189 
3190   *codePoint = uint32_t(nextCP);
3191   return true;
3192 }
3193 
str_fromCodePoint_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3194 bool js::str_fromCodePoint_one_arg(JSContext* cx, HandleValue code,
3195                                    MutableHandleValue rval) {
3196   // Steps 1-4 (omitted).
3197 
3198   // Steps 5.a-d.
3199   uint32_t codePoint;
3200   if (!ToCodePoint(cx, code, &codePoint)) return false;
3201 
3202   // Steps 5.e, 6.
3203   if (!unicode::IsSupplementary(codePoint))
3204     return CodeUnitToString(cx, uint16_t(codePoint), rval);
3205 
3206   char16_t chars[] = {unicode::LeadSurrogate(codePoint),
3207                       unicode::TrailSurrogate(codePoint)};
3208   JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, chars, 2);
3209   if (!str) return false;
3210 
3211   rval.setString(str);
3212   return true;
3213 }
3214 
str_fromCodePoint_few_args(JSContext * cx,const CallArgs & args)3215 static bool str_fromCodePoint_few_args(JSContext* cx, const CallArgs& args) {
3216   MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2);
3217 
3218   // Steps 1-2 (omitted).
3219 
3220   // Step 3.
3221   char16_t elements[JSFatInlineString::MAX_LENGTH_TWO_BYTE];
3222 
3223   // Steps 4-5.
3224   unsigned length = 0;
3225   for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3226     // Steps 5.a-d.
3227     uint32_t codePoint;
3228     if (!ToCodePoint(cx, args[nextIndex], &codePoint)) return false;
3229 
3230     // Step 5.e.
3231     unicode::UTF16Encode(codePoint, elements, &length);
3232   }
3233 
3234   // Step 6.
3235   JSString* str = NewStringCopyN<CanGC>(cx, elements, length);
3236   if (!str) return false;
3237 
3238   args.rval().setString(str);
3239   return true;
3240 }
3241 
3242 // ES2017 draft rev 40edb3a95a475c1b251141ac681b8793129d9a6d
3243 // 21.1.2.2 String.fromCodePoint(...codePoints)
str_fromCodePoint(JSContext * cx,unsigned argc,Value * vp)3244 bool js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) {
3245   CallArgs args = CallArgsFromVp(argc, vp);
3246 
3247   // Optimize the single code-point case.
3248   if (args.length() == 1)
3249     return str_fromCodePoint_one_arg(cx, args[0], args.rval());
3250 
3251   // Optimize the case where the result will definitely fit in an inline
3252   // string (thin or fat) and so we don't need to malloc the chars. (We could
3253   // cover some cases where |args.length()| goes up to
3254   // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
3255   // are all Latin-1, but it doesn't seem worth the effort.)
3256   if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2)
3257     return str_fromCodePoint_few_args(cx, args);
3258 
3259   // Steps 1-2 (omitted).
3260 
3261   // Step 3.
3262   static_assert(
3263       ARGS_LENGTH_MAX < std::numeric_limits<decltype(args.length())>::max() / 2,
3264       "|args.length() * 2 + 1| does not overflow");
3265   char16_t* elements = cx->pod_malloc<char16_t>(args.length() * 2 + 1);
3266   if (!elements) return false;
3267 
3268   // Steps 4-5.
3269   unsigned length = 0;
3270   for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3271     // Steps 5.a-d.
3272     uint32_t codePoint;
3273     if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
3274       js_free(elements);
3275       return false;
3276     }
3277 
3278     // Step 5.e.
3279     unicode::UTF16Encode(codePoint, elements, &length);
3280   }
3281   elements[length] = 0;
3282 
3283   // Step 6.
3284   JSString* str = NewString<CanGC>(cx, elements, length);
3285   if (!str) {
3286     js_free(elements);
3287     return false;
3288   }
3289 
3290   args.rval().setString(str);
3291   return true;
3292 }
3293 
3294 static const JSFunctionSpec string_static_methods[] = {
3295     JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0,
3296                     StringFromCharCode),
3297     JS_INLINABLE_FN("fromCodePoint", js::str_fromCodePoint, 1, 0,
3298                     StringFromCodePoint),
3299 
3300     JS_SELF_HOSTED_FN("raw", "String_static_raw", 1, 0),
3301     JS_SELF_HOSTED_FN("substring", "String_static_substring", 3, 0),
3302     JS_SELF_HOSTED_FN("substr", "String_static_substr", 3, 0),
3303     JS_SELF_HOSTED_FN("slice", "String_static_slice", 3, 0),
3304 
3305     JS_SELF_HOSTED_FN("match", "String_generic_match", 2, 0),
3306     JS_SELF_HOSTED_FN("replace", "String_generic_replace", 3, 0),
3307     JS_SELF_HOSTED_FN("search", "String_generic_search", 2, 0),
3308     JS_SELF_HOSTED_FN("split", "String_generic_split", 3, 0),
3309 
3310     JS_SELF_HOSTED_FN("toLowerCase", "String_static_toLowerCase", 1, 0),
3311     JS_SELF_HOSTED_FN("toUpperCase", "String_static_toUpperCase", 1, 0),
3312     JS_SELF_HOSTED_FN("charAt", "String_static_charAt", 2, 0),
3313     JS_SELF_HOSTED_FN("charCodeAt", "String_static_charCodeAt", 2, 0),
3314     JS_SELF_HOSTED_FN("includes", "String_static_includes", 2, 0),
3315     JS_SELF_HOSTED_FN("indexOf", "String_static_indexOf", 2, 0),
3316     JS_SELF_HOSTED_FN("lastIndexOf", "String_static_lastIndexOf", 2, 0),
3317     JS_SELF_HOSTED_FN("startsWith", "String_static_startsWith", 2, 0),
3318     JS_SELF_HOSTED_FN("endsWith", "String_static_endsWith", 2, 0),
3319     JS_SELF_HOSTED_FN("trim", "String_static_trim", 1, 0),
3320     JS_SELF_HOSTED_FN("trimLeft", "String_static_trimLeft", 1, 0),
3321     JS_SELF_HOSTED_FN("trimRight", "String_static_trimRight", 1, 0),
3322     JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_static_toLocaleLowerCase", 1,
3323                       0),
3324     JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_static_toLocaleUpperCase", 1,
3325                       0),
3326 #if EXPOSE_INTL_API
3327     JS_SELF_HOSTED_FN("normalize", "String_static_normalize", 1, 0),
3328 #endif
3329     JS_SELF_HOSTED_FN("concat", "String_static_concat", 2, 0),
3330 
3331     JS_SELF_HOSTED_FN("localeCompare", "String_static_localeCompare", 2, 0),
3332     JS_FS_END};
3333 
assignInitialShape(JSContext * cx,Handle<StringObject * > obj)3334 /* static */ Shape* StringObject::assignInitialShape(
3335     JSContext* cx, Handle<StringObject*> obj) {
3336   MOZ_ASSERT(obj->empty());
3337 
3338   return NativeObject::addDataProperty(cx, obj, cx->names().length, LENGTH_SLOT,
3339                                        JSPROP_PERMANENT | JSPROP_READONLY);
3340 }
3341 
InitStringClass(JSContext * cx,HandleObject obj)3342 JSObject* js::InitStringClass(JSContext* cx, HandleObject obj) {
3343   MOZ_ASSERT(obj->isNative());
3344 
3345   Handle<GlobalObject*> global = obj.as<GlobalObject>();
3346 
3347   Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
3348   RootedObject proto(cx, GlobalObject::createBlankPrototype(
3349                              cx, global, &StringObject::class_));
3350   if (!proto) return nullptr;
3351   Handle<StringObject*> protoObj = proto.as<StringObject>();
3352   if (!StringObject::init(cx, protoObj, empty)) return nullptr;
3353 
3354   /* Now create the String function. */
3355   RootedFunction ctor(cx);
3356   ctor = GlobalObject::createConstructor(
3357       cx, StringConstructor, cx->names().String, 1, AllocKind::FUNCTION,
3358       &jit::JitInfo_String);
3359   if (!ctor) return nullptr;
3360 
3361   if (!LinkConstructorAndPrototype(cx, ctor, proto)) return nullptr;
3362 
3363   if (!DefinePropertiesAndFunctions(cx, proto, nullptr, string_methods) ||
3364       !DefinePropertiesAndFunctions(cx, ctor, nullptr, string_static_methods)) {
3365     return nullptr;
3366   }
3367 
3368 #ifdef NIGHTLY_BUILD
3369   // Create "trimLeft" as an alias for "trimStart".
3370   RootedValue trimFn(cx);
3371   RootedId trimId(cx, NameToId(cx->names().trimStart));
3372   RootedId trimAliasId(cx, NameToId(cx->names().trimLeft));
3373   if (!NativeGetProperty(cx, protoObj, trimId, &trimFn) ||
3374       !NativeDefineDataProperty(cx, protoObj, trimAliasId, trimFn, 0)) {
3375     return nullptr;
3376   }
3377 
3378   // Create "trimRight" as an alias for "trimEnd".
3379   trimId = NameToId(cx->names().trimEnd);
3380   trimAliasId = NameToId(cx->names().trimRight);
3381   if (!NativeGetProperty(cx, protoObj, trimId, &trimFn) ||
3382       !NativeDefineDataProperty(cx, protoObj, trimAliasId, trimFn, 0)) {
3383     return nullptr;
3384   }
3385 #endif
3386 
3387   /*
3388    * Define escape/unescape, the URI encode/decode functions, and maybe
3389    * uneval on the global object.
3390    */
3391   if (!JS_DefineFunctions(cx, global, string_functions)) return nullptr;
3392 
3393   if (!GlobalObject::initBuiltinConstructor(cx, global, JSProto_String, ctor,
3394                                             proto))
3395     return nullptr;
3396 
3397   return proto;
3398 }
3399 
3400 #define ____ false
3401 
3402 /*
3403  * Uri reserved chars + #:
3404  * - 35: #
3405  * - 36: $
3406  * - 38: &
3407  * - 43: +
3408  * - 44: ,
3409  * - 47: /
3410  * - 58: :
3411  * - 59: ;
3412  * - 61: =
3413  * - 63: ?
3414  * - 64: @
3415  */
3416 static const bool js_isUriReservedPlusPound[] = {
3417     // clang-format off
3418 /*       0     1     2     3     4     5     6     7     8     9  */
3419 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3420 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3421 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3422 /*  3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
3423 /*  4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
3424 /*  5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
3425 /*  6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
3426 /*  7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3427 /*  8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3428 /*  9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3429 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3430 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3431 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
3432     // clang-format on
3433 };
3434 
3435 /*
3436  * Uri unescaped chars:
3437  * -      33: !
3438  * -      39: '
3439  * -      40: (
3440  * -      41: )
3441  * -      42: *
3442  * -      45: -
3443  * -      46: .
3444  * -  48..57: 0-9
3445  * -  65..90: A-Z
3446  * -      95: _
3447  * - 97..122: a-z
3448  * -     126: ~
3449  */
3450 static const bool js_isUriUnescaped[] = {
3451     // clang-format off
3452 /*       0     1     2     3     4     5     6     7     8     9  */
3453 /*  0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3454 /*  1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3455 /*  2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3456 /*  3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
3457 /*  4 */ true, true, true, ____, ____, true, true, ____, true, true,
3458 /*  5 */ true, true, true, true, true, true, true, true, ____, ____,
3459 /*  6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
3460 /*  7 */ true, true, true, true, true, true, true, true, true, true,
3461 /*  8 */ true, true, true, true, true, true, true, true, true, true,
3462 /*  9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
3463 /* 10 */ true, true, true, true, true, true, true, true, true, true,
3464 /* 11 */ true, true, true, true, true, true, true, true, true, true,
3465 /* 12 */ true, true, true, ____, ____, ____, true, ____
3466     // clang-format on
3467 };
3468 
3469 #undef ____
3470 
TransferBufferToString(StringBuffer & sb,MutableHandleValue rval)3471 static inline bool TransferBufferToString(StringBuffer& sb,
3472                                           MutableHandleValue rval) {
3473   JSString* str = sb.finishString();
3474   if (!str) return false;
3475   rval.setString(str);
3476   return true;
3477 }
3478 
3479 /*
3480  * ECMA 3, 15.1.3 URI Handling Function Properties
3481  *
3482  * The following are implementations of the algorithms
3483  * given in the ECMA specification for the hidden functions
3484  * 'Encode' and 'Decode'.
3485  */
3486 enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };
3487 
3488 // Bug 1403318: GCC sometimes inlines this Encode function rather than the
3489 // caller Encode function. Annotate both functions with MOZ_NEVER_INLINE resp.
3490 // MOZ_ALWAYS_INLINE to ensure we get the desired inlining behavior.
3491 template <typename CharT>
Encode(StringBuffer & sb,const CharT * chars,size_t length,const bool * unescapedSet)3492 static MOZ_NEVER_INLINE EncodeResult Encode(StringBuffer& sb,
3493                                             const CharT* chars, size_t length,
3494                                             const bool* unescapedSet) {
3495   Latin1Char hexBuf[4];
3496   hexBuf[0] = '%';
3497   hexBuf[3] = 0;
3498 
3499   auto appendEncoded = [&sb, &hexBuf](Latin1Char c) {
3500     static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
3501 
3502     hexBuf[1] = HexDigits[c >> 4];
3503     hexBuf[2] = HexDigits[c & 0xf];
3504     return sb.append(hexBuf, 3);
3505   };
3506 
3507   for (size_t k = 0; k < length; k++) {
3508     CharT c = chars[k];
3509     if (c < 128 &&
3510         (js_isUriUnescaped[c] || (unescapedSet && unescapedSet[c]))) {
3511       if (!sb.append(Latin1Char(c))) return Encode_Failure;
3512     } else {
3513       if (mozilla::IsSame<CharT, Latin1Char>::value) {
3514         if (c < 0x80) {
3515           if (!appendEncoded(c)) return Encode_Failure;
3516         } else {
3517           if (!appendEncoded(0xC0 | (c >> 6)) ||
3518               !appendEncoded(0x80 | (c & 0x3F)))
3519             return Encode_Failure;
3520         }
3521       } else {
3522         if (unicode::IsTrailSurrogate(c)) return Encode_BadUri;
3523 
3524         uint32_t v;
3525         if (!unicode::IsLeadSurrogate(c)) {
3526           v = c;
3527         } else {
3528           k++;
3529           if (k == length) return Encode_BadUri;
3530 
3531           char16_t c2 = chars[k];
3532           if (!unicode::IsTrailSurrogate(c2)) return Encode_BadUri;
3533 
3534           v = unicode::UTF16Decode(c, c2);
3535         }
3536 
3537         uint8_t utf8buf[4];
3538         size_t L = OneUcs4ToUtf8Char(utf8buf, v);
3539         for (size_t j = 0; j < L; j++) {
3540           if (!appendEncoded(utf8buf[j])) return Encode_Failure;
3541         }
3542       }
3543     }
3544   }
3545 
3546   return Encode_Success;
3547 }
3548 
Encode(JSContext * cx,HandleLinearString str,const bool * unescapedSet,MutableHandleValue rval)3549 static MOZ_ALWAYS_INLINE bool Encode(JSContext* cx, HandleLinearString str,
3550                                      const bool* unescapedSet,
3551                                      MutableHandleValue rval) {
3552   size_t length = str->length();
3553   if (length == 0) {
3554     rval.setString(cx->runtime()->emptyString);
3555     return true;
3556   }
3557 
3558   StringBuffer sb(cx);
3559   if (!sb.reserve(length)) return false;
3560 
3561   EncodeResult res;
3562   if (str->hasLatin1Chars()) {
3563     AutoCheckCannotGC nogc;
3564     res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet);
3565   } else {
3566     AutoCheckCannotGC nogc;
3567     res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet);
3568   }
3569 
3570   if (res == Encode_Failure) return false;
3571 
3572   if (res == Encode_BadUri) {
3573     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
3574     return false;
3575   }
3576 
3577   MOZ_ASSERT(res == Encode_Success);
3578   return TransferBufferToString(sb, rval);
3579 }
3580 
3581 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success };
3582 
3583 template <typename CharT>
Decode(StringBuffer & sb,const CharT * chars,size_t length,const bool * reservedSet)3584 static DecodeResult Decode(StringBuffer& sb, const CharT* chars, size_t length,
3585                            const bool* reservedSet) {
3586   for (size_t k = 0; k < length; k++) {
3587     CharT c = chars[k];
3588     if (c == '%') {
3589       size_t start = k;
3590       if ((k + 2) >= length) return Decode_BadUri;
3591 
3592       if (!JS7_ISHEX(chars[k + 1]) || !JS7_ISHEX(chars[k + 2]))
3593         return Decode_BadUri;
3594 
3595       uint32_t B = JS7_UNHEX(chars[k + 1]) * 16 + JS7_UNHEX(chars[k + 2]);
3596       k += 2;
3597       if (B < 128) {
3598         c = CharT(B);
3599         if (reservedSet && reservedSet[c]) {
3600           if (!sb.append(chars + start, k - start + 1)) return Decode_Failure;
3601         } else {
3602           if (!sb.append(c)) return Decode_Failure;
3603         }
3604       } else {
3605         int n = 1;
3606         while (B & (0x80 >> n)) n++;
3607 
3608         if (n == 1 || n > 4) return Decode_BadUri;
3609 
3610         uint8_t octets[4];
3611         octets[0] = (uint8_t)B;
3612         if (k + 3 * (n - 1) >= length) return Decode_BadUri;
3613 
3614         for (int j = 1; j < n; j++) {
3615           k++;
3616           if (chars[k] != '%') return Decode_BadUri;
3617 
3618           if (!JS7_ISHEX(chars[k + 1]) || !JS7_ISHEX(chars[k + 2]))
3619             return Decode_BadUri;
3620 
3621           B = JS7_UNHEX(chars[k + 1]) * 16 + JS7_UNHEX(chars[k + 2]);
3622           if ((B & 0xC0) != 0x80) return Decode_BadUri;
3623 
3624           k += 2;
3625           octets[j] = char(B);
3626         }
3627 
3628         uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
3629         MOZ_ASSERT(v >= 128);
3630         if (v >= unicode::NonBMPMin) {
3631           if (v > unicode::NonBMPMax) return Decode_BadUri;
3632 
3633           if (!sb.append(unicode::LeadSurrogate(v))) return Decode_Failure;
3634           if (!sb.append(unicode::TrailSurrogate(v))) return Decode_Failure;
3635         } else {
3636           if (!sb.append(char16_t(v))) return Decode_Failure;
3637         }
3638       }
3639     } else {
3640       if (!sb.append(c)) return Decode_Failure;
3641     }
3642   }
3643 
3644   return Decode_Success;
3645 }
3646 
Decode(JSContext * cx,HandleLinearString str,const bool * reservedSet,MutableHandleValue rval)3647 static bool Decode(JSContext* cx, HandleLinearString str,
3648                    const bool* reservedSet, MutableHandleValue rval) {
3649   size_t length = str->length();
3650   if (length == 0) {
3651     rval.setString(cx->runtime()->emptyString);
3652     return true;
3653   }
3654 
3655   StringBuffer sb(cx);
3656 
3657   DecodeResult res;
3658   if (str->hasLatin1Chars()) {
3659     AutoCheckCannotGC nogc;
3660     res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
3661   } else {
3662     AutoCheckCannotGC nogc;
3663     res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
3664   }
3665 
3666   if (res == Decode_Failure) return false;
3667 
3668   if (res == Decode_BadUri) {
3669     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
3670     return false;
3671   }
3672 
3673   MOZ_ASSERT(res == Decode_Success);
3674   return TransferBufferToString(sb, rval);
3675 }
3676 
str_decodeURI(JSContext * cx,unsigned argc,Value * vp)3677 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp) {
3678   CallArgs args = CallArgsFromVp(argc, vp);
3679   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
3680   if (!str) return false;
3681 
3682   return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
3683 }
3684 
str_decodeURI_Component(JSContext * cx,unsigned argc,Value * vp)3685 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
3686   CallArgs args = CallArgsFromVp(argc, vp);
3687   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
3688   if (!str) return false;
3689 
3690   return Decode(cx, str, nullptr, args.rval());
3691 }
3692 
str_encodeURI(JSContext * cx,unsigned argc,Value * vp)3693 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp) {
3694   CallArgs args = CallArgsFromVp(argc, vp);
3695   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
3696   if (!str) return false;
3697 
3698   return Encode(cx, str, js_isUriReservedPlusPound, args.rval());
3699 }
3700 
str_encodeURI_Component(JSContext * cx,unsigned argc,Value * vp)3701 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
3702   CallArgs args = CallArgsFromVp(argc, vp);
3703   RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
3704   if (!str) return false;
3705 
3706   return Encode(cx, str, nullptr, args.rval());
3707 }
3708 
EncodeURI(JSContext * cx,StringBuffer & sb,const char * chars,size_t length)3709 bool js::EncodeURI(JSContext* cx, StringBuffer& sb, const char* chars,
3710                    size_t length) {
3711   EncodeResult result = Encode(sb, reinterpret_cast<const Latin1Char*>(chars),
3712                                length, js_isUriReservedPlusPound);
3713   if (result == EncodeResult::Encode_Failure) return false;
3714   if (result == EncodeResult::Encode_BadUri) {
3715     JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
3716     return false;
3717   }
3718   return true;
3719 }
3720 
FlatStringMatchHelper(JSContext * cx,HandleString str,HandleString pattern,bool * isFlat,int32_t * match)3721 static bool FlatStringMatchHelper(JSContext* cx, HandleString str,
3722                                   HandleString pattern, bool* isFlat,
3723                                   int32_t* match) {
3724   RootedLinearString linearPattern(cx, pattern->ensureLinear(cx));
3725   if (!linearPattern) return false;
3726 
3727   static const size_t MAX_FLAT_PAT_LEN = 256;
3728   if (linearPattern->length() > MAX_FLAT_PAT_LEN ||
3729       StringHasRegExpMetaChars(linearPattern)) {
3730     *isFlat = false;
3731     return true;
3732   }
3733 
3734   *isFlat = true;
3735   if (str->isRope()) {
3736     if (!RopeMatch(cx, &str->asRope(), linearPattern, match)) return false;
3737   } else {
3738     *match = StringMatch(&str->asLinear(), linearPattern);
3739   }
3740 
3741   return true;
3742 }
3743 
BuildFlatMatchArray(JSContext * cx,HandleString str,HandleString pattern,int32_t match,MutableHandleValue rval)3744 static bool BuildFlatMatchArray(JSContext* cx, HandleString str,
3745                                 HandleString pattern, int32_t match,
3746                                 MutableHandleValue rval) {
3747   if (match < 0) {
3748     rval.setNull();
3749     return true;
3750   }
3751 
3752   /* Get the templateObject that defines the shape and type of the output object
3753    */
3754   JSObject* templateObject =
3755       cx->compartment()->regExps.getOrCreateMatchResultTemplateObject(cx);
3756   if (!templateObject) return false;
3757 
3758   RootedArrayObject arr(
3759       cx, NewDenseFullyAllocatedArrayWithTemplate(cx, 1, templateObject));
3760   if (!arr) return false;
3761 
3762   /* Store a Value for each pair. */
3763   arr->setDenseInitializedLength(1);
3764   arr->initDenseElement(0, StringValue(pattern));
3765 
3766   /* Set the |index| property. (TemplateObject positions it in slot 0) */
3767   arr->setSlot(0, Int32Value(match));
3768 
3769   /* Set the |input| property. (TemplateObject positions it in slot 1) */
3770   arr->setSlot(1, StringValue(str));
3771 
3772 #ifdef DEBUG
3773   RootedValue test(cx);
3774   RootedId id(cx, NameToId(cx->names().index));
3775   if (!NativeGetProperty(cx, arr, id, &test)) return false;
3776   MOZ_ASSERT(test == arr->getSlot(0));
3777   id = NameToId(cx->names().input);
3778   if (!NativeGetProperty(cx, arr, id, &test)) return false;
3779   MOZ_ASSERT(test == arr->getSlot(1));
3780 #endif
3781 
3782   rval.setObject(*arr);
3783   return true;
3784 }
3785 
3786 #ifdef DEBUG
CallIsStringOptimizable(JSContext * cx,const char * name,bool * result)3787 static bool CallIsStringOptimizable(JSContext* cx, const char* name,
3788                                     bool* result) {
3789   FixedInvokeArgs<0> args(cx);
3790 
3791   RootedValue rval(cx);
3792   if (!CallSelfHostedFunction(cx, name, UndefinedHandleValue, args, &rval))
3793     return false;
3794 
3795   *result = rval.toBoolean();
3796   return true;
3797 }
3798 #endif
3799 
FlatStringMatch(JSContext * cx,unsigned argc,Value * vp)3800 bool js::FlatStringMatch(JSContext* cx, unsigned argc, Value* vp) {
3801   CallArgs args = CallArgsFromVp(argc, vp);
3802   MOZ_ASSERT(args.length() == 2);
3803   MOZ_ASSERT(args[0].isString());
3804   MOZ_ASSERT(args[1].isString());
3805 #ifdef DEBUG
3806   bool isOptimizable = false;
3807   if (!CallIsStringOptimizable(cx, "IsStringMatchOptimizable", &isOptimizable))
3808     return false;
3809   MOZ_ASSERT(isOptimizable);
3810 #endif
3811 
3812   RootedString str(cx, args[0].toString());
3813   RootedString pattern(cx, args[1].toString());
3814 
3815   bool isFlat = false;
3816   int32_t match = 0;
3817   if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) return false;
3818 
3819   if (!isFlat) {
3820     args.rval().setUndefined();
3821     return true;
3822   }
3823 
3824   return BuildFlatMatchArray(cx, str, pattern, match, args.rval());
3825 }
3826 
FlatStringSearch(JSContext * cx,unsigned argc,Value * vp)3827 bool js::FlatStringSearch(JSContext* cx, unsigned argc, Value* vp) {
3828   CallArgs args = CallArgsFromVp(argc, vp);
3829   MOZ_ASSERT(args.length() == 2);
3830   MOZ_ASSERT(args[0].isString());
3831   MOZ_ASSERT(args[1].isString());
3832 #ifdef DEBUG
3833   bool isOptimizable = false;
3834   if (!CallIsStringOptimizable(cx, "IsStringSearchOptimizable", &isOptimizable))
3835     return false;
3836   MOZ_ASSERT(isOptimizable);
3837 #endif
3838 
3839   RootedString str(cx, args[0].toString());
3840   RootedString pattern(cx, args[1].toString());
3841 
3842   bool isFlat = false;
3843   int32_t match = 0;
3844   if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) return false;
3845 
3846   if (!isFlat) {
3847     args.rval().setInt32(-2);
3848     return true;
3849   }
3850 
3851   args.rval().setInt32(match);
3852   return true;
3853 }
3854