1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "builtin/String.h"
8
9 #include "mozilla/Attributes.h"
10 #include "mozilla/CheckedInt.h"
11 #include "mozilla/FloatingPoint.h"
12 #include "mozilla/PodOperations.h"
13 #include "mozilla/Range.h"
14 #include "mozilla/TextUtils.h"
15
16 #include <algorithm>
17 #include <limits>
18 #include <string.h>
19 #include <type_traits>
20
21 #include "jsapi.h"
22 #include "jsnum.h"
23 #include "jstypes.h"
24
25 #include "builtin/Array.h"
26 #include "builtin/Boolean.h"
27 #if JS_HAS_INTL_API
28 # include "builtin/intl/CommonFunctions.h"
29 #endif
30 #include "builtin/RegExp.h"
31 #include "jit/InlinableNatives.h"
32 #include "js/Conversions.h"
33 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
34 #include "js/friend/StackLimits.h" // js::AutoCheckRecursionLimit
35 #if !JS_HAS_INTL_API
36 # include "js/LocaleSensitive.h"
37 #endif
38 #include "js/PropertySpec.h"
39 #include "js/StableStringChars.h"
40 #include "js/UniquePtr.h"
41 #if JS_HAS_INTL_API
42 # include "unicode/uchar.h"
43 # include "unicode/unorm2.h"
44 # include "unicode/ustring.h"
45 # include "unicode/utypes.h"
46 #endif
47 #include "util/StringBuffer.h"
48 #include "util/Unicode.h"
49 #include "vm/BytecodeUtil.h"
50 #include "vm/GlobalObject.h"
51 #include "vm/Interpreter.h"
52 #include "vm/JSAtom.h"
53 #include "vm/JSContext.h"
54 #include "vm/JSObject.h"
55 #include "vm/Opcodes.h"
56 #include "vm/Printer.h"
57 #include "vm/RegExpObject.h"
58 #include "vm/RegExpStatics.h"
59 #include "vm/SelfHosting.h"
60 #include "vm/ToSource.h" // js::ValueToSource
61 #include "vm/WellKnownAtom.h" // js_*_str
62
63 #include "vm/InlineCharBuffer-inl.h"
64 #include "vm/Interpreter-inl.h"
65 #include "vm/StringObject-inl.h"
66 #include "vm/StringType-inl.h"
67
68 using namespace js;
69
70 using JS::Symbol;
71 using JS::SymbolCode;
72
73 using mozilla::AsciiAlphanumericToNumber;
74 using mozilla::CheckedInt;
75 using mozilla::IsAsciiHexDigit;
76 using mozilla::IsNaN;
77 using mozilla::PodCopy;
78 using mozilla::RangedPtr;
79
80 using JS::AutoCheckCannotGC;
81 using JS::AutoStableStringChars;
82
ArgToLinearString(JSContext * cx,const CallArgs & args,unsigned argno)83 static JSLinearString* ArgToLinearString(JSContext* cx, const CallArgs& args,
84 unsigned argno) {
85 if (argno >= args.length()) {
86 return cx->names().undefined;
87 }
88
89 JSString* str = ToString<CanGC>(cx, args[argno]);
90 if (!str) {
91 return nullptr;
92 }
93
94 return str->ensureLinear(cx);
95 }
96
97 /*
98 * Forward declarations for URI encode/decode and helper routines
99 */
100 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
101
102 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
103
104 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
105
106 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
107
108 /*
109 * Global string methods
110 */
111
112 /* ES5 B.2.1 */
113 template <typename CharT>
Escape(JSContext * cx,const CharT * chars,uint32_t length,InlineCharBuffer<Latin1Char> & newChars,uint32_t * newLengthOut)114 static bool Escape(JSContext* cx, const CharT* chars, uint32_t length,
115 InlineCharBuffer<Latin1Char>& newChars,
116 uint32_t* newLengthOut) {
117 // clang-format off
118 static const uint8_t shouldPassThrough[128] = {
119 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
120 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
121 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
122 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
123 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
124 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
125 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
126 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
127 };
128 // clang-format on
129
130 /* Take a first pass and see how big the result string will need to be. */
131 uint32_t newLength = length;
132 for (size_t i = 0; i < length; i++) {
133 char16_t ch = chars[i];
134 if (ch < 128 && shouldPassThrough[ch]) {
135 continue;
136 }
137
138 /*
139 * newlength is incremented below by at most 5 and at this point it must
140 * be a valid string length, so this should never overflow uint32_t.
141 */
142 static_assert(JSString::MAX_LENGTH < UINT32_MAX - 5,
143 "Adding 5 to valid string length should not overflow");
144
145 MOZ_ASSERT(newLength <= JSString::MAX_LENGTH);
146
147 /* The character will be encoded as %XX or %uXXXX. */
148 newLength += (ch < 256) ? 2 : 5;
149
150 if (MOZ_UNLIKELY(newLength > JSString::MAX_LENGTH)) {
151 ReportAllocationOverflow(cx);
152 return false;
153 }
154 }
155
156 if (newLength == length) {
157 *newLengthOut = newLength;
158 return true;
159 }
160
161 if (!newChars.maybeAlloc(cx, newLength)) {
162 return false;
163 }
164
165 static const char digits[] = "0123456789ABCDEF";
166
167 Latin1Char* rawNewChars = newChars.get();
168 size_t i, ni;
169 for (i = 0, ni = 0; i < length; i++) {
170 char16_t ch = chars[i];
171 if (ch < 128 && shouldPassThrough[ch]) {
172 rawNewChars[ni++] = ch;
173 } else if (ch < 256) {
174 rawNewChars[ni++] = '%';
175 rawNewChars[ni++] = digits[ch >> 4];
176 rawNewChars[ni++] = digits[ch & 0xF];
177 } else {
178 rawNewChars[ni++] = '%';
179 rawNewChars[ni++] = 'u';
180 rawNewChars[ni++] = digits[ch >> 12];
181 rawNewChars[ni++] = digits[(ch & 0xF00) >> 8];
182 rawNewChars[ni++] = digits[(ch & 0xF0) >> 4];
183 rawNewChars[ni++] = digits[ch & 0xF];
184 }
185 }
186 MOZ_ASSERT(ni == newLength);
187
188 *newLengthOut = newLength;
189 return true;
190 }
191
str_escape(JSContext * cx,unsigned argc,Value * vp)192 static bool str_escape(JSContext* cx, unsigned argc, Value* vp) {
193 CallArgs args = CallArgsFromVp(argc, vp);
194
195 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
196 if (!str) {
197 return false;
198 }
199
200 InlineCharBuffer<Latin1Char> newChars;
201 uint32_t newLength = 0; // initialize to silence GCC warning
202 if (str->hasLatin1Chars()) {
203 AutoCheckCannotGC nogc;
204 if (!Escape(cx, str->latin1Chars(nogc), str->length(), newChars,
205 &newLength)) {
206 return false;
207 }
208 } else {
209 AutoCheckCannotGC nogc;
210 if (!Escape(cx, str->twoByteChars(nogc), str->length(), newChars,
211 &newLength)) {
212 return false;
213 }
214 }
215
216 // Return input if no characters need to be escaped.
217 if (newLength == str->length()) {
218 args.rval().setString(str);
219 return true;
220 }
221
222 JSString* res = newChars.toString(cx, newLength);
223 if (!res) {
224 return false;
225 }
226
227 args.rval().setString(res);
228 return true;
229 }
230
231 template <typename CharT>
Unhex4(const RangedPtr<const CharT> chars,char16_t * result)232 static inline bool Unhex4(const RangedPtr<const CharT> chars,
233 char16_t* result) {
234 CharT a = chars[0], b = chars[1], c = chars[2], d = chars[3];
235
236 if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b) && IsAsciiHexDigit(c) &&
237 IsAsciiHexDigit(d))) {
238 return false;
239 }
240
241 char16_t unhex = AsciiAlphanumericToNumber(a);
242 unhex = (unhex << 4) + AsciiAlphanumericToNumber(b);
243 unhex = (unhex << 4) + AsciiAlphanumericToNumber(c);
244 unhex = (unhex << 4) + AsciiAlphanumericToNumber(d);
245 *result = unhex;
246 return true;
247 }
248
249 template <typename CharT>
Unhex2(const RangedPtr<const CharT> chars,char16_t * result)250 static inline bool Unhex2(const RangedPtr<const CharT> chars,
251 char16_t* result) {
252 CharT a = chars[0], b = chars[1];
253
254 if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b))) {
255 return false;
256 }
257
258 *result = (AsciiAlphanumericToNumber(a) << 4) + AsciiAlphanumericToNumber(b);
259 return true;
260 }
261
262 template <typename CharT>
Unescape(StringBuffer & sb,const mozilla::Range<const CharT> chars)263 static bool Unescape(StringBuffer& sb,
264 const mozilla::Range<const CharT> chars) {
265 // Step 2.
266 uint32_t length = chars.length();
267
268 /*
269 * Note that the spec algorithm has been optimized to avoid building
270 * a string in the case where no escapes are present.
271 */
272 bool building = false;
273
274 #define ENSURE_BUILDING \
275 do { \
276 if (!building) { \
277 building = true; \
278 if (!sb.reserve(length)) return false; \
279 sb.infallibleAppend(chars.begin().get(), k); \
280 } \
281 } while (false);
282
283 // Step 4.
284 uint32_t k = 0;
285
286 // Step 5.
287 while (k < length) {
288 // Step 5.a.
289 char16_t c = chars[k];
290
291 // Step 5.b.
292 if (c == '%') {
293 static_assert(JSString::MAX_LENGTH < UINT32_MAX - 6,
294 "String length is not near UINT32_MAX");
295
296 // Steps 5.b.i-ii.
297 if (k + 6 <= length && chars[k + 1] == 'u') {
298 if (Unhex4(chars.begin() + k + 2, &c)) {
299 ENSURE_BUILDING
300 k += 5;
301 }
302 } else if (k + 3 <= length) {
303 if (Unhex2(chars.begin() + k + 1, &c)) {
304 ENSURE_BUILDING
305 k += 2;
306 }
307 }
308 }
309
310 // Step 5.c.
311 if (building && !sb.append(c)) {
312 return false;
313 }
314
315 // Step 5.d.
316 k += 1;
317 }
318
319 return true;
320 #undef ENSURE_BUILDING
321 }
322
323 // ES2018 draft rev f83aa38282c2a60c6916ebc410bfdf105a0f6a54
324 // B.2.1.2 unescape ( string )
str_unescape(JSContext * cx,unsigned argc,Value * vp)325 static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) {
326 CallArgs args = CallArgsFromVp(argc, vp);
327
328 // Step 1.
329 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
330 if (!str) {
331 return false;
332 }
333
334 // Step 3.
335 JSStringBuilder sb(cx);
336 if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) {
337 return false;
338 }
339
340 // Steps 2, 4-5.
341 if (str->hasLatin1Chars()) {
342 AutoCheckCannotGC nogc;
343 if (!Unescape(sb, str->latin1Range(nogc))) {
344 return false;
345 }
346 } else {
347 AutoCheckCannotGC nogc;
348 if (!Unescape(sb, str->twoByteRange(nogc))) {
349 return false;
350 }
351 }
352
353 // Step 6.
354 JSLinearString* result;
355 if (!sb.empty()) {
356 result = sb.finishString();
357 if (!result) {
358 return false;
359 }
360 } else {
361 result = str;
362 }
363
364 args.rval().setString(result);
365 return true;
366 }
367
str_uneval(JSContext * cx,unsigned argc,Value * vp)368 static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) {
369 CallArgs args = CallArgsFromVp(argc, vp);
370 JSString* str = ValueToSource(cx, args.get(0));
371 if (!str) {
372 return false;
373 }
374
375 args.rval().setString(str);
376 return true;
377 }
378
379 static const JSFunctionSpec string_functions[] = {
380 JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING),
381 JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING),
382 JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING),
383 JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING),
384 JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING),
385 JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,
386 JSPROP_RESOLVING),
387 JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,
388 JSPROP_RESOLVING),
389
390 JS_FS_END};
391
392 static const unsigned STRING_ELEMENT_ATTRS =
393 JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
394
str_enumerate(JSContext * cx,HandleObject obj)395 static bool str_enumerate(JSContext* cx, HandleObject obj) {
396 RootedString str(cx, obj->as<StringObject>().unbox());
397 js::StaticStrings& staticStrings = cx->staticStrings();
398
399 RootedValue value(cx);
400 for (size_t i = 0, length = str->length(); i < length; i++) {
401 JSString* str1 = staticStrings.getUnitStringForElement(cx, str, i);
402 if (!str1) {
403 return false;
404 }
405 value.setString(str1);
406 if (!DefineDataElement(cx, obj, i, value,
407 STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
408 return false;
409 }
410 }
411
412 return true;
413 }
414
str_mayResolve(const JSAtomState &,jsid id,JSObject *)415 static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) {
416 // str_resolve ignores non-integer ids.
417 return JSID_IS_INT(id);
418 }
419
str_resolve(JSContext * cx,HandleObject obj,HandleId id,bool * resolvedp)420 static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id,
421 bool* resolvedp) {
422 if (!JSID_IS_INT(id)) {
423 return true;
424 }
425
426 RootedString str(cx, obj->as<StringObject>().unbox());
427
428 int32_t slot = JSID_TO_INT(id);
429 if ((size_t)slot < str->length()) {
430 JSString* str1 =
431 cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
432 if (!str1) {
433 return false;
434 }
435 RootedValue value(cx, StringValue(str1));
436 if (!DefineDataElement(cx, obj, uint32_t(slot), value,
437 STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
438 return false;
439 }
440 *resolvedp = true;
441 }
442 return true;
443 }
444
445 static const JSClassOps StringObjectClassOps = {
446 nullptr, // addProperty
447 nullptr, // delProperty
448 str_enumerate, // enumerate
449 nullptr, // newEnumerate
450 str_resolve, // resolve
451 str_mayResolve, // mayResolve
452 nullptr, // finalize
453 nullptr, // call
454 nullptr, // hasInstance
455 nullptr, // construct
456 nullptr, // trace
457 };
458
459 const JSClass StringObject::class_ = {
460 js_String_str,
461 JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
462 JSCLASS_HAS_CACHED_PROTO(JSProto_String),
463 &StringObjectClassOps, &StringObject::classSpec_};
464
465 /*
466 * Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)|
467 * from nearly all String.prototype.* functions.
468 */
ToStringForStringFunction(JSContext * cx,const char * funName,HandleValue thisv)469 static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction(
470 JSContext* cx, const char* funName, HandleValue thisv) {
471 AutoCheckRecursionLimit recursion(cx);
472 if (!recursion.check(cx)) {
473 return nullptr;
474 }
475
476 if (thisv.isString()) {
477 return thisv.toString();
478 }
479
480 if (thisv.isObject()) {
481 RootedObject obj(cx, &thisv.toObject());
482 if (obj->is<StringObject>()) {
483 StringObject* nobj = &obj->as<StringObject>();
484 // We have to make sure that the ToPrimitive call from ToString
485 // would be unobservable.
486 if (HasNoToPrimitiveMethodPure(nobj, cx) &&
487 HasNativeMethodPure(nobj, cx->names().toString, str_toString, cx)) {
488 return nobj->unbox();
489 }
490 }
491 } else if (thisv.isNullOrUndefined()) {
492 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
493 JSMSG_INCOMPATIBLE_PROTO, "String", funName,
494 thisv.isNull() ? "null" : "undefined");
495 return nullptr;
496 }
497
498 return ToStringSlow<CanGC>(cx, thisv);
499 }
500
IsString(HandleValue v)501 MOZ_ALWAYS_INLINE bool IsString(HandleValue v) {
502 return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
503 }
504
str_toSource_impl(JSContext * cx,const CallArgs & args)505 MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) {
506 MOZ_ASSERT(IsString(args.thisv()));
507
508 JSString* str = ToString<CanGC>(cx, args.thisv());
509 if (!str) {
510 return false;
511 }
512
513 UniqueChars quoted = QuoteString(cx, str, '"');
514 if (!quoted) {
515 return false;
516 }
517
518 JSStringBuilder sb(cx);
519 if (!sb.append("(new String(") ||
520 !sb.append(quoted.get(), strlen(quoted.get())) || !sb.append("))")) {
521 return false;
522 }
523
524 JSString* result = sb.finishString();
525 if (!result) {
526 return false;
527 }
528 args.rval().setString(result);
529 return true;
530 }
531
str_toSource(JSContext * cx,unsigned argc,Value * vp)532 static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) {
533 CallArgs args = CallArgsFromVp(argc, vp);
534 return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
535 }
536
str_toString_impl(JSContext * cx,const CallArgs & args)537 MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) {
538 MOZ_ASSERT(IsString(args.thisv()));
539
540 args.rval().setString(
541 args.thisv().isString()
542 ? args.thisv().toString()
543 : args.thisv().toObject().as<StringObject>().unbox());
544 return true;
545 }
546
str_toString(JSContext * cx,unsigned argc,Value * vp)547 bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) {
548 CallArgs args = CallArgsFromVp(argc, vp);
549 return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
550 }
551
552 /*
553 * Java-like string native methods.
554 */
555
SubstringKernel(JSContext * cx,HandleString str,int32_t beginInt,int32_t lengthInt)556 JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
557 int32_t lengthInt) {
558 MOZ_ASSERT(0 <= beginInt);
559 MOZ_ASSERT(0 <= lengthInt);
560 MOZ_ASSERT(uint32_t(beginInt) <= str->length());
561 MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
562
563 uint32_t begin = beginInt;
564 uint32_t len = lengthInt;
565
566 /*
567 * Optimization for one level deep ropes.
568 * This is common for the following pattern:
569 *
570 * while() {
571 * text = text.substr(0, x) + "bla" + text.substr(x)
572 * test.charCodeAt(x + 1)
573 * }
574 */
575 if (str->isRope()) {
576 JSRope* rope = &str->asRope();
577
578 /* Substring is totally in leftChild of rope. */
579 if (begin + len <= rope->leftChild()->length()) {
580 return NewDependentString(cx, rope->leftChild(), begin, len);
581 }
582
583 /* Substring is totally in rightChild of rope. */
584 if (begin >= rope->leftChild()->length()) {
585 begin -= rope->leftChild()->length();
586 return NewDependentString(cx, rope->rightChild(), begin, len);
587 }
588
589 /*
590 * Requested substring is partly in the left and partly in right child.
591 * Create a rope of substrings for both childs.
592 */
593 MOZ_ASSERT(begin < rope->leftChild()->length() &&
594 begin + len > rope->leftChild()->length());
595
596 size_t lhsLength = rope->leftChild()->length() - begin;
597 size_t rhsLength = begin + len - rope->leftChild()->length();
598
599 Rooted<JSRope*> ropeRoot(cx, rope);
600 RootedString lhs(
601 cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
602 if (!lhs) {
603 return nullptr;
604 }
605
606 RootedString rhs(
607 cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
608 if (!rhs) {
609 return nullptr;
610 }
611
612 return JSRope::new_<CanGC>(cx, lhs, rhs, len);
613 }
614
615 return NewDependentString(cx, str, begin, len);
616 }
617
618 /**
619 * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
620 * depending on its context:
621 * When it's preceded by a cased character and not followed by another cased
622 * character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
623 * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
624 *
625 * Unicode 9.0, §3.13 Default Case Algorithms
626 */
Final_Sigma(const char16_t * chars,size_t length,size_t index)627 static char16_t Final_Sigma(const char16_t* chars, size_t length,
628 size_t index) {
629 MOZ_ASSERT(index < length);
630 MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
631 MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
632 unicode::GREEK_SMALL_LETTER_SIGMA);
633
634 #if JS_HAS_INTL_API
635 // Tell the analysis the BinaryProperty.contains function pointer called by
636 // u_hasBinaryProperty cannot GC.
637 JS::AutoSuppressGCAnalysis nogc;
638
639 bool precededByCased = false;
640 for (size_t i = index; i > 0;) {
641 char16_t c = chars[--i];
642 uint32_t codePoint = c;
643 if (unicode::IsTrailSurrogate(c) && i > 0) {
644 char16_t lead = chars[i - 1];
645 if (unicode::IsLeadSurrogate(lead)) {
646 codePoint = unicode::UTF16Decode(lead, c);
647 i--;
648 }
649 }
650
651 // Ignore any characters with the property Case_Ignorable.
652 // NB: We need to skip over all Case_Ignorable characters, even when
653 // they also have the Cased binary property.
654 if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) {
655 continue;
656 }
657
658 precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
659 break;
660 }
661 if (!precededByCased) {
662 return unicode::GREEK_SMALL_LETTER_SIGMA;
663 }
664
665 bool followedByCased = false;
666 for (size_t i = index + 1; i < length;) {
667 char16_t c = chars[i++];
668 uint32_t codePoint = c;
669 if (unicode::IsLeadSurrogate(c) && i < length) {
670 char16_t trail = chars[i];
671 if (unicode::IsTrailSurrogate(trail)) {
672 codePoint = unicode::UTF16Decode(c, trail);
673 i++;
674 }
675 }
676
677 // Ignore any characters with the property Case_Ignorable.
678 // NB: We need to skip over all Case_Ignorable characters, even when
679 // they also have the Cased binary property.
680 if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) {
681 continue;
682 }
683
684 followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
685 break;
686 }
687 if (!followedByCased) {
688 return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
689 }
690 #endif
691
692 return unicode::GREEK_SMALL_LETTER_SIGMA;
693 }
694
695 // If |srcLength == destLength| is true, the destination buffer was allocated
696 // with the same size as the source buffer. When we append characters which
697 // have special casing mappings, we test |srcLength == destLength| to decide
698 // if we need to back out and reallocate a sufficiently large destination
699 // buffer. Otherwise the destination buffer was allocated with the correct
700 // size to hold all lower case mapped characters, i.e.
701 // |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
702 template <typename CharT>
ToLowerCaseImpl(CharT * destChars,const CharT * srcChars,size_t startIndex,size_t srcLength,size_t destLength)703 static size_t ToLowerCaseImpl(CharT* destChars, const CharT* srcChars,
704 size_t startIndex, size_t srcLength,
705 size_t destLength) {
706 MOZ_ASSERT(startIndex < srcLength);
707 MOZ_ASSERT(srcLength <= destLength);
708 if constexpr (std::is_same_v<CharT, Latin1Char>) {
709 MOZ_ASSERT(srcLength == destLength);
710 }
711
712 size_t j = startIndex;
713 for (size_t i = startIndex; i < srcLength; i++) {
714 CharT c = srcChars[i];
715 if constexpr (!std::is_same_v<CharT, Latin1Char>) {
716 if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
717 char16_t trail = srcChars[i + 1];
718 if (unicode::IsTrailSurrogate(trail)) {
719 trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
720 destChars[j++] = c;
721 destChars[j++] = trail;
722 i++;
723 continue;
724 }
725 }
726
727 // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
728 // lowercases to <U+0069 U+0307>.
729 if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
730 // Return if the output buffer is too small.
731 if (srcLength == destLength) {
732 return i;
733 }
734
735 destChars[j++] = CharT('i');
736 destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
737 continue;
738 }
739
740 // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
741 // one of two codepoints depending on context.
742 if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
743 destChars[j++] = Final_Sigma(srcChars, srcLength, i);
744 continue;
745 }
746 }
747
748 c = unicode::ToLowerCase(c);
749 destChars[j++] = c;
750 }
751
752 MOZ_ASSERT(j == destLength);
753 return srcLength;
754 }
755
ToLowerCaseLength(const char16_t * chars,size_t startIndex,size_t length)756 static size_t ToLowerCaseLength(const char16_t* chars, size_t startIndex,
757 size_t length) {
758 size_t lowerLength = length;
759 for (size_t i = startIndex; i < length; i++) {
760 char16_t c = chars[i];
761
762 // U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
763 if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
764 lowerLength += 1;
765 }
766 }
767 return lowerLength;
768 }
769
770 template <typename CharT>
ToLowerCase(JSContext * cx,JSLinearString * str)771 static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) {
772 // Unlike toUpperCase, toLowerCase has the nice invariant that if the
773 // input is a Latin-1 string, the output is also a Latin-1 string.
774
775 InlineCharBuffer<CharT> newChars;
776
777 const size_t length = str->length();
778 size_t resultLength;
779 {
780 AutoCheckCannotGC nogc;
781 const CharT* chars = str->chars<CharT>(nogc);
782
783 // We don't need extra special casing checks in the loop below,
784 // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
785 // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
786 MOZ_ASSERT(unicode::ChangesWhenLowerCased(
787 unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
788 "U+0130 has a simple lower case mapping");
789 MOZ_ASSERT(
790 unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA),
791 "U+03A3 has a simple lower case mapping");
792
793 // One element Latin-1 strings can be directly retrieved from the
794 // static strings cache.
795 if constexpr (std::is_same_v<CharT, Latin1Char>) {
796 if (length == 1) {
797 CharT lower = unicode::ToLowerCase(chars[0]);
798 MOZ_ASSERT(StaticStrings::hasUnit(lower));
799
800 return cx->staticStrings().getUnit(lower);
801 }
802 }
803
804 // Look for the first character that changes when lowercased.
805 size_t i = 0;
806 for (; i < length; i++) {
807 CharT c = chars[i];
808 if constexpr (!std::is_same_v<CharT, Latin1Char>) {
809 if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
810 CharT trail = chars[i + 1];
811 if (unicode::IsTrailSurrogate(trail)) {
812 if (unicode::ChangesWhenLowerCasedNonBMP(c, trail)) {
813 break;
814 }
815
816 i++;
817 continue;
818 }
819 }
820 }
821 if (unicode::ChangesWhenLowerCased(c)) {
822 break;
823 }
824 }
825
826 // If no character needs to change, return the input string.
827 if (i == length) {
828 return str;
829 }
830
831 resultLength = length;
832 if (!newChars.maybeAlloc(cx, resultLength)) {
833 return nullptr;
834 }
835
836 PodCopy(newChars.get(), chars, i);
837
838 size_t readChars =
839 ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
840 if constexpr (!std::is_same_v<CharT, Latin1Char>) {
841 if (readChars < length) {
842 resultLength = ToLowerCaseLength(chars, readChars, length);
843
844 if (!newChars.maybeRealloc(cx, length, resultLength)) {
845 return nullptr;
846 }
847
848 MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.get(), chars,
849 readChars, length,
850 resultLength));
851 }
852 } else {
853 MOZ_ASSERT(readChars == length,
854 "Latin-1 strings don't have special lower case mappings");
855 }
856 }
857
858 return newChars.toStringDontDeflate(cx, resultLength);
859 }
860
StringToLowerCase(JSContext * cx,HandleString string)861 JSString* js::StringToLowerCase(JSContext* cx, HandleString string) {
862 JSLinearString* linear = string->ensureLinear(cx);
863 if (!linear) {
864 return nullptr;
865 }
866
867 if (linear->hasLatin1Chars()) {
868 return ToLowerCase<Latin1Char>(cx, linear);
869 }
870 return ToLowerCase<char16_t>(cx, linear);
871 }
872
str_toLowerCase(JSContext * cx,unsigned argc,Value * vp)873 static bool str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) {
874 CallArgs args = CallArgsFromVp(argc, vp);
875
876 RootedString str(cx,
877 ToStringForStringFunction(cx, "toLowerCase", args.thisv()));
878 if (!str) {
879 return false;
880 }
881
882 JSString* result = StringToLowerCase(cx, str);
883 if (!result) {
884 return false;
885 }
886
887 args.rval().setString(result);
888 return true;
889 }
890
891 #if JS_HAS_INTL_API
892 // String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
893 // with core functionality performed by the intrinsic below.
894
CaseMappingLocale(JSContext * cx,JSString * str)895 static const char* CaseMappingLocale(JSContext* cx, JSString* str) {
896 JSLinearString* locale = str->ensureLinear(cx);
897 if (!locale) {
898 return nullptr;
899 }
900
901 MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");
902
903 // Lithuanian, Turkish, and Azeri have language dependent case mappings.
904 static const char languagesWithSpecialCasing[][3] = {"lt", "tr", "az"};
905
906 // All strings in |languagesWithSpecialCasing| are of length two, so we
907 // only need to compare the first two characters to find a matching locale.
908 // ES2017 Intl, §9.2.2 BestAvailableLocale
909 if (locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-') {
910 for (const auto& language : languagesWithSpecialCasing) {
911 if (locale->latin1OrTwoByteChar(0) == language[0] &&
912 locale->latin1OrTwoByteChar(1) == language[1]) {
913 return language;
914 }
915 }
916 }
917
918 return ""; // ICU root locale
919 }
920
intl_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)921 bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
922 CallArgs args = CallArgsFromVp(argc, vp);
923 MOZ_ASSERT(args.length() == 2);
924 MOZ_ASSERT(args[0].isString());
925 MOZ_ASSERT(args[1].isString());
926
927 RootedString string(cx, args[0].toString());
928
929 const char* locale = CaseMappingLocale(cx, args[1].toString());
930 if (!locale) {
931 return false;
932 }
933
934 // Call String.prototype.toLowerCase() for language independent casing.
935 if (intl::StringsAreEqual(locale, "")) {
936 JSString* str = StringToLowerCase(cx, string);
937 if (!str) {
938 return false;
939 }
940
941 args.rval().setString(str);
942 return true;
943 }
944
945 AutoStableStringChars inputChars(cx);
946 if (!inputChars.initTwoByte(cx, string)) {
947 return false;
948 }
949 mozilla::Range<const char16_t> input = inputChars.twoByteRange();
950
951 // Note: maximum case mapping length is three characters, so the result
952 // length might be > INT32_MAX. ICU will fail in this case.
953 static_assert(JSString::MAX_LENGTH <= INT32_MAX,
954 "String length must fit in int32_t for ICU");
955
956 static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
957
958 Vector<char16_t, INLINE_CAPACITY> chars(cx);
959 if (!chars.resize(std::max(INLINE_CAPACITY, input.length()))) {
960 return false;
961 }
962
963 int32_t size = intl::CallICU(
964 cx,
965 [&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
966 return u_strToLower(chars, size, input.begin().get(), input.length(),
967 locale, status);
968 },
969 chars);
970 if (size < 0) {
971 return false;
972 }
973
974 JSString* result = NewStringCopyN<CanGC>(cx, chars.begin(), size);
975 if (!result) {
976 return false;
977 }
978
979 args.rval().setString(result);
980 return true;
981 }
982
983 #else
984
985 // When the Intl API is not exposed, String.prototype.toLowerCase is implemented
986 // in C++.
str_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)987 static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
988 CallArgs args = CallArgsFromVp(argc, vp);
989
990 RootedString str(
991 cx, ToStringForStringFunction(cx, "toLocaleLowerCase", args.thisv()));
992 if (!str) {
993 return false;
994 }
995
996 /*
997 * Forcefully ignore the first (or any) argument and return toLowerCase(),
998 * ECMA has reserved that argument, presumably for defining the locale.
999 */
1000 if (cx->runtime()->localeCallbacks &&
1001 cx->runtime()->localeCallbacks->localeToLowerCase) {
1002 RootedValue result(cx);
1003 if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) {
1004 return false;
1005 }
1006
1007 args.rval().set(result);
1008 return true;
1009 }
1010
1011 RootedLinearString linear(cx, str->ensureLinear(cx));
1012 if (!linear) {
1013 return false;
1014 }
1015
1016 JSString* result = StringToLowerCase(cx, linear);
1017 if (!result) {
1018 return false;
1019 }
1020
1021 args.rval().setString(result);
1022 return true;
1023 }
1024
1025 #endif // JS_HAS_INTL_API
1026
ToUpperCaseHasSpecialCasing(Latin1Char charCode)1027 static inline bool ToUpperCaseHasSpecialCasing(Latin1Char charCode) {
1028 // U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
1029 // special casing rules, so detect it inline.
1030 bool hasUpperCaseSpecialCasing =
1031 charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
1032 MOZ_ASSERT(hasUpperCaseSpecialCasing ==
1033 unicode::ChangesWhenUpperCasedSpecialCasing(charCode));
1034
1035 return hasUpperCaseSpecialCasing;
1036 }
1037
ToUpperCaseHasSpecialCasing(char16_t charCode)1038 static inline bool ToUpperCaseHasSpecialCasing(char16_t charCode) {
1039 return unicode::ChangesWhenUpperCasedSpecialCasing(charCode);
1040 }
1041
ToUpperCaseLengthSpecialCasing(Latin1Char charCode)1042 static inline size_t ToUpperCaseLengthSpecialCasing(Latin1Char charCode) {
1043 // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1044 MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1045
1046 return 2;
1047 }
1048
ToUpperCaseLengthSpecialCasing(char16_t charCode)1049 static inline size_t ToUpperCaseLengthSpecialCasing(char16_t charCode) {
1050 MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode));
1051
1052 return unicode::LengthUpperCaseSpecialCasing(charCode);
1053 }
1054
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,Latin1Char * elements,size_t * index)1055 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1056 Latin1Char* elements,
1057 size_t* index) {
1058 // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1059 MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1060 static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
1061
1062 elements[(*index)++] = 'S';
1063 elements[(*index)++] = 'S';
1064 }
1065
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,char16_t * elements,size_t * index)1066 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1067 char16_t* elements,
1068 size_t* index) {
1069 unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
1070 }
1071
1072 // See ToLowerCaseImpl for an explanation of the parameters.
1073 template <typename DestChar, typename SrcChar>
ToUpperCaseImpl(DestChar * destChars,const SrcChar * srcChars,size_t startIndex,size_t srcLength,size_t destLength)1074 static size_t ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars,
1075 size_t startIndex, size_t srcLength,
1076 size_t destLength) {
1077 static_assert(std::is_same_v<SrcChar, Latin1Char> ||
1078 !std::is_same_v<DestChar, Latin1Char>,
1079 "cannot write non-Latin-1 characters into Latin-1 string");
1080 MOZ_ASSERT(startIndex < srcLength);
1081 MOZ_ASSERT(srcLength <= destLength);
1082
1083 size_t j = startIndex;
1084 for (size_t i = startIndex; i < srcLength; i++) {
1085 char16_t c = srcChars[i];
1086 if constexpr (!std::is_same_v<DestChar, Latin1Char>) {
1087 if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
1088 char16_t trail = srcChars[i + 1];
1089 if (unicode::IsTrailSurrogate(trail)) {
1090 trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
1091 destChars[j++] = c;
1092 destChars[j++] = trail;
1093 i++;
1094 continue;
1095 }
1096 }
1097 }
1098
1099 if (MOZ_UNLIKELY(c > 0x7f &&
1100 ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) {
1101 // Return if the output buffer is too small.
1102 if (srcLength == destLength) {
1103 return i;
1104 }
1105
1106 ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j);
1107 continue;
1108 }
1109
1110 c = unicode::ToUpperCase(c);
1111 if constexpr (std::is_same_v<DestChar, Latin1Char>) {
1112 MOZ_ASSERT(c <= JSString::MAX_LATIN1_CHAR);
1113 }
1114 destChars[j++] = c;
1115 }
1116
1117 MOZ_ASSERT(j == destLength);
1118 return srcLength;
1119 }
1120
1121 template <typename CharT>
ToUpperCaseLength(const CharT * chars,size_t startIndex,size_t length)1122 static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex,
1123 size_t length) {
1124 size_t upperLength = length;
1125 for (size_t i = startIndex; i < length; i++) {
1126 char16_t c = chars[i];
1127
1128 if (c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<CharT>(c))) {
1129 upperLength += ToUpperCaseLengthSpecialCasing(static_cast<CharT>(c)) - 1;
1130 }
1131 }
1132 return upperLength;
1133 }
1134
1135 template <typename DestChar, typename SrcChar>
CopyChars(DestChar * destChars,const SrcChar * srcChars,size_t length)1136 static inline void CopyChars(DestChar* destChars, const SrcChar* srcChars,
1137 size_t length) {
1138 static_assert(!std::is_same_v<DestChar, SrcChar>,
1139 "PodCopy is used for the same type case");
1140 for (size_t i = 0; i < length; i++) {
1141 destChars[i] = srcChars[i];
1142 }
1143 }
1144
1145 template <typename CharT>
CopyChars(CharT * destChars,const CharT * srcChars,size_t length)1146 static inline void CopyChars(CharT* destChars, const CharT* srcChars,
1147 size_t length) {
1148 PodCopy(destChars, srcChars, length);
1149 }
1150
1151 template <typename DestChar, typename SrcChar>
ToUpperCase(JSContext * cx,InlineCharBuffer<DestChar> & newChars,const SrcChar * chars,size_t startIndex,size_t length,size_t * resultLength)1152 static inline bool ToUpperCase(JSContext* cx,
1153 InlineCharBuffer<DestChar>& newChars,
1154 const SrcChar* chars, size_t startIndex,
1155 size_t length, size_t* resultLength) {
1156 MOZ_ASSERT(startIndex < length);
1157
1158 *resultLength = length;
1159 if (!newChars.maybeAlloc(cx, length)) {
1160 return false;
1161 }
1162
1163 CopyChars(newChars.get(), chars, startIndex);
1164
1165 size_t readChars =
1166 ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
1167 if (readChars < length) {
1168 size_t actualLength = ToUpperCaseLength(chars, readChars, length);
1169
1170 *resultLength = actualLength;
1171 if (!newChars.maybeRealloc(cx, length, actualLength)) {
1172 return false;
1173 }
1174
1175 MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.get(), chars, readChars,
1176 length, actualLength));
1177 }
1178
1179 return true;
1180 }
1181
1182 template <typename CharT>
ToUpperCase(JSContext * cx,JSLinearString * str)1183 static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) {
1184 using Latin1Buffer = InlineCharBuffer<Latin1Char>;
1185 using TwoByteBuffer = InlineCharBuffer<char16_t>;
1186
1187 mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
1188 const size_t length = str->length();
1189 size_t resultLength;
1190 {
1191 AutoCheckCannotGC nogc;
1192 const CharT* chars = str->chars<CharT>(nogc);
1193
1194 // Most one element Latin-1 strings can be directly retrieved from the
1195 // static strings cache.
1196 if constexpr (std::is_same_v<CharT, Latin1Char>) {
1197 if (length == 1) {
1198 Latin1Char c = chars[0];
1199 if (c != unicode::MICRO_SIGN &&
1200 c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
1201 c != unicode::LATIN_SMALL_LETTER_SHARP_S) {
1202 char16_t upper = unicode::ToUpperCase(c);
1203 MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
1204 MOZ_ASSERT(StaticStrings::hasUnit(upper));
1205
1206 return cx->staticStrings().getUnit(upper);
1207 }
1208
1209 MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
1210 ToUpperCaseHasSpecialCasing(c));
1211 }
1212 }
1213
1214 // Look for the first character that changes when uppercased.
1215 size_t i = 0;
1216 for (; i < length; i++) {
1217 CharT c = chars[i];
1218 if constexpr (!std::is_same_v<CharT, Latin1Char>) {
1219 if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
1220 CharT trail = chars[i + 1];
1221 if (unicode::IsTrailSurrogate(trail)) {
1222 if (unicode::ChangesWhenUpperCasedNonBMP(c, trail)) {
1223 break;
1224 }
1225
1226 i++;
1227 continue;
1228 }
1229 }
1230 }
1231 if (unicode::ChangesWhenUpperCased(c)) {
1232 break;
1233 }
1234 if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c))) {
1235 break;
1236 }
1237 }
1238
1239 // If no character needs to change, return the input string.
1240 if (i == length) {
1241 return str;
1242 }
1243
1244 // The string changes when uppercased, so we must create a new string.
1245 // Can it be Latin-1?
1246 //
1247 // If the original string is Latin-1, it can -- unless the string
1248 // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
1249 // the only Latin-1 codepoints that don't uppercase within Latin-1.
1250 // Search for those codepoints to decide whether the new string can be
1251 // Latin-1.
1252 // If the original string is a two-byte string, its uppercase form is
1253 // so rarely Latin-1 that we don't even consider creating a new
1254 // Latin-1 string.
1255 if constexpr (std::is_same_v<CharT, Latin1Char>) {
1256 bool resultIsLatin1 = true;
1257 for (size_t j = i; j < length; j++) {
1258 Latin1Char c = chars[j];
1259 if (c == unicode::MICRO_SIGN ||
1260 c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
1261 MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
1262 resultIsLatin1 = false;
1263 break;
1264 } else {
1265 MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
1266 }
1267 }
1268
1269 if (resultIsLatin1) {
1270 newChars.construct<Latin1Buffer>();
1271
1272 if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length,
1273 &resultLength)) {
1274 return nullptr;
1275 }
1276 } else {
1277 newChars.construct<TwoByteBuffer>();
1278
1279 if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1280 &resultLength)) {
1281 return nullptr;
1282 }
1283 }
1284 } else {
1285 newChars.construct<TwoByteBuffer>();
1286
1287 if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1288 &resultLength)) {
1289 return nullptr;
1290 }
1291 }
1292 }
1293
1294 return newChars.constructed<Latin1Buffer>()
1295 ? newChars.ref<Latin1Buffer>().toStringDontDeflate(cx,
1296 resultLength)
1297 : newChars.ref<TwoByteBuffer>().toStringDontDeflate(cx,
1298 resultLength);
1299 }
1300
StringToUpperCase(JSContext * cx,HandleString string)1301 JSString* js::StringToUpperCase(JSContext* cx, HandleString string) {
1302 JSLinearString* linear = string->ensureLinear(cx);
1303 if (!linear) {
1304 return nullptr;
1305 }
1306
1307 if (linear->hasLatin1Chars()) {
1308 return ToUpperCase<Latin1Char>(cx, linear);
1309 }
1310 return ToUpperCase<char16_t>(cx, linear);
1311 }
1312
str_toUpperCase(JSContext * cx,unsigned argc,Value * vp)1313 static bool str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1314 CallArgs args = CallArgsFromVp(argc, vp);
1315
1316 RootedString str(cx,
1317 ToStringForStringFunction(cx, "toUpperCase", args.thisv()));
1318 if (!str) {
1319 return false;
1320 }
1321
1322 JSString* result = StringToUpperCase(cx, str);
1323 if (!result) {
1324 return false;
1325 }
1326
1327 args.rval().setString(result);
1328 return true;
1329 }
1330
1331 #if JS_HAS_INTL_API
1332 // String.prototype.toLocaleUpperCase is self-hosted when Intl is exposed,
1333 // with core functionality performed by the intrinsic below.
1334
intl_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1335 bool js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1336 CallArgs args = CallArgsFromVp(argc, vp);
1337 MOZ_ASSERT(args.length() == 2);
1338 MOZ_ASSERT(args[0].isString());
1339 MOZ_ASSERT(args[1].isString());
1340
1341 RootedString string(cx, args[0].toString());
1342
1343 const char* locale = CaseMappingLocale(cx, args[1].toString());
1344 if (!locale) {
1345 return false;
1346 }
1347
1348 // Call String.prototype.toUpperCase() for language independent casing.
1349 if (intl::StringsAreEqual(locale, "")) {
1350 JSString* str = js::StringToUpperCase(cx, string);
1351 if (!str) {
1352 return false;
1353 }
1354
1355 args.rval().setString(str);
1356 return true;
1357 }
1358
1359 AutoStableStringChars inputChars(cx);
1360 if (!inputChars.initTwoByte(cx, string)) {
1361 return false;
1362 }
1363 mozilla::Range<const char16_t> input = inputChars.twoByteRange();
1364
1365 // Note: maximum case mapping length is three characters, so the result
1366 // length might be > INT32_MAX. ICU will fail in this case.
1367 static_assert(JSString::MAX_LENGTH <= INT32_MAX,
1368 "String length must fit in int32_t for ICU");
1369
1370 static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1371
1372 Vector<char16_t, INLINE_CAPACITY> chars(cx);
1373 if (!chars.resize(std::max(INLINE_CAPACITY, input.length()))) {
1374 return false;
1375 }
1376
1377 int32_t size = intl::CallICU(
1378 cx,
1379 [&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
1380 return u_strToUpper(chars, size, input.begin().get(), input.length(),
1381 locale, status);
1382 },
1383 chars);
1384 if (size < 0) {
1385 return false;
1386 }
1387
1388 JSString* result = NewStringCopyN<CanGC>(cx, chars.begin(), size);
1389 if (!result) {
1390 return false;
1391 }
1392
1393 args.rval().setString(result);
1394 return true;
1395 }
1396
1397 #else
1398
1399 // When the Intl API is not exposed, String.prototype.toUpperCase is implemented
1400 // in C++.
str_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1401 static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1402 CallArgs args = CallArgsFromVp(argc, vp);
1403
1404 RootedString str(
1405 cx, ToStringForStringFunction(cx, "toLocaleUpperCase", args.thisv()));
1406 if (!str) {
1407 return false;
1408 }
1409
1410 /*
1411 * Forcefully ignore the first (or any) argument and return toUpperCase(),
1412 * ECMA has reserved that argument, presumably for defining the locale.
1413 */
1414 if (cx->runtime()->localeCallbacks &&
1415 cx->runtime()->localeCallbacks->localeToUpperCase) {
1416 RootedValue result(cx);
1417 if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) {
1418 return false;
1419 }
1420
1421 args.rval().set(result);
1422 return true;
1423 }
1424
1425 RootedLinearString linear(cx, str->ensureLinear(cx));
1426 if (!linear) {
1427 return false;
1428 }
1429
1430 JSString* result = StringToUpperCase(cx, linear);
1431 if (!result) {
1432 return false;
1433 }
1434
1435 args.rval().setString(result);
1436 return true;
1437 }
1438
1439 #endif // JS_HAS_INTL_API
1440
1441 #if JS_HAS_INTL_API
1442
1443 // String.prototype.localeCompare is self-hosted when Intl functionality is
1444 // exposed, and the only intrinsics it requires are provided in the
1445 // implementation of Intl.Collator.
1446
1447 #else
1448
1449 // String.prototype.localeCompare is implemented in C++ (delegating to
1450 // JSLocaleCallbacks) when Intl functionality is not exposed.
str_localeCompare(JSContext * cx,unsigned argc,Value * vp)1451 static bool str_localeCompare(JSContext* cx, unsigned argc, Value* vp) {
1452 CallArgs args = CallArgsFromVp(argc, vp);
1453 RootedString str(
1454 cx, ToStringForStringFunction(cx, "localeCompare", args.thisv()));
1455 if (!str) {
1456 return false;
1457 }
1458
1459 RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
1460 if (!thatStr) {
1461 return false;
1462 }
1463
1464 if (cx->runtime()->localeCallbacks &&
1465 cx->runtime()->localeCallbacks->localeCompare) {
1466 RootedValue result(cx);
1467 if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr,
1468 &result)) {
1469 return false;
1470 }
1471
1472 args.rval().set(result);
1473 return true;
1474 }
1475
1476 int32_t result;
1477 if (!CompareStrings(cx, str, thatStr, &result)) {
1478 return false;
1479 }
1480
1481 args.rval().setInt32(result);
1482 return true;
1483 }
1484
1485 #endif // JS_HAS_INTL_API
1486
1487 #if JS_HAS_INTL_API
1488
1489 // ES2017 draft rev 45e890512fd77add72cc0ee742785f9f6f6482de
1490 // 21.1.3.12 String.prototype.normalize ( [ form ] )
1491 //
1492 // String.prototype.normalize is only implementable if ICU's normalization
1493 // functionality is available.
str_normalize(JSContext * cx,unsigned argc,Value * vp)1494 static bool str_normalize(JSContext* cx, unsigned argc, Value* vp) {
1495 CallArgs args = CallArgsFromVp(argc, vp);
1496
1497 // Steps 1-2.
1498 RootedString str(cx,
1499 ToStringForStringFunction(cx, "normalize", args.thisv()));
1500 if (!str) {
1501 return false;
1502 }
1503
1504 enum NormalizationForm { NFC, NFD, NFKC, NFKD };
1505
1506 NormalizationForm form;
1507 if (!args.hasDefined(0)) {
1508 // Step 3.
1509 form = NFC;
1510 } else {
1511 // Step 4.
1512 JSLinearString* formStr = ArgToLinearString(cx, args, 0);
1513 if (!formStr) {
1514 return false;
1515 }
1516
1517 // Step 5.
1518 if (EqualStrings(formStr, cx->names().NFC)) {
1519 form = NFC;
1520 } else if (EqualStrings(formStr, cx->names().NFD)) {
1521 form = NFD;
1522 } else if (EqualStrings(formStr, cx->names().NFKC)) {
1523 form = NFKC;
1524 } else if (EqualStrings(formStr, cx->names().NFKD)) {
1525 form = NFKD;
1526 } else {
1527 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1528 JSMSG_INVALID_NORMALIZE_FORM);
1529 return false;
1530 }
1531 }
1532
1533 // Latin-1 strings are already in Normalization Form C.
1534 if (form == NFC && str->hasLatin1Chars()) {
1535 // Step 7.
1536 args.rval().setString(str);
1537 return true;
1538 }
1539
1540 // Step 6.
1541 AutoStableStringChars stableChars(cx);
1542 if (!stableChars.initTwoByte(cx, str)) {
1543 return false;
1544 }
1545
1546 mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();
1547
1548 // The unorm2_getXXXInstance() methods return a shared instance which must
1549 // not be deleted.
1550 UErrorCode status = U_ZERO_ERROR;
1551 const UNormalizer2* normalizer;
1552 if (form == NFC) {
1553 normalizer = unorm2_getNFCInstance(&status);
1554 } else if (form == NFD) {
1555 normalizer = unorm2_getNFDInstance(&status);
1556 } else if (form == NFKC) {
1557 normalizer = unorm2_getNFKCInstance(&status);
1558 } else {
1559 MOZ_ASSERT(form == NFKD);
1560 normalizer = unorm2_getNFKDInstance(&status);
1561 }
1562 if (U_FAILURE(status)) {
1563 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1564 JSMSG_INTERNAL_INTL_ERROR);
1565 return false;
1566 }
1567
1568 int32_t spanLengthInt = unorm2_spanQuickCheckYes(
1569 normalizer, srcChars.begin().get(), srcChars.length(), &status);
1570 if (U_FAILURE(status)) {
1571 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1572 JSMSG_INTERNAL_INTL_ERROR);
1573 return false;
1574 }
1575 MOZ_ASSERT(0 <= spanLengthInt && size_t(spanLengthInt) <= srcChars.length());
1576 size_t spanLength = size_t(spanLengthInt);
1577
1578 // Return if the input string is already normalized.
1579 if (spanLength == srcChars.length()) {
1580 // Step 7.
1581 args.rval().setString(str);
1582 return true;
1583 }
1584
1585 static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1586
1587 Vector<char16_t, INLINE_CAPACITY> chars(cx);
1588 if (!chars.resize(std::max(INLINE_CAPACITY, srcChars.length()))) {
1589 return false;
1590 }
1591
1592 // Copy the already normalized prefix.
1593 if (spanLength > 0) {
1594 PodCopy(chars.begin(), srcChars.begin().get(), spanLength);
1595 }
1596
1597 int32_t size = intl::CallICU(
1598 cx,
1599 [normalizer, &srcChars, spanLength](UChar* chars, uint32_t size,
1600 UErrorCode* status) {
1601 mozilla::RangedPtr<const char16_t> remainingStart =
1602 srcChars.begin() + spanLength;
1603 size_t remainingLength = srcChars.length() - spanLength;
1604
1605 return unorm2_normalizeSecondAndAppend(normalizer, chars, spanLength,
1606 size, remainingStart.get(),
1607 remainingLength, status);
1608 },
1609 chars);
1610 if (size < 0) {
1611 return false;
1612 }
1613
1614 JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
1615 if (!ns) {
1616 return false;
1617 }
1618
1619 // Step 7.
1620 args.rval().setString(ns);
1621 return true;
1622 }
1623
1624 #endif // JS_HAS_INTL_API
1625
str_charAt(JSContext * cx,unsigned argc,Value * vp)1626 static bool str_charAt(JSContext* cx, unsigned argc, Value* vp) {
1627 CallArgs args = CallArgsFromVp(argc, vp);
1628
1629 RootedString str(cx);
1630 size_t i;
1631 if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
1632 str = args.thisv().toString();
1633 i = size_t(args[0].toInt32());
1634 if (i >= str->length()) {
1635 goto out_of_range;
1636 }
1637 } else {
1638 str = ToStringForStringFunction(cx, "charAt", args.thisv());
1639 if (!str) {
1640 return false;
1641 }
1642
1643 double d = 0.0;
1644 if (args.length() > 0 && !ToInteger(cx, args[0], &d)) {
1645 return false;
1646 }
1647
1648 if (d < 0 || str->length() <= d) {
1649 goto out_of_range;
1650 }
1651 i = size_t(d);
1652 }
1653
1654 str = cx->staticStrings().getUnitStringForElement(cx, str, i);
1655 if (!str) {
1656 return false;
1657 }
1658 args.rval().setString(str);
1659 return true;
1660
1661 out_of_range:
1662 args.rval().setString(cx->runtime()->emptyString);
1663 return true;
1664 }
1665
str_charCodeAt_impl(JSContext * cx,HandleString string,HandleValue index,MutableHandleValue res)1666 bool js::str_charCodeAt_impl(JSContext* cx, HandleString string,
1667 HandleValue index, MutableHandleValue res) {
1668 size_t i;
1669 if (index.isInt32()) {
1670 i = index.toInt32();
1671 if (i >= string->length()) {
1672 goto out_of_range;
1673 }
1674 } else {
1675 double d = 0.0;
1676 if (!ToInteger(cx, index, &d)) {
1677 return false;
1678 }
1679 // check whether d is negative as size_t is unsigned
1680 if (d < 0 || string->length() <= d) {
1681 goto out_of_range;
1682 }
1683 i = size_t(d);
1684 }
1685 char16_t c;
1686 if (!string->getChar(cx, i, &c)) {
1687 return false;
1688 }
1689 res.setInt32(c);
1690 return true;
1691
1692 out_of_range:
1693 res.setNaN();
1694 return true;
1695 }
1696
str_charCodeAt(JSContext * cx,unsigned argc,Value * vp)1697 bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) {
1698 CallArgs args = CallArgsFromVp(argc, vp);
1699 RootedString str(cx);
1700 RootedValue index(cx);
1701 if (args.thisv().isString()) {
1702 str = args.thisv().toString();
1703 } else {
1704 str = ToStringForStringFunction(cx, "charCodeAt", args.thisv());
1705 if (!str) {
1706 return false;
1707 }
1708 }
1709 if (args.length() != 0) {
1710 index = args[0];
1711 } else {
1712 index.setInt32(0);
1713 }
1714
1715 return js::str_charCodeAt_impl(cx, str, index, args.rval());
1716 }
1717
1718 /*
1719 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1720 * The patlen argument must be positive and no greater than sBMHPatLenMax.
1721 *
1722 * Return the index of pat in text, or -1 if not found.
1723 */
1724 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1725 static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */
1726 static const int sBMHBadPattern =
1727 -2; /* return value if pat is not ISO-Latin-1 */
1728
1729 template <typename TextChar, typename PatChar>
BoyerMooreHorspool(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1730 static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen,
1731 const PatChar* pat, uint32_t patLen) {
1732 MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1733
1734 uint8_t skip[sBMHCharSetSize];
1735 for (uint32_t i = 0; i < sBMHCharSetSize; i++) {
1736 skip[i] = uint8_t(patLen);
1737 }
1738
1739 uint32_t patLast = patLen - 1;
1740 for (uint32_t i = 0; i < patLast; i++) {
1741 char16_t c = pat[i];
1742 if (c >= sBMHCharSetSize) {
1743 return sBMHBadPattern;
1744 }
1745 skip[c] = uint8_t(patLast - i);
1746 }
1747
1748 for (uint32_t k = patLast; k < textLen;) {
1749 for (uint32_t i = k, j = patLast;; i--, j--) {
1750 if (text[i] != pat[j]) {
1751 break;
1752 }
1753 if (j == 0) {
1754 return static_cast<int>(i); /* safe: max string size */
1755 }
1756 }
1757
1758 char16_t c = text[k];
1759 k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1760 }
1761 return -1;
1762 }
1763
1764 template <typename TextChar, typename PatChar>
1765 struct MemCmp {
1766 using Extent = uint32_t;
computeExtentMemCmp1767 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
1768 uint32_t patLen) {
1769 return (patLen - 1) * sizeof(PatChar);
1770 }
matchMemCmp1771 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1772 Extent extent) {
1773 MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1774 return memcmp(p, t, extent) == 0;
1775 }
1776 };
1777
1778 template <typename TextChar, typename PatChar>
1779 struct ManualCmp {
1780 using Extent = const PatChar*;
computeExtentManualCmp1781 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat,
1782 uint32_t patLen) {
1783 return pat + patLen;
1784 }
matchManualCmp1785 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1786 Extent extent) {
1787 for (; p != extent; ++p, ++t) {
1788 if (*p != *t) {
1789 return false;
1790 }
1791 }
1792 return true;
1793 }
1794 };
1795
1796 template <typename TextChar, typename PatChar>
FirstCharMatcherUnrolled(const TextChar * text,uint32_t n,const PatChar pat)1797 static const TextChar* FirstCharMatcherUnrolled(const TextChar* text,
1798 uint32_t n, const PatChar pat) {
1799 const TextChar* textend = text + n;
1800 const TextChar* t = text;
1801
1802 switch ((textend - t) & 7) {
1803 case 0:
1804 if (*t++ == pat) return t - 1;
1805 [[fallthrough]];
1806 case 7:
1807 if (*t++ == pat) return t - 1;
1808 [[fallthrough]];
1809 case 6:
1810 if (*t++ == pat) return t - 1;
1811 [[fallthrough]];
1812 case 5:
1813 if (*t++ == pat) return t - 1;
1814 [[fallthrough]];
1815 case 4:
1816 if (*t++ == pat) return t - 1;
1817 [[fallthrough]];
1818 case 3:
1819 if (*t++ == pat) return t - 1;
1820 [[fallthrough]];
1821 case 2:
1822 if (*t++ == pat) return t - 1;
1823 [[fallthrough]];
1824 case 1:
1825 if (*t++ == pat) return t - 1;
1826 }
1827 while (textend != t) {
1828 if (t[0] == pat) return t;
1829 if (t[1] == pat) return t + 1;
1830 if (t[2] == pat) return t + 2;
1831 if (t[3] == pat) return t + 3;
1832 if (t[4] == pat) return t + 4;
1833 if (t[5] == pat) return t + 5;
1834 if (t[6] == pat) return t + 6;
1835 if (t[7] == pat) return t + 7;
1836 t += 8;
1837 }
1838 return nullptr;
1839 }
1840
FirstCharMatcher8bit(const char * text,uint32_t n,const char pat)1841 static const char* FirstCharMatcher8bit(const char* text, uint32_t n,
1842 const char pat) {
1843 return reinterpret_cast<const char*>(memchr(text, pat, n));
1844 }
1845
1846 template <class InnerMatch, typename TextChar, typename PatChar>
Matcher(const TextChar * text,uint32_t textlen,const PatChar * pat,uint32_t patlen)1847 static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
1848 uint32_t patlen) {
1849 MOZ_ASSERT(patlen > 0);
1850
1851 if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) {
1852 return -1;
1853 }
1854
1855 const typename InnerMatch::Extent extent =
1856 InnerMatch::computeExtent(pat, patlen);
1857
1858 uint32_t i = 0;
1859 uint32_t n = textlen - patlen + 1;
1860 while (i < n) {
1861 const TextChar* pos;
1862
1863 if (sizeof(TextChar) == 1) {
1864 MOZ_ASSERT(pat[0] <= 0xff);
1865 pos = (TextChar*)FirstCharMatcher8bit((char*)text + i, n - i, pat[0]);
1866 } else {
1867 pos = FirstCharMatcherUnrolled(text + i, n - i, char16_t(pat[0]));
1868 }
1869
1870 if (pos == nullptr) {
1871 return -1;
1872 }
1873
1874 i = static_cast<uint32_t>(pos - text);
1875 if (InnerMatch::match(pat + 1, text + i + 1, extent)) {
1876 return i;
1877 }
1878
1879 i += 1;
1880 }
1881 return -1;
1882 }
1883
1884 template <typename TextChar, typename PatChar>
StringMatch(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1885 static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen,
1886 const PatChar* pat, uint32_t patLen) {
1887 if (patLen == 0) {
1888 return 0;
1889 }
1890 if (textLen < patLen) {
1891 return -1;
1892 }
1893
1894 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1895 /*
1896 * Given enough registers, the unrolled loop below is faster than the
1897 * following loop. 32-bit x86 does not have enough registers.
1898 */
1899 if (patLen == 1) {
1900 const PatChar p0 = *pat;
1901 const TextChar* end = text + textLen;
1902 for (const TextChar* c = text; c != end; ++c) {
1903 if (*c == p0) {
1904 return c - text;
1905 }
1906 }
1907 return -1;
1908 }
1909 #endif
1910
1911 /*
1912 * If the text or pattern string is short, BMH will be more expensive than
1913 * the basic linear scan due to initialization cost and a more complex loop
1914 * body. While the correct threshold is input-dependent, we can make a few
1915 * conservative observations:
1916 * - When |textLen| is "big enough", the initialization time will be
1917 * proportionally small, so the worst-case slowdown is minimized.
1918 * - When |patLen| is "too small", even the best case for BMH will be
1919 * slower than a simple scan for large |textLen| due to the more complex
1920 * loop body of BMH.
1921 * From this, the values for "big enough" and "too small" are determined
1922 * empirically. See bug 526348.
1923 */
1924 if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
1925 int index = BoyerMooreHorspool(text, textLen, pat, patLen);
1926 if (index != sBMHBadPattern) {
1927 return index;
1928 }
1929 }
1930
1931 /*
1932 * For big patterns with large potential overlap we want the SIMD-optimized
1933 * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1934 * use memcmp if one of the strings is TwoByte and the other is Latin-1.
1935 */
1936 return (patLen > 128 && std::is_same_v<TextChar, PatChar>)
1937 ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(
1938 text, textLen, pat, patLen)
1939 : Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(
1940 text, textLen, pat, patLen);
1941 }
1942
StringMatch(JSLinearString * text,JSLinearString * pat,uint32_t start=0)1943 static int32_t StringMatch(JSLinearString* text, JSLinearString* pat,
1944 uint32_t start = 0) {
1945 MOZ_ASSERT(start <= text->length());
1946 uint32_t textLen = text->length() - start;
1947 uint32_t patLen = pat->length();
1948
1949 int match;
1950 AutoCheckCannotGC nogc;
1951 if (text->hasLatin1Chars()) {
1952 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1953 if (pat->hasLatin1Chars()) {
1954 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1955 } else {
1956 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1957 }
1958 } else {
1959 const char16_t* textChars = text->twoByteChars(nogc) + start;
1960 if (pat->hasLatin1Chars()) {
1961 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1962 } else {
1963 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1964 }
1965 }
1966
1967 return (match == -1) ? -1 : start + match;
1968 }
1969
1970 static const size_t sRopeMatchThresholdRatioLog2 = 4;
1971
StringFindPattern(JSLinearString * text,JSLinearString * pat,size_t start)1972 int js::StringFindPattern(JSLinearString* text, JSLinearString* pat,
1973 size_t start) {
1974 return StringMatch(text, pat, start);
1975 }
1976
1977 // When an algorithm does not need a string represented as a single linear
1978 // array of characters, this range utility may be used to traverse the string a
1979 // sequence of linear arrays of characters. This avoids flattening ropes.
1980 class StringSegmentRange {
1981 // If malloc() shows up in any profiles from this vector, we can add a new
1982 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1983 using StackVector = JS::GCVector<JSString*, 16>;
1984 Rooted<StackVector> stack;
1985 RootedLinearString cur;
1986
settle(JSString * str)1987 bool settle(JSString* str) {
1988 while (str->isRope()) {
1989 JSRope& rope = str->asRope();
1990 if (!stack.append(rope.rightChild())) {
1991 return false;
1992 }
1993 str = rope.leftChild();
1994 }
1995 cur = &str->asLinear();
1996 return true;
1997 }
1998
1999 public:
StringSegmentRange(JSContext * cx)2000 explicit StringSegmentRange(JSContext* cx)
2001 : stack(cx, StackVector(cx)), cur(cx) {}
2002
init(JSString * str)2003 [[nodiscard]] bool init(JSString* str) {
2004 MOZ_ASSERT(stack.empty());
2005 return settle(str);
2006 }
2007
empty() const2008 bool empty() const { return cur == nullptr; }
2009
front() const2010 JSLinearString* front() const {
2011 MOZ_ASSERT(!cur->isRope());
2012 return cur;
2013 }
2014
popFront()2015 [[nodiscard]] bool popFront() {
2016 MOZ_ASSERT(!empty());
2017 if (stack.empty()) {
2018 cur = nullptr;
2019 return true;
2020 }
2021 return settle(stack.popCopy());
2022 }
2023 };
2024
2025 typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
2026
2027 template <typename TextChar, typename PatChar>
RopeMatchImpl(const AutoCheckCannotGC & nogc,LinearStringVector & strings,const PatChar * pat,size_t patLen)2028 static int RopeMatchImpl(const AutoCheckCannotGC& nogc,
2029 LinearStringVector& strings, const PatChar* pat,
2030 size_t patLen) {
2031 /* Absolute offset from the beginning of the logical text string. */
2032 int pos = 0;
2033
2034 for (JSLinearString** outerp = strings.begin(); outerp != strings.end();
2035 ++outerp) {
2036 /* Try to find a match within 'outer'. */
2037 JSLinearString* outer = *outerp;
2038 const TextChar* chars = outer->chars<TextChar>(nogc);
2039 size_t len = outer->length();
2040 int matchResult = StringMatch(chars, len, pat, patLen);
2041 if (matchResult != -1) {
2042 /* Matched! */
2043 return pos + matchResult;
2044 }
2045
2046 /* Try to find a match starting in 'outer' and running into other nodes. */
2047 const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
2048 const TextChar* const textend = chars + len;
2049 const PatChar p0 = *pat;
2050 const PatChar* const p1 = pat + 1;
2051 const PatChar* const patend = pat + patLen;
2052 for (const TextChar* t = text; t != textend;) {
2053 if (*t++ != p0) {
2054 continue;
2055 }
2056
2057 JSLinearString** innerp = outerp;
2058 const TextChar* ttend = textend;
2059 const TextChar* tt = t;
2060 for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) {
2061 while (tt == ttend) {
2062 if (++innerp == strings.end()) {
2063 return -1;
2064 }
2065
2066 JSLinearString* inner = *innerp;
2067 tt = inner->chars<TextChar>(nogc);
2068 ttend = tt + inner->length();
2069 }
2070 if (*pp != *tt) {
2071 goto break_continue;
2072 }
2073 }
2074
2075 /* Matched! */
2076 return pos + (t - chars) - 1; /* -1 because of *t++ above */
2077
2078 break_continue:;
2079 }
2080
2081 pos += len;
2082 }
2083
2084 return -1;
2085 }
2086
2087 /*
2088 * RopeMatch takes the text to search and the pattern to search for in the text.
2089 * RopeMatch returns false on OOM and otherwise returns the match index through
2090 * the 'match' outparam (-1 for not found).
2091 */
RopeMatch(JSContext * cx,JSRope * text,JSLinearString * pat,int * match)2092 static bool RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat,
2093 int* match) {
2094 uint32_t patLen = pat->length();
2095 if (patLen == 0) {
2096 *match = 0;
2097 return true;
2098 }
2099 if (text->length() < patLen) {
2100 *match = -1;
2101 return true;
2102 }
2103
2104 /*
2105 * List of leaf nodes in the rope. If we run out of memory when trying to
2106 * append to this list, we can still fall back to StringMatch, so use the
2107 * system allocator so we don't report OOM in that case.
2108 */
2109 LinearStringVector strings;
2110
2111 /*
2112 * We don't want to do rope matching if there is a poor node-to-char ratio,
2113 * since this means spending a lot of time in the match loop below. We also
2114 * need to build the list of leaf nodes. Do both here: iterate over the
2115 * nodes so long as there are not too many.
2116 *
2117 * We also don't use rope matching if the rope contains both Latin-1 and
2118 * TwoByte nodes, to simplify the match algorithm.
2119 */
2120 {
2121 size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
2122 StringSegmentRange r(cx);
2123 if (!r.init(text)) {
2124 return false;
2125 }
2126
2127 bool textIsLatin1 = text->hasLatin1Chars();
2128 while (!r.empty()) {
2129 if (threshold-- == 0 || r.front()->hasLatin1Chars() != textIsLatin1 ||
2130 !strings.append(r.front())) {
2131 JSLinearString* linear = text->ensureLinear(cx);
2132 if (!linear) {
2133 return false;
2134 }
2135
2136 *match = StringMatch(linear, pat);
2137 return true;
2138 }
2139 if (!r.popFront()) {
2140 return false;
2141 }
2142 }
2143 }
2144
2145 AutoCheckCannotGC nogc;
2146 if (text->hasLatin1Chars()) {
2147 if (pat->hasLatin1Chars()) {
2148 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc),
2149 patLen);
2150 } else {
2151 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc),
2152 patLen);
2153 }
2154 } else {
2155 if (pat->hasLatin1Chars()) {
2156 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc),
2157 patLen);
2158 } else {
2159 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc),
2160 patLen);
2161 }
2162 }
2163
2164 return true;
2165 }
2166
ReportErrorIfFirstArgIsRegExp(JSContext * cx,const CallArgs & args)2167 static MOZ_ALWAYS_INLINE bool ReportErrorIfFirstArgIsRegExp(
2168 JSContext* cx, const CallArgs& args) {
2169 // Only call IsRegExp if the first argument is definitely an object, so we
2170 // don't pay the cost of an additional function call in the common case.
2171 if (args.length() == 0 || !args[0].isObject()) {
2172 return true;
2173 }
2174
2175 bool isRegExp;
2176 if (!IsRegExp(cx, args[0], &isRegExp)) {
2177 return false;
2178 }
2179
2180 if (isRegExp) {
2181 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
2182 JSMSG_INVALID_ARG_TYPE, "first", "",
2183 "Regular Expression");
2184 return false;
2185 }
2186 return true;
2187 }
2188
2189 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2190 // 21.1.3.7 String.prototype.includes ( searchString [ , position ] )
str_includes(JSContext * cx,unsigned argc,Value * vp)2191 bool js::str_includes(JSContext* cx, unsigned argc, Value* vp) {
2192 CallArgs args = CallArgsFromVp(argc, vp);
2193
2194 // Steps 1-2.
2195 RootedString str(cx, ToStringForStringFunction(cx, "includes", args.thisv()));
2196 if (!str) {
2197 return false;
2198 }
2199
2200 // Steps 3-4.
2201 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2202 return false;
2203 }
2204
2205 // Step 5.
2206 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2207 if (!searchStr) {
2208 return false;
2209 }
2210
2211 // Step 6.
2212 uint32_t pos = 0;
2213 if (args.hasDefined(1)) {
2214 if (args[1].isInt32()) {
2215 int i = args[1].toInt32();
2216 pos = (i < 0) ? 0U : uint32_t(i);
2217 } else {
2218 double d;
2219 if (!ToInteger(cx, args[1], &d)) {
2220 return false;
2221 }
2222 pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2223 }
2224 }
2225
2226 // Step 7.
2227 uint32_t textLen = str->length();
2228
2229 // Step 8.
2230 uint32_t start = std::min(pos, textLen);
2231
2232 // Steps 9-10.
2233 JSLinearString* text = str->ensureLinear(cx);
2234 if (!text) {
2235 return false;
2236 }
2237
2238 args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
2239 return true;
2240 }
2241
2242 /* ES6 20120927 draft 15.5.4.7. */
str_indexOf(JSContext * cx,unsigned argc,Value * vp)2243 bool js::str_indexOf(JSContext* cx, unsigned argc, Value* vp) {
2244 CallArgs args = CallArgsFromVp(argc, vp);
2245
2246 // Steps 1, 2, and 3
2247 RootedString str(cx, ToStringForStringFunction(cx, "indexOf", args.thisv()));
2248 if (!str) {
2249 return false;
2250 }
2251
2252 // Steps 4 and 5
2253 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2254 if (!searchStr) {
2255 return false;
2256 }
2257
2258 // Steps 6 and 7
2259 uint32_t pos = 0;
2260 if (args.hasDefined(1)) {
2261 if (args[1].isInt32()) {
2262 int i = args[1].toInt32();
2263 pos = (i < 0) ? 0U : uint32_t(i);
2264 } else {
2265 double d;
2266 if (!ToInteger(cx, args[1], &d)) {
2267 return false;
2268 }
2269 pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2270 }
2271 }
2272
2273 // Step 8
2274 uint32_t textLen = str->length();
2275
2276 // Step 9
2277 uint32_t start = std::min(pos, textLen);
2278
2279 if (str == searchStr) {
2280 // AngularJS often invokes "false".indexOf("false"). This check should
2281 // be cheap enough to not hurt anything else.
2282 args.rval().setInt32(start == 0 ? 0 : -1);
2283 return true;
2284 }
2285
2286 // Steps 10 and 11
2287 JSLinearString* text = str->ensureLinear(cx);
2288 if (!text) {
2289 return false;
2290 }
2291
2292 args.rval().setInt32(StringMatch(text, searchStr, start));
2293 return true;
2294 }
2295
2296 template <typename TextChar, typename PatChar>
LastIndexOfImpl(const TextChar * text,size_t textLen,const PatChar * pat,size_t patLen,size_t start)2297 static int32_t LastIndexOfImpl(const TextChar* text, size_t textLen,
2298 const PatChar* pat, size_t patLen,
2299 size_t start) {
2300 MOZ_ASSERT(patLen > 0);
2301 MOZ_ASSERT(patLen <= textLen);
2302 MOZ_ASSERT(start <= textLen - patLen);
2303
2304 const PatChar p0 = *pat;
2305 const PatChar* patNext = pat + 1;
2306 const PatChar* patEnd = pat + patLen;
2307
2308 for (const TextChar* t = text + start; t >= text; --t) {
2309 if (*t == p0) {
2310 const TextChar* t1 = t + 1;
2311 for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) {
2312 if (*t1 != *p1) {
2313 goto break_continue;
2314 }
2315 }
2316
2317 return static_cast<int32_t>(t - text);
2318 }
2319 break_continue:;
2320 }
2321
2322 return -1;
2323 }
2324
2325 // ES2017 draft rev 6859bb9ccaea9c6ede81d71e5320e3833b92cb3e
2326 // 21.1.3.9 String.prototype.lastIndexOf ( searchString [ , position ] )
str_lastIndexOf(JSContext * cx,unsigned argc,Value * vp)2327 static bool str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp) {
2328 CallArgs args = CallArgsFromVp(argc, vp);
2329
2330 // Steps 1-2.
2331 RootedString str(cx,
2332 ToStringForStringFunction(cx, "lastIndexOf", args.thisv()));
2333 if (!str) {
2334 return false;
2335 }
2336
2337 // Step 3.
2338 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2339 if (!searchStr) {
2340 return false;
2341 }
2342
2343 // Step 6.
2344 size_t len = str->length();
2345
2346 // Step 8.
2347 size_t searchLen = searchStr->length();
2348
2349 // Steps 4-5, 7.
2350 int start = len - searchLen; // Start searching here
2351 if (args.hasDefined(1)) {
2352 if (args[1].isInt32()) {
2353 int i = args[1].toInt32();
2354 if (i <= 0) {
2355 start = 0;
2356 } else if (i < start) {
2357 start = i;
2358 }
2359 } else {
2360 double d;
2361 if (!ToNumber(cx, args[1], &d)) {
2362 return false;
2363 }
2364 if (!IsNaN(d)) {
2365 d = JS::ToInteger(d);
2366 if (d <= 0) {
2367 start = 0;
2368 } else if (d < start) {
2369 start = int(d);
2370 }
2371 }
2372 }
2373 }
2374
2375 if (str == searchStr) {
2376 args.rval().setInt32(0);
2377 return true;
2378 }
2379
2380 if (searchLen > len) {
2381 args.rval().setInt32(-1);
2382 return true;
2383 }
2384
2385 if (searchLen == 0) {
2386 args.rval().setInt32(start);
2387 return true;
2388 }
2389 MOZ_ASSERT(0 <= start && size_t(start) < len);
2390
2391 JSLinearString* text = str->ensureLinear(cx);
2392 if (!text) {
2393 return false;
2394 }
2395
2396 // Step 9.
2397 int32_t res;
2398 AutoCheckCannotGC nogc;
2399 if (text->hasLatin1Chars()) {
2400 const Latin1Char* textChars = text->latin1Chars(nogc);
2401 if (searchStr->hasLatin1Chars()) {
2402 res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2403 searchLen, start);
2404 } else {
2405 res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2406 searchLen, start);
2407 }
2408 } else {
2409 const char16_t* textChars = text->twoByteChars(nogc);
2410 if (searchStr->hasLatin1Chars()) {
2411 res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2412 searchLen, start);
2413 } else {
2414 res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2415 searchLen, start);
2416 }
2417 }
2418
2419 args.rval().setInt32(res);
2420 return true;
2421 }
2422
2423 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2424 // 21.1.3.20 String.prototype.startsWith ( searchString [ , position ] )
str_startsWith(JSContext * cx,unsigned argc,Value * vp)2425 bool js::str_startsWith(JSContext* cx, unsigned argc, Value* vp) {
2426 CallArgs args = CallArgsFromVp(argc, vp);
2427
2428 // Steps 1-2.
2429 RootedString str(cx,
2430 ToStringForStringFunction(cx, "startsWith", args.thisv()));
2431 if (!str) {
2432 return false;
2433 }
2434
2435 // Steps 3-4.
2436 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2437 return false;
2438 }
2439
2440 // Step 5.
2441 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2442 if (!searchStr) {
2443 return false;
2444 }
2445
2446 // Step 6.
2447 uint32_t pos = 0;
2448 if (args.hasDefined(1)) {
2449 if (args[1].isInt32()) {
2450 int i = args[1].toInt32();
2451 pos = (i < 0) ? 0U : uint32_t(i);
2452 } else {
2453 double d;
2454 if (!ToInteger(cx, args[1], &d)) {
2455 return false;
2456 }
2457 pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2458 }
2459 }
2460
2461 // Step 7.
2462 uint32_t textLen = str->length();
2463
2464 // Step 8.
2465 uint32_t start = std::min(pos, textLen);
2466
2467 // Step 9.
2468 uint32_t searchLen = searchStr->length();
2469
2470 // Step 10.
2471 if (searchLen + start < searchLen || searchLen + start > textLen) {
2472 args.rval().setBoolean(false);
2473 return true;
2474 }
2475
2476 // Steps 11-12.
2477 JSLinearString* text = str->ensureLinear(cx);
2478 if (!text) {
2479 return false;
2480 }
2481
2482 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2483 return true;
2484 }
2485
2486 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2487 // 21.1.3.6 String.prototype.endsWith ( searchString [ , endPosition ] )
str_endsWith(JSContext * cx,unsigned argc,Value * vp)2488 bool js::str_endsWith(JSContext* cx, unsigned argc, Value* vp) {
2489 CallArgs args = CallArgsFromVp(argc, vp);
2490
2491 // Steps 1-2.
2492 RootedString str(cx, ToStringForStringFunction(cx, "endsWith", args.thisv()));
2493 if (!str) {
2494 return false;
2495 }
2496
2497 // Steps 3-4.
2498 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2499 return false;
2500 }
2501
2502 // Step 5.
2503 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2504 if (!searchStr) {
2505 return false;
2506 }
2507
2508 // Step 6.
2509 uint32_t textLen = str->length();
2510
2511 // Step 7.
2512 uint32_t pos = textLen;
2513 if (args.hasDefined(1)) {
2514 if (args[1].isInt32()) {
2515 int i = args[1].toInt32();
2516 pos = (i < 0) ? 0U : uint32_t(i);
2517 } else {
2518 double d;
2519 if (!ToInteger(cx, args[1], &d)) {
2520 return false;
2521 }
2522 pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2523 }
2524 }
2525
2526 // Step 8.
2527 uint32_t end = std::min(pos, textLen);
2528
2529 // Step 9.
2530 uint32_t searchLen = searchStr->length();
2531
2532 // Step 11 (reordered).
2533 if (searchLen > end) {
2534 args.rval().setBoolean(false);
2535 return true;
2536 }
2537
2538 // Step 10.
2539 uint32_t start = end - searchLen;
2540
2541 // Steps 12-13.
2542 JSLinearString* text = str->ensureLinear(cx);
2543 if (!text) {
2544 return false;
2545 }
2546
2547 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2548 return true;
2549 }
2550
2551 template <typename CharT>
TrimString(const CharT * chars,bool trimStart,bool trimEnd,size_t length,size_t * pBegin,size_t * pEnd)2552 static void TrimString(const CharT* chars, bool trimStart, bool trimEnd,
2553 size_t length, size_t* pBegin, size_t* pEnd) {
2554 size_t begin = 0, end = length;
2555
2556 if (trimStart) {
2557 while (begin < length && unicode::IsSpace(chars[begin])) {
2558 ++begin;
2559 }
2560 }
2561
2562 if (trimEnd) {
2563 while (end > begin && unicode::IsSpace(chars[end - 1])) {
2564 --end;
2565 }
2566 }
2567
2568 *pBegin = begin;
2569 *pEnd = end;
2570 }
2571
TrimString(JSContext * cx,const CallArgs & args,const char * funName,bool trimStart,bool trimEnd)2572 static bool TrimString(JSContext* cx, const CallArgs& args, const char* funName,
2573 bool trimStart, bool trimEnd) {
2574 JSString* str = ToStringForStringFunction(cx, funName, args.thisv());
2575 if (!str) {
2576 return false;
2577 }
2578
2579 JSLinearString* linear = str->ensureLinear(cx);
2580 if (!linear) {
2581 return false;
2582 }
2583
2584 size_t length = linear->length();
2585 size_t begin, end;
2586 if (linear->hasLatin1Chars()) {
2587 AutoCheckCannotGC nogc;
2588 TrimString(linear->latin1Chars(nogc), trimStart, trimEnd, length, &begin,
2589 &end);
2590 } else {
2591 AutoCheckCannotGC nogc;
2592 TrimString(linear->twoByteChars(nogc), trimStart, trimEnd, length, &begin,
2593 &end);
2594 }
2595
2596 JSLinearString* result = NewDependentString(cx, linear, begin, end - begin);
2597 if (!result) {
2598 return false;
2599 }
2600
2601 args.rval().setString(result);
2602 return true;
2603 }
2604
str_trim(JSContext * cx,unsigned argc,Value * vp)2605 static bool str_trim(JSContext* cx, unsigned argc, Value* vp) {
2606 CallArgs args = CallArgsFromVp(argc, vp);
2607 return TrimString(cx, args, "trim", true, true);
2608 }
2609
str_trimStart(JSContext * cx,unsigned argc,Value * vp)2610 static bool str_trimStart(JSContext* cx, unsigned argc, Value* vp) {
2611 CallArgs args = CallArgsFromVp(argc, vp);
2612 return TrimString(cx, args, "trimStart", true, false);
2613 }
2614
str_trimEnd(JSContext * cx,unsigned argc,Value * vp)2615 static bool str_trimEnd(JSContext* cx, unsigned argc, Value* vp) {
2616 CallArgs args = CallArgsFromVp(argc, vp);
2617 return TrimString(cx, args, "trimEnd", false, true);
2618 }
2619
2620 // Utility for building a rope (lazy concatenation) of strings.
2621 class RopeBuilder {
2622 JSContext* cx;
2623 RootedString res;
2624
2625 RopeBuilder(const RopeBuilder& other) = delete;
2626 void operator=(const RopeBuilder& other) = delete;
2627
2628 public:
RopeBuilder(JSContext * cx)2629 explicit RopeBuilder(JSContext* cx)
2630 : cx(cx), res(cx, cx->runtime()->emptyString) {}
2631
append(HandleString str)2632 inline bool append(HandleString str) {
2633 res = ConcatStrings<CanGC>(cx, res, str);
2634 return !!res;
2635 }
2636
result()2637 inline JSString* result() { return res; }
2638 };
2639
2640 namespace {
2641
2642 template <typename CharT>
FindDollarIndex(const CharT * chars,size_t length)2643 static uint32_t FindDollarIndex(const CharT* chars, size_t length) {
2644 if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
2645 uint32_t dollarIndex = p - chars;
2646 MOZ_ASSERT(dollarIndex < length);
2647 return dollarIndex;
2648 }
2649 return UINT32_MAX;
2650 }
2651
2652 } /* anonymous namespace */
2653
2654 /*
2655 * Constructs a result string that looks like:
2656 *
2657 * newstring = string[:matchStart] + repstr + string[matchEnd:]
2658 */
BuildFlatReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t matchStart,size_t patternLength)2659 static JSString* BuildFlatReplacement(JSContext* cx, HandleString textstr,
2660 HandleLinearString repstr,
2661 size_t matchStart, size_t patternLength) {
2662 size_t matchEnd = matchStart + patternLength;
2663
2664 RootedString resultStr(cx, NewDependentString(cx, textstr, 0, matchStart));
2665 if (!resultStr) {
2666 return nullptr;
2667 }
2668
2669 resultStr = ConcatStrings<CanGC>(cx, resultStr, repstr);
2670 if (!resultStr) {
2671 return nullptr;
2672 }
2673
2674 MOZ_ASSERT(textstr->length() >= matchEnd);
2675 RootedString rest(cx, NewDependentString(cx, textstr, matchEnd,
2676 textstr->length() - matchEnd));
2677 if (!rest) {
2678 return nullptr;
2679 }
2680
2681 return ConcatStrings<CanGC>(cx, resultStr, rest);
2682 }
2683
BuildFlatRopeReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t match,size_t patternLength)2684 static JSString* BuildFlatRopeReplacement(JSContext* cx, HandleString textstr,
2685 HandleLinearString repstr,
2686 size_t match, size_t patternLength) {
2687 MOZ_ASSERT(textstr->isRope());
2688
2689 size_t matchEnd = match + patternLength;
2690
2691 /*
2692 * If we are replacing over a rope, avoid flattening it by iterating
2693 * through it, building a new rope.
2694 */
2695 StringSegmentRange r(cx);
2696 if (!r.init(textstr)) {
2697 return nullptr;
2698 }
2699
2700 RopeBuilder builder(cx);
2701
2702 /*
2703 * Special case when the pattern string is '', which matches to the
2704 * head of the string and doesn't overlap with any component of the rope.
2705 */
2706 if (patternLength == 0) {
2707 MOZ_ASSERT(match == 0);
2708 if (!builder.append(repstr)) {
2709 return nullptr;
2710 }
2711 }
2712
2713 size_t pos = 0;
2714 while (!r.empty()) {
2715 RootedString str(cx, r.front());
2716 size_t len = str->length();
2717 size_t strEnd = pos + len;
2718 if (pos < matchEnd && strEnd > match) {
2719 /*
2720 * We need to special-case any part of the rope that overlaps
2721 * with the replacement string.
2722 */
2723 if (match >= pos) {
2724 /*
2725 * If this part of the rope overlaps with the left side of
2726 * the pattern, then it must be the only one to overlap with
2727 * the first character in the pattern, so we include the
2728 * replacement string here.
2729 */
2730 RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
2731 if (!leftSide || !builder.append(leftSide) || !builder.append(repstr)) {
2732 return nullptr;
2733 }
2734 }
2735
2736 /*
2737 * If str runs off the end of the matched string, append the
2738 * last part of str.
2739 */
2740 if (strEnd > matchEnd) {
2741 RootedString rightSide(
2742 cx, NewDependentString(cx, str, matchEnd - pos, strEnd - matchEnd));
2743 if (!rightSide || !builder.append(rightSide)) {
2744 return nullptr;
2745 }
2746 }
2747 } else {
2748 if (!builder.append(str)) {
2749 return nullptr;
2750 }
2751 }
2752 pos += str->length();
2753 if (!r.popFront()) {
2754 return nullptr;
2755 }
2756 }
2757
2758 return builder.result();
2759 }
2760
2761 template <typename CharT>
AppendDollarReplacement(StringBuffer & newReplaceChars,size_t firstDollarIndex,size_t matchStart,size_t matchLimit,JSLinearString * text,const CharT * repChars,size_t repLength)2762 static bool AppendDollarReplacement(StringBuffer& newReplaceChars,
2763 size_t firstDollarIndex, size_t matchStart,
2764 size_t matchLimit, JSLinearString* text,
2765 const CharT* repChars, size_t repLength) {
2766 MOZ_ASSERT(firstDollarIndex < repLength);
2767 MOZ_ASSERT(matchStart <= matchLimit);
2768 MOZ_ASSERT(matchLimit <= text->length());
2769
2770 // Move the pre-dollar chunk in bulk.
2771 if (!newReplaceChars.append(repChars, firstDollarIndex)) {
2772 return false;
2773 }
2774
2775 // Move the rest char-by-char, interpreting dollars as we encounter them.
2776 const CharT* repLimit = repChars + repLength;
2777 for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
2778 if (*it != '$' || it == repLimit - 1) {
2779 if (!newReplaceChars.append(*it)) {
2780 return false;
2781 }
2782 continue;
2783 }
2784
2785 switch (*(it + 1)) {
2786 case '$':
2787 // Eat one of the dollars.
2788 if (!newReplaceChars.append(*it)) {
2789 return false;
2790 }
2791 break;
2792 case '&':
2793 if (!newReplaceChars.appendSubstring(text, matchStart,
2794 matchLimit - matchStart)) {
2795 return false;
2796 }
2797 break;
2798 case '`':
2799 if (!newReplaceChars.appendSubstring(text, 0, matchStart)) {
2800 return false;
2801 }
2802 break;
2803 case '\'':
2804 if (!newReplaceChars.appendSubstring(text, matchLimit,
2805 text->length() - matchLimit)) {
2806 return false;
2807 }
2808 break;
2809 default:
2810 // The dollar we saw was not special (no matter what its mother told
2811 // it).
2812 if (!newReplaceChars.append(*it)) {
2813 return false;
2814 }
2815 continue;
2816 }
2817 ++it; // We always eat an extra char in the above switch.
2818 }
2819
2820 return true;
2821 }
2822
2823 /*
2824 * Perform a linear-scan dollar substitution on the replacement text.
2825 */
InterpretDollarReplacement(JSContext * cx,HandleString textstrArg,HandleLinearString repstr,uint32_t firstDollarIndex,size_t matchStart,size_t patternLength)2826 static JSLinearString* InterpretDollarReplacement(
2827 JSContext* cx, HandleString textstrArg, HandleLinearString repstr,
2828 uint32_t firstDollarIndex, size_t matchStart, size_t patternLength) {
2829 RootedLinearString textstr(cx, textstrArg->ensureLinear(cx));
2830 if (!textstr) {
2831 return nullptr;
2832 }
2833
2834 size_t matchLimit = matchStart + patternLength;
2835
2836 /*
2837 * Most probably:
2838 *
2839 * len(newstr) >= len(orig) - len(match) + len(replacement)
2840 *
2841 * Note that dollar vars _could_ make the resulting text smaller than this.
2842 */
2843 JSStringBuilder newReplaceChars(cx);
2844 if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars()) {
2845 return nullptr;
2846 }
2847
2848 if (!newReplaceChars.reserve(textstr->length() - patternLength +
2849 repstr->length())) {
2850 return nullptr;
2851 }
2852
2853 bool res;
2854 if (repstr->hasLatin1Chars()) {
2855 AutoCheckCannotGC nogc;
2856 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2857 matchLimit, textstr,
2858 repstr->latin1Chars(nogc), repstr->length());
2859 } else {
2860 AutoCheckCannotGC nogc;
2861 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2862 matchLimit, textstr,
2863 repstr->twoByteChars(nogc), repstr->length());
2864 }
2865 if (!res) {
2866 return nullptr;
2867 }
2868
2869 return newReplaceChars.finishString();
2870 }
2871
2872 template <typename StrChar, typename RepChar>
StrFlatReplaceGlobal(JSContext * cx,JSLinearString * str,JSLinearString * pat,JSLinearString * rep,StringBuffer & sb)2873 static bool StrFlatReplaceGlobal(JSContext* cx, JSLinearString* str,
2874 JSLinearString* pat, JSLinearString* rep,
2875 StringBuffer& sb) {
2876 MOZ_ASSERT(str->length() > 0);
2877
2878 AutoCheckCannotGC nogc;
2879 const StrChar* strChars = str->chars<StrChar>(nogc);
2880 const RepChar* repChars = rep->chars<RepChar>(nogc);
2881
2882 // The pattern is empty, so we interleave the replacement string in-between
2883 // each character.
2884 if (!pat->length()) {
2885 CheckedInt<uint32_t> strLength(str->length());
2886 CheckedInt<uint32_t> repLength(rep->length());
2887 CheckedInt<uint32_t> length = repLength * (strLength - 1) + strLength;
2888 if (!length.isValid()) {
2889 ReportAllocationOverflow(cx);
2890 return false;
2891 }
2892
2893 if (!sb.reserve(length.value())) {
2894 return false;
2895 }
2896
2897 for (unsigned i = 0; i < str->length() - 1; ++i, ++strChars) {
2898 sb.infallibleAppend(*strChars);
2899 sb.infallibleAppend(repChars, rep->length());
2900 }
2901 sb.infallibleAppend(*strChars);
2902 return true;
2903 }
2904
2905 // If it's true, we are sure that the result's length is, at least, the same
2906 // length as |str->length()|.
2907 if (rep->length() >= pat->length()) {
2908 if (!sb.reserve(str->length())) {
2909 return false;
2910 }
2911 }
2912
2913 uint32_t start = 0;
2914 for (;;) {
2915 int match = StringMatch(str, pat, start);
2916 if (match < 0) {
2917 break;
2918 }
2919 if (!sb.append(strChars + start, match - start)) {
2920 return false;
2921 }
2922 if (!sb.append(repChars, rep->length())) {
2923 return false;
2924 }
2925 start = match + pat->length();
2926 }
2927
2928 if (!sb.append(strChars + start, str->length() - start)) {
2929 return false;
2930 }
2931
2932 return true;
2933 }
2934
2935 // This is identical to "str.split(pattern).join(replacement)" except that we
2936 // do some deforestation optimization in Ion.
StringFlatReplaceString(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2937 JSString* js::StringFlatReplaceString(JSContext* cx, HandleString string,
2938 HandleString pattern,
2939 HandleString replacement) {
2940 MOZ_ASSERT(string);
2941 MOZ_ASSERT(pattern);
2942 MOZ_ASSERT(replacement);
2943
2944 if (!string->length()) {
2945 return string;
2946 }
2947
2948 RootedLinearString linearRepl(cx, replacement->ensureLinear(cx));
2949 if (!linearRepl) {
2950 return nullptr;
2951 }
2952
2953 RootedLinearString linearPat(cx, pattern->ensureLinear(cx));
2954 if (!linearPat) {
2955 return nullptr;
2956 }
2957
2958 RootedLinearString linearStr(cx, string->ensureLinear(cx));
2959 if (!linearStr) {
2960 return nullptr;
2961 }
2962
2963 JSStringBuilder sb(cx);
2964 if (linearStr->hasTwoByteChars()) {
2965 if (!sb.ensureTwoByteChars()) {
2966 return nullptr;
2967 }
2968 if (linearRepl->hasTwoByteChars()) {
2969 if (!StrFlatReplaceGlobal<char16_t, char16_t>(cx, linearStr, linearPat,
2970 linearRepl, sb)) {
2971 return nullptr;
2972 }
2973 } else {
2974 if (!StrFlatReplaceGlobal<char16_t, Latin1Char>(cx, linearStr, linearPat,
2975 linearRepl, sb)) {
2976 return nullptr;
2977 }
2978 }
2979 } else {
2980 if (linearRepl->hasTwoByteChars()) {
2981 if (!sb.ensureTwoByteChars()) {
2982 return nullptr;
2983 }
2984 if (!StrFlatReplaceGlobal<Latin1Char, char16_t>(cx, linearStr, linearPat,
2985 linearRepl, sb)) {
2986 return nullptr;
2987 }
2988 } else {
2989 if (!StrFlatReplaceGlobal<Latin1Char, Latin1Char>(
2990 cx, linearStr, linearPat, linearRepl, sb)) {
2991 return nullptr;
2992 }
2993 }
2994 }
2995
2996 return sb.finishString();
2997 }
2998
str_replace_string_raw(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2999 JSString* js::str_replace_string_raw(JSContext* cx, HandleString string,
3000 HandleString pattern,
3001 HandleString replacement) {
3002 RootedLinearString repl(cx, replacement->ensureLinear(cx));
3003 if (!repl) {
3004 return nullptr;
3005 }
3006
3007 RootedLinearString pat(cx, pattern->ensureLinear(cx));
3008 if (!pat) {
3009 return nullptr;
3010 }
3011
3012 size_t patternLength = pat->length();
3013 int32_t match;
3014 uint32_t dollarIndex;
3015
3016 {
3017 AutoCheckCannotGC nogc;
3018 dollarIndex =
3019 repl->hasLatin1Chars()
3020 ? FindDollarIndex(repl->latin1Chars(nogc), repl->length())
3021 : FindDollarIndex(repl->twoByteChars(nogc), repl->length());
3022 }
3023
3024 /*
3025 * |string| could be a rope, so we want to avoid flattening it for as
3026 * long as possible.
3027 */
3028 if (string->isRope()) {
3029 if (!RopeMatch(cx, &string->asRope(), pat, &match)) {
3030 return nullptr;
3031 }
3032 } else {
3033 match = StringMatch(&string->asLinear(), pat, 0);
3034 }
3035
3036 if (match < 0) {
3037 return string;
3038 }
3039
3040 if (dollarIndex != UINT32_MAX) {
3041 repl = InterpretDollarReplacement(cx, string, repl, dollarIndex, match,
3042 patternLength);
3043 if (!repl) {
3044 return nullptr;
3045 }
3046 } else if (string->isRope()) {
3047 return BuildFlatRopeReplacement(cx, string, repl, match, patternLength);
3048 }
3049 return BuildFlatReplacement(cx, string, repl, match, patternLength);
3050 }
3051
3052 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall
3053 // Steps 7-16 when functionalReplace is false and searchString is not empty.
3054 //
3055 // The steps are quite different, for performance. Loops in steps 11 and 14
3056 // are fused. GetSubstitution is optimized away when possible.
3057 template <typename StrChar, typename RepChar>
ReplaceAll(JSContext * cx,JSLinearString * string,JSLinearString * searchString,JSLinearString * replaceString)3058 static JSString* ReplaceAll(JSContext* cx, JSLinearString* string,
3059 JSLinearString* searchString,
3060 JSLinearString* replaceString) {
3061 // Step 7.
3062 const size_t stringLength = string->length();
3063 const size_t searchLength = searchString->length();
3064 const size_t replaceLength = replaceString->length();
3065
3066 MOZ_ASSERT(stringLength > 0);
3067 MOZ_ASSERT(searchLength > 0);
3068 MOZ_ASSERT(stringLength >= searchLength);
3069
3070 // Step 8 (advanceBy is equal to searchLength when searchLength > 0).
3071
3072 // Step 9 (not needed in this implementation).
3073
3074 // Step 10.
3075 // Find the first match.
3076 int32_t position = StringMatch(string, searchString, 0);
3077
3078 // Nothing to replace, so return early.
3079 if (position < 0) {
3080 return string;
3081 }
3082
3083 // Step 11 (moved below).
3084
3085 // Step 12.
3086 uint32_t endOfLastMatch = 0;
3087
3088 // Step 13.
3089 JSStringBuilder result(cx);
3090 if constexpr (std::is_same_v<StrChar, char16_t> ||
3091 std::is_same_v<RepChar, char16_t>) {
3092 if (!result.ensureTwoByteChars()) {
3093 return nullptr;
3094 }
3095 }
3096
3097 {
3098 AutoCheckCannotGC nogc;
3099 const StrChar* strChars = string->chars<StrChar>(nogc);
3100 const RepChar* repChars = replaceString->chars<RepChar>(nogc);
3101
3102 uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength);
3103
3104 // If it's true, we are sure that the result's length is, at least, the same
3105 // length as |str->length()|.
3106 if (replaceLength >= searchLength) {
3107 if (!result.reserve(stringLength)) {
3108 return nullptr;
3109 }
3110 }
3111
3112 do {
3113 // Step 14.c.
3114 // Append the substring before the current match.
3115 if (!result.append(strChars + endOfLastMatch,
3116 position - endOfLastMatch)) {
3117 return nullptr;
3118 }
3119
3120 // Steps 14.a-b and 14.d.
3121 // Append the replacement.
3122 if (dollarIndex != UINT32_MAX) {
3123 size_t matchLimit = position + searchLength;
3124 if (!AppendDollarReplacement(result, dollarIndex, position, matchLimit,
3125 string, repChars, replaceLength)) {
3126 return nullptr;
3127 }
3128 } else {
3129 if (!result.append(repChars, replaceLength)) {
3130 return nullptr;
3131 }
3132 }
3133
3134 // Step 14.e.
3135 endOfLastMatch = position + searchLength;
3136
3137 // Step 11.
3138 // Find the next match.
3139 position = StringMatch(string, searchString, endOfLastMatch);
3140 } while (position >= 0);
3141
3142 // Step 15.
3143 // Append the substring after the last match.
3144 if (!result.append(strChars + endOfLastMatch,
3145 stringLength - endOfLastMatch)) {
3146 return nullptr;
3147 }
3148 }
3149
3150 // Step 16.
3151 return result.finishString();
3152 }
3153
3154 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall
3155 // Steps 7-16 when functionalReplace is false and searchString is the empty
3156 // string.
3157 //
3158 // The steps are quite different, for performance. Loops in steps 11 and 14
3159 // are fused. GetSubstitution is optimized away when possible.
3160 template <typename StrChar, typename RepChar>
ReplaceAllInterleave(JSContext * cx,JSLinearString * string,JSLinearString * replaceString)3161 static JSString* ReplaceAllInterleave(JSContext* cx, JSLinearString* string,
3162 JSLinearString* replaceString) {
3163 // Step 7.
3164 const size_t stringLength = string->length();
3165 const size_t replaceLength = replaceString->length();
3166
3167 // Step 8 (advanceBy is 1 when searchString is the empty string).
3168
3169 // Steps 9-12 (trivial when searchString is the empty string).
3170
3171 // Step 13.
3172 JSStringBuilder result(cx);
3173 if constexpr (std::is_same_v<StrChar, char16_t> ||
3174 std::is_same_v<RepChar, char16_t>) {
3175 if (!result.ensureTwoByteChars()) {
3176 return nullptr;
3177 }
3178 }
3179
3180 {
3181 AutoCheckCannotGC nogc;
3182 const StrChar* strChars = string->chars<StrChar>(nogc);
3183 const RepChar* repChars = replaceString->chars<RepChar>(nogc);
3184
3185 uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength);
3186
3187 if (dollarIndex != UINT32_MAX) {
3188 if (!result.reserve(stringLength)) {
3189 return nullptr;
3190 }
3191 } else {
3192 // Compute the exact result length when no substitutions take place.
3193 CheckedInt<uint32_t> strLength(stringLength);
3194 CheckedInt<uint32_t> repLength(replaceLength);
3195 CheckedInt<uint32_t> length = strLength + (strLength + 1) * repLength;
3196 if (!length.isValid()) {
3197 ReportAllocationOverflow(cx);
3198 return nullptr;
3199 }
3200
3201 if (!result.reserve(length.value())) {
3202 return nullptr;
3203 }
3204 }
3205
3206 auto appendReplacement = [&](size_t match) {
3207 if (dollarIndex != UINT32_MAX) {
3208 return AppendDollarReplacement(result, dollarIndex, match, match,
3209 string, repChars, replaceLength);
3210 }
3211 return result.append(repChars, replaceLength);
3212 };
3213
3214 for (size_t index = 0; index < stringLength; index++) {
3215 // Steps 11, 14.a-b and 14.d.
3216 // The empty string matches before each character.
3217 if (!appendReplacement(index)) {
3218 return nullptr;
3219 }
3220
3221 // Step 14.c.
3222 if (!result.append(strChars[index])) {
3223 return nullptr;
3224 }
3225 }
3226
3227 // Steps 11, 14.a-b and 14.d.
3228 // The empty string also matches at the end of the string.
3229 if (!appendReplacement(stringLength)) {
3230 return nullptr;
3231 }
3232
3233 // Step 15 (not applicable when searchString is the empty string).
3234 }
3235
3236 // Step 16.
3237 return result.finishString();
3238 }
3239
3240 // String.prototype.replaceAll (Stage 3 proposal)
3241 // https://tc39.es/proposal-string-replaceall/
3242 //
3243 // String.prototype.replaceAll ( searchValue, replaceValue )
3244 //
3245 // Steps 7-16 when functionalReplace is false.
str_replaceAll_string_raw(JSContext * cx,HandleString string,HandleString searchString,HandleString replaceString)3246 JSString* js::str_replaceAll_string_raw(JSContext* cx, HandleString string,
3247 HandleString searchString,
3248 HandleString replaceString) {
3249 const size_t stringLength = string->length();
3250 const size_t searchLength = searchString->length();
3251
3252 // Directly return when we're guaranteed to find no match.
3253 if (searchLength > stringLength) {
3254 return string;
3255 }
3256
3257 RootedLinearString str(cx, string->ensureLinear(cx));
3258 if (!str) {
3259 return nullptr;
3260 }
3261
3262 RootedLinearString repl(cx, replaceString->ensureLinear(cx));
3263 if (!repl) {
3264 return nullptr;
3265 }
3266
3267 RootedLinearString search(cx, searchString->ensureLinear(cx));
3268 if (!search) {
3269 return nullptr;
3270 }
3271
3272 // The pattern is empty, so we interleave the replacement string in-between
3273 // each character.
3274 if (searchLength == 0) {
3275 if (str->hasTwoByteChars()) {
3276 if (repl->hasTwoByteChars()) {
3277 return ReplaceAllInterleave<char16_t, char16_t>(cx, str, repl);
3278 }
3279 return ReplaceAllInterleave<char16_t, Latin1Char>(cx, str, repl);
3280 }
3281 if (repl->hasTwoByteChars()) {
3282 return ReplaceAllInterleave<Latin1Char, char16_t>(cx, str, repl);
3283 }
3284 return ReplaceAllInterleave<Latin1Char, Latin1Char>(cx, str, repl);
3285 }
3286
3287 MOZ_ASSERT(stringLength > 0);
3288
3289 if (str->hasTwoByteChars()) {
3290 if (repl->hasTwoByteChars()) {
3291 return ReplaceAll<char16_t, char16_t>(cx, str, search, repl);
3292 }
3293 return ReplaceAll<char16_t, Latin1Char>(cx, str, search, repl);
3294 }
3295 if (repl->hasTwoByteChars()) {
3296 return ReplaceAll<Latin1Char, char16_t>(cx, str, search, repl);
3297 }
3298 return ReplaceAll<Latin1Char, Latin1Char>(cx, str, search, repl);
3299 }
3300
SingleElementStringArray(JSContext * cx,HandleLinearString str)3301 static ArrayObject* SingleElementStringArray(JSContext* cx,
3302 HandleLinearString str) {
3303 ArrayObject* array = NewDenseFullyAllocatedArray(cx, 1);
3304 if (!array) {
3305 return nullptr;
3306 }
3307 array->setDenseInitializedLength(1);
3308 array->initDenseElement(0, StringValue(str));
3309 return array;
3310 }
3311
3312 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,HandleLinearString sep)3313 static ArrayObject* SplitHelper(JSContext* cx, HandleLinearString str,
3314 uint32_t limit, HandleLinearString sep) {
3315 size_t strLength = str->length();
3316 size_t sepLength = sep->length();
3317 MOZ_ASSERT(sepLength != 0);
3318
3319 // Step 12.
3320 if (strLength == 0) {
3321 // Step 12.a.
3322 int match = StringMatch(str, sep, 0);
3323
3324 // Step 12.b.
3325 if (match != -1) {
3326 return NewDenseEmptyArray(cx);
3327 }
3328
3329 // Steps 12.c-e.
3330 return SingleElementStringArray(cx, str);
3331 }
3332
3333 // Step 3 (reordered).
3334 RootedValueVector splits(cx);
3335
3336 // Step 8 (reordered).
3337 size_t lastEndIndex = 0;
3338
3339 // Step 13.
3340 size_t index = 0;
3341
3342 // Step 14.
3343 while (index != strLength) {
3344 // Step 14.a.
3345 int match = StringMatch(str, sep, index);
3346
3347 // Step 14.b.
3348 //
3349 // Our match algorithm differs from the spec in that it returns the
3350 // next index at which a match happens. If no match happens we're
3351 // done.
3352 //
3353 // But what if the match is at the end of the string (and the string is
3354 // not empty)? Per 14.c.i this shouldn't be a match, so we have to
3355 // specially exclude it. Thus this case should hold:
3356 //
3357 // var a = "abc".split(/\b/);
3358 // assertEq(a.length, 1);
3359 // assertEq(a[0], "abc");
3360 if (match == -1) {
3361 break;
3362 }
3363
3364 // Step 14.c.
3365 size_t endIndex = match + sepLength;
3366
3367 // Step 14.c.i.
3368 if (endIndex == lastEndIndex) {
3369 index++;
3370 continue;
3371 }
3372
3373 // Step 14.c.ii.
3374 MOZ_ASSERT(lastEndIndex < endIndex);
3375 MOZ_ASSERT(sepLength <= strLength);
3376 MOZ_ASSERT(lastEndIndex + sepLength <= endIndex);
3377
3378 // Step 14.c.ii.1.
3379 size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
3380 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3381
3382 // Steps 14.c.ii.2-4.
3383 if (!sub || !splits.append(StringValue(sub))) {
3384 return nullptr;
3385 }
3386
3387 // Step 14.c.ii.5.
3388 if (splits.length() == limit) {
3389 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3390 }
3391
3392 // Step 14.c.ii.6.
3393 index = endIndex;
3394
3395 // Step 14.c.ii.7.
3396 lastEndIndex = index;
3397 }
3398
3399 // Step 15.
3400 JSString* sub =
3401 NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
3402
3403 // Steps 16-17.
3404 if (!sub || !splits.append(StringValue(sub))) {
3405 return nullptr;
3406 }
3407
3408 // Step 18.
3409 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3410 }
3411
3412 // Fast-path for splitting a string into a character array via split("").
CharSplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit)3413 static ArrayObject* CharSplitHelper(JSContext* cx, HandleLinearString str,
3414 uint32_t limit) {
3415 size_t strLength = str->length();
3416 if (strLength == 0) {
3417 return NewDenseEmptyArray(cx);
3418 }
3419
3420 js::StaticStrings& staticStrings = cx->staticStrings();
3421 uint32_t resultlen = (limit < strLength ? limit : strLength);
3422 MOZ_ASSERT(limit > 0 && resultlen > 0,
3423 "Neither limit nor strLength is zero, so resultlen is greater "
3424 "than zero.");
3425
3426 RootedArrayObject splits(cx, NewDenseFullyAllocatedArray(cx, resultlen));
3427 if (!splits) {
3428 return nullptr;
3429 }
3430
3431 if (str->hasLatin1Chars()) {
3432 splits->setDenseInitializedLength(resultlen);
3433
3434 JS::AutoCheckCannotGC nogc;
3435 const Latin1Char* latin1Chars = str->latin1Chars(nogc);
3436 for (size_t i = 0; i < resultlen; ++i) {
3437 Latin1Char c = latin1Chars[i];
3438 MOZ_ASSERT(staticStrings.hasUnit(c));
3439 splits->initDenseElement(i, StringValue(staticStrings.getUnit(c)));
3440 }
3441 } else {
3442 splits->ensureDenseInitializedLength(0, resultlen);
3443
3444 for (size_t i = 0; i < resultlen; ++i) {
3445 JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
3446 if (!sub) {
3447 return nullptr;
3448 }
3449 splits->initDenseElement(i, StringValue(sub));
3450 }
3451 }
3452
3453 return splits;
3454 }
3455
3456 template <typename TextChar>
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,const TextChar * text,uint32_t textLen,char16_t patCh)3457 static MOZ_ALWAYS_INLINE ArrayObject* SplitSingleCharHelper(
3458 JSContext* cx, HandleLinearString str, const TextChar* text,
3459 uint32_t textLen, char16_t patCh) {
3460 // Count the number of occurrences of patCh within text.
3461 uint32_t count = 0;
3462 for (size_t index = 0; index < textLen; index++) {
3463 if (static_cast<char16_t>(text[index]) == patCh) {
3464 count++;
3465 }
3466 }
3467
3468 // Handle zero-occurrence case - return input string in an array.
3469 if (count == 0) {
3470 return SingleElementStringArray(cx, str);
3471 }
3472
3473 // Create the result array for the substring values.
3474 RootedArrayObject splits(cx, NewDenseFullyAllocatedArray(cx, count + 1));
3475 if (!splits) {
3476 return nullptr;
3477 }
3478 splits->ensureDenseInitializedLength(0, count + 1);
3479
3480 // Add substrings.
3481 uint32_t splitsIndex = 0;
3482 size_t lastEndIndex = 0;
3483 for (size_t index = 0; index < textLen; index++) {
3484 if (static_cast<char16_t>(text[index]) == patCh) {
3485 size_t subLength = size_t(index - lastEndIndex);
3486 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3487 if (!sub) {
3488 return nullptr;
3489 }
3490 splits->initDenseElement(splitsIndex++, StringValue(sub));
3491 lastEndIndex = index + 1;
3492 }
3493 }
3494
3495 // Add substring for tail of string (after last match).
3496 JSString* sub =
3497 NewDependentString(cx, str, lastEndIndex, textLen - lastEndIndex);
3498 if (!sub) {
3499 return nullptr;
3500 }
3501 splits->initDenseElement(splitsIndex++, StringValue(sub));
3502
3503 return splits;
3504 }
3505
3506 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,char16_t ch)3507 static ArrayObject* SplitSingleCharHelper(JSContext* cx, HandleLinearString str,
3508 char16_t ch) {
3509 // Step 12.
3510 size_t strLength = str->length();
3511
3512 AutoStableStringChars linearChars(cx);
3513 if (!linearChars.init(cx, str)) {
3514 return nullptr;
3515 }
3516
3517 if (linearChars.isLatin1()) {
3518 return SplitSingleCharHelper(cx, str, linearChars.latin1Chars(), strLength,
3519 ch);
3520 }
3521
3522 return SplitSingleCharHelper(cx, str, linearChars.twoByteChars(), strLength,
3523 ch);
3524 }
3525
3526 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
StringSplitString(JSContext * cx,HandleString str,HandleString sep,uint32_t limit)3527 ArrayObject* js::StringSplitString(JSContext* cx, HandleString str,
3528 HandleString sep, uint32_t limit) {
3529 MOZ_ASSERT(limit > 0, "Only called for strictly positive limit.");
3530
3531 RootedLinearString linearStr(cx, str->ensureLinear(cx));
3532 if (!linearStr) {
3533 return nullptr;
3534 }
3535
3536 RootedLinearString linearSep(cx, sep->ensureLinear(cx));
3537 if (!linearSep) {
3538 return nullptr;
3539 }
3540
3541 if (linearSep->length() == 0) {
3542 return CharSplitHelper(cx, linearStr, limit);
3543 }
3544
3545 if (linearSep->length() == 1 && limit >= static_cast<uint32_t>(INT32_MAX)) {
3546 char16_t ch = linearSep->latin1OrTwoByteChar(0);
3547 return SplitSingleCharHelper(cx, linearStr, ch);
3548 }
3549
3550 return SplitHelper(cx, linearStr, limit, linearSep);
3551 }
3552
3553 static const JSFunctionSpec string_methods[] = {
3554 JS_FN(js_toSource_str, str_toSource, 0, 0),
3555
3556 /* Java-like methods. */
3557 JS_INLINABLE_FN(js_toString_str, str_toString, 0, 0, StringToString),
3558 JS_INLINABLE_FN(js_valueOf_str, str_toString, 0, 0, StringValueOf),
3559 JS_INLINABLE_FN("toLowerCase", str_toLowerCase, 0, 0, StringToLowerCase),
3560 JS_INLINABLE_FN("toUpperCase", str_toUpperCase, 0, 0, StringToUpperCase),
3561 JS_INLINABLE_FN("charAt", str_charAt, 1, 0, StringCharAt),
3562 JS_INLINABLE_FN("charCodeAt", str_charCodeAt, 1, 0, StringCharCodeAt),
3563 JS_SELF_HOSTED_FN("substring", "String_substring", 2, 0),
3564 JS_SELF_HOSTED_FN("padStart", "String_pad_start", 2, 0),
3565 JS_SELF_HOSTED_FN("padEnd", "String_pad_end", 2, 0),
3566 JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1, 0),
3567 JS_FN("includes", str_includes, 1, 0), JS_FN("indexOf", str_indexOf, 1, 0),
3568 JS_FN("lastIndexOf", str_lastIndexOf, 1, 0),
3569 JS_FN("startsWith", str_startsWith, 1, 0),
3570 JS_FN("endsWith", str_endsWith, 1, 0), JS_FN("trim", str_trim, 0, 0),
3571 JS_FN("trimStart", str_trimStart, 0, 0),
3572 JS_FN("trimEnd", str_trimEnd, 0, 0),
3573 #if JS_HAS_INTL_API
3574 JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_toLocaleLowerCase", 0, 0),
3575 JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_toLocaleUpperCase", 0, 0),
3576 JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1, 0),
3577 #else
3578 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0, 0),
3579 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0, 0),
3580 JS_FN("localeCompare", str_localeCompare, 1, 0),
3581 #endif
3582 JS_SELF_HOSTED_FN("repeat", "String_repeat", 1, 0),
3583 #if JS_HAS_INTL_API
3584 JS_FN("normalize", str_normalize, 0, 0),
3585 #endif
3586
3587 /* Perl-ish methods (search is actually Python-esque). */
3588 JS_SELF_HOSTED_FN("match", "String_match", 1, 0),
3589 JS_SELF_HOSTED_FN("matchAll", "String_matchAll", 1, 0),
3590 JS_SELF_HOSTED_FN("search", "String_search", 1, 0),
3591 JS_SELF_HOSTED_FN("replace", "String_replace", 2, 0),
3592 JS_SELF_HOSTED_FN("replaceAll", "String_replaceAll", 2, 0),
3593 JS_SELF_HOSTED_FN("split", "String_split", 2, 0),
3594 JS_SELF_HOSTED_FN("substr", "String_substr", 2, 0),
3595
3596 /* Python-esque sequence methods. */
3597 JS_SELF_HOSTED_FN("concat", "String_concat", 1, 0),
3598 JS_SELF_HOSTED_FN("slice", "String_slice", 2, 0),
3599
3600 JS_SELF_HOSTED_FN("at", "String_at", 1, 0),
3601
3602 /* HTML string methods. */
3603 JS_SELF_HOSTED_FN("bold", "String_bold", 0, 0),
3604 JS_SELF_HOSTED_FN("italics", "String_italics", 0, 0),
3605 JS_SELF_HOSTED_FN("fixed", "String_fixed", 0, 0),
3606 JS_SELF_HOSTED_FN("strike", "String_strike", 0, 0),
3607 JS_SELF_HOSTED_FN("small", "String_small", 0, 0),
3608 JS_SELF_HOSTED_FN("big", "String_big", 0, 0),
3609 JS_SELF_HOSTED_FN("blink", "String_blink", 0, 0),
3610 JS_SELF_HOSTED_FN("sup", "String_sup", 0, 0),
3611 JS_SELF_HOSTED_FN("sub", "String_sub", 0, 0),
3612 JS_SELF_HOSTED_FN("anchor", "String_anchor", 1, 0),
3613 JS_SELF_HOSTED_FN("link", "String_link", 1, 0),
3614 JS_SELF_HOSTED_FN("fontcolor", "String_fontcolor", 1, 0),
3615 JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1, 0),
3616
3617 JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0, 0), JS_FS_END};
3618
3619 // ES6 rev 27 (2014 Aug 24) 21.1.1
StringConstructor(JSContext * cx,unsigned argc,Value * vp)3620 bool js::StringConstructor(JSContext* cx, unsigned argc, Value* vp) {
3621 CallArgs args = CallArgsFromVp(argc, vp);
3622
3623 RootedString str(cx);
3624 if (args.length() > 0) {
3625 if (!args.isConstructing() && args[0].isSymbol()) {
3626 return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
3627 }
3628
3629 str = ToString<CanGC>(cx, args[0]);
3630 if (!str) {
3631 return false;
3632 }
3633 } else {
3634 str = cx->runtime()->emptyString;
3635 }
3636
3637 if (args.isConstructing()) {
3638 RootedObject proto(cx);
3639 if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_String, &proto)) {
3640 return false;
3641 }
3642
3643 StringObject* strobj = StringObject::create(cx, str, proto);
3644 if (!strobj) {
3645 return false;
3646 }
3647 args.rval().setObject(*strobj);
3648 return true;
3649 }
3650
3651 args.rval().setString(str);
3652 return true;
3653 }
3654
str_fromCharCode(JSContext * cx,unsigned argc,Value * vp)3655 bool js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) {
3656 CallArgs args = CallArgsFromVp(argc, vp);
3657
3658 MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);
3659
3660 // Optimize the single-char case.
3661 if (args.length() == 1) {
3662 return str_fromCharCode_one_arg(cx, args[0], args.rval());
3663 }
3664
3665 // Optimize the case where the result will definitely fit in an inline
3666 // string (thin or fat) and so we don't need to malloc the chars. (We could
3667 // cover some cases where args.length() goes up to
3668 // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
3669 // all Latin-1, but it doesn't seem worth the effort.)
3670 InlineCharBuffer<char16_t> chars;
3671 if (!chars.maybeAlloc(cx, args.length())) {
3672 return false;
3673 }
3674
3675 char16_t* rawChars = chars.get();
3676 for (unsigned i = 0; i < args.length(); i++) {
3677 uint16_t code;
3678 if (!ToUint16(cx, args[i], &code)) {
3679 return false;
3680 }
3681
3682 rawChars[i] = char16_t(code);
3683 }
3684
3685 JSString* str = chars.toString(cx, args.length());
3686 if (!str) {
3687 return false;
3688 }
3689
3690 args.rval().setString(str);
3691 return true;
3692 }
3693
CodeUnitToString(JSContext * cx,uint16_t ucode,MutableHandleValue rval)3694 static inline bool CodeUnitToString(JSContext* cx, uint16_t ucode,
3695 MutableHandleValue rval) {
3696 if (StaticStrings::hasUnit(ucode)) {
3697 rval.setString(cx->staticStrings().getUnit(ucode));
3698 return true;
3699 }
3700
3701 char16_t c = char16_t(ucode);
3702 JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, &c, 1);
3703 if (!str) {
3704 return false;
3705 }
3706
3707 rval.setString(str);
3708 return true;
3709 }
3710
str_fromCharCode_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3711 bool js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code,
3712 MutableHandleValue rval) {
3713 uint16_t ucode;
3714
3715 if (!ToUint16(cx, code, &ucode)) {
3716 return false;
3717 }
3718
3719 return CodeUnitToString(cx, ucode, rval);
3720 }
3721
ToCodePoint(JSContext * cx,HandleValue code,uint32_t * codePoint)3722 static MOZ_ALWAYS_INLINE bool ToCodePoint(JSContext* cx, HandleValue code,
3723 uint32_t* codePoint) {
3724 // String.fromCodePoint, Steps 5.a-b.
3725
3726 // Fast path for the common case - the input is already an int32.
3727 if (code.isInt32()) {
3728 int32_t nextCP = code.toInt32();
3729 if (nextCP >= 0 && nextCP <= int32_t(unicode::NonBMPMax)) {
3730 *codePoint = uint32_t(nextCP);
3731 return true;
3732 }
3733 }
3734
3735 double nextCP;
3736 if (!ToNumber(cx, code, &nextCP)) {
3737 return false;
3738 }
3739
3740 // String.fromCodePoint, Steps 5.c-d.
3741 if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 ||
3742 nextCP > unicode::NonBMPMax) {
3743 ToCStringBuf cbuf;
3744 if (const char* numStr = NumberToCString(cx, &cbuf, nextCP)) {
3745 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
3746 JSMSG_NOT_A_CODEPOINT, numStr);
3747 }
3748 return false;
3749 }
3750
3751 *codePoint = uint32_t(nextCP);
3752 return true;
3753 }
3754
str_fromCodePoint_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3755 bool js::str_fromCodePoint_one_arg(JSContext* cx, HandleValue code,
3756 MutableHandleValue rval) {
3757 // Steps 1-4 (omitted).
3758
3759 // Steps 5.a-d.
3760 uint32_t codePoint;
3761 if (!ToCodePoint(cx, code, &codePoint)) {
3762 return false;
3763 }
3764
3765 // Steps 5.e, 6.
3766 if (!unicode::IsSupplementary(codePoint)) {
3767 return CodeUnitToString(cx, uint16_t(codePoint), rval);
3768 }
3769
3770 char16_t chars[] = {unicode::LeadSurrogate(codePoint),
3771 unicode::TrailSurrogate(codePoint)};
3772 JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, chars, 2);
3773 if (!str) {
3774 return false;
3775 }
3776
3777 rval.setString(str);
3778 return true;
3779 }
3780
str_fromCodePoint_few_args(JSContext * cx,const CallArgs & args)3781 static bool str_fromCodePoint_few_args(JSContext* cx, const CallArgs& args) {
3782 MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2);
3783
3784 // Steps 1-2 (omitted).
3785
3786 // Step 3.
3787 char16_t elements[JSFatInlineString::MAX_LENGTH_TWO_BYTE];
3788
3789 // Steps 4-5.
3790 unsigned length = 0;
3791 for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3792 // Steps 5.a-d.
3793 uint32_t codePoint;
3794 if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
3795 return false;
3796 }
3797
3798 // Step 5.e.
3799 unicode::UTF16Encode(codePoint, elements, &length);
3800 }
3801
3802 // Step 6.
3803 JSString* str = NewStringCopyN<CanGC>(cx, elements, length);
3804 if (!str) {
3805 return false;
3806 }
3807
3808 args.rval().setString(str);
3809 return true;
3810 }
3811
3812 // ES2017 draft rev 40edb3a95a475c1b251141ac681b8793129d9a6d
3813 // 21.1.2.2 String.fromCodePoint(...codePoints)
str_fromCodePoint(JSContext * cx,unsigned argc,Value * vp)3814 bool js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) {
3815 CallArgs args = CallArgsFromVp(argc, vp);
3816
3817 // Optimize the single code-point case.
3818 if (args.length() == 1) {
3819 return str_fromCodePoint_one_arg(cx, args[0], args.rval());
3820 }
3821
3822 // Optimize the case where the result will definitely fit in an inline
3823 // string (thin or fat) and so we don't need to malloc the chars. (We could
3824 // cover some cases where |args.length()| goes up to
3825 // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
3826 // are all Latin-1, but it doesn't seem worth the effort.)
3827 if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2) {
3828 return str_fromCodePoint_few_args(cx, args);
3829 }
3830
3831 // Steps 1-2 (omitted).
3832
3833 // Step 3.
3834 static_assert(
3835 ARGS_LENGTH_MAX < std::numeric_limits<decltype(args.length())>::max() / 2,
3836 "|args.length() * 2| does not overflow");
3837 auto elements = cx->make_pod_arena_array<char16_t>(js::StringBufferArena,
3838 args.length() * 2);
3839 if (!elements) {
3840 return false;
3841 }
3842
3843 // Steps 4-5.
3844 unsigned length = 0;
3845 for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3846 // Steps 5.a-d.
3847 uint32_t codePoint;
3848 if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
3849 return false;
3850 }
3851
3852 // Step 5.e.
3853 unicode::UTF16Encode(codePoint, elements.get(), &length);
3854 }
3855
3856 // Step 6.
3857 JSString* str = NewString<CanGC>(cx, std::move(elements), length);
3858 if (!str) {
3859 return false;
3860 }
3861
3862 args.rval().setString(str);
3863 return true;
3864 }
3865
3866 static const JSFunctionSpec string_static_methods[] = {
3867 JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0,
3868 StringFromCharCode),
3869 JS_INLINABLE_FN("fromCodePoint", js::str_fromCodePoint, 1, 0,
3870 StringFromCodePoint),
3871
3872 JS_SELF_HOSTED_FN("raw", "String_static_raw", 1, 0), JS_FS_END};
3873
3874 /* static */
assignInitialShape(JSContext * cx,Handle<StringObject * > obj)3875 Shape* StringObject::assignInitialShape(JSContext* cx,
3876 Handle<StringObject*> obj) {
3877 MOZ_ASSERT(obj->empty());
3878
3879 if (!NativeObject::addPropertyInReservedSlot(cx, obj, cx->names().length,
3880 LENGTH_SLOT, {})) {
3881 return nullptr;
3882 }
3883
3884 return obj->shape();
3885 }
3886
createPrototype(JSContext * cx,JSProtoKey key)3887 JSObject* StringObject::createPrototype(JSContext* cx, JSProtoKey key) {
3888 Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
3889 Rooted<StringObject*> proto(
3890 cx, GlobalObject::createBlankPrototype<StringObject>(cx, cx->global()));
3891 if (!proto) {
3892 return nullptr;
3893 }
3894 if (!StringObject::init(cx, proto, empty)) {
3895 return nullptr;
3896 }
3897 return proto;
3898 }
3899
StringClassFinish(JSContext * cx,HandleObject ctor,HandleObject proto)3900 static bool StringClassFinish(JSContext* cx, HandleObject ctor,
3901 HandleObject proto) {
3902 HandleNativeObject nativeProto = proto.as<NativeObject>();
3903
3904 // Create "trimLeft" as an alias for "trimStart".
3905 RootedValue trimFn(cx);
3906 RootedId trimId(cx, NameToId(cx->names().trimStart));
3907 RootedId trimAliasId(cx, NameToId(cx->names().trimLeft));
3908 if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
3909 !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
3910 return false;
3911 }
3912
3913 // Create "trimRight" as an alias for "trimEnd".
3914 trimId = NameToId(cx->names().trimEnd);
3915 trimAliasId = NameToId(cx->names().trimRight);
3916 if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
3917 !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
3918 return false;
3919 }
3920
3921 /*
3922 * Define escape/unescape, the URI encode/decode functions, and maybe
3923 * uneval on the global object.
3924 */
3925 if (!JS_DefineFunctions(cx, cx->global(), string_functions)) {
3926 return false;
3927 }
3928
3929 return true;
3930 }
3931
3932 const ClassSpec StringObject::classSpec_ = {
3933 GenericCreateConstructor<StringConstructor, 1, gc::AllocKind::FUNCTION,
3934 &jit::JitInfo_String>,
3935 StringObject::createPrototype,
3936 string_static_methods,
3937 nullptr,
3938 string_methods,
3939 nullptr,
3940 StringClassFinish};
3941
3942 #define ____ false
3943
3944 /*
3945 * Uri reserved chars + #:
3946 * - 35: #
3947 * - 36: $
3948 * - 38: &
3949 * - 43: +
3950 * - 44: ,
3951 * - 47: /
3952 * - 58: :
3953 * - 59: ;
3954 * - 61: =
3955 * - 63: ?
3956 * - 64: @
3957 */
3958 static const bool js_isUriReservedPlusPound[] = {
3959 // clang-format off
3960 /* 0 1 2 3 4 5 6 7 8 9 */
3961 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3962 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3963 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3964 /* 3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
3965 /* 4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
3966 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
3967 /* 6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
3968 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3969 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3970 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3971 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3972 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3973 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
3974 // clang-format on
3975 };
3976
3977 /*
3978 * Uri unescaped chars:
3979 * - 33: !
3980 * - 39: '
3981 * - 40: (
3982 * - 41: )
3983 * - 42: *
3984 * - 45: -
3985 * - 46: .
3986 * - 48..57: 0-9
3987 * - 65..90: A-Z
3988 * - 95: _
3989 * - 97..122: a-z
3990 * - 126: ~
3991 */
3992 static const bool js_isUriUnescaped[] = {
3993 // clang-format off
3994 /* 0 1 2 3 4 5 6 7 8 9 */
3995 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3996 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3997 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3998 /* 3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
3999 /* 4 */ true, true, true, ____, ____, true, true, ____, true, true,
4000 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
4001 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
4002 /* 7 */ true, true, true, true, true, true, true, true, true, true,
4003 /* 8 */ true, true, true, true, true, true, true, true, true, true,
4004 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
4005 /* 10 */ true, true, true, true, true, true, true, true, true, true,
4006 /* 11 */ true, true, true, true, true, true, true, true, true, true,
4007 /* 12 */ true, true, true, ____, ____, ____, true, ____
4008 // clang-format on
4009 };
4010
4011 #undef ____
4012
TransferBufferToString(JSStringBuilder & sb,JSString * str,MutableHandleValue rval)4013 static inline bool TransferBufferToString(JSStringBuilder& sb, JSString* str,
4014 MutableHandleValue rval) {
4015 if (!sb.empty()) {
4016 str = sb.finishString();
4017 if (!str) {
4018 return false;
4019 }
4020 }
4021 rval.setString(str);
4022 return true;
4023 }
4024
4025 /*
4026 * ECMA 3, 15.1.3 URI Handling Function Properties
4027 *
4028 * The following are implementations of the algorithms
4029 * given in the ECMA specification for the hidden functions
4030 * 'Encode' and 'Decode'.
4031 */
4032 enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };
4033
4034 // Bug 1403318: GCC sometimes inlines this Encode function rather than the
4035 // caller Encode function. Annotate both functions with MOZ_NEVER_INLINE resp.
4036 // MOZ_ALWAYS_INLINE to ensure we get the desired inlining behavior.
4037 template <typename CharT>
Encode(StringBuffer & sb,const CharT * chars,size_t length,const bool * unescapedSet)4038 static MOZ_NEVER_INLINE EncodeResult Encode(StringBuffer& sb,
4039 const CharT* chars, size_t length,
4040 const bool* unescapedSet) {
4041 Latin1Char hexBuf[3];
4042 hexBuf[0] = '%';
4043
4044 auto appendEncoded = [&sb, &hexBuf](Latin1Char c) {
4045 static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
4046
4047 hexBuf[1] = HexDigits[c >> 4];
4048 hexBuf[2] = HexDigits[c & 0xf];
4049 return sb.append(hexBuf, 3);
4050 };
4051
4052 auto appendRange = [&sb, chars, length](size_t start, size_t end) {
4053 MOZ_ASSERT(start <= end);
4054
4055 if (start < end) {
4056 if (start == 0) {
4057 if (!sb.reserve(length)) {
4058 return false;
4059 }
4060 }
4061 return sb.append(chars + start, chars + end);
4062 }
4063 return true;
4064 };
4065
4066 size_t startAppend = 0;
4067 for (size_t k = 0; k < length; k++) {
4068 CharT c = chars[k];
4069 if (c < 128 &&
4070 (js_isUriUnescaped[c] || (unescapedSet && unescapedSet[c]))) {
4071 continue;
4072 } else {
4073 if (!appendRange(startAppend, k)) {
4074 return Encode_Failure;
4075 }
4076
4077 if constexpr (std::is_same_v<CharT, Latin1Char>) {
4078 if (c < 0x80) {
4079 if (!appendEncoded(c)) {
4080 return Encode_Failure;
4081 }
4082 } else {
4083 if (!appendEncoded(0xC0 | (c >> 6)) ||
4084 !appendEncoded(0x80 | (c & 0x3F))) {
4085 return Encode_Failure;
4086 }
4087 }
4088 } else {
4089 if (unicode::IsTrailSurrogate(c)) {
4090 return Encode_BadUri;
4091 }
4092
4093 uint32_t v;
4094 if (!unicode::IsLeadSurrogate(c)) {
4095 v = c;
4096 } else {
4097 k++;
4098 if (k == length) {
4099 return Encode_BadUri;
4100 }
4101
4102 char16_t c2 = chars[k];
4103 if (!unicode::IsTrailSurrogate(c2)) {
4104 return Encode_BadUri;
4105 }
4106
4107 v = unicode::UTF16Decode(c, c2);
4108 }
4109
4110 uint8_t utf8buf[4];
4111 size_t L = OneUcs4ToUtf8Char(utf8buf, v);
4112 for (size_t j = 0; j < L; j++) {
4113 if (!appendEncoded(utf8buf[j])) {
4114 return Encode_Failure;
4115 }
4116 }
4117 }
4118
4119 startAppend = k + 1;
4120 }
4121 }
4122
4123 if (startAppend > 0) {
4124 if (!appendRange(startAppend, length)) {
4125 return Encode_Failure;
4126 }
4127 }
4128
4129 return Encode_Success;
4130 }
4131
Encode(JSContext * cx,HandleLinearString str,const bool * unescapedSet,MutableHandleValue rval)4132 static MOZ_ALWAYS_INLINE bool Encode(JSContext* cx, HandleLinearString str,
4133 const bool* unescapedSet,
4134 MutableHandleValue rval) {
4135 size_t length = str->length();
4136 if (length == 0) {
4137 rval.setString(cx->runtime()->emptyString);
4138 return true;
4139 }
4140
4141 JSStringBuilder sb(cx);
4142
4143 EncodeResult res;
4144 if (str->hasLatin1Chars()) {
4145 AutoCheckCannotGC nogc;
4146 res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet);
4147 } else {
4148 AutoCheckCannotGC nogc;
4149 res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet);
4150 }
4151
4152 if (res == Encode_Failure) {
4153 return false;
4154 }
4155
4156 if (res == Encode_BadUri) {
4157 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4158 return false;
4159 }
4160
4161 MOZ_ASSERT(res == Encode_Success);
4162 return TransferBufferToString(sb, str, rval);
4163 }
4164
4165 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success };
4166
4167 template <typename CharT>
Decode(StringBuffer & sb,const CharT * chars,size_t length,const bool * reservedSet)4168 static DecodeResult Decode(StringBuffer& sb, const CharT* chars, size_t length,
4169 const bool* reservedSet) {
4170 auto appendRange = [&sb, chars](size_t start, size_t end) {
4171 MOZ_ASSERT(start <= end);
4172
4173 if (start < end) {
4174 return sb.append(chars + start, chars + end);
4175 }
4176 return true;
4177 };
4178
4179 size_t startAppend = 0;
4180 for (size_t k = 0; k < length; k++) {
4181 CharT c = chars[k];
4182 if (c == '%') {
4183 size_t start = k;
4184 if ((k + 2) >= length) {
4185 return Decode_BadUri;
4186 }
4187
4188 if (!IsAsciiHexDigit(chars[k + 1]) || !IsAsciiHexDigit(chars[k + 2])) {
4189 return Decode_BadUri;
4190 }
4191
4192 uint32_t B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 +
4193 AsciiAlphanumericToNumber(chars[k + 2]);
4194 k += 2;
4195 if (B < 128) {
4196 Latin1Char ch = Latin1Char(B);
4197 if (reservedSet && reservedSet[ch]) {
4198 continue;
4199 }
4200
4201 if (!appendRange(startAppend, start)) {
4202 return Decode_Failure;
4203 }
4204 if (!sb.append(ch)) {
4205 return Decode_Failure;
4206 }
4207 } else {
4208 int n = 1;
4209 while (B & (0x80 >> n)) {
4210 n++;
4211 }
4212
4213 if (n == 1 || n > 4) {
4214 return Decode_BadUri;
4215 }
4216
4217 uint8_t octets[4];
4218 octets[0] = (uint8_t)B;
4219 if (k + 3 * (n - 1) >= length) {
4220 return Decode_BadUri;
4221 }
4222
4223 for (int j = 1; j < n; j++) {
4224 k++;
4225 if (chars[k] != '%') {
4226 return Decode_BadUri;
4227 }
4228
4229 if (!IsAsciiHexDigit(chars[k + 1]) ||
4230 !IsAsciiHexDigit(chars[k + 2])) {
4231 return Decode_BadUri;
4232 }
4233
4234 B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 +
4235 AsciiAlphanumericToNumber(chars[k + 2]);
4236 if ((B & 0xC0) != 0x80) {
4237 return Decode_BadUri;
4238 }
4239
4240 k += 2;
4241 octets[j] = char(B);
4242 }
4243
4244 if (!appendRange(startAppend, start)) {
4245 return Decode_Failure;
4246 }
4247
4248 uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
4249 MOZ_ASSERT(v >= 128);
4250 if (v >= unicode::NonBMPMin) {
4251 if (v > unicode::NonBMPMax) {
4252 return Decode_BadUri;
4253 }
4254
4255 if (!sb.append(unicode::LeadSurrogate(v))) {
4256 return Decode_Failure;
4257 }
4258 if (!sb.append(unicode::TrailSurrogate(v))) {
4259 return Decode_Failure;
4260 }
4261 } else {
4262 if (!sb.append(char16_t(v))) {
4263 return Decode_Failure;
4264 }
4265 }
4266 }
4267
4268 startAppend = k + 1;
4269 }
4270 }
4271
4272 if (startAppend > 0) {
4273 if (!appendRange(startAppend, length)) {
4274 return Decode_Failure;
4275 }
4276 }
4277
4278 return Decode_Success;
4279 }
4280
Decode(JSContext * cx,HandleLinearString str,const bool * reservedSet,MutableHandleValue rval)4281 static bool Decode(JSContext* cx, HandleLinearString str,
4282 const bool* reservedSet, MutableHandleValue rval) {
4283 size_t length = str->length();
4284 if (length == 0) {
4285 rval.setString(cx->runtime()->emptyString);
4286 return true;
4287 }
4288
4289 JSStringBuilder sb(cx);
4290
4291 DecodeResult res;
4292 if (str->hasLatin1Chars()) {
4293 AutoCheckCannotGC nogc;
4294 res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
4295 } else {
4296 AutoCheckCannotGC nogc;
4297 res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
4298 }
4299
4300 if (res == Decode_Failure) {
4301 return false;
4302 }
4303
4304 if (res == Decode_BadUri) {
4305 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4306 return false;
4307 }
4308
4309 MOZ_ASSERT(res == Decode_Success);
4310 return TransferBufferToString(sb, str, rval);
4311 }
4312
str_decodeURI(JSContext * cx,unsigned argc,Value * vp)4313 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp) {
4314 CallArgs args = CallArgsFromVp(argc, vp);
4315 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4316 if (!str) {
4317 return false;
4318 }
4319
4320 return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
4321 }
4322
str_decodeURI_Component(JSContext * cx,unsigned argc,Value * vp)4323 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
4324 CallArgs args = CallArgsFromVp(argc, vp);
4325 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4326 if (!str) {
4327 return false;
4328 }
4329
4330 return Decode(cx, str, nullptr, args.rval());
4331 }
4332
str_encodeURI(JSContext * cx,unsigned argc,Value * vp)4333 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp) {
4334 CallArgs args = CallArgsFromVp(argc, vp);
4335 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4336 if (!str) {
4337 return false;
4338 }
4339
4340 return Encode(cx, str, js_isUriReservedPlusPound, args.rval());
4341 }
4342
str_encodeURI_Component(JSContext * cx,unsigned argc,Value * vp)4343 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
4344 CallArgs args = CallArgsFromVp(argc, vp);
4345 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4346 if (!str) {
4347 return false;
4348 }
4349
4350 return Encode(cx, str, nullptr, args.rval());
4351 }
4352
EncodeURI(JSContext * cx,const char * chars,size_t length)4353 JSString* js::EncodeURI(JSContext* cx, const char* chars, size_t length) {
4354 JSStringBuilder sb(cx);
4355 EncodeResult result = Encode(sb, reinterpret_cast<const Latin1Char*>(chars),
4356 length, js_isUriReservedPlusPound);
4357 if (result == EncodeResult::Encode_Failure) {
4358 return nullptr;
4359 }
4360 if (result == EncodeResult::Encode_BadUri) {
4361 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4362 return nullptr;
4363 }
4364 if (sb.empty()) {
4365 return NewStringCopyN<CanGC>(cx, chars, length);
4366 }
4367 return sb.finishString();
4368 }
4369
FlatStringMatchHelper(JSContext * cx,HandleString str,HandleString pattern,bool * isFlat,int32_t * match)4370 static bool FlatStringMatchHelper(JSContext* cx, HandleString str,
4371 HandleString pattern, bool* isFlat,
4372 int32_t* match) {
4373 RootedLinearString linearPattern(cx, pattern->ensureLinear(cx));
4374 if (!linearPattern) {
4375 return false;
4376 }
4377
4378 static const size_t MAX_FLAT_PAT_LEN = 256;
4379 if (linearPattern->length() > MAX_FLAT_PAT_LEN ||
4380 StringHasRegExpMetaChars(linearPattern)) {
4381 *isFlat = false;
4382 return true;
4383 }
4384
4385 *isFlat = true;
4386 if (str->isRope()) {
4387 if (!RopeMatch(cx, &str->asRope(), linearPattern, match)) {
4388 return false;
4389 }
4390 } else {
4391 *match = StringMatch(&str->asLinear(), linearPattern);
4392 }
4393
4394 return true;
4395 }
4396
BuildFlatMatchArray(JSContext * cx,HandleString str,HandleString pattern,int32_t match,MutableHandleValue rval)4397 static bool BuildFlatMatchArray(JSContext* cx, HandleString str,
4398 HandleString pattern, int32_t match,
4399 MutableHandleValue rval) {
4400 if (match < 0) {
4401 rval.setNull();
4402 return true;
4403 }
4404
4405 // Get the templateObject that defines the shape and type of the output
4406 // object.
4407 ArrayObject* templateObject =
4408 cx->realm()->regExps.getOrCreateMatchResultTemplateObject(cx);
4409 if (!templateObject) {
4410 return false;
4411 }
4412
4413 RootedArrayObject arr(
4414 cx, NewDenseFullyAllocatedArrayWithTemplate(cx, 1, templateObject));
4415 if (!arr) {
4416 return false;
4417 }
4418
4419 // Store a Value for each pair.
4420 arr->setDenseInitializedLength(1);
4421 arr->initDenseElement(0, StringValue(pattern));
4422
4423 // Set the |index| property. (TemplateObject positions it in slot 0).
4424 arr->setSlot(0, Int32Value(match));
4425
4426 // Set the |input| property. (TemplateObject positions it in slot 1).
4427 arr->setSlot(1, StringValue(str));
4428
4429 #ifdef DEBUG
4430 RootedValue test(cx);
4431 RootedId id(cx, NameToId(cx->names().index));
4432 if (!NativeGetProperty(cx, arr, id, &test)) {
4433 return false;
4434 }
4435 MOZ_ASSERT(test == arr->getSlot(0));
4436 id = NameToId(cx->names().input);
4437 if (!NativeGetProperty(cx, arr, id, &test)) {
4438 return false;
4439 }
4440 MOZ_ASSERT(test == arr->getSlot(1));
4441 #endif
4442
4443 rval.setObject(*arr);
4444 return true;
4445 }
4446
4447 #ifdef DEBUG
CallIsStringOptimizable(JSContext * cx,const char * name,bool * result)4448 static bool CallIsStringOptimizable(JSContext* cx, const char* name,
4449 bool* result) {
4450 FixedInvokeArgs<0> args(cx);
4451
4452 RootedValue rval(cx);
4453 if (!CallSelfHostedFunction(cx, name, UndefinedHandleValue, args, &rval)) {
4454 return false;
4455 }
4456
4457 *result = rval.toBoolean();
4458 return true;
4459 }
4460 #endif
4461
FlatStringMatch(JSContext * cx,unsigned argc,Value * vp)4462 bool js::FlatStringMatch(JSContext* cx, unsigned argc, Value* vp) {
4463 CallArgs args = CallArgsFromVp(argc, vp);
4464 MOZ_ASSERT(args.length() == 2);
4465 MOZ_ASSERT(args[0].isString());
4466 MOZ_ASSERT(args[1].isString());
4467 #ifdef DEBUG
4468 bool isOptimizable = false;
4469 if (!CallIsStringOptimizable(cx, "IsStringMatchOptimizable",
4470 &isOptimizable)) {
4471 return false;
4472 }
4473 MOZ_ASSERT(isOptimizable);
4474 #endif
4475
4476 RootedString str(cx, args[0].toString());
4477 RootedString pattern(cx, args[1].toString());
4478
4479 bool isFlat = false;
4480 int32_t match = 0;
4481 if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) {
4482 return false;
4483 }
4484
4485 if (!isFlat) {
4486 args.rval().setUndefined();
4487 return true;
4488 }
4489
4490 return BuildFlatMatchArray(cx, str, pattern, match, args.rval());
4491 }
4492
FlatStringSearch(JSContext * cx,unsigned argc,Value * vp)4493 bool js::FlatStringSearch(JSContext* cx, unsigned argc, Value* vp) {
4494 CallArgs args = CallArgsFromVp(argc, vp);
4495 MOZ_ASSERT(args.length() == 2);
4496 MOZ_ASSERT(args[0].isString());
4497 MOZ_ASSERT(args[1].isString());
4498 #ifdef DEBUG
4499 bool isOptimizable = false;
4500 if (!CallIsStringOptimizable(cx, "IsStringSearchOptimizable",
4501 &isOptimizable)) {
4502 return false;
4503 }
4504 MOZ_ASSERT(isOptimizable);
4505 #endif
4506
4507 RootedString str(cx, args[0].toString());
4508 RootedString pattern(cx, args[1].toString());
4509
4510 bool isFlat = false;
4511 int32_t match = 0;
4512 if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) {
4513 return false;
4514 }
4515
4516 if (!isFlat) {
4517 args.rval().setInt32(-2);
4518 return true;
4519 }
4520
4521 args.rval().setInt32(match);
4522 return true;
4523 }
4524