1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "builtin/String.h"
8
9 #include "mozilla/Attributes.h"
10 #include "mozilla/CheckedInt.h"
11 #include "mozilla/FloatingPoint.h"
12 #if JS_HAS_INTL_API
13 # include "mozilla/intl/String.h"
14 #endif
15 #include "mozilla/PodOperations.h"
16 #include "mozilla/Range.h"
17 #include "mozilla/TextUtils.h"
18
19 #include <algorithm>
20 #include <limits>
21 #include <string.h>
22 #include <type_traits>
23
24 #include "jsnum.h"
25 #include "jstypes.h"
26
27 #include "builtin/Array.h"
28 #include "builtin/Boolean.h"
29 #if JS_HAS_INTL_API
30 # include "builtin/intl/CommonFunctions.h"
31 # include "builtin/intl/FormatBuffer.h"
32 #endif
33 #include "builtin/RegExp.h"
34 #include "jit/InlinableNatives.h"
35 #include "js/Conversions.h"
36 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
37 #include "js/friend/StackLimits.h" // js::AutoCheckRecursionLimit
38 #if !JS_HAS_INTL_API
39 # include "js/LocaleSensitive.h"
40 #endif
41 #include "js/PropertyAndElement.h" // JS_DefineFunctions
42 #include "js/PropertySpec.h"
43 #include "js/StableStringChars.h"
44 #include "js/UniquePtr.h"
45 #include "util/StringBuffer.h"
46 #include "util/Unicode.h"
47 #include "vm/GlobalObject.h"
48 #include "vm/Interpreter.h"
49 #include "vm/JSAtom.h"
50 #include "vm/JSContext.h"
51 #include "vm/JSObject.h"
52 #include "vm/Opcodes.h"
53 #include "vm/Printer.h"
54 #include "vm/RegExpObject.h"
55 #include "vm/RegExpStatics.h"
56 #include "vm/SelfHosting.h"
57 #include "vm/StaticStrings.h"
58 #include "vm/ToSource.h" // js::ValueToSource
59 #include "vm/WellKnownAtom.h" // js_*_str
60
61 #include "vm/InlineCharBuffer-inl.h"
62 #include "vm/Interpreter-inl.h"
63 #include "vm/StringObject-inl.h"
64 #include "vm/StringType-inl.h"
65
66 using namespace js;
67
68 using JS::Symbol;
69 using JS::SymbolCode;
70
71 using mozilla::AsciiAlphanumericToNumber;
72 using mozilla::CheckedInt;
73 using mozilla::IsAsciiHexDigit;
74 using mozilla::IsNaN;
75 using mozilla::PodCopy;
76 using mozilla::RangedPtr;
77
78 using JS::AutoCheckCannotGC;
79 using JS::AutoStableStringChars;
80
ArgToLinearString(JSContext * cx,const CallArgs & args,unsigned argno)81 static JSLinearString* ArgToLinearString(JSContext* cx, const CallArgs& args,
82 unsigned argno) {
83 if (argno >= args.length()) {
84 return cx->names().undefined;
85 }
86
87 JSString* str = ToString<CanGC>(cx, args[argno]);
88 if (!str) {
89 return nullptr;
90 }
91
92 return str->ensureLinear(cx);
93 }
94
95 /*
96 * Forward declarations for URI encode/decode and helper routines
97 */
98 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
99
100 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
101
102 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
103
104 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
105
106 /*
107 * Global string methods
108 */
109
110 /* ES5 B.2.1 */
111 template <typename CharT>
Escape(JSContext * cx,const CharT * chars,uint32_t length,InlineCharBuffer<Latin1Char> & newChars,uint32_t * newLengthOut)112 static bool Escape(JSContext* cx, const CharT* chars, uint32_t length,
113 InlineCharBuffer<Latin1Char>& newChars,
114 uint32_t* newLengthOut) {
115 // clang-format off
116 static const uint8_t shouldPassThrough[128] = {
117 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
118 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
119 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
120 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
121 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
122 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
123 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
124 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
125 };
126 // clang-format on
127
128 /* Take a first pass and see how big the result string will need to be. */
129 uint32_t newLength = length;
130 for (size_t i = 0; i < length; i++) {
131 char16_t ch = chars[i];
132 if (ch < 128 && shouldPassThrough[ch]) {
133 continue;
134 }
135
136 /*
137 * newlength is incremented below by at most 5 and at this point it must
138 * be a valid string length, so this should never overflow uint32_t.
139 */
140 static_assert(JSString::MAX_LENGTH < UINT32_MAX - 5,
141 "Adding 5 to valid string length should not overflow");
142
143 MOZ_ASSERT(newLength <= JSString::MAX_LENGTH);
144
145 /* The character will be encoded as %XX or %uXXXX. */
146 newLength += (ch < 256) ? 2 : 5;
147
148 if (MOZ_UNLIKELY(newLength > JSString::MAX_LENGTH)) {
149 ReportAllocationOverflow(cx);
150 return false;
151 }
152 }
153
154 if (newLength == length) {
155 *newLengthOut = newLength;
156 return true;
157 }
158
159 if (!newChars.maybeAlloc(cx, newLength)) {
160 return false;
161 }
162
163 static const char digits[] = "0123456789ABCDEF";
164
165 Latin1Char* rawNewChars = newChars.get();
166 size_t i, ni;
167 for (i = 0, ni = 0; i < length; i++) {
168 char16_t ch = chars[i];
169 if (ch < 128 && shouldPassThrough[ch]) {
170 rawNewChars[ni++] = ch;
171 } else if (ch < 256) {
172 rawNewChars[ni++] = '%';
173 rawNewChars[ni++] = digits[ch >> 4];
174 rawNewChars[ni++] = digits[ch & 0xF];
175 } else {
176 rawNewChars[ni++] = '%';
177 rawNewChars[ni++] = 'u';
178 rawNewChars[ni++] = digits[ch >> 12];
179 rawNewChars[ni++] = digits[(ch & 0xF00) >> 8];
180 rawNewChars[ni++] = digits[(ch & 0xF0) >> 4];
181 rawNewChars[ni++] = digits[ch & 0xF];
182 }
183 }
184 MOZ_ASSERT(ni == newLength);
185
186 *newLengthOut = newLength;
187 return true;
188 }
189
str_escape(JSContext * cx,unsigned argc,Value * vp)190 static bool str_escape(JSContext* cx, unsigned argc, Value* vp) {
191 CallArgs args = CallArgsFromVp(argc, vp);
192
193 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
194 if (!str) {
195 return false;
196 }
197
198 InlineCharBuffer<Latin1Char> newChars;
199 uint32_t newLength = 0; // initialize to silence GCC warning
200 if (str->hasLatin1Chars()) {
201 AutoCheckCannotGC nogc;
202 if (!Escape(cx, str->latin1Chars(nogc), str->length(), newChars,
203 &newLength)) {
204 return false;
205 }
206 } else {
207 AutoCheckCannotGC nogc;
208 if (!Escape(cx, str->twoByteChars(nogc), str->length(), newChars,
209 &newLength)) {
210 return false;
211 }
212 }
213
214 // Return input if no characters need to be escaped.
215 if (newLength == str->length()) {
216 args.rval().setString(str);
217 return true;
218 }
219
220 JSString* res = newChars.toString(cx, newLength);
221 if (!res) {
222 return false;
223 }
224
225 args.rval().setString(res);
226 return true;
227 }
228
229 template <typename CharT>
Unhex4(const RangedPtr<const CharT> chars,char16_t * result)230 static inline bool Unhex4(const RangedPtr<const CharT> chars,
231 char16_t* result) {
232 CharT a = chars[0], b = chars[1], c = chars[2], d = chars[3];
233
234 if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b) && IsAsciiHexDigit(c) &&
235 IsAsciiHexDigit(d))) {
236 return false;
237 }
238
239 char16_t unhex = AsciiAlphanumericToNumber(a);
240 unhex = (unhex << 4) + AsciiAlphanumericToNumber(b);
241 unhex = (unhex << 4) + AsciiAlphanumericToNumber(c);
242 unhex = (unhex << 4) + AsciiAlphanumericToNumber(d);
243 *result = unhex;
244 return true;
245 }
246
247 template <typename CharT>
Unhex2(const RangedPtr<const CharT> chars,char16_t * result)248 static inline bool Unhex2(const RangedPtr<const CharT> chars,
249 char16_t* result) {
250 CharT a = chars[0], b = chars[1];
251
252 if (!(IsAsciiHexDigit(a) && IsAsciiHexDigit(b))) {
253 return false;
254 }
255
256 *result = (AsciiAlphanumericToNumber(a) << 4) + AsciiAlphanumericToNumber(b);
257 return true;
258 }
259
260 template <typename CharT>
Unescape(StringBuffer & sb,const mozilla::Range<const CharT> chars)261 static bool Unescape(StringBuffer& sb,
262 const mozilla::Range<const CharT> chars) {
263 // Step 2.
264 uint32_t length = chars.length();
265
266 /*
267 * Note that the spec algorithm has been optimized to avoid building
268 * a string in the case where no escapes are present.
269 */
270 bool building = false;
271
272 #define ENSURE_BUILDING \
273 do { \
274 if (!building) { \
275 building = true; \
276 if (!sb.reserve(length)) return false; \
277 sb.infallibleAppend(chars.begin().get(), k); \
278 } \
279 } while (false);
280
281 // Step 4.
282 uint32_t k = 0;
283
284 // Step 5.
285 while (k < length) {
286 // Step 5.a.
287 char16_t c = chars[k];
288
289 // Step 5.b.
290 if (c == '%') {
291 static_assert(JSString::MAX_LENGTH < UINT32_MAX - 6,
292 "String length is not near UINT32_MAX");
293
294 // Steps 5.b.i-ii.
295 if (k + 6 <= length && chars[k + 1] == 'u') {
296 if (Unhex4(chars.begin() + k + 2, &c)) {
297 ENSURE_BUILDING
298 k += 5;
299 }
300 } else if (k + 3 <= length) {
301 if (Unhex2(chars.begin() + k + 1, &c)) {
302 ENSURE_BUILDING
303 k += 2;
304 }
305 }
306 }
307
308 // Step 5.c.
309 if (building && !sb.append(c)) {
310 return false;
311 }
312
313 // Step 5.d.
314 k += 1;
315 }
316
317 return true;
318 #undef ENSURE_BUILDING
319 }
320
321 // ES2018 draft rev f83aa38282c2a60c6916ebc410bfdf105a0f6a54
322 // B.2.1.2 unescape ( string )
str_unescape(JSContext * cx,unsigned argc,Value * vp)323 static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) {
324 CallArgs args = CallArgsFromVp(argc, vp);
325
326 // Step 1.
327 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
328 if (!str) {
329 return false;
330 }
331
332 // Step 3.
333 JSStringBuilder sb(cx);
334 if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) {
335 return false;
336 }
337
338 // Steps 2, 4-5.
339 if (str->hasLatin1Chars()) {
340 AutoCheckCannotGC nogc;
341 if (!Unescape(sb, str->latin1Range(nogc))) {
342 return false;
343 }
344 } else {
345 AutoCheckCannotGC nogc;
346 if (!Unescape(sb, str->twoByteRange(nogc))) {
347 return false;
348 }
349 }
350
351 // Step 6.
352 JSLinearString* result;
353 if (!sb.empty()) {
354 result = sb.finishString();
355 if (!result) {
356 return false;
357 }
358 } else {
359 result = str;
360 }
361
362 args.rval().setString(result);
363 return true;
364 }
365
str_uneval(JSContext * cx,unsigned argc,Value * vp)366 static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) {
367 CallArgs args = CallArgsFromVp(argc, vp);
368 JSString* str = ValueToSource(cx, args.get(0));
369 if (!str) {
370 return false;
371 }
372
373 args.rval().setString(str);
374 return true;
375 }
376
377 static const JSFunctionSpec string_functions[] = {
378 JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING),
379 JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING),
380 JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING),
381 JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING),
382 JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING),
383 JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,
384 JSPROP_RESOLVING),
385 JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,
386 JSPROP_RESOLVING),
387
388 JS_FS_END};
389
390 static const unsigned STRING_ELEMENT_ATTRS =
391 JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
392
str_enumerate(JSContext * cx,HandleObject obj)393 static bool str_enumerate(JSContext* cx, HandleObject obj) {
394 RootedString str(cx, obj->as<StringObject>().unbox());
395 js::StaticStrings& staticStrings = cx->staticStrings();
396
397 RootedValue value(cx);
398 for (size_t i = 0, length = str->length(); i < length; i++) {
399 JSString* str1 = staticStrings.getUnitStringForElement(cx, str, i);
400 if (!str1) {
401 return false;
402 }
403 value.setString(str1);
404 if (!DefineDataElement(cx, obj, i, value,
405 STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
406 return false;
407 }
408 }
409
410 return true;
411 }
412
str_mayResolve(const JSAtomState &,jsid id,JSObject *)413 static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) {
414 // str_resolve ignores non-integer ids.
415 return id.isInt();
416 }
417
str_resolve(JSContext * cx,HandleObject obj,HandleId id,bool * resolvedp)418 static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id,
419 bool* resolvedp) {
420 if (!id.isInt()) {
421 return true;
422 }
423
424 RootedString str(cx, obj->as<StringObject>().unbox());
425
426 int32_t slot = id.toInt();
427 if ((size_t)slot < str->length()) {
428 JSString* str1 =
429 cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
430 if (!str1) {
431 return false;
432 }
433 RootedValue value(cx, StringValue(str1));
434 if (!DefineDataElement(cx, obj, uint32_t(slot), value,
435 STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
436 return false;
437 }
438 *resolvedp = true;
439 }
440 return true;
441 }
442
443 static const JSClassOps StringObjectClassOps = {
444 nullptr, // addProperty
445 nullptr, // delProperty
446 str_enumerate, // enumerate
447 nullptr, // newEnumerate
448 str_resolve, // resolve
449 str_mayResolve, // mayResolve
450 nullptr, // finalize
451 nullptr, // call
452 nullptr, // hasInstance
453 nullptr, // construct
454 nullptr, // trace
455 };
456
457 const JSClass StringObject::class_ = {
458 js_String_str,
459 JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
460 JSCLASS_HAS_CACHED_PROTO(JSProto_String),
461 &StringObjectClassOps, &StringObject::classSpec_};
462
463 /*
464 * Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)|
465 * from nearly all String.prototype.* functions.
466 */
ToStringForStringFunction(JSContext * cx,const char * funName,HandleValue thisv)467 static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction(
468 JSContext* cx, const char* funName, HandleValue thisv) {
469 AutoCheckRecursionLimit recursion(cx);
470 if (!recursion.check(cx)) {
471 return nullptr;
472 }
473
474 if (thisv.isString()) {
475 return thisv.toString();
476 }
477
478 if (thisv.isObject()) {
479 RootedObject obj(cx, &thisv.toObject());
480 if (obj->is<StringObject>()) {
481 StringObject* nobj = &obj->as<StringObject>();
482 // We have to make sure that the ToPrimitive call from ToString
483 // would be unobservable.
484 if (HasNoToPrimitiveMethodPure(nobj, cx) &&
485 HasNativeMethodPure(nobj, cx->names().toString, str_toString, cx)) {
486 return nobj->unbox();
487 }
488 }
489 } else if (thisv.isNullOrUndefined()) {
490 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
491 JSMSG_INCOMPATIBLE_PROTO, "String", funName,
492 thisv.isNull() ? "null" : "undefined");
493 return nullptr;
494 }
495
496 return ToStringSlow<CanGC>(cx, thisv);
497 }
498
IsString(HandleValue v)499 MOZ_ALWAYS_INLINE bool IsString(HandleValue v) {
500 return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
501 }
502
str_toSource_impl(JSContext * cx,const CallArgs & args)503 MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) {
504 MOZ_ASSERT(IsString(args.thisv()));
505
506 JSString* str = ToString<CanGC>(cx, args.thisv());
507 if (!str) {
508 return false;
509 }
510
511 UniqueChars quoted = QuoteString(cx, str, '"');
512 if (!quoted) {
513 return false;
514 }
515
516 JSStringBuilder sb(cx);
517 if (!sb.append("(new String(") ||
518 !sb.append(quoted.get(), strlen(quoted.get())) || !sb.append("))")) {
519 return false;
520 }
521
522 JSString* result = sb.finishString();
523 if (!result) {
524 return false;
525 }
526 args.rval().setString(result);
527 return true;
528 }
529
str_toSource(JSContext * cx,unsigned argc,Value * vp)530 static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) {
531 CallArgs args = CallArgsFromVp(argc, vp);
532 return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
533 }
534
str_toString_impl(JSContext * cx,const CallArgs & args)535 MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) {
536 MOZ_ASSERT(IsString(args.thisv()));
537
538 args.rval().setString(
539 args.thisv().isString()
540 ? args.thisv().toString()
541 : args.thisv().toObject().as<StringObject>().unbox());
542 return true;
543 }
544
str_toString(JSContext * cx,unsigned argc,Value * vp)545 bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) {
546 CallArgs args = CallArgsFromVp(argc, vp);
547 return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
548 }
549
550 /*
551 * Java-like string native methods.
552 */
553
SubstringKernel(JSContext * cx,HandleString str,int32_t beginInt,int32_t lengthInt)554 JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
555 int32_t lengthInt) {
556 MOZ_ASSERT(0 <= beginInt);
557 MOZ_ASSERT(0 <= lengthInt);
558 MOZ_ASSERT(uint32_t(beginInt) <= str->length());
559 MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
560
561 uint32_t begin = beginInt;
562 uint32_t len = lengthInt;
563
564 /*
565 * Optimization for one level deep ropes.
566 * This is common for the following pattern:
567 *
568 * while() {
569 * text = text.substr(0, x) + "bla" + text.substr(x)
570 * test.charCodeAt(x + 1)
571 * }
572 */
573 if (str->isRope()) {
574 JSRope* rope = &str->asRope();
575
576 /* Substring is totally in leftChild of rope. */
577 if (begin + len <= rope->leftChild()->length()) {
578 return NewDependentString(cx, rope->leftChild(), begin, len);
579 }
580
581 /* Substring is totally in rightChild of rope. */
582 if (begin >= rope->leftChild()->length()) {
583 begin -= rope->leftChild()->length();
584 return NewDependentString(cx, rope->rightChild(), begin, len);
585 }
586
587 /*
588 * Requested substring is partly in the left and partly in right child.
589 * Create a rope of substrings for both childs.
590 */
591 MOZ_ASSERT(begin < rope->leftChild()->length() &&
592 begin + len > rope->leftChild()->length());
593
594 size_t lhsLength = rope->leftChild()->length() - begin;
595 size_t rhsLength = begin + len - rope->leftChild()->length();
596
597 Rooted<JSRope*> ropeRoot(cx, rope);
598 RootedString lhs(
599 cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
600 if (!lhs) {
601 return nullptr;
602 }
603
604 RootedString rhs(
605 cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
606 if (!rhs) {
607 return nullptr;
608 }
609
610 return JSRope::new_<CanGC>(cx, lhs, rhs, len);
611 }
612
613 return NewDependentString(cx, str, begin, len);
614 }
615
616 /**
617 * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
618 * depending on its context:
619 * When it's preceded by a cased character and not followed by another cased
620 * character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
621 * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
622 *
623 * Unicode 9.0, §3.13 Default Case Algorithms
624 */
Final_Sigma(const char16_t * chars,size_t length,size_t index)625 static char16_t Final_Sigma(const char16_t* chars, size_t length,
626 size_t index) {
627 MOZ_ASSERT(index < length);
628 MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
629 MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
630 unicode::GREEK_SMALL_LETTER_SIGMA);
631
632 #if JS_HAS_INTL_API
633 // Tell the analysis the BinaryProperty.contains function pointer called by
634 // mozilla::intl::String::Is{CaseIgnorable, Cased} cannot GC.
635 JS::AutoSuppressGCAnalysis nogc;
636
637 bool precededByCased = false;
638 for (size_t i = index; i > 0;) {
639 char16_t c = chars[--i];
640 uint32_t codePoint = c;
641 if (unicode::IsTrailSurrogate(c) && i > 0) {
642 char16_t lead = chars[i - 1];
643 if (unicode::IsLeadSurrogate(lead)) {
644 codePoint = unicode::UTF16Decode(lead, c);
645 i--;
646 }
647 }
648
649 // Ignore any characters with the property Case_Ignorable.
650 // NB: We need to skip over all Case_Ignorable characters, even when
651 // they also have the Cased binary property.
652 if (mozilla::intl::String::IsCaseIgnorable(codePoint)) {
653 continue;
654 }
655
656 precededByCased = mozilla::intl::String::IsCased(codePoint);
657 break;
658 }
659 if (!precededByCased) {
660 return unicode::GREEK_SMALL_LETTER_SIGMA;
661 }
662
663 bool followedByCased = false;
664 for (size_t i = index + 1; i < length;) {
665 char16_t c = chars[i++];
666 uint32_t codePoint = c;
667 if (unicode::IsLeadSurrogate(c) && i < length) {
668 char16_t trail = chars[i];
669 if (unicode::IsTrailSurrogate(trail)) {
670 codePoint = unicode::UTF16Decode(c, trail);
671 i++;
672 }
673 }
674
675 // Ignore any characters with the property Case_Ignorable.
676 // NB: We need to skip over all Case_Ignorable characters, even when
677 // they also have the Cased binary property.
678 if (mozilla::intl::String::IsCaseIgnorable(codePoint)) {
679 continue;
680 }
681
682 followedByCased = mozilla::intl::String::IsCased(codePoint);
683 break;
684 }
685 if (!followedByCased) {
686 return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
687 }
688 #endif
689
690 return unicode::GREEK_SMALL_LETTER_SIGMA;
691 }
692
693 // If |srcLength == destLength| is true, the destination buffer was allocated
694 // with the same size as the source buffer. When we append characters which
695 // have special casing mappings, we test |srcLength == destLength| to decide
696 // if we need to back out and reallocate a sufficiently large destination
697 // buffer. Otherwise the destination buffer was allocated with the correct
698 // size to hold all lower case mapped characters, i.e.
699 // |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
700 template <typename CharT>
ToLowerCaseImpl(CharT * destChars,const CharT * srcChars,size_t startIndex,size_t srcLength,size_t destLength)701 static size_t ToLowerCaseImpl(CharT* destChars, const CharT* srcChars,
702 size_t startIndex, size_t srcLength,
703 size_t destLength) {
704 MOZ_ASSERT(startIndex < srcLength);
705 MOZ_ASSERT(srcLength <= destLength);
706 if constexpr (std::is_same_v<CharT, Latin1Char>) {
707 MOZ_ASSERT(srcLength == destLength);
708 }
709
710 size_t j = startIndex;
711 for (size_t i = startIndex; i < srcLength; i++) {
712 CharT c = srcChars[i];
713 if constexpr (!std::is_same_v<CharT, Latin1Char>) {
714 if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
715 char16_t trail = srcChars[i + 1];
716 if (unicode::IsTrailSurrogate(trail)) {
717 trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
718 destChars[j++] = c;
719 destChars[j++] = trail;
720 i++;
721 continue;
722 }
723 }
724
725 // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
726 // lowercases to <U+0069 U+0307>.
727 if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
728 // Return if the output buffer is too small.
729 if (srcLength == destLength) {
730 return i;
731 }
732
733 destChars[j++] = CharT('i');
734 destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
735 continue;
736 }
737
738 // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
739 // one of two codepoints depending on context.
740 if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
741 destChars[j++] = Final_Sigma(srcChars, srcLength, i);
742 continue;
743 }
744 }
745
746 c = unicode::ToLowerCase(c);
747 destChars[j++] = c;
748 }
749
750 MOZ_ASSERT(j == destLength);
751 return srcLength;
752 }
753
ToLowerCaseLength(const char16_t * chars,size_t startIndex,size_t length)754 static size_t ToLowerCaseLength(const char16_t* chars, size_t startIndex,
755 size_t length) {
756 size_t lowerLength = length;
757 for (size_t i = startIndex; i < length; i++) {
758 char16_t c = chars[i];
759
760 // U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
761 if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
762 lowerLength += 1;
763 }
764 }
765 return lowerLength;
766 }
767
768 template <typename CharT>
ToLowerCase(JSContext * cx,JSLinearString * str)769 static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) {
770 // Unlike toUpperCase, toLowerCase has the nice invariant that if the
771 // input is a Latin-1 string, the output is also a Latin-1 string.
772
773 InlineCharBuffer<CharT> newChars;
774
775 const size_t length = str->length();
776 size_t resultLength;
777 {
778 AutoCheckCannotGC nogc;
779 const CharT* chars = str->chars<CharT>(nogc);
780
781 // We don't need extra special casing checks in the loop below,
782 // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
783 // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
784 MOZ_ASSERT(unicode::ChangesWhenLowerCased(
785 unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
786 "U+0130 has a simple lower case mapping");
787 MOZ_ASSERT(
788 unicode::ChangesWhenLowerCased(unicode::GREEK_CAPITAL_LETTER_SIGMA),
789 "U+03A3 has a simple lower case mapping");
790
791 // One element Latin-1 strings can be directly retrieved from the
792 // static strings cache.
793 if constexpr (std::is_same_v<CharT, Latin1Char>) {
794 if (length == 1) {
795 CharT lower = unicode::ToLowerCase(chars[0]);
796 MOZ_ASSERT(StaticStrings::hasUnit(lower));
797
798 return cx->staticStrings().getUnit(lower);
799 }
800 }
801
802 // Look for the first character that changes when lowercased.
803 size_t i = 0;
804 for (; i < length; i++) {
805 CharT c = chars[i];
806 if constexpr (!std::is_same_v<CharT, Latin1Char>) {
807 if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
808 CharT trail = chars[i + 1];
809 if (unicode::IsTrailSurrogate(trail)) {
810 if (unicode::ChangesWhenLowerCasedNonBMP(c, trail)) {
811 break;
812 }
813
814 i++;
815 continue;
816 }
817 }
818 }
819 if (unicode::ChangesWhenLowerCased(c)) {
820 break;
821 }
822 }
823
824 // If no character needs to change, return the input string.
825 if (i == length) {
826 return str;
827 }
828
829 resultLength = length;
830 if (!newChars.maybeAlloc(cx, resultLength)) {
831 return nullptr;
832 }
833
834 PodCopy(newChars.get(), chars, i);
835
836 size_t readChars =
837 ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
838 if constexpr (!std::is_same_v<CharT, Latin1Char>) {
839 if (readChars < length) {
840 resultLength = ToLowerCaseLength(chars, readChars, length);
841
842 if (!newChars.maybeRealloc(cx, length, resultLength)) {
843 return nullptr;
844 }
845
846 MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.get(), chars,
847 readChars, length,
848 resultLength));
849 }
850 } else {
851 MOZ_ASSERT(readChars == length,
852 "Latin-1 strings don't have special lower case mappings");
853 }
854 }
855
856 return newChars.toStringDontDeflate(cx, resultLength);
857 }
858
StringToLowerCase(JSContext * cx,HandleString string)859 JSString* js::StringToLowerCase(JSContext* cx, HandleString string) {
860 JSLinearString* linear = string->ensureLinear(cx);
861 if (!linear) {
862 return nullptr;
863 }
864
865 if (linear->hasLatin1Chars()) {
866 return ToLowerCase<Latin1Char>(cx, linear);
867 }
868 return ToLowerCase<char16_t>(cx, linear);
869 }
870
str_toLowerCase(JSContext * cx,unsigned argc,Value * vp)871 static bool str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) {
872 CallArgs args = CallArgsFromVp(argc, vp);
873
874 RootedString str(cx,
875 ToStringForStringFunction(cx, "toLowerCase", args.thisv()));
876 if (!str) {
877 return false;
878 }
879
880 JSString* result = StringToLowerCase(cx, str);
881 if (!result) {
882 return false;
883 }
884
885 args.rval().setString(result);
886 return true;
887 }
888
889 #if JS_HAS_INTL_API
890 // String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
891 // with core functionality performed by the intrinsic below.
892
CaseMappingLocale(JSContext * cx,JSString * str)893 static const char* CaseMappingLocale(JSContext* cx, JSString* str) {
894 JSLinearString* locale = str->ensureLinear(cx);
895 if (!locale) {
896 return nullptr;
897 }
898
899 MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");
900
901 // Lithuanian, Turkish, and Azeri have language dependent case mappings.
902 static const char languagesWithSpecialCasing[][3] = {"lt", "tr", "az"};
903
904 // All strings in |languagesWithSpecialCasing| are of length two, so we
905 // only need to compare the first two characters to find a matching locale.
906 // ES2017 Intl, §9.2.2 BestAvailableLocale
907 if (locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-') {
908 for (const auto& language : languagesWithSpecialCasing) {
909 if (locale->latin1OrTwoByteChar(0) == language[0] &&
910 locale->latin1OrTwoByteChar(1) == language[1]) {
911 return language;
912 }
913 }
914 }
915
916 return ""; // ICU root locale
917 }
918
intl_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)919 bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
920 CallArgs args = CallArgsFromVp(argc, vp);
921 MOZ_ASSERT(args.length() == 2);
922 MOZ_ASSERT(args[0].isString());
923 MOZ_ASSERT(args[1].isString());
924
925 RootedString string(cx, args[0].toString());
926
927 const char* locale = CaseMappingLocale(cx, args[1].toString());
928 if (!locale) {
929 return false;
930 }
931
932 // Call String.prototype.toLowerCase() for language independent casing.
933 if (intl::StringsAreEqual(locale, "")) {
934 JSString* str = StringToLowerCase(cx, string);
935 if (!str) {
936 return false;
937 }
938
939 args.rval().setString(str);
940 return true;
941 }
942
943 AutoStableStringChars inputChars(cx);
944 if (!inputChars.initTwoByte(cx, string)) {
945 return false;
946 }
947 mozilla::Range<const char16_t> input = inputChars.twoByteRange();
948
949 // Note: maximum case mapping length is three characters, so the result
950 // length might be > INT32_MAX. ICU will fail in this case.
951 static_assert(JSString::MAX_LENGTH <= INT32_MAX,
952 "String length must fit in int32_t for ICU");
953
954 static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
955
956 intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx);
957
958 auto ok = mozilla::intl::String::ToLocaleLowerCase(locale, input, buffer);
959 if (ok.isErr()) {
960 intl::ReportInternalError(cx, ok.unwrapErr());
961 return false;
962 }
963
964 JSString* result = buffer.toString(cx);
965 if (!result) {
966 return false;
967 }
968
969 args.rval().setString(result);
970 return true;
971 }
972
973 #else
974
975 // When the Intl API is not exposed, String.prototype.toLowerCase is implemented
976 // in C++.
str_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)977 static bool str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
978 CallArgs args = CallArgsFromVp(argc, vp);
979
980 RootedString str(
981 cx, ToStringForStringFunction(cx, "toLocaleLowerCase", args.thisv()));
982 if (!str) {
983 return false;
984 }
985
986 /*
987 * Forcefully ignore the first (or any) argument and return toLowerCase(),
988 * ECMA has reserved that argument, presumably for defining the locale.
989 */
990 if (cx->runtime()->localeCallbacks &&
991 cx->runtime()->localeCallbacks->localeToLowerCase) {
992 RootedValue result(cx);
993 if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result)) {
994 return false;
995 }
996
997 args.rval().set(result);
998 return true;
999 }
1000
1001 RootedLinearString linear(cx, str->ensureLinear(cx));
1002 if (!linear) {
1003 return false;
1004 }
1005
1006 JSString* result = StringToLowerCase(cx, linear);
1007 if (!result) {
1008 return false;
1009 }
1010
1011 args.rval().setString(result);
1012 return true;
1013 }
1014
1015 #endif // JS_HAS_INTL_API
1016
ToUpperCaseHasSpecialCasing(Latin1Char charCode)1017 static inline bool ToUpperCaseHasSpecialCasing(Latin1Char charCode) {
1018 // U+00DF LATIN SMALL LETTER SHARP S is the only Latin-1 code point with
1019 // special casing rules, so detect it inline.
1020 bool hasUpperCaseSpecialCasing =
1021 charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
1022 MOZ_ASSERT(hasUpperCaseSpecialCasing ==
1023 unicode::ChangesWhenUpperCasedSpecialCasing(charCode));
1024
1025 return hasUpperCaseSpecialCasing;
1026 }
1027
ToUpperCaseHasSpecialCasing(char16_t charCode)1028 static inline bool ToUpperCaseHasSpecialCasing(char16_t charCode) {
1029 return unicode::ChangesWhenUpperCasedSpecialCasing(charCode);
1030 }
1031
ToUpperCaseLengthSpecialCasing(Latin1Char charCode)1032 static inline size_t ToUpperCaseLengthSpecialCasing(Latin1Char charCode) {
1033 // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1034 MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1035
1036 return 2;
1037 }
1038
ToUpperCaseLengthSpecialCasing(char16_t charCode)1039 static inline size_t ToUpperCaseLengthSpecialCasing(char16_t charCode) {
1040 MOZ_ASSERT(ToUpperCaseHasSpecialCasing(charCode));
1041
1042 return unicode::LengthUpperCaseSpecialCasing(charCode);
1043 }
1044
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,Latin1Char * elements,size_t * index)1045 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1046 Latin1Char* elements,
1047 size_t* index) {
1048 // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1049 MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1050 static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
1051
1052 elements[(*index)++] = 'S';
1053 elements[(*index)++] = 'S';
1054 }
1055
ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,char16_t * elements,size_t * index)1056 static inline void ToUpperCaseAppendUpperCaseSpecialCasing(char16_t charCode,
1057 char16_t* elements,
1058 size_t* index) {
1059 unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
1060 }
1061
1062 // See ToLowerCaseImpl for an explanation of the parameters.
1063 template <typename DestChar, typename SrcChar>
ToUpperCaseImpl(DestChar * destChars,const SrcChar * srcChars,size_t startIndex,size_t srcLength,size_t destLength)1064 static size_t ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars,
1065 size_t startIndex, size_t srcLength,
1066 size_t destLength) {
1067 static_assert(std::is_same_v<SrcChar, Latin1Char> ||
1068 !std::is_same_v<DestChar, Latin1Char>,
1069 "cannot write non-Latin-1 characters into Latin-1 string");
1070 MOZ_ASSERT(startIndex < srcLength);
1071 MOZ_ASSERT(srcLength <= destLength);
1072
1073 size_t j = startIndex;
1074 for (size_t i = startIndex; i < srcLength; i++) {
1075 char16_t c = srcChars[i];
1076 if constexpr (!std::is_same_v<DestChar, Latin1Char>) {
1077 if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
1078 char16_t trail = srcChars[i + 1];
1079 if (unicode::IsTrailSurrogate(trail)) {
1080 trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
1081 destChars[j++] = c;
1082 destChars[j++] = trail;
1083 i++;
1084 continue;
1085 }
1086 }
1087 }
1088
1089 if (MOZ_UNLIKELY(c > 0x7f &&
1090 ToUpperCaseHasSpecialCasing(static_cast<SrcChar>(c)))) {
1091 // Return if the output buffer is too small.
1092 if (srcLength == destLength) {
1093 return i;
1094 }
1095
1096 ToUpperCaseAppendUpperCaseSpecialCasing(c, destChars, &j);
1097 continue;
1098 }
1099
1100 c = unicode::ToUpperCase(c);
1101 if constexpr (std::is_same_v<DestChar, Latin1Char>) {
1102 MOZ_ASSERT(c <= JSString::MAX_LATIN1_CHAR);
1103 }
1104 destChars[j++] = c;
1105 }
1106
1107 MOZ_ASSERT(j == destLength);
1108 return srcLength;
1109 }
1110
1111 template <typename CharT>
ToUpperCaseLength(const CharT * chars,size_t startIndex,size_t length)1112 static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex,
1113 size_t length) {
1114 size_t upperLength = length;
1115 for (size_t i = startIndex; i < length; i++) {
1116 char16_t c = chars[i];
1117
1118 if (c > 0x7f && ToUpperCaseHasSpecialCasing(static_cast<CharT>(c))) {
1119 upperLength += ToUpperCaseLengthSpecialCasing(static_cast<CharT>(c)) - 1;
1120 }
1121 }
1122 return upperLength;
1123 }
1124
1125 template <typename DestChar, typename SrcChar>
CopyChars(DestChar * destChars,const SrcChar * srcChars,size_t length)1126 static inline void CopyChars(DestChar* destChars, const SrcChar* srcChars,
1127 size_t length) {
1128 static_assert(!std::is_same_v<DestChar, SrcChar>,
1129 "PodCopy is used for the same type case");
1130 for (size_t i = 0; i < length; i++) {
1131 destChars[i] = srcChars[i];
1132 }
1133 }
1134
1135 template <typename CharT>
CopyChars(CharT * destChars,const CharT * srcChars,size_t length)1136 static inline void CopyChars(CharT* destChars, const CharT* srcChars,
1137 size_t length) {
1138 PodCopy(destChars, srcChars, length);
1139 }
1140
1141 template <typename DestChar, typename SrcChar>
ToUpperCase(JSContext * cx,InlineCharBuffer<DestChar> & newChars,const SrcChar * chars,size_t startIndex,size_t length,size_t * resultLength)1142 static inline bool ToUpperCase(JSContext* cx,
1143 InlineCharBuffer<DestChar>& newChars,
1144 const SrcChar* chars, size_t startIndex,
1145 size_t length, size_t* resultLength) {
1146 MOZ_ASSERT(startIndex < length);
1147
1148 *resultLength = length;
1149 if (!newChars.maybeAlloc(cx, length)) {
1150 return false;
1151 }
1152
1153 CopyChars(newChars.get(), chars, startIndex);
1154
1155 size_t readChars =
1156 ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
1157 if (readChars < length) {
1158 size_t actualLength = ToUpperCaseLength(chars, readChars, length);
1159
1160 *resultLength = actualLength;
1161 if (!newChars.maybeRealloc(cx, length, actualLength)) {
1162 return false;
1163 }
1164
1165 MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.get(), chars, readChars,
1166 length, actualLength));
1167 }
1168
1169 return true;
1170 }
1171
1172 template <typename CharT>
ToUpperCase(JSContext * cx,JSLinearString * str)1173 static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) {
1174 using Latin1Buffer = InlineCharBuffer<Latin1Char>;
1175 using TwoByteBuffer = InlineCharBuffer<char16_t>;
1176
1177 mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
1178 const size_t length = str->length();
1179 size_t resultLength;
1180 {
1181 AutoCheckCannotGC nogc;
1182 const CharT* chars = str->chars<CharT>(nogc);
1183
1184 // Most one element Latin-1 strings can be directly retrieved from the
1185 // static strings cache.
1186 if constexpr (std::is_same_v<CharT, Latin1Char>) {
1187 if (length == 1) {
1188 Latin1Char c = chars[0];
1189 if (c != unicode::MICRO_SIGN &&
1190 c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
1191 c != unicode::LATIN_SMALL_LETTER_SHARP_S) {
1192 char16_t upper = unicode::ToUpperCase(c);
1193 MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
1194 MOZ_ASSERT(StaticStrings::hasUnit(upper));
1195
1196 return cx->staticStrings().getUnit(upper);
1197 }
1198
1199 MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
1200 ToUpperCaseHasSpecialCasing(c));
1201 }
1202 }
1203
1204 // Look for the first character that changes when uppercased.
1205 size_t i = 0;
1206 for (; i < length; i++) {
1207 CharT c = chars[i];
1208 if constexpr (!std::is_same_v<CharT, Latin1Char>) {
1209 if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
1210 CharT trail = chars[i + 1];
1211 if (unicode::IsTrailSurrogate(trail)) {
1212 if (unicode::ChangesWhenUpperCasedNonBMP(c, trail)) {
1213 break;
1214 }
1215
1216 i++;
1217 continue;
1218 }
1219 }
1220 }
1221 if (unicode::ChangesWhenUpperCased(c)) {
1222 break;
1223 }
1224 if (MOZ_UNLIKELY(c > 0x7f && ToUpperCaseHasSpecialCasing(c))) {
1225 break;
1226 }
1227 }
1228
1229 // If no character needs to change, return the input string.
1230 if (i == length) {
1231 return str;
1232 }
1233
1234 // The string changes when uppercased, so we must create a new string.
1235 // Can it be Latin-1?
1236 //
1237 // If the original string is Latin-1, it can -- unless the string
1238 // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
1239 // the only Latin-1 codepoints that don't uppercase within Latin-1.
1240 // Search for those codepoints to decide whether the new string can be
1241 // Latin-1.
1242 // If the original string is a two-byte string, its uppercase form is
1243 // so rarely Latin-1 that we don't even consider creating a new
1244 // Latin-1 string.
1245 if constexpr (std::is_same_v<CharT, Latin1Char>) {
1246 bool resultIsLatin1 = true;
1247 for (size_t j = i; j < length; j++) {
1248 Latin1Char c = chars[j];
1249 if (c == unicode::MICRO_SIGN ||
1250 c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
1251 MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
1252 resultIsLatin1 = false;
1253 break;
1254 } else {
1255 MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
1256 }
1257 }
1258
1259 if (resultIsLatin1) {
1260 newChars.construct<Latin1Buffer>();
1261
1262 if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length,
1263 &resultLength)) {
1264 return nullptr;
1265 }
1266 } else {
1267 newChars.construct<TwoByteBuffer>();
1268
1269 if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1270 &resultLength)) {
1271 return nullptr;
1272 }
1273 }
1274 } else {
1275 newChars.construct<TwoByteBuffer>();
1276
1277 if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1278 &resultLength)) {
1279 return nullptr;
1280 }
1281 }
1282 }
1283
1284 return newChars.constructed<Latin1Buffer>()
1285 ? newChars.ref<Latin1Buffer>().toStringDontDeflate(cx,
1286 resultLength)
1287 : newChars.ref<TwoByteBuffer>().toStringDontDeflate(cx,
1288 resultLength);
1289 }
1290
StringToUpperCase(JSContext * cx,HandleString string)1291 JSString* js::StringToUpperCase(JSContext* cx, HandleString string) {
1292 JSLinearString* linear = string->ensureLinear(cx);
1293 if (!linear) {
1294 return nullptr;
1295 }
1296
1297 if (linear->hasLatin1Chars()) {
1298 return ToUpperCase<Latin1Char>(cx, linear);
1299 }
1300 return ToUpperCase<char16_t>(cx, linear);
1301 }
1302
str_toUpperCase(JSContext * cx,unsigned argc,Value * vp)1303 static bool str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1304 CallArgs args = CallArgsFromVp(argc, vp);
1305
1306 RootedString str(cx,
1307 ToStringForStringFunction(cx, "toUpperCase", args.thisv()));
1308 if (!str) {
1309 return false;
1310 }
1311
1312 JSString* result = StringToUpperCase(cx, str);
1313 if (!result) {
1314 return false;
1315 }
1316
1317 args.rval().setString(result);
1318 return true;
1319 }
1320
1321 #if JS_HAS_INTL_API
1322 // String.prototype.toLocaleUpperCase is self-hosted when Intl is exposed,
1323 // with core functionality performed by the intrinsic below.
1324
intl_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1325 bool js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1326 CallArgs args = CallArgsFromVp(argc, vp);
1327 MOZ_ASSERT(args.length() == 2);
1328 MOZ_ASSERT(args[0].isString());
1329 MOZ_ASSERT(args[1].isString());
1330
1331 RootedString string(cx, args[0].toString());
1332
1333 const char* locale = CaseMappingLocale(cx, args[1].toString());
1334 if (!locale) {
1335 return false;
1336 }
1337
1338 // Call String.prototype.toUpperCase() for language independent casing.
1339 if (intl::StringsAreEqual(locale, "")) {
1340 JSString* str = js::StringToUpperCase(cx, string);
1341 if (!str) {
1342 return false;
1343 }
1344
1345 args.rval().setString(str);
1346 return true;
1347 }
1348
1349 AutoStableStringChars inputChars(cx);
1350 if (!inputChars.initTwoByte(cx, string)) {
1351 return false;
1352 }
1353 mozilla::Range<const char16_t> input = inputChars.twoByteRange();
1354
1355 // Note: maximum case mapping length is three characters, so the result
1356 // length might be > INT32_MAX. ICU will fail in this case.
1357 static_assert(JSString::MAX_LENGTH <= INT32_MAX,
1358 "String length must fit in int32_t for ICU");
1359
1360 static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1361
1362 intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx);
1363
1364 auto ok = mozilla::intl::String::ToLocaleUpperCase(locale, input, buffer);
1365 if (ok.isErr()) {
1366 intl::ReportInternalError(cx, ok.unwrapErr());
1367 return false;
1368 }
1369
1370 JSString* result = buffer.toString(cx);
1371 if (!result) {
1372 return false;
1373 }
1374
1375 args.rval().setString(result);
1376 return true;
1377 }
1378
1379 #else
1380
1381 // When the Intl API is not exposed, String.prototype.toUpperCase is implemented
1382 // in C++.
str_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1383 static bool str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1384 CallArgs args = CallArgsFromVp(argc, vp);
1385
1386 RootedString str(
1387 cx, ToStringForStringFunction(cx, "toLocaleUpperCase", args.thisv()));
1388 if (!str) {
1389 return false;
1390 }
1391
1392 /*
1393 * Forcefully ignore the first (or any) argument and return toUpperCase(),
1394 * ECMA has reserved that argument, presumably for defining the locale.
1395 */
1396 if (cx->runtime()->localeCallbacks &&
1397 cx->runtime()->localeCallbacks->localeToUpperCase) {
1398 RootedValue result(cx);
1399 if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result)) {
1400 return false;
1401 }
1402
1403 args.rval().set(result);
1404 return true;
1405 }
1406
1407 RootedLinearString linear(cx, str->ensureLinear(cx));
1408 if (!linear) {
1409 return false;
1410 }
1411
1412 JSString* result = StringToUpperCase(cx, linear);
1413 if (!result) {
1414 return false;
1415 }
1416
1417 args.rval().setString(result);
1418 return true;
1419 }
1420
1421 #endif // JS_HAS_INTL_API
1422
1423 #if JS_HAS_INTL_API
1424
1425 // String.prototype.localeCompare is self-hosted when Intl functionality is
1426 // exposed, and the only intrinsics it requires are provided in the
1427 // implementation of Intl.Collator.
1428
1429 #else
1430
1431 // String.prototype.localeCompare is implemented in C++ (delegating to
1432 // JSLocaleCallbacks) when Intl functionality is not exposed.
str_localeCompare(JSContext * cx,unsigned argc,Value * vp)1433 static bool str_localeCompare(JSContext* cx, unsigned argc, Value* vp) {
1434 CallArgs args = CallArgsFromVp(argc, vp);
1435 RootedString str(
1436 cx, ToStringForStringFunction(cx, "localeCompare", args.thisv()));
1437 if (!str) {
1438 return false;
1439 }
1440
1441 RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
1442 if (!thatStr) {
1443 return false;
1444 }
1445
1446 if (cx->runtime()->localeCallbacks &&
1447 cx->runtime()->localeCallbacks->localeCompare) {
1448 RootedValue result(cx);
1449 if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr,
1450 &result)) {
1451 return false;
1452 }
1453
1454 args.rval().set(result);
1455 return true;
1456 }
1457
1458 int32_t result;
1459 if (!CompareStrings(cx, str, thatStr, &result)) {
1460 return false;
1461 }
1462
1463 args.rval().setInt32(result);
1464 return true;
1465 }
1466
1467 #endif // JS_HAS_INTL_API
1468
1469 #if JS_HAS_INTL_API
1470
1471 // ES2017 draft rev 45e890512fd77add72cc0ee742785f9f6f6482de
1472 // 21.1.3.12 String.prototype.normalize ( [ form ] )
1473 //
1474 // String.prototype.normalize is only implementable if ICU's normalization
1475 // functionality is available.
str_normalize(JSContext * cx,unsigned argc,Value * vp)1476 static bool str_normalize(JSContext* cx, unsigned argc, Value* vp) {
1477 CallArgs args = CallArgsFromVp(argc, vp);
1478
1479 // Steps 1-2.
1480 RootedString str(cx,
1481 ToStringForStringFunction(cx, "normalize", args.thisv()));
1482 if (!str) {
1483 return false;
1484 }
1485
1486 using NormalizationForm = mozilla::intl::String::NormalizationForm;
1487
1488 NormalizationForm form;
1489 if (!args.hasDefined(0)) {
1490 // Step 3.
1491 form = NormalizationForm::NFC;
1492 } else {
1493 // Step 4.
1494 JSLinearString* formStr = ArgToLinearString(cx, args, 0);
1495 if (!formStr) {
1496 return false;
1497 }
1498
1499 // Step 5.
1500 if (EqualStrings(formStr, cx->names().NFC)) {
1501 form = NormalizationForm::NFC;
1502 } else if (EqualStrings(formStr, cx->names().NFD)) {
1503 form = NormalizationForm::NFD;
1504 } else if (EqualStrings(formStr, cx->names().NFKC)) {
1505 form = NormalizationForm::NFKC;
1506 } else if (EqualStrings(formStr, cx->names().NFKD)) {
1507 form = NormalizationForm::NFKD;
1508 } else {
1509 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1510 JSMSG_INVALID_NORMALIZE_FORM);
1511 return false;
1512 }
1513 }
1514
1515 // Latin-1 strings are already in Normalization Form C.
1516 if (form == NormalizationForm::NFC && str->hasLatin1Chars()) {
1517 // Step 7.
1518 args.rval().setString(str);
1519 return true;
1520 }
1521
1522 // Step 6.
1523 AutoStableStringChars stableChars(cx);
1524 if (!stableChars.initTwoByte(cx, str)) {
1525 return false;
1526 }
1527
1528 mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();
1529
1530 static const size_t INLINE_CAPACITY = js::intl::INITIAL_CHAR_BUFFER_SIZE;
1531
1532 intl::FormatBuffer<char16_t, INLINE_CAPACITY> buffer(cx);
1533
1534 auto alreadyNormalized =
1535 mozilla::intl::String::Normalize(form, srcChars, buffer);
1536 if (alreadyNormalized.isErr()) {
1537 intl::ReportInternalError(cx, alreadyNormalized.unwrapErr());
1538 return false;
1539 }
1540
1541 using AlreadyNormalized = mozilla::intl::String::AlreadyNormalized;
1542
1543 // Return if the input string is already normalized.
1544 if (alreadyNormalized.unwrap() == AlreadyNormalized::Yes) {
1545 // Step 7.
1546 args.rval().setString(str);
1547 return true;
1548 }
1549
1550 JSString* ns = buffer.toString(cx);
1551 if (!ns) {
1552 return false;
1553 }
1554
1555 // Step 7.
1556 args.rval().setString(ns);
1557 return true;
1558 }
1559
1560 #endif // JS_HAS_INTL_API
1561
str_charAt(JSContext * cx,unsigned argc,Value * vp)1562 static bool str_charAt(JSContext* cx, unsigned argc, Value* vp) {
1563 CallArgs args = CallArgsFromVp(argc, vp);
1564
1565 RootedString str(cx);
1566 size_t i;
1567 if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
1568 str = args.thisv().toString();
1569 i = size_t(args[0].toInt32());
1570 if (i >= str->length()) {
1571 goto out_of_range;
1572 }
1573 } else {
1574 str = ToStringForStringFunction(cx, "charAt", args.thisv());
1575 if (!str) {
1576 return false;
1577 }
1578
1579 double d = 0.0;
1580 if (args.length() > 0 && !ToInteger(cx, args[0], &d)) {
1581 return false;
1582 }
1583
1584 if (d < 0 || str->length() <= d) {
1585 goto out_of_range;
1586 }
1587 i = size_t(d);
1588 }
1589
1590 str = cx->staticStrings().getUnitStringForElement(cx, str, i);
1591 if (!str) {
1592 return false;
1593 }
1594 args.rval().setString(str);
1595 return true;
1596
1597 out_of_range:
1598 args.rval().setString(cx->runtime()->emptyString);
1599 return true;
1600 }
1601
str_charCodeAt_impl(JSContext * cx,HandleString string,HandleValue index,MutableHandleValue res)1602 bool js::str_charCodeAt_impl(JSContext* cx, HandleString string,
1603 HandleValue index, MutableHandleValue res) {
1604 size_t i;
1605 if (index.isInt32()) {
1606 i = index.toInt32();
1607 if (i >= string->length()) {
1608 goto out_of_range;
1609 }
1610 } else {
1611 double d = 0.0;
1612 if (!ToInteger(cx, index, &d)) {
1613 return false;
1614 }
1615 // check whether d is negative as size_t is unsigned
1616 if (d < 0 || string->length() <= d) {
1617 goto out_of_range;
1618 }
1619 i = size_t(d);
1620 }
1621 char16_t c;
1622 if (!string->getChar(cx, i, &c)) {
1623 return false;
1624 }
1625 res.setInt32(c);
1626 return true;
1627
1628 out_of_range:
1629 res.setNaN();
1630 return true;
1631 }
1632
str_charCodeAt(JSContext * cx,unsigned argc,Value * vp)1633 bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) {
1634 CallArgs args = CallArgsFromVp(argc, vp);
1635 RootedString str(cx);
1636 RootedValue index(cx);
1637 if (args.thisv().isString()) {
1638 str = args.thisv().toString();
1639 } else {
1640 str = ToStringForStringFunction(cx, "charCodeAt", args.thisv());
1641 if (!str) {
1642 return false;
1643 }
1644 }
1645 if (args.length() != 0) {
1646 index = args[0];
1647 } else {
1648 index.setInt32(0);
1649 }
1650
1651 return js::str_charCodeAt_impl(cx, str, index, args.rval());
1652 }
1653
1654 /*
1655 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1656 * The patlen argument must be positive and no greater than sBMHPatLenMax.
1657 *
1658 * Return the index of pat in text, or -1 if not found.
1659 */
1660 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1661 static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */
1662 static const int sBMHBadPattern =
1663 -2; /* return value if pat is not ISO-Latin-1 */
1664
1665 template <typename TextChar, typename PatChar>
BoyerMooreHorspool(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1666 static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen,
1667 const PatChar* pat, uint32_t patLen) {
1668 MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1669
1670 uint8_t skip[sBMHCharSetSize];
1671 for (uint32_t i = 0; i < sBMHCharSetSize; i++) {
1672 skip[i] = uint8_t(patLen);
1673 }
1674
1675 uint32_t patLast = patLen - 1;
1676 for (uint32_t i = 0; i < patLast; i++) {
1677 char16_t c = pat[i];
1678 if (c >= sBMHCharSetSize) {
1679 return sBMHBadPattern;
1680 }
1681 skip[c] = uint8_t(patLast - i);
1682 }
1683
1684 for (uint32_t k = patLast; k < textLen;) {
1685 for (uint32_t i = k, j = patLast;; i--, j--) {
1686 if (text[i] != pat[j]) {
1687 break;
1688 }
1689 if (j == 0) {
1690 return static_cast<int>(i); /* safe: max string size */
1691 }
1692 }
1693
1694 char16_t c = text[k];
1695 k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1696 }
1697 return -1;
1698 }
1699
1700 template <typename TextChar, typename PatChar>
1701 struct MemCmp {
1702 using Extent = uint32_t;
computeExtentMemCmp1703 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
1704 uint32_t patLen) {
1705 return (patLen - 1) * sizeof(PatChar);
1706 }
matchMemCmp1707 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1708 Extent extent) {
1709 MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1710 return memcmp(p, t, extent) == 0;
1711 }
1712 };
1713
1714 template <typename TextChar, typename PatChar>
1715 struct ManualCmp {
1716 using Extent = const PatChar*;
computeExtentManualCmp1717 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat,
1718 uint32_t patLen) {
1719 return pat + patLen;
1720 }
matchManualCmp1721 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1722 Extent extent) {
1723 for (; p != extent; ++p, ++t) {
1724 if (*p != *t) {
1725 return false;
1726 }
1727 }
1728 return true;
1729 }
1730 };
1731
1732 template <typename TextChar, typename PatChar>
FirstCharMatcherUnrolled(const TextChar * text,uint32_t n,const PatChar pat)1733 static const TextChar* FirstCharMatcherUnrolled(const TextChar* text,
1734 uint32_t n, const PatChar pat) {
1735 const TextChar* textend = text + n;
1736 const TextChar* t = text;
1737
1738 switch ((textend - t) & 7) {
1739 case 0:
1740 if (*t++ == pat) return t - 1;
1741 [[fallthrough]];
1742 case 7:
1743 if (*t++ == pat) return t - 1;
1744 [[fallthrough]];
1745 case 6:
1746 if (*t++ == pat) return t - 1;
1747 [[fallthrough]];
1748 case 5:
1749 if (*t++ == pat) return t - 1;
1750 [[fallthrough]];
1751 case 4:
1752 if (*t++ == pat) return t - 1;
1753 [[fallthrough]];
1754 case 3:
1755 if (*t++ == pat) return t - 1;
1756 [[fallthrough]];
1757 case 2:
1758 if (*t++ == pat) return t - 1;
1759 [[fallthrough]];
1760 case 1:
1761 if (*t++ == pat) return t - 1;
1762 }
1763 while (textend != t) {
1764 if (t[0] == pat) return t;
1765 if (t[1] == pat) return t + 1;
1766 if (t[2] == pat) return t + 2;
1767 if (t[3] == pat) return t + 3;
1768 if (t[4] == pat) return t + 4;
1769 if (t[5] == pat) return t + 5;
1770 if (t[6] == pat) return t + 6;
1771 if (t[7] == pat) return t + 7;
1772 t += 8;
1773 }
1774 return nullptr;
1775 }
1776
FirstCharMatcher8bit(const char * text,uint32_t n,const char pat)1777 static const char* FirstCharMatcher8bit(const char* text, uint32_t n,
1778 const char pat) {
1779 return reinterpret_cast<const char*>(memchr(text, pat, n));
1780 }
1781
1782 template <class InnerMatch, typename TextChar, typename PatChar>
Matcher(const TextChar * text,uint32_t textlen,const PatChar * pat,uint32_t patlen)1783 static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
1784 uint32_t patlen) {
1785 MOZ_ASSERT(patlen > 0);
1786
1787 if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) {
1788 return -1;
1789 }
1790
1791 const typename InnerMatch::Extent extent =
1792 InnerMatch::computeExtent(pat, patlen);
1793
1794 uint32_t i = 0;
1795 uint32_t n = textlen - patlen + 1;
1796 while (i < n) {
1797 const TextChar* pos;
1798
1799 if (sizeof(TextChar) == 1) {
1800 MOZ_ASSERT(pat[0] <= 0xff);
1801 pos = (TextChar*)FirstCharMatcher8bit((char*)text + i, n - i, pat[0]);
1802 } else {
1803 pos = FirstCharMatcherUnrolled(text + i, n - i, char16_t(pat[0]));
1804 }
1805
1806 if (pos == nullptr) {
1807 return -1;
1808 }
1809
1810 i = static_cast<uint32_t>(pos - text);
1811 if (InnerMatch::match(pat + 1, text + i + 1, extent)) {
1812 return i;
1813 }
1814
1815 i += 1;
1816 }
1817 return -1;
1818 }
1819
1820 template <typename TextChar, typename PatChar>
StringMatch(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1821 static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen,
1822 const PatChar* pat, uint32_t patLen) {
1823 if (patLen == 0) {
1824 return 0;
1825 }
1826 if (textLen < patLen) {
1827 return -1;
1828 }
1829
1830 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1831 /*
1832 * Given enough registers, the unrolled loop below is faster than the
1833 * following loop. 32-bit x86 does not have enough registers.
1834 */
1835 if (patLen == 1) {
1836 const PatChar p0 = *pat;
1837 const TextChar* end = text + textLen;
1838 for (const TextChar* c = text; c != end; ++c) {
1839 if (*c == p0) {
1840 return c - text;
1841 }
1842 }
1843 return -1;
1844 }
1845 #endif
1846
1847 /*
1848 * If the text or pattern string is short, BMH will be more expensive than
1849 * the basic linear scan due to initialization cost and a more complex loop
1850 * body. While the correct threshold is input-dependent, we can make a few
1851 * conservative observations:
1852 * - When |textLen| is "big enough", the initialization time will be
1853 * proportionally small, so the worst-case slowdown is minimized.
1854 * - When |patLen| is "too small", even the best case for BMH will be
1855 * slower than a simple scan for large |textLen| due to the more complex
1856 * loop body of BMH.
1857 * From this, the values for "big enough" and "too small" are determined
1858 * empirically. See bug 526348.
1859 */
1860 if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
1861 int index = BoyerMooreHorspool(text, textLen, pat, patLen);
1862 if (index != sBMHBadPattern) {
1863 return index;
1864 }
1865 }
1866
1867 /*
1868 * For big patterns with large potential overlap we want the SIMD-optimized
1869 * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1870 * use memcmp if one of the strings is TwoByte and the other is Latin-1.
1871 */
1872 return (patLen > 128 && std::is_same_v<TextChar, PatChar>)
1873 ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(
1874 text, textLen, pat, patLen)
1875 : Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(
1876 text, textLen, pat, patLen);
1877 }
1878
StringMatch(JSLinearString * text,JSLinearString * pat,uint32_t start=0)1879 static int32_t StringMatch(JSLinearString* text, JSLinearString* pat,
1880 uint32_t start = 0) {
1881 MOZ_ASSERT(start <= text->length());
1882 uint32_t textLen = text->length() - start;
1883 uint32_t patLen = pat->length();
1884
1885 int match;
1886 AutoCheckCannotGC nogc;
1887 if (text->hasLatin1Chars()) {
1888 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1889 if (pat->hasLatin1Chars()) {
1890 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1891 } else {
1892 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1893 }
1894 } else {
1895 const char16_t* textChars = text->twoByteChars(nogc) + start;
1896 if (pat->hasLatin1Chars()) {
1897 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1898 } else {
1899 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1900 }
1901 }
1902
1903 return (match == -1) ? -1 : start + match;
1904 }
1905
1906 static const size_t sRopeMatchThresholdRatioLog2 = 4;
1907
StringFindPattern(JSLinearString * text,JSLinearString * pat,size_t start)1908 int js::StringFindPattern(JSLinearString* text, JSLinearString* pat,
1909 size_t start) {
1910 return StringMatch(text, pat, start);
1911 }
1912
1913 // When an algorithm does not need a string represented as a single linear
1914 // array of characters, this range utility may be used to traverse the string a
1915 // sequence of linear arrays of characters. This avoids flattening ropes.
1916 class StringSegmentRange {
1917 // If malloc() shows up in any profiles from this vector, we can add a new
1918 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1919 using StackVector = JS::GCVector<JSString*, 16>;
1920 Rooted<StackVector> stack;
1921 RootedLinearString cur;
1922
settle(JSString * str)1923 bool settle(JSString* str) {
1924 while (str->isRope()) {
1925 JSRope& rope = str->asRope();
1926 if (!stack.append(rope.rightChild())) {
1927 return false;
1928 }
1929 str = rope.leftChild();
1930 }
1931 cur = &str->asLinear();
1932 return true;
1933 }
1934
1935 public:
StringSegmentRange(JSContext * cx)1936 explicit StringSegmentRange(JSContext* cx)
1937 : stack(cx, StackVector(cx)), cur(cx) {}
1938
init(JSString * str)1939 [[nodiscard]] bool init(JSString* str) {
1940 MOZ_ASSERT(stack.empty());
1941 return settle(str);
1942 }
1943
empty() const1944 bool empty() const { return cur == nullptr; }
1945
front() const1946 JSLinearString* front() const {
1947 MOZ_ASSERT(!cur->isRope());
1948 return cur;
1949 }
1950
popFront()1951 [[nodiscard]] bool popFront() {
1952 MOZ_ASSERT(!empty());
1953 if (stack.empty()) {
1954 cur = nullptr;
1955 return true;
1956 }
1957 return settle(stack.popCopy());
1958 }
1959 };
1960
1961 typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
1962
1963 template <typename TextChar, typename PatChar>
RopeMatchImpl(const AutoCheckCannotGC & nogc,LinearStringVector & strings,const PatChar * pat,size_t patLen)1964 static int RopeMatchImpl(const AutoCheckCannotGC& nogc,
1965 LinearStringVector& strings, const PatChar* pat,
1966 size_t patLen) {
1967 /* Absolute offset from the beginning of the logical text string. */
1968 int pos = 0;
1969
1970 for (JSLinearString** outerp = strings.begin(); outerp != strings.end();
1971 ++outerp) {
1972 /* Try to find a match within 'outer'. */
1973 JSLinearString* outer = *outerp;
1974 const TextChar* chars = outer->chars<TextChar>(nogc);
1975 size_t len = outer->length();
1976 int matchResult = StringMatch(chars, len, pat, patLen);
1977 if (matchResult != -1) {
1978 /* Matched! */
1979 return pos + matchResult;
1980 }
1981
1982 /* Try to find a match starting in 'outer' and running into other nodes. */
1983 const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
1984 const TextChar* const textend = chars + len;
1985 const PatChar p0 = *pat;
1986 const PatChar* const p1 = pat + 1;
1987 const PatChar* const patend = pat + patLen;
1988 for (const TextChar* t = text; t != textend;) {
1989 if (*t++ != p0) {
1990 continue;
1991 }
1992
1993 JSLinearString** innerp = outerp;
1994 const TextChar* ttend = textend;
1995 const TextChar* tt = t;
1996 for (const PatChar* pp = p1; pp != patend; ++pp, ++tt) {
1997 while (tt == ttend) {
1998 if (++innerp == strings.end()) {
1999 return -1;
2000 }
2001
2002 JSLinearString* inner = *innerp;
2003 tt = inner->chars<TextChar>(nogc);
2004 ttend = tt + inner->length();
2005 }
2006 if (*pp != *tt) {
2007 goto break_continue;
2008 }
2009 }
2010
2011 /* Matched! */
2012 return pos + (t - chars) - 1; /* -1 because of *t++ above */
2013
2014 break_continue:;
2015 }
2016
2017 pos += len;
2018 }
2019
2020 return -1;
2021 }
2022
2023 /*
2024 * RopeMatch takes the text to search and the pattern to search for in the text.
2025 * RopeMatch returns false on OOM and otherwise returns the match index through
2026 * the 'match' outparam (-1 for not found).
2027 */
RopeMatch(JSContext * cx,JSRope * text,JSLinearString * pat,int * match)2028 static bool RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat,
2029 int* match) {
2030 uint32_t patLen = pat->length();
2031 if (patLen == 0) {
2032 *match = 0;
2033 return true;
2034 }
2035 if (text->length() < patLen) {
2036 *match = -1;
2037 return true;
2038 }
2039
2040 /*
2041 * List of leaf nodes in the rope. If we run out of memory when trying to
2042 * append to this list, we can still fall back to StringMatch, so use the
2043 * system allocator so we don't report OOM in that case.
2044 */
2045 LinearStringVector strings;
2046
2047 /*
2048 * We don't want to do rope matching if there is a poor node-to-char ratio,
2049 * since this means spending a lot of time in the match loop below. We also
2050 * need to build the list of leaf nodes. Do both here: iterate over the
2051 * nodes so long as there are not too many.
2052 *
2053 * We also don't use rope matching if the rope contains both Latin-1 and
2054 * TwoByte nodes, to simplify the match algorithm.
2055 */
2056 {
2057 size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
2058 StringSegmentRange r(cx);
2059 if (!r.init(text)) {
2060 return false;
2061 }
2062
2063 bool textIsLatin1 = text->hasLatin1Chars();
2064 while (!r.empty()) {
2065 if (threshold-- == 0 || r.front()->hasLatin1Chars() != textIsLatin1 ||
2066 !strings.append(r.front())) {
2067 JSLinearString* linear = text->ensureLinear(cx);
2068 if (!linear) {
2069 return false;
2070 }
2071
2072 *match = StringMatch(linear, pat);
2073 return true;
2074 }
2075 if (!r.popFront()) {
2076 return false;
2077 }
2078 }
2079 }
2080
2081 AutoCheckCannotGC nogc;
2082 if (text->hasLatin1Chars()) {
2083 if (pat->hasLatin1Chars()) {
2084 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc),
2085 patLen);
2086 } else {
2087 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc),
2088 patLen);
2089 }
2090 } else {
2091 if (pat->hasLatin1Chars()) {
2092 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc),
2093 patLen);
2094 } else {
2095 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc),
2096 patLen);
2097 }
2098 }
2099
2100 return true;
2101 }
2102
ReportErrorIfFirstArgIsRegExp(JSContext * cx,const CallArgs & args)2103 static MOZ_ALWAYS_INLINE bool ReportErrorIfFirstArgIsRegExp(
2104 JSContext* cx, const CallArgs& args) {
2105 // Only call IsRegExp if the first argument is definitely an object, so we
2106 // don't pay the cost of an additional function call in the common case.
2107 if (args.length() == 0 || !args[0].isObject()) {
2108 return true;
2109 }
2110
2111 bool isRegExp;
2112 if (!IsRegExp(cx, args[0], &isRegExp)) {
2113 return false;
2114 }
2115
2116 if (isRegExp) {
2117 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
2118 JSMSG_INVALID_ARG_TYPE, "first", "",
2119 "Regular Expression");
2120 return false;
2121 }
2122 return true;
2123 }
2124
2125 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2126 // 21.1.3.7 String.prototype.includes ( searchString [ , position ] )
str_includes(JSContext * cx,unsigned argc,Value * vp)2127 bool js::str_includes(JSContext* cx, unsigned argc, Value* vp) {
2128 CallArgs args = CallArgsFromVp(argc, vp);
2129
2130 // Steps 1-2.
2131 RootedString str(cx, ToStringForStringFunction(cx, "includes", args.thisv()));
2132 if (!str) {
2133 return false;
2134 }
2135
2136 // Steps 3-4.
2137 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2138 return false;
2139 }
2140
2141 // Step 5.
2142 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2143 if (!searchStr) {
2144 return false;
2145 }
2146
2147 // Step 6.
2148 uint32_t pos = 0;
2149 if (args.hasDefined(1)) {
2150 if (args[1].isInt32()) {
2151 int i = args[1].toInt32();
2152 pos = (i < 0) ? 0U : uint32_t(i);
2153 } else {
2154 double d;
2155 if (!ToInteger(cx, args[1], &d)) {
2156 return false;
2157 }
2158 pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2159 }
2160 }
2161
2162 // Step 7.
2163 uint32_t textLen = str->length();
2164
2165 // Step 8.
2166 uint32_t start = std::min(pos, textLen);
2167
2168 // Steps 9-10.
2169 JSLinearString* text = str->ensureLinear(cx);
2170 if (!text) {
2171 return false;
2172 }
2173
2174 args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
2175 return true;
2176 }
2177
2178 /* ES6 20120927 draft 15.5.4.7. */
str_indexOf(JSContext * cx,unsigned argc,Value * vp)2179 bool js::str_indexOf(JSContext* cx, unsigned argc, Value* vp) {
2180 CallArgs args = CallArgsFromVp(argc, vp);
2181
2182 // Steps 1, 2, and 3
2183 RootedString str(cx, ToStringForStringFunction(cx, "indexOf", args.thisv()));
2184 if (!str) {
2185 return false;
2186 }
2187
2188 // Steps 4 and 5
2189 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2190 if (!searchStr) {
2191 return false;
2192 }
2193
2194 // Steps 6 and 7
2195 uint32_t pos = 0;
2196 if (args.hasDefined(1)) {
2197 if (args[1].isInt32()) {
2198 int i = args[1].toInt32();
2199 pos = (i < 0) ? 0U : uint32_t(i);
2200 } else {
2201 double d;
2202 if (!ToInteger(cx, args[1], &d)) {
2203 return false;
2204 }
2205 pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2206 }
2207 }
2208
2209 // Step 8
2210 uint32_t textLen = str->length();
2211
2212 // Step 9
2213 uint32_t start = std::min(pos, textLen);
2214
2215 if (str == searchStr) {
2216 // AngularJS often invokes "false".indexOf("false"). This check should
2217 // be cheap enough to not hurt anything else.
2218 args.rval().setInt32(start == 0 ? 0 : -1);
2219 return true;
2220 }
2221
2222 // Steps 10 and 11
2223 JSLinearString* text = str->ensureLinear(cx);
2224 if (!text) {
2225 return false;
2226 }
2227
2228 args.rval().setInt32(StringMatch(text, searchStr, start));
2229 return true;
2230 }
2231
2232 template <typename TextChar, typename PatChar>
LastIndexOfImpl(const TextChar * text,size_t textLen,const PatChar * pat,size_t patLen,size_t start)2233 static int32_t LastIndexOfImpl(const TextChar* text, size_t textLen,
2234 const PatChar* pat, size_t patLen,
2235 size_t start) {
2236 MOZ_ASSERT(patLen > 0);
2237 MOZ_ASSERT(patLen <= textLen);
2238 MOZ_ASSERT(start <= textLen - patLen);
2239
2240 const PatChar p0 = *pat;
2241 const PatChar* patNext = pat + 1;
2242 const PatChar* patEnd = pat + patLen;
2243
2244 for (const TextChar* t = text + start; t >= text; --t) {
2245 if (*t == p0) {
2246 const TextChar* t1 = t + 1;
2247 for (const PatChar* p1 = patNext; p1 < patEnd; ++p1, ++t1) {
2248 if (*t1 != *p1) {
2249 goto break_continue;
2250 }
2251 }
2252
2253 return static_cast<int32_t>(t - text);
2254 }
2255 break_continue:;
2256 }
2257
2258 return -1;
2259 }
2260
2261 // ES2017 draft rev 6859bb9ccaea9c6ede81d71e5320e3833b92cb3e
2262 // 21.1.3.9 String.prototype.lastIndexOf ( searchString [ , position ] )
str_lastIndexOf(JSContext * cx,unsigned argc,Value * vp)2263 static bool str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp) {
2264 CallArgs args = CallArgsFromVp(argc, vp);
2265
2266 // Steps 1-2.
2267 RootedString str(cx,
2268 ToStringForStringFunction(cx, "lastIndexOf", args.thisv()));
2269 if (!str) {
2270 return false;
2271 }
2272
2273 // Step 3.
2274 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2275 if (!searchStr) {
2276 return false;
2277 }
2278
2279 // Step 6.
2280 size_t len = str->length();
2281
2282 // Step 8.
2283 size_t searchLen = searchStr->length();
2284
2285 // Steps 4-5, 7.
2286 int start = len - searchLen; // Start searching here
2287 if (args.hasDefined(1)) {
2288 if (args[1].isInt32()) {
2289 int i = args[1].toInt32();
2290 if (i <= 0) {
2291 start = 0;
2292 } else if (i < start) {
2293 start = i;
2294 }
2295 } else {
2296 double d;
2297 if (!ToNumber(cx, args[1], &d)) {
2298 return false;
2299 }
2300 if (!IsNaN(d)) {
2301 d = JS::ToInteger(d);
2302 if (d <= 0) {
2303 start = 0;
2304 } else if (d < start) {
2305 start = int(d);
2306 }
2307 }
2308 }
2309 }
2310
2311 if (str == searchStr) {
2312 args.rval().setInt32(0);
2313 return true;
2314 }
2315
2316 if (searchLen > len) {
2317 args.rval().setInt32(-1);
2318 return true;
2319 }
2320
2321 if (searchLen == 0) {
2322 args.rval().setInt32(start);
2323 return true;
2324 }
2325 MOZ_ASSERT(0 <= start && size_t(start) < len);
2326
2327 JSLinearString* text = str->ensureLinear(cx);
2328 if (!text) {
2329 return false;
2330 }
2331
2332 // Step 9.
2333 int32_t res;
2334 AutoCheckCannotGC nogc;
2335 if (text->hasLatin1Chars()) {
2336 const Latin1Char* textChars = text->latin1Chars(nogc);
2337 if (searchStr->hasLatin1Chars()) {
2338 res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2339 searchLen, start);
2340 } else {
2341 res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2342 searchLen, start);
2343 }
2344 } else {
2345 const char16_t* textChars = text->twoByteChars(nogc);
2346 if (searchStr->hasLatin1Chars()) {
2347 res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2348 searchLen, start);
2349 } else {
2350 res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2351 searchLen, start);
2352 }
2353 }
2354
2355 args.rval().setInt32(res);
2356 return true;
2357 }
2358
2359 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2360 // 21.1.3.20 String.prototype.startsWith ( searchString [ , position ] )
str_startsWith(JSContext * cx,unsigned argc,Value * vp)2361 bool js::str_startsWith(JSContext* cx, unsigned argc, Value* vp) {
2362 CallArgs args = CallArgsFromVp(argc, vp);
2363
2364 // Steps 1-2.
2365 RootedString str(cx,
2366 ToStringForStringFunction(cx, "startsWith", args.thisv()));
2367 if (!str) {
2368 return false;
2369 }
2370
2371 // Steps 3-4.
2372 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2373 return false;
2374 }
2375
2376 // Step 5.
2377 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2378 if (!searchStr) {
2379 return false;
2380 }
2381
2382 // Step 6.
2383 uint32_t pos = 0;
2384 if (args.hasDefined(1)) {
2385 if (args[1].isInt32()) {
2386 int i = args[1].toInt32();
2387 pos = (i < 0) ? 0U : uint32_t(i);
2388 } else {
2389 double d;
2390 if (!ToInteger(cx, args[1], &d)) {
2391 return false;
2392 }
2393 pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2394 }
2395 }
2396
2397 // Step 7.
2398 uint32_t textLen = str->length();
2399
2400 // Step 8.
2401 uint32_t start = std::min(pos, textLen);
2402
2403 // Step 9.
2404 uint32_t searchLen = searchStr->length();
2405
2406 // Step 10.
2407 if (searchLen + start < searchLen || searchLen + start > textLen) {
2408 args.rval().setBoolean(false);
2409 return true;
2410 }
2411
2412 // Steps 11-12.
2413 JSLinearString* text = str->ensureLinear(cx);
2414 if (!text) {
2415 return false;
2416 }
2417
2418 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2419 return true;
2420 }
2421
2422 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2423 // 21.1.3.6 String.prototype.endsWith ( searchString [ , endPosition ] )
str_endsWith(JSContext * cx,unsigned argc,Value * vp)2424 bool js::str_endsWith(JSContext* cx, unsigned argc, Value* vp) {
2425 CallArgs args = CallArgsFromVp(argc, vp);
2426
2427 // Steps 1-2.
2428 RootedString str(cx, ToStringForStringFunction(cx, "endsWith", args.thisv()));
2429 if (!str) {
2430 return false;
2431 }
2432
2433 // Steps 3-4.
2434 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) {
2435 return false;
2436 }
2437
2438 // Step 5.
2439 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2440 if (!searchStr) {
2441 return false;
2442 }
2443
2444 // Step 6.
2445 uint32_t textLen = str->length();
2446
2447 // Step 7.
2448 uint32_t pos = textLen;
2449 if (args.hasDefined(1)) {
2450 if (args[1].isInt32()) {
2451 int i = args[1].toInt32();
2452 pos = (i < 0) ? 0U : uint32_t(i);
2453 } else {
2454 double d;
2455 if (!ToInteger(cx, args[1], &d)) {
2456 return false;
2457 }
2458 pos = uint32_t(std::min(std::max(d, 0.0), double(UINT32_MAX)));
2459 }
2460 }
2461
2462 // Step 8.
2463 uint32_t end = std::min(pos, textLen);
2464
2465 // Step 9.
2466 uint32_t searchLen = searchStr->length();
2467
2468 // Step 11 (reordered).
2469 if (searchLen > end) {
2470 args.rval().setBoolean(false);
2471 return true;
2472 }
2473
2474 // Step 10.
2475 uint32_t start = end - searchLen;
2476
2477 // Steps 12-13.
2478 JSLinearString* text = str->ensureLinear(cx);
2479 if (!text) {
2480 return false;
2481 }
2482
2483 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2484 return true;
2485 }
2486
2487 template <typename CharT>
TrimString(const CharT * chars,bool trimStart,bool trimEnd,size_t length,size_t * pBegin,size_t * pEnd)2488 static void TrimString(const CharT* chars, bool trimStart, bool trimEnd,
2489 size_t length, size_t* pBegin, size_t* pEnd) {
2490 size_t begin = 0, end = length;
2491
2492 if (trimStart) {
2493 while (begin < length && unicode::IsSpace(chars[begin])) {
2494 ++begin;
2495 }
2496 }
2497
2498 if (trimEnd) {
2499 while (end > begin && unicode::IsSpace(chars[end - 1])) {
2500 --end;
2501 }
2502 }
2503
2504 *pBegin = begin;
2505 *pEnd = end;
2506 }
2507
TrimString(JSContext * cx,const CallArgs & args,const char * funName,bool trimStart,bool trimEnd)2508 static bool TrimString(JSContext* cx, const CallArgs& args, const char* funName,
2509 bool trimStart, bool trimEnd) {
2510 JSString* str = ToStringForStringFunction(cx, funName, args.thisv());
2511 if (!str) {
2512 return false;
2513 }
2514
2515 JSLinearString* linear = str->ensureLinear(cx);
2516 if (!linear) {
2517 return false;
2518 }
2519
2520 size_t length = linear->length();
2521 size_t begin, end;
2522 if (linear->hasLatin1Chars()) {
2523 AutoCheckCannotGC nogc;
2524 TrimString(linear->latin1Chars(nogc), trimStart, trimEnd, length, &begin,
2525 &end);
2526 } else {
2527 AutoCheckCannotGC nogc;
2528 TrimString(linear->twoByteChars(nogc), trimStart, trimEnd, length, &begin,
2529 &end);
2530 }
2531
2532 JSLinearString* result = NewDependentString(cx, linear, begin, end - begin);
2533 if (!result) {
2534 return false;
2535 }
2536
2537 args.rval().setString(result);
2538 return true;
2539 }
2540
str_trim(JSContext * cx,unsigned argc,Value * vp)2541 static bool str_trim(JSContext* cx, unsigned argc, Value* vp) {
2542 CallArgs args = CallArgsFromVp(argc, vp);
2543 return TrimString(cx, args, "trim", true, true);
2544 }
2545
str_trimStart(JSContext * cx,unsigned argc,Value * vp)2546 static bool str_trimStart(JSContext* cx, unsigned argc, Value* vp) {
2547 CallArgs args = CallArgsFromVp(argc, vp);
2548 return TrimString(cx, args, "trimStart", true, false);
2549 }
2550
str_trimEnd(JSContext * cx,unsigned argc,Value * vp)2551 static bool str_trimEnd(JSContext* cx, unsigned argc, Value* vp) {
2552 CallArgs args = CallArgsFromVp(argc, vp);
2553 return TrimString(cx, args, "trimEnd", false, true);
2554 }
2555
2556 // Utility for building a rope (lazy concatenation) of strings.
2557 class RopeBuilder {
2558 JSContext* cx;
2559 RootedString res;
2560
2561 RopeBuilder(const RopeBuilder& other) = delete;
2562 void operator=(const RopeBuilder& other) = delete;
2563
2564 public:
RopeBuilder(JSContext * cx)2565 explicit RopeBuilder(JSContext* cx)
2566 : cx(cx), res(cx, cx->runtime()->emptyString) {}
2567
append(HandleString str)2568 inline bool append(HandleString str) {
2569 res = ConcatStrings<CanGC>(cx, res, str);
2570 return !!res;
2571 }
2572
result()2573 inline JSString* result() { return res; }
2574 };
2575
2576 namespace {
2577
2578 template <typename CharT>
FindDollarIndex(const CharT * chars,size_t length)2579 static uint32_t FindDollarIndex(const CharT* chars, size_t length) {
2580 if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
2581 uint32_t dollarIndex = p - chars;
2582 MOZ_ASSERT(dollarIndex < length);
2583 return dollarIndex;
2584 }
2585 return UINT32_MAX;
2586 }
2587
2588 } /* anonymous namespace */
2589
2590 /*
2591 * Constructs a result string that looks like:
2592 *
2593 * newstring = string[:matchStart] + repstr + string[matchEnd:]
2594 */
BuildFlatReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t matchStart,size_t patternLength)2595 static JSString* BuildFlatReplacement(JSContext* cx, HandleString textstr,
2596 HandleLinearString repstr,
2597 size_t matchStart, size_t patternLength) {
2598 size_t matchEnd = matchStart + patternLength;
2599
2600 RootedString resultStr(cx, NewDependentString(cx, textstr, 0, matchStart));
2601 if (!resultStr) {
2602 return nullptr;
2603 }
2604
2605 resultStr = ConcatStrings<CanGC>(cx, resultStr, repstr);
2606 if (!resultStr) {
2607 return nullptr;
2608 }
2609
2610 MOZ_ASSERT(textstr->length() >= matchEnd);
2611 RootedString rest(cx, NewDependentString(cx, textstr, matchEnd,
2612 textstr->length() - matchEnd));
2613 if (!rest) {
2614 return nullptr;
2615 }
2616
2617 return ConcatStrings<CanGC>(cx, resultStr, rest);
2618 }
2619
BuildFlatRopeReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t match,size_t patternLength)2620 static JSString* BuildFlatRopeReplacement(JSContext* cx, HandleString textstr,
2621 HandleLinearString repstr,
2622 size_t match, size_t patternLength) {
2623 MOZ_ASSERT(textstr->isRope());
2624
2625 size_t matchEnd = match + patternLength;
2626
2627 /*
2628 * If we are replacing over a rope, avoid flattening it by iterating
2629 * through it, building a new rope.
2630 */
2631 StringSegmentRange r(cx);
2632 if (!r.init(textstr)) {
2633 return nullptr;
2634 }
2635
2636 RopeBuilder builder(cx);
2637
2638 /*
2639 * Special case when the pattern string is '', which matches to the
2640 * head of the string and doesn't overlap with any component of the rope.
2641 */
2642 if (patternLength == 0) {
2643 MOZ_ASSERT(match == 0);
2644 if (!builder.append(repstr)) {
2645 return nullptr;
2646 }
2647 }
2648
2649 size_t pos = 0;
2650 while (!r.empty()) {
2651 RootedString str(cx, r.front());
2652 size_t len = str->length();
2653 size_t strEnd = pos + len;
2654 if (pos < matchEnd && strEnd > match) {
2655 /*
2656 * We need to special-case any part of the rope that overlaps
2657 * with the replacement string.
2658 */
2659 if (match >= pos) {
2660 /*
2661 * If this part of the rope overlaps with the left side of
2662 * the pattern, then it must be the only one to overlap with
2663 * the first character in the pattern, so we include the
2664 * replacement string here.
2665 */
2666 RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
2667 if (!leftSide || !builder.append(leftSide) || !builder.append(repstr)) {
2668 return nullptr;
2669 }
2670 }
2671
2672 /*
2673 * If str runs off the end of the matched string, append the
2674 * last part of str.
2675 */
2676 if (strEnd > matchEnd) {
2677 RootedString rightSide(
2678 cx, NewDependentString(cx, str, matchEnd - pos, strEnd - matchEnd));
2679 if (!rightSide || !builder.append(rightSide)) {
2680 return nullptr;
2681 }
2682 }
2683 } else {
2684 if (!builder.append(str)) {
2685 return nullptr;
2686 }
2687 }
2688 pos += str->length();
2689 if (!r.popFront()) {
2690 return nullptr;
2691 }
2692 }
2693
2694 return builder.result();
2695 }
2696
2697 template <typename CharT>
AppendDollarReplacement(StringBuffer & newReplaceChars,size_t firstDollarIndex,size_t matchStart,size_t matchLimit,JSLinearString * text,const CharT * repChars,size_t repLength)2698 static bool AppendDollarReplacement(StringBuffer& newReplaceChars,
2699 size_t firstDollarIndex, size_t matchStart,
2700 size_t matchLimit, JSLinearString* text,
2701 const CharT* repChars, size_t repLength) {
2702 MOZ_ASSERT(firstDollarIndex < repLength);
2703 MOZ_ASSERT(matchStart <= matchLimit);
2704 MOZ_ASSERT(matchLimit <= text->length());
2705
2706 // Move the pre-dollar chunk in bulk.
2707 if (!newReplaceChars.append(repChars, firstDollarIndex)) {
2708 return false;
2709 }
2710
2711 // Move the rest char-by-char, interpreting dollars as we encounter them.
2712 const CharT* repLimit = repChars + repLength;
2713 for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
2714 if (*it != '$' || it == repLimit - 1) {
2715 if (!newReplaceChars.append(*it)) {
2716 return false;
2717 }
2718 continue;
2719 }
2720
2721 switch (*(it + 1)) {
2722 case '$':
2723 // Eat one of the dollars.
2724 if (!newReplaceChars.append(*it)) {
2725 return false;
2726 }
2727 break;
2728 case '&':
2729 if (!newReplaceChars.appendSubstring(text, matchStart,
2730 matchLimit - matchStart)) {
2731 return false;
2732 }
2733 break;
2734 case '`':
2735 if (!newReplaceChars.appendSubstring(text, 0, matchStart)) {
2736 return false;
2737 }
2738 break;
2739 case '\'':
2740 if (!newReplaceChars.appendSubstring(text, matchLimit,
2741 text->length() - matchLimit)) {
2742 return false;
2743 }
2744 break;
2745 default:
2746 // The dollar we saw was not special (no matter what its mother told
2747 // it).
2748 if (!newReplaceChars.append(*it)) {
2749 return false;
2750 }
2751 continue;
2752 }
2753 ++it; // We always eat an extra char in the above switch.
2754 }
2755
2756 return true;
2757 }
2758
2759 /*
2760 * Perform a linear-scan dollar substitution on the replacement text.
2761 */
InterpretDollarReplacement(JSContext * cx,HandleString textstrArg,HandleLinearString repstr,uint32_t firstDollarIndex,size_t matchStart,size_t patternLength)2762 static JSLinearString* InterpretDollarReplacement(
2763 JSContext* cx, HandleString textstrArg, HandleLinearString repstr,
2764 uint32_t firstDollarIndex, size_t matchStart, size_t patternLength) {
2765 RootedLinearString textstr(cx, textstrArg->ensureLinear(cx));
2766 if (!textstr) {
2767 return nullptr;
2768 }
2769
2770 size_t matchLimit = matchStart + patternLength;
2771
2772 /*
2773 * Most probably:
2774 *
2775 * len(newstr) >= len(orig) - len(match) + len(replacement)
2776 *
2777 * Note that dollar vars _could_ make the resulting text smaller than this.
2778 */
2779 JSStringBuilder newReplaceChars(cx);
2780 if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars()) {
2781 return nullptr;
2782 }
2783
2784 if (!newReplaceChars.reserve(textstr->length() - patternLength +
2785 repstr->length())) {
2786 return nullptr;
2787 }
2788
2789 bool res;
2790 if (repstr->hasLatin1Chars()) {
2791 AutoCheckCannotGC nogc;
2792 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2793 matchLimit, textstr,
2794 repstr->latin1Chars(nogc), repstr->length());
2795 } else {
2796 AutoCheckCannotGC nogc;
2797 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2798 matchLimit, textstr,
2799 repstr->twoByteChars(nogc), repstr->length());
2800 }
2801 if (!res) {
2802 return nullptr;
2803 }
2804
2805 return newReplaceChars.finishString();
2806 }
2807
2808 template <typename StrChar, typename RepChar>
StrFlatReplaceGlobal(JSContext * cx,JSLinearString * str,JSLinearString * pat,JSLinearString * rep,StringBuffer & sb)2809 static bool StrFlatReplaceGlobal(JSContext* cx, JSLinearString* str,
2810 JSLinearString* pat, JSLinearString* rep,
2811 StringBuffer& sb) {
2812 MOZ_ASSERT(str->length() > 0);
2813
2814 AutoCheckCannotGC nogc;
2815 const StrChar* strChars = str->chars<StrChar>(nogc);
2816 const RepChar* repChars = rep->chars<RepChar>(nogc);
2817
2818 // The pattern is empty, so we interleave the replacement string in-between
2819 // each character.
2820 if (!pat->length()) {
2821 CheckedInt<uint32_t> strLength(str->length());
2822 CheckedInt<uint32_t> repLength(rep->length());
2823 CheckedInt<uint32_t> length = repLength * (strLength - 1) + strLength;
2824 if (!length.isValid()) {
2825 ReportAllocationOverflow(cx);
2826 return false;
2827 }
2828
2829 if (!sb.reserve(length.value())) {
2830 return false;
2831 }
2832
2833 for (unsigned i = 0; i < str->length() - 1; ++i, ++strChars) {
2834 sb.infallibleAppend(*strChars);
2835 sb.infallibleAppend(repChars, rep->length());
2836 }
2837 sb.infallibleAppend(*strChars);
2838 return true;
2839 }
2840
2841 // If it's true, we are sure that the result's length is, at least, the same
2842 // length as |str->length()|.
2843 if (rep->length() >= pat->length()) {
2844 if (!sb.reserve(str->length())) {
2845 return false;
2846 }
2847 }
2848
2849 uint32_t start = 0;
2850 for (;;) {
2851 int match = StringMatch(str, pat, start);
2852 if (match < 0) {
2853 break;
2854 }
2855 if (!sb.append(strChars + start, match - start)) {
2856 return false;
2857 }
2858 if (!sb.append(repChars, rep->length())) {
2859 return false;
2860 }
2861 start = match + pat->length();
2862 }
2863
2864 if (!sb.append(strChars + start, str->length() - start)) {
2865 return false;
2866 }
2867
2868 return true;
2869 }
2870
2871 // This is identical to "str.split(pattern).join(replacement)" except that we
2872 // do some deforestation optimization in Ion.
StringFlatReplaceString(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2873 JSString* js::StringFlatReplaceString(JSContext* cx, HandleString string,
2874 HandleString pattern,
2875 HandleString replacement) {
2876 MOZ_ASSERT(string);
2877 MOZ_ASSERT(pattern);
2878 MOZ_ASSERT(replacement);
2879
2880 if (!string->length()) {
2881 return string;
2882 }
2883
2884 RootedLinearString linearRepl(cx, replacement->ensureLinear(cx));
2885 if (!linearRepl) {
2886 return nullptr;
2887 }
2888
2889 RootedLinearString linearPat(cx, pattern->ensureLinear(cx));
2890 if (!linearPat) {
2891 return nullptr;
2892 }
2893
2894 RootedLinearString linearStr(cx, string->ensureLinear(cx));
2895 if (!linearStr) {
2896 return nullptr;
2897 }
2898
2899 JSStringBuilder sb(cx);
2900 if (linearStr->hasTwoByteChars()) {
2901 if (!sb.ensureTwoByteChars()) {
2902 return nullptr;
2903 }
2904 if (linearRepl->hasTwoByteChars()) {
2905 if (!StrFlatReplaceGlobal<char16_t, char16_t>(cx, linearStr, linearPat,
2906 linearRepl, sb)) {
2907 return nullptr;
2908 }
2909 } else {
2910 if (!StrFlatReplaceGlobal<char16_t, Latin1Char>(cx, linearStr, linearPat,
2911 linearRepl, sb)) {
2912 return nullptr;
2913 }
2914 }
2915 } else {
2916 if (linearRepl->hasTwoByteChars()) {
2917 if (!sb.ensureTwoByteChars()) {
2918 return nullptr;
2919 }
2920 if (!StrFlatReplaceGlobal<Latin1Char, char16_t>(cx, linearStr, linearPat,
2921 linearRepl, sb)) {
2922 return nullptr;
2923 }
2924 } else {
2925 if (!StrFlatReplaceGlobal<Latin1Char, Latin1Char>(
2926 cx, linearStr, linearPat, linearRepl, sb)) {
2927 return nullptr;
2928 }
2929 }
2930 }
2931
2932 return sb.finishString();
2933 }
2934
str_replace_string_raw(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2935 JSString* js::str_replace_string_raw(JSContext* cx, HandleString string,
2936 HandleString pattern,
2937 HandleString replacement) {
2938 RootedLinearString repl(cx, replacement->ensureLinear(cx));
2939 if (!repl) {
2940 return nullptr;
2941 }
2942
2943 RootedLinearString pat(cx, pattern->ensureLinear(cx));
2944 if (!pat) {
2945 return nullptr;
2946 }
2947
2948 size_t patternLength = pat->length();
2949 int32_t match;
2950 uint32_t dollarIndex;
2951
2952 {
2953 AutoCheckCannotGC nogc;
2954 dollarIndex =
2955 repl->hasLatin1Chars()
2956 ? FindDollarIndex(repl->latin1Chars(nogc), repl->length())
2957 : FindDollarIndex(repl->twoByteChars(nogc), repl->length());
2958 }
2959
2960 /*
2961 * |string| could be a rope, so we want to avoid flattening it for as
2962 * long as possible.
2963 */
2964 if (string->isRope()) {
2965 if (!RopeMatch(cx, &string->asRope(), pat, &match)) {
2966 return nullptr;
2967 }
2968 } else {
2969 match = StringMatch(&string->asLinear(), pat, 0);
2970 }
2971
2972 if (match < 0) {
2973 return string;
2974 }
2975
2976 if (dollarIndex != UINT32_MAX) {
2977 repl = InterpretDollarReplacement(cx, string, repl, dollarIndex, match,
2978 patternLength);
2979 if (!repl) {
2980 return nullptr;
2981 }
2982 } else if (string->isRope()) {
2983 return BuildFlatRopeReplacement(cx, string, repl, match, patternLength);
2984 }
2985 return BuildFlatReplacement(cx, string, repl, match, patternLength);
2986 }
2987
2988 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall
2989 // Steps 7-16 when functionalReplace is false and searchString is not empty.
2990 //
2991 // The steps are quite different, for performance. Loops in steps 11 and 14
2992 // are fused. GetSubstitution is optimized away when possible.
2993 template <typename StrChar, typename RepChar>
ReplaceAll(JSContext * cx,JSLinearString * string,JSLinearString * searchString,JSLinearString * replaceString)2994 static JSString* ReplaceAll(JSContext* cx, JSLinearString* string,
2995 JSLinearString* searchString,
2996 JSLinearString* replaceString) {
2997 // Step 7.
2998 const size_t stringLength = string->length();
2999 const size_t searchLength = searchString->length();
3000 const size_t replaceLength = replaceString->length();
3001
3002 MOZ_ASSERT(stringLength > 0);
3003 MOZ_ASSERT(searchLength > 0);
3004 MOZ_ASSERT(stringLength >= searchLength);
3005
3006 // Step 8 (advanceBy is equal to searchLength when searchLength > 0).
3007
3008 // Step 9 (not needed in this implementation).
3009
3010 // Step 10.
3011 // Find the first match.
3012 int32_t position = StringMatch(string, searchString, 0);
3013
3014 // Nothing to replace, so return early.
3015 if (position < 0) {
3016 return string;
3017 }
3018
3019 // Step 11 (moved below).
3020
3021 // Step 12.
3022 uint32_t endOfLastMatch = 0;
3023
3024 // Step 13.
3025 JSStringBuilder result(cx);
3026 if constexpr (std::is_same_v<StrChar, char16_t> ||
3027 std::is_same_v<RepChar, char16_t>) {
3028 if (!result.ensureTwoByteChars()) {
3029 return nullptr;
3030 }
3031 }
3032
3033 {
3034 AutoCheckCannotGC nogc;
3035 const StrChar* strChars = string->chars<StrChar>(nogc);
3036 const RepChar* repChars = replaceString->chars<RepChar>(nogc);
3037
3038 uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength);
3039
3040 // If it's true, we are sure that the result's length is, at least, the same
3041 // length as |str->length()|.
3042 if (replaceLength >= searchLength) {
3043 if (!result.reserve(stringLength)) {
3044 return nullptr;
3045 }
3046 }
3047
3048 do {
3049 // Step 14.c.
3050 // Append the substring before the current match.
3051 if (!result.append(strChars + endOfLastMatch,
3052 position - endOfLastMatch)) {
3053 return nullptr;
3054 }
3055
3056 // Steps 14.a-b and 14.d.
3057 // Append the replacement.
3058 if (dollarIndex != UINT32_MAX) {
3059 size_t matchLimit = position + searchLength;
3060 if (!AppendDollarReplacement(result, dollarIndex, position, matchLimit,
3061 string, repChars, replaceLength)) {
3062 return nullptr;
3063 }
3064 } else {
3065 if (!result.append(repChars, replaceLength)) {
3066 return nullptr;
3067 }
3068 }
3069
3070 // Step 14.e.
3071 endOfLastMatch = position + searchLength;
3072
3073 // Step 11.
3074 // Find the next match.
3075 position = StringMatch(string, searchString, endOfLastMatch);
3076 } while (position >= 0);
3077
3078 // Step 15.
3079 // Append the substring after the last match.
3080 if (!result.append(strChars + endOfLastMatch,
3081 stringLength - endOfLastMatch)) {
3082 return nullptr;
3083 }
3084 }
3085
3086 // Step 16.
3087 return result.finishString();
3088 }
3089
3090 // https://tc39.es/proposal-string-replaceall/#sec-string.prototype.replaceall
3091 // Steps 7-16 when functionalReplace is false and searchString is the empty
3092 // string.
3093 //
3094 // The steps are quite different, for performance. Loops in steps 11 and 14
3095 // are fused. GetSubstitution is optimized away when possible.
3096 template <typename StrChar, typename RepChar>
ReplaceAllInterleave(JSContext * cx,JSLinearString * string,JSLinearString * replaceString)3097 static JSString* ReplaceAllInterleave(JSContext* cx, JSLinearString* string,
3098 JSLinearString* replaceString) {
3099 // Step 7.
3100 const size_t stringLength = string->length();
3101 const size_t replaceLength = replaceString->length();
3102
3103 // Step 8 (advanceBy is 1 when searchString is the empty string).
3104
3105 // Steps 9-12 (trivial when searchString is the empty string).
3106
3107 // Step 13.
3108 JSStringBuilder result(cx);
3109 if constexpr (std::is_same_v<StrChar, char16_t> ||
3110 std::is_same_v<RepChar, char16_t>) {
3111 if (!result.ensureTwoByteChars()) {
3112 return nullptr;
3113 }
3114 }
3115
3116 {
3117 AutoCheckCannotGC nogc;
3118 const StrChar* strChars = string->chars<StrChar>(nogc);
3119 const RepChar* repChars = replaceString->chars<RepChar>(nogc);
3120
3121 uint32_t dollarIndex = FindDollarIndex(repChars, replaceLength);
3122
3123 if (dollarIndex != UINT32_MAX) {
3124 if (!result.reserve(stringLength)) {
3125 return nullptr;
3126 }
3127 } else {
3128 // Compute the exact result length when no substitutions take place.
3129 CheckedInt<uint32_t> strLength(stringLength);
3130 CheckedInt<uint32_t> repLength(replaceLength);
3131 CheckedInt<uint32_t> length = strLength + (strLength + 1) * repLength;
3132 if (!length.isValid()) {
3133 ReportAllocationOverflow(cx);
3134 return nullptr;
3135 }
3136
3137 if (!result.reserve(length.value())) {
3138 return nullptr;
3139 }
3140 }
3141
3142 auto appendReplacement = [&](size_t match) {
3143 if (dollarIndex != UINT32_MAX) {
3144 return AppendDollarReplacement(result, dollarIndex, match, match,
3145 string, repChars, replaceLength);
3146 }
3147 return result.append(repChars, replaceLength);
3148 };
3149
3150 for (size_t index = 0; index < stringLength; index++) {
3151 // Steps 11, 14.a-b and 14.d.
3152 // The empty string matches before each character.
3153 if (!appendReplacement(index)) {
3154 return nullptr;
3155 }
3156
3157 // Step 14.c.
3158 if (!result.append(strChars[index])) {
3159 return nullptr;
3160 }
3161 }
3162
3163 // Steps 11, 14.a-b and 14.d.
3164 // The empty string also matches at the end of the string.
3165 if (!appendReplacement(stringLength)) {
3166 return nullptr;
3167 }
3168
3169 // Step 15 (not applicable when searchString is the empty string).
3170 }
3171
3172 // Step 16.
3173 return result.finishString();
3174 }
3175
3176 // String.prototype.replaceAll (Stage 3 proposal)
3177 // https://tc39.es/proposal-string-replaceall/
3178 //
3179 // String.prototype.replaceAll ( searchValue, replaceValue )
3180 //
3181 // Steps 7-16 when functionalReplace is false.
str_replaceAll_string_raw(JSContext * cx,HandleString string,HandleString searchString,HandleString replaceString)3182 JSString* js::str_replaceAll_string_raw(JSContext* cx, HandleString string,
3183 HandleString searchString,
3184 HandleString replaceString) {
3185 const size_t stringLength = string->length();
3186 const size_t searchLength = searchString->length();
3187
3188 // Directly return when we're guaranteed to find no match.
3189 if (searchLength > stringLength) {
3190 return string;
3191 }
3192
3193 RootedLinearString str(cx, string->ensureLinear(cx));
3194 if (!str) {
3195 return nullptr;
3196 }
3197
3198 RootedLinearString repl(cx, replaceString->ensureLinear(cx));
3199 if (!repl) {
3200 return nullptr;
3201 }
3202
3203 RootedLinearString search(cx, searchString->ensureLinear(cx));
3204 if (!search) {
3205 return nullptr;
3206 }
3207
3208 // The pattern is empty, so we interleave the replacement string in-between
3209 // each character.
3210 if (searchLength == 0) {
3211 if (str->hasTwoByteChars()) {
3212 if (repl->hasTwoByteChars()) {
3213 return ReplaceAllInterleave<char16_t, char16_t>(cx, str, repl);
3214 }
3215 return ReplaceAllInterleave<char16_t, Latin1Char>(cx, str, repl);
3216 }
3217 if (repl->hasTwoByteChars()) {
3218 return ReplaceAllInterleave<Latin1Char, char16_t>(cx, str, repl);
3219 }
3220 return ReplaceAllInterleave<Latin1Char, Latin1Char>(cx, str, repl);
3221 }
3222
3223 MOZ_ASSERT(stringLength > 0);
3224
3225 if (str->hasTwoByteChars()) {
3226 if (repl->hasTwoByteChars()) {
3227 return ReplaceAll<char16_t, char16_t>(cx, str, search, repl);
3228 }
3229 return ReplaceAll<char16_t, Latin1Char>(cx, str, search, repl);
3230 }
3231 if (repl->hasTwoByteChars()) {
3232 return ReplaceAll<Latin1Char, char16_t>(cx, str, search, repl);
3233 }
3234 return ReplaceAll<Latin1Char, Latin1Char>(cx, str, search, repl);
3235 }
3236
SingleElementStringArray(JSContext * cx,HandleLinearString str)3237 static ArrayObject* SingleElementStringArray(JSContext* cx,
3238 HandleLinearString str) {
3239 ArrayObject* array = NewDenseFullyAllocatedArray(cx, 1);
3240 if (!array) {
3241 return nullptr;
3242 }
3243 array->setDenseInitializedLength(1);
3244 array->initDenseElement(0, StringValue(str));
3245 return array;
3246 }
3247
3248 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,HandleLinearString sep)3249 static ArrayObject* SplitHelper(JSContext* cx, HandleLinearString str,
3250 uint32_t limit, HandleLinearString sep) {
3251 size_t strLength = str->length();
3252 size_t sepLength = sep->length();
3253 MOZ_ASSERT(sepLength != 0);
3254
3255 // Step 12.
3256 if (strLength == 0) {
3257 // Step 12.a.
3258 int match = StringMatch(str, sep, 0);
3259
3260 // Step 12.b.
3261 if (match != -1) {
3262 return NewDenseEmptyArray(cx);
3263 }
3264
3265 // Steps 12.c-e.
3266 return SingleElementStringArray(cx, str);
3267 }
3268
3269 // Step 3 (reordered).
3270 RootedValueVector splits(cx);
3271
3272 // Step 8 (reordered).
3273 size_t lastEndIndex = 0;
3274
3275 // Step 13.
3276 size_t index = 0;
3277
3278 // Step 14.
3279 while (index != strLength) {
3280 // Step 14.a.
3281 int match = StringMatch(str, sep, index);
3282
3283 // Step 14.b.
3284 //
3285 // Our match algorithm differs from the spec in that it returns the
3286 // next index at which a match happens. If no match happens we're
3287 // done.
3288 //
3289 // But what if the match is at the end of the string (and the string is
3290 // not empty)? Per 14.c.i this shouldn't be a match, so we have to
3291 // specially exclude it. Thus this case should hold:
3292 //
3293 // var a = "abc".split(/\b/);
3294 // assertEq(a.length, 1);
3295 // assertEq(a[0], "abc");
3296 if (match == -1) {
3297 break;
3298 }
3299
3300 // Step 14.c.
3301 size_t endIndex = match + sepLength;
3302
3303 // Step 14.c.i.
3304 if (endIndex == lastEndIndex) {
3305 index++;
3306 continue;
3307 }
3308
3309 // Step 14.c.ii.
3310 MOZ_ASSERT(lastEndIndex < endIndex);
3311 MOZ_ASSERT(sepLength <= strLength);
3312 MOZ_ASSERT(lastEndIndex + sepLength <= endIndex);
3313
3314 // Step 14.c.ii.1.
3315 size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
3316 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3317
3318 // Steps 14.c.ii.2-4.
3319 if (!sub || !splits.append(StringValue(sub))) {
3320 return nullptr;
3321 }
3322
3323 // Step 14.c.ii.5.
3324 if (splits.length() == limit) {
3325 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3326 }
3327
3328 // Step 14.c.ii.6.
3329 index = endIndex;
3330
3331 // Step 14.c.ii.7.
3332 lastEndIndex = index;
3333 }
3334
3335 // Step 15.
3336 JSString* sub =
3337 NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
3338
3339 // Steps 16-17.
3340 if (!sub || !splits.append(StringValue(sub))) {
3341 return nullptr;
3342 }
3343
3344 // Step 18.
3345 return NewDenseCopiedArray(cx, splits.length(), splits.begin());
3346 }
3347
3348 // Fast-path for splitting a string into a character array via split("").
CharSplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit)3349 static ArrayObject* CharSplitHelper(JSContext* cx, HandleLinearString str,
3350 uint32_t limit) {
3351 size_t strLength = str->length();
3352 if (strLength == 0) {
3353 return NewDenseEmptyArray(cx);
3354 }
3355
3356 js::StaticStrings& staticStrings = cx->staticStrings();
3357 uint32_t resultlen = (limit < strLength ? limit : strLength);
3358 MOZ_ASSERT(limit > 0 && resultlen > 0,
3359 "Neither limit nor strLength is zero, so resultlen is greater "
3360 "than zero.");
3361
3362 RootedArrayObject splits(cx, NewDenseFullyAllocatedArray(cx, resultlen));
3363 if (!splits) {
3364 return nullptr;
3365 }
3366
3367 if (str->hasLatin1Chars()) {
3368 splits->setDenseInitializedLength(resultlen);
3369
3370 JS::AutoCheckCannotGC nogc;
3371 const Latin1Char* latin1Chars = str->latin1Chars(nogc);
3372 for (size_t i = 0; i < resultlen; ++i) {
3373 Latin1Char c = latin1Chars[i];
3374 MOZ_ASSERT(staticStrings.hasUnit(c));
3375 splits->initDenseElement(i, StringValue(staticStrings.getUnit(c)));
3376 }
3377 } else {
3378 splits->ensureDenseInitializedLength(0, resultlen);
3379
3380 for (size_t i = 0; i < resultlen; ++i) {
3381 JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
3382 if (!sub) {
3383 return nullptr;
3384 }
3385 splits->initDenseElement(i, StringValue(sub));
3386 }
3387 }
3388
3389 return splits;
3390 }
3391
3392 template <typename TextChar>
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,const TextChar * text,uint32_t textLen,char16_t patCh)3393 static MOZ_ALWAYS_INLINE ArrayObject* SplitSingleCharHelper(
3394 JSContext* cx, HandleLinearString str, const TextChar* text,
3395 uint32_t textLen, char16_t patCh) {
3396 // Count the number of occurrences of patCh within text.
3397 uint32_t count = 0;
3398 for (size_t index = 0; index < textLen; index++) {
3399 if (static_cast<char16_t>(text[index]) == patCh) {
3400 count++;
3401 }
3402 }
3403
3404 // Handle zero-occurrence case - return input string in an array.
3405 if (count == 0) {
3406 return SingleElementStringArray(cx, str);
3407 }
3408
3409 // Create the result array for the substring values.
3410 RootedArrayObject splits(cx, NewDenseFullyAllocatedArray(cx, count + 1));
3411 if (!splits) {
3412 return nullptr;
3413 }
3414 splits->ensureDenseInitializedLength(0, count + 1);
3415
3416 // Add substrings.
3417 uint32_t splitsIndex = 0;
3418 size_t lastEndIndex = 0;
3419 for (size_t index = 0; index < textLen; index++) {
3420 if (static_cast<char16_t>(text[index]) == patCh) {
3421 size_t subLength = size_t(index - lastEndIndex);
3422 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
3423 if (!sub) {
3424 return nullptr;
3425 }
3426 splits->initDenseElement(splitsIndex++, StringValue(sub));
3427 lastEndIndex = index + 1;
3428 }
3429 }
3430
3431 // Add substring for tail of string (after last match).
3432 JSString* sub =
3433 NewDependentString(cx, str, lastEndIndex, textLen - lastEndIndex);
3434 if (!sub) {
3435 return nullptr;
3436 }
3437 splits->initDenseElement(splitsIndex++, StringValue(sub));
3438
3439 return splits;
3440 }
3441
3442 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,char16_t ch)3443 static ArrayObject* SplitSingleCharHelper(JSContext* cx, HandleLinearString str,
3444 char16_t ch) {
3445 // Step 12.
3446 size_t strLength = str->length();
3447
3448 AutoStableStringChars linearChars(cx);
3449 if (!linearChars.init(cx, str)) {
3450 return nullptr;
3451 }
3452
3453 if (linearChars.isLatin1()) {
3454 return SplitSingleCharHelper(cx, str, linearChars.latin1Chars(), strLength,
3455 ch);
3456 }
3457
3458 return SplitSingleCharHelper(cx, str, linearChars.twoByteChars(), strLength,
3459 ch);
3460 }
3461
3462 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
StringSplitString(JSContext * cx,HandleString str,HandleString sep,uint32_t limit)3463 ArrayObject* js::StringSplitString(JSContext* cx, HandleString str,
3464 HandleString sep, uint32_t limit) {
3465 MOZ_ASSERT(limit > 0, "Only called for strictly positive limit.");
3466
3467 RootedLinearString linearStr(cx, str->ensureLinear(cx));
3468 if (!linearStr) {
3469 return nullptr;
3470 }
3471
3472 RootedLinearString linearSep(cx, sep->ensureLinear(cx));
3473 if (!linearSep) {
3474 return nullptr;
3475 }
3476
3477 if (linearSep->length() == 0) {
3478 return CharSplitHelper(cx, linearStr, limit);
3479 }
3480
3481 if (linearSep->length() == 1 && limit >= static_cast<uint32_t>(INT32_MAX)) {
3482 char16_t ch = linearSep->latin1OrTwoByteChar(0);
3483 return SplitSingleCharHelper(cx, linearStr, ch);
3484 }
3485
3486 return SplitHelper(cx, linearStr, limit, linearSep);
3487 }
3488
3489 static const JSFunctionSpec string_methods[] = {
3490 JS_FN(js_toSource_str, str_toSource, 0, 0),
3491
3492 /* Java-like methods. */
3493 JS_INLINABLE_FN(js_toString_str, str_toString, 0, 0, StringToString),
3494 JS_INLINABLE_FN(js_valueOf_str, str_toString, 0, 0, StringValueOf),
3495 JS_INLINABLE_FN("toLowerCase", str_toLowerCase, 0, 0, StringToLowerCase),
3496 JS_INLINABLE_FN("toUpperCase", str_toUpperCase, 0, 0, StringToUpperCase),
3497 JS_INLINABLE_FN("charAt", str_charAt, 1, 0, StringCharAt),
3498 JS_INLINABLE_FN("charCodeAt", str_charCodeAt, 1, 0, StringCharCodeAt),
3499 JS_SELF_HOSTED_FN("substring", "String_substring", 2, 0),
3500 JS_SELF_HOSTED_FN("padStart", "String_pad_start", 2, 0),
3501 JS_SELF_HOSTED_FN("padEnd", "String_pad_end", 2, 0),
3502 JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1, 0),
3503 JS_FN("includes", str_includes, 1, 0), JS_FN("indexOf", str_indexOf, 1, 0),
3504 JS_FN("lastIndexOf", str_lastIndexOf, 1, 0),
3505 JS_FN("startsWith", str_startsWith, 1, 0),
3506 JS_FN("endsWith", str_endsWith, 1, 0), JS_FN("trim", str_trim, 0, 0),
3507 JS_FN("trimStart", str_trimStart, 0, 0),
3508 JS_FN("trimEnd", str_trimEnd, 0, 0),
3509 #if JS_HAS_INTL_API
3510 JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_toLocaleLowerCase", 0, 0),
3511 JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_toLocaleUpperCase", 0, 0),
3512 JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1, 0),
3513 #else
3514 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0, 0),
3515 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0, 0),
3516 JS_FN("localeCompare", str_localeCompare, 1, 0),
3517 #endif
3518 JS_SELF_HOSTED_FN("repeat", "String_repeat", 1, 0),
3519 #if JS_HAS_INTL_API
3520 JS_FN("normalize", str_normalize, 0, 0),
3521 #endif
3522
3523 /* Perl-ish methods (search is actually Python-esque). */
3524 JS_SELF_HOSTED_FN("match", "String_match", 1, 0),
3525 JS_SELF_HOSTED_FN("matchAll", "String_matchAll", 1, 0),
3526 JS_SELF_HOSTED_FN("search", "String_search", 1, 0),
3527 JS_SELF_HOSTED_FN("replace", "String_replace", 2, 0),
3528 JS_SELF_HOSTED_FN("replaceAll", "String_replaceAll", 2, 0),
3529 JS_SELF_HOSTED_FN("split", "String_split", 2, 0),
3530 JS_SELF_HOSTED_FN("substr", "String_substr", 2, 0),
3531
3532 /* Python-esque sequence methods. */
3533 JS_SELF_HOSTED_FN("concat", "String_concat", 1, 0),
3534 JS_SELF_HOSTED_FN("slice", "String_slice", 2, 0),
3535
3536 JS_SELF_HOSTED_FN("at", "String_at", 1, 0),
3537
3538 /* HTML string methods. */
3539 JS_SELF_HOSTED_FN("bold", "String_bold", 0, 0),
3540 JS_SELF_HOSTED_FN("italics", "String_italics", 0, 0),
3541 JS_SELF_HOSTED_FN("fixed", "String_fixed", 0, 0),
3542 JS_SELF_HOSTED_FN("strike", "String_strike", 0, 0),
3543 JS_SELF_HOSTED_FN("small", "String_small", 0, 0),
3544 JS_SELF_HOSTED_FN("big", "String_big", 0, 0),
3545 JS_SELF_HOSTED_FN("blink", "String_blink", 0, 0),
3546 JS_SELF_HOSTED_FN("sup", "String_sup", 0, 0),
3547 JS_SELF_HOSTED_FN("sub", "String_sub", 0, 0),
3548 JS_SELF_HOSTED_FN("anchor", "String_anchor", 1, 0),
3549 JS_SELF_HOSTED_FN("link", "String_link", 1, 0),
3550 JS_SELF_HOSTED_FN("fontcolor", "String_fontcolor", 1, 0),
3551 JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1, 0),
3552
3553 JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0, 0), JS_FS_END};
3554
3555 // ES6 rev 27 (2014 Aug 24) 21.1.1
StringConstructor(JSContext * cx,unsigned argc,Value * vp)3556 bool js::StringConstructor(JSContext* cx, unsigned argc, Value* vp) {
3557 CallArgs args = CallArgsFromVp(argc, vp);
3558
3559 RootedString str(cx);
3560 if (args.length() > 0) {
3561 if (!args.isConstructing() && args[0].isSymbol()) {
3562 return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
3563 }
3564
3565 str = ToString<CanGC>(cx, args[0]);
3566 if (!str) {
3567 return false;
3568 }
3569 } else {
3570 str = cx->runtime()->emptyString;
3571 }
3572
3573 if (args.isConstructing()) {
3574 RootedObject proto(cx);
3575 if (!GetPrototypeFromBuiltinConstructor(cx, args, JSProto_String, &proto)) {
3576 return false;
3577 }
3578
3579 StringObject* strobj = StringObject::create(cx, str, proto);
3580 if (!strobj) {
3581 return false;
3582 }
3583 args.rval().setObject(*strobj);
3584 return true;
3585 }
3586
3587 args.rval().setString(str);
3588 return true;
3589 }
3590
str_fromCharCode(JSContext * cx,unsigned argc,Value * vp)3591 bool js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) {
3592 CallArgs args = CallArgsFromVp(argc, vp);
3593
3594 MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);
3595
3596 // Optimize the single-char case.
3597 if (args.length() == 1) {
3598 return str_fromCharCode_one_arg(cx, args[0], args.rval());
3599 }
3600
3601 // Optimize the case where the result will definitely fit in an inline
3602 // string (thin or fat) and so we don't need to malloc the chars. (We could
3603 // cover some cases where args.length() goes up to
3604 // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
3605 // all Latin-1, but it doesn't seem worth the effort.)
3606 InlineCharBuffer<char16_t> chars;
3607 if (!chars.maybeAlloc(cx, args.length())) {
3608 return false;
3609 }
3610
3611 char16_t* rawChars = chars.get();
3612 for (unsigned i = 0; i < args.length(); i++) {
3613 uint16_t code;
3614 if (!ToUint16(cx, args[i], &code)) {
3615 return false;
3616 }
3617
3618 rawChars[i] = char16_t(code);
3619 }
3620
3621 JSString* str = chars.toString(cx, args.length());
3622 if (!str) {
3623 return false;
3624 }
3625
3626 args.rval().setString(str);
3627 return true;
3628 }
3629
CodeUnitToString(JSContext * cx,uint16_t ucode,MutableHandleValue rval)3630 static inline bool CodeUnitToString(JSContext* cx, uint16_t ucode,
3631 MutableHandleValue rval) {
3632 if (StaticStrings::hasUnit(ucode)) {
3633 rval.setString(cx->staticStrings().getUnit(ucode));
3634 return true;
3635 }
3636
3637 char16_t c = char16_t(ucode);
3638 JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, &c, 1);
3639 if (!str) {
3640 return false;
3641 }
3642
3643 rval.setString(str);
3644 return true;
3645 }
3646
str_fromCharCode_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3647 bool js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code,
3648 MutableHandleValue rval) {
3649 uint16_t ucode;
3650
3651 if (!ToUint16(cx, code, &ucode)) {
3652 return false;
3653 }
3654
3655 return CodeUnitToString(cx, ucode, rval);
3656 }
3657
ToCodePoint(JSContext * cx,HandleValue code,uint32_t * codePoint)3658 static MOZ_ALWAYS_INLINE bool ToCodePoint(JSContext* cx, HandleValue code,
3659 uint32_t* codePoint) {
3660 // String.fromCodePoint, Steps 5.a-b.
3661
3662 // Fast path for the common case - the input is already an int32.
3663 if (code.isInt32()) {
3664 int32_t nextCP = code.toInt32();
3665 if (nextCP >= 0 && nextCP <= int32_t(unicode::NonBMPMax)) {
3666 *codePoint = uint32_t(nextCP);
3667 return true;
3668 }
3669 }
3670
3671 double nextCP;
3672 if (!ToNumber(cx, code, &nextCP)) {
3673 return false;
3674 }
3675
3676 // String.fromCodePoint, Steps 5.c-d.
3677 if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 ||
3678 nextCP > unicode::NonBMPMax) {
3679 ToCStringBuf cbuf;
3680 if (const char* numStr = NumberToCString(cx, &cbuf, nextCP)) {
3681 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
3682 JSMSG_NOT_A_CODEPOINT, numStr);
3683 }
3684 return false;
3685 }
3686
3687 *codePoint = uint32_t(nextCP);
3688 return true;
3689 }
3690
str_fromCodePoint_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3691 bool js::str_fromCodePoint_one_arg(JSContext* cx, HandleValue code,
3692 MutableHandleValue rval) {
3693 // Steps 1-4 (omitted).
3694
3695 // Steps 5.a-d.
3696 uint32_t codePoint;
3697 if (!ToCodePoint(cx, code, &codePoint)) {
3698 return false;
3699 }
3700
3701 // Steps 5.e, 6.
3702 if (!unicode::IsSupplementary(codePoint)) {
3703 return CodeUnitToString(cx, uint16_t(codePoint), rval);
3704 }
3705
3706 char16_t chars[] = {unicode::LeadSurrogate(codePoint),
3707 unicode::TrailSurrogate(codePoint)};
3708 JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, chars, 2);
3709 if (!str) {
3710 return false;
3711 }
3712
3713 rval.setString(str);
3714 return true;
3715 }
3716
str_fromCodePoint_few_args(JSContext * cx,const CallArgs & args)3717 static bool str_fromCodePoint_few_args(JSContext* cx, const CallArgs& args) {
3718 MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2);
3719
3720 // Steps 1-2 (omitted).
3721
3722 // Step 3.
3723 char16_t elements[JSFatInlineString::MAX_LENGTH_TWO_BYTE];
3724
3725 // Steps 4-5.
3726 unsigned length = 0;
3727 for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3728 // Steps 5.a-d.
3729 uint32_t codePoint;
3730 if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
3731 return false;
3732 }
3733
3734 // Step 5.e.
3735 unicode::UTF16Encode(codePoint, elements, &length);
3736 }
3737
3738 // Step 6.
3739 JSString* str = NewStringCopyN<CanGC>(cx, elements, length);
3740 if (!str) {
3741 return false;
3742 }
3743
3744 args.rval().setString(str);
3745 return true;
3746 }
3747
3748 // ES2017 draft rev 40edb3a95a475c1b251141ac681b8793129d9a6d
3749 // 21.1.2.2 String.fromCodePoint(...codePoints)
str_fromCodePoint(JSContext * cx,unsigned argc,Value * vp)3750 bool js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) {
3751 CallArgs args = CallArgsFromVp(argc, vp);
3752
3753 // Optimize the single code-point case.
3754 if (args.length() == 1) {
3755 return str_fromCodePoint_one_arg(cx, args[0], args.rval());
3756 }
3757
3758 // Optimize the case where the result will definitely fit in an inline
3759 // string (thin or fat) and so we don't need to malloc the chars. (We could
3760 // cover some cases where |args.length()| goes up to
3761 // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
3762 // are all Latin-1, but it doesn't seem worth the effort.)
3763 if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2) {
3764 return str_fromCodePoint_few_args(cx, args);
3765 }
3766
3767 // Steps 1-2 (omitted).
3768
3769 // Step 3.
3770 static_assert(
3771 ARGS_LENGTH_MAX < std::numeric_limits<decltype(args.length())>::max() / 2,
3772 "|args.length() * 2| does not overflow");
3773 auto elements = cx->make_pod_arena_array<char16_t>(js::StringBufferArena,
3774 args.length() * 2);
3775 if (!elements) {
3776 return false;
3777 }
3778
3779 // Steps 4-5.
3780 unsigned length = 0;
3781 for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3782 // Steps 5.a-d.
3783 uint32_t codePoint;
3784 if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
3785 return false;
3786 }
3787
3788 // Step 5.e.
3789 unicode::UTF16Encode(codePoint, elements.get(), &length);
3790 }
3791
3792 // Step 6.
3793 JSString* str = NewString<CanGC>(cx, std::move(elements), length);
3794 if (!str) {
3795 return false;
3796 }
3797
3798 args.rval().setString(str);
3799 return true;
3800 }
3801
3802 static const JSFunctionSpec string_static_methods[] = {
3803 JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0,
3804 StringFromCharCode),
3805 JS_INLINABLE_FN("fromCodePoint", js::str_fromCodePoint, 1, 0,
3806 StringFromCodePoint),
3807
3808 JS_SELF_HOSTED_FN("raw", "String_static_raw", 1, 0), JS_FS_END};
3809
3810 /* static */
assignInitialShape(JSContext * cx,Handle<StringObject * > obj)3811 Shape* StringObject::assignInitialShape(JSContext* cx,
3812 Handle<StringObject*> obj) {
3813 MOZ_ASSERT(obj->empty());
3814
3815 if (!NativeObject::addPropertyInReservedSlot(cx, obj, cx->names().length,
3816 LENGTH_SLOT, {})) {
3817 return nullptr;
3818 }
3819
3820 return obj->shape();
3821 }
3822
createPrototype(JSContext * cx,JSProtoKey key)3823 JSObject* StringObject::createPrototype(JSContext* cx, JSProtoKey key) {
3824 Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
3825 Rooted<StringObject*> proto(
3826 cx, GlobalObject::createBlankPrototype<StringObject>(cx, cx->global()));
3827 if (!proto) {
3828 return nullptr;
3829 }
3830 if (!StringObject::init(cx, proto, empty)) {
3831 return nullptr;
3832 }
3833 return proto;
3834 }
3835
StringClassFinish(JSContext * cx,HandleObject ctor,HandleObject proto)3836 static bool StringClassFinish(JSContext* cx, HandleObject ctor,
3837 HandleObject proto) {
3838 HandleNativeObject nativeProto = proto.as<NativeObject>();
3839
3840 // Create "trimLeft" as an alias for "trimStart".
3841 RootedValue trimFn(cx);
3842 RootedId trimId(cx, NameToId(cx->names().trimStart));
3843 RootedId trimAliasId(cx, NameToId(cx->names().trimLeft));
3844 if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
3845 !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
3846 return false;
3847 }
3848
3849 // Create "trimRight" as an alias for "trimEnd".
3850 trimId = NameToId(cx->names().trimEnd);
3851 trimAliasId = NameToId(cx->names().trimRight);
3852 if (!NativeGetProperty(cx, nativeProto, trimId, &trimFn) ||
3853 !NativeDefineDataProperty(cx, nativeProto, trimAliasId, trimFn, 0)) {
3854 return false;
3855 }
3856
3857 /*
3858 * Define escape/unescape, the URI encode/decode functions, and maybe
3859 * uneval on the global object.
3860 */
3861 if (!JS_DefineFunctions(cx, cx->global(), string_functions)) {
3862 return false;
3863 }
3864
3865 return true;
3866 }
3867
3868 const ClassSpec StringObject::classSpec_ = {
3869 GenericCreateConstructor<StringConstructor, 1, gc::AllocKind::FUNCTION,
3870 &jit::JitInfo_String>,
3871 StringObject::createPrototype,
3872 string_static_methods,
3873 nullptr,
3874 string_methods,
3875 nullptr,
3876 StringClassFinish};
3877
3878 #define ____ false
3879
3880 /*
3881 * Uri reserved chars + #:
3882 * - 35: #
3883 * - 36: $
3884 * - 38: &
3885 * - 43: +
3886 * - 44: ,
3887 * - 47: /
3888 * - 58: :
3889 * - 59: ;
3890 * - 61: =
3891 * - 63: ?
3892 * - 64: @
3893 */
3894 static const bool js_isUriReservedPlusPound[] = {
3895 // clang-format off
3896 /* 0 1 2 3 4 5 6 7 8 9 */
3897 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3898 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3899 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3900 /* 3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
3901 /* 4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
3902 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
3903 /* 6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
3904 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3905 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3906 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3907 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3908 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3909 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
3910 // clang-format on
3911 };
3912
3913 /*
3914 * Uri unescaped chars:
3915 * - 33: !
3916 * - 39: '
3917 * - 40: (
3918 * - 41: )
3919 * - 42: *
3920 * - 45: -
3921 * - 46: .
3922 * - 48..57: 0-9
3923 * - 65..90: A-Z
3924 * - 95: _
3925 * - 97..122: a-z
3926 * - 126: ~
3927 */
3928 static const bool js_isUriUnescaped[] = {
3929 // clang-format off
3930 /* 0 1 2 3 4 5 6 7 8 9 */
3931 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3932 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3933 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3934 /* 3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
3935 /* 4 */ true, true, true, ____, ____, true, true, ____, true, true,
3936 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
3937 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
3938 /* 7 */ true, true, true, true, true, true, true, true, true, true,
3939 /* 8 */ true, true, true, true, true, true, true, true, true, true,
3940 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
3941 /* 10 */ true, true, true, true, true, true, true, true, true, true,
3942 /* 11 */ true, true, true, true, true, true, true, true, true, true,
3943 /* 12 */ true, true, true, ____, ____, ____, true, ____
3944 // clang-format on
3945 };
3946
3947 #undef ____
3948
TransferBufferToString(JSStringBuilder & sb,JSString * str,MutableHandleValue rval)3949 static inline bool TransferBufferToString(JSStringBuilder& sb, JSString* str,
3950 MutableHandleValue rval) {
3951 if (!sb.empty()) {
3952 str = sb.finishString();
3953 if (!str) {
3954 return false;
3955 }
3956 }
3957 rval.setString(str);
3958 return true;
3959 }
3960
3961 /*
3962 * ECMA 3, 15.1.3 URI Handling Function Properties
3963 *
3964 * The following are implementations of the algorithms
3965 * given in the ECMA specification for the hidden functions
3966 * 'Encode' and 'Decode'.
3967 */
3968 enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };
3969
3970 // Bug 1403318: GCC sometimes inlines this Encode function rather than the
3971 // caller Encode function. Annotate both functions with MOZ_NEVER_INLINE resp.
3972 // MOZ_ALWAYS_INLINE to ensure we get the desired inlining behavior.
3973 template <typename CharT>
Encode(StringBuffer & sb,const CharT * chars,size_t length,const bool * unescapedSet)3974 static MOZ_NEVER_INLINE EncodeResult Encode(StringBuffer& sb,
3975 const CharT* chars, size_t length,
3976 const bool* unescapedSet) {
3977 Latin1Char hexBuf[3];
3978 hexBuf[0] = '%';
3979
3980 auto appendEncoded = [&sb, &hexBuf](Latin1Char c) {
3981 static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
3982
3983 hexBuf[1] = HexDigits[c >> 4];
3984 hexBuf[2] = HexDigits[c & 0xf];
3985 return sb.append(hexBuf, 3);
3986 };
3987
3988 auto appendRange = [&sb, chars, length](size_t start, size_t end) {
3989 MOZ_ASSERT(start <= end);
3990
3991 if (start < end) {
3992 if (start == 0) {
3993 if (!sb.reserve(length)) {
3994 return false;
3995 }
3996 }
3997 return sb.append(chars + start, chars + end);
3998 }
3999 return true;
4000 };
4001
4002 size_t startAppend = 0;
4003 for (size_t k = 0; k < length; k++) {
4004 CharT c = chars[k];
4005 if (c < 128 &&
4006 (js_isUriUnescaped[c] || (unescapedSet && unescapedSet[c]))) {
4007 continue;
4008 } else {
4009 if (!appendRange(startAppend, k)) {
4010 return Encode_Failure;
4011 }
4012
4013 if constexpr (std::is_same_v<CharT, Latin1Char>) {
4014 if (c < 0x80) {
4015 if (!appendEncoded(c)) {
4016 return Encode_Failure;
4017 }
4018 } else {
4019 if (!appendEncoded(0xC0 | (c >> 6)) ||
4020 !appendEncoded(0x80 | (c & 0x3F))) {
4021 return Encode_Failure;
4022 }
4023 }
4024 } else {
4025 if (unicode::IsTrailSurrogate(c)) {
4026 return Encode_BadUri;
4027 }
4028
4029 uint32_t v;
4030 if (!unicode::IsLeadSurrogate(c)) {
4031 v = c;
4032 } else {
4033 k++;
4034 if (k == length) {
4035 return Encode_BadUri;
4036 }
4037
4038 char16_t c2 = chars[k];
4039 if (!unicode::IsTrailSurrogate(c2)) {
4040 return Encode_BadUri;
4041 }
4042
4043 v = unicode::UTF16Decode(c, c2);
4044 }
4045
4046 uint8_t utf8buf[4];
4047 size_t L = OneUcs4ToUtf8Char(utf8buf, v);
4048 for (size_t j = 0; j < L; j++) {
4049 if (!appendEncoded(utf8buf[j])) {
4050 return Encode_Failure;
4051 }
4052 }
4053 }
4054
4055 startAppend = k + 1;
4056 }
4057 }
4058
4059 if (startAppend > 0) {
4060 if (!appendRange(startAppend, length)) {
4061 return Encode_Failure;
4062 }
4063 }
4064
4065 return Encode_Success;
4066 }
4067
Encode(JSContext * cx,HandleLinearString str,const bool * unescapedSet,MutableHandleValue rval)4068 static MOZ_ALWAYS_INLINE bool Encode(JSContext* cx, HandleLinearString str,
4069 const bool* unescapedSet,
4070 MutableHandleValue rval) {
4071 size_t length = str->length();
4072 if (length == 0) {
4073 rval.setString(cx->runtime()->emptyString);
4074 return true;
4075 }
4076
4077 JSStringBuilder sb(cx);
4078
4079 EncodeResult res;
4080 if (str->hasLatin1Chars()) {
4081 AutoCheckCannotGC nogc;
4082 res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet);
4083 } else {
4084 AutoCheckCannotGC nogc;
4085 res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet);
4086 }
4087
4088 if (res == Encode_Failure) {
4089 return false;
4090 }
4091
4092 if (res == Encode_BadUri) {
4093 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4094 return false;
4095 }
4096
4097 MOZ_ASSERT(res == Encode_Success);
4098 return TransferBufferToString(sb, str, rval);
4099 }
4100
4101 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success };
4102
4103 template <typename CharT>
Decode(StringBuffer & sb,const CharT * chars,size_t length,const bool * reservedSet)4104 static DecodeResult Decode(StringBuffer& sb, const CharT* chars, size_t length,
4105 const bool* reservedSet) {
4106 auto appendRange = [&sb, chars](size_t start, size_t end) {
4107 MOZ_ASSERT(start <= end);
4108
4109 if (start < end) {
4110 return sb.append(chars + start, chars + end);
4111 }
4112 return true;
4113 };
4114
4115 size_t startAppend = 0;
4116 for (size_t k = 0; k < length; k++) {
4117 CharT c = chars[k];
4118 if (c == '%') {
4119 size_t start = k;
4120 if ((k + 2) >= length) {
4121 return Decode_BadUri;
4122 }
4123
4124 if (!IsAsciiHexDigit(chars[k + 1]) || !IsAsciiHexDigit(chars[k + 2])) {
4125 return Decode_BadUri;
4126 }
4127
4128 uint32_t B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 +
4129 AsciiAlphanumericToNumber(chars[k + 2]);
4130 k += 2;
4131 if (B < 128) {
4132 Latin1Char ch = Latin1Char(B);
4133 if (reservedSet && reservedSet[ch]) {
4134 continue;
4135 }
4136
4137 if (!appendRange(startAppend, start)) {
4138 return Decode_Failure;
4139 }
4140 if (!sb.append(ch)) {
4141 return Decode_Failure;
4142 }
4143 } else {
4144 int n = 1;
4145 while (B & (0x80 >> n)) {
4146 n++;
4147 }
4148
4149 if (n == 1 || n > 4) {
4150 return Decode_BadUri;
4151 }
4152
4153 uint8_t octets[4];
4154 octets[0] = (uint8_t)B;
4155 if (k + 3 * (n - 1) >= length) {
4156 return Decode_BadUri;
4157 }
4158
4159 for (int j = 1; j < n; j++) {
4160 k++;
4161 if (chars[k] != '%') {
4162 return Decode_BadUri;
4163 }
4164
4165 if (!IsAsciiHexDigit(chars[k + 1]) ||
4166 !IsAsciiHexDigit(chars[k + 2])) {
4167 return Decode_BadUri;
4168 }
4169
4170 B = AsciiAlphanumericToNumber(chars[k + 1]) * 16 +
4171 AsciiAlphanumericToNumber(chars[k + 2]);
4172 if ((B & 0xC0) != 0x80) {
4173 return Decode_BadUri;
4174 }
4175
4176 k += 2;
4177 octets[j] = char(B);
4178 }
4179
4180 if (!appendRange(startAppend, start)) {
4181 return Decode_Failure;
4182 }
4183
4184 uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
4185 MOZ_ASSERT(v >= 128);
4186 if (v >= unicode::NonBMPMin) {
4187 if (v > unicode::NonBMPMax) {
4188 return Decode_BadUri;
4189 }
4190
4191 if (!sb.append(unicode::LeadSurrogate(v))) {
4192 return Decode_Failure;
4193 }
4194 if (!sb.append(unicode::TrailSurrogate(v))) {
4195 return Decode_Failure;
4196 }
4197 } else {
4198 if (!sb.append(char16_t(v))) {
4199 return Decode_Failure;
4200 }
4201 }
4202 }
4203
4204 startAppend = k + 1;
4205 }
4206 }
4207
4208 if (startAppend > 0) {
4209 if (!appendRange(startAppend, length)) {
4210 return Decode_Failure;
4211 }
4212 }
4213
4214 return Decode_Success;
4215 }
4216
Decode(JSContext * cx,HandleLinearString str,const bool * reservedSet,MutableHandleValue rval)4217 static bool Decode(JSContext* cx, HandleLinearString str,
4218 const bool* reservedSet, MutableHandleValue rval) {
4219 size_t length = str->length();
4220 if (length == 0) {
4221 rval.setString(cx->runtime()->emptyString);
4222 return true;
4223 }
4224
4225 JSStringBuilder sb(cx);
4226
4227 DecodeResult res;
4228 if (str->hasLatin1Chars()) {
4229 AutoCheckCannotGC nogc;
4230 res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
4231 } else {
4232 AutoCheckCannotGC nogc;
4233 res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
4234 }
4235
4236 if (res == Decode_Failure) {
4237 return false;
4238 }
4239
4240 if (res == Decode_BadUri) {
4241 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4242 return false;
4243 }
4244
4245 MOZ_ASSERT(res == Decode_Success);
4246 return TransferBufferToString(sb, str, rval);
4247 }
4248
str_decodeURI(JSContext * cx,unsigned argc,Value * vp)4249 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp) {
4250 CallArgs args = CallArgsFromVp(argc, vp);
4251 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4252 if (!str) {
4253 return false;
4254 }
4255
4256 return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
4257 }
4258
str_decodeURI_Component(JSContext * cx,unsigned argc,Value * vp)4259 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
4260 CallArgs args = CallArgsFromVp(argc, vp);
4261 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4262 if (!str) {
4263 return false;
4264 }
4265
4266 return Decode(cx, str, nullptr, args.rval());
4267 }
4268
str_encodeURI(JSContext * cx,unsigned argc,Value * vp)4269 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp) {
4270 CallArgs args = CallArgsFromVp(argc, vp);
4271 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4272 if (!str) {
4273 return false;
4274 }
4275
4276 return Encode(cx, str, js_isUriReservedPlusPound, args.rval());
4277 }
4278
str_encodeURI_Component(JSContext * cx,unsigned argc,Value * vp)4279 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
4280 CallArgs args = CallArgsFromVp(argc, vp);
4281 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
4282 if (!str) {
4283 return false;
4284 }
4285
4286 return Encode(cx, str, nullptr, args.rval());
4287 }
4288
EncodeURI(JSContext * cx,const char * chars,size_t length)4289 JSString* js::EncodeURI(JSContext* cx, const char* chars, size_t length) {
4290 JSStringBuilder sb(cx);
4291 EncodeResult result = Encode(sb, reinterpret_cast<const Latin1Char*>(chars),
4292 length, js_isUriReservedPlusPound);
4293 if (result == EncodeResult::Encode_Failure) {
4294 return nullptr;
4295 }
4296 if (result == EncodeResult::Encode_BadUri) {
4297 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
4298 return nullptr;
4299 }
4300 if (sb.empty()) {
4301 return NewStringCopyN<CanGC>(cx, chars, length);
4302 }
4303 return sb.finishString();
4304 }
4305
FlatStringMatchHelper(JSContext * cx,HandleString str,HandleString pattern,bool * isFlat,int32_t * match)4306 static bool FlatStringMatchHelper(JSContext* cx, HandleString str,
4307 HandleString pattern, bool* isFlat,
4308 int32_t* match) {
4309 RootedLinearString linearPattern(cx, pattern->ensureLinear(cx));
4310 if (!linearPattern) {
4311 return false;
4312 }
4313
4314 static const size_t MAX_FLAT_PAT_LEN = 256;
4315 if (linearPattern->length() > MAX_FLAT_PAT_LEN ||
4316 StringHasRegExpMetaChars(linearPattern)) {
4317 *isFlat = false;
4318 return true;
4319 }
4320
4321 *isFlat = true;
4322 if (str->isRope()) {
4323 if (!RopeMatch(cx, &str->asRope(), linearPattern, match)) {
4324 return false;
4325 }
4326 } else {
4327 *match = StringMatch(&str->asLinear(), linearPattern);
4328 }
4329
4330 return true;
4331 }
4332
BuildFlatMatchArray(JSContext * cx,HandleString str,HandleString pattern,int32_t match,MutableHandleValue rval)4333 static bool BuildFlatMatchArray(JSContext* cx, HandleString str,
4334 HandleString pattern, int32_t match,
4335 MutableHandleValue rval) {
4336 if (match < 0) {
4337 rval.setNull();
4338 return true;
4339 }
4340
4341 // Get the templateObject that defines the shape and type of the output
4342 // object.
4343 ArrayObject* templateObject =
4344 cx->realm()->regExps.getOrCreateMatchResultTemplateObject(cx);
4345 if (!templateObject) {
4346 return false;
4347 }
4348
4349 RootedArrayObject arr(
4350 cx, NewDenseFullyAllocatedArrayWithTemplate(cx, 1, templateObject));
4351 if (!arr) {
4352 return false;
4353 }
4354
4355 // Store a Value for each pair.
4356 arr->setDenseInitializedLength(1);
4357 arr->initDenseElement(0, StringValue(pattern));
4358
4359 // Set the |index| property. (TemplateObject positions it in slot 0).
4360 arr->setSlot(0, Int32Value(match));
4361
4362 // Set the |input| property. (TemplateObject positions it in slot 1).
4363 arr->setSlot(1, StringValue(str));
4364
4365 #ifdef DEBUG
4366 RootedValue test(cx);
4367 RootedId id(cx, NameToId(cx->names().index));
4368 if (!NativeGetProperty(cx, arr, id, &test)) {
4369 return false;
4370 }
4371 MOZ_ASSERT(test == arr->getSlot(0));
4372 id = NameToId(cx->names().input);
4373 if (!NativeGetProperty(cx, arr, id, &test)) {
4374 return false;
4375 }
4376 MOZ_ASSERT(test == arr->getSlot(1));
4377 #endif
4378
4379 rval.setObject(*arr);
4380 return true;
4381 }
4382
4383 #ifdef DEBUG
CallIsStringOptimizable(JSContext * cx,const char * name,bool * result)4384 static bool CallIsStringOptimizable(JSContext* cx, const char* name,
4385 bool* result) {
4386 FixedInvokeArgs<0> args(cx);
4387
4388 RootedValue rval(cx);
4389 if (!CallSelfHostedFunction(cx, name, UndefinedHandleValue, args, &rval)) {
4390 return false;
4391 }
4392
4393 *result = rval.toBoolean();
4394 return true;
4395 }
4396 #endif
4397
FlatStringMatch(JSContext * cx,unsigned argc,Value * vp)4398 bool js::FlatStringMatch(JSContext* cx, unsigned argc, Value* vp) {
4399 CallArgs args = CallArgsFromVp(argc, vp);
4400 MOZ_ASSERT(args.length() == 2);
4401 MOZ_ASSERT(args[0].isString());
4402 MOZ_ASSERT(args[1].isString());
4403 #ifdef DEBUG
4404 bool isOptimizable = false;
4405 if (!CallIsStringOptimizable(cx, "IsStringMatchOptimizable",
4406 &isOptimizable)) {
4407 return false;
4408 }
4409 MOZ_ASSERT(isOptimizable);
4410 #endif
4411
4412 RootedString str(cx, args[0].toString());
4413 RootedString pattern(cx, args[1].toString());
4414
4415 bool isFlat = false;
4416 int32_t match = 0;
4417 if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) {
4418 return false;
4419 }
4420
4421 if (!isFlat) {
4422 args.rval().setUndefined();
4423 return true;
4424 }
4425
4426 return BuildFlatMatchArray(cx, str, pattern, match, args.rval());
4427 }
4428
FlatStringSearch(JSContext * cx,unsigned argc,Value * vp)4429 bool js::FlatStringSearch(JSContext* cx, unsigned argc, Value* vp) {
4430 CallArgs args = CallArgsFromVp(argc, vp);
4431 MOZ_ASSERT(args.length() == 2);
4432 MOZ_ASSERT(args[0].isString());
4433 MOZ_ASSERT(args[1].isString());
4434 #ifdef DEBUG
4435 bool isOptimizable = false;
4436 if (!CallIsStringOptimizable(cx, "IsStringSearchOptimizable",
4437 &isOptimizable)) {
4438 return false;
4439 }
4440 MOZ_ASSERT(isOptimizable);
4441 #endif
4442
4443 RootedString str(cx, args[0].toString());
4444 RootedString pattern(cx, args[1].toString());
4445
4446 bool isFlat = false;
4447 int32_t match = 0;
4448 if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) {
4449 return false;
4450 }
4451
4452 if (!isFlat) {
4453 args.rval().setInt32(-2);
4454 return true;
4455 }
4456
4457 args.rval().setInt32(match);
4458 return true;
4459 }
4460