1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "builtin/String.h"
8
9 #include "mozilla/ArrayUtils.h"
10 #include "mozilla/Attributes.h"
11 #include "mozilla/CheckedInt.h"
12 #include "mozilla/FloatingPoint.h"
13 #include "mozilla/PodOperations.h"
14 #include "mozilla/Range.h"
15 #include "mozilla/TypeTraits.h"
16 #include "mozilla/Unused.h"
17
18 #include <ctype.h>
19 #include <limits>
20 #include <string.h>
21
22 #include "jsapi.h"
23 #include "jsarray.h"
24 #include "jsbool.h"
25 #include "jsnum.h"
26 #include "jstypes.h"
27 #include "jsutil.h"
28
29 #include "builtin/intl/CommonFunctions.h"
30 #include "builtin/intl/ICUStubs.h"
31 #include "builtin/RegExp.h"
32 #include "jit/InlinableNatives.h"
33 #include "js/Conversions.h"
34 #include "js/UniquePtr.h"
35 #if ENABLE_INTL_API
36 #include "unicode/uchar.h"
37 #include "unicode/unorm2.h"
38 #endif
39 #include "util/StringBuffer.h"
40 #include "util/Unicode.h"
41 #include "vm/BytecodeUtil.h"
42 #include "vm/GlobalObject.h"
43 #include "vm/Interpreter.h"
44 #include "vm/JSAtom.h"
45 #include "vm/JSContext.h"
46 #include "vm/JSObject.h"
47 #include "vm/Opcodes.h"
48 #include "vm/Printer.h"
49 #include "vm/RegExpObject.h"
50 #include "vm/RegExpStatics.h"
51 #include "vm/SelfHosting.h"
52
53 #include "vm/Interpreter-inl.h"
54 #include "vm/StringObject-inl.h"
55 #include "vm/StringType-inl.h"
56 #include "vm/TypeInference-inl.h"
57
58 using namespace js;
59 using namespace js::gc;
60
61 using JS::Symbol;
62 using JS::SymbolCode;
63
64 using mozilla::CheckedInt;
65 using mozilla::IsNaN;
66 using mozilla::IsSame;
67 using mozilla::PodCopy;
68 using mozilla::RangedPtr;
69
70 using JS::AutoCheckCannotGC;
71
ArgToLinearString(JSContext * cx,const CallArgs & args,unsigned argno)72 static JSLinearString* ArgToLinearString(JSContext* cx, const CallArgs& args,
73 unsigned argno) {
74 if (argno >= args.length()) return cx->names().undefined;
75
76 JSString* str = ToString<CanGC>(cx, args[argno]);
77 if (!str) return nullptr;
78
79 return str->ensureLinear(cx);
80 }
81
82 template <typename CharT>
83 struct MaximumInlineLength;
84
85 template <>
86 struct MaximumInlineLength<Latin1Char> {
87 static constexpr size_t value = JSFatInlineString::MAX_LENGTH_LATIN1;
88 };
89
90 template <>
91 struct MaximumInlineLength<char16_t> {
92 static constexpr size_t value = JSFatInlineString::MAX_LENGTH_TWO_BYTE;
93 };
94
95 // Character buffer class used for ToLowerCase and ToUpperCase operations, as
96 // well as other string operations where the final string length is known in
97 // advance.
98 //
99 // Case conversion operations normally return a string with the same length as
100 // the input string. To avoid over-allocation, we optimistically allocate an
101 // array with same size as the input string and only when we detect special
102 // casing characters, which can change the output string length, we reallocate
103 // the output buffer to the final string length.
104 //
105 // As a further mean to improve runtime performance, the character buffer
106 // contains an inline storage, so we don't need to heap-allocate an array when
107 // a JSInlineString will be used for the output string.
108 //
109 // Why not use mozilla::Vector instead? mozilla::Vector doesn't provide enough
110 // fine-grained control to avoid over-allocation when (re)allocating for exact
111 // buffer sizes. This led to visible performance regressions in µ-benchmarks.
112 template <typename CharT>
113 class MOZ_NON_PARAM InlineCharBuffer {
114 static constexpr size_t InlineCapacity = MaximumInlineLength<CharT>::value;
115
116 CharT inlineStorage[InlineCapacity];
117 UniquePtr<CharT[], JS::FreePolicy> heapStorage;
118
119 #ifdef DEBUG
120 // In debug mode, we keep track of the requested string lengths to ensure
121 // all character buffer methods are called in the correct order and with
122 // the expected argument values.
123 size_t lastRequestedLength = 0;
124
assertValidRequest(size_t expectedLastLength,size_t length)125 void assertValidRequest(size_t expectedLastLength, size_t length) {
126 MOZ_ASSERT(length >= expectedLastLength, "cannot shrink requested length");
127 MOZ_ASSERT(lastRequestedLength == expectedLastLength);
128 lastRequestedLength = length;
129 }
130 #else
assertValidRequest(size_t expectedLastLength,size_t length)131 void assertValidRequest(size_t expectedLastLength, size_t length) {}
132 #endif
133
134 public:
get()135 CharT* get() { return heapStorage ? heapStorage.get() : inlineStorage; }
136
maybeAlloc(JSContext * cx,size_t length)137 bool maybeAlloc(JSContext* cx, size_t length) {
138 assertValidRequest(0, length);
139
140 if (length <= InlineCapacity) return true;
141
142 MOZ_ASSERT(!heapStorage, "heap storage already allocated");
143 heapStorage = cx->make_pod_array<CharT>(length + 1);
144 return !!heapStorage;
145 }
146
maybeRealloc(JSContext * cx,size_t oldLength,size_t newLength)147 bool maybeRealloc(JSContext* cx, size_t oldLength, size_t newLength) {
148 assertValidRequest(oldLength, newLength);
149
150 if (newLength <= InlineCapacity) return true;
151
152 if (!heapStorage) {
153 heapStorage = cx->make_pod_array<CharT>(newLength + 1);
154 if (!heapStorage) return false;
155
156 MOZ_ASSERT(oldLength <= InlineCapacity);
157 PodCopy(heapStorage.get(), inlineStorage, oldLength);
158 return true;
159 }
160
161 CharT* oldChars = heapStorage.release();
162 CharT* newChars = cx->pod_realloc(oldChars, oldLength + 1, newLength + 1);
163 if (!newChars) {
164 js_free(oldChars);
165 return false;
166 }
167
168 heapStorage.reset(newChars);
169 return true;
170 }
171
toStringDontDeflate(JSContext * cx,size_t length)172 JSString* toStringDontDeflate(JSContext* cx, size_t length) {
173 MOZ_ASSERT(length == lastRequestedLength);
174
175 if (JSInlineString::lengthFits<CharT>(length)) {
176 MOZ_ASSERT(
177 !heapStorage,
178 "expected only inline storage when length fits in inline string");
179
180 return NewStringCopyNDontDeflate<CanGC>(cx, inlineStorage, length);
181 }
182
183 MOZ_ASSERT(heapStorage,
184 "heap storage was not allocated for non-inline string");
185
186 heapStorage.get()[length] = '\0'; // Null-terminate
187 JSString* res = NewStringDontDeflate<CanGC>(cx, heapStorage.get(), length);
188 if (!res) return nullptr;
189
190 mozilla::Unused << heapStorage.release();
191 return res;
192 }
193
toString(JSContext * cx,size_t length)194 JSString* toString(JSContext* cx, size_t length) {
195 MOZ_ASSERT(length == lastRequestedLength);
196
197 if (JSInlineString::lengthFits<CharT>(length)) {
198 MOZ_ASSERT(
199 !heapStorage,
200 "expected only inline storage when length fits in inline string");
201
202 return NewStringCopyN<CanGC>(cx, inlineStorage, length);
203 }
204
205 MOZ_ASSERT(heapStorage,
206 "heap storage was not allocated for non-inline string");
207
208 heapStorage.get()[length] = '\0'; // Null-terminate
209 JSString* res = NewString<CanGC>(cx, heapStorage.get(), length);
210 if (!res) return nullptr;
211
212 mozilla::Unused << heapStorage.release();
213 return res;
214 }
215 };
216
217 /*
218 * Forward declarations for URI encode/decode and helper routines
219 */
220 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp);
221
222 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
223
224 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp);
225
226 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp);
227
228 /*
229 * Global string methods
230 */
231
232 /* ES5 B.2.1 */
233 template <typename CharT>
Escape(JSContext * cx,const CharT * chars,uint32_t length,InlineCharBuffer<Latin1Char> & newChars,uint32_t * newLengthOut)234 static bool Escape(JSContext* cx, const CharT* chars, uint32_t length,
235 InlineCharBuffer<Latin1Char>& newChars,
236 uint32_t* newLengthOut) {
237 // clang-format off
238 static const uint8_t shouldPassThrough[128] = {
239 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
240 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
241 0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1, /* !"#$%&'()*+,-./ */
242 1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0, /* 0123456789:;<=>? */
243 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* @ABCDEFGHIJKLMNO */
244 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1, /* PQRSTUVWXYZ[\]^_ */
245 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* `abcdefghijklmno */
246 1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0, /* pqrstuvwxyz{\}~ DEL */
247 };
248 // clang-format on
249
250 /* Take a first pass and see how big the result string will need to be. */
251 uint32_t newLength = length;
252 for (size_t i = 0; i < length; i++) {
253 char16_t ch = chars[i];
254 if (ch < 128 && shouldPassThrough[ch]) continue;
255
256 /* The character will be encoded as %XX or %uXXXX. */
257 newLength += (ch < 256) ? 2 : 5;
258
259 /*
260 * newlength is incremented by at most 5 on each iteration, so worst
261 * case newlength == length * 6. This can't overflow.
262 */
263 static_assert(JSString::MAX_LENGTH < UINT32_MAX / 6,
264 "newlength must not overflow");
265 }
266
267 if (newLength == length) {
268 *newLengthOut = newLength;
269 return true;
270 }
271
272 if (!newChars.maybeAlloc(cx, newLength)) return false;
273
274 static const char digits[] = "0123456789ABCDEF";
275
276 Latin1Char* rawNewChars = newChars.get();
277 size_t i, ni;
278 for (i = 0, ni = 0; i < length; i++) {
279 char16_t ch = chars[i];
280 if (ch < 128 && shouldPassThrough[ch]) {
281 rawNewChars[ni++] = ch;
282 } else if (ch < 256) {
283 rawNewChars[ni++] = '%';
284 rawNewChars[ni++] = digits[ch >> 4];
285 rawNewChars[ni++] = digits[ch & 0xF];
286 } else {
287 rawNewChars[ni++] = '%';
288 rawNewChars[ni++] = 'u';
289 rawNewChars[ni++] = digits[ch >> 12];
290 rawNewChars[ni++] = digits[(ch & 0xF00) >> 8];
291 rawNewChars[ni++] = digits[(ch & 0xF0) >> 4];
292 rawNewChars[ni++] = digits[ch & 0xF];
293 }
294 }
295 MOZ_ASSERT(ni == newLength);
296
297 *newLengthOut = newLength;
298 return true;
299 }
300
str_escape(JSContext * cx,unsigned argc,Value * vp)301 static bool str_escape(JSContext* cx, unsigned argc, Value* vp) {
302 CallArgs args = CallArgsFromVp(argc, vp);
303
304 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
305 if (!str) return false;
306
307 InlineCharBuffer<Latin1Char> newChars;
308 uint32_t newLength = 0; // initialize to silence GCC warning
309 if (str->hasLatin1Chars()) {
310 AutoCheckCannotGC nogc;
311 if (!Escape(cx, str->latin1Chars(nogc), str->length(), newChars,
312 &newLength))
313 return false;
314 } else {
315 AutoCheckCannotGC nogc;
316 if (!Escape(cx, str->twoByteChars(nogc), str->length(), newChars,
317 &newLength))
318 return false;
319 }
320
321 // Return input if no characters need to be escaped.
322 if (newLength == str->length()) {
323 args.rval().setString(str);
324 return true;
325 }
326
327 JSString* res = newChars.toString(cx, newLength);
328 if (!res) return false;
329
330 args.rval().setString(res);
331 return true;
332 }
333
334 template <typename CharT>
Unhex4(const RangedPtr<const CharT> chars,char16_t * result)335 static inline bool Unhex4(const RangedPtr<const CharT> chars,
336 char16_t* result) {
337 char16_t a = chars[0], b = chars[1], c = chars[2], d = chars[3];
338
339 if (!(JS7_ISHEX(a) && JS7_ISHEX(b) && JS7_ISHEX(c) && JS7_ISHEX(d)))
340 return false;
341
342 *result =
343 (((((JS7_UNHEX(a) << 4) + JS7_UNHEX(b)) << 4) + JS7_UNHEX(c)) << 4) +
344 JS7_UNHEX(d);
345 return true;
346 }
347
348 template <typename CharT>
Unhex2(const RangedPtr<const CharT> chars,char16_t * result)349 static inline bool Unhex2(const RangedPtr<const CharT> chars,
350 char16_t* result) {
351 char16_t a = chars[0], b = chars[1];
352
353 if (!(JS7_ISHEX(a) && JS7_ISHEX(b))) return false;
354
355 *result = (JS7_UNHEX(a) << 4) + JS7_UNHEX(b);
356 return true;
357 }
358
359 template <typename CharT>
Unescape(StringBuffer & sb,const mozilla::Range<const CharT> chars)360 static bool Unescape(StringBuffer& sb,
361 const mozilla::Range<const CharT> chars) {
362 // Step 2.
363 uint32_t length = chars.length();
364
365 /*
366 * Note that the spec algorithm has been optimized to avoid building
367 * a string in the case where no escapes are present.
368 */
369 bool building = false;
370
371 #define ENSURE_BUILDING \
372 do { \
373 if (!building) { \
374 building = true; \
375 if (!sb.reserve(length)) return false; \
376 sb.infallibleAppend(chars.begin().get(), k); \
377 } \
378 } while (false);
379
380 // Step 4.
381 uint32_t k = 0;
382
383 // Step 5.
384 while (k < length) {
385 // Step 5.a.
386 char16_t c = chars[k];
387
388 // Step 5.b.
389 if (c == '%') {
390 static_assert(JSString::MAX_LENGTH < UINT32_MAX - 6,
391 "String length is not near UINT32_MAX");
392
393 // Steps 5.b.i-ii.
394 if (k + 6 <= length && chars[k + 1] == 'u') {
395 if (Unhex4(chars.begin() + k + 2, &c)) {
396 ENSURE_BUILDING
397 k += 5;
398 }
399 } else if (k + 3 <= length) {
400 if (Unhex2(chars.begin() + k + 1, &c)) {
401 ENSURE_BUILDING
402 k += 2;
403 }
404 }
405 }
406
407 // Step 5.c.
408 if (building && !sb.append(c)) return false;
409
410 // Step 5.d.
411 k += 1;
412 }
413
414 return true;
415 #undef ENSURE_BUILDING
416 }
417
418 // ES2018 draft rev f83aa38282c2a60c6916ebc410bfdf105a0f6a54
419 // B.2.1.2 unescape ( string )
str_unescape(JSContext * cx,unsigned argc,Value * vp)420 static bool str_unescape(JSContext* cx, unsigned argc, Value* vp) {
421 CallArgs args = CallArgsFromVp(argc, vp);
422
423 // Step 1.
424 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
425 if (!str) return false;
426
427 // Step 3.
428 StringBuffer sb(cx);
429 if (str->hasTwoByteChars() && !sb.ensureTwoByteChars()) return false;
430
431 // Steps 2, 4-5.
432 if (str->hasLatin1Chars()) {
433 AutoCheckCannotGC nogc;
434 if (!Unescape(sb, str->latin1Range(nogc))) return false;
435 } else {
436 AutoCheckCannotGC nogc;
437 if (!Unescape(sb, str->twoByteRange(nogc))) return false;
438 }
439
440 // Step 6.
441 JSLinearString* result;
442 if (!sb.empty()) {
443 result = sb.finishString();
444 if (!result) return false;
445 } else {
446 result = str;
447 }
448
449 args.rval().setString(result);
450 return true;
451 }
452
str_uneval(JSContext * cx,unsigned argc,Value * vp)453 static bool str_uneval(JSContext* cx, unsigned argc, Value* vp) {
454 CallArgs args = CallArgsFromVp(argc, vp);
455 JSString* str = ValueToSource(cx, args.get(0));
456 if (!str) return false;
457
458 args.rval().setString(str);
459 return true;
460 }
461
462 static const JSFunctionSpec string_functions[] = {
463 JS_FN(js_escape_str, str_escape, 1, JSPROP_RESOLVING),
464 JS_FN(js_unescape_str, str_unescape, 1, JSPROP_RESOLVING),
465 JS_FN(js_uneval_str, str_uneval, 1, JSPROP_RESOLVING),
466 JS_FN(js_decodeURI_str, str_decodeURI, 1, JSPROP_RESOLVING),
467 JS_FN(js_encodeURI_str, str_encodeURI, 1, JSPROP_RESOLVING),
468 JS_FN(js_decodeURIComponent_str, str_decodeURI_Component, 1,
469 JSPROP_RESOLVING),
470 JS_FN(js_encodeURIComponent_str, str_encodeURI_Component, 1,
471 JSPROP_RESOLVING),
472
473 JS_FS_END};
474
475 static const unsigned STRING_ELEMENT_ATTRS =
476 JSPROP_ENUMERATE | JSPROP_READONLY | JSPROP_PERMANENT;
477
str_enumerate(JSContext * cx,HandleObject obj)478 static bool str_enumerate(JSContext* cx, HandleObject obj) {
479 RootedString str(cx, obj->as<StringObject>().unbox());
480 js::StaticStrings& staticStrings = cx->staticStrings();
481
482 RootedValue value(cx);
483 for (size_t i = 0, length = str->length(); i < length; i++) {
484 JSString* str1 = staticStrings.getUnitStringForElement(cx, str, i);
485 if (!str1) return false;
486 value.setString(str1);
487 if (!DefineDataElement(cx, obj, i, value,
488 STRING_ELEMENT_ATTRS | JSPROP_RESOLVING))
489 return false;
490 }
491
492 return true;
493 }
494
str_mayResolve(const JSAtomState &,jsid id,JSObject *)495 static bool str_mayResolve(const JSAtomState&, jsid id, JSObject*) {
496 // str_resolve ignores non-integer ids.
497 return JSID_IS_INT(id);
498 }
499
str_resolve(JSContext * cx,HandleObject obj,HandleId id,bool * resolvedp)500 static bool str_resolve(JSContext* cx, HandleObject obj, HandleId id,
501 bool* resolvedp) {
502 if (!JSID_IS_INT(id)) return true;
503
504 RootedString str(cx, obj->as<StringObject>().unbox());
505
506 int32_t slot = JSID_TO_INT(id);
507 if ((size_t)slot < str->length()) {
508 JSString* str1 =
509 cx->staticStrings().getUnitStringForElement(cx, str, size_t(slot));
510 if (!str1) return false;
511 RootedValue value(cx, StringValue(str1));
512 if (!DefineDataElement(cx, obj, uint32_t(slot), value,
513 STRING_ELEMENT_ATTRS | JSPROP_RESOLVING)) {
514 return false;
515 }
516 *resolvedp = true;
517 }
518 return true;
519 }
520
521 static const ClassOps StringObjectClassOps = {
522 nullptr, /* addProperty */
523 nullptr, /* delProperty */
524 str_enumerate, nullptr, /* newEnumerate */
525 str_resolve, str_mayResolve};
526
527 const Class StringObject::class_ = {
528 js_String_str,
529 JSCLASS_HAS_RESERVED_SLOTS(StringObject::RESERVED_SLOTS) |
530 JSCLASS_HAS_CACHED_PROTO(JSProto_String),
531 &StringObjectClassOps};
532
533 /*
534 * Perform the initial |RequireObjectCoercible(thisv)| and |ToString(thisv)|
535 * from nearly all String.prototype.* functions.
536 */
ToStringForStringFunction(JSContext * cx,HandleValue thisv)537 static MOZ_ALWAYS_INLINE JSString* ToStringForStringFunction(
538 JSContext* cx, HandleValue thisv) {
539 if (!CheckRecursionLimit(cx)) return nullptr;
540
541 if (thisv.isString()) return thisv.toString();
542
543 if (thisv.isObject()) {
544 RootedObject obj(cx, &thisv.toObject());
545 if (obj->is<StringObject>()) {
546 StringObject* nobj = &obj->as<StringObject>();
547 // We have to make sure that the ToPrimitive call from ToString
548 // would be unobservable.
549 if (HasNoToPrimitiveMethodPure(nobj, cx) &&
550 HasNativeMethodPure(nobj, cx->names().toString, str_toString, cx)) {
551 return nobj->unbox();
552 }
553 }
554 } else if (thisv.isNullOrUndefined()) {
555 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
556 JSMSG_CANT_CONVERT_TO,
557 thisv.isNull() ? "null" : "undefined", "object");
558 return nullptr;
559 }
560
561 return ToStringSlow<CanGC>(cx, thisv);
562 }
563
IsString(HandleValue v)564 MOZ_ALWAYS_INLINE bool IsString(HandleValue v) {
565 return v.isString() || (v.isObject() && v.toObject().is<StringObject>());
566 }
567
str_toSource_impl(JSContext * cx,const CallArgs & args)568 MOZ_ALWAYS_INLINE bool str_toSource_impl(JSContext* cx, const CallArgs& args) {
569 MOZ_ASSERT(IsString(args.thisv()));
570
571 Rooted<JSString*> str(cx, ToString<CanGC>(cx, args.thisv()));
572 if (!str) return false;
573
574 str = QuoteString(cx, str, '"');
575 if (!str) return false;
576
577 StringBuffer sb(cx);
578 if (!sb.append("(new String(") || !sb.append(str) || !sb.append("))"))
579 return false;
580
581 str = sb.finishString();
582 if (!str) return false;
583 args.rval().setString(str);
584 return true;
585 }
586
str_toSource(JSContext * cx,unsigned argc,Value * vp)587 static bool str_toSource(JSContext* cx, unsigned argc, Value* vp) {
588 CallArgs args = CallArgsFromVp(argc, vp);
589 return CallNonGenericMethod<IsString, str_toSource_impl>(cx, args);
590 }
591
str_toString_impl(JSContext * cx,const CallArgs & args)592 MOZ_ALWAYS_INLINE bool str_toString_impl(JSContext* cx, const CallArgs& args) {
593 MOZ_ASSERT(IsString(args.thisv()));
594
595 args.rval().setString(
596 args.thisv().isString()
597 ? args.thisv().toString()
598 : args.thisv().toObject().as<StringObject>().unbox());
599 return true;
600 }
601
str_toString(JSContext * cx,unsigned argc,Value * vp)602 bool js::str_toString(JSContext* cx, unsigned argc, Value* vp) {
603 CallArgs args = CallArgsFromVp(argc, vp);
604 return CallNonGenericMethod<IsString, str_toString_impl>(cx, args);
605 }
606
607 /*
608 * Java-like string native methods.
609 */
610
SubstringKernel(JSContext * cx,HandleString str,int32_t beginInt,int32_t lengthInt)611 JSString* js::SubstringKernel(JSContext* cx, HandleString str, int32_t beginInt,
612 int32_t lengthInt) {
613 MOZ_ASSERT(0 <= beginInt);
614 MOZ_ASSERT(0 <= lengthInt);
615 MOZ_ASSERT(uint32_t(beginInt) <= str->length());
616 MOZ_ASSERT(uint32_t(lengthInt) <= str->length() - beginInt);
617
618 uint32_t begin = beginInt;
619 uint32_t len = lengthInt;
620
621 /*
622 * Optimization for one level deep ropes.
623 * This is common for the following pattern:
624 *
625 * while() {
626 * text = text.substr(0, x) + "bla" + text.substr(x)
627 * test.charCodeAt(x + 1)
628 * }
629 */
630 if (str->isRope()) {
631 JSRope* rope = &str->asRope();
632
633 /* Substring is totally in leftChild of rope. */
634 if (begin + len <= rope->leftChild()->length())
635 return NewDependentString(cx, rope->leftChild(), begin, len);
636
637 /* Substring is totally in rightChild of rope. */
638 if (begin >= rope->leftChild()->length()) {
639 begin -= rope->leftChild()->length();
640 return NewDependentString(cx, rope->rightChild(), begin, len);
641 }
642
643 /*
644 * Requested substring is partly in the left and partly in right child.
645 * Create a rope of substrings for both childs.
646 */
647 MOZ_ASSERT(begin < rope->leftChild()->length() &&
648 begin + len > rope->leftChild()->length());
649
650 size_t lhsLength = rope->leftChild()->length() - begin;
651 size_t rhsLength = begin + len - rope->leftChild()->length();
652
653 Rooted<JSRope*> ropeRoot(cx, rope);
654 RootedString lhs(
655 cx, NewDependentString(cx, ropeRoot->leftChild(), begin, lhsLength));
656 if (!lhs) return nullptr;
657
658 RootedString rhs(
659 cx, NewDependentString(cx, ropeRoot->rightChild(), 0, rhsLength));
660 if (!rhs) return nullptr;
661
662 return JSRope::new_<CanGC>(cx, lhs, rhs, len);
663 }
664
665 return NewDependentString(cx, str, begin, len);
666 }
667
668 /**
669 * U+03A3 GREEK CAPITAL LETTER SIGMA has two different lower case mappings
670 * depending on its context:
671 * When it's preceded by a cased character and not followed by another cased
672 * character, its lower case form is U+03C2 GREEK SMALL LETTER FINAL SIGMA.
673 * Otherwise its lower case mapping is U+03C3 GREEK SMALL LETTER SIGMA.
674 *
675 * Unicode 9.0, §3.13 Default Case Algorithms
676 */
Final_Sigma(const char16_t * chars,size_t length,size_t index)677 static char16_t Final_Sigma(const char16_t* chars, size_t length,
678 size_t index) {
679 MOZ_ASSERT(index < length);
680 MOZ_ASSERT(chars[index] == unicode::GREEK_CAPITAL_LETTER_SIGMA);
681 MOZ_ASSERT(unicode::ToLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA) ==
682 unicode::GREEK_SMALL_LETTER_SIGMA);
683
684 #if ENABLE_INTL_API
685 // Tell the analysis the BinaryProperty.contains function pointer called by
686 // u_hasBinaryProperty cannot GC.
687 JS::AutoSuppressGCAnalysis nogc;
688
689 bool precededByCased = false;
690 for (size_t i = index; i > 0;) {
691 char16_t c = chars[--i];
692 uint32_t codePoint = c;
693 if (unicode::IsTrailSurrogate(c) && i > 0) {
694 char16_t lead = chars[i - 1];
695 if (unicode::IsLeadSurrogate(lead)) {
696 codePoint = unicode::UTF16Decode(lead, c);
697 i--;
698 }
699 }
700
701 // Ignore any characters with the property Case_Ignorable.
702 // NB: We need to skip over all Case_Ignorable characters, even when
703 // they also have the Cased binary property.
704 if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) continue;
705
706 precededByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
707 break;
708 }
709 if (!precededByCased) return unicode::GREEK_SMALL_LETTER_SIGMA;
710
711 bool followedByCased = false;
712 for (size_t i = index + 1; i < length;) {
713 char16_t c = chars[i++];
714 uint32_t codePoint = c;
715 if (unicode::IsLeadSurrogate(c) && i < length) {
716 char16_t trail = chars[i];
717 if (unicode::IsTrailSurrogate(trail)) {
718 codePoint = unicode::UTF16Decode(c, trail);
719 i++;
720 }
721 }
722
723 // Ignore any characters with the property Case_Ignorable.
724 // NB: We need to skip over all Case_Ignorable characters, even when
725 // they also have the Cased binary property.
726 if (u_hasBinaryProperty(codePoint, UCHAR_CASE_IGNORABLE)) continue;
727
728 followedByCased = u_hasBinaryProperty(codePoint, UCHAR_CASED);
729 break;
730 }
731 if (!followedByCased) return unicode::GREEK_SMALL_LETTER_FINAL_SIGMA;
732 #endif
733
734 return unicode::GREEK_SMALL_LETTER_SIGMA;
735 }
736
Final_Sigma(const Latin1Char * chars,size_t length,size_t index)737 static Latin1Char Final_Sigma(const Latin1Char* chars, size_t length,
738 size_t index) {
739 MOZ_ASSERT_UNREACHABLE("U+03A3 is not a Latin-1 character");
740 return 0;
741 }
742
743 // If |srcLength == destLength| is true, the destination buffer was allocated
744 // with the same size as the source buffer. When we append characters which
745 // have special casing mappings, we test |srcLength == destLength| to decide
746 // if we need to back out and reallocate a sufficiently large destination
747 // buffer. Otherwise the destination buffer was allocated with the correct
748 // size to hold all lower case mapped characters, i.e.
749 // |destLength == ToLowerCaseLength(srcChars, 0, srcLength)| is true.
750 template <typename CharT>
ToLowerCaseImpl(CharT * destChars,const CharT * srcChars,size_t startIndex,size_t srcLength,size_t destLength)751 static size_t ToLowerCaseImpl(CharT* destChars, const CharT* srcChars,
752 size_t startIndex, size_t srcLength,
753 size_t destLength) {
754 MOZ_ASSERT(startIndex < srcLength);
755 MOZ_ASSERT(srcLength <= destLength);
756 MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value), srcLength == destLength);
757
758 size_t j = startIndex;
759 for (size_t i = startIndex; i < srcLength; i++) {
760 char16_t c = srcChars[i];
761 if (!IsSame<CharT, Latin1Char>::value) {
762 if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
763 char16_t trail = srcChars[i + 1];
764 if (unicode::IsTrailSurrogate(trail)) {
765 trail = unicode::ToLowerCaseNonBMPTrail(c, trail);
766 destChars[j++] = c;
767 destChars[j++] = trail;
768 i++;
769 continue;
770 }
771 }
772
773 // Special case: U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE
774 // lowercases to <U+0069 U+0307>.
775 if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
776 // Return if the output buffer is too small.
777 if (srcLength == destLength) return i;
778
779 destChars[j++] = CharT('i');
780 destChars[j++] = CharT(unicode::COMBINING_DOT_ABOVE);
781 continue;
782 }
783
784 // Special case: U+03A3 GREEK CAPITAL LETTER SIGMA lowercases to
785 // one of two codepoints depending on context.
786 if (c == unicode::GREEK_CAPITAL_LETTER_SIGMA) {
787 destChars[j++] = Final_Sigma(srcChars, srcLength, i);
788 continue;
789 }
790 }
791
792 c = unicode::ToLowerCase(c);
793 MOZ_ASSERT_IF((IsSame<CharT, Latin1Char>::value),
794 c <= JSString::MAX_LATIN1_CHAR);
795 destChars[j++] = c;
796 }
797
798 MOZ_ASSERT(j == destLength);
799 return srcLength;
800 }
801
ToLowerCaseLength(const char16_t * chars,size_t startIndex,size_t length)802 static size_t ToLowerCaseLength(const char16_t* chars, size_t startIndex,
803 size_t length) {
804 size_t lowerLength = length;
805 for (size_t i = startIndex; i < length; i++) {
806 char16_t c = chars[i];
807
808 // U+0130 is lowercased to the two-element sequence <U+0069 U+0307>.
809 if (c == unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) lowerLength += 1;
810 }
811 return lowerLength;
812 }
813
ToLowerCaseLength(const Latin1Char * chars,size_t startIndex,size_t length)814 static size_t ToLowerCaseLength(const Latin1Char* chars, size_t startIndex,
815 size_t length) {
816 MOZ_ASSERT_UNREACHABLE("never called for Latin-1 strings");
817 return 0;
818 }
819
820 template <typename CharT>
ToLowerCase(JSContext * cx,JSLinearString * str)821 static JSString* ToLowerCase(JSContext* cx, JSLinearString* str) {
822 // Unlike toUpperCase, toLowerCase has the nice invariant that if the
823 // input is a Latin-1 string, the output is also a Latin-1 string.
824
825 InlineCharBuffer<CharT> newChars;
826
827 const size_t length = str->length();
828 size_t resultLength;
829 {
830 AutoCheckCannotGC nogc;
831 const CharT* chars = str->chars<CharT>(nogc);
832
833 // We don't need extra special casing checks in the loop below,
834 // because U+0130 LATIN CAPITAL LETTER I WITH DOT ABOVE and U+03A3
835 // GREEK CAPITAL LETTER SIGMA already have simple lower case mappings.
836 MOZ_ASSERT(
837 unicode::CanLowerCase(unicode::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE),
838 "U+0130 has a simple lower case mapping");
839 MOZ_ASSERT(unicode::CanLowerCase(unicode::GREEK_CAPITAL_LETTER_SIGMA),
840 "U+03A3 has a simple lower case mapping");
841
842 // One element Latin-1 strings can be directly retrieved from the
843 // static strings cache.
844 if (IsSame<CharT, Latin1Char>::value) {
845 if (length == 1) {
846 char16_t lower = unicode::ToLowerCase(chars[0]);
847 MOZ_ASSERT(lower <= JSString::MAX_LATIN1_CHAR);
848 MOZ_ASSERT(StaticStrings::hasUnit(lower));
849
850 return cx->staticStrings().getUnit(lower);
851 }
852 }
853
854 // Look for the first character that changes when lowercased.
855 size_t i = 0;
856 for (; i < length; i++) {
857 CharT c = chars[i];
858 if (!IsSame<CharT, Latin1Char>::value) {
859 if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
860 CharT trail = chars[i + 1];
861 if (unicode::IsTrailSurrogate(trail)) {
862 if (unicode::CanLowerCaseNonBMP(c, trail)) break;
863
864 i++;
865 continue;
866 }
867 }
868 }
869 if (unicode::CanLowerCase(c)) break;
870 }
871
872 // If no character needs to change, return the input string.
873 if (i == length) return str;
874
875 resultLength = length;
876 if (!newChars.maybeAlloc(cx, resultLength)) return nullptr;
877
878 PodCopy(newChars.get(), chars, i);
879
880 size_t readChars =
881 ToLowerCaseImpl(newChars.get(), chars, i, length, resultLength);
882 if (readChars < length) {
883 MOZ_ASSERT((!IsSame<CharT, Latin1Char>::value),
884 "Latin-1 strings don't have special lower case mappings");
885 resultLength = ToLowerCaseLength(chars, readChars, length);
886
887 if (!newChars.maybeRealloc(cx, length, resultLength)) return nullptr;
888
889 MOZ_ALWAYS_TRUE(length == ToLowerCaseImpl(newChars.get(), chars,
890 readChars, length,
891 resultLength));
892 }
893 }
894
895 return newChars.toStringDontDeflate(cx, resultLength);
896 }
897
StringToLowerCase(JSContext * cx,HandleString string)898 JSString* js::StringToLowerCase(JSContext* cx, HandleString string) {
899 JSLinearString* linear = string->ensureLinear(cx);
900 if (!linear) return nullptr;
901
902 if (linear->hasLatin1Chars()) return ToLowerCase<Latin1Char>(cx, linear);
903 return ToLowerCase<char16_t>(cx, linear);
904 }
905
str_toLowerCase(JSContext * cx,unsigned argc,Value * vp)906 bool js::str_toLowerCase(JSContext* cx, unsigned argc, Value* vp) {
907 CallArgs args = CallArgsFromVp(argc, vp);
908
909 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
910 if (!str) return false;
911
912 JSString* result = StringToLowerCase(cx, str);
913 if (!result) return false;
914
915 args.rval().setString(result);
916 return true;
917 }
918
CaseMappingLocale(JSContext * cx,JSString * str)919 static const char* CaseMappingLocale(JSContext* cx, JSString* str) {
920 JSLinearString* locale = str->ensureLinear(cx);
921 if (!locale) return nullptr;
922
923 MOZ_ASSERT(locale->length() >= 2, "locale is a valid language tag");
924
925 // Lithuanian, Turkish, and Azeri have language dependent case mappings.
926 static const char languagesWithSpecialCasing[][3] = {"lt", "tr", "az"};
927
928 // All strings in |languagesWithSpecialCasing| are of length two, so we
929 // only need to compare the first two characters to find a matching locale.
930 // ES2017 Intl, §9.2.2 BestAvailableLocale
931 if (locale->length() == 2 || locale->latin1OrTwoByteChar(2) == '-') {
932 for (const auto& language : languagesWithSpecialCasing) {
933 if (locale->latin1OrTwoByteChar(0) == language[0] &&
934 locale->latin1OrTwoByteChar(1) == language[1]) {
935 return language;
936 }
937 }
938 }
939
940 return ""; // ICU root locale
941 }
942
intl_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)943 bool js::intl_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
944 CallArgs args = CallArgsFromVp(argc, vp);
945 MOZ_ASSERT(args.length() == 2);
946 MOZ_ASSERT(args[0].isString());
947 MOZ_ASSERT(args[1].isString());
948
949 RootedString string(cx, args[0].toString());
950
951 const char* locale = CaseMappingLocale(cx, args[1].toString());
952 if (!locale) return false;
953
954 // Call String.prototype.toLowerCase() for language independent casing.
955 if (intl::StringsAreEqual(locale, "")) {
956 JSString* str = StringToLowerCase(cx, string);
957 if (!str) return false;
958
959 args.rval().setString(str);
960 return true;
961 }
962
963 AutoStableStringChars inputChars(cx);
964 if (!inputChars.initTwoByte(cx, string)) return false;
965 mozilla::Range<const char16_t> input = inputChars.twoByteRange();
966
967 // Maximum case mapping length is three characters.
968 static_assert(JSString::MAX_LENGTH < INT32_MAX / 3,
969 "Case conversion doesn't overflow int32_t indices");
970
971 JSString* str = intl::CallICU(
972 cx, [&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
973 return u_strToLower(chars, size, input.begin().get(), input.length(),
974 locale, status);
975 });
976 if (!str) return false;
977
978 args.rval().setString(str);
979 return true;
980 }
981
982 #if EXPOSE_INTL_API
983
984 // String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
985 // with core functionality performed by the intrinsic above.
986
987 #else
988
str_toLocaleLowerCase(JSContext * cx,unsigned argc,Value * vp)989 bool js::str_toLocaleLowerCase(JSContext* cx, unsigned argc, Value* vp) {
990 CallArgs args = CallArgsFromVp(argc, vp);
991
992 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
993 if (!str) return false;
994
995 /*
996 * Forcefully ignore the first (or any) argument and return toLowerCase(),
997 * ECMA has reserved that argument, presumably for defining the locale.
998 */
999 if (cx->runtime()->localeCallbacks &&
1000 cx->runtime()->localeCallbacks->localeToLowerCase) {
1001 RootedValue result(cx);
1002 if (!cx->runtime()->localeCallbacks->localeToLowerCase(cx, str, &result))
1003 return false;
1004
1005 args.rval().set(result);
1006 return true;
1007 }
1008
1009 RootedLinearString linear(cx, str->ensureLinear(cx));
1010 if (!linear) return false;
1011
1012 JSString* result = StringToLowerCase(cx, linear);
1013 if (!result) return false;
1014
1015 args.rval().setString(result);
1016 return true;
1017 }
1018
1019 #endif // EXPOSE_INTL_API
1020
CanUpperCaseSpecialCasing(Latin1Char charCode)1021 static inline bool CanUpperCaseSpecialCasing(Latin1Char charCode) {
1022 // Handle U+00DF LATIN SMALL LETTER SHARP S inline, all other Latin-1
1023 // characters don't have special casing rules.
1024 MOZ_ASSERT_IF(charCode != unicode::LATIN_SMALL_LETTER_SHARP_S,
1025 !unicode::CanUpperCaseSpecialCasing(charCode));
1026
1027 return charCode == unicode::LATIN_SMALL_LETTER_SHARP_S;
1028 }
1029
CanUpperCaseSpecialCasing(char16_t charCode)1030 static inline bool CanUpperCaseSpecialCasing(char16_t charCode) {
1031 return unicode::CanUpperCaseSpecialCasing(charCode);
1032 }
1033
LengthUpperCaseSpecialCasing(Latin1Char charCode)1034 static inline size_t LengthUpperCaseSpecialCasing(Latin1Char charCode) {
1035 // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1036 MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1037
1038 return 2;
1039 }
1040
LengthUpperCaseSpecialCasing(char16_t charCode)1041 static inline size_t LengthUpperCaseSpecialCasing(char16_t charCode) {
1042 MOZ_ASSERT(CanUpperCaseSpecialCasing(charCode));
1043
1044 return unicode::LengthUpperCaseSpecialCasing(charCode);
1045 }
1046
AppendUpperCaseSpecialCasing(char16_t charCode,Latin1Char * elements,size_t * index)1047 static inline void AppendUpperCaseSpecialCasing(char16_t charCode,
1048 Latin1Char* elements,
1049 size_t* index) {
1050 // U+00DF LATIN SMALL LETTER SHARP S is uppercased to two 'S'.
1051 MOZ_ASSERT(charCode == unicode::LATIN_SMALL_LETTER_SHARP_S);
1052 static_assert('S' <= JSString::MAX_LATIN1_CHAR, "'S' is a Latin-1 character");
1053
1054 elements[(*index)++] = 'S';
1055 elements[(*index)++] = 'S';
1056 }
1057
AppendUpperCaseSpecialCasing(char16_t charCode,char16_t * elements,size_t * index)1058 static inline void AppendUpperCaseSpecialCasing(char16_t charCode,
1059 char16_t* elements,
1060 size_t* index) {
1061 unicode::AppendUpperCaseSpecialCasing(charCode, elements, index);
1062 }
1063
1064 // See ToLowerCaseImpl for an explanation of the parameters.
1065 template <typename DestChar, typename SrcChar>
ToUpperCaseImpl(DestChar * destChars,const SrcChar * srcChars,size_t startIndex,size_t srcLength,size_t destLength)1066 static size_t ToUpperCaseImpl(DestChar* destChars, const SrcChar* srcChars,
1067 size_t startIndex, size_t srcLength,
1068 size_t destLength) {
1069 static_assert(IsSame<SrcChar, Latin1Char>::value ||
1070 !IsSame<DestChar, Latin1Char>::value,
1071 "cannot write non-Latin-1 characters into Latin-1 string");
1072 MOZ_ASSERT(startIndex < srcLength);
1073 MOZ_ASSERT(srcLength <= destLength);
1074
1075 size_t j = startIndex;
1076 for (size_t i = startIndex; i < srcLength; i++) {
1077 char16_t c = srcChars[i];
1078 if (!IsSame<DestChar, Latin1Char>::value) {
1079 if (unicode::IsLeadSurrogate(c) && i + 1 < srcLength) {
1080 char16_t trail = srcChars[i + 1];
1081 if (unicode::IsTrailSurrogate(trail)) {
1082 trail = unicode::ToUpperCaseNonBMPTrail(c, trail);
1083 destChars[j++] = c;
1084 destChars[j++] = trail;
1085 i++;
1086 continue;
1087 }
1088 }
1089 }
1090
1091 if (MOZ_UNLIKELY(c > 0x7f &&
1092 CanUpperCaseSpecialCasing(static_cast<SrcChar>(c)))) {
1093 // Return if the output buffer is too small.
1094 if (srcLength == destLength) return i;
1095
1096 AppendUpperCaseSpecialCasing(c, destChars, &j);
1097 continue;
1098 }
1099
1100 c = unicode::ToUpperCase(c);
1101 MOZ_ASSERT_IF((IsSame<DestChar, Latin1Char>::value),
1102 c <= JSString::MAX_LATIN1_CHAR);
1103 destChars[j++] = c;
1104 }
1105
1106 MOZ_ASSERT(j == destLength);
1107 return srcLength;
1108 }
1109
1110 // Explicit instantiation so we don't hit the static_assert from above.
ToUpperCaseImpl(Latin1Char * destChars,const char16_t * srcChars,size_t startIndex,size_t srcLength,size_t destLength)1111 static bool ToUpperCaseImpl(Latin1Char* destChars, const char16_t* srcChars,
1112 size_t startIndex, size_t srcLength,
1113 size_t destLength) {
1114 MOZ_ASSERT_UNREACHABLE(
1115 "cannot write non-Latin-1 characters into Latin-1 string");
1116 return false;
1117 }
1118
1119 template <typename CharT>
ToUpperCaseLength(const CharT * chars,size_t startIndex,size_t length)1120 static size_t ToUpperCaseLength(const CharT* chars, size_t startIndex,
1121 size_t length) {
1122 size_t upperLength = length;
1123 for (size_t i = startIndex; i < length; i++) {
1124 char16_t c = chars[i];
1125
1126 if (c > 0x7f && CanUpperCaseSpecialCasing(static_cast<CharT>(c)))
1127 upperLength += LengthUpperCaseSpecialCasing(static_cast<CharT>(c)) - 1;
1128 }
1129 return upperLength;
1130 }
1131
1132 template <typename DestChar, typename SrcChar>
CopyChars(DestChar * destChars,const SrcChar * srcChars,size_t length)1133 static inline void CopyChars(DestChar* destChars, const SrcChar* srcChars,
1134 size_t length) {
1135 static_assert(!IsSame<DestChar, SrcChar>::value,
1136 "PodCopy is used for the same type case");
1137 for (size_t i = 0; i < length; i++) destChars[i] = srcChars[i];
1138 }
1139
1140 template <typename CharT>
CopyChars(CharT * destChars,const CharT * srcChars,size_t length)1141 static inline void CopyChars(CharT* destChars, const CharT* srcChars,
1142 size_t length) {
1143 PodCopy(destChars, srcChars, length);
1144 }
1145
1146 template <typename DestChar, typename SrcChar>
ToUpperCase(JSContext * cx,InlineCharBuffer<DestChar> & newChars,const SrcChar * chars,size_t startIndex,size_t length,size_t * resultLength)1147 static inline bool ToUpperCase(JSContext* cx,
1148 InlineCharBuffer<DestChar>& newChars,
1149 const SrcChar* chars, size_t startIndex,
1150 size_t length, size_t* resultLength) {
1151 MOZ_ASSERT(startIndex < length);
1152
1153 *resultLength = length;
1154 if (!newChars.maybeAlloc(cx, length)) return false;
1155
1156 CopyChars(newChars.get(), chars, startIndex);
1157
1158 size_t readChars =
1159 ToUpperCaseImpl(newChars.get(), chars, startIndex, length, length);
1160 if (readChars < length) {
1161 size_t actualLength = ToUpperCaseLength(chars, readChars, length);
1162
1163 *resultLength = actualLength;
1164 if (!newChars.maybeRealloc(cx, length, actualLength)) return false;
1165
1166 MOZ_ALWAYS_TRUE(length == ToUpperCaseImpl(newChars.get(), chars, readChars,
1167 length, actualLength));
1168 }
1169
1170 return true;
1171 }
1172
1173 template <typename CharT>
ToUpperCase(JSContext * cx,JSLinearString * str)1174 static JSString* ToUpperCase(JSContext* cx, JSLinearString* str) {
1175 using Latin1Buffer = InlineCharBuffer<Latin1Char>;
1176 using TwoByteBuffer = InlineCharBuffer<char16_t>;
1177
1178 mozilla::MaybeOneOf<Latin1Buffer, TwoByteBuffer> newChars;
1179 const size_t length = str->length();
1180 size_t resultLength;
1181 {
1182 AutoCheckCannotGC nogc;
1183 const CharT* chars = str->chars<CharT>(nogc);
1184
1185 // Most one element Latin-1 strings can be directly retrieved from the
1186 // static strings cache.
1187 if (IsSame<CharT, Latin1Char>::value) {
1188 if (length == 1) {
1189 Latin1Char c = chars[0];
1190 if (c != unicode::MICRO_SIGN &&
1191 c != unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS &&
1192 c != unicode::LATIN_SMALL_LETTER_SHARP_S) {
1193 char16_t upper = unicode::ToUpperCase(c);
1194 MOZ_ASSERT(upper <= JSString::MAX_LATIN1_CHAR);
1195 MOZ_ASSERT(StaticStrings::hasUnit(upper));
1196
1197 return cx->staticStrings().getUnit(upper);
1198 }
1199
1200 MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR ||
1201 CanUpperCaseSpecialCasing(c));
1202 }
1203 }
1204
1205 // Look for the first character that changes when uppercased.
1206 size_t i = 0;
1207 for (; i < length; i++) {
1208 CharT c = chars[i];
1209 if (!IsSame<CharT, Latin1Char>::value) {
1210 if (unicode::IsLeadSurrogate(c) && i + 1 < length) {
1211 CharT trail = chars[i + 1];
1212 if (unicode::IsTrailSurrogate(trail)) {
1213 if (unicode::CanUpperCaseNonBMP(c, trail)) break;
1214
1215 i++;
1216 continue;
1217 }
1218 }
1219 }
1220 if (unicode::CanUpperCase(c)) break;
1221 if (MOZ_UNLIKELY(c > 0x7f && CanUpperCaseSpecialCasing(c))) break;
1222 }
1223
1224 // If no character needs to change, return the input string.
1225 if (i == length) return str;
1226
1227 // The string changes when uppercased, so we must create a new string.
1228 // Can it be Latin-1?
1229 //
1230 // If the original string is Latin-1, it can -- unless the string
1231 // contains U+00B5 MICRO SIGN or U+00FF SMALL LETTER Y WITH DIAERESIS,
1232 // the only Latin-1 codepoints that don't uppercase within Latin-1.
1233 // Search for those codepoints to decide whether the new string can be
1234 // Latin-1.
1235 // If the original string is a two-byte string, its uppercase form is
1236 // so rarely Latin-1 that we don't even consider creating a new
1237 // Latin-1 string.
1238 bool resultIsLatin1;
1239 if (IsSame<CharT, Latin1Char>::value) {
1240 resultIsLatin1 = true;
1241 for (size_t j = i; j < length; j++) {
1242 Latin1Char c = chars[j];
1243 if (c == unicode::MICRO_SIGN ||
1244 c == unicode::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS) {
1245 MOZ_ASSERT(unicode::ToUpperCase(c) > JSString::MAX_LATIN1_CHAR);
1246 resultIsLatin1 = false;
1247 break;
1248 } else {
1249 MOZ_ASSERT(unicode::ToUpperCase(c) <= JSString::MAX_LATIN1_CHAR);
1250 }
1251 }
1252 } else {
1253 resultIsLatin1 = false;
1254 }
1255
1256 if (resultIsLatin1) {
1257 newChars.construct<Latin1Buffer>();
1258
1259 if (!ToUpperCase(cx, newChars.ref<Latin1Buffer>(), chars, i, length,
1260 &resultLength))
1261 return nullptr;
1262 } else {
1263 newChars.construct<TwoByteBuffer>();
1264
1265 if (!ToUpperCase(cx, newChars.ref<TwoByteBuffer>(), chars, i, length,
1266 &resultLength))
1267 return nullptr;
1268 }
1269 }
1270
1271 return newChars.constructed<Latin1Buffer>()
1272 ? newChars.ref<Latin1Buffer>().toStringDontDeflate(cx,
1273 resultLength)
1274 : newChars.ref<TwoByteBuffer>().toStringDontDeflate(cx,
1275 resultLength);
1276 }
1277
StringToUpperCase(JSContext * cx,HandleString string)1278 JSString* js::StringToUpperCase(JSContext* cx, HandleString string) {
1279 JSLinearString* linear = string->ensureLinear(cx);
1280 if (!linear) return nullptr;
1281
1282 if (linear->hasLatin1Chars()) return ToUpperCase<Latin1Char>(cx, linear);
1283 return ToUpperCase<char16_t>(cx, linear);
1284 }
1285
str_toUpperCase(JSContext * cx,unsigned argc,Value * vp)1286 bool js::str_toUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1287 CallArgs args = CallArgsFromVp(argc, vp);
1288
1289 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1290 if (!str) return false;
1291
1292 JSString* result = StringToUpperCase(cx, str);
1293 if (!result) return false;
1294
1295 args.rval().setString(result);
1296 return true;
1297 }
1298
intl_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1299 bool js::intl_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1300 CallArgs args = CallArgsFromVp(argc, vp);
1301 MOZ_ASSERT(args.length() == 2);
1302 MOZ_ASSERT(args[0].isString());
1303 MOZ_ASSERT(args[1].isString());
1304
1305 RootedString string(cx, args[0].toString());
1306
1307 const char* locale = CaseMappingLocale(cx, args[1].toString());
1308 if (!locale) return false;
1309
1310 // Call String.prototype.toUpperCase() for language independent casing.
1311 if (intl::StringsAreEqual(locale, "")) {
1312 JSString* str = js::StringToUpperCase(cx, string);
1313 if (!str) return false;
1314
1315 args.rval().setString(str);
1316 return true;
1317 }
1318
1319 AutoStableStringChars inputChars(cx);
1320 if (!inputChars.initTwoByte(cx, string)) return false;
1321 mozilla::Range<const char16_t> input = inputChars.twoByteRange();
1322
1323 // Maximum case mapping length is three characters.
1324 static_assert(JSString::MAX_LENGTH < INT32_MAX / 3,
1325 "Case conversion doesn't overflow int32_t indices");
1326
1327 JSString* str = intl::CallICU(
1328 cx, [&input, locale](UChar* chars, int32_t size, UErrorCode* status) {
1329 return u_strToUpper(chars, size, input.begin().get(), input.length(),
1330 locale, status);
1331 });
1332 if (!str) return false;
1333
1334 args.rval().setString(str);
1335 return true;
1336 }
1337
1338 #if EXPOSE_INTL_API
1339
1340 // String.prototype.toLocaleLowerCase is self-hosted when Intl is exposed,
1341 // with core functionality performed by the intrinsic above.
1342
1343 #else
1344
str_toLocaleUpperCase(JSContext * cx,unsigned argc,Value * vp)1345 bool js::str_toLocaleUpperCase(JSContext* cx, unsigned argc, Value* vp) {
1346 CallArgs args = CallArgsFromVp(argc, vp);
1347
1348 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1349 if (!str) return false;
1350
1351 /*
1352 * Forcefully ignore the first (or any) argument and return toUpperCase(),
1353 * ECMA has reserved that argument, presumably for defining the locale.
1354 */
1355 if (cx->runtime()->localeCallbacks &&
1356 cx->runtime()->localeCallbacks->localeToUpperCase) {
1357 RootedValue result(cx);
1358 if (!cx->runtime()->localeCallbacks->localeToUpperCase(cx, str, &result))
1359 return false;
1360
1361 args.rval().set(result);
1362 return true;
1363 }
1364
1365 RootedLinearString linear(cx, str->ensureLinear(cx));
1366 if (!linear) return false;
1367
1368 JSString* result = StringToUpperCase(cx, linear);
1369 if (!result) return false;
1370
1371 args.rval().setString(result);
1372 return true;
1373 }
1374
1375 #endif // EXPOSE_INTL_API
1376
1377 #if EXPOSE_INTL_API
1378
1379 // String.prototype.localeCompare is self-hosted when Intl is exposed.
1380
1381 #else
1382
str_localeCompare(JSContext * cx,unsigned argc,Value * vp)1383 bool js::str_localeCompare(JSContext* cx, unsigned argc, Value* vp) {
1384 CallArgs args = CallArgsFromVp(argc, vp);
1385 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1386 if (!str) return false;
1387
1388 RootedString thatStr(cx, ToString<CanGC>(cx, args.get(0)));
1389 if (!thatStr) return false;
1390
1391 if (cx->runtime()->localeCallbacks &&
1392 cx->runtime()->localeCallbacks->localeCompare) {
1393 RootedValue result(cx);
1394 if (!cx->runtime()->localeCallbacks->localeCompare(cx, str, thatStr,
1395 &result))
1396 return false;
1397
1398 args.rval().set(result);
1399 return true;
1400 }
1401
1402 int32_t result;
1403 if (!CompareStrings(cx, str, thatStr, &result)) return false;
1404
1405 args.rval().setInt32(result);
1406 return true;
1407 }
1408
1409 #endif // EXPOSE_INTL_API
1410
1411 #if EXPOSE_INTL_API
1412
1413 // ES2017 draft rev 45e890512fd77add72cc0ee742785f9f6f6482de
1414 // 21.1.3.12 String.prototype.normalize ( [ form ] )
str_normalize(JSContext * cx,unsigned argc,Value * vp)1415 bool js::str_normalize(JSContext* cx, unsigned argc, Value* vp) {
1416 CallArgs args = CallArgsFromVp(argc, vp);
1417
1418 // Steps 1-2.
1419 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
1420 if (!str) return false;
1421
1422 enum NormalizationForm { NFC, NFD, NFKC, NFKD };
1423
1424 NormalizationForm form;
1425 if (!args.hasDefined(0)) {
1426 // Step 3.
1427 form = NFC;
1428 } else {
1429 // Step 4.
1430 JSLinearString* formStr = ArgToLinearString(cx, args, 0);
1431 if (!formStr) return false;
1432
1433 // Step 5.
1434 if (EqualStrings(formStr, cx->names().NFC)) {
1435 form = NFC;
1436 } else if (EqualStrings(formStr, cx->names().NFD)) {
1437 form = NFD;
1438 } else if (EqualStrings(formStr, cx->names().NFKC)) {
1439 form = NFKC;
1440 } else if (EqualStrings(formStr, cx->names().NFKD)) {
1441 form = NFKD;
1442 } else {
1443 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1444 JSMSG_INVALID_NORMALIZE_FORM);
1445 return false;
1446 }
1447 }
1448
1449 // Latin-1 strings are already in Normalization Form C.
1450 if (form == NFC && str->hasLatin1Chars()) {
1451 // Step 7.
1452 args.rval().setString(str);
1453 return true;
1454 }
1455
1456 // Step 6.
1457 AutoStableStringChars stableChars(cx);
1458 if (!stableChars.initTwoByte(cx, str)) return false;
1459
1460 mozilla::Range<const char16_t> srcChars = stableChars.twoByteRange();
1461
1462 // The unorm2_getXXXInstance() methods return a shared instance which must
1463 // not be deleted.
1464 UErrorCode status = U_ZERO_ERROR;
1465 const UNormalizer2* normalizer;
1466 if (form == NFC) {
1467 normalizer = unorm2_getNFCInstance(&status);
1468 } else if (form == NFD) {
1469 normalizer = unorm2_getNFDInstance(&status);
1470 } else if (form == NFKC) {
1471 normalizer = unorm2_getNFKCInstance(&status);
1472 } else {
1473 MOZ_ASSERT(form == NFKD);
1474 normalizer = unorm2_getNFKDInstance(&status);
1475 }
1476 if (U_FAILURE(status)) {
1477 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1478 JSMSG_INTERNAL_INTL_ERROR);
1479 return false;
1480 }
1481
1482 int32_t spanLength = unorm2_spanQuickCheckYes(
1483 normalizer, srcChars.begin().get(), srcChars.length(), &status);
1484 if (U_FAILURE(status)) {
1485 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1486 JSMSG_INTERNAL_INTL_ERROR);
1487 return false;
1488 }
1489 MOZ_ASSERT(0 <= spanLength && size_t(spanLength) <= srcChars.length());
1490
1491 // Return if the input string is already normalized.
1492 if (size_t(spanLength) == srcChars.length()) {
1493 // Step 7.
1494 args.rval().setString(str);
1495 return true;
1496 }
1497
1498 static const size_t INLINE_CAPACITY = 32;
1499
1500 Vector<char16_t, INLINE_CAPACITY> chars(cx);
1501 if (!chars.resize(Max(INLINE_CAPACITY, srcChars.length()))) return false;
1502
1503 // Copy the already normalized prefix.
1504 if (spanLength > 0)
1505 PodCopy(chars.begin(), srcChars.begin().get(), size_t(spanLength));
1506
1507 mozilla::RangedPtr<const char16_t> remainingStart =
1508 srcChars.begin() + spanLength;
1509 size_t remainingLength = srcChars.length() - size_t(spanLength);
1510
1511 int32_t size = unorm2_normalizeSecondAndAppend(
1512 normalizer, chars.begin(), spanLength, chars.length(),
1513 remainingStart.get(), remainingLength, &status);
1514 if (status == U_BUFFER_OVERFLOW_ERROR) {
1515 MOZ_ASSERT(size >= 0);
1516 if (!chars.resize(size)) return false;
1517 status = U_ZERO_ERROR;
1518 #ifdef DEBUG
1519 int32_t finalSize =
1520 #endif
1521 unorm2_normalizeSecondAndAppend(normalizer, chars.begin(), spanLength,
1522 chars.length(), remainingStart.get(),
1523 remainingLength, &status);
1524 MOZ_ASSERT_IF(!U_FAILURE(status), size == finalSize);
1525 }
1526 if (U_FAILURE(status)) {
1527 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
1528 JSMSG_INTERNAL_INTL_ERROR);
1529 return false;
1530 }
1531
1532 MOZ_ASSERT(size >= 0);
1533 JSString* ns = NewStringCopyN<CanGC>(cx, chars.begin(), size);
1534 if (!ns) return false;
1535
1536 // Step 7.
1537 args.rval().setString(ns);
1538 return true;
1539 }
1540
1541 #endif // EXPOSE_INTL_API
1542
str_charAt(JSContext * cx,unsigned argc,Value * vp)1543 bool js::str_charAt(JSContext* cx, unsigned argc, Value* vp) {
1544 CallArgs args = CallArgsFromVp(argc, vp);
1545
1546 RootedString str(cx);
1547 size_t i;
1548 if (args.thisv().isString() && args.length() != 0 && args[0].isInt32()) {
1549 str = args.thisv().toString();
1550 i = size_t(args[0].toInt32());
1551 if (i >= str->length()) goto out_of_range;
1552 } else {
1553 str = ToStringForStringFunction(cx, args.thisv());
1554 if (!str) return false;
1555
1556 double d = 0.0;
1557 if (args.length() > 0 && !ToInteger(cx, args[0], &d)) return false;
1558
1559 if (d < 0 || str->length() <= d) goto out_of_range;
1560 i = size_t(d);
1561 }
1562
1563 str = cx->staticStrings().getUnitStringForElement(cx, str, i);
1564 if (!str) return false;
1565 args.rval().setString(str);
1566 return true;
1567
1568 out_of_range:
1569 args.rval().setString(cx->runtime()->emptyString);
1570 return true;
1571 }
1572
str_charCodeAt_impl(JSContext * cx,HandleString string,HandleValue index,MutableHandleValue res)1573 bool js::str_charCodeAt_impl(JSContext* cx, HandleString string,
1574 HandleValue index, MutableHandleValue res) {
1575 size_t i;
1576 if (index.isInt32()) {
1577 i = index.toInt32();
1578 if (i >= string->length()) goto out_of_range;
1579 } else {
1580 double d = 0.0;
1581 if (!ToInteger(cx, index, &d)) return false;
1582 // check whether d is negative as size_t is unsigned
1583 if (d < 0 || string->length() <= d) goto out_of_range;
1584 i = size_t(d);
1585 }
1586 char16_t c;
1587 if (!string->getChar(cx, i, &c)) return false;
1588 res.setInt32(c);
1589 return true;
1590
1591 out_of_range:
1592 res.setNaN();
1593 return true;
1594 }
1595
str_charCodeAt(JSContext * cx,unsigned argc,Value * vp)1596 bool js::str_charCodeAt(JSContext* cx, unsigned argc, Value* vp) {
1597 CallArgs args = CallArgsFromVp(argc, vp);
1598 RootedString str(cx);
1599 RootedValue index(cx);
1600 if (args.thisv().isString()) {
1601 str = args.thisv().toString();
1602 } else {
1603 str = ToStringForStringFunction(cx, args.thisv());
1604 if (!str) return false;
1605 }
1606 if (args.length() != 0)
1607 index = args[0];
1608 else
1609 index.setInt32(0);
1610
1611 return js::str_charCodeAt_impl(cx, str, index, args.rval());
1612 }
1613
1614 /*
1615 * Boyer-Moore-Horspool superlinear search for pat:patlen in text:textlen.
1616 * The patlen argument must be positive and no greater than sBMHPatLenMax.
1617 *
1618 * Return the index of pat in text, or -1 if not found.
1619 */
1620 static const uint32_t sBMHCharSetSize = 256; /* ISO-Latin-1 */
1621 static const uint32_t sBMHPatLenMax = 255; /* skip table element is uint8_t */
1622 static const int sBMHBadPattern =
1623 -2; /* return value if pat is not ISO-Latin-1 */
1624
1625 template <typename TextChar, typename PatChar>
BoyerMooreHorspool(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1626 static int BoyerMooreHorspool(const TextChar* text, uint32_t textLen,
1627 const PatChar* pat, uint32_t patLen) {
1628 MOZ_ASSERT(0 < patLen && patLen <= sBMHPatLenMax);
1629
1630 uint8_t skip[sBMHCharSetSize];
1631 for (uint32_t i = 0; i < sBMHCharSetSize; i++) skip[i] = uint8_t(patLen);
1632
1633 uint32_t patLast = patLen - 1;
1634 for (uint32_t i = 0; i < patLast; i++) {
1635 char16_t c = pat[i];
1636 if (c >= sBMHCharSetSize) return sBMHBadPattern;
1637 skip[c] = uint8_t(patLast - i);
1638 }
1639
1640 for (uint32_t k = patLast; k < textLen;) {
1641 for (uint32_t i = k, j = patLast;; i--, j--) {
1642 if (text[i] != pat[j]) break;
1643 if (j == 0) return static_cast<int>(i); /* safe: max string size */
1644 }
1645
1646 char16_t c = text[k];
1647 k += (c >= sBMHCharSetSize) ? patLen : skip[c];
1648 }
1649 return -1;
1650 }
1651
1652 template <typename TextChar, typename PatChar>
1653 struct MemCmp {
1654 typedef uint32_t Extent;
computeExtentMemCmp1655 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar*,
1656 uint32_t patLen) {
1657 return (patLen - 1) * sizeof(PatChar);
1658 }
matchMemCmp1659 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1660 Extent extent) {
1661 MOZ_ASSERT(sizeof(TextChar) == sizeof(PatChar));
1662 return memcmp(p, t, extent) == 0;
1663 }
1664 };
1665
1666 template <typename TextChar, typename PatChar>
1667 struct ManualCmp {
1668 typedef const PatChar* Extent;
computeExtentManualCmp1669 static MOZ_ALWAYS_INLINE Extent computeExtent(const PatChar* pat,
1670 uint32_t patLen) {
1671 return pat + patLen;
1672 }
matchManualCmp1673 static MOZ_ALWAYS_INLINE bool match(const PatChar* p, const TextChar* t,
1674 Extent extent) {
1675 for (; p != extent; ++p, ++t) {
1676 if (*p != *t) return false;
1677 }
1678 return true;
1679 }
1680 };
1681
1682 template <typename TextChar, typename PatChar>
FirstCharMatcherUnrolled(const TextChar * text,uint32_t n,const PatChar pat)1683 static const TextChar* FirstCharMatcherUnrolled(const TextChar* text,
1684 uint32_t n, const PatChar pat) {
1685 const TextChar* textend = text + n;
1686 const TextChar* t = text;
1687
1688 switch ((textend - t) & 7) {
1689 case 0:
1690 if (*t++ == pat) return t - 1;
1691 MOZ_FALLTHROUGH;
1692 case 7:
1693 if (*t++ == pat) return t - 1;
1694 MOZ_FALLTHROUGH;
1695 case 6:
1696 if (*t++ == pat) return t - 1;
1697 MOZ_FALLTHROUGH;
1698 case 5:
1699 if (*t++ == pat) return t - 1;
1700 MOZ_FALLTHROUGH;
1701 case 4:
1702 if (*t++ == pat) return t - 1;
1703 MOZ_FALLTHROUGH;
1704 case 3:
1705 if (*t++ == pat) return t - 1;
1706 MOZ_FALLTHROUGH;
1707 case 2:
1708 if (*t++ == pat) return t - 1;
1709 MOZ_FALLTHROUGH;
1710 case 1:
1711 if (*t++ == pat) return t - 1;
1712 }
1713 while (textend != t) {
1714 if (t[0] == pat) return t;
1715 if (t[1] == pat) return t + 1;
1716 if (t[2] == pat) return t + 2;
1717 if (t[3] == pat) return t + 3;
1718 if (t[4] == pat) return t + 4;
1719 if (t[5] == pat) return t + 5;
1720 if (t[6] == pat) return t + 6;
1721 if (t[7] == pat) return t + 7;
1722 t += 8;
1723 }
1724 return nullptr;
1725 }
1726
FirstCharMatcher8bit(const char * text,uint32_t n,const char pat)1727 static const char* FirstCharMatcher8bit(const char* text, uint32_t n,
1728 const char pat) {
1729 return reinterpret_cast<const char*>(memchr(text, pat, n));
1730 }
1731
1732 template <class InnerMatch, typename TextChar, typename PatChar>
Matcher(const TextChar * text,uint32_t textlen,const PatChar * pat,uint32_t patlen)1733 static int Matcher(const TextChar* text, uint32_t textlen, const PatChar* pat,
1734 uint32_t patlen) {
1735 MOZ_ASSERT(patlen > 0);
1736
1737 if (sizeof(TextChar) == 1 && sizeof(PatChar) > 1 && pat[0] > 0xff) return -1;
1738
1739 const typename InnerMatch::Extent extent =
1740 InnerMatch::computeExtent(pat, patlen);
1741
1742 uint32_t i = 0;
1743 uint32_t n = textlen - patlen + 1;
1744 while (i < n) {
1745 const TextChar* pos;
1746
1747 if (sizeof(TextChar) == 1) {
1748 MOZ_ASSERT(pat[0] <= 0xff);
1749 pos = (TextChar*)FirstCharMatcher8bit((char*)text + i, n - i, pat[0]);
1750 } else {
1751 pos = FirstCharMatcherUnrolled(text + i, n - i, char16_t(pat[0]));
1752 }
1753
1754 if (pos == nullptr) return -1;
1755
1756 i = static_cast<uint32_t>(pos - text);
1757 if (InnerMatch::match(pat + 1, text + i + 1, extent)) return i;
1758
1759 i += 1;
1760 }
1761 return -1;
1762 }
1763
1764 template <typename TextChar, typename PatChar>
StringMatch(const TextChar * text,uint32_t textLen,const PatChar * pat,uint32_t patLen)1765 static MOZ_ALWAYS_INLINE int StringMatch(const TextChar* text, uint32_t textLen,
1766 const PatChar* pat, uint32_t patLen) {
1767 if (patLen == 0) return 0;
1768 if (textLen < patLen) return -1;
1769
1770 #if defined(__i386__) || defined(_M_IX86) || defined(__i386)
1771 /*
1772 * Given enough registers, the unrolled loop below is faster than the
1773 * following loop. 32-bit x86 does not have enough registers.
1774 */
1775 if (patLen == 1) {
1776 const PatChar p0 = *pat;
1777 const TextChar* end = text + textLen;
1778 for (const TextChar* c = text; c != end; ++c) {
1779 if (*c == p0) return c - text;
1780 }
1781 return -1;
1782 }
1783 #endif
1784
1785 /*
1786 * If the text or pattern string is short, BMH will be more expensive than
1787 * the basic linear scan due to initialization cost and a more complex loop
1788 * body. While the correct threshold is input-dependent, we can make a few
1789 * conservative observations:
1790 * - When |textLen| is "big enough", the initialization time will be
1791 * proportionally small, so the worst-case slowdown is minimized.
1792 * - When |patLen| is "too small", even the best case for BMH will be
1793 * slower than a simple scan for large |textLen| due to the more complex
1794 * loop body of BMH.
1795 * From this, the values for "big enough" and "too small" are determined
1796 * empirically. See bug 526348.
1797 */
1798 if (textLen >= 512 && patLen >= 11 && patLen <= sBMHPatLenMax) {
1799 int index = BoyerMooreHorspool(text, textLen, pat, patLen);
1800 if (index != sBMHBadPattern) return index;
1801 }
1802
1803 /*
1804 * For big patterns with large potential overlap we want the SIMD-optimized
1805 * speed of memcmp. For small patterns, a simple loop is faster. We also can't
1806 * use memcmp if one of the strings is TwoByte and the other is Latin-1.
1807 */
1808 return (patLen > 128 && IsSame<TextChar, PatChar>::value)
1809 ? Matcher<MemCmp<TextChar, PatChar>, TextChar, PatChar>(
1810 text, textLen, pat, patLen)
1811 : Matcher<ManualCmp<TextChar, PatChar>, TextChar, PatChar>(
1812 text, textLen, pat, patLen);
1813 }
1814
StringMatch(JSLinearString * text,JSLinearString * pat,uint32_t start=0)1815 static int32_t StringMatch(JSLinearString* text, JSLinearString* pat,
1816 uint32_t start = 0) {
1817 MOZ_ASSERT(start <= text->length());
1818 uint32_t textLen = text->length() - start;
1819 uint32_t patLen = pat->length();
1820
1821 int match;
1822 AutoCheckCannotGC nogc;
1823 if (text->hasLatin1Chars()) {
1824 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1825 if (pat->hasLatin1Chars())
1826 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1827 else
1828 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1829 } else {
1830 const char16_t* textChars = text->twoByteChars(nogc) + start;
1831 if (pat->hasLatin1Chars())
1832 match = StringMatch(textChars, textLen, pat->latin1Chars(nogc), patLen);
1833 else
1834 match = StringMatch(textChars, textLen, pat->twoByteChars(nogc), patLen);
1835 }
1836
1837 return (match == -1) ? -1 : start + match;
1838 }
1839
1840 static const size_t sRopeMatchThresholdRatioLog2 = 4;
1841
StringFindPattern(JSLinearString * text,JSLinearString * pat,size_t start)1842 int js::StringFindPattern(JSLinearString* text, JSLinearString* pat,
1843 size_t start) {
1844 return StringMatch(text, pat, start);
1845 }
1846
1847 // When an algorithm does not need a string represented as a single linear
1848 // array of characters, this range utility may be used to traverse the string a
1849 // sequence of linear arrays of characters. This avoids flattening ropes.
1850 class StringSegmentRange {
1851 // If malloc() shows up in any profiles from this vector, we can add a new
1852 // StackAllocPolicy which stashes a reusable freed-at-gc buffer in the cx.
1853 using StackVector = JS::GCVector<JSString*, 16>;
1854 Rooted<StackVector> stack;
1855 RootedLinearString cur;
1856
settle(JSString * str)1857 bool settle(JSString* str) {
1858 while (str->isRope()) {
1859 JSRope& rope = str->asRope();
1860 if (!stack.append(rope.rightChild())) return false;
1861 str = rope.leftChild();
1862 }
1863 cur = &str->asLinear();
1864 return true;
1865 }
1866
1867 public:
StringSegmentRange(JSContext * cx)1868 explicit StringSegmentRange(JSContext* cx)
1869 : stack(cx, StackVector(cx)), cur(cx) {}
1870
init(JSString * str)1871 MOZ_MUST_USE bool init(JSString* str) {
1872 MOZ_ASSERT(stack.empty());
1873 return settle(str);
1874 }
1875
empty() const1876 bool empty() const { return cur == nullptr; }
1877
front() const1878 JSLinearString* front() const {
1879 MOZ_ASSERT(!cur->isRope());
1880 return cur;
1881 }
1882
popFront()1883 MOZ_MUST_USE bool popFront() {
1884 MOZ_ASSERT(!empty());
1885 if (stack.empty()) {
1886 cur = nullptr;
1887 return true;
1888 }
1889 return settle(stack.popCopy());
1890 }
1891 };
1892
1893 typedef Vector<JSLinearString*, 16, SystemAllocPolicy> LinearStringVector;
1894
1895 template <typename TextChar, typename PatChar>
RopeMatchImpl(const AutoCheckCannotGC & nogc,LinearStringVector & strings,const PatChar * pat,size_t patLen)1896 static int RopeMatchImpl(const AutoCheckCannotGC& nogc,
1897 LinearStringVector& strings, const PatChar* pat,
1898 size_t patLen) {
1899 /* Absolute offset from the beginning of the logical text string. */
1900 int pos = 0;
1901
1902 for (JSLinearString** outerp = strings.begin(); outerp != strings.end();
1903 ++outerp) {
1904 /* Try to find a match within 'outer'. */
1905 JSLinearString* outer = *outerp;
1906 const TextChar* chars = outer->chars<TextChar>(nogc);
1907 size_t len = outer->length();
1908 int matchResult = StringMatch(chars, len, pat, patLen);
1909 if (matchResult != -1) {
1910 /* Matched! */
1911 return pos + matchResult;
1912 }
1913
1914 /* Try to find a match starting in 'outer' and running into other nodes. */
1915 const TextChar* const text = chars + (patLen > len ? 0 : len - patLen + 1);
1916 const TextChar* const textend = chars + len;
1917 const PatChar p0 = *pat;
1918 const PatChar* const p1 = pat + 1;
1919 const PatChar* const patend = pat + patLen;
1920 for (const TextChar* t = text; t != textend;) {
1921 if (*t++ != p0) continue;
1922
1923 JSLinearString** innerp = outerp;
1924 const TextChar* ttend = textend;
1925 const TextChar* tt = t;
1926 for (const PatChar *pp = p1; pp != patend; ++pp, ++tt) {
1927 while (tt == ttend) {
1928 if (++innerp == strings.end()) return -1;
1929
1930 JSLinearString* inner = *innerp;
1931 tt = inner->chars<TextChar>(nogc);
1932 ttend = tt + inner->length();
1933 }
1934 if (*pp != *tt) goto break_continue;
1935 }
1936
1937 /* Matched! */
1938 return pos + (t - chars) - 1; /* -1 because of *t++ above */
1939
1940 break_continue:;
1941 }
1942
1943 pos += len;
1944 }
1945
1946 return -1;
1947 }
1948
1949 /*
1950 * RopeMatch takes the text to search and the pattern to search for in the text.
1951 * RopeMatch returns false on OOM and otherwise returns the match index through
1952 * the 'match' outparam (-1 for not found).
1953 */
RopeMatch(JSContext * cx,JSRope * text,JSLinearString * pat,int * match)1954 static bool RopeMatch(JSContext* cx, JSRope* text, JSLinearString* pat,
1955 int* match) {
1956 uint32_t patLen = pat->length();
1957 if (patLen == 0) {
1958 *match = 0;
1959 return true;
1960 }
1961 if (text->length() < patLen) {
1962 *match = -1;
1963 return true;
1964 }
1965
1966 /*
1967 * List of leaf nodes in the rope. If we run out of memory when trying to
1968 * append to this list, we can still fall back to StringMatch, so use the
1969 * system allocator so we don't report OOM in that case.
1970 */
1971 LinearStringVector strings;
1972
1973 /*
1974 * We don't want to do rope matching if there is a poor node-to-char ratio,
1975 * since this means spending a lot of time in the match loop below. We also
1976 * need to build the list of leaf nodes. Do both here: iterate over the
1977 * nodes so long as there are not too many.
1978 *
1979 * We also don't use rope matching if the rope contains both Latin-1 and
1980 * TwoByte nodes, to simplify the match algorithm.
1981 */
1982 {
1983 size_t threshold = text->length() >> sRopeMatchThresholdRatioLog2;
1984 StringSegmentRange r(cx);
1985 if (!r.init(text)) return false;
1986
1987 bool textIsLatin1 = text->hasLatin1Chars();
1988 while (!r.empty()) {
1989 if (threshold-- == 0 || r.front()->hasLatin1Chars() != textIsLatin1 ||
1990 !strings.append(r.front())) {
1991 JSLinearString* linear = text->ensureLinear(cx);
1992 if (!linear) return false;
1993
1994 *match = StringMatch(linear, pat);
1995 return true;
1996 }
1997 if (!r.popFront()) return false;
1998 }
1999 }
2000
2001 AutoCheckCannotGC nogc;
2002 if (text->hasLatin1Chars()) {
2003 if (pat->hasLatin1Chars())
2004 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->latin1Chars(nogc),
2005 patLen);
2006 else
2007 *match = RopeMatchImpl<Latin1Char>(nogc, strings, pat->twoByteChars(nogc),
2008 patLen);
2009 } else {
2010 if (pat->hasLatin1Chars())
2011 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->latin1Chars(nogc),
2012 patLen);
2013 else
2014 *match = RopeMatchImpl<char16_t>(nogc, strings, pat->twoByteChars(nogc),
2015 patLen);
2016 }
2017
2018 return true;
2019 }
2020
ReportErrorIfFirstArgIsRegExp(JSContext * cx,const CallArgs & args)2021 static MOZ_ALWAYS_INLINE bool ReportErrorIfFirstArgIsRegExp(
2022 JSContext* cx, const CallArgs& args) {
2023 // Only call IsRegExp if the first argument is definitely an object, so we
2024 // don't pay the cost of an additional function call in the common case.
2025 if (args.length() == 0 || !args[0].isObject()) return true;
2026
2027 bool isRegExp;
2028 if (!IsRegExp(cx, args[0], &isRegExp)) return false;
2029
2030 if (isRegExp) {
2031 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
2032 JSMSG_INVALID_ARG_TYPE, "first", "",
2033 "Regular Expression");
2034 return false;
2035 }
2036 return true;
2037 }
2038
2039 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2040 // 21.1.3.7 String.prototype.includes ( searchString [ , position ] )
str_includes(JSContext * cx,unsigned argc,Value * vp)2041 bool js::str_includes(JSContext* cx, unsigned argc, Value* vp) {
2042 CallArgs args = CallArgsFromVp(argc, vp);
2043
2044 // Steps 1-2.
2045 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
2046 if (!str) return false;
2047
2048 // Steps 3-4.
2049 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) return false;
2050
2051 // Step 5.
2052 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2053 if (!searchStr) return false;
2054
2055 // Step 6.
2056 uint32_t pos = 0;
2057 if (args.hasDefined(1)) {
2058 if (args[1].isInt32()) {
2059 int i = args[1].toInt32();
2060 pos = (i < 0) ? 0U : uint32_t(i);
2061 } else {
2062 double d;
2063 if (!ToInteger(cx, args[1], &d)) return false;
2064 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
2065 }
2066 }
2067
2068 // Step 7.
2069 uint32_t textLen = str->length();
2070
2071 // Step 8.
2072 uint32_t start = Min(Max(pos, 0U), textLen);
2073
2074 // Steps 9-10.
2075 JSLinearString* text = str->ensureLinear(cx);
2076 if (!text) return false;
2077
2078 args.rval().setBoolean(StringMatch(text, searchStr, start) != -1);
2079 return true;
2080 }
2081
2082 /* ES6 20120927 draft 15.5.4.7. */
str_indexOf(JSContext * cx,unsigned argc,Value * vp)2083 bool js::str_indexOf(JSContext* cx, unsigned argc, Value* vp) {
2084 CallArgs args = CallArgsFromVp(argc, vp);
2085
2086 // Steps 1, 2, and 3
2087 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
2088 if (!str) return false;
2089
2090 // Steps 4 and 5
2091 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2092 if (!searchStr) return false;
2093
2094 // Steps 6 and 7
2095 uint32_t pos = 0;
2096 if (args.hasDefined(1)) {
2097 if (args[1].isInt32()) {
2098 int i = args[1].toInt32();
2099 pos = (i < 0) ? 0U : uint32_t(i);
2100 } else {
2101 double d;
2102 if (!ToInteger(cx, args[1], &d)) return false;
2103 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
2104 }
2105 }
2106
2107 // Step 8
2108 uint32_t textLen = str->length();
2109
2110 // Step 9
2111 uint32_t start = Min(Max(pos, 0U), textLen);
2112
2113 if (str == searchStr) {
2114 // AngularJS often invokes "false".indexOf("false"). This check should
2115 // be cheap enough to not hurt anything else.
2116 args.rval().setInt32(start == 0 ? 0 : -1);
2117 return true;
2118 }
2119
2120 // Steps 10 and 11
2121 JSLinearString* text = str->ensureLinear(cx);
2122 if (!text) return false;
2123
2124 args.rval().setInt32(StringMatch(text, searchStr, start));
2125 return true;
2126 }
2127
2128 template <typename TextChar, typename PatChar>
LastIndexOfImpl(const TextChar * text,size_t textLen,const PatChar * pat,size_t patLen,size_t start)2129 static int32_t LastIndexOfImpl(const TextChar* text, size_t textLen,
2130 const PatChar* pat, size_t patLen,
2131 size_t start) {
2132 MOZ_ASSERT(patLen > 0);
2133 MOZ_ASSERT(patLen <= textLen);
2134 MOZ_ASSERT(start <= textLen - patLen);
2135
2136 const PatChar p0 = *pat;
2137 const PatChar* patNext = pat + 1;
2138 const PatChar* patEnd = pat + patLen;
2139
2140 for (const TextChar* t = text + start; t >= text; --t) {
2141 if (*t == p0) {
2142 const TextChar* t1 = t + 1;
2143 for (const PatChar *p1 = patNext; p1 < patEnd; ++p1, ++t1) {
2144 if (*t1 != *p1) goto break_continue;
2145 }
2146
2147 return static_cast<int32_t>(t - text);
2148 }
2149 break_continue:;
2150 }
2151
2152 return -1;
2153 }
2154
2155 // ES2017 draft rev 6859bb9ccaea9c6ede81d71e5320e3833b92cb3e
2156 // 21.1.3.9 String.prototype.lastIndexOf ( searchString [ , position ] )
str_lastIndexOf(JSContext * cx,unsigned argc,Value * vp)2157 bool js::str_lastIndexOf(JSContext* cx, unsigned argc, Value* vp) {
2158 CallArgs args = CallArgsFromVp(argc, vp);
2159
2160 // Steps 1-2.
2161 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
2162 if (!str) return false;
2163
2164 // Step 3.
2165 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2166 if (!searchStr) return false;
2167
2168 // Step 6.
2169 size_t len = str->length();
2170
2171 // Step 8.
2172 size_t searchLen = searchStr->length();
2173
2174 // Steps 4-5, 7.
2175 int start = len - searchLen; // Start searching here
2176 if (args.hasDefined(1)) {
2177 if (args[1].isInt32()) {
2178 int i = args[1].toInt32();
2179 if (i <= 0)
2180 start = 0;
2181 else if (i < start)
2182 start = i;
2183 } else {
2184 double d;
2185 if (!ToNumber(cx, args[1], &d)) return false;
2186 if (!IsNaN(d)) {
2187 d = JS::ToInteger(d);
2188 if (d <= 0)
2189 start = 0;
2190 else if (d < start)
2191 start = int(d);
2192 }
2193 }
2194 }
2195
2196 if (str == searchStr) {
2197 args.rval().setInt32(0);
2198 return true;
2199 }
2200
2201 if (searchLen > len) {
2202 args.rval().setInt32(-1);
2203 return true;
2204 }
2205
2206 if (searchLen == 0) {
2207 args.rval().setInt32(start);
2208 return true;
2209 }
2210 MOZ_ASSERT(0 <= start && size_t(start) < len);
2211
2212 JSLinearString* text = str->ensureLinear(cx);
2213 if (!text) return false;
2214
2215 // Step 9.
2216 int32_t res;
2217 AutoCheckCannotGC nogc;
2218 if (text->hasLatin1Chars()) {
2219 const Latin1Char* textChars = text->latin1Chars(nogc);
2220 if (searchStr->hasLatin1Chars())
2221 res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2222 searchLen, start);
2223 else
2224 res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2225 searchLen, start);
2226 } else {
2227 const char16_t* textChars = text->twoByteChars(nogc);
2228 if (searchStr->hasLatin1Chars())
2229 res = LastIndexOfImpl(textChars, len, searchStr->latin1Chars(nogc),
2230 searchLen, start);
2231 else
2232 res = LastIndexOfImpl(textChars, len, searchStr->twoByteChars(nogc),
2233 searchLen, start);
2234 }
2235
2236 args.rval().setInt32(res);
2237 return true;
2238 }
2239
2240 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2241 // 21.1.3.20 String.prototype.startsWith ( searchString [ , position ] )
str_startsWith(JSContext * cx,unsigned argc,Value * vp)2242 bool js::str_startsWith(JSContext* cx, unsigned argc, Value* vp) {
2243 CallArgs args = CallArgsFromVp(argc, vp);
2244
2245 // Steps 1-2.
2246 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
2247 if (!str) return false;
2248
2249 // Steps 3-4.
2250 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) return false;
2251
2252 // Step 5.
2253 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2254 if (!searchStr) return false;
2255
2256 // Step 6.
2257 uint32_t pos = 0;
2258 if (args.hasDefined(1)) {
2259 if (args[1].isInt32()) {
2260 int i = args[1].toInt32();
2261 pos = (i < 0) ? 0U : uint32_t(i);
2262 } else {
2263 double d;
2264 if (!ToInteger(cx, args[1], &d)) return false;
2265 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
2266 }
2267 }
2268
2269 // Step 7.
2270 uint32_t textLen = str->length();
2271
2272 // Step 8.
2273 uint32_t start = Min(Max(pos, 0U), textLen);
2274
2275 // Step 9.
2276 uint32_t searchLen = searchStr->length();
2277
2278 // Step 10.
2279 if (searchLen + start < searchLen || searchLen + start > textLen) {
2280 args.rval().setBoolean(false);
2281 return true;
2282 }
2283
2284 // Steps 11-12.
2285 JSLinearString* text = str->ensureLinear(cx);
2286 if (!text) return false;
2287
2288 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2289 return true;
2290 }
2291
2292 // ES2018 draft rev de77aaeffce115deaf948ed30c7dbe4c60983c0c
2293 // 21.1.3.6 String.prototype.endsWith ( searchString [ , endPosition ] )
str_endsWith(JSContext * cx,unsigned argc,Value * vp)2294 bool js::str_endsWith(JSContext* cx, unsigned argc, Value* vp) {
2295 CallArgs args = CallArgsFromVp(argc, vp);
2296
2297 // Steps 1-2.
2298 RootedString str(cx, ToStringForStringFunction(cx, args.thisv()));
2299 if (!str) return false;
2300
2301 // Steps 3-4.
2302 if (!ReportErrorIfFirstArgIsRegExp(cx, args)) return false;
2303
2304 // Step 5.
2305 RootedLinearString searchStr(cx, ArgToLinearString(cx, args, 0));
2306 if (!searchStr) return false;
2307
2308 // Step 6.
2309 uint32_t textLen = str->length();
2310
2311 // Step 7.
2312 uint32_t pos = textLen;
2313 if (args.hasDefined(1)) {
2314 if (args[1].isInt32()) {
2315 int i = args[1].toInt32();
2316 pos = (i < 0) ? 0U : uint32_t(i);
2317 } else {
2318 double d;
2319 if (!ToInteger(cx, args[1], &d)) return false;
2320 pos = uint32_t(Min(Max(d, 0.0), double(UINT32_MAX)));
2321 }
2322 }
2323
2324 // Step 8.
2325 uint32_t end = Min(Max(pos, 0U), textLen);
2326
2327 // Step 9.
2328 uint32_t searchLen = searchStr->length();
2329
2330 // Step 11 (reordered).
2331 if (searchLen > end) {
2332 args.rval().setBoolean(false);
2333 return true;
2334 }
2335
2336 // Step 10.
2337 uint32_t start = end - searchLen;
2338
2339 // Steps 12-13.
2340 JSLinearString* text = str->ensureLinear(cx);
2341 if (!text) return false;
2342
2343 args.rval().setBoolean(HasSubstringAt(text, searchStr, start));
2344 return true;
2345 }
2346
2347 template <typename CharT>
TrimString(const CharT * chars,bool trimStart,bool trimEnd,size_t length,size_t * pBegin,size_t * pEnd)2348 static void TrimString(const CharT* chars, bool trimStart, bool trimEnd,
2349 size_t length, size_t* pBegin, size_t* pEnd) {
2350 size_t begin = 0, end = length;
2351
2352 if (trimStart) {
2353 while (begin < length && unicode::IsSpace(chars[begin])) ++begin;
2354 }
2355
2356 if (trimEnd) {
2357 while (end > begin && unicode::IsSpace(chars[end - 1])) --end;
2358 }
2359
2360 *pBegin = begin;
2361 *pEnd = end;
2362 }
2363
TrimString(JSContext * cx,const CallArgs & args,bool trimStart,bool trimEnd)2364 static bool TrimString(JSContext* cx, const CallArgs& args, bool trimStart,
2365 bool trimEnd) {
2366 JSString* str = ToStringForStringFunction(cx, args.thisv());
2367 if (!str) return false;
2368
2369 JSLinearString* linear = str->ensureLinear(cx);
2370 if (!linear) return false;
2371
2372 size_t length = linear->length();
2373 size_t begin, end;
2374 if (linear->hasLatin1Chars()) {
2375 AutoCheckCannotGC nogc;
2376 TrimString(linear->latin1Chars(nogc), trimStart, trimEnd, length, &begin,
2377 &end);
2378 } else {
2379 AutoCheckCannotGC nogc;
2380 TrimString(linear->twoByteChars(nogc), trimStart, trimEnd, length, &begin,
2381 &end);
2382 }
2383
2384 JSLinearString* result = NewDependentString(cx, linear, begin, end - begin);
2385 if (!result) return false;
2386
2387 args.rval().setString(result);
2388 return true;
2389 }
2390
str_trim(JSContext * cx,unsigned argc,Value * vp)2391 bool js::str_trim(JSContext* cx, unsigned argc, Value* vp) {
2392 CallArgs args = CallArgsFromVp(argc, vp);
2393 return TrimString(cx, args, true, true);
2394 }
2395
str_trimStart(JSContext * cx,unsigned argc,Value * vp)2396 bool js::str_trimStart(JSContext* cx, unsigned argc, Value* vp) {
2397 CallArgs args = CallArgsFromVp(argc, vp);
2398 return TrimString(cx, args, true, false);
2399 }
2400
str_trimEnd(JSContext * cx,unsigned argc,Value * vp)2401 bool js::str_trimEnd(JSContext* cx, unsigned argc, Value* vp) {
2402 CallArgs args = CallArgsFromVp(argc, vp);
2403 return TrimString(cx, args, false, true);
2404 }
2405
2406 // Utility for building a rope (lazy concatenation) of strings.
2407 class RopeBuilder {
2408 JSContext* cx;
2409 RootedString res;
2410
2411 RopeBuilder(const RopeBuilder& other) = delete;
2412 void operator=(const RopeBuilder& other) = delete;
2413
2414 public:
RopeBuilder(JSContext * cx)2415 explicit RopeBuilder(JSContext* cx)
2416 : cx(cx), res(cx, cx->runtime()->emptyString) {}
2417
append(HandleString str)2418 inline bool append(HandleString str) {
2419 res = ConcatStrings<CanGC>(cx, res, str);
2420 return !!res;
2421 }
2422
result()2423 inline JSString* result() { return res; }
2424 };
2425
2426 namespace {
2427
2428 template <typename CharT>
FindDollarIndex(const CharT * chars,size_t length)2429 static uint32_t FindDollarIndex(const CharT* chars, size_t length) {
2430 if (const CharT* p = js_strchr_limit(chars, '$', chars + length)) {
2431 uint32_t dollarIndex = p - chars;
2432 MOZ_ASSERT(dollarIndex < length);
2433 return dollarIndex;
2434 }
2435 return UINT32_MAX;
2436 }
2437
2438 } /* anonymous namespace */
2439
2440 /*
2441 * Constructs a result string that looks like:
2442 *
2443 * newstring = string[:matchStart] + repstr + string[matchEnd:]
2444 */
BuildFlatReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t matchStart,size_t patternLength)2445 static JSString* BuildFlatReplacement(JSContext* cx, HandleString textstr,
2446 HandleLinearString repstr,
2447 size_t matchStart, size_t patternLength) {
2448 size_t matchEnd = matchStart + patternLength;
2449
2450 RootedString resultStr(cx, NewDependentString(cx, textstr, 0, matchStart));
2451 if (!resultStr) return nullptr;
2452
2453 resultStr = ConcatStrings<CanGC>(cx, resultStr, repstr);
2454 if (!resultStr) return nullptr;
2455
2456 MOZ_ASSERT(textstr->length() >= matchEnd);
2457 RootedString rest(cx, NewDependentString(cx, textstr, matchEnd,
2458 textstr->length() - matchEnd));
2459 if (!rest) return nullptr;
2460
2461 return ConcatStrings<CanGC>(cx, resultStr, rest);
2462 }
2463
BuildFlatRopeReplacement(JSContext * cx,HandleString textstr,HandleLinearString repstr,size_t match,size_t patternLength)2464 static JSString* BuildFlatRopeReplacement(JSContext* cx, HandleString textstr,
2465 HandleLinearString repstr,
2466 size_t match, size_t patternLength) {
2467 MOZ_ASSERT(textstr->isRope());
2468
2469 size_t matchEnd = match + patternLength;
2470
2471 /*
2472 * If we are replacing over a rope, avoid flattening it by iterating
2473 * through it, building a new rope.
2474 */
2475 StringSegmentRange r(cx);
2476 if (!r.init(textstr)) return nullptr;
2477
2478 RopeBuilder builder(cx);
2479 size_t pos = 0;
2480 while (!r.empty()) {
2481 RootedString str(cx, r.front());
2482 size_t len = str->length();
2483 size_t strEnd = pos + len;
2484 if (pos < matchEnd && strEnd > match) {
2485 /*
2486 * We need to special-case any part of the rope that overlaps
2487 * with the replacement string.
2488 */
2489 if (match >= pos) {
2490 /*
2491 * If this part of the rope overlaps with the left side of
2492 * the pattern, then it must be the only one to overlap with
2493 * the first character in the pattern, so we include the
2494 * replacement string here.
2495 */
2496 RootedString leftSide(cx, NewDependentString(cx, str, 0, match - pos));
2497 if (!leftSide || !builder.append(leftSide) || !builder.append(repstr)) {
2498 return nullptr;
2499 }
2500 }
2501
2502 /*
2503 * If str runs off the end of the matched string, append the
2504 * last part of str.
2505 */
2506 if (strEnd > matchEnd) {
2507 RootedString rightSide(
2508 cx, NewDependentString(cx, str, matchEnd - pos, strEnd - matchEnd));
2509 if (!rightSide || !builder.append(rightSide)) return nullptr;
2510 }
2511 } else {
2512 if (!builder.append(str)) return nullptr;
2513 }
2514 pos += str->length();
2515 if (!r.popFront()) return nullptr;
2516 }
2517
2518 return builder.result();
2519 }
2520
2521 template <typename CharT>
AppendDollarReplacement(StringBuffer & newReplaceChars,size_t firstDollarIndex,size_t matchStart,size_t matchLimit,JSLinearString * text,const CharT * repChars,size_t repLength)2522 static bool AppendDollarReplacement(StringBuffer& newReplaceChars,
2523 size_t firstDollarIndex, size_t matchStart,
2524 size_t matchLimit, JSLinearString* text,
2525 const CharT* repChars, size_t repLength) {
2526 MOZ_ASSERT(firstDollarIndex < repLength);
2527
2528 /* Move the pre-dollar chunk in bulk. */
2529 newReplaceChars.infallibleAppend(repChars, firstDollarIndex);
2530
2531 /* Move the rest char-by-char, interpreting dollars as we encounter them. */
2532 const CharT* repLimit = repChars + repLength;
2533 for (const CharT* it = repChars + firstDollarIndex; it < repLimit; ++it) {
2534 if (*it != '$' || it == repLimit - 1) {
2535 if (!newReplaceChars.append(*it)) return false;
2536 continue;
2537 }
2538
2539 switch (*(it + 1)) {
2540 case '$': /* Eat one of the dollars. */
2541 if (!newReplaceChars.append(*it)) return false;
2542 break;
2543 case '&':
2544 if (!newReplaceChars.appendSubstring(text, matchStart,
2545 matchLimit - matchStart))
2546 return false;
2547 break;
2548 case '`':
2549 if (!newReplaceChars.appendSubstring(text, 0, matchStart)) return false;
2550 break;
2551 case '\'':
2552 if (!newReplaceChars.appendSubstring(text, matchLimit,
2553 text->length() - matchLimit))
2554 return false;
2555 break;
2556 default: /* The dollar we saw was not special (no matter what its mother
2557 told it). */
2558 if (!newReplaceChars.append(*it)) return false;
2559 continue;
2560 }
2561 ++it; /* We always eat an extra char in the above switch. */
2562 }
2563
2564 return true;
2565 }
2566
2567 /*
2568 * Perform a linear-scan dollar substitution on the replacement text.
2569 */
InterpretDollarReplacement(JSContext * cx,HandleString textstrArg,HandleLinearString repstr,uint32_t firstDollarIndex,size_t matchStart,size_t patternLength)2570 static JSLinearString* InterpretDollarReplacement(
2571 JSContext* cx, HandleString textstrArg, HandleLinearString repstr,
2572 uint32_t firstDollarIndex, size_t matchStart, size_t patternLength) {
2573 RootedLinearString textstr(cx, textstrArg->ensureLinear(cx));
2574 if (!textstr) return nullptr;
2575
2576 size_t matchLimit = matchStart + patternLength;
2577
2578 /*
2579 * Most probably:
2580 *
2581 * len(newstr) >= len(orig) - len(match) + len(replacement)
2582 *
2583 * Note that dollar vars _could_ make the resulting text smaller than this.
2584 */
2585 StringBuffer newReplaceChars(cx);
2586 if (repstr->hasTwoByteChars() && !newReplaceChars.ensureTwoByteChars())
2587 return nullptr;
2588
2589 if (!newReplaceChars.reserve(textstr->length() - patternLength +
2590 repstr->length()))
2591 return nullptr;
2592
2593 bool res;
2594 if (repstr->hasLatin1Chars()) {
2595 AutoCheckCannotGC nogc;
2596 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2597 matchLimit, textstr,
2598 repstr->latin1Chars(nogc), repstr->length());
2599 } else {
2600 AutoCheckCannotGC nogc;
2601 res = AppendDollarReplacement(newReplaceChars, firstDollarIndex, matchStart,
2602 matchLimit, textstr,
2603 repstr->twoByteChars(nogc), repstr->length());
2604 }
2605 if (!res) return nullptr;
2606
2607 return newReplaceChars.finishString();
2608 }
2609
2610 template <typename StrChar, typename RepChar>
StrFlatReplaceGlobal(JSContext * cx,JSLinearString * str,JSLinearString * pat,JSLinearString * rep,StringBuffer & sb)2611 static bool StrFlatReplaceGlobal(JSContext* cx, JSLinearString* str,
2612 JSLinearString* pat, JSLinearString* rep,
2613 StringBuffer& sb) {
2614 MOZ_ASSERT(str->length() > 0);
2615
2616 AutoCheckCannotGC nogc;
2617 const StrChar* strChars = str->chars<StrChar>(nogc);
2618 const RepChar* repChars = rep->chars<RepChar>(nogc);
2619
2620 // The pattern is empty, so we interleave the replacement string in-between
2621 // each character.
2622 if (!pat->length()) {
2623 CheckedInt<uint32_t> strLength(str->length());
2624 CheckedInt<uint32_t> repLength(rep->length());
2625 CheckedInt<uint32_t> length = repLength * (strLength - 1) + strLength;
2626 if (!length.isValid()) {
2627 ReportAllocationOverflow(cx);
2628 return false;
2629 }
2630
2631 if (!sb.reserve(length.value())) return false;
2632
2633 for (unsigned i = 0; i < str->length() - 1; ++i, ++strChars) {
2634 sb.infallibleAppend(*strChars);
2635 sb.infallibleAppend(repChars, rep->length());
2636 }
2637 sb.infallibleAppend(*strChars);
2638 return true;
2639 }
2640
2641 // If it's true, we are sure that the result's length is, at least, the same
2642 // length as |str->length()|.
2643 if (rep->length() >= pat->length()) {
2644 if (!sb.reserve(str->length())) return false;
2645 }
2646
2647 uint32_t start = 0;
2648 for (;;) {
2649 int match = StringMatch(str, pat, start);
2650 if (match < 0) break;
2651 if (!sb.append(strChars + start, match - start)) return false;
2652 if (!sb.append(repChars, rep->length())) return false;
2653 start = match + pat->length();
2654 }
2655
2656 if (!sb.append(strChars + start, str->length() - start)) return false;
2657
2658 return true;
2659 }
2660
2661 // This is identical to "str.split(pattern).join(replacement)" except that we
2662 // do some deforestation optimization in Ion.
str_flat_replace_string(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2663 JSString* js::str_flat_replace_string(JSContext* cx, HandleString string,
2664 HandleString pattern,
2665 HandleString replacement) {
2666 MOZ_ASSERT(string);
2667 MOZ_ASSERT(pattern);
2668 MOZ_ASSERT(replacement);
2669
2670 if (!string->length()) return string;
2671
2672 RootedLinearString linearRepl(cx, replacement->ensureLinear(cx));
2673 if (!linearRepl) return nullptr;
2674
2675 RootedLinearString linearPat(cx, pattern->ensureLinear(cx));
2676 if (!linearPat) return nullptr;
2677
2678 RootedLinearString linearStr(cx, string->ensureLinear(cx));
2679 if (!linearStr) return nullptr;
2680
2681 StringBuffer sb(cx);
2682 if (linearStr->hasTwoByteChars()) {
2683 if (!sb.ensureTwoByteChars()) return nullptr;
2684 if (linearRepl->hasTwoByteChars()) {
2685 if (!StrFlatReplaceGlobal<char16_t, char16_t>(cx, linearStr, linearPat,
2686 linearRepl, sb))
2687 return nullptr;
2688 } else {
2689 if (!StrFlatReplaceGlobal<char16_t, Latin1Char>(cx, linearStr, linearPat,
2690 linearRepl, sb))
2691 return nullptr;
2692 }
2693 } else {
2694 if (linearRepl->hasTwoByteChars()) {
2695 if (!sb.ensureTwoByteChars()) return nullptr;
2696 if (!StrFlatReplaceGlobal<Latin1Char, char16_t>(cx, linearStr, linearPat,
2697 linearRepl, sb))
2698 return nullptr;
2699 } else {
2700 if (!StrFlatReplaceGlobal<Latin1Char, Latin1Char>(
2701 cx, linearStr, linearPat, linearRepl, sb))
2702 return nullptr;
2703 }
2704 }
2705
2706 return sb.finishString();
2707 }
2708
str_replace_string_raw(JSContext * cx,HandleString string,HandleString pattern,HandleString replacement)2709 JSString* js::str_replace_string_raw(JSContext* cx, HandleString string,
2710 HandleString pattern,
2711 HandleString replacement) {
2712 RootedLinearString repl(cx, replacement->ensureLinear(cx));
2713 if (!repl) return nullptr;
2714
2715 RootedLinearString pat(cx, pattern->ensureLinear(cx));
2716 if (!pat) return nullptr;
2717
2718 size_t patternLength = pat->length();
2719 int32_t match;
2720 uint32_t dollarIndex;
2721
2722 {
2723 AutoCheckCannotGC nogc;
2724 dollarIndex =
2725 repl->hasLatin1Chars()
2726 ? FindDollarIndex(repl->latin1Chars(nogc), repl->length())
2727 : FindDollarIndex(repl->twoByteChars(nogc), repl->length());
2728 }
2729
2730 /*
2731 * |string| could be a rope, so we want to avoid flattening it for as
2732 * long as possible.
2733 */
2734 if (string->isRope()) {
2735 if (!RopeMatch(cx, &string->asRope(), pat, &match)) return nullptr;
2736 } else {
2737 match = StringMatch(&string->asLinear(), pat, 0);
2738 }
2739
2740 if (match < 0) return string;
2741
2742 if (dollarIndex != UINT32_MAX) {
2743 repl = InterpretDollarReplacement(cx, string, repl, dollarIndex, match,
2744 patternLength);
2745 if (!repl) return nullptr;
2746 } else if (string->isRope()) {
2747 return BuildFlatRopeReplacement(cx, string, repl, match, patternLength);
2748 }
2749 return BuildFlatReplacement(cx, string, repl, match, patternLength);
2750 }
2751
NewFullyAllocatedStringArray(JSContext * cx,HandleObjectGroup group,uint32_t length)2752 static ArrayObject* NewFullyAllocatedStringArray(JSContext* cx,
2753 HandleObjectGroup group,
2754 uint32_t length) {
2755 ArrayObject* array = NewFullyAllocatedArrayTryUseGroup(cx, group, length);
2756 if (!array) return nullptr;
2757
2758 // Only string values will be added to this array. Inform TI early about
2759 // the element type, so we can directly initialize all elements using
2760 // NativeObject::initDenseElement() instead of the slightly more expensive
2761 // NativeObject::initDenseElementWithType() method.
2762 // Since this function is never called to create a zero-length array, it's
2763 // always necessary and correct to call AddTypePropertyId here.
2764 MOZ_ASSERT(length > 0);
2765 AddTypePropertyId(cx, array, JSID_VOID, TypeSet::StringType());
2766
2767 return array;
2768 }
2769
SingleElementStringArray(JSContext * cx,HandleObjectGroup group,HandleLinearString str)2770 static ArrayObject* SingleElementStringArray(JSContext* cx,
2771 HandleObjectGroup group,
2772 HandleLinearString str) {
2773 ArrayObject* array = NewFullyAllocatedStringArray(cx, group, 1);
2774 if (!array) return nullptr;
2775 array->setDenseInitializedLength(1);
2776 array->initDenseElement(0, StringValue(str));
2777 return array;
2778 }
2779
2780 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,HandleLinearString sep,HandleObjectGroup group)2781 static ArrayObject* SplitHelper(JSContext* cx, HandleLinearString str,
2782 uint32_t limit, HandleLinearString sep,
2783 HandleObjectGroup group) {
2784 size_t strLength = str->length();
2785 size_t sepLength = sep->length();
2786 MOZ_ASSERT(sepLength != 0);
2787
2788 // Step 12.
2789 if (strLength == 0) {
2790 // Step 12.a.
2791 int match = StringMatch(str, sep, 0);
2792
2793 // Step 12.b.
2794 if (match != -1) return NewFullyAllocatedArrayTryUseGroup(cx, group, 0);
2795
2796 // Steps 12.c-e.
2797 return SingleElementStringArray(cx, group, str);
2798 }
2799
2800 // Step 3 (reordered).
2801 AutoValueVector splits(cx);
2802
2803 // Step 8 (reordered).
2804 size_t lastEndIndex = 0;
2805
2806 // Step 13.
2807 size_t index = 0;
2808
2809 // Step 14.
2810 while (index != strLength) {
2811 // Step 14.a.
2812 int match = StringMatch(str, sep, index);
2813
2814 // Step 14.b.
2815 //
2816 // Our match algorithm differs from the spec in that it returns the
2817 // next index at which a match happens. If no match happens we're
2818 // done.
2819 //
2820 // But what if the match is at the end of the string (and the string is
2821 // not empty)? Per 14.c.i this shouldn't be a match, so we have to
2822 // specially exclude it. Thus this case should hold:
2823 //
2824 // var a = "abc".split(/\b/);
2825 // assertEq(a.length, 1);
2826 // assertEq(a[0], "abc");
2827 if (match == -1) break;
2828
2829 // Step 14.c.
2830 size_t endIndex = match + sepLength;
2831
2832 // Step 14.c.i.
2833 if (endIndex == lastEndIndex) {
2834 index++;
2835 continue;
2836 }
2837
2838 // Step 14.c.ii.
2839 MOZ_ASSERT(lastEndIndex < endIndex);
2840 MOZ_ASSERT(sepLength <= strLength);
2841 MOZ_ASSERT(lastEndIndex + sepLength <= endIndex);
2842
2843 // Step 14.c.ii.1.
2844 size_t subLength = size_t(endIndex - sepLength - lastEndIndex);
2845 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
2846
2847 // Steps 14.c.ii.2-4.
2848 if (!sub || !splits.append(StringValue(sub))) return nullptr;
2849
2850 // Step 14.c.ii.5.
2851 if (splits.length() == limit)
2852 return NewCopiedArrayTryUseGroup(cx, group, splits.begin(),
2853 splits.length());
2854
2855 // Step 14.c.ii.6.
2856 index = endIndex;
2857
2858 // Step 14.c.ii.7.
2859 lastEndIndex = index;
2860 }
2861
2862 // Step 15.
2863 JSString* sub =
2864 NewDependentString(cx, str, lastEndIndex, strLength - lastEndIndex);
2865
2866 // Steps 16-17.
2867 if (!sub || !splits.append(StringValue(sub))) return nullptr;
2868
2869 // Step 18.
2870 return NewCopiedArrayTryUseGroup(cx, group, splits.begin(), splits.length());
2871 }
2872
2873 // Fast-path for splitting a string into a character array via split("").
CharSplitHelper(JSContext * cx,HandleLinearString str,uint32_t limit,HandleObjectGroup group)2874 static ArrayObject* CharSplitHelper(JSContext* cx, HandleLinearString str,
2875 uint32_t limit, HandleObjectGroup group) {
2876 size_t strLength = str->length();
2877 if (strLength == 0) return NewFullyAllocatedArrayTryUseGroup(cx, group, 0);
2878
2879 js::StaticStrings& staticStrings = cx->staticStrings();
2880 uint32_t resultlen = (limit < strLength ? limit : strLength);
2881 MOZ_ASSERT(limit > 0 && resultlen > 0,
2882 "Neither limit nor strLength is zero, so resultlen is greater "
2883 "than zero.");
2884
2885 RootedArrayObject splits(cx,
2886 NewFullyAllocatedStringArray(cx, group, resultlen));
2887 if (!splits) return nullptr;
2888 splits->ensureDenseInitializedLength(cx, 0, resultlen);
2889
2890 for (size_t i = 0; i < resultlen; ++i) {
2891 JSString* sub = staticStrings.getUnitStringForElement(cx, str, i);
2892 if (!sub) return nullptr;
2893 splits->initDenseElement(i, StringValue(sub));
2894 }
2895
2896 return splits;
2897 }
2898
2899 template <typename TextChar>
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,const TextChar * text,uint32_t textLen,char16_t patCh,HandleObjectGroup group)2900 static MOZ_ALWAYS_INLINE ArrayObject* SplitSingleCharHelper(
2901 JSContext* cx, HandleLinearString str, const TextChar* text,
2902 uint32_t textLen, char16_t patCh, HandleObjectGroup group) {
2903 // Count the number of occurrences of patCh within text.
2904 uint32_t count = 0;
2905 for (size_t index = 0; index < textLen; index++) {
2906 if (static_cast<char16_t>(text[index]) == patCh) count++;
2907 }
2908
2909 // Handle zero-occurrence case - return input string in an array.
2910 if (count == 0) return SingleElementStringArray(cx, group, str);
2911
2912 // Create the result array for the substring values.
2913 RootedArrayObject splits(cx,
2914 NewFullyAllocatedStringArray(cx, group, count + 1));
2915 if (!splits) return nullptr;
2916 splits->ensureDenseInitializedLength(cx, 0, count + 1);
2917
2918 // Add substrings.
2919 uint32_t splitsIndex = 0;
2920 size_t lastEndIndex = 0;
2921 for (size_t index = 0; index < textLen; index++) {
2922 if (static_cast<char16_t>(text[index]) == patCh) {
2923 size_t subLength = size_t(index - lastEndIndex);
2924 JSString* sub = NewDependentString(cx, str, lastEndIndex, subLength);
2925 if (!sub) return nullptr;
2926 splits->initDenseElement(splitsIndex++, StringValue(sub));
2927 lastEndIndex = index + 1;
2928 }
2929 }
2930
2931 // Add substring for tail of string (after last match).
2932 JSString* sub =
2933 NewDependentString(cx, str, lastEndIndex, textLen - lastEndIndex);
2934 if (!sub) return nullptr;
2935 splits->initDenseElement(splitsIndex++, StringValue(sub));
2936
2937 return splits;
2938 }
2939
2940 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
SplitSingleCharHelper(JSContext * cx,HandleLinearString str,char16_t ch,HandleObjectGroup group)2941 static ArrayObject* SplitSingleCharHelper(JSContext* cx, HandleLinearString str,
2942 char16_t ch,
2943 HandleObjectGroup group) {
2944 // Step 12.
2945 size_t strLength = str->length();
2946
2947 AutoStableStringChars linearChars(cx);
2948 if (!linearChars.init(cx, str)) return nullptr;
2949
2950 if (linearChars.isLatin1())
2951 return SplitSingleCharHelper(cx, str, linearChars.latin1Chars(), strLength,
2952 ch, group);
2953
2954 return SplitSingleCharHelper(cx, str, linearChars.twoByteChars(), strLength,
2955 ch, group);
2956 }
2957
2958 // ES 2016 draft Mar 25, 2016 21.1.3.17 steps 4, 8, 12-18.
str_split_string(JSContext * cx,HandleObjectGroup group,HandleString str,HandleString sep,uint32_t limit)2959 ArrayObject* js::str_split_string(JSContext* cx, HandleObjectGroup group,
2960 HandleString str, HandleString sep,
2961 uint32_t limit) {
2962 MOZ_ASSERT(limit > 0, "Only called for strictly positive limit.");
2963
2964 RootedLinearString linearStr(cx, str->ensureLinear(cx));
2965 if (!linearStr) return nullptr;
2966
2967 RootedLinearString linearSep(cx, sep->ensureLinear(cx));
2968 if (!linearSep) return nullptr;
2969
2970 if (linearSep->length() == 0)
2971 return CharSplitHelper(cx, linearStr, limit, group);
2972
2973 if (linearSep->length() == 1 && limit >= static_cast<uint32_t>(INT32_MAX)) {
2974 char16_t ch = linearSep->latin1OrTwoByteChar(0);
2975 return SplitSingleCharHelper(cx, linearStr, ch, group);
2976 }
2977
2978 return SplitHelper(cx, linearStr, limit, linearSep, group);
2979 }
2980
2981 /*
2982 * Python-esque sequence operations.
2983 */
str_concat(JSContext * cx,unsigned argc,Value * vp)2984 bool js::str_concat(JSContext* cx, unsigned argc, Value* vp) {
2985 CallArgs args = CallArgsFromVp(argc, vp);
2986 JSString* str = ToStringForStringFunction(cx, args.thisv());
2987 if (!str) return false;
2988
2989 for (unsigned i = 0; i < args.length(); i++) {
2990 JSString* argStr = ToString<NoGC>(cx, args[i]);
2991 if (!argStr) {
2992 RootedString strRoot(cx, str);
2993 argStr = ToString<CanGC>(cx, args[i]);
2994 if (!argStr) return false;
2995 str = strRoot;
2996 }
2997
2998 JSString* next = ConcatStrings<NoGC>(cx, str, argStr);
2999 if (next) {
3000 str = next;
3001 } else {
3002 RootedString strRoot(cx, str), argStrRoot(cx, argStr);
3003 str = ConcatStrings<CanGC>(cx, strRoot, argStrRoot);
3004 if (!str) return false;
3005 }
3006 }
3007
3008 args.rval().setString(str);
3009 return true;
3010 }
3011
3012 static const JSFunctionSpec string_methods[] = {
3013 JS_FN(js_toSource_str, str_toSource, 0, 0),
3014
3015 /* Java-like methods. */
3016 JS_FN(js_toString_str, str_toString, 0, 0),
3017 JS_FN(js_valueOf_str, str_toString, 0, 0),
3018 JS_INLINABLE_FN("toLowerCase", str_toLowerCase, 0, 0, StringToLowerCase),
3019 JS_INLINABLE_FN("toUpperCase", str_toUpperCase, 0, 0, StringToUpperCase),
3020 JS_INLINABLE_FN("charAt", str_charAt, 1, 0, StringCharAt),
3021 JS_INLINABLE_FN("charCodeAt", str_charCodeAt, 1, 0, StringCharCodeAt),
3022 JS_SELF_HOSTED_FN("substring", "String_substring", 2, 0),
3023 JS_SELF_HOSTED_FN("padStart", "String_pad_start", 2, 0),
3024 JS_SELF_HOSTED_FN("padEnd", "String_pad_end", 2, 0),
3025 JS_SELF_HOSTED_FN("codePointAt", "String_codePointAt", 1, 0),
3026 JS_FN("includes", str_includes, 1, 0), JS_FN("indexOf", str_indexOf, 1, 0),
3027 JS_FN("lastIndexOf", str_lastIndexOf, 1, 0),
3028 JS_FN("startsWith", str_startsWith, 1, 0),
3029 JS_FN("endsWith", str_endsWith, 1, 0), JS_FN("trim", str_trim, 0, 0),
3030 #ifdef NIGHTLY_BUILD
3031 JS_FN("trimStart", str_trimStart, 0, 0),
3032 JS_FN("trimEnd", str_trimEnd, 0, 0),
3033 #else
3034 JS_FN("trimLeft", str_trimStart, 0, 0),
3035 JS_FN("trimRight", str_trimEnd, 0, 0),
3036 #endif
3037 #if EXPOSE_INTL_API
3038 JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_toLocaleLowerCase", 0, 0),
3039 JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_toLocaleUpperCase", 0, 0),
3040 JS_SELF_HOSTED_FN("localeCompare", "String_localeCompare", 1, 0),
3041 #else
3042 JS_FN("toLocaleLowerCase", str_toLocaleLowerCase, 0, 0),
3043 JS_FN("toLocaleUpperCase", str_toLocaleUpperCase, 0, 0),
3044 JS_FN("localeCompare", str_localeCompare, 1, 0),
3045 #endif
3046 JS_SELF_HOSTED_FN("repeat", "String_repeat", 1, 0),
3047 #if EXPOSE_INTL_API
3048 JS_FN("normalize", str_normalize, 0, 0),
3049 #endif
3050
3051 /* Perl-ish methods (search is actually Python-esque). */
3052 JS_SELF_HOSTED_FN("match", "String_match", 1, 0),
3053 JS_SELF_HOSTED_FN("search", "String_search", 1, 0),
3054 JS_SELF_HOSTED_FN("replace", "String_replace", 2, 0),
3055 JS_SELF_HOSTED_FN("split", "String_split", 2, 0),
3056 JS_SELF_HOSTED_FN("substr", "String_substr", 2, 0),
3057
3058 /* Python-esque sequence methods. */
3059 JS_FN("concat", str_concat, 1, 0),
3060 JS_SELF_HOSTED_FN("slice", "String_slice", 2, 0),
3061
3062 /* HTML string methods. */
3063 JS_SELF_HOSTED_FN("bold", "String_bold", 0, 0),
3064 JS_SELF_HOSTED_FN("italics", "String_italics", 0, 0),
3065 JS_SELF_HOSTED_FN("fixed", "String_fixed", 0, 0),
3066 JS_SELF_HOSTED_FN("strike", "String_strike", 0, 0),
3067 JS_SELF_HOSTED_FN("small", "String_small", 0, 0),
3068 JS_SELF_HOSTED_FN("big", "String_big", 0, 0),
3069 JS_SELF_HOSTED_FN("blink", "String_blink", 0, 0),
3070 JS_SELF_HOSTED_FN("sup", "String_sup", 0, 0),
3071 JS_SELF_HOSTED_FN("sub", "String_sub", 0, 0),
3072 JS_SELF_HOSTED_FN("anchor", "String_anchor", 1, 0),
3073 JS_SELF_HOSTED_FN("link", "String_link", 1, 0),
3074 JS_SELF_HOSTED_FN("fontcolor", "String_fontcolor", 1, 0),
3075 JS_SELF_HOSTED_FN("fontsize", "String_fontsize", 1, 0),
3076
3077 JS_SELF_HOSTED_SYM_FN(iterator, "String_iterator", 0, 0), JS_FS_END};
3078
3079 // ES6 rev 27 (2014 Aug 24) 21.1.1
StringConstructor(JSContext * cx,unsigned argc,Value * vp)3080 bool js::StringConstructor(JSContext* cx, unsigned argc, Value* vp) {
3081 CallArgs args = CallArgsFromVp(argc, vp);
3082
3083 RootedString str(cx);
3084 if (args.length() > 0) {
3085 if (!args.isConstructing() && args[0].isSymbol())
3086 return js::SymbolDescriptiveString(cx, args[0].toSymbol(), args.rval());
3087
3088 str = ToString<CanGC>(cx, args[0]);
3089 if (!str) return false;
3090 } else {
3091 str = cx->runtime()->emptyString;
3092 }
3093
3094 if (args.isConstructing()) {
3095 RootedObject proto(cx);
3096 if (!GetPrototypeFromBuiltinConstructor(cx, args, &proto)) return false;
3097
3098 StringObject* strobj = StringObject::create(cx, str, proto);
3099 if (!strobj) return false;
3100 args.rval().setObject(*strobj);
3101 return true;
3102 }
3103
3104 args.rval().setString(str);
3105 return true;
3106 }
3107
str_fromCharCode(JSContext * cx,unsigned argc,Value * vp)3108 bool js::str_fromCharCode(JSContext* cx, unsigned argc, Value* vp) {
3109 CallArgs args = CallArgsFromVp(argc, vp);
3110
3111 MOZ_ASSERT(args.length() <= ARGS_LENGTH_MAX);
3112
3113 // Optimize the single-char case.
3114 if (args.length() == 1)
3115 return str_fromCharCode_one_arg(cx, args[0], args.rval());
3116
3117 // Optimize the case where the result will definitely fit in an inline
3118 // string (thin or fat) and so we don't need to malloc the chars. (We could
3119 // cover some cases where args.length() goes up to
3120 // JSFatInlineString::MAX_LENGTH_LATIN1 if we also checked if the chars are
3121 // all Latin-1, but it doesn't seem worth the effort.)
3122 InlineCharBuffer<char16_t> chars;
3123 if (!chars.maybeAlloc(cx, args.length())) return false;
3124
3125 char16_t* rawChars = chars.get();
3126 for (unsigned i = 0; i < args.length(); i++) {
3127 uint16_t code;
3128 if (!ToUint16(cx, args[i], &code)) return false;
3129
3130 rawChars[i] = char16_t(code);
3131 }
3132
3133 JSString* str = chars.toString(cx, args.length());
3134 if (!str) return false;
3135
3136 args.rval().setString(str);
3137 return true;
3138 }
3139
CodeUnitToString(JSContext * cx,uint16_t ucode,MutableHandleValue rval)3140 static inline bool CodeUnitToString(JSContext* cx, uint16_t ucode,
3141 MutableHandleValue rval) {
3142 if (StaticStrings::hasUnit(ucode)) {
3143 rval.setString(cx->staticStrings().getUnit(ucode));
3144 return true;
3145 }
3146
3147 char16_t c = char16_t(ucode);
3148 JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, &c, 1);
3149 if (!str) return false;
3150
3151 rval.setString(str);
3152 return true;
3153 }
3154
str_fromCharCode_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3155 bool js::str_fromCharCode_one_arg(JSContext* cx, HandleValue code,
3156 MutableHandleValue rval) {
3157 uint16_t ucode;
3158
3159 if (!ToUint16(cx, code, &ucode)) return false;
3160
3161 return CodeUnitToString(cx, ucode, rval);
3162 }
3163
ToCodePoint(JSContext * cx,HandleValue code,uint32_t * codePoint)3164 static MOZ_ALWAYS_INLINE bool ToCodePoint(JSContext* cx, HandleValue code,
3165 uint32_t* codePoint) {
3166 // String.fromCodePoint, Steps 5.a-b.
3167
3168 // Fast path for the common case - the input is already an int32.
3169 if (code.isInt32()) {
3170 int32_t nextCP = code.toInt32();
3171 if (nextCP >= 0 && nextCP <= int32_t(unicode::NonBMPMax)) {
3172 *codePoint = uint32_t(nextCP);
3173 return true;
3174 }
3175 }
3176
3177 double nextCP;
3178 if (!ToNumber(cx, code, &nextCP)) return false;
3179
3180 // String.fromCodePoint, Steps 5.c-d.
3181 if (JS::ToInteger(nextCP) != nextCP || nextCP < 0 ||
3182 nextCP > unicode::NonBMPMax) {
3183 ToCStringBuf cbuf;
3184 if (const char* numStr = NumberToCString(cx, &cbuf, nextCP))
3185 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
3186 JSMSG_NOT_A_CODEPOINT, numStr);
3187 return false;
3188 }
3189
3190 *codePoint = uint32_t(nextCP);
3191 return true;
3192 }
3193
str_fromCodePoint_one_arg(JSContext * cx,HandleValue code,MutableHandleValue rval)3194 bool js::str_fromCodePoint_one_arg(JSContext* cx, HandleValue code,
3195 MutableHandleValue rval) {
3196 // Steps 1-4 (omitted).
3197
3198 // Steps 5.a-d.
3199 uint32_t codePoint;
3200 if (!ToCodePoint(cx, code, &codePoint)) return false;
3201
3202 // Steps 5.e, 6.
3203 if (!unicode::IsSupplementary(codePoint))
3204 return CodeUnitToString(cx, uint16_t(codePoint), rval);
3205
3206 char16_t chars[] = {unicode::LeadSurrogate(codePoint),
3207 unicode::TrailSurrogate(codePoint)};
3208 JSString* str = NewStringCopyNDontDeflate<CanGC>(cx, chars, 2);
3209 if (!str) return false;
3210
3211 rval.setString(str);
3212 return true;
3213 }
3214
str_fromCodePoint_few_args(JSContext * cx,const CallArgs & args)3215 static bool str_fromCodePoint_few_args(JSContext* cx, const CallArgs& args) {
3216 MOZ_ASSERT(args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2);
3217
3218 // Steps 1-2 (omitted).
3219
3220 // Step 3.
3221 char16_t elements[JSFatInlineString::MAX_LENGTH_TWO_BYTE];
3222
3223 // Steps 4-5.
3224 unsigned length = 0;
3225 for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3226 // Steps 5.a-d.
3227 uint32_t codePoint;
3228 if (!ToCodePoint(cx, args[nextIndex], &codePoint)) return false;
3229
3230 // Step 5.e.
3231 unicode::UTF16Encode(codePoint, elements, &length);
3232 }
3233
3234 // Step 6.
3235 JSString* str = NewStringCopyN<CanGC>(cx, elements, length);
3236 if (!str) return false;
3237
3238 args.rval().setString(str);
3239 return true;
3240 }
3241
3242 // ES2017 draft rev 40edb3a95a475c1b251141ac681b8793129d9a6d
3243 // 21.1.2.2 String.fromCodePoint(...codePoints)
str_fromCodePoint(JSContext * cx,unsigned argc,Value * vp)3244 bool js::str_fromCodePoint(JSContext* cx, unsigned argc, Value* vp) {
3245 CallArgs args = CallArgsFromVp(argc, vp);
3246
3247 // Optimize the single code-point case.
3248 if (args.length() == 1)
3249 return str_fromCodePoint_one_arg(cx, args[0], args.rval());
3250
3251 // Optimize the case where the result will definitely fit in an inline
3252 // string (thin or fat) and so we don't need to malloc the chars. (We could
3253 // cover some cases where |args.length()| goes up to
3254 // JSFatInlineString::MAX_LENGTH_LATIN1 / 2 if we also checked if the chars
3255 // are all Latin-1, but it doesn't seem worth the effort.)
3256 if (args.length() <= JSFatInlineString::MAX_LENGTH_TWO_BYTE / 2)
3257 return str_fromCodePoint_few_args(cx, args);
3258
3259 // Steps 1-2 (omitted).
3260
3261 // Step 3.
3262 static_assert(
3263 ARGS_LENGTH_MAX < std::numeric_limits<decltype(args.length())>::max() / 2,
3264 "|args.length() * 2 + 1| does not overflow");
3265 char16_t* elements = cx->pod_malloc<char16_t>(args.length() * 2 + 1);
3266 if (!elements) return false;
3267
3268 // Steps 4-5.
3269 unsigned length = 0;
3270 for (unsigned nextIndex = 0; nextIndex < args.length(); nextIndex++) {
3271 // Steps 5.a-d.
3272 uint32_t codePoint;
3273 if (!ToCodePoint(cx, args[nextIndex], &codePoint)) {
3274 js_free(elements);
3275 return false;
3276 }
3277
3278 // Step 5.e.
3279 unicode::UTF16Encode(codePoint, elements, &length);
3280 }
3281 elements[length] = 0;
3282
3283 // Step 6.
3284 JSString* str = NewString<CanGC>(cx, elements, length);
3285 if (!str) {
3286 js_free(elements);
3287 return false;
3288 }
3289
3290 args.rval().setString(str);
3291 return true;
3292 }
3293
3294 static const JSFunctionSpec string_static_methods[] = {
3295 JS_INLINABLE_FN("fromCharCode", js::str_fromCharCode, 1, 0,
3296 StringFromCharCode),
3297 JS_INLINABLE_FN("fromCodePoint", js::str_fromCodePoint, 1, 0,
3298 StringFromCodePoint),
3299
3300 JS_SELF_HOSTED_FN("raw", "String_static_raw", 1, 0),
3301 JS_SELF_HOSTED_FN("substring", "String_static_substring", 3, 0),
3302 JS_SELF_HOSTED_FN("substr", "String_static_substr", 3, 0),
3303 JS_SELF_HOSTED_FN("slice", "String_static_slice", 3, 0),
3304
3305 JS_SELF_HOSTED_FN("match", "String_generic_match", 2, 0),
3306 JS_SELF_HOSTED_FN("replace", "String_generic_replace", 3, 0),
3307 JS_SELF_HOSTED_FN("search", "String_generic_search", 2, 0),
3308 JS_SELF_HOSTED_FN("split", "String_generic_split", 3, 0),
3309
3310 JS_SELF_HOSTED_FN("toLowerCase", "String_static_toLowerCase", 1, 0),
3311 JS_SELF_HOSTED_FN("toUpperCase", "String_static_toUpperCase", 1, 0),
3312 JS_SELF_HOSTED_FN("charAt", "String_static_charAt", 2, 0),
3313 JS_SELF_HOSTED_FN("charCodeAt", "String_static_charCodeAt", 2, 0),
3314 JS_SELF_HOSTED_FN("includes", "String_static_includes", 2, 0),
3315 JS_SELF_HOSTED_FN("indexOf", "String_static_indexOf", 2, 0),
3316 JS_SELF_HOSTED_FN("lastIndexOf", "String_static_lastIndexOf", 2, 0),
3317 JS_SELF_HOSTED_FN("startsWith", "String_static_startsWith", 2, 0),
3318 JS_SELF_HOSTED_FN("endsWith", "String_static_endsWith", 2, 0),
3319 JS_SELF_HOSTED_FN("trim", "String_static_trim", 1, 0),
3320 JS_SELF_HOSTED_FN("trimLeft", "String_static_trimLeft", 1, 0),
3321 JS_SELF_HOSTED_FN("trimRight", "String_static_trimRight", 1, 0),
3322 JS_SELF_HOSTED_FN("toLocaleLowerCase", "String_static_toLocaleLowerCase", 1,
3323 0),
3324 JS_SELF_HOSTED_FN("toLocaleUpperCase", "String_static_toLocaleUpperCase", 1,
3325 0),
3326 #if EXPOSE_INTL_API
3327 JS_SELF_HOSTED_FN("normalize", "String_static_normalize", 1, 0),
3328 #endif
3329 JS_SELF_HOSTED_FN("concat", "String_static_concat", 2, 0),
3330
3331 JS_SELF_HOSTED_FN("localeCompare", "String_static_localeCompare", 2, 0),
3332 JS_FS_END};
3333
assignInitialShape(JSContext * cx,Handle<StringObject * > obj)3334 /* static */ Shape* StringObject::assignInitialShape(
3335 JSContext* cx, Handle<StringObject*> obj) {
3336 MOZ_ASSERT(obj->empty());
3337
3338 return NativeObject::addDataProperty(cx, obj, cx->names().length, LENGTH_SLOT,
3339 JSPROP_PERMANENT | JSPROP_READONLY);
3340 }
3341
InitStringClass(JSContext * cx,HandleObject obj)3342 JSObject* js::InitStringClass(JSContext* cx, HandleObject obj) {
3343 MOZ_ASSERT(obj->isNative());
3344
3345 Handle<GlobalObject*> global = obj.as<GlobalObject>();
3346
3347 Rooted<JSString*> empty(cx, cx->runtime()->emptyString);
3348 RootedObject proto(cx, GlobalObject::createBlankPrototype(
3349 cx, global, &StringObject::class_));
3350 if (!proto) return nullptr;
3351 Handle<StringObject*> protoObj = proto.as<StringObject>();
3352 if (!StringObject::init(cx, protoObj, empty)) return nullptr;
3353
3354 /* Now create the String function. */
3355 RootedFunction ctor(cx);
3356 ctor = GlobalObject::createConstructor(
3357 cx, StringConstructor, cx->names().String, 1, AllocKind::FUNCTION,
3358 &jit::JitInfo_String);
3359 if (!ctor) return nullptr;
3360
3361 if (!LinkConstructorAndPrototype(cx, ctor, proto)) return nullptr;
3362
3363 if (!DefinePropertiesAndFunctions(cx, proto, nullptr, string_methods) ||
3364 !DefinePropertiesAndFunctions(cx, ctor, nullptr, string_static_methods)) {
3365 return nullptr;
3366 }
3367
3368 #ifdef NIGHTLY_BUILD
3369 // Create "trimLeft" as an alias for "trimStart".
3370 RootedValue trimFn(cx);
3371 RootedId trimId(cx, NameToId(cx->names().trimStart));
3372 RootedId trimAliasId(cx, NameToId(cx->names().trimLeft));
3373 if (!NativeGetProperty(cx, protoObj, trimId, &trimFn) ||
3374 !NativeDefineDataProperty(cx, protoObj, trimAliasId, trimFn, 0)) {
3375 return nullptr;
3376 }
3377
3378 // Create "trimRight" as an alias for "trimEnd".
3379 trimId = NameToId(cx->names().trimEnd);
3380 trimAliasId = NameToId(cx->names().trimRight);
3381 if (!NativeGetProperty(cx, protoObj, trimId, &trimFn) ||
3382 !NativeDefineDataProperty(cx, protoObj, trimAliasId, trimFn, 0)) {
3383 return nullptr;
3384 }
3385 #endif
3386
3387 /*
3388 * Define escape/unescape, the URI encode/decode functions, and maybe
3389 * uneval on the global object.
3390 */
3391 if (!JS_DefineFunctions(cx, global, string_functions)) return nullptr;
3392
3393 if (!GlobalObject::initBuiltinConstructor(cx, global, JSProto_String, ctor,
3394 proto))
3395 return nullptr;
3396
3397 return proto;
3398 }
3399
3400 #define ____ false
3401
3402 /*
3403 * Uri reserved chars + #:
3404 * - 35: #
3405 * - 36: $
3406 * - 38: &
3407 * - 43: +
3408 * - 44: ,
3409 * - 47: /
3410 * - 58: :
3411 * - 59: ;
3412 * - 61: =
3413 * - 63: ?
3414 * - 64: @
3415 */
3416 static const bool js_isUriReservedPlusPound[] = {
3417 // clang-format off
3418 /* 0 1 2 3 4 5 6 7 8 9 */
3419 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3420 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3421 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3422 /* 3 */ ____, ____, ____, ____, ____, true, true, ____, true, ____,
3423 /* 4 */ ____, ____, ____, true, true, ____, ____, true, ____, ____,
3424 /* 5 */ ____, ____, ____, ____, ____, ____, ____, ____, true, true,
3425 /* 6 */ ____, true, ____, true, true, ____, ____, ____, ____, ____,
3426 /* 7 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3427 /* 8 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3428 /* 9 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3429 /* 10 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3430 /* 11 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3431 /* 12 */ ____, ____, ____, ____, ____, ____, ____, ____
3432 // clang-format on
3433 };
3434
3435 /*
3436 * Uri unescaped chars:
3437 * - 33: !
3438 * - 39: '
3439 * - 40: (
3440 * - 41: )
3441 * - 42: *
3442 * - 45: -
3443 * - 46: .
3444 * - 48..57: 0-9
3445 * - 65..90: A-Z
3446 * - 95: _
3447 * - 97..122: a-z
3448 * - 126: ~
3449 */
3450 static const bool js_isUriUnescaped[] = {
3451 // clang-format off
3452 /* 0 1 2 3 4 5 6 7 8 9 */
3453 /* 0 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3454 /* 1 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3455 /* 2 */ ____, ____, ____, ____, ____, ____, ____, ____, ____, ____,
3456 /* 3 */ ____, ____, ____, true, ____, ____, ____, ____, ____, true,
3457 /* 4 */ true, true, true, ____, ____, true, true, ____, true, true,
3458 /* 5 */ true, true, true, true, true, true, true, true, ____, ____,
3459 /* 6 */ ____, ____, ____, ____, ____, true, true, true, true, true,
3460 /* 7 */ true, true, true, true, true, true, true, true, true, true,
3461 /* 8 */ true, true, true, true, true, true, true, true, true, true,
3462 /* 9 */ true, ____, ____, ____, ____, true, ____, true, true, true,
3463 /* 10 */ true, true, true, true, true, true, true, true, true, true,
3464 /* 11 */ true, true, true, true, true, true, true, true, true, true,
3465 /* 12 */ true, true, true, ____, ____, ____, true, ____
3466 // clang-format on
3467 };
3468
3469 #undef ____
3470
TransferBufferToString(StringBuffer & sb,MutableHandleValue rval)3471 static inline bool TransferBufferToString(StringBuffer& sb,
3472 MutableHandleValue rval) {
3473 JSString* str = sb.finishString();
3474 if (!str) return false;
3475 rval.setString(str);
3476 return true;
3477 }
3478
3479 /*
3480 * ECMA 3, 15.1.3 URI Handling Function Properties
3481 *
3482 * The following are implementations of the algorithms
3483 * given in the ECMA specification for the hidden functions
3484 * 'Encode' and 'Decode'.
3485 */
3486 enum EncodeResult { Encode_Failure, Encode_BadUri, Encode_Success };
3487
3488 // Bug 1403318: GCC sometimes inlines this Encode function rather than the
3489 // caller Encode function. Annotate both functions with MOZ_NEVER_INLINE resp.
3490 // MOZ_ALWAYS_INLINE to ensure we get the desired inlining behavior.
3491 template <typename CharT>
Encode(StringBuffer & sb,const CharT * chars,size_t length,const bool * unescapedSet)3492 static MOZ_NEVER_INLINE EncodeResult Encode(StringBuffer& sb,
3493 const CharT* chars, size_t length,
3494 const bool* unescapedSet) {
3495 Latin1Char hexBuf[4];
3496 hexBuf[0] = '%';
3497 hexBuf[3] = 0;
3498
3499 auto appendEncoded = [&sb, &hexBuf](Latin1Char c) {
3500 static const char HexDigits[] = "0123456789ABCDEF"; /* NB: uppercase */
3501
3502 hexBuf[1] = HexDigits[c >> 4];
3503 hexBuf[2] = HexDigits[c & 0xf];
3504 return sb.append(hexBuf, 3);
3505 };
3506
3507 for (size_t k = 0; k < length; k++) {
3508 CharT c = chars[k];
3509 if (c < 128 &&
3510 (js_isUriUnescaped[c] || (unescapedSet && unescapedSet[c]))) {
3511 if (!sb.append(Latin1Char(c))) return Encode_Failure;
3512 } else {
3513 if (mozilla::IsSame<CharT, Latin1Char>::value) {
3514 if (c < 0x80) {
3515 if (!appendEncoded(c)) return Encode_Failure;
3516 } else {
3517 if (!appendEncoded(0xC0 | (c >> 6)) ||
3518 !appendEncoded(0x80 | (c & 0x3F)))
3519 return Encode_Failure;
3520 }
3521 } else {
3522 if (unicode::IsTrailSurrogate(c)) return Encode_BadUri;
3523
3524 uint32_t v;
3525 if (!unicode::IsLeadSurrogate(c)) {
3526 v = c;
3527 } else {
3528 k++;
3529 if (k == length) return Encode_BadUri;
3530
3531 char16_t c2 = chars[k];
3532 if (!unicode::IsTrailSurrogate(c2)) return Encode_BadUri;
3533
3534 v = unicode::UTF16Decode(c, c2);
3535 }
3536
3537 uint8_t utf8buf[4];
3538 size_t L = OneUcs4ToUtf8Char(utf8buf, v);
3539 for (size_t j = 0; j < L; j++) {
3540 if (!appendEncoded(utf8buf[j])) return Encode_Failure;
3541 }
3542 }
3543 }
3544 }
3545
3546 return Encode_Success;
3547 }
3548
Encode(JSContext * cx,HandleLinearString str,const bool * unescapedSet,MutableHandleValue rval)3549 static MOZ_ALWAYS_INLINE bool Encode(JSContext* cx, HandleLinearString str,
3550 const bool* unescapedSet,
3551 MutableHandleValue rval) {
3552 size_t length = str->length();
3553 if (length == 0) {
3554 rval.setString(cx->runtime()->emptyString);
3555 return true;
3556 }
3557
3558 StringBuffer sb(cx);
3559 if (!sb.reserve(length)) return false;
3560
3561 EncodeResult res;
3562 if (str->hasLatin1Chars()) {
3563 AutoCheckCannotGC nogc;
3564 res = Encode(sb, str->latin1Chars(nogc), str->length(), unescapedSet);
3565 } else {
3566 AutoCheckCannotGC nogc;
3567 res = Encode(sb, str->twoByteChars(nogc), str->length(), unescapedSet);
3568 }
3569
3570 if (res == Encode_Failure) return false;
3571
3572 if (res == Encode_BadUri) {
3573 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
3574 return false;
3575 }
3576
3577 MOZ_ASSERT(res == Encode_Success);
3578 return TransferBufferToString(sb, rval);
3579 }
3580
3581 enum DecodeResult { Decode_Failure, Decode_BadUri, Decode_Success };
3582
3583 template <typename CharT>
Decode(StringBuffer & sb,const CharT * chars,size_t length,const bool * reservedSet)3584 static DecodeResult Decode(StringBuffer& sb, const CharT* chars, size_t length,
3585 const bool* reservedSet) {
3586 for (size_t k = 0; k < length; k++) {
3587 CharT c = chars[k];
3588 if (c == '%') {
3589 size_t start = k;
3590 if ((k + 2) >= length) return Decode_BadUri;
3591
3592 if (!JS7_ISHEX(chars[k + 1]) || !JS7_ISHEX(chars[k + 2]))
3593 return Decode_BadUri;
3594
3595 uint32_t B = JS7_UNHEX(chars[k + 1]) * 16 + JS7_UNHEX(chars[k + 2]);
3596 k += 2;
3597 if (B < 128) {
3598 c = CharT(B);
3599 if (reservedSet && reservedSet[c]) {
3600 if (!sb.append(chars + start, k - start + 1)) return Decode_Failure;
3601 } else {
3602 if (!sb.append(c)) return Decode_Failure;
3603 }
3604 } else {
3605 int n = 1;
3606 while (B & (0x80 >> n)) n++;
3607
3608 if (n == 1 || n > 4) return Decode_BadUri;
3609
3610 uint8_t octets[4];
3611 octets[0] = (uint8_t)B;
3612 if (k + 3 * (n - 1) >= length) return Decode_BadUri;
3613
3614 for (int j = 1; j < n; j++) {
3615 k++;
3616 if (chars[k] != '%') return Decode_BadUri;
3617
3618 if (!JS7_ISHEX(chars[k + 1]) || !JS7_ISHEX(chars[k + 2]))
3619 return Decode_BadUri;
3620
3621 B = JS7_UNHEX(chars[k + 1]) * 16 + JS7_UNHEX(chars[k + 2]);
3622 if ((B & 0xC0) != 0x80) return Decode_BadUri;
3623
3624 k += 2;
3625 octets[j] = char(B);
3626 }
3627
3628 uint32_t v = JS::Utf8ToOneUcs4Char(octets, n);
3629 MOZ_ASSERT(v >= 128);
3630 if (v >= unicode::NonBMPMin) {
3631 if (v > unicode::NonBMPMax) return Decode_BadUri;
3632
3633 if (!sb.append(unicode::LeadSurrogate(v))) return Decode_Failure;
3634 if (!sb.append(unicode::TrailSurrogate(v))) return Decode_Failure;
3635 } else {
3636 if (!sb.append(char16_t(v))) return Decode_Failure;
3637 }
3638 }
3639 } else {
3640 if (!sb.append(c)) return Decode_Failure;
3641 }
3642 }
3643
3644 return Decode_Success;
3645 }
3646
Decode(JSContext * cx,HandleLinearString str,const bool * reservedSet,MutableHandleValue rval)3647 static bool Decode(JSContext* cx, HandleLinearString str,
3648 const bool* reservedSet, MutableHandleValue rval) {
3649 size_t length = str->length();
3650 if (length == 0) {
3651 rval.setString(cx->runtime()->emptyString);
3652 return true;
3653 }
3654
3655 StringBuffer sb(cx);
3656
3657 DecodeResult res;
3658 if (str->hasLatin1Chars()) {
3659 AutoCheckCannotGC nogc;
3660 res = Decode(sb, str->latin1Chars(nogc), str->length(), reservedSet);
3661 } else {
3662 AutoCheckCannotGC nogc;
3663 res = Decode(sb, str->twoByteChars(nogc), str->length(), reservedSet);
3664 }
3665
3666 if (res == Decode_Failure) return false;
3667
3668 if (res == Decode_BadUri) {
3669 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
3670 return false;
3671 }
3672
3673 MOZ_ASSERT(res == Decode_Success);
3674 return TransferBufferToString(sb, rval);
3675 }
3676
str_decodeURI(JSContext * cx,unsigned argc,Value * vp)3677 static bool str_decodeURI(JSContext* cx, unsigned argc, Value* vp) {
3678 CallArgs args = CallArgsFromVp(argc, vp);
3679 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
3680 if (!str) return false;
3681
3682 return Decode(cx, str, js_isUriReservedPlusPound, args.rval());
3683 }
3684
str_decodeURI_Component(JSContext * cx,unsigned argc,Value * vp)3685 static bool str_decodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
3686 CallArgs args = CallArgsFromVp(argc, vp);
3687 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
3688 if (!str) return false;
3689
3690 return Decode(cx, str, nullptr, args.rval());
3691 }
3692
str_encodeURI(JSContext * cx,unsigned argc,Value * vp)3693 static bool str_encodeURI(JSContext* cx, unsigned argc, Value* vp) {
3694 CallArgs args = CallArgsFromVp(argc, vp);
3695 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
3696 if (!str) return false;
3697
3698 return Encode(cx, str, js_isUriReservedPlusPound, args.rval());
3699 }
3700
str_encodeURI_Component(JSContext * cx,unsigned argc,Value * vp)3701 static bool str_encodeURI_Component(JSContext* cx, unsigned argc, Value* vp) {
3702 CallArgs args = CallArgsFromVp(argc, vp);
3703 RootedLinearString str(cx, ArgToLinearString(cx, args, 0));
3704 if (!str) return false;
3705
3706 return Encode(cx, str, nullptr, args.rval());
3707 }
3708
EncodeURI(JSContext * cx,StringBuffer & sb,const char * chars,size_t length)3709 bool js::EncodeURI(JSContext* cx, StringBuffer& sb, const char* chars,
3710 size_t length) {
3711 EncodeResult result = Encode(sb, reinterpret_cast<const Latin1Char*>(chars),
3712 length, js_isUriReservedPlusPound);
3713 if (result == EncodeResult::Encode_Failure) return false;
3714 if (result == EncodeResult::Encode_BadUri) {
3715 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr, JSMSG_BAD_URI);
3716 return false;
3717 }
3718 return true;
3719 }
3720
FlatStringMatchHelper(JSContext * cx,HandleString str,HandleString pattern,bool * isFlat,int32_t * match)3721 static bool FlatStringMatchHelper(JSContext* cx, HandleString str,
3722 HandleString pattern, bool* isFlat,
3723 int32_t* match) {
3724 RootedLinearString linearPattern(cx, pattern->ensureLinear(cx));
3725 if (!linearPattern) return false;
3726
3727 static const size_t MAX_FLAT_PAT_LEN = 256;
3728 if (linearPattern->length() > MAX_FLAT_PAT_LEN ||
3729 StringHasRegExpMetaChars(linearPattern)) {
3730 *isFlat = false;
3731 return true;
3732 }
3733
3734 *isFlat = true;
3735 if (str->isRope()) {
3736 if (!RopeMatch(cx, &str->asRope(), linearPattern, match)) return false;
3737 } else {
3738 *match = StringMatch(&str->asLinear(), linearPattern);
3739 }
3740
3741 return true;
3742 }
3743
BuildFlatMatchArray(JSContext * cx,HandleString str,HandleString pattern,int32_t match,MutableHandleValue rval)3744 static bool BuildFlatMatchArray(JSContext* cx, HandleString str,
3745 HandleString pattern, int32_t match,
3746 MutableHandleValue rval) {
3747 if (match < 0) {
3748 rval.setNull();
3749 return true;
3750 }
3751
3752 /* Get the templateObject that defines the shape and type of the output object
3753 */
3754 JSObject* templateObject =
3755 cx->compartment()->regExps.getOrCreateMatchResultTemplateObject(cx);
3756 if (!templateObject) return false;
3757
3758 RootedArrayObject arr(
3759 cx, NewDenseFullyAllocatedArrayWithTemplate(cx, 1, templateObject));
3760 if (!arr) return false;
3761
3762 /* Store a Value for each pair. */
3763 arr->setDenseInitializedLength(1);
3764 arr->initDenseElement(0, StringValue(pattern));
3765
3766 /* Set the |index| property. (TemplateObject positions it in slot 0) */
3767 arr->setSlot(0, Int32Value(match));
3768
3769 /* Set the |input| property. (TemplateObject positions it in slot 1) */
3770 arr->setSlot(1, StringValue(str));
3771
3772 #ifdef DEBUG
3773 RootedValue test(cx);
3774 RootedId id(cx, NameToId(cx->names().index));
3775 if (!NativeGetProperty(cx, arr, id, &test)) return false;
3776 MOZ_ASSERT(test == arr->getSlot(0));
3777 id = NameToId(cx->names().input);
3778 if (!NativeGetProperty(cx, arr, id, &test)) return false;
3779 MOZ_ASSERT(test == arr->getSlot(1));
3780 #endif
3781
3782 rval.setObject(*arr);
3783 return true;
3784 }
3785
3786 #ifdef DEBUG
CallIsStringOptimizable(JSContext * cx,const char * name,bool * result)3787 static bool CallIsStringOptimizable(JSContext* cx, const char* name,
3788 bool* result) {
3789 FixedInvokeArgs<0> args(cx);
3790
3791 RootedValue rval(cx);
3792 if (!CallSelfHostedFunction(cx, name, UndefinedHandleValue, args, &rval))
3793 return false;
3794
3795 *result = rval.toBoolean();
3796 return true;
3797 }
3798 #endif
3799
FlatStringMatch(JSContext * cx,unsigned argc,Value * vp)3800 bool js::FlatStringMatch(JSContext* cx, unsigned argc, Value* vp) {
3801 CallArgs args = CallArgsFromVp(argc, vp);
3802 MOZ_ASSERT(args.length() == 2);
3803 MOZ_ASSERT(args[0].isString());
3804 MOZ_ASSERT(args[1].isString());
3805 #ifdef DEBUG
3806 bool isOptimizable = false;
3807 if (!CallIsStringOptimizable(cx, "IsStringMatchOptimizable", &isOptimizable))
3808 return false;
3809 MOZ_ASSERT(isOptimizable);
3810 #endif
3811
3812 RootedString str(cx, args[0].toString());
3813 RootedString pattern(cx, args[1].toString());
3814
3815 bool isFlat = false;
3816 int32_t match = 0;
3817 if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) return false;
3818
3819 if (!isFlat) {
3820 args.rval().setUndefined();
3821 return true;
3822 }
3823
3824 return BuildFlatMatchArray(cx, str, pattern, match, args.rval());
3825 }
3826
FlatStringSearch(JSContext * cx,unsigned argc,Value * vp)3827 bool js::FlatStringSearch(JSContext* cx, unsigned argc, Value* vp) {
3828 CallArgs args = CallArgsFromVp(argc, vp);
3829 MOZ_ASSERT(args.length() == 2);
3830 MOZ_ASSERT(args[0].isString());
3831 MOZ_ASSERT(args[1].isString());
3832 #ifdef DEBUG
3833 bool isOptimizable = false;
3834 if (!CallIsStringOptimizable(cx, "IsStringSearchOptimizable", &isOptimizable))
3835 return false;
3836 MOZ_ASSERT(isOptimizable);
3837 #endif
3838
3839 RootedString str(cx, args[0].toString());
3840 RootedString pattern(cx, args[1].toString());
3841
3842 bool isFlat = false;
3843 int32_t match = 0;
3844 if (!FlatStringMatchHelper(cx, str, pattern, &isFlat, &match)) return false;
3845
3846 if (!isFlat) {
3847 args.rval().setInt32(-2);
3848 return true;
3849 }
3850
3851 args.rval().setInt32(match);
3852 return true;
3853 }
3854