1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "vm/StringType-inl.h"
8
9 #include "mozilla/ArrayUtils.h"
10 #include "mozilla/DebugOnly.h"
11 #include "mozilla/HashFunctions.h"
12 #include "mozilla/Latin1.h"
13 #include "mozilla/MathAlgorithms.h"
14 #include "mozilla/MemoryReporting.h"
15 #include "mozilla/PodOperations.h"
16 #include "mozilla/RangedPtr.h"
17 #include "mozilla/TextUtils.h"
18 #include "mozilla/Utf8.h"
19 #include "mozilla/Vector.h"
20
21 #include <algorithm> // std::{all_of,copy_n,enable_if,is_const,move}
22 #include <iterator> // std::size
23 #include <type_traits> // std::is_same, std::is_unsigned
24
25 #include "jsfriendapi.h"
26
27 #include "builtin/Boolean.h"
28 #ifdef ENABLE_RECORD_TUPLE
29 # include "builtin/RecordObject.h"
30 #endif
31 #include "frontend/BytecodeCompiler.h"
32 #include "gc/MaybeRooted.h"
33 #include "gc/Nursery.h"
34 #include "js/CharacterEncoding.h"
35 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
36 #include "js/PropertyAndElement.h" // JS_DefineElement
37 #include "js/StableStringChars.h"
38 #include "js/Symbol.h"
39 #include "js/UbiNode.h"
40 #include "util/StringBuffer.h"
41 #include "util/Unicode.h"
42 #include "vm/GeckoProfiler.h"
43 #include "vm/StaticStrings.h"
44 #include "vm/ToSource.h" // js::ValueToSource
45
46 #include "gc/Marking-inl.h"
47 #include "vm/GeckoProfiler-inl.h"
48 #include "vm/JSContext-inl.h"
49 #include "vm/JSObject-inl.h"
50 #include "vm/Realm-inl.h"
51 #ifdef ENABLE_RECORD_TUPLE
52 # include "vm/RecordType.h"
53 # include "vm/TupleType.h"
54 #endif
55
56 using namespace js;
57
58 using mozilla::ArrayEqual;
59 using mozilla::AsWritableChars;
60 using mozilla::ConvertLatin1toUtf16;
61 using mozilla::IsAsciiDigit;
62 using mozilla::IsUtf16Latin1;
63 using mozilla::LossyConvertUtf16toLatin1;
64 using mozilla::PodCopy;
65 using mozilla::RangedPtr;
66 using mozilla::RoundUpPow2;
67 using mozilla::Span;
68
69 using JS::AutoCheckCannotGC;
70 using JS::AutoStableStringChars;
71
72 using UniqueLatin1Chars = UniquePtr<Latin1Char[], JS::FreePolicy>;
73
sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf)74 size_t JSString::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
75 // JSRope: do nothing, we'll count all children chars when we hit the leaf
76 // strings.
77 if (isRope()) {
78 return 0;
79 }
80
81 MOZ_ASSERT(isLinear());
82
83 // JSDependentString: do nothing, we'll count the chars when we hit the base
84 // string.
85 if (isDependent()) {
86 return 0;
87 }
88
89 // JSExternalString: Ask the embedding to tell us what's going on.
90 if (isExternal()) {
91 // Our callback isn't supposed to cause GC.
92 JS::AutoSuppressGCAnalysis nogc;
93 return asExternal().callbacks()->sizeOfBuffer(asExternal().twoByteChars(),
94 mallocSizeOf);
95 }
96
97 // JSExtensibleString: count the full capacity, not just the used space.
98 if (isExtensible()) {
99 JSExtensibleString& extensible = asExtensible();
100 return extensible.hasLatin1Chars()
101 ? mallocSizeOf(extensible.rawLatin1Chars())
102 : mallocSizeOf(extensible.rawTwoByteChars());
103 }
104
105 // JSInlineString, JSFatInlineString [JSInlineAtom, JSFatInlineAtom]: the
106 // chars are inline.
107 if (isInline()) {
108 return 0;
109 }
110
111 // Everything else: measure the space for the chars.
112 JSLinearString& linear = asLinear();
113 MOZ_ASSERT(linear.ownsMallocedChars());
114 return linear.hasLatin1Chars() ? mallocSizeOf(linear.rawLatin1Chars())
115 : mallocSizeOf(linear.rawTwoByteChars());
116 }
117
size(mozilla::MallocSizeOf mallocSizeOf) const118 JS::ubi::Node::Size JS::ubi::Concrete<JSString>::size(
119 mozilla::MallocSizeOf mallocSizeOf) const {
120 JSString& str = get();
121 size_t size;
122 if (str.isAtom()) {
123 size =
124 str.isFatInline() ? sizeof(js::FatInlineAtom) : sizeof(js::NormalAtom);
125 } else {
126 size = str.isFatInline() ? sizeof(JSFatInlineString) : sizeof(JSString);
127 }
128
129 if (IsInsideNursery(&str)) {
130 size += Nursery::nurseryCellHeaderSize();
131 }
132
133 size += str.sizeOfExcludingThis(mallocSizeOf);
134
135 return size;
136 }
137
138 const char16_t JS::ubi::Concrete<JSString>::concreteTypeName[] = u"JSString";
139
encodeUTF8Partial(const JS::AutoRequireNoGC & nogc,mozilla::Span<char> buffer) const140 mozilla::Maybe<mozilla::Tuple<size_t, size_t> > JSString::encodeUTF8Partial(
141 const JS::AutoRequireNoGC& nogc, mozilla::Span<char> buffer) const {
142 mozilla::Vector<const JSString*, 16, SystemAllocPolicy> stack;
143 const JSString* current = this;
144 char16_t pendingLeadSurrogate = 0; // U+0000 means no pending lead surrogate
145 size_t totalRead = 0;
146 size_t totalWritten = 0;
147 for (;;) {
148 if (current->isRope()) {
149 JSRope& rope = current->asRope();
150 if (!stack.append(rope.rightChild())) {
151 // OOM
152 return mozilla::Nothing();
153 }
154 current = rope.leftChild();
155 continue;
156 }
157
158 JSLinearString& linear = current->asLinear();
159 if (MOZ_LIKELY(linear.hasLatin1Chars())) {
160 if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
161 if (buffer.Length() < 3) {
162 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
163 }
164 buffer[0] = '\xEF';
165 buffer[1] = '\xBF';
166 buffer[2] = '\xBD';
167 buffer = buffer.From(3);
168 totalRead += 1; // pendingLeadSurrogate
169 totalWritten += 3;
170 pendingLeadSurrogate = 0;
171 }
172 auto src = mozilla::AsChars(
173 mozilla::Span(linear.latin1Chars(nogc), linear.length()));
174 size_t read;
175 size_t written;
176 mozilla::Tie(read, written) =
177 mozilla::ConvertLatin1toUtf8Partial(src, buffer);
178 buffer = buffer.From(written);
179 totalRead += read;
180 totalWritten += written;
181 if (read < src.Length()) {
182 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
183 }
184 } else {
185 auto src = mozilla::Span(linear.twoByteChars(nogc), linear.length());
186 if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
187 char16_t first = 0;
188 if (!src.IsEmpty()) {
189 first = src[0];
190 }
191 if (unicode::IsTrailSurrogate(first)) {
192 // Got a surrogate pair
193 if (buffer.Length() < 4) {
194 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
195 }
196 uint32_t astral = unicode::UTF16Decode(pendingLeadSurrogate, first);
197 buffer[0] = char(0b1111'0000 | (astral >> 18));
198 buffer[1] = char(0b1000'0000 | ((astral >> 12) & 0b11'1111));
199 buffer[2] = char(0b1000'0000 | ((astral >> 6) & 0b11'1111));
200 buffer[3] = char(0b1000'0000 | (astral & 0b11'1111));
201 src = src.From(1);
202 buffer = buffer.From(4);
203 totalRead += 2; // both pendingLeadSurrogate and first!
204 totalWritten += 4;
205 } else {
206 // unpaired surrogate
207 if (buffer.Length() < 3) {
208 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
209 }
210 buffer[0] = '\xEF';
211 buffer[1] = '\xBF';
212 buffer[2] = '\xBD';
213 buffer = buffer.From(3);
214 totalRead += 1; // pendingLeadSurrogate
215 totalWritten += 3;
216 }
217 pendingLeadSurrogate = 0;
218 }
219 if (src.IsEmpty()) {
220 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
221 }
222 char16_t last = src[src.Length() - 1];
223 if (unicode::IsLeadSurrogate(last)) {
224 src = src.To(src.Length() - 1);
225 pendingLeadSurrogate = last;
226 } else {
227 MOZ_ASSERT(!pendingLeadSurrogate);
228 }
229 size_t read;
230 size_t written;
231 mozilla::Tie(read, written) =
232 mozilla::ConvertUtf16toUtf8Partial(src, buffer);
233 buffer = buffer.From(written);
234 totalRead += read;
235 totalWritten += written;
236 if (read < src.Length()) {
237 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
238 }
239 }
240 if (stack.empty()) {
241 break;
242 }
243 current = stack.popCopy();
244 }
245 if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
246 if (buffer.Length() < 3) {
247 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
248 }
249 buffer[0] = '\xEF';
250 buffer[1] = '\xBF';
251 buffer[2] = '\xBD';
252 // No need to update buffer and pendingLeadSurrogate anymore
253 totalRead += 1;
254 totalWritten += 3;
255 }
256 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
257 }
258
259 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
260
261 template <typename CharT>
262 /*static */
dumpChars(const CharT * s,size_t n,js::GenericPrinter & out)263 void JSString::dumpChars(const CharT* s, size_t n, js::GenericPrinter& out) {
264 if (n == SIZE_MAX) {
265 n = 0;
266 while (s[n]) {
267 n++;
268 }
269 }
270
271 out.put("\"");
272 dumpCharsNoQuote(s, n, out);
273 out.putChar('"');
274 }
275
276 template void JSString::dumpChars(const Latin1Char* s, size_t n,
277 js::GenericPrinter& out);
278
279 template void JSString::dumpChars(const char16_t* s, size_t n,
280 js::GenericPrinter& out);
281
282 template <typename CharT>
283 /*static */
dumpCharsNoQuote(const CharT * s,size_t n,js::GenericPrinter & out)284 void JSString::dumpCharsNoQuote(const CharT* s, size_t n,
285 js::GenericPrinter& out) {
286 for (size_t i = 0; i < n; i++) {
287 char16_t c = s[i];
288 if (c == '\n') {
289 out.put("\\n");
290 } else if (c == '\t') {
291 out.put("\\t");
292 } else if (c >= 32 && c < 127) {
293 out.putChar((char)s[i]);
294 } else if (c <= 255) {
295 out.printf("\\x%02x", unsigned(c));
296 } else {
297 out.printf("\\u%04x", unsigned(c));
298 }
299 }
300 }
301
302 template void JSString::dumpCharsNoQuote(const Latin1Char* s, size_t n,
303 js::GenericPrinter& out);
304
305 template void JSString::dumpCharsNoQuote(const char16_t* s, size_t n,
306 js::GenericPrinter& out);
307
dumpCharsNoNewline(js::GenericPrinter & out)308 void JSString::dumpCharsNoNewline(js::GenericPrinter& out) {
309 if (JSLinearString* linear = ensureLinear(nullptr)) {
310 AutoCheckCannotGC nogc;
311 if (hasLatin1Chars()) {
312 out.put("[Latin 1]");
313 dumpChars(linear->latin1Chars(nogc), length(), out);
314 } else {
315 out.put("[2 byte]");
316 dumpChars(linear->twoByteChars(nogc), length(), out);
317 }
318 } else {
319 out.put("(oom in JSString::dumpCharsNoNewline)");
320 }
321 }
322
dumpCharsNoQuote(js::GenericPrinter & out)323 void JSString::dumpCharsNoQuote(js::GenericPrinter& out) {
324 if (JSLinearString* linear = ensureLinear(nullptr)) {
325 AutoCheckCannotGC nogc;
326 if (hasLatin1Chars()) {
327 dumpCharsNoQuote(linear->latin1Chars(nogc), length(), out);
328 } else {
329 dumpCharsNoQuote(linear->twoByteChars(nogc), length(), out);
330 }
331 } else {
332 out.put("(oom in JSString::dumpCharsNoNewline)");
333 }
334 }
335
dump()336 void JSString::dump() {
337 js::Fprinter out(stderr);
338 dump(out);
339 }
340
dump(js::GenericPrinter & out)341 void JSString::dump(js::GenericPrinter& out) {
342 dumpNoNewline(out);
343 out.putChar('\n');
344 }
345
dumpNoNewline(js::GenericPrinter & out)346 void JSString::dumpNoNewline(js::GenericPrinter& out) {
347 if (JSLinearString* linear = ensureLinear(nullptr)) {
348 AutoCheckCannotGC nogc;
349 if (hasLatin1Chars()) {
350 const Latin1Char* chars = linear->latin1Chars(nogc);
351 out.printf("JSString* (%p) = Latin1Char * (%p) = ", (void*)this,
352 (void*)chars);
353 dumpChars(chars, length(), out);
354 } else {
355 const char16_t* chars = linear->twoByteChars(nogc);
356 out.printf("JSString* (%p) = char16_t * (%p) = ", (void*)this,
357 (void*)chars);
358 dumpChars(chars, length(), out);
359 }
360 } else {
361 out.put("(oom in JSString::dump)");
362 }
363 }
364
dumpRepresentation(js::GenericPrinter & out,int indent) const365 void JSString::dumpRepresentation(js::GenericPrinter& out, int indent) const {
366 if (isRope()) {
367 asRope().dumpRepresentation(out, indent);
368 } else if (isDependent()) {
369 asDependent().dumpRepresentation(out, indent);
370 } else if (isExternal()) {
371 asExternal().dumpRepresentation(out, indent);
372 } else if (isExtensible()) {
373 asExtensible().dumpRepresentation(out, indent);
374 } else if (isInline()) {
375 asInline().dumpRepresentation(out, indent);
376 } else if (isLinear()) {
377 asLinear().dumpRepresentation(out, indent);
378 } else {
379 MOZ_CRASH("Unexpected JSString representation");
380 }
381 }
382
dumpRepresentationHeader(js::GenericPrinter & out,const char * subclass) const383 void JSString::dumpRepresentationHeader(js::GenericPrinter& out,
384 const char* subclass) const {
385 uint32_t flags = JSString::flags();
386 // Print the string's address as an actual C++ expression, to facilitate
387 // copy-and-paste into a debugger.
388 out.printf("((%s*) %p) length: %zu flags: 0x%x", subclass, this, length(),
389 flags);
390 if (flags & LINEAR_BIT) out.put(" LINEAR");
391 if (flags & DEPENDENT_BIT) out.put(" DEPENDENT");
392 if (flags & INLINE_CHARS_BIT) out.put(" INLINE_CHARS");
393 if (flags & ATOM_BIT)
394 out.put(" ATOM");
395 else
396 out.put(" (NON ATOM)");
397 if (isPermanentAtom()) out.put(" PERMANENT");
398 if (flags & LATIN1_CHARS_BIT) out.put(" LATIN1");
399 if (flags & INDEX_VALUE_BIT) out.printf(" INDEX_VALUE(%u)", getIndexValue());
400 if (!isTenured()) out.put(" NURSERY");
401 out.putChar('\n');
402 }
403
dumpRepresentationChars(js::GenericPrinter & out,int indent) const404 void JSLinearString::dumpRepresentationChars(js::GenericPrinter& out,
405 int indent) const {
406 if (hasLatin1Chars()) {
407 out.printf("%*schars: ((Latin1Char*) %p) ", indent, "", rawLatin1Chars());
408 dumpChars(rawLatin1Chars(), length(), out);
409 } else {
410 out.printf("%*schars: ((char16_t*) %p) ", indent, "", rawTwoByteChars());
411 dumpChars(rawTwoByteChars(), length(), out);
412 }
413 out.putChar('\n');
414 }
415
equals(const char * s)416 bool JSString::equals(const char* s) {
417 JSLinearString* linear = ensureLinear(nullptr);
418 if (!linear) {
419 // This is DEBUG-only code.
420 fprintf(stderr, "OOM in JSString::equals!\n");
421 return false;
422 }
423
424 return StringEqualsAscii(linear, s);
425 }
426 #endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */
427
428 template <typename CharT>
AllocChars(JSString * str,size_t length,CharT ** chars,size_t * capacity)429 static MOZ_ALWAYS_INLINE bool AllocChars(JSString* str, size_t length,
430 CharT** chars, size_t* capacity) {
431 /*
432 * Grow by 12.5% if the buffer is very large. Otherwise, round up to the
433 * next power of 2. This is similar to what we do with arrays; see
434 * JSObject::ensureDenseArrayElements.
435 */
436 static const size_t DOUBLING_MAX = 1024 * 1024;
437 *capacity =
438 length > DOUBLING_MAX ? length + (length / 8) : RoundUpPow2(length);
439
440 static_assert(JSString::MAX_LENGTH * sizeof(CharT) <= UINT32_MAX);
441 *chars =
442 str->zone()->pod_arena_malloc<CharT>(js::StringBufferArena, *capacity);
443 return *chars != nullptr;
444 }
445
copyLatin1Chars(JSContext * maybecx,arena_id_t destArenaId) const446 UniqueLatin1Chars JSRope::copyLatin1Chars(JSContext* maybecx,
447 arena_id_t destArenaId) const {
448 return copyCharsInternal<Latin1Char>(maybecx, destArenaId);
449 }
450
copyTwoByteChars(JSContext * maybecx,arena_id_t destArenaId) const451 UniqueTwoByteChars JSRope::copyTwoByteChars(JSContext* maybecx,
452 arena_id_t destArenaId) const {
453 return copyCharsInternal<char16_t>(maybecx, destArenaId);
454 }
455
456 template <typename CharT>
copyCharsInternal(JSContext * maybecx,arena_id_t destArenaId) const457 UniquePtr<CharT[], JS::FreePolicy> JSRope::copyCharsInternal(
458 JSContext* maybecx, arena_id_t destArenaId) const {
459 // Left-leaning ropes are far more common than right-leaning ropes, so
460 // perform a non-destructive traversal of the rope, right node first,
461 // splatting each node's characters into a contiguous buffer.
462
463 size_t n = length();
464
465 UniquePtr<CharT[], JS::FreePolicy> out;
466 if (maybecx) {
467 out.reset(maybecx->pod_arena_malloc<CharT>(destArenaId, n));
468 } else {
469 out.reset(js_pod_arena_malloc<CharT>(destArenaId, n));
470 }
471
472 if (!out) {
473 return nullptr;
474 }
475
476 Vector<const JSString*, 8, SystemAllocPolicy> nodeStack;
477 const JSString* str = this;
478 CharT* end = out.get() + str->length();
479 while (true) {
480 if (str->isRope()) {
481 if (!nodeStack.append(str->asRope().leftChild())) {
482 if (maybecx) {
483 ReportOutOfMemory(maybecx);
484 }
485 return nullptr;
486 }
487 str = str->asRope().rightChild();
488 } else {
489 end -= str->length();
490 CopyChars(end, str->asLinear());
491 if (nodeStack.empty()) {
492 break;
493 }
494 str = nodeStack.popCopy();
495 }
496 }
497 MOZ_ASSERT(end == out.get());
498
499 return out;
500 }
501
502 template <typename CharT>
AddStringToHash(uint32_t * hash,const CharT * chars,size_t len)503 void AddStringToHash(uint32_t* hash, const CharT* chars, size_t len) {
504 // It's tempting to use |HashString| instead of this loop, but that's
505 // slightly different than our existing implementation for non-ropes. We
506 // want to pretend we have a contiguous set of chars so we need to
507 // accumulate char by char rather than generate a new hash for substring
508 // and then accumulate that.
509 for (size_t i = 0; i < len; i++) {
510 *hash = mozilla::AddToHash(*hash, chars[i]);
511 }
512 }
513
AddStringToHash(uint32_t * hash,const JSString * str)514 void AddStringToHash(uint32_t* hash, const JSString* str) {
515 AutoCheckCannotGC nogc;
516 const auto& s = str->asLinear();
517 if (s.hasLatin1Chars()) {
518 AddStringToHash(hash, s.latin1Chars(nogc), s.length());
519 } else {
520 AddStringToHash(hash, s.twoByteChars(nogc), s.length());
521 }
522 }
523
hash(uint32_t * outHash) const524 bool JSRope::hash(uint32_t* outHash) const {
525 Vector<const JSString*, 8, SystemAllocPolicy> nodeStack;
526 const JSString* str = this;
527
528 *outHash = 0;
529
530 while (true) {
531 if (str->isRope()) {
532 if (!nodeStack.append(str->asRope().rightChild())) {
533 return false;
534 }
535 str = str->asRope().leftChild();
536 } else {
537 AddStringToHash(outHash, str);
538 if (nodeStack.empty()) {
539 break;
540 }
541 str = nodeStack.popCopy();
542 }
543 }
544
545 return true;
546 }
547
548 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
dumpRepresentation(js::GenericPrinter & out,int indent) const549 void JSRope::dumpRepresentation(js::GenericPrinter& out, int indent) const {
550 dumpRepresentationHeader(out, "JSRope");
551 indent += 2;
552
553 out.printf("%*sleft: ", indent, "");
554 leftChild()->dumpRepresentation(out, indent);
555
556 out.printf("%*sright: ", indent, "");
557 rightChild()->dumpRepresentation(out, indent);
558 }
559 #endif
560
561 namespace js {
562
563 template <>
CopyChars(char16_t * dest,const JSLinearString & str)564 void CopyChars(char16_t* dest, const JSLinearString& str) {
565 AutoCheckCannotGC nogc;
566 if (str.hasTwoByteChars()) {
567 PodCopy(dest, str.twoByteChars(nogc), str.length());
568 } else {
569 CopyAndInflateChars(dest, str.latin1Chars(nogc), str.length());
570 }
571 }
572
573 template <>
CopyChars(Latin1Char * dest,const JSLinearString & str)574 void CopyChars(Latin1Char* dest, const JSLinearString& str) {
575 AutoCheckCannotGC nogc;
576 if (str.hasLatin1Chars()) {
577 PodCopy(dest, str.latin1Chars(nogc), str.length());
578 } else {
579 /*
580 * When we flatten a TwoByte rope, we turn child ropes (including Latin1
581 * ropes) into TwoByte dependent strings. If one of these strings is
582 * also part of another Latin1 rope tree, we can have a Latin1 rope with
583 * a TwoByte descendent and we end up here when we flatten it. Although
584 * the chars are stored as TwoByte, we know they must be in the Latin1
585 * range, so we can safely deflate here.
586 */
587 size_t len = str.length();
588 const char16_t* chars = str.twoByteChars(nogc);
589 auto src = Span(chars, len);
590 MOZ_ASSERT(IsUtf16Latin1(src));
591 LossyConvertUtf16toLatin1(src, AsWritableChars(Span(dest, len)));
592 }
593 }
594
595 } /* namespace js */
596
597 template <typename CharT>
StringFlagsForCharType(uint32_t baseFlags)598 static constexpr uint32_t StringFlagsForCharType(uint32_t baseFlags) {
599 if constexpr (std::is_same_v<CharT, char16_t>) {
600 return baseFlags;
601 }
602
603 return baseFlags | JSString::LATIN1_CHARS_BIT;
604 }
605
UpdateNurseryBuffersOnTransfer(js::Nursery & nursery,JSString * from,JSString * to,void * buffer,size_t size)606 static bool UpdateNurseryBuffersOnTransfer(js::Nursery& nursery, JSString* from,
607 JSString* to, void* buffer,
608 size_t size) {
609 // Update the list of buffers associated with nursery cells when |buffer| is
610 // moved from string |from| to string |to|, depending on whether those strings
611 // are in the nursery or not.
612
613 if (from->isTenured() && !to->isTenured()) {
614 // Tenured leftmost child is giving its chars buffer to the
615 // nursery-allocated root node.
616 if (!nursery.registerMallocedBuffer(buffer, size)) {
617 return false;
618 }
619 } else if (!from->isTenured() && to->isTenured()) {
620 // Leftmost child is giving its nursery-held chars buffer to a
621 // tenured string.
622 nursery.removeMallocedBuffer(buffer, size);
623 }
624
625 return true;
626 }
627
CanReuseLeftmostBuffer(JSString * leftmostChild,size_t wholeLength,bool hasTwoByteChars)628 static bool CanReuseLeftmostBuffer(JSString* leftmostChild, size_t wholeLength,
629 bool hasTwoByteChars) {
630 if (!leftmostChild->isExtensible()) {
631 return false;
632 }
633
634 JSExtensibleString& str = leftmostChild->asExtensible();
635 return str.capacity() >= wholeLength &&
636 str.hasTwoByteChars() == hasTwoByteChars;
637 }
638
flatten(JSContext * maybecx)639 JSLinearString* JSRope::flatten(JSContext* maybecx) {
640 mozilla::Maybe<AutoGeckoProfilerEntry> entry;
641 if (maybecx && !maybecx->isHelperThreadContext()) {
642 entry.emplace(maybecx, "JSRope::flatten");
643 }
644
645 JSLinearString* str = flattenInternal();
646 if (!str && maybecx) {
647 ReportOutOfMemory(maybecx);
648 }
649
650 return str;
651 }
652
flattenInternal()653 JSLinearString* JSRope::flattenInternal() {
654 if (zone()->needsIncrementalBarrier()) {
655 return flattenInternal<WithIncrementalBarrier>();
656 }
657
658 return flattenInternal<NoBarrier>();
659 }
660
661 template <JSRope::UsingBarrier usingBarrier>
flattenInternal()662 JSLinearString* JSRope::flattenInternal() {
663 if (hasTwoByteChars()) {
664 return flattenInternal<usingBarrier, char16_t>(this);
665 }
666
667 return flattenInternal<usingBarrier, Latin1Char>(this);
668 }
669
670 template <JSRope::UsingBarrier usingBarrier, typename CharT>
671 /* static */
flattenInternal(JSRope * root)672 JSLinearString* JSRope::flattenInternal(JSRope* root) {
673 /*
674 * Consider the DAG of JSRopes rooted at |root|, with non-JSRopes as
675 * its leaves. Mutate the root JSRope into a JSExtensibleString containing
676 * the full flattened text that the root represents, and mutate all other
677 * JSRopes in the interior of the DAG into JSDependentStrings that refer to
678 * this new JSExtensibleString.
679 *
680 * If the leftmost leaf of our DAG is a JSExtensibleString, consider
681 * stealing its buffer for use in our new root, and transforming it into a
682 * JSDependentString too. Do not mutate any of the other leaves.
683 *
684 * Perform a depth-first dag traversal, splatting each node's characters
685 * into a contiguous buffer. Visit each rope node three times:
686 * 1. record position in the buffer and recurse into left child;
687 * 2. recurse into the right child;
688 * 3. transform the node into a dependent string.
689 * To avoid maintaining a stack, tree nodes are mutated to indicate how many
690 * times they have been visited. Since ropes can be dags, a node may be
691 * encountered multiple times during traversal. However, step 3 above leaves
692 * a valid dependent string, so everything works out.
693 *
694 * While ropes avoid all sorts of quadratic cases with string concatenation,
695 * they can't help when ropes are immediately flattened. One idiomatic case
696 * that we'd like to keep linear (and has traditionally been linear in SM
697 * and other JS engines) is:
698 *
699 * while (...) {
700 * s += ...
701 * s.flatten
702 * }
703 *
704 * Two behaviors accomplish this:
705 *
706 * - When the leftmost non-rope in the DAG we're flattening is a
707 * JSExtensibleString with sufficient capacity to hold the entire
708 * flattened string, we just flatten the DAG into its buffer. Then, when
709 * we transform the root of the DAG from a JSRope into a
710 * JSExtensibleString, we steal that buffer, and change the victim from a
711 * JSExtensibleString to a JSDependentString. In this case, the left-hand
712 * side of the string never needs to be copied.
713 *
714 * - Otherwise, we round up the total flattened size and create a fresh
715 * JSExtensibleString with that much capacity. If this in turn becomes the
716 * leftmost leaf of a subsequent flatten, we will hopefully be able to
717 * fill it, as in the case above.
718 *
719 * Note that, even though the code for creating JSDependentStrings avoids
720 * creating dependents of dependents, we can create that situation here: the
721 * JSExtensibleStrings we transform into JSDependentStrings might have
722 * JSDependentStrings pointing to them already. Stealing the buffer doesn't
723 * change its address, only its owning JSExtensibleString, so all chars()
724 * pointers in the JSDependentStrings are still valid.
725 */
726 const size_t wholeLength = root->length();
727 size_t wholeCapacity;
728 CharT* wholeChars;
729
730 AutoCheckCannotGC nogc;
731
732 Nursery& nursery = root->runtimeFromMainThread()->gc.nursery();
733
734 /* Find the left most string, containing the first string. */
735 JSRope* leftmostRope = root;
736 while (leftmostRope->leftChild()->isRope()) {
737 leftmostRope = &leftmostRope->leftChild()->asRope();
738 }
739 JSString* leftmostChild = leftmostRope->leftChild();
740
741 bool reuseLeftmostBuffer = CanReuseLeftmostBuffer(
742 leftmostChild, wholeLength, std::is_same_v<CharT, char16_t>);
743
744 if (reuseLeftmostBuffer) {
745 JSExtensibleString& left = leftmostChild->asExtensible();
746 wholeCapacity = left.capacity();
747 wholeChars = const_cast<CharT*>(left.nonInlineChars<CharT>(nogc));
748
749 // Nursery::registerMallocedBuffer is fallible, so attempt it first before
750 // doing anything irreversible.
751 if (!UpdateNurseryBuffersOnTransfer(nursery, &left, root, wholeChars,
752 wholeCapacity * sizeof(CharT))) {
753 return nullptr;
754 }
755 } else {
756 // If we can't reuse the leftmost child's buffer, allocate a new one.
757 if (!AllocChars(root, wholeLength, &wholeChars, &wholeCapacity)) {
758 return nullptr;
759 }
760
761 if (!root->isTenured()) {
762 if (!nursery.registerMallocedBuffer(wholeChars,
763 wholeCapacity * sizeof(CharT))) {
764 js_free(wholeChars);
765 return nullptr;
766 }
767 }
768 }
769
770 JSRope* str = root;
771 CharT* pos = wholeChars;
772
773 JSRope* parent = nullptr;
774 uint32_t parentFlag = 0;
775
776 first_visit_node : {
777 MOZ_ASSERT_IF(str != root, parent && parentFlag);
778 MOZ_ASSERT(!str->asRope().isBeingFlattened());
779
780 ropeBarrierDuringFlattening<usingBarrier>(str);
781
782 JSString& left = *str->d.s.u2.left;
783 str->d.s.u2.parent = parent;
784 str->setFlagBit(parentFlag);
785 parent = nullptr;
786 parentFlag = 0;
787
788 if (left.isRope()) {
789 /* Return to this node when 'left' done, then goto visit_right_child. */
790 parent = str;
791 parentFlag = FLATTEN_VISIT_RIGHT;
792 str = &left.asRope();
793 goto first_visit_node;
794 }
795 if (!(reuseLeftmostBuffer && &left == leftmostChild)) {
796 CopyChars(pos, left.asLinear());
797 }
798 pos += left.length();
799 }
800
801 visit_right_child : {
802 JSString& right = *str->d.s.u3.right;
803 if (right.isRope()) {
804 /* Return to this node when 'right' done, then goto finish_node. */
805 parent = str;
806 parentFlag = FLATTEN_FINISH_NODE;
807 str = &right.asRope();
808 goto first_visit_node;
809 }
810 CopyChars(pos, right.asLinear());
811 pos += right.length();
812 }
813
814 finish_node : {
815 if (str == root) {
816 goto finish_root;
817 }
818
819 MOZ_ASSERT(pos >= wholeChars);
820 CharT* chars = pos - str->length();
821 JSRope* strParent = str->d.s.u2.parent;
822 str->setNonInlineChars(chars);
823
824 MOZ_ASSERT(str->asRope().isBeingFlattened());
825 mozilla::DebugOnly<bool> visitRight = str->flags() & FLATTEN_VISIT_RIGHT;
826 bool finishNode = str->flags() & FLATTEN_FINISH_NODE;
827 MOZ_ASSERT(visitRight != finishNode);
828
829 // This also clears the flags related to flattening.
830 str->setLengthAndFlags(str->length(),
831 StringFlagsForCharType<CharT>(INIT_DEPENDENT_FLAGS));
832 str->d.s.u3.base =
833 reinterpret_cast<JSLinearString*>(root); /* will be true on exit */
834
835 // Every interior (rope) node in the rope's tree will be visited during
836 // the traversal and post-barriered here, so earlier additions of
837 // dependent.base -> root pointers are handled by this barrier as well.
838 //
839 // The only time post-barriers need do anything is when the root is in
840 // the nursery. Note that the root was a rope but will be an extensible
841 // string when we return, so it will not point to any strings and need
842 // not be barriered.
843 if (str->isTenured() && !root->isTenured()) {
844 root->storeBuffer()->putWholeCell(str);
845 }
846
847 str = strParent;
848 if (finishNode) {
849 goto finish_node;
850 }
851 MOZ_ASSERT(visitRight);
852 goto visit_right_child;
853 }
854
855 finish_root:
856 // We traversed all the way back up to the root so we're finished.
857 MOZ_ASSERT(str == root);
858 MOZ_ASSERT(pos == wholeChars + wholeLength);
859
860 root->setLengthAndFlags(wholeLength,
861 StringFlagsForCharType<CharT>(EXTENSIBLE_FLAGS));
862 root->setNonInlineChars(wholeChars);
863 root->d.s.u3.capacity = wholeCapacity;
864 AddCellMemory(root, root->asLinear().allocSize(), MemoryUse::StringContents);
865
866 if (reuseLeftmostBuffer) {
867 // Remove memory association for left node we're about to make into a
868 // dependent string.
869 JSString& left = *leftmostChild;
870 RemoveCellMemory(&left, left.allocSize(), MemoryUse::StringContents);
871
872 uint32_t flags = INIT_DEPENDENT_FLAGS;
873 if (left.inStringToAtomCache()) {
874 flags |= IN_STRING_TO_ATOM_CACHE;
875 }
876 left.setLengthAndFlags(left.length(), StringFlagsForCharType<CharT>(flags));
877 left.d.s.u3.base = &root->asLinear();
878 if (left.isTenured() && !root->isTenured()) {
879 // leftmost child -> root is a tenured -> nursery edge.
880 root->storeBuffer()->putWholeCell(&left);
881 }
882 }
883
884 return &root->asLinear();
885 }
886
887 template <JSRope::UsingBarrier usingBarrier>
888 /* static */
ropeBarrierDuringFlattening(JSRope * rope)889 inline void JSRope::ropeBarrierDuringFlattening(JSRope* rope) {
890 MOZ_ASSERT(!rope->isBeingFlattened());
891 if constexpr (usingBarrier) {
892 gc::PreWriteBarrierDuringFlattening(rope->leftChild());
893 gc::PreWriteBarrierDuringFlattening(rope->rightChild());
894 }
895 }
896
897 template <AllowGC allowGC>
EnsureLinear(JSContext * cx,typename MaybeRooted<JSString *,allowGC>::HandleType string)898 static JSLinearString* EnsureLinear(
899 JSContext* cx,
900 typename MaybeRooted<JSString*, allowGC>::HandleType string) {
901 JSLinearString* linear = string->ensureLinear(cx);
902 // Don't report an exception if GC is not allowed, just return nullptr.
903 if (!linear && !allowGC) {
904 cx->recoverFromOutOfMemory();
905 }
906 return linear;
907 }
908
909 template <AllowGC allowGC>
ConcatStrings(JSContext * cx,typename MaybeRooted<JSString *,allowGC>::HandleType left,typename MaybeRooted<JSString *,allowGC>::HandleType right,gc::InitialHeap heap)910 JSString* js::ConcatStrings(
911 JSContext* cx, typename MaybeRooted<JSString*, allowGC>::HandleType left,
912 typename MaybeRooted<JSString*, allowGC>::HandleType right,
913 gc::InitialHeap heap) {
914 MOZ_ASSERT_IF(!left->isAtom(), cx->isInsideCurrentZone(left));
915 MOZ_ASSERT_IF(!right->isAtom(), cx->isInsideCurrentZone(right));
916
917 size_t leftLen = left->length();
918 if (leftLen == 0) {
919 return right;
920 }
921
922 size_t rightLen = right->length();
923 if (rightLen == 0) {
924 return left;
925 }
926
927 size_t wholeLength = leftLen + rightLen;
928 if (MOZ_UNLIKELY(wholeLength > JSString::MAX_LENGTH)) {
929 // Don't report an exception if GC is not allowed, just return nullptr.
930 if (allowGC) {
931 js::ReportOversizedAllocation(cx, JSMSG_ALLOC_OVERFLOW);
932 }
933 return nullptr;
934 }
935
936 bool isLatin1 = left->hasLatin1Chars() && right->hasLatin1Chars();
937 bool canUseInline = isLatin1
938 ? JSInlineString::lengthFits<Latin1Char>(wholeLength)
939 : JSInlineString::lengthFits<char16_t>(wholeLength);
940 if (canUseInline) {
941 Latin1Char* latin1Buf = nullptr; // initialize to silence GCC warning
942 char16_t* twoByteBuf = nullptr; // initialize to silence GCC warning
943 JSInlineString* str =
944 isLatin1
945 ? AllocateInlineString<allowGC>(cx, wholeLength, &latin1Buf, heap)
946 : AllocateInlineString<allowGC>(cx, wholeLength, &twoByteBuf, heap);
947 if (!str) {
948 return nullptr;
949 }
950
951 AutoCheckCannotGC nogc;
952 JSLinearString* leftLinear = EnsureLinear<allowGC>(cx, left);
953 if (!leftLinear) {
954 return nullptr;
955 }
956 JSLinearString* rightLinear = EnsureLinear<allowGC>(cx, right);
957 if (!rightLinear) {
958 return nullptr;
959 }
960
961 if (isLatin1) {
962 PodCopy(latin1Buf, leftLinear->latin1Chars(nogc), leftLen);
963 PodCopy(latin1Buf + leftLen, rightLinear->latin1Chars(nogc), rightLen);
964 } else {
965 if (leftLinear->hasTwoByteChars()) {
966 PodCopy(twoByteBuf, leftLinear->twoByteChars(nogc), leftLen);
967 } else {
968 CopyAndInflateChars(twoByteBuf, leftLinear->latin1Chars(nogc), leftLen);
969 }
970 if (rightLinear->hasTwoByteChars()) {
971 PodCopy(twoByteBuf + leftLen, rightLinear->twoByteChars(nogc),
972 rightLen);
973 } else {
974 CopyAndInflateChars(twoByteBuf + leftLen,
975 rightLinear->latin1Chars(nogc), rightLen);
976 }
977 }
978
979 return str;
980 }
981
982 return JSRope::new_<allowGC>(cx, left, right, wholeLength, heap);
983 }
984
985 template JSString* js::ConcatStrings<CanGC>(JSContext* cx, HandleString left,
986 HandleString right,
987 gc::InitialHeap heap);
988
989 template JSString* js::ConcatStrings<NoGC>(JSContext* cx, JSString* const& left,
990 JSString* const& right,
991 gc::InitialHeap heap);
992
993 /**
994 * Copy |src[0..length]| to |dest[0..length]| when copying doesn't narrow and
995 * therefore can't lose information.
996 */
FillChars(char16_t * dest,const unsigned char * src,size_t length)997 static inline void FillChars(char16_t* dest, const unsigned char* src,
998 size_t length) {
999 ConvertLatin1toUtf16(AsChars(Span(src, length)), Span(dest, length));
1000 }
1001
FillChars(char16_t * dest,const char16_t * src,size_t length)1002 static inline void FillChars(char16_t* dest, const char16_t* src,
1003 size_t length) {
1004 PodCopy(dest, src, length);
1005 }
1006
FillChars(unsigned char * dest,const unsigned char * src,size_t length)1007 static inline void FillChars(unsigned char* dest, const unsigned char* src,
1008 size_t length) {
1009 PodCopy(dest, src, length);
1010 }
1011
1012 /**
1013 * Copy |src[0..length]| to |dest[0..length]| when copying *does* narrow, but
1014 * the user guarantees every runtime |src[i]| value can be stored without change
1015 * of value in |dest[i]|.
1016 */
FillFromCompatible(unsigned char * dest,const char16_t * src,size_t length)1017 static inline void FillFromCompatible(unsigned char* dest, const char16_t* src,
1018 size_t length) {
1019 LossyConvertUtf16toLatin1(Span(src, length),
1020 AsWritableChars(Span(dest, length)));
1021 }
1022
1023 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
dumpRepresentation(js::GenericPrinter & out,int indent) const1024 void JSDependentString::dumpRepresentation(js::GenericPrinter& out,
1025 int indent) const {
1026 dumpRepresentationHeader(out, "JSDependentString");
1027 indent += 2;
1028 out.printf("%*soffset: %zu\n", indent, "", baseOffset());
1029 out.printf("%*sbase: ", indent, "");
1030 base()->dumpRepresentation(out, indent);
1031 }
1032 #endif
1033
EqualChars(const JSLinearString * str1,const JSLinearString * str2)1034 bool js::EqualChars(const JSLinearString* str1, const JSLinearString* str2) {
1035 MOZ_ASSERT(str1->length() == str2->length());
1036
1037 size_t len = str1->length();
1038
1039 AutoCheckCannotGC nogc;
1040 if (str1->hasTwoByteChars()) {
1041 if (str2->hasTwoByteChars()) {
1042 return ArrayEqual(str1->twoByteChars(nogc), str2->twoByteChars(nogc),
1043 len);
1044 }
1045
1046 return EqualChars(str2->latin1Chars(nogc), str1->twoByteChars(nogc), len);
1047 }
1048
1049 if (str2->hasLatin1Chars()) {
1050 return ArrayEqual(str1->latin1Chars(nogc), str2->latin1Chars(nogc), len);
1051 }
1052
1053 return EqualChars(str1->latin1Chars(nogc), str2->twoByteChars(nogc), len);
1054 }
1055
HasSubstringAt(JSLinearString * text,JSLinearString * pat,size_t start)1056 bool js::HasSubstringAt(JSLinearString* text, JSLinearString* pat,
1057 size_t start) {
1058 MOZ_ASSERT(start + pat->length() <= text->length());
1059
1060 size_t patLen = pat->length();
1061
1062 AutoCheckCannotGC nogc;
1063 if (text->hasLatin1Chars()) {
1064 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1065 if (pat->hasLatin1Chars()) {
1066 return ArrayEqual(textChars, pat->latin1Chars(nogc), patLen);
1067 }
1068
1069 return EqualChars(textChars, pat->twoByteChars(nogc), patLen);
1070 }
1071
1072 const char16_t* textChars = text->twoByteChars(nogc) + start;
1073 if (pat->hasTwoByteChars()) {
1074 return ArrayEqual(textChars, pat->twoByteChars(nogc), patLen);
1075 }
1076
1077 return EqualChars(pat->latin1Chars(nogc), textChars, patLen);
1078 }
1079
EqualStrings(JSContext * cx,JSString * str1,JSString * str2,bool * result)1080 bool js::EqualStrings(JSContext* cx, JSString* str1, JSString* str2,
1081 bool* result) {
1082 if (str1 == str2) {
1083 *result = true;
1084 return true;
1085 }
1086
1087 size_t length1 = str1->length();
1088 if (length1 != str2->length()) {
1089 *result = false;
1090 return true;
1091 }
1092
1093 JSLinearString* linear1 = str1->ensureLinear(cx);
1094 if (!linear1) {
1095 return false;
1096 }
1097 JSLinearString* linear2 = str2->ensureLinear(cx);
1098 if (!linear2) {
1099 return false;
1100 }
1101
1102 *result = EqualChars(linear1, linear2);
1103 return true;
1104 }
1105
EqualStrings(const JSLinearString * str1,const JSLinearString * str2)1106 bool js::EqualStrings(const JSLinearString* str1, const JSLinearString* str2) {
1107 if (str1 == str2) {
1108 return true;
1109 }
1110
1111 size_t length1 = str1->length();
1112 if (length1 != str2->length()) {
1113 return false;
1114 }
1115
1116 return EqualChars(str1, str2);
1117 }
1118
CompareChars(const char16_t * s1,size_t len1,JSLinearString * s2)1119 int32_t js::CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2) {
1120 AutoCheckCannotGC nogc;
1121 return s2->hasLatin1Chars()
1122 ? CompareChars(s1, len1, s2->latin1Chars(nogc), s2->length())
1123 : CompareChars(s1, len1, s2->twoByteChars(nogc), s2->length());
1124 }
1125
CompareStringsImpl(const JSLinearString * str1,const JSLinearString * str2)1126 static int32_t CompareStringsImpl(const JSLinearString* str1,
1127 const JSLinearString* str2) {
1128 size_t len1 = str1->length();
1129 size_t len2 = str2->length();
1130
1131 AutoCheckCannotGC nogc;
1132 if (str1->hasLatin1Chars()) {
1133 const Latin1Char* chars1 = str1->latin1Chars(nogc);
1134 return str2->hasLatin1Chars()
1135 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
1136 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
1137 }
1138
1139 const char16_t* chars1 = str1->twoByteChars(nogc);
1140 return str2->hasLatin1Chars()
1141 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
1142 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
1143 }
1144
CompareStrings(JSContext * cx,JSString * str1,JSString * str2,int32_t * result)1145 bool js::CompareStrings(JSContext* cx, JSString* str1, JSString* str2,
1146 int32_t* result) {
1147 MOZ_ASSERT(str1);
1148 MOZ_ASSERT(str2);
1149
1150 if (str1 == str2) {
1151 *result = 0;
1152 return true;
1153 }
1154
1155 JSLinearString* linear1 = str1->ensureLinear(cx);
1156 if (!linear1) {
1157 return false;
1158 }
1159
1160 JSLinearString* linear2 = str2->ensureLinear(cx);
1161 if (!linear2) {
1162 return false;
1163 }
1164
1165 *result = CompareStringsImpl(linear1, linear2);
1166 return true;
1167 }
1168
CompareStrings(const JSLinearString * str1,const JSLinearString * str2)1169 int32_t js::CompareStrings(const JSLinearString* str1,
1170 const JSLinearString* str2) {
1171 MOZ_ASSERT(str1);
1172 MOZ_ASSERT(str2);
1173
1174 if (str1 == str2) {
1175 return 0;
1176 }
1177 return CompareStringsImpl(str1, str2);
1178 }
1179
StringIsAscii(JSLinearString * str)1180 bool js::StringIsAscii(JSLinearString* str) {
1181 JS::AutoCheckCannotGC nogc;
1182 if (str->hasLatin1Chars()) {
1183 return mozilla::IsAscii(
1184 AsChars(Span(str->latin1Chars(nogc), str->length())));
1185 }
1186 return mozilla::IsAscii(Span(str->twoByteChars(nogc), str->length()));
1187 }
1188
StringEqualsAscii(JSLinearString * str,const char * asciiBytes)1189 bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes) {
1190 return StringEqualsAscii(str, asciiBytes, strlen(asciiBytes));
1191 }
1192
StringEqualsAscii(JSLinearString * str,const char * asciiBytes,size_t length)1193 bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes,
1194 size_t length) {
1195 MOZ_ASSERT(JS::StringIsASCII(Span(asciiBytes, length)));
1196
1197 if (length != str->length()) {
1198 return false;
1199 }
1200
1201 const Latin1Char* latin1 = reinterpret_cast<const Latin1Char*>(asciiBytes);
1202
1203 AutoCheckCannotGC nogc;
1204 return str->hasLatin1Chars()
1205 ? ArrayEqual(latin1, str->latin1Chars(nogc), length)
1206 : EqualChars(latin1, str->twoByteChars(nogc), length);
1207 }
1208
1209 template <typename CharT>
CheckStringIsIndex(const CharT * s,size_t length,uint32_t * indexp)1210 bool js::CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp) {
1211 MOZ_ASSERT(length > 0);
1212 MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH);
1213 MOZ_ASSERT(IsAsciiDigit(*s),
1214 "caller's fast path must have checked first char");
1215
1216 RangedPtr<const CharT> cp(s, length);
1217 const RangedPtr<const CharT> end(s + length, s, length);
1218
1219 uint32_t index = AsciiDigitToNumber(*cp++);
1220 uint32_t oldIndex = 0;
1221 uint32_t c = 0;
1222
1223 if (index != 0) {
1224 // Consume remaining characters only if the first character isn't '0'.
1225 while (cp < end && IsAsciiDigit(*cp)) {
1226 oldIndex = index;
1227 c = AsciiDigitToNumber(*cp);
1228 index = 10 * index + c;
1229 cp++;
1230 }
1231 }
1232
1233 // It's not an integer index if there are characters after the number.
1234 if (cp != end) {
1235 return false;
1236 }
1237
1238 // Look out for "4294967295" and larger-number strings that fit in
1239 // UINT32_CHAR_BUFFER_LENGTH: only unsigned 32-bit integers less than or equal
1240 // to MAX_ARRAY_INDEX shall pass.
1241 if (oldIndex < MAX_ARRAY_INDEX / 10 ||
1242 (oldIndex == MAX_ARRAY_INDEX / 10 && c <= (MAX_ARRAY_INDEX % 10))) {
1243 MOZ_ASSERT(index <= MAX_ARRAY_INDEX);
1244 *indexp = index;
1245 return true;
1246 }
1247
1248 return false;
1249 }
1250
1251 template bool js::CheckStringIsIndex(const Latin1Char* s, size_t length,
1252 uint32_t* indexp);
1253 template bool js::CheckStringIsIndex(const char16_t* s, size_t length,
1254 uint32_t* indexp);
1255
1256 template <typename CharT>
AtomCharsToIndex(const CharT * s,size_t length)1257 static uint32_t AtomCharsToIndex(const CharT* s, size_t length) {
1258 // Chars are known to be a valid index value (as determined by
1259 // CheckStringIsIndex) that didn't fit in the "index value" bits in the
1260 // header.
1261
1262 MOZ_ASSERT(length > 0);
1263 MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH);
1264
1265 RangedPtr<const CharT> cp(s, length);
1266 const RangedPtr<const CharT> end(s + length, s, length);
1267
1268 MOZ_ASSERT(IsAsciiDigit(*cp));
1269 uint32_t index = AsciiDigitToNumber(*cp++);
1270 MOZ_ASSERT(index != 0);
1271
1272 while (cp < end) {
1273 MOZ_ASSERT(IsAsciiDigit(*cp));
1274 index = 10 * index + AsciiDigitToNumber(*cp);
1275 cp++;
1276 }
1277
1278 MOZ_ASSERT(index <= MAX_ARRAY_INDEX);
1279 return index;
1280 }
1281
getIndexSlow() const1282 uint32_t JSAtom::getIndexSlow() const {
1283 MOZ_ASSERT(isIndex());
1284 MOZ_ASSERT(!hasIndexValue());
1285
1286 size_t len = length();
1287
1288 AutoCheckCannotGC nogc;
1289 return hasLatin1Chars() ? AtomCharsToIndex(latin1Chars(nogc), len)
1290 : AtomCharsToIndex(twoByteChars(nogc), len);
1291 }
1292
MarkStringAndBasesNonDeduplicatable(JSLinearString * s)1293 static void MarkStringAndBasesNonDeduplicatable(JSLinearString* s) {
1294 while (true) {
1295 if (!s->isTenured()) {
1296 s->setNonDeduplicatable();
1297 }
1298 if (!s->hasBase()) {
1299 break;
1300 }
1301 s = s->base();
1302 }
1303 }
1304
init(JSContext * cx,JSString * s)1305 bool AutoStableStringChars::init(JSContext* cx, JSString* s) {
1306 RootedLinearString linearString(cx, s->ensureLinear(cx));
1307 if (!linearString) {
1308 return false;
1309 }
1310
1311 MOZ_ASSERT(state_ == Uninitialized);
1312
1313 // If the chars are inline then we need to copy them since they may be moved
1314 // by a compacting GC.
1315 if (baseIsInline(linearString)) {
1316 return linearString->hasTwoByteChars() ? copyTwoByteChars(cx, linearString)
1317 : copyLatin1Chars(cx, linearString);
1318 }
1319
1320 if (linearString->hasLatin1Chars()) {
1321 state_ = Latin1;
1322 latin1Chars_ = linearString->rawLatin1Chars();
1323 } else {
1324 state_ = TwoByte;
1325 twoByteChars_ = linearString->rawTwoByteChars();
1326 }
1327
1328 MarkStringAndBasesNonDeduplicatable(linearString);
1329
1330 s_ = linearString;
1331 return true;
1332 }
1333
initTwoByte(JSContext * cx,JSString * s)1334 bool AutoStableStringChars::initTwoByte(JSContext* cx, JSString* s) {
1335 RootedLinearString linearString(cx, s->ensureLinear(cx));
1336 if (!linearString) {
1337 return false;
1338 }
1339
1340 MOZ_ASSERT(state_ == Uninitialized);
1341
1342 if (linearString->hasLatin1Chars()) {
1343 return copyAndInflateLatin1Chars(cx, linearString);
1344 }
1345
1346 // If the chars are inline then we need to copy them since they may be moved
1347 // by a compacting GC.
1348 if (baseIsInline(linearString)) {
1349 return copyTwoByteChars(cx, linearString);
1350 }
1351
1352 state_ = TwoByte;
1353 twoByteChars_ = linearString->rawTwoByteChars();
1354
1355 MarkStringAndBasesNonDeduplicatable(linearString);
1356
1357 s_ = linearString;
1358 return true;
1359 }
1360
baseIsInline(HandleLinearString linearString)1361 bool AutoStableStringChars::baseIsInline(HandleLinearString linearString) {
1362 JSString* base = linearString;
1363 while (base->isDependent()) {
1364 base = base->asDependent().base();
1365 }
1366 return base->isInline();
1367 }
1368
1369 template <typename T>
allocOwnChars(JSContext * cx,size_t count)1370 T* AutoStableStringChars::allocOwnChars(JSContext* cx, size_t count) {
1371 static_assert(
1372 InlineCapacity >=
1373 sizeof(JS::Latin1Char) * JSFatInlineString::MAX_LENGTH_LATIN1 &&
1374 InlineCapacity >=
1375 sizeof(char16_t) * JSFatInlineString::MAX_LENGTH_TWO_BYTE,
1376 "InlineCapacity too small to hold fat inline strings");
1377
1378 static_assert((JSString::MAX_LENGTH &
1379 mozilla::tl::MulOverflowMask<sizeof(T)>::value) == 0,
1380 "Size calculation can overflow");
1381 MOZ_ASSERT(count <= JSString::MAX_LENGTH);
1382 size_t size = sizeof(T) * count;
1383
1384 ownChars_.emplace(cx);
1385 if (!ownChars_->resize(size)) {
1386 ownChars_.reset();
1387 return nullptr;
1388 }
1389
1390 return reinterpret_cast<T*>(ownChars_->begin());
1391 }
1392
copyAndInflateLatin1Chars(JSContext * cx,HandleLinearString linearString)1393 bool AutoStableStringChars::copyAndInflateLatin1Chars(
1394 JSContext* cx, HandleLinearString linearString) {
1395 char16_t* chars = allocOwnChars<char16_t>(cx, linearString->length());
1396 if (!chars) {
1397 return false;
1398 }
1399
1400 FillChars(chars, linearString->rawLatin1Chars(), linearString->length());
1401
1402 state_ = TwoByte;
1403 twoByteChars_ = chars;
1404 s_ = linearString;
1405 return true;
1406 }
1407
copyLatin1Chars(JSContext * cx,HandleLinearString linearString)1408 bool AutoStableStringChars::copyLatin1Chars(JSContext* cx,
1409 HandleLinearString linearString) {
1410 size_t length = linearString->length();
1411 JS::Latin1Char* chars = allocOwnChars<JS::Latin1Char>(cx, length);
1412 if (!chars) {
1413 return false;
1414 }
1415
1416 FillChars(chars, linearString->rawLatin1Chars(), length);
1417
1418 state_ = Latin1;
1419 latin1Chars_ = chars;
1420 s_ = linearString;
1421 return true;
1422 }
1423
copyTwoByteChars(JSContext * cx,HandleLinearString linearString)1424 bool AutoStableStringChars::copyTwoByteChars(JSContext* cx,
1425 HandleLinearString linearString) {
1426 size_t length = linearString->length();
1427 char16_t* chars = allocOwnChars<char16_t>(cx, length);
1428 if (!chars) {
1429 return false;
1430 }
1431
1432 FillChars(chars, linearString->rawTwoByteChars(), length);
1433
1434 state_ = TwoByte;
1435 twoByteChars_ = chars;
1436 s_ = linearString;
1437 return true;
1438 }
1439
1440 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
dump(js::GenericPrinter & out)1441 void JSAtom::dump(js::GenericPrinter& out) {
1442 out.printf("JSAtom* (%p) = ", (void*)this);
1443 this->JSString::dump(out);
1444 }
1445
dump()1446 void JSAtom::dump() {
1447 Fprinter out(stderr);
1448 dump(out);
1449 }
1450
dumpRepresentation(js::GenericPrinter & out,int indent) const1451 void JSExternalString::dumpRepresentation(js::GenericPrinter& out,
1452 int indent) const {
1453 dumpRepresentationHeader(out, "JSExternalString");
1454 indent += 2;
1455
1456 out.printf("%*sfinalizer: ((JSExternalStringCallbacks*) %p)\n", indent, "",
1457 callbacks());
1458 dumpRepresentationChars(out, indent);
1459 }
1460 #endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */
1461
NewDependentString(JSContext * cx,JSString * baseArg,size_t start,size_t length,gc::InitialHeap heap)1462 JSLinearString* js::NewDependentString(JSContext* cx, JSString* baseArg,
1463 size_t start, size_t length,
1464 gc::InitialHeap heap) {
1465 if (length == 0) {
1466 return cx->emptyString();
1467 }
1468
1469 JSLinearString* base = baseArg->ensureLinear(cx);
1470 if (!base) {
1471 return nullptr;
1472 }
1473
1474 if (start == 0 && length == base->length()) {
1475 return base;
1476 }
1477
1478 if (base->hasTwoByteChars()) {
1479 AutoCheckCannotGC nogc;
1480 const char16_t* chars = base->twoByteChars(nogc) + start;
1481 if (JSLinearString* staticStr = cx->staticStrings().lookup(chars, length)) {
1482 return staticStr;
1483 }
1484 } else {
1485 AutoCheckCannotGC nogc;
1486 const Latin1Char* chars = base->latin1Chars(nogc) + start;
1487 if (JSLinearString* staticStr = cx->staticStrings().lookup(chars, length)) {
1488 return staticStr;
1489 }
1490 }
1491
1492 return JSDependentString::new_(cx, base, start, length, heap);
1493 }
1494
CanStoreCharsAsLatin1(const char16_t * s,size_t length)1495 static inline bool CanStoreCharsAsLatin1(const char16_t* s, size_t length) {
1496 return IsUtf16Latin1(Span(s, length));
1497 }
1498
1499 template <AllowGC allowGC>
NewInlineStringDeflated(JSContext * cx,const mozilla::Range<const char16_t> & chars,gc::InitialHeap heap=gc::DefaultHeap)1500 static MOZ_ALWAYS_INLINE JSInlineString* NewInlineStringDeflated(
1501 JSContext* cx, const mozilla::Range<const char16_t>& chars,
1502 gc::InitialHeap heap = gc::DefaultHeap) {
1503 size_t len = chars.length();
1504 Latin1Char* storage;
1505 JSInlineString* str = AllocateInlineString<allowGC>(cx, len, &storage, heap);
1506 if (!str) {
1507 return nullptr;
1508 }
1509
1510 MOZ_ASSERT(CanStoreCharsAsLatin1(chars.begin().get(), len));
1511 FillFromCompatible(storage, chars.begin().get(), len);
1512 return str;
1513 }
1514
1515 template <AllowGC allowGC>
NewStringDeflated(JSContext * cx,const char16_t * s,size_t n,gc::InitialHeap heap)1516 static JSLinearString* NewStringDeflated(JSContext* cx, const char16_t* s,
1517 size_t n, gc::InitialHeap heap) {
1518 if (JSLinearString* str = TryEmptyOrStaticString(cx, s, n)) {
1519 return str;
1520 }
1521
1522 if (JSInlineString::lengthFits<Latin1Char>(n)) {
1523 return NewInlineStringDeflated<allowGC>(
1524 cx, mozilla::Range<const char16_t>(s, n), heap);
1525 }
1526
1527 auto news = cx->make_pod_arena_array<Latin1Char>(js::StringBufferArena, n);
1528 if (!news) {
1529 if (!allowGC) {
1530 cx->recoverFromOutOfMemory();
1531 }
1532 return nullptr;
1533 }
1534
1535 MOZ_ASSERT(CanStoreCharsAsLatin1(s, n));
1536 FillFromCompatible(news.get(), s, n);
1537
1538 return JSLinearString::new_<allowGC>(cx, std::move(news), n, heap);
1539 }
1540
NewInlineStringForAtomDeflated(JSContext * cx,const char16_t * chars,size_t length)1541 static MOZ_ALWAYS_INLINE JSInlineString* NewInlineStringForAtomDeflated(
1542 JSContext* cx, const char16_t* chars, size_t length) {
1543 Latin1Char* storage;
1544 JSInlineString* str = AllocateInlineStringForAtom(cx, length, &storage);
1545 if (!str) {
1546 return nullptr;
1547 }
1548
1549 MOZ_ASSERT(CanStoreCharsAsLatin1(chars, length));
1550 FillFromCompatible(storage, chars, length);
1551 return str;
1552 }
1553
NewStringForAtomDeflatedValidLength(JSContext * cx,const char16_t * s,size_t n)1554 static JSLinearString* NewStringForAtomDeflatedValidLength(JSContext* cx,
1555 const char16_t* s,
1556 size_t n) {
1557 if (JSInlineString::lengthFits<Latin1Char>(n)) {
1558 return NewInlineStringForAtomDeflated(cx, s, n);
1559 }
1560
1561 auto news = cx->make_pod_arena_array<Latin1Char>(js::StringBufferArena, n);
1562 if (!news) {
1563 cx->recoverFromOutOfMemory();
1564 return nullptr;
1565 }
1566
1567 MOZ_ASSERT(CanStoreCharsAsLatin1(s, n));
1568 FillFromCompatible(news.get(), s, n);
1569
1570 return JSLinearString::newForAtomValidLength(cx, std::move(news), n);
1571 }
1572
1573 template <AllowGC allowGC, typename CharT>
NewStringDontDeflate(JSContext * cx,UniquePtr<CharT[],JS::FreePolicy> chars,size_t length,gc::InitialHeap heap)1574 JSLinearString* js::NewStringDontDeflate(
1575 JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t length,
1576 gc::InitialHeap heap) {
1577 if (JSLinearString* str = TryEmptyOrStaticString(cx, chars.get(), length)) {
1578 return str;
1579 }
1580
1581 if (JSInlineString::lengthFits<CharT>(length)) {
1582 // |chars.get()| is safe because 1) |NewInlineString| necessarily *copies*,
1583 // and 2) |chars| frees its contents only when this function returns.
1584 return NewInlineString<allowGC>(
1585 cx, mozilla::Range<const CharT>(chars.get(), length), heap);
1586 }
1587
1588 return JSLinearString::new_<allowGC>(cx, std::move(chars), length, heap);
1589 }
1590
1591 template JSLinearString* js::NewStringDontDeflate<CanGC>(
1592 JSContext* cx, UniqueTwoByteChars chars, size_t length,
1593 gc::InitialHeap heap);
1594
1595 template JSLinearString* js::NewStringDontDeflate<NoGC>(
1596 JSContext* cx, UniqueTwoByteChars chars, size_t length,
1597 gc::InitialHeap heap);
1598
1599 template JSLinearString* js::NewStringDontDeflate<CanGC>(
1600 JSContext* cx, UniqueLatin1Chars chars, size_t length,
1601 gc::InitialHeap heap);
1602
1603 template JSLinearString* js::NewStringDontDeflate<NoGC>(JSContext* cx,
1604 UniqueLatin1Chars chars,
1605 size_t length,
1606 gc::InitialHeap heap);
1607
1608 template <AllowGC allowGC, typename CharT>
NewString(JSContext * cx,UniquePtr<CharT[],JS::FreePolicy> chars,size_t length,gc::InitialHeap heap)1609 JSLinearString* js::NewString(JSContext* cx,
1610 UniquePtr<CharT[], JS::FreePolicy> chars,
1611 size_t length, gc::InitialHeap heap) {
1612 if constexpr (std::is_same_v<CharT, char16_t>) {
1613 if (CanStoreCharsAsLatin1(chars.get(), length)) {
1614 // Deflating copies from |chars.get()| and lets |chars| be freed on
1615 // return.
1616 return NewStringDeflated<allowGC>(cx, chars.get(), length, heap);
1617 }
1618 }
1619
1620 return NewStringDontDeflate<allowGC>(cx, std::move(chars), length, heap);
1621 }
1622
1623 template JSLinearString* js::NewString<CanGC>(JSContext* cx,
1624 UniqueTwoByteChars chars,
1625 size_t length,
1626 gc::InitialHeap heap);
1627
1628 template JSLinearString* js::NewString<NoGC>(JSContext* cx,
1629 UniqueTwoByteChars chars,
1630 size_t length,
1631 gc::InitialHeap heap);
1632
1633 template JSLinearString* js::NewString<CanGC>(JSContext* cx,
1634 UniqueLatin1Chars chars,
1635 size_t length,
1636 gc::InitialHeap heap);
1637
1638 template JSLinearString* js::NewString<NoGC>(JSContext* cx,
1639 UniqueLatin1Chars chars,
1640 size_t length,
1641 gc::InitialHeap heap);
1642
1643 namespace js {
1644
1645 template <AllowGC allowGC, typename CharT>
NewStringCopyNDontDeflateNonStaticValidLength(JSContext * cx,const CharT * s,size_t n,gc::InitialHeap heap)1646 JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength(
1647 JSContext* cx, const CharT* s, size_t n, gc::InitialHeap heap) {
1648 if (JSInlineString::lengthFits<CharT>(n)) {
1649 return NewInlineString<allowGC>(cx, mozilla::Range<const CharT>(s, n),
1650 heap);
1651 }
1652
1653 auto news = cx->make_pod_arena_array<CharT>(js::StringBufferArena, n);
1654 if (!news) {
1655 if (!allowGC) {
1656 cx->recoverFromOutOfMemory();
1657 }
1658 return nullptr;
1659 }
1660
1661 FillChars(news.get(), s, n);
1662
1663 return JSLinearString::newValidLength<allowGC>(cx, std::move(news), n, heap);
1664 }
1665
1666 template JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength<CanGC>(
1667 JSContext* cx, const char16_t* s, size_t n, gc::InitialHeap heap);
1668
1669 template JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength<CanGC>(
1670 JSContext* cx, const Latin1Char* s, size_t n, gc::InitialHeap heap);
1671
1672 template <AllowGC allowGC, typename CharT>
NewStringCopyNDontDeflate(JSContext * cx,const CharT * s,size_t n,gc::InitialHeap heap)1673 JSLinearString* NewStringCopyNDontDeflate(JSContext* cx, const CharT* s,
1674 size_t n, gc::InitialHeap heap) {
1675 if (JSLinearString* str = TryEmptyOrStaticString(cx, s, n)) {
1676 return str;
1677 }
1678
1679 if (MOZ_UNLIKELY(!JSLinearString::validateLength(cx, n))) {
1680 return nullptr;
1681 }
1682
1683 return NewStringCopyNDontDeflateNonStaticValidLength<allowGC>(cx, s, n, heap);
1684 }
1685
1686 template JSLinearString* NewStringCopyNDontDeflate<CanGC>(JSContext* cx,
1687 const char16_t* s,
1688 size_t n,
1689 gc::InitialHeap heap);
1690
1691 template JSLinearString* NewStringCopyNDontDeflate<NoGC>(JSContext* cx,
1692 const char16_t* s,
1693 size_t n,
1694 gc::InitialHeap heap);
1695
1696 template JSLinearString* NewStringCopyNDontDeflate<CanGC>(JSContext* cx,
1697 const Latin1Char* s,
1698 size_t n,
1699 gc::InitialHeap heap);
1700
1701 template JSLinearString* NewStringCopyNDontDeflate<NoGC>(JSContext* cx,
1702 const Latin1Char* s,
1703 size_t n,
1704 gc::InitialHeap heap);
1705
NewLatin1StringZ(JSContext * cx,UniqueChars chars,gc::InitialHeap heap)1706 JSLinearString* NewLatin1StringZ(JSContext* cx, UniqueChars chars,
1707 gc::InitialHeap heap) {
1708 size_t length = strlen(chars.get());
1709 UniqueLatin1Chars latin1(reinterpret_cast<Latin1Char*>(chars.release()));
1710 return NewString<CanGC>(cx, std::move(latin1), length, heap);
1711 }
1712
1713 template <AllowGC allowGC, typename CharT>
NewStringCopyN(JSContext * cx,const CharT * s,size_t n,gc::InitialHeap heap)1714 JSLinearString* NewStringCopyN(JSContext* cx, const CharT* s, size_t n,
1715 gc::InitialHeap heap) {
1716 if constexpr (std::is_same_v<CharT, char16_t>) {
1717 if (CanStoreCharsAsLatin1(s, n)) {
1718 return NewStringDeflated<allowGC>(cx, s, n, heap);
1719 }
1720 }
1721
1722 return NewStringCopyNDontDeflate<allowGC>(cx, s, n, heap);
1723 }
1724
1725 template JSLinearString* NewStringCopyN<CanGC>(JSContext* cx, const char16_t* s,
1726 size_t n, gc::InitialHeap heap);
1727
1728 template JSLinearString* NewStringCopyN<NoGC>(JSContext* cx, const char16_t* s,
1729 size_t n, gc::InitialHeap heap);
1730
1731 template JSLinearString* NewStringCopyN<CanGC>(JSContext* cx,
1732 const Latin1Char* s, size_t n,
1733 gc::InitialHeap heap);
1734
1735 template JSLinearString* NewStringCopyN<NoGC>(JSContext* cx,
1736 const Latin1Char* s, size_t n,
1737 gc::InitialHeap heap);
1738
1739 template <typename CharT>
NewStringForAtomCopyNDontDeflateValidLength(JSContext * cx,const CharT * s,size_t n)1740 JSLinearString* NewStringForAtomCopyNDontDeflateValidLength(JSContext* cx,
1741 const CharT* s,
1742 size_t n) {
1743 if constexpr (std::is_same_v<CharT, char16_t>) {
1744 MOZ_ASSERT(!CanStoreCharsAsLatin1(s, n));
1745 }
1746
1747 if (JSInlineString::lengthFits<CharT>(n)) {
1748 return NewInlineStringForAtom(cx, s, n);
1749 }
1750
1751 auto news = cx->make_pod_arena_array<CharT>(js::StringBufferArena, n);
1752 if (!news) {
1753 cx->recoverFromOutOfMemory();
1754 return nullptr;
1755 }
1756
1757 FillChars(news.get(), s, n);
1758
1759 return JSLinearString::newForAtomValidLength(cx, std::move(news), n);
1760 }
1761
1762 template JSLinearString* NewStringForAtomCopyNDontDeflateValidLength(
1763 JSContext* cx, const char16_t* s, size_t n);
1764
1765 template JSLinearString* NewStringForAtomCopyNDontDeflateValidLength(
1766 JSContext* cx, const Latin1Char* s, size_t n);
1767
1768 template <typename CharT>
NewStringForAtomCopyNMaybeDeflateValidLength(JSContext * cx,const CharT * s,size_t n)1769 JSLinearString* NewStringForAtomCopyNMaybeDeflateValidLength(JSContext* cx,
1770 const CharT* s,
1771 size_t n) {
1772 if constexpr (std::is_same_v<CharT, char16_t>) {
1773 if (CanStoreCharsAsLatin1(s, n)) {
1774 return NewStringForAtomDeflatedValidLength(cx, s, n);
1775 }
1776 }
1777
1778 return NewStringForAtomCopyNDontDeflateValidLength(cx, s, n);
1779 }
1780
1781 template JSLinearString* NewStringForAtomCopyNMaybeDeflateValidLength(
1782 JSContext* cx, const char16_t* s, size_t n);
1783
1784 template JSLinearString* NewStringForAtomCopyNMaybeDeflateValidLength(
1785 JSContext* cx, const Latin1Char* s, size_t n);
1786
NewStringCopyUTF8N(JSContext * cx,const JS::UTF8Chars utf8,gc::InitialHeap heap)1787 JSLinearString* NewStringCopyUTF8N(JSContext* cx, const JS::UTF8Chars utf8,
1788 gc::InitialHeap heap) {
1789 JS::SmallestEncoding encoding = JS::FindSmallestEncoding(utf8);
1790 if (encoding == JS::SmallestEncoding::ASCII) {
1791 return NewStringCopyN<js::CanGC>(cx, utf8.begin().get(), utf8.length(),
1792 heap);
1793 }
1794
1795 size_t length;
1796 if (encoding == JS::SmallestEncoding::Latin1) {
1797 UniqueLatin1Chars latin1(
1798 UTF8CharsToNewLatin1CharsZ(cx, utf8, &length, js::StringBufferArena)
1799 .get());
1800 if (!latin1) {
1801 return nullptr;
1802 }
1803
1804 return NewString<js::CanGC>(cx, std::move(latin1), length, heap);
1805 }
1806
1807 MOZ_ASSERT(encoding == JS::SmallestEncoding::UTF16);
1808
1809 UniqueTwoByteChars utf16(
1810 UTF8CharsToNewTwoByteCharsZ(cx, utf8, &length, js::StringBufferArena)
1811 .get());
1812 if (!utf16) {
1813 return nullptr;
1814 }
1815
1816 return NewString<js::CanGC>(cx, std::move(utf16), length, heap);
1817 }
1818
lookup(const char16_t * chars,size_t len) const1819 MOZ_ALWAYS_INLINE JSString* ExternalStringCache::lookup(const char16_t* chars,
1820 size_t len) const {
1821 AutoCheckCannotGC nogc;
1822
1823 for (size_t i = 0; i < NumEntries; i++) {
1824 JSString* str = entries_[i];
1825 if (!str || str->length() != len) {
1826 continue;
1827 }
1828
1829 const char16_t* strChars = str->asLinear().nonInlineTwoByteChars(nogc);
1830 if (chars == strChars) {
1831 // Note that we don't need an incremental barrier here or below.
1832 // The cache is purged on GC so any string we get from the cache
1833 // must have been allocated after the GC started.
1834 return str;
1835 }
1836
1837 // Compare the chars. Don't do this for long strings as it will be
1838 // faster to allocate a new external string.
1839 static const size_t MaxLengthForCharComparison = 100;
1840 if (len <= MaxLengthForCharComparison && ArrayEqual(chars, strChars, len)) {
1841 return str;
1842 }
1843 }
1844
1845 return nullptr;
1846 }
1847
put(JSString * str)1848 MOZ_ALWAYS_INLINE void ExternalStringCache::put(JSString* str) {
1849 MOZ_ASSERT(str->isExternal());
1850
1851 for (size_t i = NumEntries - 1; i > 0; i--) {
1852 entries_[i] = entries_[i - 1];
1853 }
1854
1855 entries_[0] = str;
1856 }
1857
NewMaybeExternalString(JSContext * cx,const char16_t * s,size_t n,const JSExternalStringCallbacks * callbacks,bool * allocatedExternal,gc::InitialHeap heap)1858 JSString* NewMaybeExternalString(JSContext* cx, const char16_t* s, size_t n,
1859 const JSExternalStringCallbacks* callbacks,
1860 bool* allocatedExternal,
1861 gc::InitialHeap heap) {
1862 if (JSString* str = TryEmptyOrStaticString(cx, s, n)) {
1863 *allocatedExternal = false;
1864 return str;
1865 }
1866
1867 if (JSThinInlineString::lengthFits<Latin1Char>(n) &&
1868 CanStoreCharsAsLatin1(s, n)) {
1869 *allocatedExternal = false;
1870 return NewInlineStringDeflated<AllowGC::CanGC>(
1871 cx, mozilla::Range<const char16_t>(s, n), heap);
1872 }
1873
1874 ExternalStringCache& cache = cx->zone()->externalStringCache();
1875 if (JSString* str = cache.lookup(s, n)) {
1876 *allocatedExternal = false;
1877 return str;
1878 }
1879
1880 JSString* str = JSExternalString::new_(cx, s, n, callbacks);
1881 if (!str) {
1882 return nullptr;
1883 }
1884
1885 *allocatedExternal = true;
1886 cache.put(str);
1887 return str;
1888 }
1889
1890 } /* namespace js */
1891
1892 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
dumpRepresentation(js::GenericPrinter & out,int indent) const1893 void JSExtensibleString::dumpRepresentation(js::GenericPrinter& out,
1894 int indent) const {
1895 dumpRepresentationHeader(out, "JSExtensibleString");
1896 indent += 2;
1897
1898 out.printf("%*scapacity: %zu\n", indent, "", capacity());
1899 dumpRepresentationChars(out, indent);
1900 }
1901
dumpRepresentation(js::GenericPrinter & out,int indent) const1902 void JSInlineString::dumpRepresentation(js::GenericPrinter& out,
1903 int indent) const {
1904 dumpRepresentationHeader(
1905 out, isFatInline() ? "JSFatInlineString" : "JSThinInlineString");
1906 indent += 2;
1907
1908 dumpRepresentationChars(out, indent);
1909 }
1910
dumpRepresentation(js::GenericPrinter & out,int indent) const1911 void JSLinearString::dumpRepresentation(js::GenericPrinter& out,
1912 int indent) const {
1913 dumpRepresentationHeader(out, "JSLinearString");
1914 indent += 2;
1915
1916 dumpRepresentationChars(out, indent);
1917 }
1918 #endif
1919
1920 struct RepresentativeExternalString : public JSExternalStringCallbacks {
finalizeRepresentativeExternalString1921 void finalize(char16_t* chars) const override {
1922 // Constant chars, nothing to do.
1923 }
sizeOfBufferRepresentativeExternalString1924 size_t sizeOfBuffer(const char16_t* chars,
1925 mozilla::MallocSizeOf mallocSizeOf) const override {
1926 // This string's buffer is not heap-allocated, so its malloc size is 0.
1927 return 0;
1928 }
1929 };
1930
1931 static const RepresentativeExternalString RepresentativeExternalStringCallbacks;
1932
1933 template <typename CheckString, typename CharT>
FillWithRepresentatives(JSContext * cx,HandleArrayObject array,uint32_t * index,const CharT * chars,size_t len,size_t fatInlineMaxLength,const CheckString & check)1934 static bool FillWithRepresentatives(JSContext* cx, HandleArrayObject array,
1935 uint32_t* index, const CharT* chars,
1936 size_t len, size_t fatInlineMaxLength,
1937 const CheckString& check) {
1938 auto AppendString = [&check](JSContext* cx, HandleArrayObject array,
1939 uint32_t* index, HandleString s) {
1940 MOZ_ASSERT(check(s));
1941 (void)check; // silence clang -Wunused-lambda-capture in opt builds
1942 RootedValue val(cx, StringValue(s));
1943 return JS_DefineElement(cx, array, (*index)++, val, 0);
1944 };
1945
1946 MOZ_ASSERT(len > fatInlineMaxLength);
1947
1948 // Normal atom.
1949 RootedString atom1(cx, AtomizeChars(cx, chars, len));
1950 if (!atom1 || !AppendString(cx, array, index, atom1)) {
1951 return false;
1952 }
1953 MOZ_ASSERT(atom1->isAtom());
1954
1955 // Inline atom.
1956 RootedString atom2(cx, AtomizeChars(cx, chars, 2));
1957 if (!atom2 || !AppendString(cx, array, index, atom2)) {
1958 return false;
1959 }
1960 MOZ_ASSERT(atom2->isAtom());
1961 MOZ_ASSERT(atom2->isInline());
1962
1963 // Fat inline atom.
1964 RootedString atom3(cx, AtomizeChars(cx, chars, fatInlineMaxLength));
1965 if (!atom3 || !AppendString(cx, array, index, atom3)) {
1966 return false;
1967 }
1968 MOZ_ASSERT(atom3->isAtom());
1969 MOZ_ASSERT(atom3->isFatInline());
1970
1971 // Normal linear string.
1972 RootedString linear1(cx, NewStringCopyN<CanGC>(cx, chars, len));
1973 if (!linear1 || !AppendString(cx, array, index, linear1)) {
1974 return false;
1975 }
1976 MOZ_ASSERT(linear1->isLinear());
1977
1978 // Inline string.
1979 RootedString linear2(cx, NewStringCopyN<CanGC>(cx, chars, 3));
1980 if (!linear2 || !AppendString(cx, array, index, linear2)) {
1981 return false;
1982 }
1983 MOZ_ASSERT(linear2->isLinear());
1984 MOZ_ASSERT(linear2->isInline());
1985
1986 // Fat inline string.
1987 RootedString linear3(cx,
1988 NewStringCopyN<CanGC>(cx, chars, fatInlineMaxLength));
1989 if (!linear3 || !AppendString(cx, array, index, linear3)) {
1990 return false;
1991 }
1992 MOZ_ASSERT(linear3->isLinear());
1993 MOZ_ASSERT(linear3->isFatInline());
1994
1995 // Rope.
1996 RootedString rope(cx, ConcatStrings<CanGC>(cx, atom1, atom3));
1997 if (!rope || !AppendString(cx, array, index, rope)) {
1998 return false;
1999 }
2000 MOZ_ASSERT(rope->isRope());
2001
2002 // Dependent.
2003 RootedString dep(cx, NewDependentString(cx, atom1, 0, len - 2));
2004 if (!dep || !AppendString(cx, array, index, dep)) {
2005 return false;
2006 }
2007 MOZ_ASSERT(dep->isDependent());
2008
2009 // Extensible.
2010 RootedString temp1(cx, NewStringCopyN<CanGC>(cx, chars, len));
2011 if (!temp1) {
2012 return false;
2013 }
2014 RootedString extensible(cx, ConcatStrings<CanGC>(cx, temp1, atom3));
2015 if (!extensible || !extensible->ensureLinear(cx)) {
2016 return false;
2017 }
2018 if (!AppendString(cx, array, index, extensible)) {
2019 return false;
2020 }
2021 MOZ_ASSERT(extensible->isExtensible());
2022
2023 // External. Note that we currently only support TwoByte external strings.
2024 RootedString external1(cx), external2(cx);
2025 if constexpr (std::is_same_v<CharT, char16_t>) {
2026 external1 = JS_NewExternalString(cx, (const char16_t*)chars, len,
2027 &RepresentativeExternalStringCallbacks);
2028 if (!external1 || !AppendString(cx, array, index, external1)) {
2029 return false;
2030 }
2031 MOZ_ASSERT(external1->isExternal());
2032
2033 external2 = JS_NewExternalString(cx, (const char16_t*)chars, 2,
2034 &RepresentativeExternalStringCallbacks);
2035 if (!external2 || !AppendString(cx, array, index, external2)) {
2036 return false;
2037 }
2038 MOZ_ASSERT(external2->isExternal());
2039 }
2040
2041 // Assert the strings still have the types we expect after creating the
2042 // other strings.
2043
2044 MOZ_ASSERT(atom1->isAtom());
2045 MOZ_ASSERT(atom2->isAtom());
2046 MOZ_ASSERT(atom3->isAtom());
2047 MOZ_ASSERT(atom2->isInline());
2048 MOZ_ASSERT(atom3->isFatInline());
2049
2050 MOZ_ASSERT(linear1->isLinear());
2051 MOZ_ASSERT(linear2->isLinear());
2052 MOZ_ASSERT(linear3->isLinear());
2053 MOZ_ASSERT(linear2->isInline());
2054 MOZ_ASSERT(linear3->isFatInline());
2055
2056 MOZ_ASSERT(rope->isRope());
2057 MOZ_ASSERT(dep->isDependent());
2058 MOZ_ASSERT(extensible->isExtensible());
2059 MOZ_ASSERT_IF(external1, external1->isExternal());
2060 MOZ_ASSERT_IF(external2, external2->isExternal());
2061 return true;
2062 }
2063
2064 /* static */
fillWithRepresentatives(JSContext * cx,HandleArrayObject array)2065 bool JSString::fillWithRepresentatives(JSContext* cx, HandleArrayObject array) {
2066 uint32_t index = 0;
2067
2068 auto CheckTwoByte = [](JSString* str) { return str->hasTwoByteChars(); };
2069 auto CheckLatin1 = [](JSString* str) { return str->hasLatin1Chars(); };
2070
2071 // Append TwoByte strings.
2072 static const char16_t twoByteChars[] =
2073 u"\u1234abc\0def\u5678ghijklmasdfa\0xyz0123456789";
2074 if (!FillWithRepresentatives(
2075 cx, array, &index, twoByteChars, std::size(twoByteChars) - 1,
2076 JSFatInlineString::MAX_LENGTH_TWO_BYTE, CheckTwoByte)) {
2077 return false;
2078 }
2079
2080 // Append Latin1 strings.
2081 static const Latin1Char latin1Chars[] = "abc\0defghijklmasdfa\0xyz0123456789";
2082 if (!FillWithRepresentatives(
2083 cx, array, &index, latin1Chars, std::size(latin1Chars) - 1,
2084 JSFatInlineString::MAX_LENGTH_LATIN1, CheckLatin1)) {
2085 return false;
2086 }
2087
2088 // Now create forcibly-tenured versions of each of these string types. Note
2089 // that this is best-effort; if nursery strings are disabled, or we GC
2090 // midway through here, then we may end up with fewer nursery strings than
2091 // desired. Also, some types of strings are not nursery-allocatable, so
2092 // this will always produce some number of redundant strings.
2093 gc::AutoSuppressNurseryCellAlloc suppress(cx);
2094
2095 // Append TwoByte strings.
2096 if (!FillWithRepresentatives(
2097 cx, array, &index, twoByteChars, std::size(twoByteChars) - 1,
2098 JSFatInlineString::MAX_LENGTH_TWO_BYTE, CheckTwoByte)) {
2099 return false;
2100 }
2101
2102 // Append Latin1 strings.
2103 if (!FillWithRepresentatives(
2104 cx, array, &index, latin1Chars, std::size(latin1Chars) - 1,
2105 JSFatInlineString::MAX_LENGTH_LATIN1, CheckLatin1)) {
2106 return false;
2107 }
2108
2109 MOZ_ASSERT(index == 40);
2110
2111 return true;
2112 }
2113
2114 /*** Conversions ************************************************************/
2115
EncodeLatin1(JSContext * cx,JSString * str)2116 UniqueChars js::EncodeLatin1(JSContext* cx, JSString* str) {
2117 JSLinearString* linear = str->ensureLinear(cx);
2118 if (!linear) {
2119 return nullptr;
2120 }
2121
2122 JS::AutoCheckCannotGC nogc;
2123 if (linear->hasTwoByteChars()) {
2124 JS::Latin1CharsZ chars =
2125 JS::LossyTwoByteCharsToNewLatin1CharsZ(cx, linear->twoByteRange(nogc));
2126 return UniqueChars(chars.c_str());
2127 }
2128
2129 size_t len = str->length();
2130 Latin1Char* buf = cx->pod_malloc<Latin1Char>(len + 1);
2131 if (!buf) {
2132 return nullptr;
2133 }
2134
2135 FillChars(buf, linear->latin1Chars(nogc), len);
2136 buf[len] = '\0';
2137
2138 return UniqueChars(reinterpret_cast<char*>(buf));
2139 }
2140
EncodeAscii(JSContext * cx,JSString * str)2141 UniqueChars js::EncodeAscii(JSContext* cx, JSString* str) {
2142 JSLinearString* linear = str->ensureLinear(cx);
2143 if (!linear) {
2144 return nullptr;
2145 }
2146
2147 MOZ_ASSERT(StringIsAscii(linear));
2148 return EncodeLatin1(cx, linear);
2149 }
2150
IdToPrintableUTF8(JSContext * cx,HandleId id,IdToPrintableBehavior behavior)2151 UniqueChars js::IdToPrintableUTF8(JSContext* cx, HandleId id,
2152 IdToPrintableBehavior behavior) {
2153 // ToString(<symbol>) throws a TypeError, therefore require that callers
2154 // request source representation when |id| is a property key.
2155 MOZ_ASSERT_IF(
2156 behavior == IdToPrintableBehavior::IdIsIdentifier,
2157 id.isAtom() && frontend::IsIdentifierNameOrPrivateName(id.toAtom()));
2158
2159 RootedValue v(cx, IdToValue(id));
2160 JSString* str;
2161 if (behavior == IdToPrintableBehavior::IdIsPropertyKey) {
2162 str = ValueToSource(cx, v);
2163 } else {
2164 str = ToString<CanGC>(cx, v);
2165 }
2166 if (!str) {
2167 return nullptr;
2168 }
2169 return StringToNewUTF8CharsZ(cx, *str);
2170 }
2171
2172 template <AllowGC allowGC>
ToStringSlow(JSContext * cx,typename MaybeRooted<Value,allowGC>::HandleType arg)2173 JSString* js::ToStringSlow(
2174 JSContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg) {
2175 /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
2176 MOZ_ASSERT(!arg.isString());
2177
2178 Value v = arg;
2179 if (!v.isPrimitive()) {
2180 MOZ_ASSERT(!cx->isHelperThreadContext());
2181 if (!allowGC) {
2182 return nullptr;
2183 }
2184 RootedValue v2(cx, v);
2185 if (!ToPrimitive(cx, JSTYPE_STRING, &v2)) {
2186 return nullptr;
2187 }
2188 v = v2;
2189 }
2190
2191 JSString* str;
2192 if (v.isString()) {
2193 str = v.toString();
2194 } else if (v.isInt32()) {
2195 str = Int32ToString<allowGC>(cx, v.toInt32());
2196 } else if (v.isDouble()) {
2197 str = NumberToString<allowGC>(cx, v.toDouble());
2198 } else if (v.isBoolean()) {
2199 str = BooleanToString(cx, v.toBoolean());
2200 } else if (v.isNull()) {
2201 str = cx->names().null;
2202 } else if (v.isSymbol()) {
2203 MOZ_ASSERT(!cx->isHelperThreadContext());
2204 if (allowGC) {
2205 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
2206 JSMSG_SYMBOL_TO_STRING);
2207 }
2208 return nullptr;
2209 } else if (v.isBigInt()) {
2210 if (!allowGC) {
2211 return nullptr;
2212 }
2213 RootedBigInt i(cx, v.toBigInt());
2214 str = BigInt::toString<CanGC>(cx, i, 10);
2215 }
2216 #ifdef ENABLE_RECORD_TUPLE
2217 else if (v.isExtendedPrimitive()) {
2218 if (!allowGC) {
2219 return nullptr;
2220 }
2221 if (IsTuple(v)) {
2222 Rooted<TupleType*> tup(cx, &TupleType::thisTupleValue(v));
2223 return TupleToSource(cx, tup);
2224 }
2225 Rooted<RecordType*> rec(cx);
2226 MOZ_ALWAYS_TRUE(RecordObject::maybeUnbox(&v.getObjectPayload(), &rec));
2227 return RecordToSource(cx, rec);
2228 }
2229 #endif
2230 else {
2231 MOZ_ASSERT(v.isUndefined());
2232 str = cx->names().undefined;
2233 }
2234 return str;
2235 }
2236
2237 template JSString* js::ToStringSlow<CanGC>(JSContext* cx, HandleValue arg);
2238
2239 template JSString* js::ToStringSlow<NoGC>(JSContext* cx, const Value& arg);
2240
ToStringSlow(JSContext * cx,HandleValue v)2241 JS_PUBLIC_API JSString* js::ToStringSlow(JSContext* cx, HandleValue v) {
2242 return ToStringSlow<CanGC>(cx, v);
2243 }
2244