1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "vm/StringType-inl.h"
8
9 #include "mozilla/ArrayUtils.h"
10 #include "mozilla/Casting.h"
11 #include "mozilla/DebugOnly.h"
12 #include "mozilla/EndianUtils.h"
13 #include "mozilla/HashFunctions.h"
14 #include "mozilla/Latin1.h"
15 #include "mozilla/MathAlgorithms.h"
16 #include "mozilla/MemoryReporting.h"
17 #include "mozilla/PodOperations.h"
18 #include "mozilla/RangedPtr.h"
19 #include "mozilla/TextUtils.h"
20 #include "mozilla/Utf8.h"
21 #include "mozilla/Vector.h"
22
23 #include <algorithm> // std::{all_of,copy_n,enable_if,is_const,move}
24 #include <iterator> // std::size
25 #include <type_traits> // std::is_same, std::is_unsigned
26
27 #include "jsfriendapi.h"
28
29 #include "builtin/Boolean.h"
30 #include "frontend/BytecodeCompiler.h"
31 #include "frontend/ParserAtom.h"
32 #include "gc/MaybeRooted.h"
33 #include "gc/Nursery.h"
34 #include "js/CharacterEncoding.h"
35 #include "js/friend/ErrorMessages.h" // js::GetErrorMessage, JSMSG_*
36 #include "js/StableStringChars.h"
37 #include "js/Symbol.h"
38 #include "js/UbiNode.h"
39 #include "util/StringBuffer.h"
40 #include "util/Unicode.h"
41 #include "vm/GeckoProfiler.h"
42 #include "vm/ToSource.h" // js::ValueToSource
43
44 #include "gc/Marking-inl.h"
45 #include "vm/GeckoProfiler-inl.h"
46 #include "vm/JSContext-inl.h"
47 #include "vm/JSObject-inl.h"
48 #include "vm/Realm-inl.h"
49
50 using namespace js;
51
52 using mozilla::ArrayEqual;
53 using mozilla::AssertedCast;
54 using mozilla::AsWritableChars;
55 using mozilla::ConvertLatin1toUtf16;
56 using mozilla::IsAsciiDigit;
57 using mozilla::IsUtf16Latin1;
58 using mozilla::LossyConvertUtf16toLatin1;
59 using mozilla::PodCopy;
60 using mozilla::RangedPtr;
61 using mozilla::RoundUpPow2;
62 using mozilla::Span;
63
64 using JS::AutoCheckCannotGC;
65 using JS::AutoStableStringChars;
66
67 using UniqueLatin1Chars = UniquePtr<Latin1Char[], JS::FreePolicy>;
68
sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf)69 size_t JSString::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
70 // JSRope: do nothing, we'll count all children chars when we hit the leaf
71 // strings.
72 if (isRope()) {
73 return 0;
74 }
75
76 MOZ_ASSERT(isLinear());
77
78 // JSDependentString: do nothing, we'll count the chars when we hit the base
79 // string.
80 if (isDependent()) {
81 return 0;
82 }
83
84 // JSExternalString: Ask the embedding to tell us what's going on.
85 if (isExternal()) {
86 // Our callback isn't supposed to cause GC.
87 JS::AutoSuppressGCAnalysis nogc;
88 return asExternal().callbacks()->sizeOfBuffer(asExternal().twoByteChars(),
89 mallocSizeOf);
90 }
91
92 // JSExtensibleString: count the full capacity, not just the used space.
93 if (isExtensible()) {
94 JSExtensibleString& extensible = asExtensible();
95 return extensible.hasLatin1Chars()
96 ? mallocSizeOf(extensible.rawLatin1Chars())
97 : mallocSizeOf(extensible.rawTwoByteChars());
98 }
99
100 // JSInlineString, JSFatInlineString [JSInlineAtom, JSFatInlineAtom]: the
101 // chars are inline.
102 if (isInline()) {
103 return 0;
104 }
105
106 // Everything else: measure the space for the chars.
107 JSLinearString& linear = asLinear();
108 MOZ_ASSERT(linear.ownsMallocedChars());
109 return linear.hasLatin1Chars() ? mallocSizeOf(linear.rawLatin1Chars())
110 : mallocSizeOf(linear.rawTwoByteChars());
111 }
112
size(mozilla::MallocSizeOf mallocSizeOf) const113 JS::ubi::Node::Size JS::ubi::Concrete<JSString>::size(
114 mozilla::MallocSizeOf mallocSizeOf) const {
115 JSString& str = get();
116 size_t size;
117 if (str.isAtom()) {
118 size =
119 str.isFatInline() ? sizeof(js::FatInlineAtom) : sizeof(js::NormalAtom);
120 } else {
121 size = str.isFatInline() ? sizeof(JSFatInlineString) : sizeof(JSString);
122 }
123
124 if (IsInsideNursery(&str)) {
125 size += Nursery::nurseryCellHeaderSize();
126 }
127
128 size += str.sizeOfExcludingThis(mallocSizeOf);
129
130 return size;
131 }
132
133 const char16_t JS::ubi::Concrete<JSString>::concreteTypeName[] = u"JSString";
134
encodeUTF8Partial(const JS::AutoRequireNoGC & nogc,mozilla::Span<char> buffer) const135 mozilla::Maybe<mozilla::Tuple<size_t, size_t> > JSString::encodeUTF8Partial(
136 const JS::AutoRequireNoGC& nogc, mozilla::Span<char> buffer) const {
137 mozilla::Vector<const JSString*, 16, SystemAllocPolicy> stack;
138 const JSString* current = this;
139 char16_t pendingLeadSurrogate = 0; // U+0000 means no pending lead surrogate
140 size_t totalRead = 0;
141 size_t totalWritten = 0;
142 for (;;) {
143 if (current->isRope()) {
144 JSRope& rope = current->asRope();
145 if (!stack.append(rope.rightChild())) {
146 // OOM
147 return mozilla::Nothing();
148 }
149 current = rope.leftChild();
150 continue;
151 }
152
153 JSLinearString& linear = current->asLinear();
154 if (MOZ_LIKELY(linear.hasLatin1Chars())) {
155 if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
156 if (buffer.Length() < 3) {
157 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
158 }
159 buffer[0] = '\xEF';
160 buffer[1] = '\xBF';
161 buffer[2] = '\xBD';
162 buffer = buffer.From(3);
163 totalRead += 1; // pendingLeadSurrogate
164 totalWritten += 3;
165 pendingLeadSurrogate = 0;
166 }
167 auto src = mozilla::AsChars(
168 mozilla::Span(linear.latin1Chars(nogc), linear.length()));
169 size_t read;
170 size_t written;
171 mozilla::Tie(read, written) =
172 mozilla::ConvertLatin1toUtf8Partial(src, buffer);
173 buffer = buffer.From(written);
174 totalRead += read;
175 totalWritten += written;
176 if (read < src.Length()) {
177 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
178 }
179 } else {
180 auto src = mozilla::Span(linear.twoByteChars(nogc), linear.length());
181 if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
182 char16_t first = 0;
183 if (!src.IsEmpty()) {
184 first = src[0];
185 }
186 if (unicode::IsTrailSurrogate(first)) {
187 // Got a surrogate pair
188 if (buffer.Length() < 4) {
189 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
190 }
191 uint32_t astral = unicode::UTF16Decode(pendingLeadSurrogate, first);
192 buffer[0] = char(0b1111'0000 | (astral >> 18));
193 buffer[1] = char(0b1000'0000 | ((astral >> 12) & 0b11'1111));
194 buffer[2] = char(0b1000'0000 | ((astral >> 6) & 0b11'1111));
195 buffer[3] = char(0b1000'0000 | (astral & 0b11'1111));
196 src = src.From(1);
197 buffer = buffer.From(4);
198 totalRead += 2; // both pendingLeadSurrogate and first!
199 totalWritten += 4;
200 } else {
201 // unpaired surrogate
202 if (buffer.Length() < 3) {
203 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
204 }
205 buffer[0] = '\xEF';
206 buffer[1] = '\xBF';
207 buffer[2] = '\xBD';
208 buffer = buffer.From(3);
209 totalRead += 1; // pendingLeadSurrogate
210 totalWritten += 3;
211 }
212 pendingLeadSurrogate = 0;
213 }
214 if (src.IsEmpty()) {
215 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
216 }
217 char16_t last = src[src.Length() - 1];
218 if (unicode::IsLeadSurrogate(last)) {
219 src = src.To(src.Length() - 1);
220 pendingLeadSurrogate = last;
221 } else {
222 MOZ_ASSERT(!pendingLeadSurrogate);
223 }
224 size_t read;
225 size_t written;
226 mozilla::Tie(read, written) =
227 mozilla::ConvertUtf16toUtf8Partial(src, buffer);
228 buffer = buffer.From(written);
229 totalRead += read;
230 totalWritten += written;
231 if (read < src.Length()) {
232 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
233 }
234 }
235 if (stack.empty()) {
236 break;
237 }
238 current = stack.popCopy();
239 }
240 if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
241 if (buffer.Length() < 3) {
242 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
243 }
244 buffer[0] = '\xEF';
245 buffer[1] = '\xBF';
246 buffer[2] = '\xBD';
247 // No need to update buffer and pendingLeadSurrogate anymore
248 totalRead += 1;
249 totalWritten += 3;
250 }
251 return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
252 }
253
254 #if defined(DEBUG) || defined(JS_JITSPEW)
255
256 template <typename CharT>
257 /*static */
dumpChars(const CharT * s,size_t n,js::GenericPrinter & out)258 void JSString::dumpChars(const CharT* s, size_t n, js::GenericPrinter& out) {
259 if (n == SIZE_MAX) {
260 n = 0;
261 while (s[n]) {
262 n++;
263 }
264 }
265
266 out.put("\"");
267 dumpCharsNoQuote(s, n, out);
268 out.putChar('"');
269 }
270
271 template void JSString::dumpChars(const Latin1Char* s, size_t n,
272 js::GenericPrinter& out);
273
274 template void JSString::dumpChars(const char16_t* s, size_t n,
275 js::GenericPrinter& out);
276
277 template <typename CharT>
278 /*static */
dumpCharsNoQuote(const CharT * s,size_t n,js::GenericPrinter & out)279 void JSString::dumpCharsNoQuote(const CharT* s, size_t n,
280 js::GenericPrinter& out) {
281 for (size_t i = 0; i < n; i++) {
282 char16_t c = s[i];
283 if (c == '\n') {
284 out.put("\\n");
285 } else if (c == '\t') {
286 out.put("\\t");
287 } else if (c >= 32 && c < 127) {
288 out.putChar((char)s[i]);
289 } else if (c <= 255) {
290 out.printf("\\x%02x", unsigned(c));
291 } else {
292 out.printf("\\u%04x", unsigned(c));
293 }
294 }
295 }
296
297 template void JSString::dumpCharsNoQuote(const Latin1Char* s, size_t n,
298 js::GenericPrinter& out);
299
300 template void JSString::dumpCharsNoQuote(const char16_t* s, size_t n,
301 js::GenericPrinter& out);
302
dumpCharsNoNewline(js::GenericPrinter & out)303 void JSString::dumpCharsNoNewline(js::GenericPrinter& out) {
304 if (JSLinearString* linear = ensureLinear(nullptr)) {
305 AutoCheckCannotGC nogc;
306 if (hasLatin1Chars()) {
307 out.put("[Latin 1]");
308 dumpChars(linear->latin1Chars(nogc), length(), out);
309 } else {
310 out.put("[2 byte]");
311 dumpChars(linear->twoByteChars(nogc), length(), out);
312 }
313 } else {
314 out.put("(oom in JSString::dumpCharsNoNewline)");
315 }
316 }
317
dumpCharsNoQuote(js::GenericPrinter & out)318 void JSString::dumpCharsNoQuote(js::GenericPrinter& out) {
319 if (JSLinearString* linear = ensureLinear(nullptr)) {
320 AutoCheckCannotGC nogc;
321 if (hasLatin1Chars()) {
322 dumpCharsNoQuote(linear->latin1Chars(nogc), length(), out);
323 } else {
324 dumpCharsNoQuote(linear->twoByteChars(nogc), length(), out);
325 }
326 } else {
327 out.put("(oom in JSString::dumpCharsNoNewline)");
328 }
329 }
330
dump()331 void JSString::dump() {
332 js::Fprinter out(stderr);
333 dump(out);
334 }
335
dump(js::GenericPrinter & out)336 void JSString::dump(js::GenericPrinter& out) {
337 dumpNoNewline(out);
338 out.putChar('\n');
339 }
340
dumpNoNewline(js::GenericPrinter & out)341 void JSString::dumpNoNewline(js::GenericPrinter& out) {
342 if (JSLinearString* linear = ensureLinear(nullptr)) {
343 AutoCheckCannotGC nogc;
344 if (hasLatin1Chars()) {
345 const Latin1Char* chars = linear->latin1Chars(nogc);
346 out.printf("JSString* (%p) = Latin1Char * (%p) = ", (void*)this,
347 (void*)chars);
348 dumpChars(chars, length(), out);
349 } else {
350 const char16_t* chars = linear->twoByteChars(nogc);
351 out.printf("JSString* (%p) = char16_t * (%p) = ", (void*)this,
352 (void*)chars);
353 dumpChars(chars, length(), out);
354 }
355 } else {
356 out.put("(oom in JSString::dump)");
357 }
358 }
359
dumpRepresentation(js::GenericPrinter & out,int indent) const360 void JSString::dumpRepresentation(js::GenericPrinter& out, int indent) const {
361 if (isRope()) {
362 asRope().dumpRepresentation(out, indent);
363 } else if (isDependent()) {
364 asDependent().dumpRepresentation(out, indent);
365 } else if (isExternal()) {
366 asExternal().dumpRepresentation(out, indent);
367 } else if (isExtensible()) {
368 asExtensible().dumpRepresentation(out, indent);
369 } else if (isInline()) {
370 asInline().dumpRepresentation(out, indent);
371 } else if (isLinear()) {
372 asLinear().dumpRepresentation(out, indent);
373 } else {
374 MOZ_CRASH("Unexpected JSString representation");
375 }
376 }
377
dumpRepresentationHeader(js::GenericPrinter & out,const char * subclass) const378 void JSString::dumpRepresentationHeader(js::GenericPrinter& out,
379 const char* subclass) const {
380 uint32_t flags = JSString::flags();
381 // Print the string's address as an actual C++ expression, to facilitate
382 // copy-and-paste into a debugger.
383 out.printf("((%s*) %p) length: %zu flags: 0x%x", subclass, this, length(),
384 flags);
385 if (flags & LINEAR_BIT) out.put(" LINEAR");
386 if (flags & DEPENDENT_BIT) out.put(" DEPENDENT");
387 if (flags & INLINE_CHARS_BIT) out.put(" INLINE_CHARS");
388 if (flags & ATOM_BIT)
389 out.put(" ATOM");
390 else
391 out.put(" (NON ATOM)");
392 if (isPermanentAtom()) out.put(" PERMANENT");
393 if (flags & LATIN1_CHARS_BIT) out.put(" LATIN1");
394 if (flags & INDEX_VALUE_BIT) out.printf(" INDEX_VALUE(%u)", getIndexValue());
395 if (!isTenured()) out.put(" NURSERY");
396 out.putChar('\n');
397 }
398
dumpRepresentationChars(js::GenericPrinter & out,int indent) const399 void JSLinearString::dumpRepresentationChars(js::GenericPrinter& out,
400 int indent) const {
401 if (hasLatin1Chars()) {
402 out.printf("%*schars: ((Latin1Char*) %p) ", indent, "", rawLatin1Chars());
403 dumpChars(rawLatin1Chars(), length(), out);
404 } else {
405 out.printf("%*schars: ((char16_t*) %p) ", indent, "", rawTwoByteChars());
406 dumpChars(rawTwoByteChars(), length(), out);
407 }
408 out.putChar('\n');
409 }
410
equals(const char * s)411 bool JSString::equals(const char* s) {
412 JSLinearString* linear = ensureLinear(nullptr);
413 if (!linear) {
414 // This is DEBUG-only code.
415 fprintf(stderr, "OOM in JSString::equals!\n");
416 return false;
417 }
418
419 return StringEqualsAscii(linear, s);
420 }
421 #endif /* defined(DEBUG) || defined(JS_JITSPEW) */
422
423 template <typename CharT>
AllocChars(JSString * str,size_t length,CharT ** chars,size_t * capacity)424 static MOZ_ALWAYS_INLINE bool AllocChars(JSString* str, size_t length,
425 CharT** chars, size_t* capacity) {
426 /*
427 * Grow by 12.5% if the buffer is very large. Otherwise, round up to the
428 * next power of 2. This is similar to what we do with arrays; see
429 * JSObject::ensureDenseArrayElements.
430 */
431 static const size_t DOUBLING_MAX = 1024 * 1024;
432 *capacity =
433 length > DOUBLING_MAX ? length + (length / 8) : RoundUpPow2(length);
434
435 static_assert(JSString::MAX_LENGTH * sizeof(CharT) <= UINT32_MAX);
436 *chars =
437 str->zone()->pod_arena_malloc<CharT>(js::StringBufferArena, *capacity);
438 return *chars != nullptr;
439 }
440
copyLatin1Chars(JSContext * maybecx,arena_id_t destArenaId) const441 UniqueLatin1Chars JSRope::copyLatin1Chars(JSContext* maybecx,
442 arena_id_t destArenaId) const {
443 return copyCharsInternal<Latin1Char>(maybecx, destArenaId);
444 }
445
copyTwoByteChars(JSContext * maybecx,arena_id_t destArenaId) const446 UniqueTwoByteChars JSRope::copyTwoByteChars(JSContext* maybecx,
447 arena_id_t destArenaId) const {
448 return copyCharsInternal<char16_t>(maybecx, destArenaId);
449 }
450
451 template <typename CharT>
copyCharsInternal(JSContext * maybecx,arena_id_t destArenaId) const452 UniquePtr<CharT[], JS::FreePolicy> JSRope::copyCharsInternal(
453 JSContext* maybecx, arena_id_t destArenaId) const {
454 // Left-leaning ropes are far more common than right-leaning ropes, so
455 // perform a non-destructive traversal of the rope, right node first,
456 // splatting each node's characters into a contiguous buffer.
457
458 size_t n = length();
459
460 UniquePtr<CharT[], JS::FreePolicy> out;
461 if (maybecx) {
462 out.reset(maybecx->pod_arena_malloc<CharT>(destArenaId, n));
463 } else {
464 out.reset(js_pod_arena_malloc<CharT>(destArenaId, n));
465 }
466
467 if (!out) {
468 return nullptr;
469 }
470
471 Vector<const JSString*, 8, SystemAllocPolicy> nodeStack;
472 const JSString* str = this;
473 CharT* end = out.get() + str->length();
474 while (true) {
475 if (str->isRope()) {
476 if (!nodeStack.append(str->asRope().leftChild())) {
477 if (maybecx) {
478 ReportOutOfMemory(maybecx);
479 }
480 return nullptr;
481 }
482 str = str->asRope().rightChild();
483 } else {
484 end -= str->length();
485 CopyChars(end, str->asLinear());
486 if (nodeStack.empty()) {
487 break;
488 }
489 str = nodeStack.popCopy();
490 }
491 }
492 MOZ_ASSERT(end == out.get());
493
494 return out;
495 }
496
497 template <typename CharT>
AddStringToHash(uint32_t * hash,const CharT * chars,size_t len)498 void AddStringToHash(uint32_t* hash, const CharT* chars, size_t len) {
499 // It's tempting to use |HashString| instead of this loop, but that's
500 // slightly different than our existing implementation for non-ropes. We
501 // want to pretend we have a contiguous set of chars so we need to
502 // accumulate char by char rather than generate a new hash for substring
503 // and then accumulate that.
504 for (size_t i = 0; i < len; i++) {
505 *hash = mozilla::AddToHash(*hash, chars[i]);
506 }
507 }
508
AddStringToHash(uint32_t * hash,const JSString * str)509 void AddStringToHash(uint32_t* hash, const JSString* str) {
510 AutoCheckCannotGC nogc;
511 const auto& s = str->asLinear();
512 if (s.hasLatin1Chars()) {
513 AddStringToHash(hash, s.latin1Chars(nogc), s.length());
514 } else {
515 AddStringToHash(hash, s.twoByteChars(nogc), s.length());
516 }
517 }
518
hash(uint32_t * outHash) const519 bool JSRope::hash(uint32_t* outHash) const {
520 Vector<const JSString*, 8, SystemAllocPolicy> nodeStack;
521 const JSString* str = this;
522
523 *outHash = 0;
524
525 while (true) {
526 if (str->isRope()) {
527 if (!nodeStack.append(str->asRope().rightChild())) {
528 return false;
529 }
530 str = str->asRope().leftChild();
531 } else {
532 AddStringToHash(outHash, str);
533 if (nodeStack.empty()) {
534 break;
535 }
536 str = nodeStack.popCopy();
537 }
538 }
539
540 return true;
541 }
542
543 #if defined(DEBUG) || defined(JS_JITSPEW)
dumpRepresentation(js::GenericPrinter & out,int indent) const544 void JSRope::dumpRepresentation(js::GenericPrinter& out, int indent) const {
545 dumpRepresentationHeader(out, "JSRope");
546 indent += 2;
547
548 out.printf("%*sleft: ", indent, "");
549 leftChild()->dumpRepresentation(out, indent);
550
551 out.printf("%*sright: ", indent, "");
552 rightChild()->dumpRepresentation(out, indent);
553 }
554 #endif
555
556 namespace js {
557
558 template <>
CopyChars(char16_t * dest,const JSLinearString & str)559 void CopyChars(char16_t* dest, const JSLinearString& str) {
560 AutoCheckCannotGC nogc;
561 if (str.hasTwoByteChars()) {
562 PodCopy(dest, str.twoByteChars(nogc), str.length());
563 } else {
564 CopyAndInflateChars(dest, str.latin1Chars(nogc), str.length());
565 }
566 }
567
568 template <>
CopyChars(Latin1Char * dest,const JSLinearString & str)569 void CopyChars(Latin1Char* dest, const JSLinearString& str) {
570 AutoCheckCannotGC nogc;
571 if (str.hasLatin1Chars()) {
572 PodCopy(dest, str.latin1Chars(nogc), str.length());
573 } else {
574 /*
575 * When we flatten a TwoByte rope, we turn child ropes (including Latin1
576 * ropes) into TwoByte dependent strings. If one of these strings is
577 * also part of another Latin1 rope tree, we can have a Latin1 rope with
578 * a TwoByte descendent and we end up here when we flatten it. Although
579 * the chars are stored as TwoByte, we know they must be in the Latin1
580 * range, so we can safely deflate here.
581 */
582 size_t len = str.length();
583 const char16_t* chars = str.twoByteChars(nogc);
584 auto src = Span(chars, len);
585 MOZ_ASSERT(IsUtf16Latin1(src));
586 LossyConvertUtf16toLatin1(src, AsWritableChars(Span(dest, len)));
587 }
588 }
589
590 } /* namespace js */
591
592 template <typename CharT>
StringFlagsForCharType(uint32_t baseFlags)593 static constexpr uint32_t StringFlagsForCharType(uint32_t baseFlags) {
594 if constexpr (std::is_same_v<CharT, char16_t>) {
595 return baseFlags;
596 }
597
598 return baseFlags | JSString::LATIN1_CHARS_BIT;
599 }
600
UpdateNurseryBuffersOnTransfer(js::Nursery & nursery,JSString * from,JSString * to,void * buffer,size_t size)601 static bool UpdateNurseryBuffersOnTransfer(js::Nursery& nursery, JSString* from,
602 JSString* to, void* buffer,
603 size_t size) {
604 // Update the list of buffers associated with nursery cells when |buffer| is
605 // moved from string |from| to string |to|, depending on whether those strings
606 // are in the nursery or not.
607
608 if (from->isTenured() && !to->isTenured()) {
609 // Tenured leftmost child is giving its chars buffer to the
610 // nursery-allocated root node.
611 if (!nursery.registerMallocedBuffer(buffer, size)) {
612 return false;
613 }
614 } else if (!from->isTenured() && to->isTenured()) {
615 // Leftmost child is giving its nursery-held chars buffer to a
616 // tenured string.
617 nursery.removeMallocedBuffer(buffer, size);
618 }
619
620 return true;
621 }
622
CanReuseLeftmostBuffer(JSString * leftmostChild,size_t wholeLength,bool hasTwoByteChars)623 static bool CanReuseLeftmostBuffer(JSString* leftmostChild, size_t wholeLength,
624 bool hasTwoByteChars) {
625 if (!leftmostChild->isExtensible()) {
626 return false;
627 }
628
629 JSExtensibleString& str = leftmostChild->asExtensible();
630 return str.capacity() >= wholeLength &&
631 str.hasTwoByteChars() == hasTwoByteChars;
632 }
633
flatten(JSContext * maybecx)634 JSLinearString* JSRope::flatten(JSContext* maybecx) {
635 mozilla::Maybe<AutoGeckoProfilerEntry> entry;
636 if (maybecx && !maybecx->isHelperThreadContext()) {
637 entry.emplace(maybecx, "JSRope::flatten");
638 }
639
640 JSLinearString* str = flattenInternal();
641 if (!str && maybecx) {
642 ReportOutOfMemory(maybecx);
643 }
644
645 return str;
646 }
647
flattenInternal()648 JSLinearString* JSRope::flattenInternal() {
649 if (zone()->needsIncrementalBarrier()) {
650 return flattenInternal<WithIncrementalBarrier>();
651 }
652
653 return flattenInternal<NoBarrier>();
654 }
655
656 template <JSRope::UsingBarrier usingBarrier>
flattenInternal()657 JSLinearString* JSRope::flattenInternal() {
658 if (hasTwoByteChars()) {
659 return flattenInternal<usingBarrier, char16_t>(this);
660 }
661
662 return flattenInternal<usingBarrier, Latin1Char>(this);
663 }
664
665 template <JSRope::UsingBarrier usingBarrier, typename CharT>
666 /* static */
flattenInternal(JSRope * root)667 JSLinearString* JSRope::flattenInternal(JSRope* root) {
668 /*
669 * Consider the DAG of JSRopes rooted at |root|, with non-JSRopes as
670 * its leaves. Mutate the root JSRope into a JSExtensibleString containing
671 * the full flattened text that the root represents, and mutate all other
672 * JSRopes in the interior of the DAG into JSDependentStrings that refer to
673 * this new JSExtensibleString.
674 *
675 * If the leftmost leaf of our DAG is a JSExtensibleString, consider
676 * stealing its buffer for use in our new root, and transforming it into a
677 * JSDependentString too. Do not mutate any of the other leaves.
678 *
679 * Perform a depth-first dag traversal, splatting each node's characters
680 * into a contiguous buffer. Visit each rope node three times:
681 * 1. record position in the buffer and recurse into left child;
682 * 2. recurse into the right child;
683 * 3. transform the node into a dependent string.
684 * To avoid maintaining a stack, tree nodes are mutated to indicate how many
685 * times they have been visited. Since ropes can be dags, a node may be
686 * encountered multiple times during traversal. However, step 3 above leaves
687 * a valid dependent string, so everything works out.
688 *
689 * While ropes avoid all sorts of quadratic cases with string concatenation,
690 * they can't help when ropes are immediately flattened. One idiomatic case
691 * that we'd like to keep linear (and has traditionally been linear in SM
692 * and other JS engines) is:
693 *
694 * while (...) {
695 * s += ...
696 * s.flatten
697 * }
698 *
699 * Two behaviors accomplish this:
700 *
701 * - When the leftmost non-rope in the DAG we're flattening is a
702 * JSExtensibleString with sufficient capacity to hold the entire
703 * flattened string, we just flatten the DAG into its buffer. Then, when
704 * we transform the root of the DAG from a JSRope into a
705 * JSExtensibleString, we steal that buffer, and change the victim from a
706 * JSExtensibleString to a JSDependentString. In this case, the left-hand
707 * side of the string never needs to be copied.
708 *
709 * - Otherwise, we round up the total flattened size and create a fresh
710 * JSExtensibleString with that much capacity. If this in turn becomes the
711 * leftmost leaf of a subsequent flatten, we will hopefully be able to
712 * fill it, as in the case above.
713 *
714 * Note that, even though the code for creating JSDependentStrings avoids
715 * creating dependents of dependents, we can create that situation here: the
716 * JSExtensibleStrings we transform into JSDependentStrings might have
717 * JSDependentStrings pointing to them already. Stealing the buffer doesn't
718 * change its address, only its owning JSExtensibleString, so all chars()
719 * pointers in the JSDependentStrings are still valid.
720 */
721 const size_t wholeLength = root->length();
722 size_t wholeCapacity;
723 CharT* wholeChars;
724
725 AutoCheckCannotGC nogc;
726
727 Nursery& nursery = root->runtimeFromMainThread()->gc.nursery();
728
729 /* Find the left most string, containing the first string. */
730 JSRope* leftmostRope = root;
731 while (leftmostRope->leftChild()->isRope()) {
732 leftmostRope = &leftmostRope->leftChild()->asRope();
733 }
734 JSString* leftmostChild = leftmostRope->leftChild();
735
736 bool reuseLeftmostBuffer = CanReuseLeftmostBuffer(
737 leftmostChild, wholeLength, std::is_same_v<CharT, char16_t>);
738
739 if (reuseLeftmostBuffer) {
740 JSExtensibleString& left = leftmostChild->asExtensible();
741 wholeCapacity = left.capacity();
742 wholeChars = const_cast<CharT*>(left.nonInlineChars<CharT>(nogc));
743
744 // Nursery::registerMallocedBuffer is fallible, so attempt it first before
745 // doing anything irreversible.
746 if (!UpdateNurseryBuffersOnTransfer(nursery, &left, root, wholeChars,
747 wholeCapacity * sizeof(CharT))) {
748 return nullptr;
749 }
750 } else {
751 // If we can't reuse the leftmost child's buffer, allocate a new one.
752 if (!AllocChars(root, wholeLength, &wholeChars, &wholeCapacity)) {
753 return nullptr;
754 }
755
756 if (!root->isTenured()) {
757 if (!nursery.registerMallocedBuffer(wholeChars,
758 wholeCapacity * sizeof(CharT))) {
759 js_free(wholeChars);
760 return nullptr;
761 }
762 }
763 }
764
765 JSRope* str = root;
766 CharT* pos = wholeChars;
767
768 JSRope* parent = nullptr;
769 uint32_t parentFlag = 0;
770
771 first_visit_node : {
772 MOZ_ASSERT_IF(str != root, parent && parentFlag);
773 MOZ_ASSERT(!str->asRope().isBeingFlattened());
774
775 ropeBarrierDuringFlattening<usingBarrier>(str);
776
777 JSString& left = *str->d.s.u2.left;
778 str->d.s.u2.parent = parent;
779 str->setFlagBit(parentFlag);
780 parent = nullptr;
781 parentFlag = 0;
782
783 if (left.isRope()) {
784 /* Return to this node when 'left' done, then goto visit_right_child. */
785 parent = str;
786 parentFlag = FLATTEN_VISIT_RIGHT;
787 str = &left.asRope();
788 goto first_visit_node;
789 }
790 if (!(reuseLeftmostBuffer && &left == leftmostChild)) {
791 CopyChars(pos, left.asLinear());
792 }
793 pos += left.length();
794 }
795
796 visit_right_child : {
797 JSString& right = *str->d.s.u3.right;
798 if (right.isRope()) {
799 /* Return to this node when 'right' done, then goto finish_node. */
800 parent = str;
801 parentFlag = FLATTEN_FINISH_NODE;
802 str = &right.asRope();
803 goto first_visit_node;
804 }
805 CopyChars(pos, right.asLinear());
806 pos += right.length();
807 }
808
809 finish_node : {
810 if (str == root) {
811 goto finish_root;
812 }
813
814 MOZ_ASSERT(pos >= wholeChars);
815 CharT* chars = pos - str->length();
816 JSRope* strParent = str->d.s.u2.parent;
817 str->setNonInlineChars(chars);
818
819 MOZ_ASSERT(str->asRope().isBeingFlattened());
820 mozilla::DebugOnly<bool> visitRight = str->flags() & FLATTEN_VISIT_RIGHT;
821 bool finishNode = str->flags() & FLATTEN_FINISH_NODE;
822 MOZ_ASSERT(visitRight != finishNode);
823
824 // This also clears the flags related to flattening.
825 str->setLengthAndFlags(str->length(),
826 StringFlagsForCharType<CharT>(INIT_DEPENDENT_FLAGS));
827 str->d.s.u3.base =
828 reinterpret_cast<JSLinearString*>(root); /* will be true on exit */
829
830 // Every interior (rope) node in the rope's tree will be visited during
831 // the traversal and post-barriered here, so earlier additions of
832 // dependent.base -> root pointers are handled by this barrier as well.
833 //
834 // The only time post-barriers need do anything is when the root is in
835 // the nursery. Note that the root was a rope but will be an extensible
836 // string when we return, so it will not point to any strings and need
837 // not be barriered.
838 if (str->isTenured() && !root->isTenured()) {
839 root->storeBuffer()->putWholeCell(str);
840 }
841
842 str = strParent;
843 if (finishNode) {
844 goto finish_node;
845 }
846 MOZ_ASSERT(visitRight);
847 goto visit_right_child;
848 }
849
850 finish_root:
851 // We traversed all the way back up to the root so we're finished.
852 MOZ_ASSERT(str == root);
853 MOZ_ASSERT(pos == wholeChars + wholeLength);
854
855 root->setLengthAndFlags(wholeLength,
856 StringFlagsForCharType<CharT>(EXTENSIBLE_FLAGS));
857 root->setNonInlineChars(wholeChars);
858 root->d.s.u3.capacity = wholeCapacity;
859 AddCellMemory(root, root->asLinear().allocSize(), MemoryUse::StringContents);
860
861 if (reuseLeftmostBuffer) {
862 // Remove memory association for left node we're about to make into a
863 // dependent string.
864 JSString& left = *leftmostChild;
865 RemoveCellMemory(&left, left.allocSize(), MemoryUse::StringContents);
866
867 uint32_t flags = INIT_DEPENDENT_FLAGS;
868 if (left.inStringToAtomCache()) {
869 flags |= IN_STRING_TO_ATOM_CACHE;
870 }
871 left.setLengthAndFlags(left.length(), StringFlagsForCharType<CharT>(flags));
872 left.d.s.u3.base = &root->asLinear();
873 if (left.isTenured() && !root->isTenured()) {
874 // leftmost child -> root is a tenured -> nursery edge.
875 root->storeBuffer()->putWholeCell(&left);
876 }
877 }
878
879 return &root->asLinear();
880 }
881
882 template <JSRope::UsingBarrier usingBarrier>
883 /* static */
ropeBarrierDuringFlattening(JSRope * rope)884 inline void JSRope::ropeBarrierDuringFlattening(JSRope* rope) {
885 MOZ_ASSERT(!rope->isBeingFlattened());
886 if constexpr (usingBarrier) {
887 gc::PreWriteBarrierDuringFlattening(rope->leftChild());
888 gc::PreWriteBarrierDuringFlattening(rope->rightChild());
889 }
890 }
891
892 template <AllowGC allowGC>
EnsureLinear(JSContext * cx,typename MaybeRooted<JSString *,allowGC>::HandleType string)893 static JSLinearString* EnsureLinear(
894 JSContext* cx,
895 typename MaybeRooted<JSString*, allowGC>::HandleType string) {
896 JSLinearString* linear = string->ensureLinear(cx);
897 // Don't report an exception if GC is not allowed, just return nullptr.
898 if (!linear && !allowGC) {
899 cx->recoverFromOutOfMemory();
900 }
901 return linear;
902 }
903
904 template <AllowGC allowGC>
ConcatStrings(JSContext * cx,typename MaybeRooted<JSString *,allowGC>::HandleType left,typename MaybeRooted<JSString *,allowGC>::HandleType right,gc::InitialHeap heap)905 JSString* js::ConcatStrings(
906 JSContext* cx, typename MaybeRooted<JSString*, allowGC>::HandleType left,
907 typename MaybeRooted<JSString*, allowGC>::HandleType right,
908 gc::InitialHeap heap) {
909 MOZ_ASSERT_IF(!left->isAtom(), cx->isInsideCurrentZone(left));
910 MOZ_ASSERT_IF(!right->isAtom(), cx->isInsideCurrentZone(right));
911
912 size_t leftLen = left->length();
913 if (leftLen == 0) {
914 return right;
915 }
916
917 size_t rightLen = right->length();
918 if (rightLen == 0) {
919 return left;
920 }
921
922 size_t wholeLength = leftLen + rightLen;
923 if (MOZ_UNLIKELY(wholeLength > JSString::MAX_LENGTH)) {
924 // Don't report an exception if GC is not allowed, just return nullptr.
925 if (allowGC) {
926 js::ReportAllocationOverflow(cx);
927 }
928 return nullptr;
929 }
930
931 bool isLatin1 = left->hasLatin1Chars() && right->hasLatin1Chars();
932 bool canUseInline = isLatin1
933 ? JSInlineString::lengthFits<Latin1Char>(wholeLength)
934 : JSInlineString::lengthFits<char16_t>(wholeLength);
935 if (canUseInline) {
936 Latin1Char* latin1Buf = nullptr; // initialize to silence GCC warning
937 char16_t* twoByteBuf = nullptr; // initialize to silence GCC warning
938 JSInlineString* str =
939 isLatin1
940 ? AllocateInlineString<allowGC>(cx, wholeLength, &latin1Buf, heap)
941 : AllocateInlineString<allowGC>(cx, wholeLength, &twoByteBuf, heap);
942 if (!str) {
943 return nullptr;
944 }
945
946 AutoCheckCannotGC nogc;
947 JSLinearString* leftLinear = EnsureLinear<allowGC>(cx, left);
948 if (!leftLinear) {
949 return nullptr;
950 }
951 JSLinearString* rightLinear = EnsureLinear<allowGC>(cx, right);
952 if (!rightLinear) {
953 return nullptr;
954 }
955
956 if (isLatin1) {
957 PodCopy(latin1Buf, leftLinear->latin1Chars(nogc), leftLen);
958 PodCopy(latin1Buf + leftLen, rightLinear->latin1Chars(nogc), rightLen);
959 } else {
960 if (leftLinear->hasTwoByteChars()) {
961 PodCopy(twoByteBuf, leftLinear->twoByteChars(nogc), leftLen);
962 } else {
963 CopyAndInflateChars(twoByteBuf, leftLinear->latin1Chars(nogc), leftLen);
964 }
965 if (rightLinear->hasTwoByteChars()) {
966 PodCopy(twoByteBuf + leftLen, rightLinear->twoByteChars(nogc),
967 rightLen);
968 } else {
969 CopyAndInflateChars(twoByteBuf + leftLen,
970 rightLinear->latin1Chars(nogc), rightLen);
971 }
972 }
973
974 return str;
975 }
976
977 return JSRope::new_<allowGC>(cx, left, right, wholeLength, heap);
978 }
979
980 template JSString* js::ConcatStrings<CanGC>(JSContext* cx, HandleString left,
981 HandleString right,
982 gc::InitialHeap heap);
983
984 template JSString* js::ConcatStrings<NoGC>(JSContext* cx, JSString* const& left,
985 JSString* const& right,
986 gc::InitialHeap heap);
987
988 /**
989 * Copy |src[0..length]| to |dest[0..length]| when copying doesn't narrow and
990 * therefore can't lose information.
991 */
FillChars(char16_t * dest,const unsigned char * src,size_t length)992 static inline void FillChars(char16_t* dest, const unsigned char* src,
993 size_t length) {
994 ConvertLatin1toUtf16(AsChars(Span(src, length)), Span(dest, length));
995 }
996
FillChars(char16_t * dest,const char16_t * src,size_t length)997 static inline void FillChars(char16_t* dest, const char16_t* src,
998 size_t length) {
999 PodCopy(dest, src, length);
1000 }
1001
FillChars(unsigned char * dest,const unsigned char * src,size_t length)1002 static inline void FillChars(unsigned char* dest, const unsigned char* src,
1003 size_t length) {
1004 PodCopy(dest, src, length);
1005 }
1006
FillChars(char16_t * dest,LittleEndianChars src,size_t length)1007 static inline void FillChars(char16_t* dest, LittleEndianChars src,
1008 size_t length) {
1009 #if MOZ_LITTLE_ENDIAN()
1010 memcpy(dest, src.get(), length * sizeof(char16_t));
1011 #else
1012 for (size_t i = 0; i < length; ++i) {
1013 dest[i] = src[i];
1014 }
1015 #endif
1016 }
1017
1018 /**
1019 * Copy |src[0..length]| to |dest[0..length]| when copying *does* narrow, but
1020 * the user guarantees every runtime |src[i]| value can be stored without change
1021 * of value in |dest[i]|.
1022 */
FillFromCompatible(unsigned char * dest,const char16_t * src,size_t length)1023 static inline void FillFromCompatible(unsigned char* dest, const char16_t* src,
1024 size_t length) {
1025 LossyConvertUtf16toLatin1(Span(src, length),
1026 AsWritableChars(Span(dest, length)));
1027 }
1028
FillFromCompatible(unsigned char * dest,LittleEndianChars src,size_t length)1029 static inline void FillFromCompatible(unsigned char* dest,
1030 LittleEndianChars src, size_t length) {
1031 for (size_t i = 0; i < length; ++i) {
1032 dest[i] = AssertedCast<unsigned char>(src[i]);
1033 }
1034 }
1035
1036 #if defined(DEBUG) || defined(JS_JITSPEW)
dumpRepresentation(js::GenericPrinter & out,int indent) const1037 void JSDependentString::dumpRepresentation(js::GenericPrinter& out,
1038 int indent) const {
1039 dumpRepresentationHeader(out, "JSDependentString");
1040 indent += 2;
1041 out.printf("%*soffset: %zu\n", indent, "", baseOffset());
1042 out.printf("%*sbase: ", indent, "");
1043 base()->dumpRepresentation(out, indent);
1044 }
1045 #endif
1046
EqualChars(JSLinearString * str1,JSLinearString * str2)1047 bool js::EqualChars(JSLinearString* str1, JSLinearString* str2) {
1048 MOZ_ASSERT(str1->length() == str2->length());
1049
1050 size_t len = str1->length();
1051
1052 AutoCheckCannotGC nogc;
1053 if (str1->hasTwoByteChars()) {
1054 if (str2->hasTwoByteChars()) {
1055 return ArrayEqual(str1->twoByteChars(nogc), str2->twoByteChars(nogc),
1056 len);
1057 }
1058
1059 return EqualChars(str2->latin1Chars(nogc), str1->twoByteChars(nogc), len);
1060 }
1061
1062 if (str2->hasLatin1Chars()) {
1063 return ArrayEqual(str1->latin1Chars(nogc), str2->latin1Chars(nogc), len);
1064 }
1065
1066 return EqualChars(str1->latin1Chars(nogc), str2->twoByteChars(nogc), len);
1067 }
1068
HasSubstringAt(JSLinearString * text,JSLinearString * pat,size_t start)1069 bool js::HasSubstringAt(JSLinearString* text, JSLinearString* pat,
1070 size_t start) {
1071 MOZ_ASSERT(start + pat->length() <= text->length());
1072
1073 size_t patLen = pat->length();
1074
1075 AutoCheckCannotGC nogc;
1076 if (text->hasLatin1Chars()) {
1077 const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1078 if (pat->hasLatin1Chars()) {
1079 return ArrayEqual(textChars, pat->latin1Chars(nogc), patLen);
1080 }
1081
1082 return EqualChars(textChars, pat->twoByteChars(nogc), patLen);
1083 }
1084
1085 const char16_t* textChars = text->twoByteChars(nogc) + start;
1086 if (pat->hasTwoByteChars()) {
1087 return ArrayEqual(textChars, pat->twoByteChars(nogc), patLen);
1088 }
1089
1090 return EqualChars(pat->latin1Chars(nogc), textChars, patLen);
1091 }
1092
EqualStrings(JSContext * cx,JSString * str1,JSString * str2,bool * result)1093 bool js::EqualStrings(JSContext* cx, JSString* str1, JSString* str2,
1094 bool* result) {
1095 if (str1 == str2) {
1096 *result = true;
1097 return true;
1098 }
1099
1100 size_t length1 = str1->length();
1101 if (length1 != str2->length()) {
1102 *result = false;
1103 return true;
1104 }
1105
1106 JSLinearString* linear1 = str1->ensureLinear(cx);
1107 if (!linear1) {
1108 return false;
1109 }
1110 JSLinearString* linear2 = str2->ensureLinear(cx);
1111 if (!linear2) {
1112 return false;
1113 }
1114
1115 *result = EqualChars(linear1, linear2);
1116 return true;
1117 }
1118
EqualStrings(JSLinearString * str1,JSLinearString * str2)1119 bool js::EqualStrings(JSLinearString* str1, JSLinearString* str2) {
1120 if (str1 == str2) {
1121 return true;
1122 }
1123
1124 size_t length1 = str1->length();
1125 if (length1 != str2->length()) {
1126 return false;
1127 }
1128
1129 return EqualChars(str1, str2);
1130 }
1131
CompareChars(const char16_t * s1,size_t len1,JSLinearString * s2)1132 int32_t js::CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2) {
1133 AutoCheckCannotGC nogc;
1134 return s2->hasLatin1Chars()
1135 ? CompareChars(s1, len1, s2->latin1Chars(nogc), s2->length())
1136 : CompareChars(s1, len1, s2->twoByteChars(nogc), s2->length());
1137 }
1138
CompareStringsImpl(JSLinearString * str1,JSLinearString * str2)1139 static int32_t CompareStringsImpl(JSLinearString* str1, JSLinearString* str2) {
1140 size_t len1 = str1->length();
1141 size_t len2 = str2->length();
1142
1143 AutoCheckCannotGC nogc;
1144 if (str1->hasLatin1Chars()) {
1145 const Latin1Char* chars1 = str1->latin1Chars(nogc);
1146 return str2->hasLatin1Chars()
1147 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
1148 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
1149 }
1150
1151 const char16_t* chars1 = str1->twoByteChars(nogc);
1152 return str2->hasLatin1Chars()
1153 ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
1154 : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
1155 }
1156
CompareStrings(JSContext * cx,JSString * str1,JSString * str2,int32_t * result)1157 bool js::CompareStrings(JSContext* cx, JSString* str1, JSString* str2,
1158 int32_t* result) {
1159 MOZ_ASSERT(str1);
1160 MOZ_ASSERT(str2);
1161
1162 if (str1 == str2) {
1163 *result = 0;
1164 return true;
1165 }
1166
1167 JSLinearString* linear1 = str1->ensureLinear(cx);
1168 if (!linear1) {
1169 return false;
1170 }
1171
1172 JSLinearString* linear2 = str2->ensureLinear(cx);
1173 if (!linear2) {
1174 return false;
1175 }
1176
1177 *result = CompareStringsImpl(linear1, linear2);
1178 return true;
1179 }
1180
CompareAtoms(JSAtom * atom1,JSAtom * atom2)1181 int32_t js::CompareAtoms(JSAtom* atom1, JSAtom* atom2) {
1182 return CompareStringsImpl(atom1, atom2);
1183 }
1184
StringIsAscii(JSLinearString * str)1185 bool js::StringIsAscii(JSLinearString* str) {
1186 JS::AutoCheckCannotGC nogc;
1187 if (str->hasLatin1Chars()) {
1188 return mozilla::IsAscii(
1189 AsChars(Span(str->latin1Chars(nogc), str->length())));
1190 }
1191 return mozilla::IsAscii(Span(str->twoByteChars(nogc), str->length()));
1192 }
1193
StringEqualsAscii(JSLinearString * str,const char * asciiBytes)1194 bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes) {
1195 return StringEqualsAscii(str, asciiBytes, strlen(asciiBytes));
1196 }
1197
StringEqualsAscii(JSLinearString * str,const char * asciiBytes,size_t length)1198 bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes,
1199 size_t length) {
1200 MOZ_ASSERT(JS::StringIsASCII(Span(asciiBytes, length)));
1201
1202 if (length != str->length()) {
1203 return false;
1204 }
1205
1206 const Latin1Char* latin1 = reinterpret_cast<const Latin1Char*>(asciiBytes);
1207
1208 AutoCheckCannotGC nogc;
1209 return str->hasLatin1Chars()
1210 ? ArrayEqual(latin1, str->latin1Chars(nogc), length)
1211 : EqualChars(latin1, str->twoByteChars(nogc), length);
1212 }
1213
1214 template <typename CharT>
CheckStringIsIndex(const CharT * s,size_t length,uint32_t * indexp)1215 bool js::CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp) {
1216 MOZ_ASSERT(length > 0);
1217 MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH);
1218 MOZ_ASSERT(IsAsciiDigit(*s),
1219 "caller's fast path must have checked first char");
1220
1221 RangedPtr<const CharT> cp(s, length);
1222 const RangedPtr<const CharT> end(s + length, s, length);
1223
1224 uint32_t index = AsciiDigitToNumber(*cp++);
1225 uint32_t oldIndex = 0;
1226 uint32_t c = 0;
1227
1228 if (index != 0) {
1229 // Consume remaining characters only if the first character isn't '0'.
1230 while (cp < end && IsAsciiDigit(*cp)) {
1231 oldIndex = index;
1232 c = AsciiDigitToNumber(*cp);
1233 index = 10 * index + c;
1234 cp++;
1235 }
1236 }
1237
1238 // It's not an integer index if there are characters after the number.
1239 if (cp != end) {
1240 return false;
1241 }
1242
1243 // Look out for "4294967295" and larger-number strings that fit in
1244 // UINT32_CHAR_BUFFER_LENGTH: only unsigned 32-bit integers less than or equal
1245 // to MAX_ARRAY_INDEX shall pass.
1246 if (oldIndex < MAX_ARRAY_INDEX / 10 ||
1247 (oldIndex == MAX_ARRAY_INDEX / 10 && c <= (MAX_ARRAY_INDEX % 10))) {
1248 MOZ_ASSERT(index <= MAX_ARRAY_INDEX);
1249 *indexp = index;
1250 return true;
1251 }
1252
1253 return false;
1254 }
1255
1256 template bool js::CheckStringIsIndex(const Latin1Char* s, size_t length,
1257 uint32_t* indexp);
1258 template bool js::CheckStringIsIndex(const char16_t* s, size_t length,
1259 uint32_t* indexp);
1260
1261 template <typename CharT>
AtomCharsToIndex(const CharT * s,size_t length)1262 static uint32_t AtomCharsToIndex(const CharT* s, size_t length) {
1263 // Chars are known to be a valid index value (as determined by
1264 // CheckStringIsIndex) that didn't fit in the "index value" bits in the
1265 // header.
1266
1267 MOZ_ASSERT(length > 0);
1268 MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH);
1269
1270 RangedPtr<const CharT> cp(s, length);
1271 const RangedPtr<const CharT> end(s + length, s, length);
1272
1273 MOZ_ASSERT(IsAsciiDigit(*cp));
1274 uint32_t index = AsciiDigitToNumber(*cp++);
1275 MOZ_ASSERT(index != 0);
1276
1277 while (cp < end) {
1278 MOZ_ASSERT(IsAsciiDigit(*cp));
1279 index = 10 * index + AsciiDigitToNumber(*cp);
1280 cp++;
1281 }
1282
1283 MOZ_ASSERT(index <= MAX_ARRAY_INDEX);
1284 return index;
1285 }
1286
getIndexSlow() const1287 uint32_t JSAtom::getIndexSlow() const {
1288 MOZ_ASSERT(isIndex());
1289 MOZ_ASSERT(!hasIndexValue());
1290
1291 size_t len = length();
1292
1293 AutoCheckCannotGC nogc;
1294 return hasLatin1Chars() ? AtomCharsToIndex(latin1Chars(nogc), len)
1295 : AtomCharsToIndex(twoByteChars(nogc), len);
1296 }
1297
createSmallCharTable()1298 constexpr StaticStrings::SmallCharTable StaticStrings::createSmallCharTable() {
1299 SmallCharTable array{};
1300 for (size_t i = 0; i < SMALL_CHAR_TABLE_SIZE; i++) {
1301 array[i] = toSmallChar(i);
1302 }
1303 return array;
1304 }
1305
1306 const StaticStrings::SmallCharTable StaticStrings::toSmallCharTable =
1307 createSmallCharTable();
1308
init(JSContext * cx)1309 bool StaticStrings::init(JSContext* cx) {
1310 AutoAllocInAtomsZone az(cx);
1311
1312 static_assert(UNIT_STATIC_LIMIT - 1 <= JSString::MAX_LATIN1_CHAR,
1313 "Unit strings must fit in Latin1Char.");
1314
1315 using Latin1Range = mozilla::Range<const Latin1Char>;
1316
1317 for (uint32_t i = 0; i < UNIT_STATIC_LIMIT; i++) {
1318 Latin1Char ch = Latin1Char(i);
1319 JSLinearString* s =
1320 NewInlineString<NoGC>(cx, Latin1Range(&ch, 1), gc::TenuredHeap);
1321 if (!s) {
1322 return false;
1323 }
1324 HashNumber hash = mozilla::HashString(&ch, 1);
1325 unitStaticTable[i] = s->morphAtomizedStringIntoPermanentAtom(hash);
1326 }
1327
1328 for (uint32_t i = 0; i < NUM_LENGTH2_ENTRIES; i++) {
1329 Latin1Char buffer[] = {firstCharOfLength2(i), secondCharOfLength2(i)};
1330 JSLinearString* s =
1331 NewInlineString<NoGC>(cx, Latin1Range(buffer, 2), gc::TenuredHeap);
1332 if (!s) {
1333 return false;
1334 }
1335 HashNumber hash = mozilla::HashString(buffer, 2);
1336 length2StaticTable[i] = s->morphAtomizedStringIntoPermanentAtom(hash);
1337 }
1338
1339 for (uint32_t i = 0; i < INT_STATIC_LIMIT; i++) {
1340 if (i < 10) {
1341 intStaticTable[i] = unitStaticTable[i + '0'];
1342 } else if (i < 100) {
1343 auto index =
1344 getLength2IndexStatic(char(i / 10) + '0', char(i % 10) + '0');
1345 intStaticTable[i] = length2StaticTable[index];
1346 } else {
1347 Latin1Char buffer[] = {Latin1Char('0' + (i / 100)),
1348 Latin1Char('0' + ((i / 10) % 10)),
1349 Latin1Char('0' + (i % 10))};
1350 JSLinearString* s =
1351 NewInlineString<NoGC>(cx, Latin1Range(buffer, 3), gc::TenuredHeap);
1352 if (!s) {
1353 return false;
1354 }
1355 HashNumber hash = mozilla::HashString(buffer, 3);
1356 intStaticTable[i] = s->morphAtomizedStringIntoPermanentAtom(hash);
1357 }
1358
1359 // Static string initialization can not race, so allow even without the
1360 // lock.
1361 intStaticTable[i]->setIsIndex(i);
1362 }
1363
1364 return true;
1365 }
1366
TraceStaticString(JSTracer * trc,JSAtom * atom,const char * name)1367 inline void TraceStaticString(JSTracer* trc, JSAtom* atom, const char* name) {
1368 MOZ_ASSERT(atom->isPermanentAtom());
1369 TraceProcessGlobalRoot(trc, atom, name);
1370 }
1371
trace(JSTracer * trc)1372 void StaticStrings::trace(JSTracer* trc) {
1373 /* These strings never change, so barriers are not needed. */
1374
1375 for (auto& s : unitStaticTable) {
1376 TraceStaticString(trc, s, "unit-static-string");
1377 }
1378
1379 for (auto& s : length2StaticTable) {
1380 TraceStaticString(trc, s, "length2-static-string");
1381 }
1382
1383 /* This may mark some strings more than once, but so be it. */
1384 for (auto& s : intStaticTable) {
1385 TraceStaticString(trc, s, "int-static-string");
1386 }
1387 }
1388
MarkStringAndBasesNonDeduplicatable(JSLinearString * s)1389 static void MarkStringAndBasesNonDeduplicatable(JSLinearString* s) {
1390 while (true) {
1391 if (!s->isTenured()) {
1392 s->setNonDeduplicatable();
1393 }
1394 if (!s->hasBase()) {
1395 break;
1396 }
1397 s = s->base();
1398 }
1399 }
1400
init(JSContext * cx,JSString * s)1401 bool AutoStableStringChars::init(JSContext* cx, JSString* s) {
1402 RootedLinearString linearString(cx, s->ensureLinear(cx));
1403 if (!linearString) {
1404 return false;
1405 }
1406
1407 MOZ_ASSERT(state_ == Uninitialized);
1408
1409 // If the chars are inline then we need to copy them since they may be moved
1410 // by a compacting GC.
1411 if (baseIsInline(linearString)) {
1412 return linearString->hasTwoByteChars() ? copyTwoByteChars(cx, linearString)
1413 : copyLatin1Chars(cx, linearString);
1414 }
1415
1416 if (linearString->hasLatin1Chars()) {
1417 state_ = Latin1;
1418 latin1Chars_ = linearString->rawLatin1Chars();
1419 } else {
1420 state_ = TwoByte;
1421 twoByteChars_ = linearString->rawTwoByteChars();
1422 }
1423
1424 MarkStringAndBasesNonDeduplicatable(linearString);
1425
1426 s_ = linearString;
1427 return true;
1428 }
1429
initTwoByte(JSContext * cx,JSString * s)1430 bool AutoStableStringChars::initTwoByte(JSContext* cx, JSString* s) {
1431 RootedLinearString linearString(cx, s->ensureLinear(cx));
1432 if (!linearString) {
1433 return false;
1434 }
1435
1436 MOZ_ASSERT(state_ == Uninitialized);
1437
1438 if (linearString->hasLatin1Chars()) {
1439 return copyAndInflateLatin1Chars(cx, linearString);
1440 }
1441
1442 // If the chars are inline then we need to copy them since they may be moved
1443 // by a compacting GC.
1444 if (baseIsInline(linearString)) {
1445 return copyTwoByteChars(cx, linearString);
1446 }
1447
1448 state_ = TwoByte;
1449 twoByteChars_ = linearString->rawTwoByteChars();
1450
1451 MarkStringAndBasesNonDeduplicatable(linearString);
1452
1453 s_ = linearString;
1454 return true;
1455 }
1456
baseIsInline(HandleLinearString linearString)1457 bool AutoStableStringChars::baseIsInline(HandleLinearString linearString) {
1458 JSString* base = linearString;
1459 while (base->isDependent()) {
1460 base = base->asDependent().base();
1461 }
1462 return base->isInline();
1463 }
1464
1465 template <typename T>
allocOwnChars(JSContext * cx,size_t count)1466 T* AutoStableStringChars::allocOwnChars(JSContext* cx, size_t count) {
1467 static_assert(
1468 InlineCapacity >=
1469 sizeof(JS::Latin1Char) * JSFatInlineString::MAX_LENGTH_LATIN1 &&
1470 InlineCapacity >=
1471 sizeof(char16_t) * JSFatInlineString::MAX_LENGTH_TWO_BYTE,
1472 "InlineCapacity too small to hold fat inline strings");
1473
1474 static_assert((JSString::MAX_LENGTH &
1475 mozilla::tl::MulOverflowMask<sizeof(T)>::value) == 0,
1476 "Size calculation can overflow");
1477 MOZ_ASSERT(count <= JSString::MAX_LENGTH);
1478 size_t size = sizeof(T) * count;
1479
1480 ownChars_.emplace(cx);
1481 if (!ownChars_->resize(size)) {
1482 ownChars_.reset();
1483 return nullptr;
1484 }
1485
1486 return reinterpret_cast<T*>(ownChars_->begin());
1487 }
1488
copyAndInflateLatin1Chars(JSContext * cx,HandleLinearString linearString)1489 bool AutoStableStringChars::copyAndInflateLatin1Chars(
1490 JSContext* cx, HandleLinearString linearString) {
1491 char16_t* chars = allocOwnChars<char16_t>(cx, linearString->length());
1492 if (!chars) {
1493 return false;
1494 }
1495
1496 FillChars(chars, linearString->rawLatin1Chars(), linearString->length());
1497
1498 state_ = TwoByte;
1499 twoByteChars_ = chars;
1500 s_ = linearString;
1501 return true;
1502 }
1503
copyLatin1Chars(JSContext * cx,HandleLinearString linearString)1504 bool AutoStableStringChars::copyLatin1Chars(JSContext* cx,
1505 HandleLinearString linearString) {
1506 size_t length = linearString->length();
1507 JS::Latin1Char* chars = allocOwnChars<JS::Latin1Char>(cx, length);
1508 if (!chars) {
1509 return false;
1510 }
1511
1512 FillChars(chars, linearString->rawLatin1Chars(), length);
1513
1514 state_ = Latin1;
1515 latin1Chars_ = chars;
1516 s_ = linearString;
1517 return true;
1518 }
1519
copyTwoByteChars(JSContext * cx,HandleLinearString linearString)1520 bool AutoStableStringChars::copyTwoByteChars(JSContext* cx,
1521 HandleLinearString linearString) {
1522 size_t length = linearString->length();
1523 char16_t* chars = allocOwnChars<char16_t>(cx, length);
1524 if (!chars) {
1525 return false;
1526 }
1527
1528 FillChars(chars, linearString->rawTwoByteChars(), length);
1529
1530 state_ = TwoByte;
1531 twoByteChars_ = chars;
1532 s_ = linearString;
1533 return true;
1534 }
1535
1536 #if defined(DEBUG) || defined(JS_JITSPEW)
dump(js::GenericPrinter & out)1537 void JSAtom::dump(js::GenericPrinter& out) {
1538 out.printf("JSAtom* (%p) = ", (void*)this);
1539 this->JSString::dump(out);
1540 }
1541
dump()1542 void JSAtom::dump() {
1543 Fprinter out(stderr);
1544 dump(out);
1545 }
1546
dumpRepresentation(js::GenericPrinter & out,int indent) const1547 void JSExternalString::dumpRepresentation(js::GenericPrinter& out,
1548 int indent) const {
1549 dumpRepresentationHeader(out, "JSExternalString");
1550 indent += 2;
1551
1552 out.printf("%*sfinalizer: ((JSExternalStringCallbacks*) %p)\n", indent, "",
1553 callbacks());
1554 dumpRepresentationChars(out, indent);
1555 }
1556 #endif /* defined(DEBUG) || defined(JS_JITSPEW) */
1557
NewDependentString(JSContext * cx,JSString * baseArg,size_t start,size_t length,gc::InitialHeap heap)1558 JSLinearString* js::NewDependentString(JSContext* cx, JSString* baseArg,
1559 size_t start, size_t length,
1560 gc::InitialHeap heap) {
1561 if (length == 0) {
1562 return cx->emptyString();
1563 }
1564
1565 JSLinearString* base = baseArg->ensureLinear(cx);
1566 if (!base) {
1567 return nullptr;
1568 }
1569
1570 if (start == 0 && length == base->length()) {
1571 return base;
1572 }
1573
1574 if (base->hasTwoByteChars()) {
1575 AutoCheckCannotGC nogc;
1576 const char16_t* chars = base->twoByteChars(nogc) + start;
1577 if (JSLinearString* staticStr = cx->staticStrings().lookup(chars, length)) {
1578 return staticStr;
1579 }
1580 } else {
1581 AutoCheckCannotGC nogc;
1582 const Latin1Char* chars = base->latin1Chars(nogc) + start;
1583 if (JSLinearString* staticStr = cx->staticStrings().lookup(chars, length)) {
1584 return staticStr;
1585 }
1586 }
1587
1588 return JSDependentString::new_(cx, base, start, length, heap);
1589 }
1590
CanStoreCharsAsLatin1(const char16_t * s,size_t length)1591 static inline bool CanStoreCharsAsLatin1(const char16_t* s, size_t length) {
1592 return IsUtf16Latin1(Span(s, length));
1593 }
1594
CanStoreCharsAsLatin1(LittleEndianChars chars,size_t length)1595 static bool CanStoreCharsAsLatin1(LittleEndianChars chars, size_t length) {
1596 for (size_t i = 0; i < length; i++) {
1597 if (chars[i] > JSString::MAX_LATIN1_CHAR) {
1598 return false;
1599 }
1600 }
1601
1602 return true;
1603 }
1604
1605 template <AllowGC allowGC>
NewInlineStringDeflated(JSContext * cx,const mozilla::Range<const char16_t> & chars,gc::InitialHeap heap=gc::DefaultHeap)1606 static MOZ_ALWAYS_INLINE JSInlineString* NewInlineStringDeflated(
1607 JSContext* cx, const mozilla::Range<const char16_t>& chars,
1608 gc::InitialHeap heap = gc::DefaultHeap) {
1609 size_t len = chars.length();
1610 Latin1Char* storage;
1611 JSInlineString* str = AllocateInlineString<allowGC>(cx, len, &storage, heap);
1612 if (!str) {
1613 return nullptr;
1614 }
1615
1616 MOZ_ASSERT(CanStoreCharsAsLatin1(chars.begin().get(), len));
1617 FillFromCompatible(storage, chars.begin().get(), len);
1618 return str;
1619 }
1620
1621 template <AllowGC allowGC>
NewStringDeflated(JSContext * cx,const char16_t * s,size_t n,gc::InitialHeap heap)1622 static JSLinearString* NewStringDeflated(JSContext* cx, const char16_t* s,
1623 size_t n, gc::InitialHeap heap) {
1624 if (JSLinearString* str = TryEmptyOrStaticString(cx, s, n)) {
1625 return str;
1626 }
1627
1628 if (JSInlineString::lengthFits<Latin1Char>(n)) {
1629 return NewInlineStringDeflated<allowGC>(
1630 cx, mozilla::Range<const char16_t>(s, n), heap);
1631 }
1632
1633 auto news = cx->make_pod_arena_array<Latin1Char>(js::StringBufferArena, n);
1634 if (!news) {
1635 if (!allowGC) {
1636 cx->recoverFromOutOfMemory();
1637 }
1638 return nullptr;
1639 }
1640
1641 MOZ_ASSERT(CanStoreCharsAsLatin1(s, n));
1642 FillFromCompatible(news.get(), s, n);
1643
1644 return JSLinearString::new_<allowGC>(cx, std::move(news), n, heap);
1645 }
1646
NewStringDeflatedFromLittleEndianNoGC(JSContext * cx,LittleEndianChars chars,size_t length,gc::InitialHeap heap)1647 static JSLinearString* NewStringDeflatedFromLittleEndianNoGC(
1648 JSContext* cx, LittleEndianChars chars, size_t length,
1649 gc::InitialHeap heap) {
1650 MOZ_ASSERT(CanStoreCharsAsLatin1(chars, length));
1651
1652 if (JSInlineString::lengthFits<Latin1Char>(length)) {
1653 Latin1Char* storage;
1654 JSInlineString* str =
1655 AllocateInlineString<NoGC>(cx, length, &storage, heap);
1656 if (!str) {
1657 return nullptr;
1658 }
1659
1660 FillFromCompatible(storage, chars, length);
1661 return str;
1662 }
1663
1664 auto news =
1665 cx->make_pod_arena_array<Latin1Char>(js::StringBufferArena, length);
1666 if (!news) {
1667 cx->recoverFromOutOfMemory();
1668 return nullptr;
1669 }
1670
1671 FillFromCompatible(news.get(), chars, length);
1672
1673 return JSLinearString::new_<NoGC>(cx, std::move(news), length, heap);
1674 }
1675
1676 template <AllowGC allowGC, typename CharT>
NewStringDontDeflate(JSContext * cx,UniquePtr<CharT[],JS::FreePolicy> chars,size_t length,gc::InitialHeap heap)1677 JSLinearString* js::NewStringDontDeflate(
1678 JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t length,
1679 gc::InitialHeap heap) {
1680 if (JSLinearString* str = TryEmptyOrStaticString(cx, chars.get(), length)) {
1681 return str;
1682 }
1683
1684 if (JSInlineString::lengthFits<CharT>(length)) {
1685 // |chars.get()| is safe because 1) |NewInlineString| necessarily *copies*,
1686 // and 2) |chars| frees its contents only when this function returns.
1687 return NewInlineString<allowGC>(
1688 cx, mozilla::Range<const CharT>(chars.get(), length), heap);
1689 }
1690
1691 return JSLinearString::new_<allowGC>(cx, std::move(chars), length, heap);
1692 }
1693
1694 template JSLinearString* js::NewStringDontDeflate<CanGC>(
1695 JSContext* cx, UniqueTwoByteChars chars, size_t length,
1696 gc::InitialHeap heap);
1697
1698 template JSLinearString* js::NewStringDontDeflate<NoGC>(
1699 JSContext* cx, UniqueTwoByteChars chars, size_t length,
1700 gc::InitialHeap heap);
1701
1702 template JSLinearString* js::NewStringDontDeflate<CanGC>(
1703 JSContext* cx, UniqueLatin1Chars chars, size_t length,
1704 gc::InitialHeap heap);
1705
1706 template JSLinearString* js::NewStringDontDeflate<NoGC>(JSContext* cx,
1707 UniqueLatin1Chars chars,
1708 size_t length,
1709 gc::InitialHeap heap);
1710
1711 template <AllowGC allowGC, typename CharT>
NewString(JSContext * cx,UniquePtr<CharT[],JS::FreePolicy> chars,size_t length,gc::InitialHeap heap)1712 JSLinearString* js::NewString(JSContext* cx,
1713 UniquePtr<CharT[], JS::FreePolicy> chars,
1714 size_t length, gc::InitialHeap heap) {
1715 if constexpr (std::is_same_v<CharT, char16_t>) {
1716 if (CanStoreCharsAsLatin1(chars.get(), length)) {
1717 // Deflating copies from |chars.get()| and lets |chars| be freed on
1718 // return.
1719 return NewStringDeflated<allowGC>(cx, chars.get(), length, heap);
1720 }
1721 }
1722
1723 return NewStringDontDeflate<allowGC>(cx, std::move(chars), length, heap);
1724 }
1725
1726 template JSLinearString* js::NewString<CanGC>(JSContext* cx,
1727 UniqueTwoByteChars chars,
1728 size_t length,
1729 gc::InitialHeap heap);
1730
1731 template JSLinearString* js::NewString<NoGC>(JSContext* cx,
1732 UniqueTwoByteChars chars,
1733 size_t length,
1734 gc::InitialHeap heap);
1735
1736 template JSLinearString* js::NewString<CanGC>(JSContext* cx,
1737 UniqueLatin1Chars chars,
1738 size_t length,
1739 gc::InitialHeap heap);
1740
1741 template JSLinearString* js::NewString<NoGC>(JSContext* cx,
1742 UniqueLatin1Chars chars,
1743 size_t length,
1744 gc::InitialHeap heap);
1745
1746 namespace js {
1747
1748 template <AllowGC allowGC, typename CharT>
NewStringCopyNDontDeflate(JSContext * cx,const CharT * s,size_t n,gc::InitialHeap heap)1749 JSLinearString* NewStringCopyNDontDeflate(JSContext* cx, const CharT* s,
1750 size_t n, gc::InitialHeap heap) {
1751 if (JSLinearString* str = TryEmptyOrStaticString(cx, s, n)) {
1752 return str;
1753 }
1754
1755 if (JSInlineString::lengthFits<CharT>(n)) {
1756 return NewInlineString<allowGC>(cx, mozilla::Range<const CharT>(s, n),
1757 heap);
1758 }
1759
1760 auto news = cx->make_pod_arena_array<CharT>(js::StringBufferArena, n);
1761 if (!news) {
1762 if (!allowGC) {
1763 cx->recoverFromOutOfMemory();
1764 }
1765 return nullptr;
1766 }
1767
1768 FillChars(news.get(), s, n);
1769
1770 return JSLinearString::new_<allowGC>(cx, std::move(news), n, heap);
1771 }
1772
1773 template JSLinearString* NewStringCopyNDontDeflate<CanGC>(JSContext* cx,
1774 const char16_t* s,
1775 size_t n,
1776 gc::InitialHeap heap);
1777
1778 template JSLinearString* NewStringCopyNDontDeflate<NoGC>(JSContext* cx,
1779 const char16_t* s,
1780 size_t n,
1781 gc::InitialHeap heap);
1782
1783 template JSLinearString* NewStringCopyNDontDeflate<CanGC>(JSContext* cx,
1784 const Latin1Char* s,
1785 size_t n,
1786 gc::InitialHeap heap);
1787
1788 template JSLinearString* NewStringCopyNDontDeflate<NoGC>(JSContext* cx,
1789 const Latin1Char* s,
1790 size_t n,
1791 gc::InitialHeap heap);
1792
NewUndeflatedStringFromLittleEndianNoGC(JSContext * cx,LittleEndianChars chars,size_t length,gc::InitialHeap heap)1793 static JSLinearString* NewUndeflatedStringFromLittleEndianNoGC(
1794 JSContext* cx, LittleEndianChars chars, size_t length,
1795 gc::InitialHeap heap) {
1796 if (JSInlineString::lengthFits<char16_t>(length)) {
1797 char16_t* storage;
1798 JSInlineString* str =
1799 AllocateInlineString<NoGC>(cx, length, &storage, heap);
1800 if (!str) {
1801 return nullptr;
1802 }
1803
1804 FillChars(storage, chars, length);
1805 return str;
1806 }
1807
1808 auto news = cx->make_pod_arena_array<char16_t>(js::StringBufferArena, length);
1809 if (!news) {
1810 cx->recoverFromOutOfMemory();
1811 return nullptr;
1812 }
1813
1814 FillChars(news.get(), chars, length);
1815
1816 return JSLinearString::new_<NoGC>(cx, std::move(news), length, heap);
1817 }
1818
NewLatin1StringZ(JSContext * cx,UniqueChars chars,gc::InitialHeap heap)1819 JSLinearString* NewLatin1StringZ(JSContext* cx, UniqueChars chars,
1820 gc::InitialHeap heap) {
1821 size_t length = strlen(chars.get());
1822 UniqueLatin1Chars latin1(reinterpret_cast<Latin1Char*>(chars.release()));
1823 return NewString<CanGC>(cx, std::move(latin1), length, heap);
1824 }
1825
1826 template <AllowGC allowGC, typename CharT>
NewStringCopyN(JSContext * cx,const CharT * s,size_t n,gc::InitialHeap heap)1827 JSLinearString* NewStringCopyN(JSContext* cx, const CharT* s, size_t n,
1828 gc::InitialHeap heap) {
1829 if constexpr (std::is_same_v<CharT, char16_t>) {
1830 if (CanStoreCharsAsLatin1(s, n)) {
1831 return NewStringDeflated<allowGC>(cx, s, n, heap);
1832 }
1833 }
1834
1835 return NewStringCopyNDontDeflate<allowGC>(cx, s, n, heap);
1836 }
1837
1838 template JSLinearString* NewStringCopyN<CanGC>(JSContext* cx, const char16_t* s,
1839 size_t n, gc::InitialHeap heap);
1840
1841 template JSLinearString* NewStringCopyN<NoGC>(JSContext* cx, const char16_t* s,
1842 size_t n, gc::InitialHeap heap);
1843
1844 template JSLinearString* NewStringCopyN<CanGC>(JSContext* cx,
1845 const Latin1Char* s, size_t n,
1846 gc::InitialHeap heap);
1847
1848 template JSLinearString* NewStringCopyN<NoGC>(JSContext* cx,
1849 const Latin1Char* s, size_t n,
1850 gc::InitialHeap heap);
1851
NewStringFromLittleEndianNoGC(JSContext * cx,LittleEndianChars chars,size_t length,gc::InitialHeap heap)1852 JSLinearString* NewStringFromLittleEndianNoGC(JSContext* cx,
1853 LittleEndianChars chars,
1854 size_t length,
1855 gc::InitialHeap heap) {
1856 if (JSLinearString* str = TryEmptyOrStaticString(cx, chars, length)) {
1857 return str;
1858 }
1859
1860 if (CanStoreCharsAsLatin1(chars, length)) {
1861 return NewStringDeflatedFromLittleEndianNoGC(cx, chars, length, heap);
1862 }
1863
1864 return NewUndeflatedStringFromLittleEndianNoGC(cx, chars, length, heap);
1865 }
1866
1867 template <js::AllowGC allowGC>
NewStringCopyUTF8N(JSContext * cx,const JS::UTF8Chars utf8,gc::InitialHeap heap)1868 JSLinearString* NewStringCopyUTF8N(JSContext* cx, const JS::UTF8Chars utf8,
1869 gc::InitialHeap heap) {
1870 JS::SmallestEncoding encoding = JS::FindSmallestEncoding(utf8);
1871 if (encoding == JS::SmallestEncoding::ASCII) {
1872 return NewStringCopyN<allowGC>(cx, utf8.begin().get(), utf8.length(), heap);
1873 }
1874
1875 size_t length;
1876 if (encoding == JS::SmallestEncoding::Latin1) {
1877 UniqueLatin1Chars latin1(
1878 UTF8CharsToNewLatin1CharsZ(cx, utf8, &length, js::StringBufferArena)
1879 .get());
1880 if (!latin1) {
1881 return nullptr;
1882 }
1883
1884 return NewString<allowGC>(cx, std::move(latin1), length, heap);
1885 }
1886
1887 MOZ_ASSERT(encoding == JS::SmallestEncoding::UTF16);
1888
1889 UniqueTwoByteChars utf16(
1890 UTF8CharsToNewTwoByteCharsZ(cx, utf8, &length, js::StringBufferArena)
1891 .get());
1892 if (!utf16) {
1893 return nullptr;
1894 }
1895
1896 return NewString<allowGC>(cx, std::move(utf16), length, heap);
1897 }
1898
1899 template JSLinearString* NewStringCopyUTF8N<CanGC>(JSContext* cx,
1900 const JS::UTF8Chars utf8,
1901 gc::InitialHeap heap);
1902
lookup(const char16_t * chars,size_t len) const1903 MOZ_ALWAYS_INLINE JSString* ExternalStringCache::lookup(const char16_t* chars,
1904 size_t len) const {
1905 AutoCheckCannotGC nogc;
1906
1907 for (size_t i = 0; i < NumEntries; i++) {
1908 JSString* str = entries_[i];
1909 if (!str || str->length() != len) {
1910 continue;
1911 }
1912
1913 const char16_t* strChars = str->asLinear().nonInlineTwoByteChars(nogc);
1914 if (chars == strChars) {
1915 // Note that we don't need an incremental barrier here or below.
1916 // The cache is purged on GC so any string we get from the cache
1917 // must have been allocated after the GC started.
1918 return str;
1919 }
1920
1921 // Compare the chars. Don't do this for long strings as it will be
1922 // faster to allocate a new external string.
1923 static const size_t MaxLengthForCharComparison = 100;
1924 if (len <= MaxLengthForCharComparison && ArrayEqual(chars, strChars, len)) {
1925 return str;
1926 }
1927 }
1928
1929 return nullptr;
1930 }
1931
put(JSString * str)1932 MOZ_ALWAYS_INLINE void ExternalStringCache::put(JSString* str) {
1933 MOZ_ASSERT(str->isExternal());
1934
1935 for (size_t i = NumEntries - 1; i > 0; i--) {
1936 entries_[i] = entries_[i - 1];
1937 }
1938
1939 entries_[0] = str;
1940 }
1941
NewMaybeExternalString(JSContext * cx,const char16_t * s,size_t n,const JSExternalStringCallbacks * callbacks,bool * allocatedExternal,gc::InitialHeap heap)1942 JSString* NewMaybeExternalString(JSContext* cx, const char16_t* s, size_t n,
1943 const JSExternalStringCallbacks* callbacks,
1944 bool* allocatedExternal,
1945 gc::InitialHeap heap) {
1946 if (JSString* str = TryEmptyOrStaticString(cx, s, n)) {
1947 *allocatedExternal = false;
1948 return str;
1949 }
1950
1951 if (JSThinInlineString::lengthFits<Latin1Char>(n) &&
1952 CanStoreCharsAsLatin1(s, n)) {
1953 *allocatedExternal = false;
1954 return NewInlineStringDeflated<AllowGC::CanGC>(
1955 cx, mozilla::Range<const char16_t>(s, n), heap);
1956 }
1957
1958 ExternalStringCache& cache = cx->zone()->externalStringCache();
1959 if (JSString* str = cache.lookup(s, n)) {
1960 *allocatedExternal = false;
1961 return str;
1962 }
1963
1964 JSString* str = JSExternalString::new_(cx, s, n, callbacks);
1965 if (!str) {
1966 return nullptr;
1967 }
1968
1969 *allocatedExternal = true;
1970 cache.put(str);
1971 return str;
1972 }
1973
1974 } /* namespace js */
1975
1976 #if defined(DEBUG) || defined(JS_JITSPEW)
dumpRepresentation(js::GenericPrinter & out,int indent) const1977 void JSExtensibleString::dumpRepresentation(js::GenericPrinter& out,
1978 int indent) const {
1979 dumpRepresentationHeader(out, "JSExtensibleString");
1980 indent += 2;
1981
1982 out.printf("%*scapacity: %zu\n", indent, "", capacity());
1983 dumpRepresentationChars(out, indent);
1984 }
1985
dumpRepresentation(js::GenericPrinter & out,int indent) const1986 void JSInlineString::dumpRepresentation(js::GenericPrinter& out,
1987 int indent) const {
1988 dumpRepresentationHeader(
1989 out, isFatInline() ? "JSFatInlineString" : "JSThinInlineString");
1990 indent += 2;
1991
1992 dumpRepresentationChars(out, indent);
1993 }
1994
dumpRepresentation(js::GenericPrinter & out,int indent) const1995 void JSLinearString::dumpRepresentation(js::GenericPrinter& out,
1996 int indent) const {
1997 dumpRepresentationHeader(out, "JSLinearString");
1998 indent += 2;
1999
2000 dumpRepresentationChars(out, indent);
2001 }
2002 #endif
2003
2004 struct RepresentativeExternalString : public JSExternalStringCallbacks {
finalizeRepresentativeExternalString2005 void finalize(char16_t* chars) const override {
2006 // Constant chars, nothing to do.
2007 }
sizeOfBufferRepresentativeExternalString2008 size_t sizeOfBuffer(const char16_t* chars,
2009 mozilla::MallocSizeOf mallocSizeOf) const override {
2010 // This string's buffer is not heap-allocated, so its malloc size is 0.
2011 return 0;
2012 }
2013 };
2014
2015 static const RepresentativeExternalString RepresentativeExternalStringCallbacks;
2016
2017 template <typename CheckString, typename CharT>
FillWithRepresentatives(JSContext * cx,HandleArrayObject array,uint32_t * index,const CharT * chars,size_t len,size_t fatInlineMaxLength,const CheckString & check)2018 static bool FillWithRepresentatives(JSContext* cx, HandleArrayObject array,
2019 uint32_t* index, const CharT* chars,
2020 size_t len, size_t fatInlineMaxLength,
2021 const CheckString& check) {
2022 auto AppendString = [&check](JSContext* cx, HandleArrayObject array,
2023 uint32_t* index, HandleString s) {
2024 MOZ_ASSERT(check(s));
2025 (void)check; // silence clang -Wunused-lambda-capture in opt builds
2026 RootedValue val(cx, StringValue(s));
2027 return JS_DefineElement(cx, array, (*index)++, val, 0);
2028 };
2029
2030 MOZ_ASSERT(len > fatInlineMaxLength);
2031
2032 // Normal atom.
2033 RootedString atom1(cx, AtomizeChars(cx, chars, len));
2034 if (!atom1 || !AppendString(cx, array, index, atom1)) {
2035 return false;
2036 }
2037 MOZ_ASSERT(atom1->isAtom());
2038
2039 // Inline atom.
2040 RootedString atom2(cx, AtomizeChars(cx, chars, 2));
2041 if (!atom2 || !AppendString(cx, array, index, atom2)) {
2042 return false;
2043 }
2044 MOZ_ASSERT(atom2->isAtom());
2045 MOZ_ASSERT(atom2->isInline());
2046
2047 // Fat inline atom.
2048 RootedString atom3(cx, AtomizeChars(cx, chars, fatInlineMaxLength));
2049 if (!atom3 || !AppendString(cx, array, index, atom3)) {
2050 return false;
2051 }
2052 MOZ_ASSERT(atom3->isAtom());
2053 MOZ_ASSERT(atom3->isFatInline());
2054
2055 // Normal linear string.
2056 RootedString linear1(cx, NewStringCopyN<CanGC>(cx, chars, len));
2057 if (!linear1 || !AppendString(cx, array, index, linear1)) {
2058 return false;
2059 }
2060 MOZ_ASSERT(linear1->isLinear());
2061
2062 // Inline string.
2063 RootedString linear2(cx, NewStringCopyN<CanGC>(cx, chars, 3));
2064 if (!linear2 || !AppendString(cx, array, index, linear2)) {
2065 return false;
2066 }
2067 MOZ_ASSERT(linear2->isLinear());
2068 MOZ_ASSERT(linear2->isInline());
2069
2070 // Fat inline string.
2071 RootedString linear3(cx,
2072 NewStringCopyN<CanGC>(cx, chars, fatInlineMaxLength));
2073 if (!linear3 || !AppendString(cx, array, index, linear3)) {
2074 return false;
2075 }
2076 MOZ_ASSERT(linear3->isLinear());
2077 MOZ_ASSERT(linear3->isFatInline());
2078
2079 // Rope.
2080 RootedString rope(cx, ConcatStrings<CanGC>(cx, atom1, atom3));
2081 if (!rope || !AppendString(cx, array, index, rope)) {
2082 return false;
2083 }
2084 MOZ_ASSERT(rope->isRope());
2085
2086 // Dependent.
2087 RootedString dep(cx, NewDependentString(cx, atom1, 0, len - 2));
2088 if (!dep || !AppendString(cx, array, index, dep)) {
2089 return false;
2090 }
2091 MOZ_ASSERT(dep->isDependent());
2092
2093 // Extensible.
2094 RootedString temp1(cx, NewStringCopyN<CanGC>(cx, chars, len));
2095 if (!temp1) {
2096 return false;
2097 }
2098 RootedString extensible(cx, ConcatStrings<CanGC>(cx, temp1, atom3));
2099 if (!extensible || !extensible->ensureLinear(cx)) {
2100 return false;
2101 }
2102 if (!AppendString(cx, array, index, extensible)) {
2103 return false;
2104 }
2105 MOZ_ASSERT(extensible->isExtensible());
2106
2107 // External. Note that we currently only support TwoByte external strings.
2108 RootedString external1(cx), external2(cx);
2109 if constexpr (std::is_same_v<CharT, char16_t>) {
2110 external1 = JS_NewExternalString(cx, (const char16_t*)chars, len,
2111 &RepresentativeExternalStringCallbacks);
2112 if (!external1 || !AppendString(cx, array, index, external1)) {
2113 return false;
2114 }
2115 MOZ_ASSERT(external1->isExternal());
2116
2117 external2 = JS_NewExternalString(cx, (const char16_t*)chars, 2,
2118 &RepresentativeExternalStringCallbacks);
2119 if (!external2 || !AppendString(cx, array, index, external2)) {
2120 return false;
2121 }
2122 MOZ_ASSERT(external2->isExternal());
2123 }
2124
2125 // Assert the strings still have the types we expect after creating the
2126 // other strings.
2127
2128 MOZ_ASSERT(atom1->isAtom());
2129 MOZ_ASSERT(atom2->isAtom());
2130 MOZ_ASSERT(atom3->isAtom());
2131 MOZ_ASSERT(atom2->isInline());
2132 MOZ_ASSERT(atom3->isFatInline());
2133
2134 MOZ_ASSERT(linear1->isLinear());
2135 MOZ_ASSERT(linear2->isLinear());
2136 MOZ_ASSERT(linear3->isLinear());
2137 MOZ_ASSERT(linear2->isInline());
2138 MOZ_ASSERT(linear3->isFatInline());
2139
2140 MOZ_ASSERT(rope->isRope());
2141 MOZ_ASSERT(dep->isDependent());
2142 MOZ_ASSERT(extensible->isExtensible());
2143 MOZ_ASSERT_IF(external1, external1->isExternal());
2144 MOZ_ASSERT_IF(external2, external2->isExternal());
2145 return true;
2146 }
2147
2148 /* static */
fillWithRepresentatives(JSContext * cx,HandleArrayObject array)2149 bool JSString::fillWithRepresentatives(JSContext* cx, HandleArrayObject array) {
2150 uint32_t index = 0;
2151
2152 auto CheckTwoByte = [](JSString* str) { return str->hasTwoByteChars(); };
2153 auto CheckLatin1 = [](JSString* str) { return str->hasLatin1Chars(); };
2154
2155 // Append TwoByte strings.
2156 static const char16_t twoByteChars[] =
2157 u"\u1234abc\0def\u5678ghijklmasdfa\0xyz0123456789";
2158 if (!FillWithRepresentatives(
2159 cx, array, &index, twoByteChars, std::size(twoByteChars) - 1,
2160 JSFatInlineString::MAX_LENGTH_TWO_BYTE, CheckTwoByte)) {
2161 return false;
2162 }
2163
2164 // Append Latin1 strings.
2165 static const Latin1Char latin1Chars[] = "abc\0defghijklmasdfa\0xyz0123456789";
2166 if (!FillWithRepresentatives(
2167 cx, array, &index, latin1Chars, std::size(latin1Chars) - 1,
2168 JSFatInlineString::MAX_LENGTH_LATIN1, CheckLatin1)) {
2169 return false;
2170 }
2171
2172 // Now create forcibly-tenured versions of each of these string types. Note
2173 // that this is best-effort; if nursery strings are disabled, or we GC
2174 // midway through here, then we may end up with fewer nursery strings than
2175 // desired. Also, some types of strings are not nursery-allocatable, so
2176 // this will always produce some number of redundant strings.
2177 gc::AutoSuppressNurseryCellAlloc suppress(cx);
2178
2179 // Append TwoByte strings.
2180 if (!FillWithRepresentatives(
2181 cx, array, &index, twoByteChars, std::size(twoByteChars) - 1,
2182 JSFatInlineString::MAX_LENGTH_TWO_BYTE, CheckTwoByte)) {
2183 return false;
2184 }
2185
2186 // Append Latin1 strings.
2187 if (!FillWithRepresentatives(
2188 cx, array, &index, latin1Chars, std::size(latin1Chars) - 1,
2189 JSFatInlineString::MAX_LENGTH_LATIN1, CheckLatin1)) {
2190 return false;
2191 }
2192
2193 MOZ_ASSERT(index == 40);
2194
2195 return true;
2196 }
2197
2198 /*** Conversions ************************************************************/
2199
EncodeLatin1(JSContext * cx,JSString * str)2200 UniqueChars js::EncodeLatin1(JSContext* cx, JSString* str) {
2201 JSLinearString* linear = str->ensureLinear(cx);
2202 if (!linear) {
2203 return nullptr;
2204 }
2205
2206 JS::AutoCheckCannotGC nogc;
2207 if (linear->hasTwoByteChars()) {
2208 JS::Latin1CharsZ chars =
2209 JS::LossyTwoByteCharsToNewLatin1CharsZ(cx, linear->twoByteRange(nogc));
2210 return UniqueChars(chars.c_str());
2211 }
2212
2213 size_t len = str->length();
2214 Latin1Char* buf = cx->pod_malloc<Latin1Char>(len + 1);
2215 if (!buf) {
2216 return nullptr;
2217 }
2218
2219 FillChars(buf, linear->latin1Chars(nogc), len);
2220 buf[len] = '\0';
2221
2222 return UniqueChars(reinterpret_cast<char*>(buf));
2223 }
2224
EncodeAscii(JSContext * cx,JSString * str)2225 UniqueChars js::EncodeAscii(JSContext* cx, JSString* str) {
2226 JSLinearString* linear = str->ensureLinear(cx);
2227 if (!linear) {
2228 return nullptr;
2229 }
2230
2231 MOZ_ASSERT(StringIsAscii(linear));
2232 return EncodeLatin1(cx, linear);
2233 }
2234
IdToPrintableUTF8(JSContext * cx,HandleId id,IdToPrintableBehavior behavior)2235 UniqueChars js::IdToPrintableUTF8(JSContext* cx, HandleId id,
2236 IdToPrintableBehavior behavior) {
2237 // ToString(<symbol>) throws a TypeError, therefore require that callers
2238 // request source representation when |id| is a property key.
2239 MOZ_ASSERT_IF(
2240 behavior == IdToPrintableBehavior::IdIsIdentifier,
2241 id.isAtom() && frontend::IsIdentifierNameOrPrivateName(id.toAtom()));
2242
2243 RootedValue v(cx, IdToValue(id));
2244 JSString* str;
2245 if (behavior == IdToPrintableBehavior::IdIsPropertyKey) {
2246 str = ValueToSource(cx, v);
2247 } else {
2248 str = ToString<CanGC>(cx, v);
2249 }
2250 if (!str) {
2251 return nullptr;
2252 }
2253 return StringToNewUTF8CharsZ(cx, *str);
2254 }
2255
2256 template <AllowGC allowGC>
ToStringSlow(JSContext * cx,typename MaybeRooted<Value,allowGC>::HandleType arg)2257 JSString* js::ToStringSlow(
2258 JSContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg) {
2259 /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
2260 MOZ_ASSERT(!arg.isString());
2261
2262 Value v = arg;
2263 if (!v.isPrimitive()) {
2264 MOZ_ASSERT(!cx->isHelperThreadContext());
2265 if (!allowGC) {
2266 return nullptr;
2267 }
2268 RootedValue v2(cx, v);
2269 if (!ToPrimitive(cx, JSTYPE_STRING, &v2)) {
2270 return nullptr;
2271 }
2272 v = v2;
2273 }
2274
2275 JSString* str;
2276 if (v.isString()) {
2277 str = v.toString();
2278 } else if (v.isInt32()) {
2279 str = Int32ToString<allowGC>(cx, v.toInt32());
2280 } else if (v.isDouble()) {
2281 str = NumberToString<allowGC>(cx, v.toDouble());
2282 } else if (v.isBoolean()) {
2283 str = BooleanToString(cx, v.toBoolean());
2284 } else if (v.isNull()) {
2285 str = cx->names().null;
2286 } else if (v.isSymbol()) {
2287 MOZ_ASSERT(!cx->isHelperThreadContext());
2288 if (allowGC) {
2289 JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
2290 JSMSG_SYMBOL_TO_STRING);
2291 }
2292 return nullptr;
2293 } else if (v.isBigInt()) {
2294 if (!allowGC) {
2295 return nullptr;
2296 }
2297 RootedBigInt i(cx, v.toBigInt());
2298 str = BigInt::toString<CanGC>(cx, i, 10);
2299 } else {
2300 MOZ_ASSERT(v.isUndefined());
2301 str = cx->names().undefined;
2302 }
2303 return str;
2304 }
2305
2306 template JSString* js::ToStringSlow<CanGC>(JSContext* cx, HandleValue arg);
2307
2308 template JSString* js::ToStringSlow<NoGC>(JSContext* cx, const Value& arg);
2309
ToStringSlow(JSContext * cx,HandleValue v)2310 JS_PUBLIC_API JSString* js::ToStringSlow(JSContext* cx, HandleValue v) {
2311 return ToStringSlow<CanGC>(cx, v);
2312 }
2313