1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "vm/StringType-inl.h"
8 
9 #include "mozilla/ArrayUtils.h"
10 #include "mozilla/DebugOnly.h"
11 #include "mozilla/HashFunctions.h"
12 #include "mozilla/Latin1.h"
13 #include "mozilla/MathAlgorithms.h"
14 #include "mozilla/MemoryReporting.h"
15 #include "mozilla/PodOperations.h"
16 #include "mozilla/RangedPtr.h"
17 #include "mozilla/TextUtils.h"
18 #include "mozilla/Utf8.h"
19 #include "mozilla/Vector.h"
20 
21 #include <algorithm>    // std::{all_of,copy_n,enable_if,is_const,move}
22 #include <iterator>     // std::size
23 #include <type_traits>  // std::is_same, std::is_unsigned
24 
25 #include "jsfriendapi.h"
26 
27 #include "builtin/Boolean.h"
28 #ifdef ENABLE_RECORD_TUPLE
29 #  include "builtin/RecordObject.h"
30 #endif
31 #include "frontend/BytecodeCompiler.h"
32 #include "gc/MaybeRooted.h"
33 #include "gc/Nursery.h"
34 #include "js/CharacterEncoding.h"
35 #include "js/friend/ErrorMessages.h"  // js::GetErrorMessage, JSMSG_*
36 #include "js/PropertyAndElement.h"    // JS_DefineElement
37 #include "js/StableStringChars.h"
38 #include "js/Symbol.h"
39 #include "js/UbiNode.h"
40 #include "util/StringBuffer.h"
41 #include "util/Unicode.h"
42 #include "vm/GeckoProfiler.h"
43 #include "vm/StaticStrings.h"
44 #include "vm/ToSource.h"  // js::ValueToSource
45 
46 #include "gc/Marking-inl.h"
47 #include "vm/GeckoProfiler-inl.h"
48 #include "vm/JSContext-inl.h"
49 #include "vm/JSObject-inl.h"
50 #include "vm/Realm-inl.h"
51 #ifdef ENABLE_RECORD_TUPLE
52 #  include "vm/RecordType.h"
53 #  include "vm/TupleType.h"
54 #endif
55 
56 using namespace js;
57 
58 using mozilla::ArrayEqual;
59 using mozilla::AsWritableChars;
60 using mozilla::ConvertLatin1toUtf16;
61 using mozilla::IsAsciiDigit;
62 using mozilla::IsUtf16Latin1;
63 using mozilla::LossyConvertUtf16toLatin1;
64 using mozilla::PodCopy;
65 using mozilla::RangedPtr;
66 using mozilla::RoundUpPow2;
67 using mozilla::Span;
68 
69 using JS::AutoCheckCannotGC;
70 using JS::AutoStableStringChars;
71 
72 using UniqueLatin1Chars = UniquePtr<Latin1Char[], JS::FreePolicy>;
73 
sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf)74 size_t JSString::sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf) {
75   // JSRope: do nothing, we'll count all children chars when we hit the leaf
76   // strings.
77   if (isRope()) {
78     return 0;
79   }
80 
81   MOZ_ASSERT(isLinear());
82 
83   // JSDependentString: do nothing, we'll count the chars when we hit the base
84   // string.
85   if (isDependent()) {
86     return 0;
87   }
88 
89   // JSExternalString: Ask the embedding to tell us what's going on.
90   if (isExternal()) {
91     // Our callback isn't supposed to cause GC.
92     JS::AutoSuppressGCAnalysis nogc;
93     return asExternal().callbacks()->sizeOfBuffer(asExternal().twoByteChars(),
94                                                   mallocSizeOf);
95   }
96 
97   // JSExtensibleString: count the full capacity, not just the used space.
98   if (isExtensible()) {
99     JSExtensibleString& extensible = asExtensible();
100     return extensible.hasLatin1Chars()
101                ? mallocSizeOf(extensible.rawLatin1Chars())
102                : mallocSizeOf(extensible.rawTwoByteChars());
103   }
104 
105   // JSInlineString, JSFatInlineString [JSInlineAtom, JSFatInlineAtom]: the
106   // chars are inline.
107   if (isInline()) {
108     return 0;
109   }
110 
111   // Everything else: measure the space for the chars.
112   JSLinearString& linear = asLinear();
113   MOZ_ASSERT(linear.ownsMallocedChars());
114   return linear.hasLatin1Chars() ? mallocSizeOf(linear.rawLatin1Chars())
115                                  : mallocSizeOf(linear.rawTwoByteChars());
116 }
117 
size(mozilla::MallocSizeOf mallocSizeOf) const118 JS::ubi::Node::Size JS::ubi::Concrete<JSString>::size(
119     mozilla::MallocSizeOf mallocSizeOf) const {
120   JSString& str = get();
121   size_t size;
122   if (str.isAtom()) {
123     size =
124         str.isFatInline() ? sizeof(js::FatInlineAtom) : sizeof(js::NormalAtom);
125   } else {
126     size = str.isFatInline() ? sizeof(JSFatInlineString) : sizeof(JSString);
127   }
128 
129   if (IsInsideNursery(&str)) {
130     size += Nursery::nurseryCellHeaderSize();
131   }
132 
133   size += str.sizeOfExcludingThis(mallocSizeOf);
134 
135   return size;
136 }
137 
138 const char16_t JS::ubi::Concrete<JSString>::concreteTypeName[] = u"JSString";
139 
encodeUTF8Partial(const JS::AutoRequireNoGC & nogc,mozilla::Span<char> buffer) const140 mozilla::Maybe<mozilla::Tuple<size_t, size_t> > JSString::encodeUTF8Partial(
141     const JS::AutoRequireNoGC& nogc, mozilla::Span<char> buffer) const {
142   mozilla::Vector<const JSString*, 16, SystemAllocPolicy> stack;
143   const JSString* current = this;
144   char16_t pendingLeadSurrogate = 0;  // U+0000 means no pending lead surrogate
145   size_t totalRead = 0;
146   size_t totalWritten = 0;
147   for (;;) {
148     if (current->isRope()) {
149       JSRope& rope = current->asRope();
150       if (!stack.append(rope.rightChild())) {
151         // OOM
152         return mozilla::Nothing();
153       }
154       current = rope.leftChild();
155       continue;
156     }
157 
158     JSLinearString& linear = current->asLinear();
159     if (MOZ_LIKELY(linear.hasLatin1Chars())) {
160       if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
161         if (buffer.Length() < 3) {
162           return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
163         }
164         buffer[0] = '\xEF';
165         buffer[1] = '\xBF';
166         buffer[2] = '\xBD';
167         buffer = buffer.From(3);
168         totalRead += 1;  // pendingLeadSurrogate
169         totalWritten += 3;
170         pendingLeadSurrogate = 0;
171       }
172       auto src = mozilla::AsChars(
173           mozilla::Span(linear.latin1Chars(nogc), linear.length()));
174       size_t read;
175       size_t written;
176       mozilla::Tie(read, written) =
177           mozilla::ConvertLatin1toUtf8Partial(src, buffer);
178       buffer = buffer.From(written);
179       totalRead += read;
180       totalWritten += written;
181       if (read < src.Length()) {
182         return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
183       }
184     } else {
185       auto src = mozilla::Span(linear.twoByteChars(nogc), linear.length());
186       if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
187         char16_t first = 0;
188         if (!src.IsEmpty()) {
189           first = src[0];
190         }
191         if (unicode::IsTrailSurrogate(first)) {
192           // Got a surrogate pair
193           if (buffer.Length() < 4) {
194             return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
195           }
196           uint32_t astral = unicode::UTF16Decode(pendingLeadSurrogate, first);
197           buffer[0] = char(0b1111'0000 | (astral >> 18));
198           buffer[1] = char(0b1000'0000 | ((astral >> 12) & 0b11'1111));
199           buffer[2] = char(0b1000'0000 | ((astral >> 6) & 0b11'1111));
200           buffer[3] = char(0b1000'0000 | (astral & 0b11'1111));
201           src = src.From(1);
202           buffer = buffer.From(4);
203           totalRead += 2;  // both pendingLeadSurrogate and first!
204           totalWritten += 4;
205         } else {
206           // unpaired surrogate
207           if (buffer.Length() < 3) {
208             return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
209           }
210           buffer[0] = '\xEF';
211           buffer[1] = '\xBF';
212           buffer[2] = '\xBD';
213           buffer = buffer.From(3);
214           totalRead += 1;  // pendingLeadSurrogate
215           totalWritten += 3;
216         }
217         pendingLeadSurrogate = 0;
218       }
219       if (src.IsEmpty()) {
220         return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
221       }
222       char16_t last = src[src.Length() - 1];
223       if (unicode::IsLeadSurrogate(last)) {
224         src = src.To(src.Length() - 1);
225         pendingLeadSurrogate = last;
226       } else {
227         MOZ_ASSERT(!pendingLeadSurrogate);
228       }
229       size_t read;
230       size_t written;
231       mozilla::Tie(read, written) =
232           mozilla::ConvertUtf16toUtf8Partial(src, buffer);
233       buffer = buffer.From(written);
234       totalRead += read;
235       totalWritten += written;
236       if (read < src.Length()) {
237         return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
238       }
239     }
240     if (stack.empty()) {
241       break;
242     }
243     current = stack.popCopy();
244   }
245   if (MOZ_UNLIKELY(pendingLeadSurrogate)) {
246     if (buffer.Length() < 3) {
247       return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
248     }
249     buffer[0] = '\xEF';
250     buffer[1] = '\xBF';
251     buffer[2] = '\xBD';
252     // No need to update buffer and pendingLeadSurrogate anymore
253     totalRead += 1;
254     totalWritten += 3;
255   }
256   return mozilla::Some(mozilla::MakeTuple(totalRead, totalWritten));
257 }
258 
259 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
260 
261 template <typename CharT>
262 /*static */
dumpChars(const CharT * s,size_t n,js::GenericPrinter & out)263 void JSString::dumpChars(const CharT* s, size_t n, js::GenericPrinter& out) {
264   if (n == SIZE_MAX) {
265     n = 0;
266     while (s[n]) {
267       n++;
268     }
269   }
270 
271   out.put("\"");
272   dumpCharsNoQuote(s, n, out);
273   out.putChar('"');
274 }
275 
276 template void JSString::dumpChars(const Latin1Char* s, size_t n,
277                                   js::GenericPrinter& out);
278 
279 template void JSString::dumpChars(const char16_t* s, size_t n,
280                                   js::GenericPrinter& out);
281 
282 template <typename CharT>
283 /*static */
dumpCharsNoQuote(const CharT * s,size_t n,js::GenericPrinter & out)284 void JSString::dumpCharsNoQuote(const CharT* s, size_t n,
285                                 js::GenericPrinter& out) {
286   for (size_t i = 0; i < n; i++) {
287     char16_t c = s[i];
288     if (c == '\n') {
289       out.put("\\n");
290     } else if (c == '\t') {
291       out.put("\\t");
292     } else if (c >= 32 && c < 127) {
293       out.putChar((char)s[i]);
294     } else if (c <= 255) {
295       out.printf("\\x%02x", unsigned(c));
296     } else {
297       out.printf("\\u%04x", unsigned(c));
298     }
299   }
300 }
301 
302 template void JSString::dumpCharsNoQuote(const Latin1Char* s, size_t n,
303                                          js::GenericPrinter& out);
304 
305 template void JSString::dumpCharsNoQuote(const char16_t* s, size_t n,
306                                          js::GenericPrinter& out);
307 
dumpCharsNoNewline(js::GenericPrinter & out)308 void JSString::dumpCharsNoNewline(js::GenericPrinter& out) {
309   if (JSLinearString* linear = ensureLinear(nullptr)) {
310     AutoCheckCannotGC nogc;
311     if (hasLatin1Chars()) {
312       out.put("[Latin 1]");
313       dumpChars(linear->latin1Chars(nogc), length(), out);
314     } else {
315       out.put("[2 byte]");
316       dumpChars(linear->twoByteChars(nogc), length(), out);
317     }
318   } else {
319     out.put("(oom in JSString::dumpCharsNoNewline)");
320   }
321 }
322 
dumpCharsNoQuote(js::GenericPrinter & out)323 void JSString::dumpCharsNoQuote(js::GenericPrinter& out) {
324   if (JSLinearString* linear = ensureLinear(nullptr)) {
325     AutoCheckCannotGC nogc;
326     if (hasLatin1Chars()) {
327       dumpCharsNoQuote(linear->latin1Chars(nogc), length(), out);
328     } else {
329       dumpCharsNoQuote(linear->twoByteChars(nogc), length(), out);
330     }
331   } else {
332     out.put("(oom in JSString::dumpCharsNoNewline)");
333   }
334 }
335 
dump()336 void JSString::dump() {
337   js::Fprinter out(stderr);
338   dump(out);
339 }
340 
dump(js::GenericPrinter & out)341 void JSString::dump(js::GenericPrinter& out) {
342   dumpNoNewline(out);
343   out.putChar('\n');
344 }
345 
dumpNoNewline(js::GenericPrinter & out)346 void JSString::dumpNoNewline(js::GenericPrinter& out) {
347   if (JSLinearString* linear = ensureLinear(nullptr)) {
348     AutoCheckCannotGC nogc;
349     if (hasLatin1Chars()) {
350       const Latin1Char* chars = linear->latin1Chars(nogc);
351       out.printf("JSString* (%p) = Latin1Char * (%p) = ", (void*)this,
352                  (void*)chars);
353       dumpChars(chars, length(), out);
354     } else {
355       const char16_t* chars = linear->twoByteChars(nogc);
356       out.printf("JSString* (%p) = char16_t * (%p) = ", (void*)this,
357                  (void*)chars);
358       dumpChars(chars, length(), out);
359     }
360   } else {
361     out.put("(oom in JSString::dump)");
362   }
363 }
364 
dumpRepresentation(js::GenericPrinter & out,int indent) const365 void JSString::dumpRepresentation(js::GenericPrinter& out, int indent) const {
366   if (isRope()) {
367     asRope().dumpRepresentation(out, indent);
368   } else if (isDependent()) {
369     asDependent().dumpRepresentation(out, indent);
370   } else if (isExternal()) {
371     asExternal().dumpRepresentation(out, indent);
372   } else if (isExtensible()) {
373     asExtensible().dumpRepresentation(out, indent);
374   } else if (isInline()) {
375     asInline().dumpRepresentation(out, indent);
376   } else if (isLinear()) {
377     asLinear().dumpRepresentation(out, indent);
378   } else {
379     MOZ_CRASH("Unexpected JSString representation");
380   }
381 }
382 
dumpRepresentationHeader(js::GenericPrinter & out,const char * subclass) const383 void JSString::dumpRepresentationHeader(js::GenericPrinter& out,
384                                         const char* subclass) const {
385   uint32_t flags = JSString::flags();
386   // Print the string's address as an actual C++ expression, to facilitate
387   // copy-and-paste into a debugger.
388   out.printf("((%s*) %p) length: %zu  flags: 0x%x", subclass, this, length(),
389              flags);
390   if (flags & LINEAR_BIT) out.put(" LINEAR");
391   if (flags & DEPENDENT_BIT) out.put(" DEPENDENT");
392   if (flags & INLINE_CHARS_BIT) out.put(" INLINE_CHARS");
393   if (flags & ATOM_BIT)
394     out.put(" ATOM");
395   else
396     out.put(" (NON ATOM)");
397   if (isPermanentAtom()) out.put(" PERMANENT");
398   if (flags & LATIN1_CHARS_BIT) out.put(" LATIN1");
399   if (flags & INDEX_VALUE_BIT) out.printf(" INDEX_VALUE(%u)", getIndexValue());
400   if (!isTenured()) out.put(" NURSERY");
401   out.putChar('\n');
402 }
403 
dumpRepresentationChars(js::GenericPrinter & out,int indent) const404 void JSLinearString::dumpRepresentationChars(js::GenericPrinter& out,
405                                              int indent) const {
406   if (hasLatin1Chars()) {
407     out.printf("%*schars: ((Latin1Char*) %p) ", indent, "", rawLatin1Chars());
408     dumpChars(rawLatin1Chars(), length(), out);
409   } else {
410     out.printf("%*schars: ((char16_t*) %p) ", indent, "", rawTwoByteChars());
411     dumpChars(rawTwoByteChars(), length(), out);
412   }
413   out.putChar('\n');
414 }
415 
equals(const char * s)416 bool JSString::equals(const char* s) {
417   JSLinearString* linear = ensureLinear(nullptr);
418   if (!linear) {
419     // This is DEBUG-only code.
420     fprintf(stderr, "OOM in JSString::equals!\n");
421     return false;
422   }
423 
424   return StringEqualsAscii(linear, s);
425 }
426 #endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */
427 
428 template <typename CharT>
AllocChars(JSString * str,size_t length,CharT ** chars,size_t * capacity)429 static MOZ_ALWAYS_INLINE bool AllocChars(JSString* str, size_t length,
430                                          CharT** chars, size_t* capacity) {
431   /*
432    * Grow by 12.5% if the buffer is very large. Otherwise, round up to the
433    * next power of 2. This is similar to what we do with arrays; see
434    * JSObject::ensureDenseArrayElements.
435    */
436   static const size_t DOUBLING_MAX = 1024 * 1024;
437   *capacity =
438       length > DOUBLING_MAX ? length + (length / 8) : RoundUpPow2(length);
439 
440   static_assert(JSString::MAX_LENGTH * sizeof(CharT) <= UINT32_MAX);
441   *chars =
442       str->zone()->pod_arena_malloc<CharT>(js::StringBufferArena, *capacity);
443   return *chars != nullptr;
444 }
445 
copyLatin1Chars(JSContext * maybecx,arena_id_t destArenaId) const446 UniqueLatin1Chars JSRope::copyLatin1Chars(JSContext* maybecx,
447                                           arena_id_t destArenaId) const {
448   return copyCharsInternal<Latin1Char>(maybecx, destArenaId);
449 }
450 
copyTwoByteChars(JSContext * maybecx,arena_id_t destArenaId) const451 UniqueTwoByteChars JSRope::copyTwoByteChars(JSContext* maybecx,
452                                             arena_id_t destArenaId) const {
453   return copyCharsInternal<char16_t>(maybecx, destArenaId);
454 }
455 
456 template <typename CharT>
copyCharsInternal(JSContext * maybecx,arena_id_t destArenaId) const457 UniquePtr<CharT[], JS::FreePolicy> JSRope::copyCharsInternal(
458     JSContext* maybecx, arena_id_t destArenaId) const {
459   // Left-leaning ropes are far more common than right-leaning ropes, so
460   // perform a non-destructive traversal of the rope, right node first,
461   // splatting each node's characters into a contiguous buffer.
462 
463   size_t n = length();
464 
465   UniquePtr<CharT[], JS::FreePolicy> out;
466   if (maybecx) {
467     out.reset(maybecx->pod_arena_malloc<CharT>(destArenaId, n));
468   } else {
469     out.reset(js_pod_arena_malloc<CharT>(destArenaId, n));
470   }
471 
472   if (!out) {
473     return nullptr;
474   }
475 
476   Vector<const JSString*, 8, SystemAllocPolicy> nodeStack;
477   const JSString* str = this;
478   CharT* end = out.get() + str->length();
479   while (true) {
480     if (str->isRope()) {
481       if (!nodeStack.append(str->asRope().leftChild())) {
482         if (maybecx) {
483           ReportOutOfMemory(maybecx);
484         }
485         return nullptr;
486       }
487       str = str->asRope().rightChild();
488     } else {
489       end -= str->length();
490       CopyChars(end, str->asLinear());
491       if (nodeStack.empty()) {
492         break;
493       }
494       str = nodeStack.popCopy();
495     }
496   }
497   MOZ_ASSERT(end == out.get());
498 
499   return out;
500 }
501 
502 template <typename CharT>
AddStringToHash(uint32_t * hash,const CharT * chars,size_t len)503 void AddStringToHash(uint32_t* hash, const CharT* chars, size_t len) {
504   // It's tempting to use |HashString| instead of this loop, but that's
505   // slightly different than our existing implementation for non-ropes. We
506   // want to pretend we have a contiguous set of chars so we need to
507   // accumulate char by char rather than generate a new hash for substring
508   // and then accumulate that.
509   for (size_t i = 0; i < len; i++) {
510     *hash = mozilla::AddToHash(*hash, chars[i]);
511   }
512 }
513 
AddStringToHash(uint32_t * hash,const JSString * str)514 void AddStringToHash(uint32_t* hash, const JSString* str) {
515   AutoCheckCannotGC nogc;
516   const auto& s = str->asLinear();
517   if (s.hasLatin1Chars()) {
518     AddStringToHash(hash, s.latin1Chars(nogc), s.length());
519   } else {
520     AddStringToHash(hash, s.twoByteChars(nogc), s.length());
521   }
522 }
523 
hash(uint32_t * outHash) const524 bool JSRope::hash(uint32_t* outHash) const {
525   Vector<const JSString*, 8, SystemAllocPolicy> nodeStack;
526   const JSString* str = this;
527 
528   *outHash = 0;
529 
530   while (true) {
531     if (str->isRope()) {
532       if (!nodeStack.append(str->asRope().rightChild())) {
533         return false;
534       }
535       str = str->asRope().leftChild();
536     } else {
537       AddStringToHash(outHash, str);
538       if (nodeStack.empty()) {
539         break;
540       }
541       str = nodeStack.popCopy();
542     }
543   }
544 
545   return true;
546 }
547 
548 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
dumpRepresentation(js::GenericPrinter & out,int indent) const549 void JSRope::dumpRepresentation(js::GenericPrinter& out, int indent) const {
550   dumpRepresentationHeader(out, "JSRope");
551   indent += 2;
552 
553   out.printf("%*sleft:  ", indent, "");
554   leftChild()->dumpRepresentation(out, indent);
555 
556   out.printf("%*sright: ", indent, "");
557   rightChild()->dumpRepresentation(out, indent);
558 }
559 #endif
560 
561 namespace js {
562 
563 template <>
CopyChars(char16_t * dest,const JSLinearString & str)564 void CopyChars(char16_t* dest, const JSLinearString& str) {
565   AutoCheckCannotGC nogc;
566   if (str.hasTwoByteChars()) {
567     PodCopy(dest, str.twoByteChars(nogc), str.length());
568   } else {
569     CopyAndInflateChars(dest, str.latin1Chars(nogc), str.length());
570   }
571 }
572 
573 template <>
CopyChars(Latin1Char * dest,const JSLinearString & str)574 void CopyChars(Latin1Char* dest, const JSLinearString& str) {
575   AutoCheckCannotGC nogc;
576   if (str.hasLatin1Chars()) {
577     PodCopy(dest, str.latin1Chars(nogc), str.length());
578   } else {
579     /*
580      * When we flatten a TwoByte rope, we turn child ropes (including Latin1
581      * ropes) into TwoByte dependent strings. If one of these strings is
582      * also part of another Latin1 rope tree, we can have a Latin1 rope with
583      * a TwoByte descendent and we end up here when we flatten it. Although
584      * the chars are stored as TwoByte, we know they must be in the Latin1
585      * range, so we can safely deflate here.
586      */
587     size_t len = str.length();
588     const char16_t* chars = str.twoByteChars(nogc);
589     auto src = Span(chars, len);
590     MOZ_ASSERT(IsUtf16Latin1(src));
591     LossyConvertUtf16toLatin1(src, AsWritableChars(Span(dest, len)));
592   }
593 }
594 
595 } /* namespace js */
596 
597 template <typename CharT>
StringFlagsForCharType(uint32_t baseFlags)598 static constexpr uint32_t StringFlagsForCharType(uint32_t baseFlags) {
599   if constexpr (std::is_same_v<CharT, char16_t>) {
600     return baseFlags;
601   }
602 
603   return baseFlags | JSString::LATIN1_CHARS_BIT;
604 }
605 
UpdateNurseryBuffersOnTransfer(js::Nursery & nursery,JSString * from,JSString * to,void * buffer,size_t size)606 static bool UpdateNurseryBuffersOnTransfer(js::Nursery& nursery, JSString* from,
607                                            JSString* to, void* buffer,
608                                            size_t size) {
609   // Update the list of buffers associated with nursery cells when |buffer| is
610   // moved from string |from| to string |to|, depending on whether those strings
611   // are in the nursery or not.
612 
613   if (from->isTenured() && !to->isTenured()) {
614     // Tenured leftmost child is giving its chars buffer to the
615     // nursery-allocated root node.
616     if (!nursery.registerMallocedBuffer(buffer, size)) {
617       return false;
618     }
619   } else if (!from->isTenured() && to->isTenured()) {
620     // Leftmost child is giving its nursery-held chars buffer to a
621     // tenured string.
622     nursery.removeMallocedBuffer(buffer, size);
623   }
624 
625   return true;
626 }
627 
CanReuseLeftmostBuffer(JSString * leftmostChild,size_t wholeLength,bool hasTwoByteChars)628 static bool CanReuseLeftmostBuffer(JSString* leftmostChild, size_t wholeLength,
629                                    bool hasTwoByteChars) {
630   if (!leftmostChild->isExtensible()) {
631     return false;
632   }
633 
634   JSExtensibleString& str = leftmostChild->asExtensible();
635   return str.capacity() >= wholeLength &&
636          str.hasTwoByteChars() == hasTwoByteChars;
637 }
638 
flatten(JSContext * maybecx)639 JSLinearString* JSRope::flatten(JSContext* maybecx) {
640   mozilla::Maybe<AutoGeckoProfilerEntry> entry;
641   if (maybecx && !maybecx->isHelperThreadContext()) {
642     entry.emplace(maybecx, "JSRope::flatten");
643   }
644 
645   JSLinearString* str = flattenInternal();
646   if (!str && maybecx) {
647     ReportOutOfMemory(maybecx);
648   }
649 
650   return str;
651 }
652 
flattenInternal()653 JSLinearString* JSRope::flattenInternal() {
654   if (zone()->needsIncrementalBarrier()) {
655     return flattenInternal<WithIncrementalBarrier>();
656   }
657 
658   return flattenInternal<NoBarrier>();
659 }
660 
661 template <JSRope::UsingBarrier usingBarrier>
flattenInternal()662 JSLinearString* JSRope::flattenInternal() {
663   if (hasTwoByteChars()) {
664     return flattenInternal<usingBarrier, char16_t>(this);
665   }
666 
667   return flattenInternal<usingBarrier, Latin1Char>(this);
668 }
669 
670 template <JSRope::UsingBarrier usingBarrier, typename CharT>
671 /* static */
flattenInternal(JSRope * root)672 JSLinearString* JSRope::flattenInternal(JSRope* root) {
673   /*
674    * Consider the DAG of JSRopes rooted at |root|, with non-JSRopes as
675    * its leaves. Mutate the root JSRope into a JSExtensibleString containing
676    * the full flattened text that the root represents, and mutate all other
677    * JSRopes in the interior of the DAG into JSDependentStrings that refer to
678    * this new JSExtensibleString.
679    *
680    * If the leftmost leaf of our DAG is a JSExtensibleString, consider
681    * stealing its buffer for use in our new root, and transforming it into a
682    * JSDependentString too. Do not mutate any of the other leaves.
683    *
684    * Perform a depth-first dag traversal, splatting each node's characters
685    * into a contiguous buffer. Visit each rope node three times:
686    *   1. record position in the buffer and recurse into left child;
687    *   2. recurse into the right child;
688    *   3. transform the node into a dependent string.
689    * To avoid maintaining a stack, tree nodes are mutated to indicate how many
690    * times they have been visited. Since ropes can be dags, a node may be
691    * encountered multiple times during traversal. However, step 3 above leaves
692    * a valid dependent string, so everything works out.
693    *
694    * While ropes avoid all sorts of quadratic cases with string concatenation,
695    * they can't help when ropes are immediately flattened. One idiomatic case
696    * that we'd like to keep linear (and has traditionally been linear in SM
697    * and other JS engines) is:
698    *
699    *   while (...) {
700    *     s += ...
701    *     s.flatten
702    *   }
703    *
704    * Two behaviors accomplish this:
705    *
706    * - When the leftmost non-rope in the DAG we're flattening is a
707    *   JSExtensibleString with sufficient capacity to hold the entire
708    *   flattened string, we just flatten the DAG into its buffer. Then, when
709    *   we transform the root of the DAG from a JSRope into a
710    *   JSExtensibleString, we steal that buffer, and change the victim from a
711    *   JSExtensibleString to a JSDependentString. In this case, the left-hand
712    *   side of the string never needs to be copied.
713    *
714    * - Otherwise, we round up the total flattened size and create a fresh
715    *   JSExtensibleString with that much capacity. If this in turn becomes the
716    *   leftmost leaf of a subsequent flatten, we will hopefully be able to
717    *   fill it, as in the case above.
718    *
719    * Note that, even though the code for creating JSDependentStrings avoids
720    * creating dependents of dependents, we can create that situation here: the
721    * JSExtensibleStrings we transform into JSDependentStrings might have
722    * JSDependentStrings pointing to them already. Stealing the buffer doesn't
723    * change its address, only its owning JSExtensibleString, so all chars()
724    * pointers in the JSDependentStrings are still valid.
725    */
726   const size_t wholeLength = root->length();
727   size_t wholeCapacity;
728   CharT* wholeChars;
729 
730   AutoCheckCannotGC nogc;
731 
732   Nursery& nursery = root->runtimeFromMainThread()->gc.nursery();
733 
734   /* Find the left most string, containing the first string. */
735   JSRope* leftmostRope = root;
736   while (leftmostRope->leftChild()->isRope()) {
737     leftmostRope = &leftmostRope->leftChild()->asRope();
738   }
739   JSString* leftmostChild = leftmostRope->leftChild();
740 
741   bool reuseLeftmostBuffer = CanReuseLeftmostBuffer(
742       leftmostChild, wholeLength, std::is_same_v<CharT, char16_t>);
743 
744   if (reuseLeftmostBuffer) {
745     JSExtensibleString& left = leftmostChild->asExtensible();
746     wholeCapacity = left.capacity();
747     wholeChars = const_cast<CharT*>(left.nonInlineChars<CharT>(nogc));
748 
749     // Nursery::registerMallocedBuffer is fallible, so attempt it first before
750     // doing anything irreversible.
751     if (!UpdateNurseryBuffersOnTransfer(nursery, &left, root, wholeChars,
752                                         wholeCapacity * sizeof(CharT))) {
753       return nullptr;
754     }
755   } else {
756     // If we can't reuse the leftmost child's buffer, allocate a new one.
757     if (!AllocChars(root, wholeLength, &wholeChars, &wholeCapacity)) {
758       return nullptr;
759     }
760 
761     if (!root->isTenured()) {
762       if (!nursery.registerMallocedBuffer(wholeChars,
763                                           wholeCapacity * sizeof(CharT))) {
764         js_free(wholeChars);
765         return nullptr;
766       }
767     }
768   }
769 
770   JSRope* str = root;
771   CharT* pos = wholeChars;
772 
773   JSRope* parent = nullptr;
774   uint32_t parentFlag = 0;
775 
776 first_visit_node : {
777   MOZ_ASSERT_IF(str != root, parent && parentFlag);
778   MOZ_ASSERT(!str->asRope().isBeingFlattened());
779 
780   ropeBarrierDuringFlattening<usingBarrier>(str);
781 
782   JSString& left = *str->d.s.u2.left;
783   str->d.s.u2.parent = parent;
784   str->setFlagBit(parentFlag);
785   parent = nullptr;
786   parentFlag = 0;
787 
788   if (left.isRope()) {
789     /* Return to this node when 'left' done, then goto visit_right_child. */
790     parent = str;
791     parentFlag = FLATTEN_VISIT_RIGHT;
792     str = &left.asRope();
793     goto first_visit_node;
794   }
795   if (!(reuseLeftmostBuffer && &left == leftmostChild)) {
796     CopyChars(pos, left.asLinear());
797   }
798   pos += left.length();
799 }
800 
801 visit_right_child : {
802   JSString& right = *str->d.s.u3.right;
803   if (right.isRope()) {
804     /* Return to this node when 'right' done, then goto finish_node. */
805     parent = str;
806     parentFlag = FLATTEN_FINISH_NODE;
807     str = &right.asRope();
808     goto first_visit_node;
809   }
810   CopyChars(pos, right.asLinear());
811   pos += right.length();
812 }
813 
814 finish_node : {
815   if (str == root) {
816     goto finish_root;
817   }
818 
819   MOZ_ASSERT(pos >= wholeChars);
820   CharT* chars = pos - str->length();
821   JSRope* strParent = str->d.s.u2.parent;
822   str->setNonInlineChars(chars);
823 
824   MOZ_ASSERT(str->asRope().isBeingFlattened());
825   mozilla::DebugOnly<bool> visitRight = str->flags() & FLATTEN_VISIT_RIGHT;
826   bool finishNode = str->flags() & FLATTEN_FINISH_NODE;
827   MOZ_ASSERT(visitRight != finishNode);
828 
829   // This also clears the flags related to flattening.
830   str->setLengthAndFlags(str->length(),
831                          StringFlagsForCharType<CharT>(INIT_DEPENDENT_FLAGS));
832   str->d.s.u3.base =
833       reinterpret_cast<JSLinearString*>(root); /* will be true on exit */
834 
835   // Every interior (rope) node in the rope's tree will be visited during
836   // the traversal and post-barriered here, so earlier additions of
837   // dependent.base -> root pointers are handled by this barrier as well.
838   //
839   // The only time post-barriers need do anything is when the root is in
840   // the nursery. Note that the root was a rope but will be an extensible
841   // string when we return, so it will not point to any strings and need
842   // not be barriered.
843   if (str->isTenured() && !root->isTenured()) {
844     root->storeBuffer()->putWholeCell(str);
845   }
846 
847   str = strParent;
848   if (finishNode) {
849     goto finish_node;
850   }
851   MOZ_ASSERT(visitRight);
852   goto visit_right_child;
853 }
854 
855 finish_root:
856   // We traversed all the way back up to the root so we're finished.
857   MOZ_ASSERT(str == root);
858   MOZ_ASSERT(pos == wholeChars + wholeLength);
859 
860   root->setLengthAndFlags(wholeLength,
861                           StringFlagsForCharType<CharT>(EXTENSIBLE_FLAGS));
862   root->setNonInlineChars(wholeChars);
863   root->d.s.u3.capacity = wholeCapacity;
864   AddCellMemory(root, root->asLinear().allocSize(), MemoryUse::StringContents);
865 
866   if (reuseLeftmostBuffer) {
867     // Remove memory association for left node we're about to make into a
868     // dependent string.
869     JSString& left = *leftmostChild;
870     RemoveCellMemory(&left, left.allocSize(), MemoryUse::StringContents);
871 
872     uint32_t flags = INIT_DEPENDENT_FLAGS;
873     if (left.inStringToAtomCache()) {
874       flags |= IN_STRING_TO_ATOM_CACHE;
875     }
876     left.setLengthAndFlags(left.length(), StringFlagsForCharType<CharT>(flags));
877     left.d.s.u3.base = &root->asLinear();
878     if (left.isTenured() && !root->isTenured()) {
879       // leftmost child -> root is a tenured -> nursery edge.
880       root->storeBuffer()->putWholeCell(&left);
881     }
882   }
883 
884   return &root->asLinear();
885 }
886 
887 template <JSRope::UsingBarrier usingBarrier>
888 /* static */
ropeBarrierDuringFlattening(JSRope * rope)889 inline void JSRope::ropeBarrierDuringFlattening(JSRope* rope) {
890   MOZ_ASSERT(!rope->isBeingFlattened());
891   if constexpr (usingBarrier) {
892     gc::PreWriteBarrierDuringFlattening(rope->leftChild());
893     gc::PreWriteBarrierDuringFlattening(rope->rightChild());
894   }
895 }
896 
897 template <AllowGC allowGC>
EnsureLinear(JSContext * cx,typename MaybeRooted<JSString *,allowGC>::HandleType string)898 static JSLinearString* EnsureLinear(
899     JSContext* cx,
900     typename MaybeRooted<JSString*, allowGC>::HandleType string) {
901   JSLinearString* linear = string->ensureLinear(cx);
902   // Don't report an exception if GC is not allowed, just return nullptr.
903   if (!linear && !allowGC) {
904     cx->recoverFromOutOfMemory();
905   }
906   return linear;
907 }
908 
909 template <AllowGC allowGC>
ConcatStrings(JSContext * cx,typename MaybeRooted<JSString *,allowGC>::HandleType left,typename MaybeRooted<JSString *,allowGC>::HandleType right,gc::InitialHeap heap)910 JSString* js::ConcatStrings(
911     JSContext* cx, typename MaybeRooted<JSString*, allowGC>::HandleType left,
912     typename MaybeRooted<JSString*, allowGC>::HandleType right,
913     gc::InitialHeap heap) {
914   MOZ_ASSERT_IF(!left->isAtom(), cx->isInsideCurrentZone(left));
915   MOZ_ASSERT_IF(!right->isAtom(), cx->isInsideCurrentZone(right));
916 
917   size_t leftLen = left->length();
918   if (leftLen == 0) {
919     return right;
920   }
921 
922   size_t rightLen = right->length();
923   if (rightLen == 0) {
924     return left;
925   }
926 
927   size_t wholeLength = leftLen + rightLen;
928   if (MOZ_UNLIKELY(wholeLength > JSString::MAX_LENGTH)) {
929     // Don't report an exception if GC is not allowed, just return nullptr.
930     if (allowGC) {
931       js::ReportOversizedAllocation(cx, JSMSG_ALLOC_OVERFLOW);
932     }
933     return nullptr;
934   }
935 
936   bool isLatin1 = left->hasLatin1Chars() && right->hasLatin1Chars();
937   bool canUseInline = isLatin1
938                           ? JSInlineString::lengthFits<Latin1Char>(wholeLength)
939                           : JSInlineString::lengthFits<char16_t>(wholeLength);
940   if (canUseInline) {
941     Latin1Char* latin1Buf = nullptr;  // initialize to silence GCC warning
942     char16_t* twoByteBuf = nullptr;   // initialize to silence GCC warning
943     JSInlineString* str =
944         isLatin1
945             ? AllocateInlineString<allowGC>(cx, wholeLength, &latin1Buf, heap)
946             : AllocateInlineString<allowGC>(cx, wholeLength, &twoByteBuf, heap);
947     if (!str) {
948       return nullptr;
949     }
950 
951     AutoCheckCannotGC nogc;
952     JSLinearString* leftLinear = EnsureLinear<allowGC>(cx, left);
953     if (!leftLinear) {
954       return nullptr;
955     }
956     JSLinearString* rightLinear = EnsureLinear<allowGC>(cx, right);
957     if (!rightLinear) {
958       return nullptr;
959     }
960 
961     if (isLatin1) {
962       PodCopy(latin1Buf, leftLinear->latin1Chars(nogc), leftLen);
963       PodCopy(latin1Buf + leftLen, rightLinear->latin1Chars(nogc), rightLen);
964     } else {
965       if (leftLinear->hasTwoByteChars()) {
966         PodCopy(twoByteBuf, leftLinear->twoByteChars(nogc), leftLen);
967       } else {
968         CopyAndInflateChars(twoByteBuf, leftLinear->latin1Chars(nogc), leftLen);
969       }
970       if (rightLinear->hasTwoByteChars()) {
971         PodCopy(twoByteBuf + leftLen, rightLinear->twoByteChars(nogc),
972                 rightLen);
973       } else {
974         CopyAndInflateChars(twoByteBuf + leftLen,
975                             rightLinear->latin1Chars(nogc), rightLen);
976       }
977     }
978 
979     return str;
980   }
981 
982   return JSRope::new_<allowGC>(cx, left, right, wholeLength, heap);
983 }
984 
985 template JSString* js::ConcatStrings<CanGC>(JSContext* cx, HandleString left,
986                                             HandleString right,
987                                             gc::InitialHeap heap);
988 
989 template JSString* js::ConcatStrings<NoGC>(JSContext* cx, JSString* const& left,
990                                            JSString* const& right,
991                                            gc::InitialHeap heap);
992 
993 /**
994  * Copy |src[0..length]| to |dest[0..length]| when copying doesn't narrow and
995  * therefore can't lose information.
996  */
FillChars(char16_t * dest,const unsigned char * src,size_t length)997 static inline void FillChars(char16_t* dest, const unsigned char* src,
998                              size_t length) {
999   ConvertLatin1toUtf16(AsChars(Span(src, length)), Span(dest, length));
1000 }
1001 
FillChars(char16_t * dest,const char16_t * src,size_t length)1002 static inline void FillChars(char16_t* dest, const char16_t* src,
1003                              size_t length) {
1004   PodCopy(dest, src, length);
1005 }
1006 
FillChars(unsigned char * dest,const unsigned char * src,size_t length)1007 static inline void FillChars(unsigned char* dest, const unsigned char* src,
1008                              size_t length) {
1009   PodCopy(dest, src, length);
1010 }
1011 
1012 /**
1013  * Copy |src[0..length]| to |dest[0..length]| when copying *does* narrow, but
1014  * the user guarantees every runtime |src[i]| value can be stored without change
1015  * of value in |dest[i]|.
1016  */
FillFromCompatible(unsigned char * dest,const char16_t * src,size_t length)1017 static inline void FillFromCompatible(unsigned char* dest, const char16_t* src,
1018                                       size_t length) {
1019   LossyConvertUtf16toLatin1(Span(src, length),
1020                             AsWritableChars(Span(dest, length)));
1021 }
1022 
1023 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
dumpRepresentation(js::GenericPrinter & out,int indent) const1024 void JSDependentString::dumpRepresentation(js::GenericPrinter& out,
1025                                            int indent) const {
1026   dumpRepresentationHeader(out, "JSDependentString");
1027   indent += 2;
1028   out.printf("%*soffset: %zu\n", indent, "", baseOffset());
1029   out.printf("%*sbase: ", indent, "");
1030   base()->dumpRepresentation(out, indent);
1031 }
1032 #endif
1033 
EqualChars(const JSLinearString * str1,const JSLinearString * str2)1034 bool js::EqualChars(const JSLinearString* str1, const JSLinearString* str2) {
1035   MOZ_ASSERT(str1->length() == str2->length());
1036 
1037   size_t len = str1->length();
1038 
1039   AutoCheckCannotGC nogc;
1040   if (str1->hasTwoByteChars()) {
1041     if (str2->hasTwoByteChars()) {
1042       return ArrayEqual(str1->twoByteChars(nogc), str2->twoByteChars(nogc),
1043                         len);
1044     }
1045 
1046     return EqualChars(str2->latin1Chars(nogc), str1->twoByteChars(nogc), len);
1047   }
1048 
1049   if (str2->hasLatin1Chars()) {
1050     return ArrayEqual(str1->latin1Chars(nogc), str2->latin1Chars(nogc), len);
1051   }
1052 
1053   return EqualChars(str1->latin1Chars(nogc), str2->twoByteChars(nogc), len);
1054 }
1055 
HasSubstringAt(JSLinearString * text,JSLinearString * pat,size_t start)1056 bool js::HasSubstringAt(JSLinearString* text, JSLinearString* pat,
1057                         size_t start) {
1058   MOZ_ASSERT(start + pat->length() <= text->length());
1059 
1060   size_t patLen = pat->length();
1061 
1062   AutoCheckCannotGC nogc;
1063   if (text->hasLatin1Chars()) {
1064     const Latin1Char* textChars = text->latin1Chars(nogc) + start;
1065     if (pat->hasLatin1Chars()) {
1066       return ArrayEqual(textChars, pat->latin1Chars(nogc), patLen);
1067     }
1068 
1069     return EqualChars(textChars, pat->twoByteChars(nogc), patLen);
1070   }
1071 
1072   const char16_t* textChars = text->twoByteChars(nogc) + start;
1073   if (pat->hasTwoByteChars()) {
1074     return ArrayEqual(textChars, pat->twoByteChars(nogc), patLen);
1075   }
1076 
1077   return EqualChars(pat->latin1Chars(nogc), textChars, patLen);
1078 }
1079 
EqualStrings(JSContext * cx,JSString * str1,JSString * str2,bool * result)1080 bool js::EqualStrings(JSContext* cx, JSString* str1, JSString* str2,
1081                       bool* result) {
1082   if (str1 == str2) {
1083     *result = true;
1084     return true;
1085   }
1086 
1087   size_t length1 = str1->length();
1088   if (length1 != str2->length()) {
1089     *result = false;
1090     return true;
1091   }
1092 
1093   JSLinearString* linear1 = str1->ensureLinear(cx);
1094   if (!linear1) {
1095     return false;
1096   }
1097   JSLinearString* linear2 = str2->ensureLinear(cx);
1098   if (!linear2) {
1099     return false;
1100   }
1101 
1102   *result = EqualChars(linear1, linear2);
1103   return true;
1104 }
1105 
EqualStrings(const JSLinearString * str1,const JSLinearString * str2)1106 bool js::EqualStrings(const JSLinearString* str1, const JSLinearString* str2) {
1107   if (str1 == str2) {
1108     return true;
1109   }
1110 
1111   size_t length1 = str1->length();
1112   if (length1 != str2->length()) {
1113     return false;
1114   }
1115 
1116   return EqualChars(str1, str2);
1117 }
1118 
CompareChars(const char16_t * s1,size_t len1,JSLinearString * s2)1119 int32_t js::CompareChars(const char16_t* s1, size_t len1, JSLinearString* s2) {
1120   AutoCheckCannotGC nogc;
1121   return s2->hasLatin1Chars()
1122              ? CompareChars(s1, len1, s2->latin1Chars(nogc), s2->length())
1123              : CompareChars(s1, len1, s2->twoByteChars(nogc), s2->length());
1124 }
1125 
CompareStringsImpl(const JSLinearString * str1,const JSLinearString * str2)1126 static int32_t CompareStringsImpl(const JSLinearString* str1,
1127                                   const JSLinearString* str2) {
1128   size_t len1 = str1->length();
1129   size_t len2 = str2->length();
1130 
1131   AutoCheckCannotGC nogc;
1132   if (str1->hasLatin1Chars()) {
1133     const Latin1Char* chars1 = str1->latin1Chars(nogc);
1134     return str2->hasLatin1Chars()
1135                ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
1136                : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
1137   }
1138 
1139   const char16_t* chars1 = str1->twoByteChars(nogc);
1140   return str2->hasLatin1Chars()
1141              ? CompareChars(chars1, len1, str2->latin1Chars(nogc), len2)
1142              : CompareChars(chars1, len1, str2->twoByteChars(nogc), len2);
1143 }
1144 
CompareStrings(JSContext * cx,JSString * str1,JSString * str2,int32_t * result)1145 bool js::CompareStrings(JSContext* cx, JSString* str1, JSString* str2,
1146                         int32_t* result) {
1147   MOZ_ASSERT(str1);
1148   MOZ_ASSERT(str2);
1149 
1150   if (str1 == str2) {
1151     *result = 0;
1152     return true;
1153   }
1154 
1155   JSLinearString* linear1 = str1->ensureLinear(cx);
1156   if (!linear1) {
1157     return false;
1158   }
1159 
1160   JSLinearString* linear2 = str2->ensureLinear(cx);
1161   if (!linear2) {
1162     return false;
1163   }
1164 
1165   *result = CompareStringsImpl(linear1, linear2);
1166   return true;
1167 }
1168 
CompareStrings(const JSLinearString * str1,const JSLinearString * str2)1169 int32_t js::CompareStrings(const JSLinearString* str1,
1170                            const JSLinearString* str2) {
1171   MOZ_ASSERT(str1);
1172   MOZ_ASSERT(str2);
1173 
1174   if (str1 == str2) {
1175     return 0;
1176   }
1177   return CompareStringsImpl(str1, str2);
1178 }
1179 
StringIsAscii(JSLinearString * str)1180 bool js::StringIsAscii(JSLinearString* str) {
1181   JS::AutoCheckCannotGC nogc;
1182   if (str->hasLatin1Chars()) {
1183     return mozilla::IsAscii(
1184         AsChars(Span(str->latin1Chars(nogc), str->length())));
1185   }
1186   return mozilla::IsAscii(Span(str->twoByteChars(nogc), str->length()));
1187 }
1188 
StringEqualsAscii(JSLinearString * str,const char * asciiBytes)1189 bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes) {
1190   return StringEqualsAscii(str, asciiBytes, strlen(asciiBytes));
1191 }
1192 
StringEqualsAscii(JSLinearString * str,const char * asciiBytes,size_t length)1193 bool js::StringEqualsAscii(JSLinearString* str, const char* asciiBytes,
1194                            size_t length) {
1195   MOZ_ASSERT(JS::StringIsASCII(Span(asciiBytes, length)));
1196 
1197   if (length != str->length()) {
1198     return false;
1199   }
1200 
1201   const Latin1Char* latin1 = reinterpret_cast<const Latin1Char*>(asciiBytes);
1202 
1203   AutoCheckCannotGC nogc;
1204   return str->hasLatin1Chars()
1205              ? ArrayEqual(latin1, str->latin1Chars(nogc), length)
1206              : EqualChars(latin1, str->twoByteChars(nogc), length);
1207 }
1208 
1209 template <typename CharT>
CheckStringIsIndex(const CharT * s,size_t length,uint32_t * indexp)1210 bool js::CheckStringIsIndex(const CharT* s, size_t length, uint32_t* indexp) {
1211   MOZ_ASSERT(length > 0);
1212   MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH);
1213   MOZ_ASSERT(IsAsciiDigit(*s),
1214              "caller's fast path must have checked first char");
1215 
1216   RangedPtr<const CharT> cp(s, length);
1217   const RangedPtr<const CharT> end(s + length, s, length);
1218 
1219   uint32_t index = AsciiDigitToNumber(*cp++);
1220   uint32_t oldIndex = 0;
1221   uint32_t c = 0;
1222 
1223   if (index != 0) {
1224     // Consume remaining characters only if the first character isn't '0'.
1225     while (cp < end && IsAsciiDigit(*cp)) {
1226       oldIndex = index;
1227       c = AsciiDigitToNumber(*cp);
1228       index = 10 * index + c;
1229       cp++;
1230     }
1231   }
1232 
1233   // It's not an integer index if there are characters after the number.
1234   if (cp != end) {
1235     return false;
1236   }
1237 
1238   // Look out for "4294967295" and larger-number strings that fit in
1239   // UINT32_CHAR_BUFFER_LENGTH: only unsigned 32-bit integers less than or equal
1240   // to MAX_ARRAY_INDEX shall pass.
1241   if (oldIndex < MAX_ARRAY_INDEX / 10 ||
1242       (oldIndex == MAX_ARRAY_INDEX / 10 && c <= (MAX_ARRAY_INDEX % 10))) {
1243     MOZ_ASSERT(index <= MAX_ARRAY_INDEX);
1244     *indexp = index;
1245     return true;
1246   }
1247 
1248   return false;
1249 }
1250 
1251 template bool js::CheckStringIsIndex(const Latin1Char* s, size_t length,
1252                                      uint32_t* indexp);
1253 template bool js::CheckStringIsIndex(const char16_t* s, size_t length,
1254                                      uint32_t* indexp);
1255 
1256 template <typename CharT>
AtomCharsToIndex(const CharT * s,size_t length)1257 static uint32_t AtomCharsToIndex(const CharT* s, size_t length) {
1258   // Chars are known to be a valid index value (as determined by
1259   // CheckStringIsIndex) that didn't fit in the "index value" bits in the
1260   // header.
1261 
1262   MOZ_ASSERT(length > 0);
1263   MOZ_ASSERT(length <= UINT32_CHAR_BUFFER_LENGTH);
1264 
1265   RangedPtr<const CharT> cp(s, length);
1266   const RangedPtr<const CharT> end(s + length, s, length);
1267 
1268   MOZ_ASSERT(IsAsciiDigit(*cp));
1269   uint32_t index = AsciiDigitToNumber(*cp++);
1270   MOZ_ASSERT(index != 0);
1271 
1272   while (cp < end) {
1273     MOZ_ASSERT(IsAsciiDigit(*cp));
1274     index = 10 * index + AsciiDigitToNumber(*cp);
1275     cp++;
1276   }
1277 
1278   MOZ_ASSERT(index <= MAX_ARRAY_INDEX);
1279   return index;
1280 }
1281 
getIndexSlow() const1282 uint32_t JSAtom::getIndexSlow() const {
1283   MOZ_ASSERT(isIndex());
1284   MOZ_ASSERT(!hasIndexValue());
1285 
1286   size_t len = length();
1287 
1288   AutoCheckCannotGC nogc;
1289   return hasLatin1Chars() ? AtomCharsToIndex(latin1Chars(nogc), len)
1290                           : AtomCharsToIndex(twoByteChars(nogc), len);
1291 }
1292 
MarkStringAndBasesNonDeduplicatable(JSLinearString * s)1293 static void MarkStringAndBasesNonDeduplicatable(JSLinearString* s) {
1294   while (true) {
1295     if (!s->isTenured()) {
1296       s->setNonDeduplicatable();
1297     }
1298     if (!s->hasBase()) {
1299       break;
1300     }
1301     s = s->base();
1302   }
1303 }
1304 
init(JSContext * cx,JSString * s)1305 bool AutoStableStringChars::init(JSContext* cx, JSString* s) {
1306   RootedLinearString linearString(cx, s->ensureLinear(cx));
1307   if (!linearString) {
1308     return false;
1309   }
1310 
1311   MOZ_ASSERT(state_ == Uninitialized);
1312 
1313   // If the chars are inline then we need to copy them since they may be moved
1314   // by a compacting GC.
1315   if (baseIsInline(linearString)) {
1316     return linearString->hasTwoByteChars() ? copyTwoByteChars(cx, linearString)
1317                                            : copyLatin1Chars(cx, linearString);
1318   }
1319 
1320   if (linearString->hasLatin1Chars()) {
1321     state_ = Latin1;
1322     latin1Chars_ = linearString->rawLatin1Chars();
1323   } else {
1324     state_ = TwoByte;
1325     twoByteChars_ = linearString->rawTwoByteChars();
1326   }
1327 
1328   MarkStringAndBasesNonDeduplicatable(linearString);
1329 
1330   s_ = linearString;
1331   return true;
1332 }
1333 
initTwoByte(JSContext * cx,JSString * s)1334 bool AutoStableStringChars::initTwoByte(JSContext* cx, JSString* s) {
1335   RootedLinearString linearString(cx, s->ensureLinear(cx));
1336   if (!linearString) {
1337     return false;
1338   }
1339 
1340   MOZ_ASSERT(state_ == Uninitialized);
1341 
1342   if (linearString->hasLatin1Chars()) {
1343     return copyAndInflateLatin1Chars(cx, linearString);
1344   }
1345 
1346   // If the chars are inline then we need to copy them since they may be moved
1347   // by a compacting GC.
1348   if (baseIsInline(linearString)) {
1349     return copyTwoByteChars(cx, linearString);
1350   }
1351 
1352   state_ = TwoByte;
1353   twoByteChars_ = linearString->rawTwoByteChars();
1354 
1355   MarkStringAndBasesNonDeduplicatable(linearString);
1356 
1357   s_ = linearString;
1358   return true;
1359 }
1360 
baseIsInline(HandleLinearString linearString)1361 bool AutoStableStringChars::baseIsInline(HandleLinearString linearString) {
1362   JSString* base = linearString;
1363   while (base->isDependent()) {
1364     base = base->asDependent().base();
1365   }
1366   return base->isInline();
1367 }
1368 
1369 template <typename T>
allocOwnChars(JSContext * cx,size_t count)1370 T* AutoStableStringChars::allocOwnChars(JSContext* cx, size_t count) {
1371   static_assert(
1372       InlineCapacity >=
1373               sizeof(JS::Latin1Char) * JSFatInlineString::MAX_LENGTH_LATIN1 &&
1374           InlineCapacity >=
1375               sizeof(char16_t) * JSFatInlineString::MAX_LENGTH_TWO_BYTE,
1376       "InlineCapacity too small to hold fat inline strings");
1377 
1378   static_assert((JSString::MAX_LENGTH &
1379                  mozilla::tl::MulOverflowMask<sizeof(T)>::value) == 0,
1380                 "Size calculation can overflow");
1381   MOZ_ASSERT(count <= JSString::MAX_LENGTH);
1382   size_t size = sizeof(T) * count;
1383 
1384   ownChars_.emplace(cx);
1385   if (!ownChars_->resize(size)) {
1386     ownChars_.reset();
1387     return nullptr;
1388   }
1389 
1390   return reinterpret_cast<T*>(ownChars_->begin());
1391 }
1392 
copyAndInflateLatin1Chars(JSContext * cx,HandleLinearString linearString)1393 bool AutoStableStringChars::copyAndInflateLatin1Chars(
1394     JSContext* cx, HandleLinearString linearString) {
1395   char16_t* chars = allocOwnChars<char16_t>(cx, linearString->length());
1396   if (!chars) {
1397     return false;
1398   }
1399 
1400   FillChars(chars, linearString->rawLatin1Chars(), linearString->length());
1401 
1402   state_ = TwoByte;
1403   twoByteChars_ = chars;
1404   s_ = linearString;
1405   return true;
1406 }
1407 
copyLatin1Chars(JSContext * cx,HandleLinearString linearString)1408 bool AutoStableStringChars::copyLatin1Chars(JSContext* cx,
1409                                             HandleLinearString linearString) {
1410   size_t length = linearString->length();
1411   JS::Latin1Char* chars = allocOwnChars<JS::Latin1Char>(cx, length);
1412   if (!chars) {
1413     return false;
1414   }
1415 
1416   FillChars(chars, linearString->rawLatin1Chars(), length);
1417 
1418   state_ = Latin1;
1419   latin1Chars_ = chars;
1420   s_ = linearString;
1421   return true;
1422 }
1423 
copyTwoByteChars(JSContext * cx,HandleLinearString linearString)1424 bool AutoStableStringChars::copyTwoByteChars(JSContext* cx,
1425                                              HandleLinearString linearString) {
1426   size_t length = linearString->length();
1427   char16_t* chars = allocOwnChars<char16_t>(cx, length);
1428   if (!chars) {
1429     return false;
1430   }
1431 
1432   FillChars(chars, linearString->rawTwoByteChars(), length);
1433 
1434   state_ = TwoByte;
1435   twoByteChars_ = chars;
1436   s_ = linearString;
1437   return true;
1438 }
1439 
1440 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
dump(js::GenericPrinter & out)1441 void JSAtom::dump(js::GenericPrinter& out) {
1442   out.printf("JSAtom* (%p) = ", (void*)this);
1443   this->JSString::dump(out);
1444 }
1445 
dump()1446 void JSAtom::dump() {
1447   Fprinter out(stderr);
1448   dump(out);
1449 }
1450 
dumpRepresentation(js::GenericPrinter & out,int indent) const1451 void JSExternalString::dumpRepresentation(js::GenericPrinter& out,
1452                                           int indent) const {
1453   dumpRepresentationHeader(out, "JSExternalString");
1454   indent += 2;
1455 
1456   out.printf("%*sfinalizer: ((JSExternalStringCallbacks*) %p)\n", indent, "",
1457              callbacks());
1458   dumpRepresentationChars(out, indent);
1459 }
1460 #endif /* defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW) */
1461 
NewDependentString(JSContext * cx,JSString * baseArg,size_t start,size_t length,gc::InitialHeap heap)1462 JSLinearString* js::NewDependentString(JSContext* cx, JSString* baseArg,
1463                                        size_t start, size_t length,
1464                                        gc::InitialHeap heap) {
1465   if (length == 0) {
1466     return cx->emptyString();
1467   }
1468 
1469   JSLinearString* base = baseArg->ensureLinear(cx);
1470   if (!base) {
1471     return nullptr;
1472   }
1473 
1474   if (start == 0 && length == base->length()) {
1475     return base;
1476   }
1477 
1478   if (base->hasTwoByteChars()) {
1479     AutoCheckCannotGC nogc;
1480     const char16_t* chars = base->twoByteChars(nogc) + start;
1481     if (JSLinearString* staticStr = cx->staticStrings().lookup(chars, length)) {
1482       return staticStr;
1483     }
1484   } else {
1485     AutoCheckCannotGC nogc;
1486     const Latin1Char* chars = base->latin1Chars(nogc) + start;
1487     if (JSLinearString* staticStr = cx->staticStrings().lookup(chars, length)) {
1488       return staticStr;
1489     }
1490   }
1491 
1492   return JSDependentString::new_(cx, base, start, length, heap);
1493 }
1494 
CanStoreCharsAsLatin1(const char16_t * s,size_t length)1495 static inline bool CanStoreCharsAsLatin1(const char16_t* s, size_t length) {
1496   return IsUtf16Latin1(Span(s, length));
1497 }
1498 
1499 template <AllowGC allowGC>
NewInlineStringDeflated(JSContext * cx,const mozilla::Range<const char16_t> & chars,gc::InitialHeap heap=gc::DefaultHeap)1500 static MOZ_ALWAYS_INLINE JSInlineString* NewInlineStringDeflated(
1501     JSContext* cx, const mozilla::Range<const char16_t>& chars,
1502     gc::InitialHeap heap = gc::DefaultHeap) {
1503   size_t len = chars.length();
1504   Latin1Char* storage;
1505   JSInlineString* str = AllocateInlineString<allowGC>(cx, len, &storage, heap);
1506   if (!str) {
1507     return nullptr;
1508   }
1509 
1510   MOZ_ASSERT(CanStoreCharsAsLatin1(chars.begin().get(), len));
1511   FillFromCompatible(storage, chars.begin().get(), len);
1512   return str;
1513 }
1514 
1515 template <AllowGC allowGC>
NewStringDeflated(JSContext * cx,const char16_t * s,size_t n,gc::InitialHeap heap)1516 static JSLinearString* NewStringDeflated(JSContext* cx, const char16_t* s,
1517                                          size_t n, gc::InitialHeap heap) {
1518   if (JSLinearString* str = TryEmptyOrStaticString(cx, s, n)) {
1519     return str;
1520   }
1521 
1522   if (JSInlineString::lengthFits<Latin1Char>(n)) {
1523     return NewInlineStringDeflated<allowGC>(
1524         cx, mozilla::Range<const char16_t>(s, n), heap);
1525   }
1526 
1527   auto news = cx->make_pod_arena_array<Latin1Char>(js::StringBufferArena, n);
1528   if (!news) {
1529     if (!allowGC) {
1530       cx->recoverFromOutOfMemory();
1531     }
1532     return nullptr;
1533   }
1534 
1535   MOZ_ASSERT(CanStoreCharsAsLatin1(s, n));
1536   FillFromCompatible(news.get(), s, n);
1537 
1538   return JSLinearString::new_<allowGC>(cx, std::move(news), n, heap);
1539 }
1540 
NewInlineStringForAtomDeflated(JSContext * cx,const char16_t * chars,size_t length)1541 static MOZ_ALWAYS_INLINE JSInlineString* NewInlineStringForAtomDeflated(
1542     JSContext* cx, const char16_t* chars, size_t length) {
1543   Latin1Char* storage;
1544   JSInlineString* str = AllocateInlineStringForAtom(cx, length, &storage);
1545   if (!str) {
1546     return nullptr;
1547   }
1548 
1549   MOZ_ASSERT(CanStoreCharsAsLatin1(chars, length));
1550   FillFromCompatible(storage, chars, length);
1551   return str;
1552 }
1553 
NewStringForAtomDeflatedValidLength(JSContext * cx,const char16_t * s,size_t n)1554 static JSLinearString* NewStringForAtomDeflatedValidLength(JSContext* cx,
1555                                                            const char16_t* s,
1556                                                            size_t n) {
1557   if (JSInlineString::lengthFits<Latin1Char>(n)) {
1558     return NewInlineStringForAtomDeflated(cx, s, n);
1559   }
1560 
1561   auto news = cx->make_pod_arena_array<Latin1Char>(js::StringBufferArena, n);
1562   if (!news) {
1563     cx->recoverFromOutOfMemory();
1564     return nullptr;
1565   }
1566 
1567   MOZ_ASSERT(CanStoreCharsAsLatin1(s, n));
1568   FillFromCompatible(news.get(), s, n);
1569 
1570   return JSLinearString::newForAtomValidLength(cx, std::move(news), n);
1571 }
1572 
1573 template <AllowGC allowGC, typename CharT>
NewStringDontDeflate(JSContext * cx,UniquePtr<CharT[],JS::FreePolicy> chars,size_t length,gc::InitialHeap heap)1574 JSLinearString* js::NewStringDontDeflate(
1575     JSContext* cx, UniquePtr<CharT[], JS::FreePolicy> chars, size_t length,
1576     gc::InitialHeap heap) {
1577   if (JSLinearString* str = TryEmptyOrStaticString(cx, chars.get(), length)) {
1578     return str;
1579   }
1580 
1581   if (JSInlineString::lengthFits<CharT>(length)) {
1582     // |chars.get()| is safe because 1) |NewInlineString| necessarily *copies*,
1583     // and 2) |chars| frees its contents only when this function returns.
1584     return NewInlineString<allowGC>(
1585         cx, mozilla::Range<const CharT>(chars.get(), length), heap);
1586   }
1587 
1588   return JSLinearString::new_<allowGC>(cx, std::move(chars), length, heap);
1589 }
1590 
1591 template JSLinearString* js::NewStringDontDeflate<CanGC>(
1592     JSContext* cx, UniqueTwoByteChars chars, size_t length,
1593     gc::InitialHeap heap);
1594 
1595 template JSLinearString* js::NewStringDontDeflate<NoGC>(
1596     JSContext* cx, UniqueTwoByteChars chars, size_t length,
1597     gc::InitialHeap heap);
1598 
1599 template JSLinearString* js::NewStringDontDeflate<CanGC>(
1600     JSContext* cx, UniqueLatin1Chars chars, size_t length,
1601     gc::InitialHeap heap);
1602 
1603 template JSLinearString* js::NewStringDontDeflate<NoGC>(JSContext* cx,
1604                                                         UniqueLatin1Chars chars,
1605                                                         size_t length,
1606                                                         gc::InitialHeap heap);
1607 
1608 template <AllowGC allowGC, typename CharT>
NewString(JSContext * cx,UniquePtr<CharT[],JS::FreePolicy> chars,size_t length,gc::InitialHeap heap)1609 JSLinearString* js::NewString(JSContext* cx,
1610                               UniquePtr<CharT[], JS::FreePolicy> chars,
1611                               size_t length, gc::InitialHeap heap) {
1612   if constexpr (std::is_same_v<CharT, char16_t>) {
1613     if (CanStoreCharsAsLatin1(chars.get(), length)) {
1614       // Deflating copies from |chars.get()| and lets |chars| be freed on
1615       // return.
1616       return NewStringDeflated<allowGC>(cx, chars.get(), length, heap);
1617     }
1618   }
1619 
1620   return NewStringDontDeflate<allowGC>(cx, std::move(chars), length, heap);
1621 }
1622 
1623 template JSLinearString* js::NewString<CanGC>(JSContext* cx,
1624                                               UniqueTwoByteChars chars,
1625                                               size_t length,
1626                                               gc::InitialHeap heap);
1627 
1628 template JSLinearString* js::NewString<NoGC>(JSContext* cx,
1629                                              UniqueTwoByteChars chars,
1630                                              size_t length,
1631                                              gc::InitialHeap heap);
1632 
1633 template JSLinearString* js::NewString<CanGC>(JSContext* cx,
1634                                               UniqueLatin1Chars chars,
1635                                               size_t length,
1636                                               gc::InitialHeap heap);
1637 
1638 template JSLinearString* js::NewString<NoGC>(JSContext* cx,
1639                                              UniqueLatin1Chars chars,
1640                                              size_t length,
1641                                              gc::InitialHeap heap);
1642 
1643 namespace js {
1644 
1645 template <AllowGC allowGC, typename CharT>
NewStringCopyNDontDeflateNonStaticValidLength(JSContext * cx,const CharT * s,size_t n,gc::InitialHeap heap)1646 JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength(
1647     JSContext* cx, const CharT* s, size_t n, gc::InitialHeap heap) {
1648   if (JSInlineString::lengthFits<CharT>(n)) {
1649     return NewInlineString<allowGC>(cx, mozilla::Range<const CharT>(s, n),
1650                                     heap);
1651   }
1652 
1653   auto news = cx->make_pod_arena_array<CharT>(js::StringBufferArena, n);
1654   if (!news) {
1655     if (!allowGC) {
1656       cx->recoverFromOutOfMemory();
1657     }
1658     return nullptr;
1659   }
1660 
1661   FillChars(news.get(), s, n);
1662 
1663   return JSLinearString::newValidLength<allowGC>(cx, std::move(news), n, heap);
1664 }
1665 
1666 template JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength<CanGC>(
1667     JSContext* cx, const char16_t* s, size_t n, gc::InitialHeap heap);
1668 
1669 template JSLinearString* NewStringCopyNDontDeflateNonStaticValidLength<CanGC>(
1670     JSContext* cx, const Latin1Char* s, size_t n, gc::InitialHeap heap);
1671 
1672 template <AllowGC allowGC, typename CharT>
NewStringCopyNDontDeflate(JSContext * cx,const CharT * s,size_t n,gc::InitialHeap heap)1673 JSLinearString* NewStringCopyNDontDeflate(JSContext* cx, const CharT* s,
1674                                           size_t n, gc::InitialHeap heap) {
1675   if (JSLinearString* str = TryEmptyOrStaticString(cx, s, n)) {
1676     return str;
1677   }
1678 
1679   if (MOZ_UNLIKELY(!JSLinearString::validateLength(cx, n))) {
1680     return nullptr;
1681   }
1682 
1683   return NewStringCopyNDontDeflateNonStaticValidLength<allowGC>(cx, s, n, heap);
1684 }
1685 
1686 template JSLinearString* NewStringCopyNDontDeflate<CanGC>(JSContext* cx,
1687                                                           const char16_t* s,
1688                                                           size_t n,
1689                                                           gc::InitialHeap heap);
1690 
1691 template JSLinearString* NewStringCopyNDontDeflate<NoGC>(JSContext* cx,
1692                                                          const char16_t* s,
1693                                                          size_t n,
1694                                                          gc::InitialHeap heap);
1695 
1696 template JSLinearString* NewStringCopyNDontDeflate<CanGC>(JSContext* cx,
1697                                                           const Latin1Char* s,
1698                                                           size_t n,
1699                                                           gc::InitialHeap heap);
1700 
1701 template JSLinearString* NewStringCopyNDontDeflate<NoGC>(JSContext* cx,
1702                                                          const Latin1Char* s,
1703                                                          size_t n,
1704                                                          gc::InitialHeap heap);
1705 
NewLatin1StringZ(JSContext * cx,UniqueChars chars,gc::InitialHeap heap)1706 JSLinearString* NewLatin1StringZ(JSContext* cx, UniqueChars chars,
1707                                  gc::InitialHeap heap) {
1708   size_t length = strlen(chars.get());
1709   UniqueLatin1Chars latin1(reinterpret_cast<Latin1Char*>(chars.release()));
1710   return NewString<CanGC>(cx, std::move(latin1), length, heap);
1711 }
1712 
1713 template <AllowGC allowGC, typename CharT>
NewStringCopyN(JSContext * cx,const CharT * s,size_t n,gc::InitialHeap heap)1714 JSLinearString* NewStringCopyN(JSContext* cx, const CharT* s, size_t n,
1715                                gc::InitialHeap heap) {
1716   if constexpr (std::is_same_v<CharT, char16_t>) {
1717     if (CanStoreCharsAsLatin1(s, n)) {
1718       return NewStringDeflated<allowGC>(cx, s, n, heap);
1719     }
1720   }
1721 
1722   return NewStringCopyNDontDeflate<allowGC>(cx, s, n, heap);
1723 }
1724 
1725 template JSLinearString* NewStringCopyN<CanGC>(JSContext* cx, const char16_t* s,
1726                                                size_t n, gc::InitialHeap heap);
1727 
1728 template JSLinearString* NewStringCopyN<NoGC>(JSContext* cx, const char16_t* s,
1729                                               size_t n, gc::InitialHeap heap);
1730 
1731 template JSLinearString* NewStringCopyN<CanGC>(JSContext* cx,
1732                                                const Latin1Char* s, size_t n,
1733                                                gc::InitialHeap heap);
1734 
1735 template JSLinearString* NewStringCopyN<NoGC>(JSContext* cx,
1736                                               const Latin1Char* s, size_t n,
1737                                               gc::InitialHeap heap);
1738 
1739 template <typename CharT>
NewStringForAtomCopyNDontDeflateValidLength(JSContext * cx,const CharT * s,size_t n)1740 JSLinearString* NewStringForAtomCopyNDontDeflateValidLength(JSContext* cx,
1741                                                             const CharT* s,
1742                                                             size_t n) {
1743   if constexpr (std::is_same_v<CharT, char16_t>) {
1744     MOZ_ASSERT(!CanStoreCharsAsLatin1(s, n));
1745   }
1746 
1747   if (JSInlineString::lengthFits<CharT>(n)) {
1748     return NewInlineStringForAtom(cx, s, n);
1749   }
1750 
1751   auto news = cx->make_pod_arena_array<CharT>(js::StringBufferArena, n);
1752   if (!news) {
1753     cx->recoverFromOutOfMemory();
1754     return nullptr;
1755   }
1756 
1757   FillChars(news.get(), s, n);
1758 
1759   return JSLinearString::newForAtomValidLength(cx, std::move(news), n);
1760 }
1761 
1762 template JSLinearString* NewStringForAtomCopyNDontDeflateValidLength(
1763     JSContext* cx, const char16_t* s, size_t n);
1764 
1765 template JSLinearString* NewStringForAtomCopyNDontDeflateValidLength(
1766     JSContext* cx, const Latin1Char* s, size_t n);
1767 
1768 template <typename CharT>
NewStringForAtomCopyNMaybeDeflateValidLength(JSContext * cx,const CharT * s,size_t n)1769 JSLinearString* NewStringForAtomCopyNMaybeDeflateValidLength(JSContext* cx,
1770                                                              const CharT* s,
1771                                                              size_t n) {
1772   if constexpr (std::is_same_v<CharT, char16_t>) {
1773     if (CanStoreCharsAsLatin1(s, n)) {
1774       return NewStringForAtomDeflatedValidLength(cx, s, n);
1775     }
1776   }
1777 
1778   return NewStringForAtomCopyNDontDeflateValidLength(cx, s, n);
1779 }
1780 
1781 template JSLinearString* NewStringForAtomCopyNMaybeDeflateValidLength(
1782     JSContext* cx, const char16_t* s, size_t n);
1783 
1784 template JSLinearString* NewStringForAtomCopyNMaybeDeflateValidLength(
1785     JSContext* cx, const Latin1Char* s, size_t n);
1786 
NewStringCopyUTF8N(JSContext * cx,const JS::UTF8Chars utf8,gc::InitialHeap heap)1787 JSLinearString* NewStringCopyUTF8N(JSContext* cx, const JS::UTF8Chars utf8,
1788                                    gc::InitialHeap heap) {
1789   JS::SmallestEncoding encoding = JS::FindSmallestEncoding(utf8);
1790   if (encoding == JS::SmallestEncoding::ASCII) {
1791     return NewStringCopyN<js::CanGC>(cx, utf8.begin().get(), utf8.length(),
1792                                      heap);
1793   }
1794 
1795   size_t length;
1796   if (encoding == JS::SmallestEncoding::Latin1) {
1797     UniqueLatin1Chars latin1(
1798         UTF8CharsToNewLatin1CharsZ(cx, utf8, &length, js::StringBufferArena)
1799             .get());
1800     if (!latin1) {
1801       return nullptr;
1802     }
1803 
1804     return NewString<js::CanGC>(cx, std::move(latin1), length, heap);
1805   }
1806 
1807   MOZ_ASSERT(encoding == JS::SmallestEncoding::UTF16);
1808 
1809   UniqueTwoByteChars utf16(
1810       UTF8CharsToNewTwoByteCharsZ(cx, utf8, &length, js::StringBufferArena)
1811           .get());
1812   if (!utf16) {
1813     return nullptr;
1814   }
1815 
1816   return NewString<js::CanGC>(cx, std::move(utf16), length, heap);
1817 }
1818 
lookup(const char16_t * chars,size_t len) const1819 MOZ_ALWAYS_INLINE JSString* ExternalStringCache::lookup(const char16_t* chars,
1820                                                         size_t len) const {
1821   AutoCheckCannotGC nogc;
1822 
1823   for (size_t i = 0; i < NumEntries; i++) {
1824     JSString* str = entries_[i];
1825     if (!str || str->length() != len) {
1826       continue;
1827     }
1828 
1829     const char16_t* strChars = str->asLinear().nonInlineTwoByteChars(nogc);
1830     if (chars == strChars) {
1831       // Note that we don't need an incremental barrier here or below.
1832       // The cache is purged on GC so any string we get from the cache
1833       // must have been allocated after the GC started.
1834       return str;
1835     }
1836 
1837     // Compare the chars. Don't do this for long strings as it will be
1838     // faster to allocate a new external string.
1839     static const size_t MaxLengthForCharComparison = 100;
1840     if (len <= MaxLengthForCharComparison && ArrayEqual(chars, strChars, len)) {
1841       return str;
1842     }
1843   }
1844 
1845   return nullptr;
1846 }
1847 
put(JSString * str)1848 MOZ_ALWAYS_INLINE void ExternalStringCache::put(JSString* str) {
1849   MOZ_ASSERT(str->isExternal());
1850 
1851   for (size_t i = NumEntries - 1; i > 0; i--) {
1852     entries_[i] = entries_[i - 1];
1853   }
1854 
1855   entries_[0] = str;
1856 }
1857 
NewMaybeExternalString(JSContext * cx,const char16_t * s,size_t n,const JSExternalStringCallbacks * callbacks,bool * allocatedExternal,gc::InitialHeap heap)1858 JSString* NewMaybeExternalString(JSContext* cx, const char16_t* s, size_t n,
1859                                  const JSExternalStringCallbacks* callbacks,
1860                                  bool* allocatedExternal,
1861                                  gc::InitialHeap heap) {
1862   if (JSString* str = TryEmptyOrStaticString(cx, s, n)) {
1863     *allocatedExternal = false;
1864     return str;
1865   }
1866 
1867   if (JSThinInlineString::lengthFits<Latin1Char>(n) &&
1868       CanStoreCharsAsLatin1(s, n)) {
1869     *allocatedExternal = false;
1870     return NewInlineStringDeflated<AllowGC::CanGC>(
1871         cx, mozilla::Range<const char16_t>(s, n), heap);
1872   }
1873 
1874   ExternalStringCache& cache = cx->zone()->externalStringCache();
1875   if (JSString* str = cache.lookup(s, n)) {
1876     *allocatedExternal = false;
1877     return str;
1878   }
1879 
1880   JSString* str = JSExternalString::new_(cx, s, n, callbacks);
1881   if (!str) {
1882     return nullptr;
1883   }
1884 
1885   *allocatedExternal = true;
1886   cache.put(str);
1887   return str;
1888 }
1889 
1890 } /* namespace js */
1891 
1892 #if defined(DEBUG) || defined(JS_JITSPEW) || defined(JS_CACHEIR_SPEW)
dumpRepresentation(js::GenericPrinter & out,int indent) const1893 void JSExtensibleString::dumpRepresentation(js::GenericPrinter& out,
1894                                             int indent) const {
1895   dumpRepresentationHeader(out, "JSExtensibleString");
1896   indent += 2;
1897 
1898   out.printf("%*scapacity: %zu\n", indent, "", capacity());
1899   dumpRepresentationChars(out, indent);
1900 }
1901 
dumpRepresentation(js::GenericPrinter & out,int indent) const1902 void JSInlineString::dumpRepresentation(js::GenericPrinter& out,
1903                                         int indent) const {
1904   dumpRepresentationHeader(
1905       out, isFatInline() ? "JSFatInlineString" : "JSThinInlineString");
1906   indent += 2;
1907 
1908   dumpRepresentationChars(out, indent);
1909 }
1910 
dumpRepresentation(js::GenericPrinter & out,int indent) const1911 void JSLinearString::dumpRepresentation(js::GenericPrinter& out,
1912                                         int indent) const {
1913   dumpRepresentationHeader(out, "JSLinearString");
1914   indent += 2;
1915 
1916   dumpRepresentationChars(out, indent);
1917 }
1918 #endif
1919 
1920 struct RepresentativeExternalString : public JSExternalStringCallbacks {
finalizeRepresentativeExternalString1921   void finalize(char16_t* chars) const override {
1922     // Constant chars, nothing to do.
1923   }
sizeOfBufferRepresentativeExternalString1924   size_t sizeOfBuffer(const char16_t* chars,
1925                       mozilla::MallocSizeOf mallocSizeOf) const override {
1926     // This string's buffer is not heap-allocated, so its malloc size is 0.
1927     return 0;
1928   }
1929 };
1930 
1931 static const RepresentativeExternalString RepresentativeExternalStringCallbacks;
1932 
1933 template <typename CheckString, typename CharT>
FillWithRepresentatives(JSContext * cx,HandleArrayObject array,uint32_t * index,const CharT * chars,size_t len,size_t fatInlineMaxLength,const CheckString & check)1934 static bool FillWithRepresentatives(JSContext* cx, HandleArrayObject array,
1935                                     uint32_t* index, const CharT* chars,
1936                                     size_t len, size_t fatInlineMaxLength,
1937                                     const CheckString& check) {
1938   auto AppendString = [&check](JSContext* cx, HandleArrayObject array,
1939                                uint32_t* index, HandleString s) {
1940     MOZ_ASSERT(check(s));
1941     (void)check;  // silence clang -Wunused-lambda-capture in opt builds
1942     RootedValue val(cx, StringValue(s));
1943     return JS_DefineElement(cx, array, (*index)++, val, 0);
1944   };
1945 
1946   MOZ_ASSERT(len > fatInlineMaxLength);
1947 
1948   // Normal atom.
1949   RootedString atom1(cx, AtomizeChars(cx, chars, len));
1950   if (!atom1 || !AppendString(cx, array, index, atom1)) {
1951     return false;
1952   }
1953   MOZ_ASSERT(atom1->isAtom());
1954 
1955   // Inline atom.
1956   RootedString atom2(cx, AtomizeChars(cx, chars, 2));
1957   if (!atom2 || !AppendString(cx, array, index, atom2)) {
1958     return false;
1959   }
1960   MOZ_ASSERT(atom2->isAtom());
1961   MOZ_ASSERT(atom2->isInline());
1962 
1963   // Fat inline atom.
1964   RootedString atom3(cx, AtomizeChars(cx, chars, fatInlineMaxLength));
1965   if (!atom3 || !AppendString(cx, array, index, atom3)) {
1966     return false;
1967   }
1968   MOZ_ASSERT(atom3->isAtom());
1969   MOZ_ASSERT(atom3->isFatInline());
1970 
1971   // Normal linear string.
1972   RootedString linear1(cx, NewStringCopyN<CanGC>(cx, chars, len));
1973   if (!linear1 || !AppendString(cx, array, index, linear1)) {
1974     return false;
1975   }
1976   MOZ_ASSERT(linear1->isLinear());
1977 
1978   // Inline string.
1979   RootedString linear2(cx, NewStringCopyN<CanGC>(cx, chars, 3));
1980   if (!linear2 || !AppendString(cx, array, index, linear2)) {
1981     return false;
1982   }
1983   MOZ_ASSERT(linear2->isLinear());
1984   MOZ_ASSERT(linear2->isInline());
1985 
1986   // Fat inline string.
1987   RootedString linear3(cx,
1988                        NewStringCopyN<CanGC>(cx, chars, fatInlineMaxLength));
1989   if (!linear3 || !AppendString(cx, array, index, linear3)) {
1990     return false;
1991   }
1992   MOZ_ASSERT(linear3->isLinear());
1993   MOZ_ASSERT(linear3->isFatInline());
1994 
1995   // Rope.
1996   RootedString rope(cx, ConcatStrings<CanGC>(cx, atom1, atom3));
1997   if (!rope || !AppendString(cx, array, index, rope)) {
1998     return false;
1999   }
2000   MOZ_ASSERT(rope->isRope());
2001 
2002   // Dependent.
2003   RootedString dep(cx, NewDependentString(cx, atom1, 0, len - 2));
2004   if (!dep || !AppendString(cx, array, index, dep)) {
2005     return false;
2006   }
2007   MOZ_ASSERT(dep->isDependent());
2008 
2009   // Extensible.
2010   RootedString temp1(cx, NewStringCopyN<CanGC>(cx, chars, len));
2011   if (!temp1) {
2012     return false;
2013   }
2014   RootedString extensible(cx, ConcatStrings<CanGC>(cx, temp1, atom3));
2015   if (!extensible || !extensible->ensureLinear(cx)) {
2016     return false;
2017   }
2018   if (!AppendString(cx, array, index, extensible)) {
2019     return false;
2020   }
2021   MOZ_ASSERT(extensible->isExtensible());
2022 
2023   // External. Note that we currently only support TwoByte external strings.
2024   RootedString external1(cx), external2(cx);
2025   if constexpr (std::is_same_v<CharT, char16_t>) {
2026     external1 = JS_NewExternalString(cx, (const char16_t*)chars, len,
2027                                      &RepresentativeExternalStringCallbacks);
2028     if (!external1 || !AppendString(cx, array, index, external1)) {
2029       return false;
2030     }
2031     MOZ_ASSERT(external1->isExternal());
2032 
2033     external2 = JS_NewExternalString(cx, (const char16_t*)chars, 2,
2034                                      &RepresentativeExternalStringCallbacks);
2035     if (!external2 || !AppendString(cx, array, index, external2)) {
2036       return false;
2037     }
2038     MOZ_ASSERT(external2->isExternal());
2039   }
2040 
2041   // Assert the strings still have the types we expect after creating the
2042   // other strings.
2043 
2044   MOZ_ASSERT(atom1->isAtom());
2045   MOZ_ASSERT(atom2->isAtom());
2046   MOZ_ASSERT(atom3->isAtom());
2047   MOZ_ASSERT(atom2->isInline());
2048   MOZ_ASSERT(atom3->isFatInline());
2049 
2050   MOZ_ASSERT(linear1->isLinear());
2051   MOZ_ASSERT(linear2->isLinear());
2052   MOZ_ASSERT(linear3->isLinear());
2053   MOZ_ASSERT(linear2->isInline());
2054   MOZ_ASSERT(linear3->isFatInline());
2055 
2056   MOZ_ASSERT(rope->isRope());
2057   MOZ_ASSERT(dep->isDependent());
2058   MOZ_ASSERT(extensible->isExtensible());
2059   MOZ_ASSERT_IF(external1, external1->isExternal());
2060   MOZ_ASSERT_IF(external2, external2->isExternal());
2061   return true;
2062 }
2063 
2064 /* static */
fillWithRepresentatives(JSContext * cx,HandleArrayObject array)2065 bool JSString::fillWithRepresentatives(JSContext* cx, HandleArrayObject array) {
2066   uint32_t index = 0;
2067 
2068   auto CheckTwoByte = [](JSString* str) { return str->hasTwoByteChars(); };
2069   auto CheckLatin1 = [](JSString* str) { return str->hasLatin1Chars(); };
2070 
2071   // Append TwoByte strings.
2072   static const char16_t twoByteChars[] =
2073       u"\u1234abc\0def\u5678ghijklmasdfa\0xyz0123456789";
2074   if (!FillWithRepresentatives(
2075           cx, array, &index, twoByteChars, std::size(twoByteChars) - 1,
2076           JSFatInlineString::MAX_LENGTH_TWO_BYTE, CheckTwoByte)) {
2077     return false;
2078   }
2079 
2080   // Append Latin1 strings.
2081   static const Latin1Char latin1Chars[] = "abc\0defghijklmasdfa\0xyz0123456789";
2082   if (!FillWithRepresentatives(
2083           cx, array, &index, latin1Chars, std::size(latin1Chars) - 1,
2084           JSFatInlineString::MAX_LENGTH_LATIN1, CheckLatin1)) {
2085     return false;
2086   }
2087 
2088   // Now create forcibly-tenured versions of each of these string types. Note
2089   // that this is best-effort; if nursery strings are disabled, or we GC
2090   // midway through here, then we may end up with fewer nursery strings than
2091   // desired. Also, some types of strings are not nursery-allocatable, so
2092   // this will always produce some number of redundant strings.
2093   gc::AutoSuppressNurseryCellAlloc suppress(cx);
2094 
2095   // Append TwoByte strings.
2096   if (!FillWithRepresentatives(
2097           cx, array, &index, twoByteChars, std::size(twoByteChars) - 1,
2098           JSFatInlineString::MAX_LENGTH_TWO_BYTE, CheckTwoByte)) {
2099     return false;
2100   }
2101 
2102   // Append Latin1 strings.
2103   if (!FillWithRepresentatives(
2104           cx, array, &index, latin1Chars, std::size(latin1Chars) - 1,
2105           JSFatInlineString::MAX_LENGTH_LATIN1, CheckLatin1)) {
2106     return false;
2107   }
2108 
2109   MOZ_ASSERT(index == 40);
2110 
2111   return true;
2112 }
2113 
2114 /*** Conversions ************************************************************/
2115 
EncodeLatin1(JSContext * cx,JSString * str)2116 UniqueChars js::EncodeLatin1(JSContext* cx, JSString* str) {
2117   JSLinearString* linear = str->ensureLinear(cx);
2118   if (!linear) {
2119     return nullptr;
2120   }
2121 
2122   JS::AutoCheckCannotGC nogc;
2123   if (linear->hasTwoByteChars()) {
2124     JS::Latin1CharsZ chars =
2125         JS::LossyTwoByteCharsToNewLatin1CharsZ(cx, linear->twoByteRange(nogc));
2126     return UniqueChars(chars.c_str());
2127   }
2128 
2129   size_t len = str->length();
2130   Latin1Char* buf = cx->pod_malloc<Latin1Char>(len + 1);
2131   if (!buf) {
2132     return nullptr;
2133   }
2134 
2135   FillChars(buf, linear->latin1Chars(nogc), len);
2136   buf[len] = '\0';
2137 
2138   return UniqueChars(reinterpret_cast<char*>(buf));
2139 }
2140 
EncodeAscii(JSContext * cx,JSString * str)2141 UniqueChars js::EncodeAscii(JSContext* cx, JSString* str) {
2142   JSLinearString* linear = str->ensureLinear(cx);
2143   if (!linear) {
2144     return nullptr;
2145   }
2146 
2147   MOZ_ASSERT(StringIsAscii(linear));
2148   return EncodeLatin1(cx, linear);
2149 }
2150 
IdToPrintableUTF8(JSContext * cx,HandleId id,IdToPrintableBehavior behavior)2151 UniqueChars js::IdToPrintableUTF8(JSContext* cx, HandleId id,
2152                                   IdToPrintableBehavior behavior) {
2153   // ToString(<symbol>) throws a TypeError, therefore require that callers
2154   // request source representation when |id| is a property key.
2155   MOZ_ASSERT_IF(
2156       behavior == IdToPrintableBehavior::IdIsIdentifier,
2157       id.isAtom() && frontend::IsIdentifierNameOrPrivateName(id.toAtom()));
2158 
2159   RootedValue v(cx, IdToValue(id));
2160   JSString* str;
2161   if (behavior == IdToPrintableBehavior::IdIsPropertyKey) {
2162     str = ValueToSource(cx, v);
2163   } else {
2164     str = ToString<CanGC>(cx, v);
2165   }
2166   if (!str) {
2167     return nullptr;
2168   }
2169   return StringToNewUTF8CharsZ(cx, *str);
2170 }
2171 
2172 template <AllowGC allowGC>
ToStringSlow(JSContext * cx,typename MaybeRooted<Value,allowGC>::HandleType arg)2173 JSString* js::ToStringSlow(
2174     JSContext* cx, typename MaybeRooted<Value, allowGC>::HandleType arg) {
2175   /* As with ToObjectSlow, callers must verify that |arg| isn't a string. */
2176   MOZ_ASSERT(!arg.isString());
2177 
2178   Value v = arg;
2179   if (!v.isPrimitive()) {
2180     MOZ_ASSERT(!cx->isHelperThreadContext());
2181     if (!allowGC) {
2182       return nullptr;
2183     }
2184     RootedValue v2(cx, v);
2185     if (!ToPrimitive(cx, JSTYPE_STRING, &v2)) {
2186       return nullptr;
2187     }
2188     v = v2;
2189   }
2190 
2191   JSString* str;
2192   if (v.isString()) {
2193     str = v.toString();
2194   } else if (v.isInt32()) {
2195     str = Int32ToString<allowGC>(cx, v.toInt32());
2196   } else if (v.isDouble()) {
2197     str = NumberToString<allowGC>(cx, v.toDouble());
2198   } else if (v.isBoolean()) {
2199     str = BooleanToString(cx, v.toBoolean());
2200   } else if (v.isNull()) {
2201     str = cx->names().null;
2202   } else if (v.isSymbol()) {
2203     MOZ_ASSERT(!cx->isHelperThreadContext());
2204     if (allowGC) {
2205       JS_ReportErrorNumberASCII(cx, GetErrorMessage, nullptr,
2206                                 JSMSG_SYMBOL_TO_STRING);
2207     }
2208     return nullptr;
2209   } else if (v.isBigInt()) {
2210     if (!allowGC) {
2211       return nullptr;
2212     }
2213     RootedBigInt i(cx, v.toBigInt());
2214     str = BigInt::toString<CanGC>(cx, i, 10);
2215   }
2216 #ifdef ENABLE_RECORD_TUPLE
2217   else if (v.isExtendedPrimitive()) {
2218     if (!allowGC) {
2219       return nullptr;
2220     }
2221     if (IsTuple(v)) {
2222       Rooted<TupleType*> tup(cx, &TupleType::thisTupleValue(v));
2223       return TupleToSource(cx, tup);
2224     }
2225     Rooted<RecordType*> rec(cx);
2226     MOZ_ALWAYS_TRUE(RecordObject::maybeUnbox(&v.getObjectPayload(), &rec));
2227     return RecordToSource(cx, rec);
2228   }
2229 #endif
2230   else {
2231     MOZ_ASSERT(v.isUndefined());
2232     str = cx->names().undefined;
2233   }
2234   return str;
2235 }
2236 
2237 template JSString* js::ToStringSlow<CanGC>(JSContext* cx, HandleValue arg);
2238 
2239 template JSString* js::ToStringSlow<NoGC>(JSContext* cx, const Value& arg);
2240 
ToStringSlow(JSContext * cx,HandleValue v)2241 JS_PUBLIC_API JSString* js::ToStringSlow(JSContext* cx, HandleValue v) {
2242   return ToStringSlow<CanGC>(cx, v);
2243 }
2244