1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2  * vim: set ts=8 sts=4 et sw=4 tw=99:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef vm_String_h
8 #define vm_String_h
9 
10 #include "mozilla/MemoryReporting.h"
11 #include "mozilla/PodOperations.h"
12 #include "mozilla/Range.h"
13 
14 #include "jsapi.h"
15 #include "jsfriendapi.h"
16 #include "jsstr.h"
17 
18 #include "gc/Barrier.h"
19 #include "gc/Heap.h"
20 #include "gc/Marking.h"
21 #include "gc/Rooting.h"
22 #include "js/CharacterEncoding.h"
23 #include "js/GCAPI.h"
24 #include "js/RootingAPI.h"
25 
/* Forward declarations of string classes referenced before they are defined. */
class JSDependentString;
class JSExtensibleString;
class JSExternalString;
class JSInlineString;
class JSRope;

namespace js {

/* Forward declarations of js-namespace classes used by this header. */
class AutoStableStringChars;
class PropertyName;
class StaticStrings;

/*
 * Number of characters needed to hold the decimal form of any unsigned 32-bit
 * integer: the digit count of "4294967295", excluding the terminating null.
 */
static const size_t UINT32_CHAR_BUFFER_LENGTH = sizeof("4294967295") - 1;

} /* namespace js */
42 
43 /*
44  * JavaScript strings
45  *
46  * Conceptually, a JS string is just an array of chars and a length. This array
47  * of chars may or may not be null-terminated and, if it is, the null character
48  * is not included in the length.
49  *
50  * To improve performance of common operations, the following optimizations are
51  * made which affect the engine's representation of strings:
52  *
53  *  - The plain vanilla representation is a "flat" string which consists of a
54  *    string header in the GC heap and a malloc'd null terminated char array.
55  *
56  *  - To avoid copying a substring of an existing "base" string, a "dependent"
57  *    string (JSDependentString) can be created which points into the base
58  *    string's char array.
59  *
60  *  - To avoid O(n^2) char buffer copying, a "rope" node (JSRope) can be created
61  *    to represent a delayed string concatenation. Concatenation (called
62  *    flattening) is performed if and when a linear char array is requested. In
63  *    general, ropes form a binary dag whose internal nodes are JSRope string
64  *    headers with no associated char array and whose leaf nodes are either flat
65  *    or dependent strings.
66  *
67  *  - To avoid copying the leftmost string when flattening, we may produce an
68  *    "extensible" string, which tracks not only its actual length but also its
69  *    buffer's overall size. If such an "extensible" string appears as the
70  *    leftmost string in a subsequent flatten, and its buffer has enough unused
71  *    space, we can simply flatten the rest of the ropes into its buffer,
72  *    leaving its text in place. We then transfer ownership of its buffer to the
73  *    flattened rope, and mutate the donor extensible string into a dependent
74  *    string referencing its original buffer.
75  *
76  *    (The term "extensible" does not imply that we ever 'realloc' the buffer.
77  *    Extensible strings may have dependent strings pointing into them, and the
78  *    JSAPI hands out pointers to flat strings' buffers, so resizing with
79  *    'realloc' is generally not possible.)
80  *
81  *  - To avoid allocating small char arrays, short strings can be stored inline
82  *    in the string header (JSInlineString). These come in two flavours:
83  *    JSThinInlineString, which is the same size as JSString; and
84  *    JSFatInlineString, which has a larger header and so can fit more chars.
85  *
86  *  - To avoid O(n) string equality comparisons, strings can be
87  *    canonicalized to "atoms" (JSAtom) such that there is a single atom with a
88  *    given (length,chars).
89  *
90  *  - To avoid copying all strings created through the JSAPI, an "external"
91  *    string (JSExternalString) can be created whose chars are managed by the
92  *    JSAPI client.
93  *
94  *  - To avoid using two bytes per character for every string, string characters
95  *    are stored as Latin1 instead of TwoByte if all characters are representable
96  *    in Latin1.
97  *
98  * Although all strings share the same basic memory layout, we can conceptually
99  * arrange them into a hierarchy of operations/invariants and represent this
100  * hierarchy in C++ with classes:
101  *
102  * C++ type                     operations+fields / invariants+properties
103  * ==========================   =========================================
104  * JSString (abstract)          get(Latin1|TwoByte)CharsZ, get(Latin1|TwoByte)Chars, length / -
105  *  | \
106  *  | JSRope                    leftChild, rightChild / -
107  *  |
108  * JSLinearString (abstract)    latin1Chars, twoByteChars / might be null-terminated
109  *  |  |
110  *  |  +-- JSDependentString    base / -
111  *  |  |
112  *  |  +-- JSExternalString     - / char array memory managed by embedding
113  *  |
114  * JSFlatString                 - / null terminated
115  *  |  |
116  *  |  +-- JSExtensibleString   tracks total buffer capacity (including current text)
117  *  |  |
118  *  |  +-- JSUndependedString   original dependent base / -
119  *  |  |
120  *  |  +-- JSInlineString (abstract)    - / chars stored in header
121  *  |      |
122  *  |      +-- JSThinInlineString       - / header is normal
123  *  |      |
124  *  |      +-- JSFatInlineString        - / header is fat
125  *  |
126  * JSAtom (abstract)            - / string equality === pointer equality
127  *  |  |
128  *  |  +-- js::NormalAtom       - JSFlatString + atom hash code
129  *  |  |
130  *  |  +-- js::FatInlineAtom    - JSFatInlineString + atom hash code
131  *  |
132  * js::PropertyName             - / chars don't contain an index (uint32_t)
133  *
134  * Classes marked with (abstract) above are not literally C++ Abstract Base
135  * Classes (since there are no virtual functions, pure or not, in this
136  * hierarchy), but have the same meaning: there are no strings with this type as
137  * its most-derived type.
138  *
139  * Atoms can additionally be permanent, i.e. unable to be collected, and can
140  * be combined with other string types to create additional most-derived types
141  * that satisfy the invariants of more than one of the abovementioned
142  * most-derived types. Furthermore, each atom stores a hash number (based on its
143  * chars). This hash number is used as key in the atoms table and when the atom
144  * is used as key in a JS Map/Set.
145  *
146  * Derived string types can be queried from ancestor types via isX() and
147  * retrieved with asX() debug-only-checked casts.
148  *
149  * The ensureX() operations mutate 'this' in place to effectively change the type to be
150  * at least X (e.g., ensureLinear will change a JSRope to be a JSFlatString).
151  */
152 
153 class JSString : public js::gc::TenuredCell
154 {
155   protected:
156     static const size_t NUM_INLINE_CHARS_LATIN1   = 2 * sizeof(void*) / sizeof(JS::Latin1Char);
157     static const size_t NUM_INLINE_CHARS_TWO_BYTE = 2 * sizeof(void*) / sizeof(char16_t);
158 
159     /* Fields only apply to string types commented on the right. */
160     struct Data
161     {
162         union {
163             struct {
164                 uint32_t           flags;               /* JSString */
165                 uint32_t           length;              /* JSString */
166             };
167             uintptr_t              flattenData;         /* JSRope (temporary while flattening) */
168         } u1;
169         union {
170             union {
171                 /* JS(Fat)InlineString */
172                 JS::Latin1Char     inlineStorageLatin1[NUM_INLINE_CHARS_LATIN1];
173                 char16_t           inlineStorageTwoByte[NUM_INLINE_CHARS_TWO_BYTE];
174             };
175             struct {
176                 union {
177                     const JS::Latin1Char* nonInlineCharsLatin1; /* JSLinearString, except JS(Fat)InlineString */
178                     const char16_t* nonInlineCharsTwoByte;/* JSLinearString, except JS(Fat)InlineString */
179                     JSString*      left;               /* JSRope */
180                 } u2;
181                 union {
182                     JSLinearString* base;               /* JS(Dependent|Undepended)String */
183                     JSString*      right;              /* JSRope */
184                     size_t         capacity;            /* JSFlatString (extensible) */
185                     const JSStringFinalizer* externalFinalizer;/* JSExternalString */
186                 } u3;
187             } s;
188         };
189     } d;
190 
191   public:
192     /* Flags exposed only for jits */
193 
194     /*
195      * The Flags Word
196      *
197      * The flags word stores both the string's type and its character encoding.
198      *
199      * If LATIN1_CHARS_BIT is set, the string's characters are stored as Latin1
200      * instead of TwoByte. This flag can also be set for ropes, if both the
201      * left and right nodes are Latin1. Flattening will result in a Latin1
202      * string in this case.
203      *
204      * The other flags store the string's type. Instead of using a dense index
205      * to represent the most-derived type, string types are encoded to allow
206      * single-op tests for hot queries (isRope, isDependent, isFlat, isAtom)
207      * which, in view of subtyping, would require slower
208      * (isX() || isY() || isZ()).
209      *
210      * The string type encoding can be summarized as follows. The "instance
211      * encoding" entry for a type specifies the flag bits used to create a
212      * string instance of that type. Abstract types have no instances and thus
213      * have no such entry. The "subtype predicate" entry for a type specifies
214      * the predicate used to query whether a JSString instance is subtype
215      * (reflexively) of that type.
216      *
217      *   String        Instance     Subtype
218      *   type          encoding     predicate
219      *   ------------------------------------
220      *   Rope          000000       000000
221      *   Linear        -           !000000
222      *   HasBase       -            xxxx1x
223      *   Dependent     000010       000010
224      *   External      100000       100000
225      *   Flat          -            xxxxx1
226      *   Undepended    000011       000011
227      *   Extensible    010001       010001
228      *   Inline        000101       xxx1xx
229      *   FatInline     010101       x1x1xx
230      *   Atom          001001       xx1xxx
231      *   PermanentAtom 101001       1x1xxx
232      *   InlineAtom    -            xx11xx
233      *   FatInlineAtom -            x111xx
234      *
235      * Note that the first 4 flag bits (from right to left in the previous table)
236      * have the following meaning and can be used for some hot queries:
237      *
238      *   Bit 0: IsFlat
239      *   Bit 1: HasBase (Dependent, Undepended)
240      *   Bit 2: IsInline (Inline, FatInline)
241      *   Bit 3: IsAtom (Atom, PermanentAtom)
242      *
243      *  "HasBase" here refers to the two string types that have a 'base' field:
244      *  JSDependentString and JSUndependedString.
245      *  A JSUndependedString is a JSDependentString which has been 'fixed' (by ensureFixed)
246      *  to be null-terminated.  In such cases, the string must keep marking its base since
247      *  there may be any number of *other* JSDependentStrings transitively depending on it.
248      *
249      */
250 
251     static const uint32_t FLAT_BIT               = JS_BIT(0);
252     static const uint32_t HAS_BASE_BIT           = JS_BIT(1);
253     static const uint32_t INLINE_CHARS_BIT       = JS_BIT(2);
254     static const uint32_t ATOM_BIT               = JS_BIT(3);
255 
256     static const uint32_t ROPE_FLAGS             = 0;
257     static const uint32_t DEPENDENT_FLAGS        = HAS_BASE_BIT;
258     static const uint32_t UNDEPENDED_FLAGS       = FLAT_BIT | HAS_BASE_BIT;
259     static const uint32_t EXTENSIBLE_FLAGS       = FLAT_BIT | JS_BIT(4);
260     static const uint32_t EXTERNAL_FLAGS         = JS_BIT(5);
261 
262     static const uint32_t FAT_INLINE_MASK        = INLINE_CHARS_BIT | JS_BIT(4);
263     static const uint32_t PERMANENT_ATOM_MASK    = ATOM_BIT | JS_BIT(5);
264 
265     /* Initial flags for thin inline and fat inline strings. */
266     static const uint32_t INIT_THIN_INLINE_FLAGS = FLAT_BIT | INLINE_CHARS_BIT;
267     static const uint32_t INIT_FAT_INLINE_FLAGS  = FLAT_BIT | FAT_INLINE_MASK;
268 
269     static const uint32_t TYPE_FLAGS_MASK        = JS_BIT(6) - 1;
270 
271     static const uint32_t LATIN1_CHARS_BIT       = JS_BIT(6);
272 
273     static const uint32_t MAX_LENGTH             = js::MaxStringLength;
274 
275     static const JS::Latin1Char MAX_LATIN1_CHAR = 0xff;
276 
277     /*
278      * Helper function to validate that a string of a given length is
279      * representable by a JSString. An allocation overflow is reported if false
280      * is returned.
281      */
282     static inline bool validateLength(js::ExclusiveContext* maybecx, size_t length);
283 
staticAsserts()284     static void staticAsserts() {
285         static_assert(JSString::MAX_LENGTH < UINT32_MAX, "Length must fit in 32 bits");
286         static_assert(sizeof(JSString) ==
287                       (offsetof(JSString, d.inlineStorageLatin1) +
288                        NUM_INLINE_CHARS_LATIN1 * sizeof(char)),
289                       "Inline Latin1 chars must fit in a JSString");
290         static_assert(sizeof(JSString) ==
291                       (offsetof(JSString, d.inlineStorageTwoByte) +
292                        NUM_INLINE_CHARS_TWO_BYTE * sizeof(char16_t)),
293                       "Inline char16_t chars must fit in a JSString");
294 
295         /* Ensure js::shadow::String has the same layout. */
296         using js::shadow::String;
297         static_assert(offsetof(JSString, d.u1.length) == offsetof(String, length),
298                       "shadow::String length offset must match JSString");
299         static_assert(offsetof(JSString, d.u1.flags) == offsetof(String, flags),
300                       "shadow::String flags offset must match JSString");
301         static_assert(offsetof(JSString, d.s.u2.nonInlineCharsLatin1) == offsetof(String, nonInlineCharsLatin1),
302                       "shadow::String nonInlineChars offset must match JSString");
303         static_assert(offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) == offsetof(String, nonInlineCharsTwoByte),
304                       "shadow::String nonInlineChars offset must match JSString");
305         static_assert(offsetof(JSString, d.inlineStorageLatin1) == offsetof(String, inlineStorageLatin1),
306                       "shadow::String inlineStorage offset must match JSString");
307         static_assert(offsetof(JSString, d.inlineStorageTwoByte) == offsetof(String, inlineStorageTwoByte),
308                       "shadow::String inlineStorage offset must match JSString");
309         static_assert(INLINE_CHARS_BIT == String::INLINE_CHARS_BIT,
310                       "shadow::String::INLINE_CHARS_BIT must match JSString::INLINE_CHARS_BIT");
311         static_assert(LATIN1_CHARS_BIT == String::LATIN1_CHARS_BIT,
312                       "shadow::String::LATIN1_CHARS_BIT must match JSString::LATIN1_CHARS_BIT");
313         static_assert(TYPE_FLAGS_MASK == String::TYPE_FLAGS_MASK,
314                       "shadow::String::TYPE_FLAGS_MASK must match JSString::TYPE_FLAGS_MASK");
315         static_assert(ROPE_FLAGS == String::ROPE_FLAGS,
316                       "shadow::String::ROPE_FLAGS must match JSString::ROPE_FLAGS");
317     }
318 
319     /* Avoid lame compile errors in JSRope::flatten */
320     friend class JSRope;
321 
322     friend class js::gc::RelocationOverlay;
323 
324   protected:
325     template <typename CharT>
326     MOZ_ALWAYS_INLINE
327     void setNonInlineChars(const CharT* chars);
328 
329   public:
330     /* All strings have length. */
331 
332     MOZ_ALWAYS_INLINE
length()333     size_t length() const {
334         return d.u1.length;
335     }
336 
337     MOZ_ALWAYS_INLINE
empty()338     bool empty() const {
339         return d.u1.length == 0;
340     }
341 
342     inline bool getChar(js::ExclusiveContext* cx, size_t index, char16_t* code);
343 
344     /* Strings have either Latin1 or TwoByte chars. */
hasLatin1Chars()345     bool hasLatin1Chars() const {
346         return d.u1.flags & LATIN1_CHARS_BIT;
347     }
hasTwoByteChars()348     bool hasTwoByteChars() const {
349         return !(d.u1.flags & LATIN1_CHARS_BIT);
350     }
351 
352     /* Fallible conversions to more-derived string types. */
353 
354     inline JSLinearString* ensureLinear(js::ExclusiveContext* cx);
355     JSFlatString* ensureFlat(JSContext* cx);
356 
ensureLinear(js::ExclusiveContext * cx,JSString * str)357     static bool ensureLinear(js::ExclusiveContext* cx, JSString* str) {
358         return str->ensureLinear(cx) != nullptr;
359     }
360 
361     /* Type query and debug-checked casts */
362 
363     MOZ_ALWAYS_INLINE
isRope()364     bool isRope() const {
365         return (d.u1.flags & TYPE_FLAGS_MASK) == ROPE_FLAGS;
366     }
367 
368     MOZ_ALWAYS_INLINE
asRope()369     JSRope& asRope() const {
370         MOZ_ASSERT(isRope());
371         return *(JSRope*)this;
372     }
373 
374     MOZ_ALWAYS_INLINE
isLinear()375     bool isLinear() const {
376         return !isRope();
377     }
378 
379     MOZ_ALWAYS_INLINE
asLinear()380     JSLinearString& asLinear() const {
381         MOZ_ASSERT(JSString::isLinear());
382         return *(JSLinearString*)this;
383     }
384 
385     MOZ_ALWAYS_INLINE
isDependent()386     bool isDependent() const {
387         return (d.u1.flags & TYPE_FLAGS_MASK) == DEPENDENT_FLAGS;
388     }
389 
390     MOZ_ALWAYS_INLINE
asDependent()391     JSDependentString& asDependent() const {
392         MOZ_ASSERT(isDependent());
393         return *(JSDependentString*)this;
394     }
395 
396     MOZ_ALWAYS_INLINE
isFlat()397     bool isFlat() const {
398         return d.u1.flags & FLAT_BIT;
399     }
400 
401     MOZ_ALWAYS_INLINE
asFlat()402     JSFlatString& asFlat() const {
403         MOZ_ASSERT(isFlat());
404         return *(JSFlatString*)this;
405     }
406 
407     MOZ_ALWAYS_INLINE
isExtensible()408     bool isExtensible() const {
409         return (d.u1.flags & TYPE_FLAGS_MASK) == EXTENSIBLE_FLAGS;
410     }
411 
412     MOZ_ALWAYS_INLINE
asExtensible()413     JSExtensibleString& asExtensible() const {
414         MOZ_ASSERT(isExtensible());
415         return *(JSExtensibleString*)this;
416     }
417 
418     MOZ_ALWAYS_INLINE
isInline()419     bool isInline() const {
420         return d.u1.flags & INLINE_CHARS_BIT;
421     }
422 
423     MOZ_ALWAYS_INLINE
asInline()424     JSInlineString& asInline() const {
425         MOZ_ASSERT(isInline());
426         return *(JSInlineString*)this;
427     }
428 
429     MOZ_ALWAYS_INLINE
isFatInline()430     bool isFatInline() const {
431         return (d.u1.flags & FAT_INLINE_MASK) == FAT_INLINE_MASK;
432     }
433 
434     /* For hot code, prefer other type queries. */
isExternal()435     bool isExternal() const {
436         return (d.u1.flags & TYPE_FLAGS_MASK) == EXTERNAL_FLAGS;
437     }
438 
439     MOZ_ALWAYS_INLINE
asExternal()440     JSExternalString& asExternal() const {
441         MOZ_ASSERT(isExternal());
442         return *(JSExternalString*)this;
443     }
444 
445     MOZ_ALWAYS_INLINE
isUndepended()446     bool isUndepended() const {
447         return (d.u1.flags & TYPE_FLAGS_MASK) == UNDEPENDED_FLAGS;
448     }
449 
450     MOZ_ALWAYS_INLINE
isAtom()451     bool isAtom() const {
452         return d.u1.flags & ATOM_BIT;
453     }
454 
455     MOZ_ALWAYS_INLINE
isPermanentAtom()456     bool isPermanentAtom() const {
457         return (d.u1.flags & PERMANENT_ATOM_MASK) == PERMANENT_ATOM_MASK;
458     }
459 
460     MOZ_ALWAYS_INLINE
asAtom()461     JSAtom& asAtom() const {
462         MOZ_ASSERT(isAtom());
463         return *(JSAtom*)this;
464     }
465 
466     /* Only called by the GC for dependent or undepended strings. */
467 
hasBase()468     inline bool hasBase() const {
469         return d.u1.flags & HAS_BASE_BIT;
470     }
471 
472     inline JSLinearString* base() const;
473 
474     void traceBase(JSTracer* trc);
475 
476     /* Only called by the GC for strings with the AllocKind::STRING kind. */
477 
478     inline void finalize(js::FreeOp* fop);
479 
480     /* Gets the number of bytes that the chars take on the heap. */
481 
482     size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
483 
484     /* Offsets for direct field from jit code. */
485 
offsetOfLength()486     static size_t offsetOfLength() {
487         return offsetof(JSString, d.u1.length);
488     }
offsetOfFlags()489     static size_t offsetOfFlags() {
490         return offsetof(JSString, d.u1.flags);
491     }
492 
offsetOfNonInlineChars()493     static size_t offsetOfNonInlineChars() {
494         static_assert(offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) ==
495                       offsetof(JSString, d.s.u2.nonInlineCharsLatin1),
496                       "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
497         return offsetof(JSString, d.s.u2.nonInlineCharsTwoByte);
498     }
499 
500     static const JS::TraceKind TraceKind = JS::TraceKind::String;
501 
502 #ifdef DEBUG
503     void dump(FILE* fp);
504     void dumpCharsNoNewline(FILE* fp);
505     void dump();
506     void dumpCharsNoNewline();
507     void dumpRepresentation(FILE* fp, int indent) const;
508     void dumpRepresentationHeader(FILE* fp, int indent, const char* subclass) const;
509 
510     template <typename CharT>
511     static void dumpChars(const CharT* s, size_t len, FILE* fp=stderr);
512 
513     bool equals(const char* s);
514 #endif
515 
516     void traceChildren(JSTracer* trc);
517 
readBarrier(JSString * thing)518     static MOZ_ALWAYS_INLINE void readBarrier(JSString* thing) {
519         if (thing->isPermanentAtom())
520             return;
521 
522         TenuredCell::readBarrier(thing);
523     }
524 
writeBarrierPre(JSString * thing)525     static MOZ_ALWAYS_INLINE void writeBarrierPre(JSString* thing) {
526         if (isNullLike(thing) || thing->isPermanentAtom())
527             return;
528 
529         TenuredCell::writeBarrierPre(thing);
530     }
531 
532   private:
533     JSString() = delete;
534     JSString(const JSString& other) = delete;
535     void operator=(const JSString& other) = delete;
536 };
537 
538 class JSRope : public JSString
539 {
540     template <typename CharT>
541     bool copyCharsInternal(js::ExclusiveContext* cx, js::ScopedJSFreePtr<CharT>& out,
542                            bool nullTerminate) const;
543 
544     enum UsingBarrier { WithIncrementalBarrier, NoBarrier };
545 
546     template<UsingBarrier b, typename CharT>
547     JSFlatString* flattenInternal(js::ExclusiveContext* cx);
548 
549     template<UsingBarrier b>
550     JSFlatString* flattenInternal(js::ExclusiveContext* cx);
551 
552     friend class JSString;
553     JSFlatString* flatten(js::ExclusiveContext* cx);
554 
555     void init(js::ExclusiveContext* cx, JSString* left, JSString* right, size_t length);
556 
557   public:
558     template <js::AllowGC allowGC>
559     static inline JSRope* new_(js::ExclusiveContext* cx,
560                                typename js::MaybeRooted<JSString*, allowGC>::HandleType left,
561                                typename js::MaybeRooted<JSString*, allowGC>::HandleType right,
562                                size_t length);
563 
564     bool copyLatin1Chars(js::ExclusiveContext* cx,
565                          js::ScopedJSFreePtr<JS::Latin1Char>& out) const;
566     bool copyTwoByteChars(js::ExclusiveContext* cx, js::ScopedJSFreePtr<char16_t>& out) const;
567 
568     bool copyLatin1CharsZ(js::ExclusiveContext* cx,
569                           js::ScopedJSFreePtr<JS::Latin1Char>& out) const;
570     bool copyTwoByteCharsZ(js::ExclusiveContext* cx, js::ScopedJSFreePtr<char16_t>& out) const;
571 
572     template <typename CharT>
573     bool copyChars(js::ExclusiveContext* cx, js::ScopedJSFreePtr<CharT>& out) const;
574 
leftChild()575     JSString* leftChild() const {
576         MOZ_ASSERT(isRope());
577         return d.s.u2.left;
578     }
579 
rightChild()580     JSString* rightChild() const {
581         MOZ_ASSERT(isRope());
582         return d.s.u3.right;
583     }
584 
585     void traceChildren(JSTracer* trc);
586 
offsetOfLeft()587     static size_t offsetOfLeft() {
588         return offsetof(JSRope, d.s.u2.left);
589     }
offsetOfRight()590     static size_t offsetOfRight() {
591         return offsetof(JSRope, d.s.u3.right);
592     }
593 
594 #ifdef DEBUG
595     void dumpRepresentation(FILE* fp, int indent) const;
596 #endif
597 };
598 
599 static_assert(sizeof(JSRope) == sizeof(JSString),
600               "string subclasses must be binary-compatible with JSString");
601 
602 class JSLinearString : public JSString
603 {
604     friend class JSString;
605     friend class js::AutoStableStringChars;
606 
607     /* Vacuous and therefore unimplemented. */
608     JSLinearString* ensureLinear(js::ExclusiveContext* cx) = delete;
609     bool isLinear() const = delete;
610     JSLinearString& asLinear() const = delete;
611 
612   protected:
613     /* Returns void pointer to latin1/twoByte chars, for finalizers. */
614     MOZ_ALWAYS_INLINE
nonInlineCharsRaw()615     void* nonInlineCharsRaw() const {
616         MOZ_ASSERT(!isInline());
617         static_assert(offsetof(JSLinearString, d.s.u2.nonInlineCharsTwoByte) ==
618                       offsetof(JSLinearString, d.s.u2.nonInlineCharsLatin1),
619                       "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
620         return (void*)d.s.u2.nonInlineCharsTwoByte;
621     }
622 
623     MOZ_ALWAYS_INLINE const JS::Latin1Char* rawLatin1Chars() const;
624     MOZ_ALWAYS_INLINE const char16_t* rawTwoByteChars() const;
625 
626   public:
627     template<typename CharT>
628     MOZ_ALWAYS_INLINE
629     const CharT* nonInlineChars(const JS::AutoCheckCannotGC& nogc) const;
630 
631     MOZ_ALWAYS_INLINE
nonInlineLatin1Chars(const JS::AutoCheckCannotGC & nogc)632     const JS::Latin1Char* nonInlineLatin1Chars(const JS::AutoCheckCannotGC& nogc) const {
633         MOZ_ASSERT(!isInline());
634         MOZ_ASSERT(hasLatin1Chars());
635         return d.s.u2.nonInlineCharsLatin1;
636     }
637 
638     MOZ_ALWAYS_INLINE
nonInlineTwoByteChars(const JS::AutoCheckCannotGC & nogc)639     const char16_t* nonInlineTwoByteChars(const JS::AutoCheckCannotGC& nogc) const {
640         MOZ_ASSERT(!isInline());
641         MOZ_ASSERT(hasTwoByteChars());
642         return d.s.u2.nonInlineCharsTwoByte;
643     }
644 
645     template<typename CharT>
646     MOZ_ALWAYS_INLINE
647     const CharT* chars(const JS::AutoCheckCannotGC& nogc) const;
648 
649     MOZ_ALWAYS_INLINE
latin1Chars(const JS::AutoCheckCannotGC & nogc)650     const JS::Latin1Char* latin1Chars(const JS::AutoCheckCannotGC& nogc) const {
651         return rawLatin1Chars();
652     }
653 
654     MOZ_ALWAYS_INLINE
twoByteChars(const JS::AutoCheckCannotGC & nogc)655     const char16_t* twoByteChars(const JS::AutoCheckCannotGC& nogc) const {
656         return rawTwoByteChars();
657     }
658 
latin1Range(const JS::AutoCheckCannotGC & nogc)659     mozilla::Range<const JS::Latin1Char> latin1Range(const JS::AutoCheckCannotGC& nogc) const {
660         MOZ_ASSERT(JSString::isLinear());
661         return mozilla::Range<const JS::Latin1Char>(latin1Chars(nogc), length());
662     }
663 
twoByteRange(const JS::AutoCheckCannotGC & nogc)664     mozilla::Range<const char16_t> twoByteRange(const JS::AutoCheckCannotGC& nogc) const {
665         MOZ_ASSERT(JSString::isLinear());
666         return mozilla::Range<const char16_t>(twoByteChars(nogc), length());
667     }
668 
669     MOZ_ALWAYS_INLINE
latin1OrTwoByteChar(size_t index)670     char16_t latin1OrTwoByteChar(size_t index) const {
671         MOZ_ASSERT(JSString::isLinear());
672         MOZ_ASSERT(index < length());
673         JS::AutoCheckCannotGC nogc;
674         return hasLatin1Chars() ? latin1Chars(nogc)[index] : twoByteChars(nogc)[index];
675     }
676 
677 #ifdef DEBUG
678     void dumpRepresentationChars(FILE* fp, int indent) const;
679 #endif
680 };
681 
682 static_assert(sizeof(JSLinearString) == sizeof(JSString),
683               "string subclasses must be binary-compatible with JSString");
684 
685 class JSDependentString : public JSLinearString
686 {
687     friend class JSString;
688     JSFlatString* undepend(JSContext* cx);
689 
690     template <typename CharT>
691     JSFlatString* undependInternal(JSContext* cx);
692 
693     void init(js::ExclusiveContext* cx, JSLinearString* base, size_t start,
694               size_t length);
695 
696     /* Vacuous and therefore unimplemented. */
697     bool isDependent() const = delete;
698     JSDependentString& asDependent() const = delete;
699 
700     /* The offset of this string's chars in base->chars(). */
baseOffset()701     MOZ_ALWAYS_INLINE mozilla::Maybe<size_t> baseOffset() const {
702         MOZ_ASSERT(JSString::isDependent());
703         JS::AutoCheckCannotGC nogc;
704         if (MOZ_UNLIKELY(base()->isUndepended()))
705             return mozilla::Nothing();
706         size_t offset;
707         if (hasTwoByteChars())
708             offset = twoByteChars(nogc) - base()->twoByteChars(nogc);
709         else
710             offset = latin1Chars(nogc) - base()->latin1Chars(nogc);
711         MOZ_ASSERT(offset < base()->length());
712         return mozilla::Some(offset);
713     }
714 
715   public:
716     static inline JSLinearString* new_(js::ExclusiveContext* cx, JSLinearString* base,
717                                        size_t start, size_t length);
718 
offsetOfBase()719     inline static size_t offsetOfBase() {
720         return offsetof(JSDependentString, d.s.u3.base);
721     }
722 
723 #ifdef DEBUG
724     void dumpRepresentation(FILE* fp, int indent) const;
725 #endif
726 };
727 
728 static_assert(sizeof(JSDependentString) == sizeof(JSString),
729               "string subclasses must be binary-compatible with JSString");
730 
731 class JSFlatString : public JSLinearString
732 {
733     /* Vacuous and therefore unimplemented. */
734     JSFlatString* ensureFlat(JSContext* cx) = delete;
735     bool isFlat() const = delete;
736     JSFlatString& asFlat() const = delete;
737 
738     template <typename CharT>
739     static bool isIndexSlow(const CharT* s, size_t length, uint32_t* indexp);
740 
741     void init(const char16_t* chars, size_t length);
742     void init(const JS::Latin1Char* chars, size_t length);
743 
744   public:
745     template <js::AllowGC allowGC, typename CharT>
746     static inline JSFlatString* new_(js::ExclusiveContext* cx,
747                                      const CharT* chars, size_t length);
748 
749     /*
750      * Returns true if this string's characters store an unsigned 32-bit
751      * integer value, initializing *indexp to that value if so.  (Thus if
752      * calling isIndex returns true, js::IndexToString(cx, *indexp) will be a
753      * string equal to this string.)
754      */
isIndex(uint32_t * indexp)755     inline bool isIndex(uint32_t* indexp) const {
756         MOZ_ASSERT(JSString::isFlat());
757         JS::AutoCheckCannotGC nogc;
758         if (hasLatin1Chars()) {
759             const JS::Latin1Char* s = latin1Chars(nogc);
760             return JS7_ISDEC(*s) && isIndexSlow(s, length(), indexp);
761         }
762         const char16_t* s = twoByteChars(nogc);
763         return JS7_ISDEC(*s) && isIndexSlow(s, length(), indexp);
764     }
765 
766     /*
767      * Returns a property name represented by this string, or null on failure.
768      * You must verify that this is not an index per isIndex before calling
769      * this method.
770      */
771     inline js::PropertyName* toPropertyName(JSContext* cx);
772 
773     /*
774      * Once a JSFlatString sub-class has been added to the atom state, this
775      * operation changes the string to the JSAtom type, in place.
776      */
777     MOZ_ALWAYS_INLINE JSAtom* morphAtomizedStringIntoAtom(js::HashNumber hash);
778     MOZ_ALWAYS_INLINE JSAtom* morphAtomizedStringIntoPermanentAtom(js::HashNumber hash);
779 
780     inline void finalize(js::FreeOp* fop);
781 
782 #ifdef DEBUG
783     void dumpRepresentation(FILE* fp, int indent) const;
784 #endif
785 };
786 
787 static_assert(sizeof(JSFlatString) == sizeof(JSString),
788               "string subclasses must be binary-compatible with JSString");
789 
/*
 * A JSFlatString subclass that exposes the capacity stored in d.s.u3.capacity.
 * It declares no data members of its own.
 */
class JSExtensibleString : public JSFlatString
{
    /* Vacuous and therefore unimplemented. */
    bool isExtensible() const = delete;
    JSExtensibleString& asExtensible() const = delete;

  public:
    /* Returns d.s.u3.capacity after asserting that this string is extensible. */
    MOZ_ALWAYS_INLINE
    size_t capacity() const {
        MOZ_ASSERT(JSString::isExtensible());
        return d.s.u3.capacity;
    }

#ifdef DEBUG
    void dumpRepresentation(FILE* fp, int indent) const;
#endif
};

/* The subclass must not change the object size established by JSString. */
static_assert(sizeof(JSExtensibleString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
810 
/*
 * Common base of the inline string classes: accessors that return the
 * character storage embedded in the string header (d.inlineStorage*).
 */
class JSInlineString : public JSFlatString
{
  public:
    /*
     * Returns the inline Latin1 storage. Asserts that the string is inline and
     * has Latin1 chars. The nogc argument is not referenced in the body.
     * NOTE(review): presumably it exists so callers demonstrate that no GC can
     * occur while the returned pointer is in use -- confirm.
     */
    MOZ_ALWAYS_INLINE
    const JS::Latin1Char* latin1Chars(const JS::AutoCheckCannotGC& nogc) const {
        MOZ_ASSERT(JSString::isInline());
        MOZ_ASSERT(hasLatin1Chars());
        return d.inlineStorageLatin1;
    }

    /*
     * Returns the inline two-byte storage. Asserts that the string is inline
     * and has two-byte chars. The nogc argument is not referenced in the body.
     */
    MOZ_ALWAYS_INLINE
    const char16_t* twoByteChars(const JS::AutoCheckCannotGC& nogc) const {
        MOZ_ASSERT(JSString::isInline());
        MOZ_ASSERT(hasTwoByteChars());
        return d.inlineStorageTwoByte;
    }

    /* Specialized later in this header; forwards to JSFatInlineString::lengthFits. */
    template<typename CharT>
    static bool lengthFits(size_t length);

    /*
     * Byte offset of d.inlineStorageTwoByte within a JSInlineString.
     * NOTE(review): presumably the Latin1 inline storage starts at the same
     * offset -- confirm against the JSString layout.
     */
    static size_t offsetOfInlineStorage() {
        return offsetof(JSInlineString, d.inlineStorageTwoByte);
    }

#ifdef DEBUG
    void dumpRepresentation(FILE* fp, int indent) const;
#endif
};

/* The subclass must not change the object size established by JSString. */
static_assert(sizeof(JSInlineString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
842 
843 /*
844  * On 32-bit platforms, JSThinInlineString can store 7 Latin1 characters or 3
845  * TwoByte characters (excluding null terminator) inline. On 64-bit platforms,
846  * these numbers are 15 and 7, respectively.
847  */
class JSThinInlineString : public JSInlineString
{
  public:
    /*
     * Longest string of each encoding that fits: one less than the number of
     * inline character slots, i.e. excluding the null terminator.
     */
    static const size_t MAX_LENGTH_LATIN1 = NUM_INLINE_CHARS_LATIN1 - 1;
    static const size_t MAX_LENGTH_TWO_BYTE = NUM_INLINE_CHARS_TWO_BYTE - 1;

    template <js::AllowGC allowGC>
    static inline JSThinInlineString* new_(js::ExclusiveContext* cx);

    template <typename CharT>
    inline CharT* init(size_t length);

    /* Specialized later in this header: true when length <= MAX_LENGTH_* for CharT. */
    template<typename CharT>
    static bool lengthFits(size_t length);
};

/* A thin inline string occupies exactly the space of a JSString. */
static_assert(sizeof(JSThinInlineString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
866 
867 /*
868  * On both 32-bit and 64-bit platforms, MAX_LENGTH_TWO_BYTE is 11 and
869  * MAX_LENGTH_LATIN1 is 23 (excluding null terminator). This is deliberate,
870  * in order to minimize potential performance differences between 32-bit and
871  * 64-bit platforms.
872  *
873  * There are still some differences due to NUM_INLINE_CHARS_* being different.
874  * E.g. TwoByte strings of length 4--7 will be JSFatInlineStrings on 32-bit
875  * platforms and JSThinInlineStrings on 64-bit platforms. But the more
876  * significant transition from inline strings to non-inline strings occurs at
877  * length 11 (for TwoByte strings) and 23 (Latin1 strings) on both 32-bit and
878  * 64-bit platforms.
879  */
class JSFatInlineString : public JSInlineString
{
    /*
     * Number of extra inline characters appended after JSString's own inline
     * storage, chosen so that the total is 24 Latin1 or 12 TwoByte slots
     * (including the null terminator).
     */
    static const size_t INLINE_EXTENSION_CHARS_LATIN1 = 24 - NUM_INLINE_CHARS_LATIN1;
    static const size_t INLINE_EXTENSION_CHARS_TWO_BYTE = 12 - NUM_INLINE_CHARS_TWO_BYTE;

  protected: /* to fool clang into not warning this is unused */
    /* Extension storage; both views overlay the same bytes. */
    union {
        char   inlineStorageExtensionLatin1[INLINE_EXTENSION_CHARS_LATIN1];
        char16_t inlineStorageExtensionTwoByte[INLINE_EXTENSION_CHARS_TWO_BYTE];
    };

  public:
    template <js::AllowGC allowGC>
    static inline JSFatInlineString* new_(js::ExclusiveContext* cx);

    /* Base inline slots plus extension slots, minus one for the terminator. */
    static const size_t MAX_LENGTH_LATIN1 = JSString::NUM_INLINE_CHARS_LATIN1 +
                                            INLINE_EXTENSION_CHARS_LATIN1
                                            -1 /* null terminator */;

    static const size_t MAX_LENGTH_TWO_BYTE = JSString::NUM_INLINE_CHARS_TWO_BYTE +
                                              INLINE_EXTENSION_CHARS_TWO_BYTE
                                              -1 /* null terminator */;

    template <typename CharT>
    inline CharT* init(size_t length);

    /* Specialized later in this header: true when length <= MAX_LENGTH_* for CharT. */
    template<typename CharT>
    static bool lengthFits(size_t length);

    /* Only called by the GC for strings with the AllocKind::FAT_INLINE_STRING kind. */

    MOZ_ALWAYS_INLINE void finalize(js::FreeOp* fop);
};

/* Unlike the other subclasses, this one is larger than JSString; its size must be cell-aligned. */
static_assert(sizeof(JSFatInlineString) % js::gc::CellSize == 0,
              "fat inline strings shouldn't waste space up to the next cell "
              "boundary");
917 
/*
 * A linear two-byte string that carries a JSStringFinalizer
 * (d.s.u3.externalFinalizer) alongside its characters.
 */
class JSExternalString : public JSLinearString
{
    void init(const char16_t* chars, size_t length, const JSStringFinalizer* fin);

    /* Vacuous and therefore unimplemented. */
    bool isExternal() const = delete;
    JSExternalString& asExternal() const = delete;

  public:
    static inline JSExternalString* new_(JSContext* cx, const char16_t* chars, size_t length,
                                         const JSStringFinalizer* fin);

    /* Returns the stored finalizer after asserting that this string is external. */
    const JSStringFinalizer* externalFinalizer() const {
        MOZ_ASSERT(JSString::isExternal());
        return d.s.u3.externalFinalizer;
    }

    /*
     * External chars are never allocated inline or in the nursery, so we can
     * safely expose this without requiring an AutoCheckCannotGC argument.
     */
    const char16_t* twoByteChars() const {
        return rawTwoByteChars();
    }

    /* Only called by the GC for strings with the AllocKind::EXTERNAL_STRING kind. */

    inline void finalize(js::FreeOp* fop);

    /* Declared here (not deleted, unlike in JSFlatString); defined elsewhere. */
    JSFlatString* ensureFlat(JSContext* cx);

#ifdef DEBUG
    void dumpRepresentation(FILE* fp, int indent) const;
#endif
};

/* The subclass must not change the object size established by JSString. */
static_assert(sizeof(JSExternalString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
956 
/* Empty marker subclass of JSFlatString; it declares no members. */
class JSUndependedString : public JSFlatString
{
    /*
     * JSUndependedString is not explicitly used and is only present for
     * consistency. See JSDependentString::undepend for how a JSDependentString
     * gets morphed into a JSUndependedString.
     */
};

/* The subclass must not change the object size established by JSString. */
static_assert(sizeof(JSUndependedString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
968 
/*
 * Atom view of a flat string. JSAtom itself declares no data members; the
 * hash is stored by the js::NormalAtom / js::FatInlineAtom subclasses that
 * hash() and initHash() cast to.
 */
class JSAtom : public JSFlatString
{
    /* Vacuous and therefore unimplemented. */
    bool isAtom() const = delete;
    JSAtom& asAtom() const = delete;

  public:
    /* Returns the PropertyName for this.  isIndex() must be false. */
    inline js::PropertyName* asPropertyName();

    inline void finalize(js::FreeOp* fop);

    /* Forwards to JSString::isPermanentAtom(). */
    MOZ_ALWAYS_INLINE
    bool isPermanent() const {
        return JSString::isPermanentAtom();
    }

    // Transform this atom into a permanent atom. This is only done during
    // initialization of the runtime.
    MOZ_ALWAYS_INLINE void morphIntoPermanentAtom() {
        d.u1.flags |= PERMANENT_ATOM_MASK;
    }

    /*
     * Read / store the atom's hash. Both are defined later in this header and
     * dispatch on isFatInline() to the subclass that holds the hash field.
     */
    inline js::HashNumber hash() const;
    inline void initHash(js::HashNumber hash);

#ifdef DEBUG
    void dump(FILE* fp);
    void dump();
#endif
};

/* The subclass must not change the object size established by JSString. */
static_assert(sizeof(JSAtom) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
1003 
1004 namespace js {
1005 
/*
 * A JSAtom followed by its hash. The trailing padding_ field rounds the added
 * storage up to sizeof(uint64_t), as checked by the static_assert below.
 */
class NormalAtom : public JSAtom
{
  protected: // Silence Clang unused-field warning.
    HashNumber hash_;
    uint32_t padding_; // Ensure the size is a multiple of gc::CellSize.

  public:
    /* Returns the stored hash. */
    HashNumber hash() const {
        return hash_;
    }
    /* Stores the given hash. */
    void initHash(HashNumber hash) {
        hash_ = hash;
    }
};

static_assert(sizeof(NormalAtom) == sizeof(JSString) + sizeof(uint64_t),
              "NormalAtom must have size of a string + HashNumber, "
              "aligned to gc::CellSize");
1024 
/*
 * A JSAtom laid out like a JSFatInlineString followed by its hash:
 * inlineStorage_ reserves the bytes by which JSFatInlineString exceeds
 * JSString, then come hash_ and padding (together sizeof(uint64_t)).
 */
class FatInlineAtom : public JSAtom
{
  protected: // Silence Clang unused-field warning.
    char inlineStorage_[sizeof(JSFatInlineString) - sizeof(JSString)];
    HashNumber hash_;
    uint32_t padding_; // Ensure the size is a multiple of gc::CellSize.

  public:
    /* Returns the stored hash. */
    HashNumber hash() const {
        return hash_;
    }
    /* Stores the given hash. */
    void initHash(HashNumber hash) {
        hash_ = hash;
    }
};

static_assert(sizeof(FatInlineAtom) == sizeof(JSFatInlineString) + sizeof(uint64_t),
              "FatInlineAtom must have size of a fat inline string + HashNumber, "
              "aligned to gc::CellSize");
1044 
1045 } // namespace js
1046 
1047 inline js::HashNumber
hash()1048 JSAtom::hash() const
1049 {
1050     if (isFatInline())
1051         return static_cast<const js::FatInlineAtom*>(this)->hash();
1052     return static_cast<const js::NormalAtom*>(this)->hash();
1053 }
1054 
1055 inline void
initHash(js::HashNumber hash)1056 JSAtom::initHash(js::HashNumber hash)
1057 {
1058     if (isFatInline())
1059         return static_cast<js::FatInlineAtom*>(this)->initHash(hash);
1060     return static_cast<js::NormalAtom*>(this)->initHash(hash);
1061 }
1062 
1063 MOZ_ALWAYS_INLINE JSAtom*
morphAtomizedStringIntoAtom(js::HashNumber hash)1064 JSFlatString::morphAtomizedStringIntoAtom(js::HashNumber hash)
1065 {
1066     d.u1.flags |= ATOM_BIT;
1067     JSAtom* atom = &asAtom();
1068     atom->initHash(hash);
1069     return atom;
1070 }
1071 
1072 MOZ_ALWAYS_INLINE JSAtom*
morphAtomizedStringIntoPermanentAtom(js::HashNumber hash)1073 JSFlatString::morphAtomizedStringIntoPermanentAtom(js::HashNumber hash)
1074 {
1075     d.u1.flags |= PERMANENT_ATOM_MASK;
1076     JSAtom* atom = &asAtom();
1077     atom->initHash(hash);
1078     return atom;
1079 }
1080 
1081 namespace js {
1082 
1083 class StaticStrings
1084 {
1085   private:
1086     /* Bigger chars cannot be in a length-2 string. */
1087     static const size_t SMALL_CHAR_LIMIT    = 128U;
1088     static const size_t NUM_SMALL_CHARS     = 64U;
1089 
1090     JSAtom* length2StaticTable[NUM_SMALL_CHARS * NUM_SMALL_CHARS];
1091 
1092   public:
1093     /* We keep these public for the JITs. */
1094     static const size_t UNIT_STATIC_LIMIT   = 256U;
1095     JSAtom* unitStaticTable[UNIT_STATIC_LIMIT];
1096 
1097     static const size_t INT_STATIC_LIMIT    = 256U;
1098     JSAtom* intStaticTable[INT_STATIC_LIMIT];
1099 
StaticStrings()1100     StaticStrings() {
1101         mozilla::PodZero(this);
1102     }
1103 
1104     bool init(JSContext* cx);
1105     void trace(JSTracer* trc);
1106 
hasUint(uint32_t u)1107     static bool hasUint(uint32_t u) { return u < INT_STATIC_LIMIT; }
1108 
getUint(uint32_t u)1109     JSAtom* getUint(uint32_t u) {
1110         MOZ_ASSERT(hasUint(u));
1111         return intStaticTable[u];
1112     }
1113 
hasInt(int32_t i)1114     static bool hasInt(int32_t i) {
1115         return uint32_t(i) < INT_STATIC_LIMIT;
1116     }
1117 
getInt(int32_t i)1118     JSAtom* getInt(int32_t i) {
1119         MOZ_ASSERT(hasInt(i));
1120         return getUint(uint32_t(i));
1121     }
1122 
hasUnit(char16_t c)1123     static bool hasUnit(char16_t c) { return c < UNIT_STATIC_LIMIT; }
1124 
getUnit(char16_t c)1125     JSAtom* getUnit(char16_t c) {
1126         MOZ_ASSERT(hasUnit(c));
1127         return unitStaticTable[c];
1128     }
1129 
1130     /* May not return atom, returns null on (reported) failure. */
1131     inline JSLinearString* getUnitStringForElement(JSContext* cx, JSString* str, size_t index);
1132 
1133     template <typename CharT>
1134     static bool isStatic(const CharT* chars, size_t len);
1135     static bool isStatic(JSAtom* atom);
1136 
1137     /* Return null if no static atom exists for the given (chars, length). */
1138     template <typename CharT>
lookup(const CharT * chars,size_t length)1139     JSAtom* lookup(const CharT* chars, size_t length) {
1140         switch (length) {
1141           case 1: {
1142             char16_t c = chars[0];
1143             if (c < UNIT_STATIC_LIMIT)
1144                 return getUnit(c);
1145             return nullptr;
1146           }
1147           case 2:
1148             if (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1]))
1149                 return getLength2(chars[0], chars[1]);
1150             return nullptr;
1151           case 3:
1152             /*
1153              * Here we know that JSString::intStringTable covers only 256 (or at least
1154              * not 1000 or more) chars. We rely on order here to resolve the unit vs.
1155              * int string/length-2 string atom identity issue by giving priority to unit
1156              * strings for "0" through "9" and length-2 strings for "10" through "99".
1157              */
1158             static_assert(INT_STATIC_LIMIT <= 999,
1159                           "static int strings assumed below to be at most "
1160                           "three digits");
1161             if ('1' <= chars[0] && chars[0] <= '9' &&
1162                 '0' <= chars[1] && chars[1] <= '9' &&
1163                 '0' <= chars[2] && chars[2] <= '9') {
1164                 int i = (chars[0] - '0') * 100 +
1165                           (chars[1] - '0') * 10 +
1166                           (chars[2] - '0');
1167 
1168                 if (unsigned(i) < INT_STATIC_LIMIT)
1169                     return getInt(i);
1170             }
1171             return nullptr;
1172         }
1173 
1174         return nullptr;
1175     }
1176 
1177   private:
1178     typedef uint8_t SmallChar;
1179     static const SmallChar INVALID_SMALL_CHAR = -1;
1180 
fitsInSmallChar(char16_t c)1181     static bool fitsInSmallChar(char16_t c) {
1182         return c < SMALL_CHAR_LIMIT && toSmallChar[c] != INVALID_SMALL_CHAR;
1183     }
1184 
1185     static const SmallChar toSmallChar[];
1186 
1187     JSAtom* getLength2(char16_t c1, char16_t c2);
getLength2(uint32_t u)1188     JSAtom* getLength2(uint32_t u) {
1189         MOZ_ASSERT(u < 100);
1190         return getLength2('0' + u / 10, '0' + u % 10);
1191     }
1192 };
1193 
1194 /*
1195  * Represents an atomized string which does not contain an index (that is, an
1196  * unsigned 32-bit value).  Thus for any PropertyName propname,
1197  * ToString(ToUint32(propname)) never equals propname.
1198  *
1199  * To more concretely illustrate the utility of PropertyName, consider that it
1200  * is used to partition, in a type-safe manner, the ways to refer to a
1201  * property, as follows:
1202  *
1203  *   - uint32_t indexes,
1204  *   - PropertyName strings which don't encode uint32_t indexes, and
1205  *   - jsspecial special properties (non-ES5 properties like object-valued
1206  *     jsids, JSID_EMPTY, JSID_VOID, and maybe in the future Harmony-proposed
1207  *     private names).
1208  */
/* Adds no members to JSAtom; it only removes asPropertyName() from the interface. */
class PropertyName : public JSAtom
{
  private:
    /* Vacuous and therefore unimplemented. */
    PropertyName* asPropertyName() = delete;
};

/* The subclass must not change the object size established by JSString. */
static_assert(sizeof(PropertyName) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
1218 
/* Converts a PropertyName to a jsid via NON_INTEGER_ATOM_TO_JSID. */
static MOZ_ALWAYS_INLINE jsid
NameToId(PropertyName* name)
{
    return NON_INTEGER_ATOM_TO_JSID(name);
}
1224 
1225 using PropertyNameVector = JS::GCVector<PropertyName*>;
1226 
1227 template <typename CharT>
1228 void
1229 CopyChars(CharT* dest, const JSLinearString& str);
1230 
1231 static inline UniqueChars
StringToNewUTF8CharsZ(ExclusiveContext * maybecx,JSString & str)1232 StringToNewUTF8CharsZ(ExclusiveContext* maybecx, JSString& str)
1233 {
1234     JS::AutoCheckCannotGC nogc;
1235 
1236     JSLinearString* linear = str.ensureLinear(maybecx);
1237     if (!linear)
1238         return nullptr;
1239 
1240     return UniqueChars(linear->hasLatin1Chars()
1241                        ? JS::CharsToNewUTF8CharsZ(maybecx, linear->latin1Range(nogc)).c_str()
1242                        : JS::CharsToNewUTF8CharsZ(maybecx, linear->twoByteRange(nogc)).c_str());
1243 }
1244 
1245 /* GC-allocate a string descriptor for the given malloc-allocated chars. */
1246 template <js::AllowGC allowGC, typename CharT>
1247 extern JSFlatString*
1248 NewString(js::ExclusiveContext* cx, CharT* chars, size_t length);
1249 
1250 /* Like NewString, but doesn't try to deflate to Latin1. */
1251 template <js::AllowGC allowGC, typename CharT>
1252 extern JSFlatString*
1253 NewStringDontDeflate(js::ExclusiveContext* cx, CharT* chars, size_t length);
1254 
1255 extern JSLinearString*
1256 NewDependentString(JSContext* cx, JSString* base, size_t start, size_t length);
1257 
1258 /* Take ownership of an array of Latin1Chars. */
1259 extern JSFlatString*
1260 NewLatin1StringZ(js::ExclusiveContext* cx, UniqueChars chars);
1261 
1262 /* Copy a counted string and GC-allocate a descriptor for it. */
1263 template <js::AllowGC allowGC, typename CharT>
1264 extern JSFlatString*
1265 NewStringCopyN(js::ExclusiveContext* cx, const CharT* s, size_t n);
1266 
/*
 * Overload for plain char input: reinterprets |s| as Latin1Char and forwards
 * to the templated NewStringCopyN with the same count.
 */
template <js::AllowGC allowGC>
inline JSFlatString*
NewStringCopyN(ExclusiveContext* cx, const char* s, size_t n)
{
    return NewStringCopyN<allowGC>(cx, reinterpret_cast<const Latin1Char*>(s), n);
}
1273 
1274 /* Like NewStringCopyN, but doesn't try to deflate to Latin1. */
1275 template <js::AllowGC allowGC, typename CharT>
1276 extern JSFlatString*
1277 NewStringCopyNDontDeflate(js::ExclusiveContext* cx, const CharT* s, size_t n);
1278 
/*
 * Copy a C string and GC-allocate a descriptor for it.
 *
 * char16_t variant: the length is measured with js_strlen and the work is
 * delegated to NewStringCopyN.
 */
template <js::AllowGC allowGC>
inline JSFlatString*
NewStringCopyZ(js::ExclusiveContext* cx, const char16_t* s)
{
    return NewStringCopyN<allowGC>(cx, s, js_strlen(s));
}
1286 
/*
 * char variant of NewStringCopyZ: the length is measured with strlen and the
 * work is delegated to NewStringCopyN.
 */
template <js::AllowGC allowGC>
inline JSFlatString*
NewStringCopyZ(js::ExclusiveContext* cx, const char* s)
{
    return NewStringCopyN<allowGC>(cx, s, strlen(s));
}
1293 
1294 template <js::AllowGC allowGC>
1295 extern JSFlatString*
1296 NewStringCopyUTF8N(JSContext* cx, const JS::UTF8Chars utf8);
1297 
/*
 * Null-terminated counterpart of NewStringCopyUTF8N: measures |utf8| with
 * strlen, wraps pointer and length in a JS::UTF8Chars, and forwards.
 */
template <js::AllowGC allowGC>
inline JSFlatString*
NewStringCopyUTF8Z(JSContext* cx, const JS::ConstUTF8CharsZ utf8)
{
    return NewStringCopyUTF8N<allowGC>(cx, JS::UTF8Chars(utf8.c_str(), strlen(utf8.c_str())));
}
1304 
1305 JS_STATIC_ASSERT(sizeof(HashNumber) == 4);
1306 
1307 } /* namespace js */
1308 
// Addon IDs are interned atoms which are never destroyed. This detail is
// not exposed outside the API.
//
// The class body is empty: it adds no members or methods to JSAtom.
class JSAddonId : public JSAtom
{};
1313 
1314 MOZ_ALWAYS_INLINE bool
getChar(js::ExclusiveContext * cx,size_t index,char16_t * code)1315 JSString::getChar(js::ExclusiveContext* cx, size_t index, char16_t* code)
1316 {
1317     MOZ_ASSERT(index < length());
1318 
1319     /*
1320      * Optimization for one level deep ropes.
1321      * This is common for the following pattern:
1322      *
1323      * while() {
1324      *   text = text.substr(0, x) + "bla" + text.substr(x)
1325      *   test.charCodeAt(x + 1)
1326      * }
1327      */
1328     JSString* str;
1329     if (isRope()) {
1330         JSRope* rope = &asRope();
1331         if (uint32_t(index) < rope->leftChild()->length()) {
1332             str = rope->leftChild();
1333         } else {
1334             str = rope->rightChild();
1335             index -= rope->leftChild()->length();
1336         }
1337     } else {
1338         str = this;
1339     }
1340 
1341     if (!str->ensureLinear(cx))
1342         return false;
1343 
1344     *code = str->asLinear().latin1OrTwoByteChar(index);
1345     return true;
1346 }
1347 
1348 MOZ_ALWAYS_INLINE JSLinearString*
ensureLinear(js::ExclusiveContext * cx)1349 JSString::ensureLinear(js::ExclusiveContext* cx)
1350 {
1351     return isLinear()
1352            ? &asLinear()
1353            : asRope().flatten(cx);
1354 }
1355 
/*
 * Returns the base string stored in d.s.u3.base. Asserts that this string has
 * a base and that the base is not an inline string.
 */
inline JSLinearString*
JSString::base() const
{
    MOZ_ASSERT(hasBase());
    MOZ_ASSERT(!d.s.u3.base->isInline());
    return d.s.u3.base;
}
1363 
/* char16_t specialization of nonInlineChars: forwards to nonInlineTwoByteChars. */
template<>
MOZ_ALWAYS_INLINE const char16_t*
JSLinearString::nonInlineChars(const JS::AutoCheckCannotGC& nogc) const
{
    return nonInlineTwoByteChars(nogc);
}
1370 
/* Latin1Char specialization of nonInlineChars: forwards to nonInlineLatin1Chars. */
template<>
MOZ_ALWAYS_INLINE const JS::Latin1Char*
JSLinearString::nonInlineChars(const JS::AutoCheckCannotGC& nogc) const
{
    return nonInlineLatin1Chars(nogc);
}
1377 
/*
 * char16_t specialization of chars: returns rawTwoByteChars(). The nogc
 * argument is not referenced in the body.
 */
template<>
MOZ_ALWAYS_INLINE const char16_t*
JSLinearString::chars(const JS::AutoCheckCannotGC& nogc) const
{
    return rawTwoByteChars();
}
1384 
/*
 * Latin1Char specialization of chars: returns rawLatin1Chars(). The nogc
 * argument is not referenced in the body.
 */
template<>
MOZ_ALWAYS_INLINE const JS::Latin1Char*
JSLinearString::chars(const JS::AutoCheckCannotGC& nogc) const
{
    return rawLatin1Chars();
}
1391 
/* Latin1Char specialization of copyChars: forwards to copyLatin1Chars. */
template <>
MOZ_ALWAYS_INLINE bool
JSRope::copyChars<JS::Latin1Char>(js::ExclusiveContext* cx,
                                  js::ScopedJSFreePtr<JS::Latin1Char>& out) const
{
    return copyLatin1Chars(cx, out);
}
1399 
/* char16_t specialization of copyChars: forwards to copyTwoByteChars. */
template <>
MOZ_ALWAYS_INLINE bool
JSRope::copyChars<char16_t>(js::ExclusiveContext* cx, js::ScopedJSFreePtr<char16_t>& out) const
{
    return copyTwoByteChars(cx, out);
}
1406 
/* True when a Latin1 string of |length| characters fits in a thin inline string. */
template<>
MOZ_ALWAYS_INLINE bool
JSThinInlineString::lengthFits<JS::Latin1Char>(size_t length)
{
    return length <= MAX_LENGTH_LATIN1;
}
1413 
/* True when a two-byte string of |length| characters fits in a thin inline string. */
template<>
MOZ_ALWAYS_INLINE bool
JSThinInlineString::lengthFits<char16_t>(size_t length)
{
    return length <= MAX_LENGTH_TWO_BYTE;
}
1420 
/*
 * True when a Latin1 string of |length| characters fits in a fat inline
 * string. The static_asserts check at compile time that the extension storage
 * is a whole number of GC cells and that MAX_LENGTH_LATIN1 + 1 equals the
 * number of chars from the start of the inline storage to the end of the
 * object.
 */
template<>
MOZ_ALWAYS_INLINE bool
JSFatInlineString::lengthFits<JS::Latin1Char>(size_t length)
{
    static_assert((INLINE_EXTENSION_CHARS_LATIN1 * sizeof(char)) % js::gc::CellSize == 0,
                  "fat inline strings' Latin1 characters don't exactly "
                  "fill subsequent cells and thus are wasteful");
    static_assert(MAX_LENGTH_LATIN1 + 1 ==
                  (sizeof(JSFatInlineString) -
                   offsetof(JSFatInlineString, d.inlineStorageLatin1)) / sizeof(char),
                  "MAX_LENGTH_LATIN1 must be one less than inline Latin1 "
                  "storage count");

    return length <= MAX_LENGTH_LATIN1;
}
1436 
/*
 * True when a two-byte string of |length| characters fits in a fat inline
 * string. The static_asserts check at compile time that the extension storage
 * is a whole number of GC cells and that MAX_LENGTH_TWO_BYTE + 1 equals the
 * number of char16_t slots from the start of the inline storage to the end of
 * the object.
 */
template<>
MOZ_ALWAYS_INLINE bool
JSFatInlineString::lengthFits<char16_t>(size_t length)
{
    static_assert((INLINE_EXTENSION_CHARS_TWO_BYTE * sizeof(char16_t)) % js::gc::CellSize == 0,
                  "fat inline strings' char16_t characters don't exactly "
                  "fill subsequent cells and thus are wasteful");
    static_assert(MAX_LENGTH_TWO_BYTE + 1 ==
                  (sizeof(JSFatInlineString) -
                   offsetof(JSFatInlineString, d.inlineStorageTwoByte)) / sizeof(char16_t),
                  "MAX_LENGTH_TWO_BYTE must be one less than inline "
                  "char16_t storage count");

    return length <= MAX_LENGTH_TWO_BYTE;
}
1452 
/* Latin1 variant: answers with the fat inline string limit. */
template<>
MOZ_ALWAYS_INLINE bool
JSInlineString::lengthFits<JS::Latin1Char>(size_t length)
{
    // If it fits in a fat inline string, it fits in any inline string.
    return JSFatInlineString::lengthFits<JS::Latin1Char>(length);
}
1460 
/* Two-byte variant: answers with the fat inline string limit. */
template<>
MOZ_ALWAYS_INLINE bool
JSInlineString::lengthFits<char16_t>(size_t length)
{
    // If it fits in a fat inline string, it fits in any inline string.
    return JSFatInlineString::lengthFits<char16_t>(length);
}
1468 
/* char16_t specialization: stores |chars| in d.s.u2.nonInlineCharsTwoByte. */
template<>
MOZ_ALWAYS_INLINE void
JSString::setNonInlineChars(const char16_t* chars)
{
    d.s.u2.nonInlineCharsTwoByte = chars;
}
1475 
/* Latin1Char specialization: stores |chars| in d.s.u2.nonInlineCharsLatin1. */
template<>
MOZ_ALWAYS_INLINE void
JSString::setNonInlineChars(const JS::Latin1Char* chars)
{
    d.s.u2.nonInlineCharsLatin1 = chars;
}
1482 
1483 MOZ_ALWAYS_INLINE const JS::Latin1Char*
rawLatin1Chars()1484 JSLinearString::rawLatin1Chars() const
1485 {
1486     MOZ_ASSERT(JSString::isLinear());
1487     MOZ_ASSERT(hasLatin1Chars());
1488     return isInline() ? d.inlineStorageLatin1 : d.s.u2.nonInlineCharsLatin1;
1489 }
1490 
1491 MOZ_ALWAYS_INLINE const char16_t*
rawTwoByteChars()1492 JSLinearString::rawTwoByteChars() const
1493 {
1494     MOZ_ASSERT(JSString::isLinear());
1495     MOZ_ASSERT(hasTwoByteChars());
1496     return isInline() ? d.inlineStorageTwoByte : d.s.u2.nonInlineCharsTwoByte;
1497 }
1498 
/*
 * Downcasts this atom to js::PropertyName. In DEBUG builds it first asserts
 * that the atom is not an index according to isIndex().
 */
inline js::PropertyName*
JSAtom::asPropertyName()
{
#ifdef DEBUG
    /* isIndex requires an out-parameter; the value is not used. */
    uint32_t dummy;
    MOZ_ASSERT(!isIndex(&dummy));
#endif
    return static_cast<js::PropertyName*>(this);
}
1508 
1509 #endif /* vm_String_h */
1510