1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #ifndef vm_String_h
8 #define vm_String_h
9
#include "mozilla/Maybe.h"
#include "mozilla/MemoryReporting.h"
#include "mozilla/PodOperations.h"
#include "mozilla/Range.h"

#include "jsapi.h"
#include "jsfriendapi.h"
#include "jsstr.h"

#include "gc/Barrier.h"
#include "gc/Heap.h"
#include "gc/Marking.h"
#include "gc/Rooting.h"
#include "js/CharacterEncoding.h"
#include "js/GCAPI.h"
#include "js/RootingAPI.h"
25
/*
 * Forward declarations of string subclasses that are named (as return types of
 * the checked casts asRope(), asDependent(), ...) before they are defined.
 */
class JSDependentString;
class JSExtensibleString;
class JSExternalString;
class JSInlineString;
class JSRope;
31
namespace js {

class AutoStableStringChars;
class StaticStrings;
class PropertyName;

/*
 * The buffer length required to contain any unsigned 32-bit integer: the
 * number of decimal digits in "4294967295" (UINT32_MAX). The string literal's
 * null terminator is subtracted out, so room for one is NOT included.
 */
static const size_t UINT32_CHAR_BUFFER_LENGTH = sizeof("4294967295") - 1;

} /* namespace js */
42
43 /*
44 * JavaScript strings
45 *
46 * Conceptually, a JS string is just an array of chars and a length. This array
47 * of chars may or may not be null-terminated and, if it is, the null character
48 * is not included in the length.
49 *
50 * To improve performance of common operations, the following optimizations are
51 * made which affect the engine's representation of strings:
52 *
53 * - The plain vanilla representation is a "flat" string which consists of a
54 * string header in the GC heap and a malloc'd null terminated char array.
55 *
 *  - To avoid copying a substring of an existing "base" string, a "dependent"
 *    string (JSDependentString) can be created which points into the base
 *    string's char array.
59 *
60 * - To avoid O(n^2) char buffer copying, a "rope" node (JSRope) can be created
61 * to represent a delayed string concatenation. Concatenation (called
62 * flattening) is performed if and when a linear char array is requested. In
63 * general, ropes form a binary dag whose internal nodes are JSRope string
64 * headers with no associated char array and whose leaf nodes are either flat
65 * or dependent strings.
66 *
67 * - To avoid copying the leftmost string when flattening, we may produce an
68 * "extensible" string, which tracks not only its actual length but also its
69 * buffer's overall size. If such an "extensible" string appears as the
70 * leftmost string in a subsequent flatten, and its buffer has enough unused
71 * space, we can simply flatten the rest of the ropes into its buffer,
72 * leaving its text in place. We then transfer ownership of its buffer to the
73 * flattened rope, and mutate the donor extensible string into a dependent
74 * string referencing its original buffer.
75 *
76 * (The term "extensible" does not imply that we ever 'realloc' the buffer.
77 * Extensible strings may have dependent strings pointing into them, and the
78 * JSAPI hands out pointers to flat strings' buffers, so resizing with
79 * 'realloc' is generally not possible.)
80 *
81 * - To avoid allocating small char arrays, short strings can be stored inline
82 * in the string header (JSInlineString). These come in two flavours:
83 * JSThinInlineString, which is the same size as JSString; and
84 * JSFatInlineString, which has a larger header and so can fit more chars.
85 *
 *  - To avoid O(n) string equality comparison, strings can be canonicalized
 *    to "atoms" (JSAtom) such that there is a single atom with a given
 *    (length,chars).
89 *
90 * - To avoid copying all strings created through the JSAPI, an "external"
91 * string (JSExternalString) can be created whose chars are managed by the
92 * JSAPI client.
93 *
94 * - To avoid using two bytes per character for every string, string characters
95 * are stored as Latin1 instead of TwoByte if all characters are representable
96 * in Latin1.
97 *
98 * Although all strings share the same basic memory layout, we can conceptually
99 * arrange them into a hierarchy of operations/invariants and represent this
100 * hierarchy in C++ with classes:
101 *
 *  C++ type                     operations+fields / invariants+properties
 *  ==========================   =========================================
 *  JSString (abstract)          get(Latin1|TwoByte)CharsZ, get(Latin1|TwoByte)Chars, length / -
 *   | \
 *   | JSRope                    leftChild, rightChild / -
 *   |
 *  JSLinearString (abstract)    latin1Chars, twoByteChars / might be null-terminated
 *   |  |
 *   |  +-- JSDependentString    base / -
 *   |  |
 *   |  +-- JSExternalString     - / char array memory managed by embedding
 *   |
 *  JSFlatString                 - / null terminated
 *   |  |
 *   |  +-- JSExtensibleString   tracks total buffer capacity (including current text)
 *   |  |
 *   |  +-- JSUndependedString   original dependent base / -
 *   |  |
 *   |  +-- JSInlineString (abstract)    - / chars stored in header
 *   |      |
 *   |      +-- JSThinInlineString       - / header is normal
 *   |      |
 *   |      +-- JSFatInlineString        - / header is fat
 *   |
 *  JSAtom (abstract)            - / string equality === pointer equality
 *   |  |
 *   |  +-- js::NormalAtom       - JSFlatString + atom hash code
 *   |  |
 *   |  +-- js::FatInlineAtom    - JSFatInlineString + atom hash code
 *   |
 *  js::PropertyName             - / chars don't contain an index (uint32_t)
133 *
134 * Classes marked with (abstract) above are not literally C++ Abstract Base
135 * Classes (since there are no virtual functions, pure or not, in this
136 * hierarchy), but have the same meaning: there are no strings with this type as
137 * its most-derived type.
138 *
139 * Atoms can additionally be permanent, i.e. unable to be collected, and can
140 * be combined with other string types to create additional most-derived types
141 * that satisfy the invariants of more than one of the abovementioned
142 * most-derived types. Furthermore, each atom stores a hash number (based on its
143 * chars). This hash number is used as key in the atoms table and when the atom
144 * is used as key in a JS Map/Set.
145 *
146 * Derived string types can be queried from ancestor types via isX() and
147 * retrieved with asX() debug-only-checked casts.
148 *
 * The ensureX() operations mutate 'this' in place to effectively make the type
 * be at least X (e.g., ensureLinear will change a JSRope to be a JSFlatString).
151 */
152
153 class JSString : public js::gc::TenuredCell
154 {
155 protected:
156 static const size_t NUM_INLINE_CHARS_LATIN1 = 2 * sizeof(void*) / sizeof(JS::Latin1Char);
157 static const size_t NUM_INLINE_CHARS_TWO_BYTE = 2 * sizeof(void*) / sizeof(char16_t);
158
159 /* Fields only apply to string types commented on the right. */
160 struct Data
161 {
162 union {
163 struct {
164 uint32_t flags; /* JSString */
165 uint32_t length; /* JSString */
166 };
167 uintptr_t flattenData; /* JSRope (temporary while flattening) */
168 } u1;
169 union {
170 union {
171 /* JS(Fat)InlineString */
172 JS::Latin1Char inlineStorageLatin1[NUM_INLINE_CHARS_LATIN1];
173 char16_t inlineStorageTwoByte[NUM_INLINE_CHARS_TWO_BYTE];
174 };
175 struct {
176 union {
177 const JS::Latin1Char* nonInlineCharsLatin1; /* JSLinearString, except JS(Fat)InlineString */
178 const char16_t* nonInlineCharsTwoByte;/* JSLinearString, except JS(Fat)InlineString */
179 JSString* left; /* JSRope */
180 } u2;
181 union {
182 JSLinearString* base; /* JS(Dependent|Undepended)String */
183 JSString* right; /* JSRope */
184 size_t capacity; /* JSFlatString (extensible) */
185 const JSStringFinalizer* externalFinalizer;/* JSExternalString */
186 } u3;
187 } s;
188 };
189 } d;
190
191 public:
192 /* Flags exposed only for jits */
193
194 /*
195 * The Flags Word
196 *
197 * The flags word stores both the string's type and its character encoding.
198 *
199 * If LATIN1_CHARS_BIT is set, the string's characters are stored as Latin1
200 * instead of TwoByte. This flag can also be set for ropes, if both the
201 * left and right nodes are Latin1. Flattening will result in a Latin1
202 * string in this case.
203 *
204 * The other flags store the string's type. Instead of using a dense index
205 * to represent the most-derived type, string types are encoded to allow
206 * single-op tests for hot queries (isRope, isDependent, isFlat, isAtom)
207 * which, in view of subtyping, would require slower
208 * (isX() || isY() || isZ()).
209 *
210 * The string type encoding can be summarized as follows. The "instance
211 * encoding" entry for a type specifies the flag bits used to create a
212 * string instance of that type. Abstract types have no instances and thus
213 * have no such entry. The "subtype predicate" entry for a type specifies
214 * the predicate used to query whether a JSString instance is subtype
215 * (reflexively) of that type.
216 *
217 * String Instance Subtype
218 * type encoding predicate
219 * ------------------------------------
220 * Rope 000000 000000
221 * Linear - !000000
222 * HasBase - xxxx1x
223 * Dependent 000010 000010
224 * External 100000 100000
225 * Flat - xxxxx1
226 * Undepended 000011 000011
227 * Extensible 010001 010001
228 * Inline 000101 xxx1xx
229 * FatInline 010101 x1x1xx
230 * Atom 001001 xx1xxx
231 * PermanentAtom 101001 1x1xxx
232 * InlineAtom - xx11xx
233 * FatInlineAtom - x111xx
234 *
235 * Note that the first 4 flag bits (from right to left in the previous table)
236 * have the following meaning and can be used for some hot queries:
237 *
238 * Bit 0: IsFlat
239 * Bit 1: HasBase (Dependent, Undepended)
240 * Bit 2: IsInline (Inline, FatInline)
241 * Bit 3: IsAtom (Atom, PermanentAtom)
242 *
243 * "HasBase" here refers to the two string types that have a 'base' field:
244 * JSDependentString and JSUndependedString.
245 * A JSUndependedString is a JSDependentString which has been 'fixed' (by ensureFixed)
246 * to be null-terminated. In such cases, the string must keep marking its base since
247 * there may be any number of *other* JSDependentStrings transitively depending on it.
248 *
249 */
250
251 static const uint32_t FLAT_BIT = JS_BIT(0);
252 static const uint32_t HAS_BASE_BIT = JS_BIT(1);
253 static const uint32_t INLINE_CHARS_BIT = JS_BIT(2);
254 static const uint32_t ATOM_BIT = JS_BIT(3);
255
256 static const uint32_t ROPE_FLAGS = 0;
257 static const uint32_t DEPENDENT_FLAGS = HAS_BASE_BIT;
258 static const uint32_t UNDEPENDED_FLAGS = FLAT_BIT | HAS_BASE_BIT;
259 static const uint32_t EXTENSIBLE_FLAGS = FLAT_BIT | JS_BIT(4);
260 static const uint32_t EXTERNAL_FLAGS = JS_BIT(5);
261
262 static const uint32_t FAT_INLINE_MASK = INLINE_CHARS_BIT | JS_BIT(4);
263 static const uint32_t PERMANENT_ATOM_MASK = ATOM_BIT | JS_BIT(5);
264
265 /* Initial flags for thin inline and fat inline strings. */
266 static const uint32_t INIT_THIN_INLINE_FLAGS = FLAT_BIT | INLINE_CHARS_BIT;
267 static const uint32_t INIT_FAT_INLINE_FLAGS = FLAT_BIT | FAT_INLINE_MASK;
268
269 static const uint32_t TYPE_FLAGS_MASK = JS_BIT(6) - 1;
270
271 static const uint32_t LATIN1_CHARS_BIT = JS_BIT(6);
272
273 static const uint32_t MAX_LENGTH = js::MaxStringLength;
274
275 static const JS::Latin1Char MAX_LATIN1_CHAR = 0xff;
276
277 /*
278 * Helper function to validate that a string of a given length is
279 * representable by a JSString. An allocation overflow is reported if false
280 * is returned.
281 */
282 static inline bool validateLength(js::ExclusiveContext* maybecx, size_t length);
283
staticAsserts()284 static void staticAsserts() {
285 static_assert(JSString::MAX_LENGTH < UINT32_MAX, "Length must fit in 32 bits");
286 static_assert(sizeof(JSString) ==
287 (offsetof(JSString, d.inlineStorageLatin1) +
288 NUM_INLINE_CHARS_LATIN1 * sizeof(char)),
289 "Inline Latin1 chars must fit in a JSString");
290 static_assert(sizeof(JSString) ==
291 (offsetof(JSString, d.inlineStorageTwoByte) +
292 NUM_INLINE_CHARS_TWO_BYTE * sizeof(char16_t)),
293 "Inline char16_t chars must fit in a JSString");
294
295 /* Ensure js::shadow::String has the same layout. */
296 using js::shadow::String;
297 static_assert(offsetof(JSString, d.u1.length) == offsetof(String, length),
298 "shadow::String length offset must match JSString");
299 static_assert(offsetof(JSString, d.u1.flags) == offsetof(String, flags),
300 "shadow::String flags offset must match JSString");
301 static_assert(offsetof(JSString, d.s.u2.nonInlineCharsLatin1) == offsetof(String, nonInlineCharsLatin1),
302 "shadow::String nonInlineChars offset must match JSString");
303 static_assert(offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) == offsetof(String, nonInlineCharsTwoByte),
304 "shadow::String nonInlineChars offset must match JSString");
305 static_assert(offsetof(JSString, d.inlineStorageLatin1) == offsetof(String, inlineStorageLatin1),
306 "shadow::String inlineStorage offset must match JSString");
307 static_assert(offsetof(JSString, d.inlineStorageTwoByte) == offsetof(String, inlineStorageTwoByte),
308 "shadow::String inlineStorage offset must match JSString");
309 static_assert(INLINE_CHARS_BIT == String::INLINE_CHARS_BIT,
310 "shadow::String::INLINE_CHARS_BIT must match JSString::INLINE_CHARS_BIT");
311 static_assert(LATIN1_CHARS_BIT == String::LATIN1_CHARS_BIT,
312 "shadow::String::LATIN1_CHARS_BIT must match JSString::LATIN1_CHARS_BIT");
313 static_assert(TYPE_FLAGS_MASK == String::TYPE_FLAGS_MASK,
314 "shadow::String::TYPE_FLAGS_MASK must match JSString::TYPE_FLAGS_MASK");
315 static_assert(ROPE_FLAGS == String::ROPE_FLAGS,
316 "shadow::String::ROPE_FLAGS must match JSString::ROPE_FLAGS");
317 }
318
319 /* Avoid lame compile errors in JSRope::flatten */
320 friend class JSRope;
321
322 friend class js::gc::RelocationOverlay;
323
324 protected:
325 template <typename CharT>
326 MOZ_ALWAYS_INLINE
327 void setNonInlineChars(const CharT* chars);
328
329 public:
330 /* All strings have length. */
331
332 MOZ_ALWAYS_INLINE
length()333 size_t length() const {
334 return d.u1.length;
335 }
336
337 MOZ_ALWAYS_INLINE
empty()338 bool empty() const {
339 return d.u1.length == 0;
340 }
341
342 inline bool getChar(js::ExclusiveContext* cx, size_t index, char16_t* code);
343
344 /* Strings have either Latin1 or TwoByte chars. */
hasLatin1Chars()345 bool hasLatin1Chars() const {
346 return d.u1.flags & LATIN1_CHARS_BIT;
347 }
hasTwoByteChars()348 bool hasTwoByteChars() const {
349 return !(d.u1.flags & LATIN1_CHARS_BIT);
350 }
351
352 /* Fallible conversions to more-derived string types. */
353
354 inline JSLinearString* ensureLinear(js::ExclusiveContext* cx);
355 JSFlatString* ensureFlat(JSContext* cx);
356
ensureLinear(js::ExclusiveContext * cx,JSString * str)357 static bool ensureLinear(js::ExclusiveContext* cx, JSString* str) {
358 return str->ensureLinear(cx) != nullptr;
359 }
360
361 /* Type query and debug-checked casts */
362
363 MOZ_ALWAYS_INLINE
isRope()364 bool isRope() const {
365 return (d.u1.flags & TYPE_FLAGS_MASK) == ROPE_FLAGS;
366 }
367
368 MOZ_ALWAYS_INLINE
asRope()369 JSRope& asRope() const {
370 MOZ_ASSERT(isRope());
371 return *(JSRope*)this;
372 }
373
374 MOZ_ALWAYS_INLINE
isLinear()375 bool isLinear() const {
376 return !isRope();
377 }
378
379 MOZ_ALWAYS_INLINE
asLinear()380 JSLinearString& asLinear() const {
381 MOZ_ASSERT(JSString::isLinear());
382 return *(JSLinearString*)this;
383 }
384
385 MOZ_ALWAYS_INLINE
isDependent()386 bool isDependent() const {
387 return (d.u1.flags & TYPE_FLAGS_MASK) == DEPENDENT_FLAGS;
388 }
389
390 MOZ_ALWAYS_INLINE
asDependent()391 JSDependentString& asDependent() const {
392 MOZ_ASSERT(isDependent());
393 return *(JSDependentString*)this;
394 }
395
396 MOZ_ALWAYS_INLINE
isFlat()397 bool isFlat() const {
398 return d.u1.flags & FLAT_BIT;
399 }
400
401 MOZ_ALWAYS_INLINE
asFlat()402 JSFlatString& asFlat() const {
403 MOZ_ASSERT(isFlat());
404 return *(JSFlatString*)this;
405 }
406
407 MOZ_ALWAYS_INLINE
isExtensible()408 bool isExtensible() const {
409 return (d.u1.flags & TYPE_FLAGS_MASK) == EXTENSIBLE_FLAGS;
410 }
411
412 MOZ_ALWAYS_INLINE
asExtensible()413 JSExtensibleString& asExtensible() const {
414 MOZ_ASSERT(isExtensible());
415 return *(JSExtensibleString*)this;
416 }
417
418 MOZ_ALWAYS_INLINE
isInline()419 bool isInline() const {
420 return d.u1.flags & INLINE_CHARS_BIT;
421 }
422
423 MOZ_ALWAYS_INLINE
asInline()424 JSInlineString& asInline() const {
425 MOZ_ASSERT(isInline());
426 return *(JSInlineString*)this;
427 }
428
429 MOZ_ALWAYS_INLINE
isFatInline()430 bool isFatInline() const {
431 return (d.u1.flags & FAT_INLINE_MASK) == FAT_INLINE_MASK;
432 }
433
434 /* For hot code, prefer other type queries. */
isExternal()435 bool isExternal() const {
436 return (d.u1.flags & TYPE_FLAGS_MASK) == EXTERNAL_FLAGS;
437 }
438
439 MOZ_ALWAYS_INLINE
asExternal()440 JSExternalString& asExternal() const {
441 MOZ_ASSERT(isExternal());
442 return *(JSExternalString*)this;
443 }
444
445 MOZ_ALWAYS_INLINE
isUndepended()446 bool isUndepended() const {
447 return (d.u1.flags & TYPE_FLAGS_MASK) == UNDEPENDED_FLAGS;
448 }
449
450 MOZ_ALWAYS_INLINE
isAtom()451 bool isAtom() const {
452 return d.u1.flags & ATOM_BIT;
453 }
454
455 MOZ_ALWAYS_INLINE
isPermanentAtom()456 bool isPermanentAtom() const {
457 return (d.u1.flags & PERMANENT_ATOM_MASK) == PERMANENT_ATOM_MASK;
458 }
459
460 MOZ_ALWAYS_INLINE
asAtom()461 JSAtom& asAtom() const {
462 MOZ_ASSERT(isAtom());
463 return *(JSAtom*)this;
464 }
465
466 /* Only called by the GC for dependent or undepended strings. */
467
hasBase()468 inline bool hasBase() const {
469 return d.u1.flags & HAS_BASE_BIT;
470 }
471
472 inline JSLinearString* base() const;
473
474 void traceBase(JSTracer* trc);
475
476 /* Only called by the GC for strings with the AllocKind::STRING kind. */
477
478 inline void finalize(js::FreeOp* fop);
479
480 /* Gets the number of bytes that the chars take on the heap. */
481
482 size_t sizeOfExcludingThis(mozilla::MallocSizeOf mallocSizeOf);
483
484 /* Offsets for direct field from jit code. */
485
offsetOfLength()486 static size_t offsetOfLength() {
487 return offsetof(JSString, d.u1.length);
488 }
offsetOfFlags()489 static size_t offsetOfFlags() {
490 return offsetof(JSString, d.u1.flags);
491 }
492
offsetOfNonInlineChars()493 static size_t offsetOfNonInlineChars() {
494 static_assert(offsetof(JSString, d.s.u2.nonInlineCharsTwoByte) ==
495 offsetof(JSString, d.s.u2.nonInlineCharsLatin1),
496 "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
497 return offsetof(JSString, d.s.u2.nonInlineCharsTwoByte);
498 }
499
500 static const JS::TraceKind TraceKind = JS::TraceKind::String;
501
502 #ifdef DEBUG
503 void dump(FILE* fp);
504 void dumpCharsNoNewline(FILE* fp);
505 void dump();
506 void dumpCharsNoNewline();
507 void dumpRepresentation(FILE* fp, int indent) const;
508 void dumpRepresentationHeader(FILE* fp, int indent, const char* subclass) const;
509
510 template <typename CharT>
511 static void dumpChars(const CharT* s, size_t len, FILE* fp=stderr);
512
513 bool equals(const char* s);
514 #endif
515
516 void traceChildren(JSTracer* trc);
517
readBarrier(JSString * thing)518 static MOZ_ALWAYS_INLINE void readBarrier(JSString* thing) {
519 if (thing->isPermanentAtom())
520 return;
521
522 TenuredCell::readBarrier(thing);
523 }
524
writeBarrierPre(JSString * thing)525 static MOZ_ALWAYS_INLINE void writeBarrierPre(JSString* thing) {
526 if (isNullLike(thing) || thing->isPermanentAtom())
527 return;
528
529 TenuredCell::writeBarrierPre(thing);
530 }
531
532 private:
533 JSString() = delete;
534 JSString(const JSString& other) = delete;
535 void operator=(const JSString& other) = delete;
536 };
537
538 class JSRope : public JSString
539 {
540 template <typename CharT>
541 bool copyCharsInternal(js::ExclusiveContext* cx, js::ScopedJSFreePtr<CharT>& out,
542 bool nullTerminate) const;
543
544 enum UsingBarrier { WithIncrementalBarrier, NoBarrier };
545
546 template<UsingBarrier b, typename CharT>
547 JSFlatString* flattenInternal(js::ExclusiveContext* cx);
548
549 template<UsingBarrier b>
550 JSFlatString* flattenInternal(js::ExclusiveContext* cx);
551
552 friend class JSString;
553 JSFlatString* flatten(js::ExclusiveContext* cx);
554
555 void init(js::ExclusiveContext* cx, JSString* left, JSString* right, size_t length);
556
557 public:
558 template <js::AllowGC allowGC>
559 static inline JSRope* new_(js::ExclusiveContext* cx,
560 typename js::MaybeRooted<JSString*, allowGC>::HandleType left,
561 typename js::MaybeRooted<JSString*, allowGC>::HandleType right,
562 size_t length);
563
564 bool copyLatin1Chars(js::ExclusiveContext* cx,
565 js::ScopedJSFreePtr<JS::Latin1Char>& out) const;
566 bool copyTwoByteChars(js::ExclusiveContext* cx, js::ScopedJSFreePtr<char16_t>& out) const;
567
568 bool copyLatin1CharsZ(js::ExclusiveContext* cx,
569 js::ScopedJSFreePtr<JS::Latin1Char>& out) const;
570 bool copyTwoByteCharsZ(js::ExclusiveContext* cx, js::ScopedJSFreePtr<char16_t>& out) const;
571
572 template <typename CharT>
573 bool copyChars(js::ExclusiveContext* cx, js::ScopedJSFreePtr<CharT>& out) const;
574
leftChild()575 JSString* leftChild() const {
576 MOZ_ASSERT(isRope());
577 return d.s.u2.left;
578 }
579
rightChild()580 JSString* rightChild() const {
581 MOZ_ASSERT(isRope());
582 return d.s.u3.right;
583 }
584
585 void traceChildren(JSTracer* trc);
586
offsetOfLeft()587 static size_t offsetOfLeft() {
588 return offsetof(JSRope, d.s.u2.left);
589 }
offsetOfRight()590 static size_t offsetOfRight() {
591 return offsetof(JSRope, d.s.u3.right);
592 }
593
594 #ifdef DEBUG
595 void dumpRepresentation(FILE* fp, int indent) const;
596 #endif
597 };
598
599 static_assert(sizeof(JSRope) == sizeof(JSString),
600 "string subclasses must be binary-compatible with JSString");
601
602 class JSLinearString : public JSString
603 {
604 friend class JSString;
605 friend class js::AutoStableStringChars;
606
607 /* Vacuous and therefore unimplemented. */
608 JSLinearString* ensureLinear(js::ExclusiveContext* cx) = delete;
609 bool isLinear() const = delete;
610 JSLinearString& asLinear() const = delete;
611
612 protected:
613 /* Returns void pointer to latin1/twoByte chars, for finalizers. */
614 MOZ_ALWAYS_INLINE
nonInlineCharsRaw()615 void* nonInlineCharsRaw() const {
616 MOZ_ASSERT(!isInline());
617 static_assert(offsetof(JSLinearString, d.s.u2.nonInlineCharsTwoByte) ==
618 offsetof(JSLinearString, d.s.u2.nonInlineCharsLatin1),
619 "nonInlineCharsTwoByte and nonInlineCharsLatin1 must have same offset");
620 return (void*)d.s.u2.nonInlineCharsTwoByte;
621 }
622
623 MOZ_ALWAYS_INLINE const JS::Latin1Char* rawLatin1Chars() const;
624 MOZ_ALWAYS_INLINE const char16_t* rawTwoByteChars() const;
625
626 public:
627 template<typename CharT>
628 MOZ_ALWAYS_INLINE
629 const CharT* nonInlineChars(const JS::AutoCheckCannotGC& nogc) const;
630
631 MOZ_ALWAYS_INLINE
nonInlineLatin1Chars(const JS::AutoCheckCannotGC & nogc)632 const JS::Latin1Char* nonInlineLatin1Chars(const JS::AutoCheckCannotGC& nogc) const {
633 MOZ_ASSERT(!isInline());
634 MOZ_ASSERT(hasLatin1Chars());
635 return d.s.u2.nonInlineCharsLatin1;
636 }
637
638 MOZ_ALWAYS_INLINE
nonInlineTwoByteChars(const JS::AutoCheckCannotGC & nogc)639 const char16_t* nonInlineTwoByteChars(const JS::AutoCheckCannotGC& nogc) const {
640 MOZ_ASSERT(!isInline());
641 MOZ_ASSERT(hasTwoByteChars());
642 return d.s.u2.nonInlineCharsTwoByte;
643 }
644
645 template<typename CharT>
646 MOZ_ALWAYS_INLINE
647 const CharT* chars(const JS::AutoCheckCannotGC& nogc) const;
648
649 MOZ_ALWAYS_INLINE
latin1Chars(const JS::AutoCheckCannotGC & nogc)650 const JS::Latin1Char* latin1Chars(const JS::AutoCheckCannotGC& nogc) const {
651 return rawLatin1Chars();
652 }
653
654 MOZ_ALWAYS_INLINE
twoByteChars(const JS::AutoCheckCannotGC & nogc)655 const char16_t* twoByteChars(const JS::AutoCheckCannotGC& nogc) const {
656 return rawTwoByteChars();
657 }
658
latin1Range(const JS::AutoCheckCannotGC & nogc)659 mozilla::Range<const JS::Latin1Char> latin1Range(const JS::AutoCheckCannotGC& nogc) const {
660 MOZ_ASSERT(JSString::isLinear());
661 return mozilla::Range<const JS::Latin1Char>(latin1Chars(nogc), length());
662 }
663
twoByteRange(const JS::AutoCheckCannotGC & nogc)664 mozilla::Range<const char16_t> twoByteRange(const JS::AutoCheckCannotGC& nogc) const {
665 MOZ_ASSERT(JSString::isLinear());
666 return mozilla::Range<const char16_t>(twoByteChars(nogc), length());
667 }
668
669 MOZ_ALWAYS_INLINE
latin1OrTwoByteChar(size_t index)670 char16_t latin1OrTwoByteChar(size_t index) const {
671 MOZ_ASSERT(JSString::isLinear());
672 MOZ_ASSERT(index < length());
673 JS::AutoCheckCannotGC nogc;
674 return hasLatin1Chars() ? latin1Chars(nogc)[index] : twoByteChars(nogc)[index];
675 }
676
677 #ifdef DEBUG
678 void dumpRepresentationChars(FILE* fp, int indent) const;
679 #endif
680 };
681
682 static_assert(sizeof(JSLinearString) == sizeof(JSString),
683 "string subclasses must be binary-compatible with JSString");
684
685 class JSDependentString : public JSLinearString
686 {
687 friend class JSString;
688 JSFlatString* undepend(JSContext* cx);
689
690 template <typename CharT>
691 JSFlatString* undependInternal(JSContext* cx);
692
693 void init(js::ExclusiveContext* cx, JSLinearString* base, size_t start,
694 size_t length);
695
696 /* Vacuous and therefore unimplemented. */
697 bool isDependent() const = delete;
698 JSDependentString& asDependent() const = delete;
699
700 /* The offset of this string's chars in base->chars(). */
baseOffset()701 MOZ_ALWAYS_INLINE mozilla::Maybe<size_t> baseOffset() const {
702 MOZ_ASSERT(JSString::isDependent());
703 JS::AutoCheckCannotGC nogc;
704 if (MOZ_UNLIKELY(base()->isUndepended()))
705 return mozilla::Nothing();
706 size_t offset;
707 if (hasTwoByteChars())
708 offset = twoByteChars(nogc) - base()->twoByteChars(nogc);
709 else
710 offset = latin1Chars(nogc) - base()->latin1Chars(nogc);
711 MOZ_ASSERT(offset < base()->length());
712 return mozilla::Some(offset);
713 }
714
715 public:
716 static inline JSLinearString* new_(js::ExclusiveContext* cx, JSLinearString* base,
717 size_t start, size_t length);
718
offsetOfBase()719 inline static size_t offsetOfBase() {
720 return offsetof(JSDependentString, d.s.u3.base);
721 }
722
723 #ifdef DEBUG
724 void dumpRepresentation(FILE* fp, int indent) const;
725 #endif
726 };
727
728 static_assert(sizeof(JSDependentString) == sizeof(JSString),
729 "string subclasses must be binary-compatible with JSString");
730
731 class JSFlatString : public JSLinearString
732 {
733 /* Vacuous and therefore unimplemented. */
734 JSFlatString* ensureFlat(JSContext* cx) = delete;
735 bool isFlat() const = delete;
736 JSFlatString& asFlat() const = delete;
737
738 template <typename CharT>
739 static bool isIndexSlow(const CharT* s, size_t length, uint32_t* indexp);
740
741 void init(const char16_t* chars, size_t length);
742 void init(const JS::Latin1Char* chars, size_t length);
743
744 public:
745 template <js::AllowGC allowGC, typename CharT>
746 static inline JSFlatString* new_(js::ExclusiveContext* cx,
747 const CharT* chars, size_t length);
748
749 /*
750 * Returns true if this string's characters store an unsigned 32-bit
751 * integer value, initializing *indexp to that value if so. (Thus if
752 * calling isIndex returns true, js::IndexToString(cx, *indexp) will be a
753 * string equal to this string.)
754 */
isIndex(uint32_t * indexp)755 inline bool isIndex(uint32_t* indexp) const {
756 MOZ_ASSERT(JSString::isFlat());
757 JS::AutoCheckCannotGC nogc;
758 if (hasLatin1Chars()) {
759 const JS::Latin1Char* s = latin1Chars(nogc);
760 return JS7_ISDEC(*s) && isIndexSlow(s, length(), indexp);
761 }
762 const char16_t* s = twoByteChars(nogc);
763 return JS7_ISDEC(*s) && isIndexSlow(s, length(), indexp);
764 }
765
766 /*
767 * Returns a property name represented by this string, or null on failure.
768 * You must verify that this is not an index per isIndex before calling
769 * this method.
770 */
771 inline js::PropertyName* toPropertyName(JSContext* cx);
772
773 /*
774 * Once a JSFlatString sub-class has been added to the atom state, this
775 * operation changes the string to the JSAtom type, in place.
776 */
777 MOZ_ALWAYS_INLINE JSAtom* morphAtomizedStringIntoAtom(js::HashNumber hash);
778 MOZ_ALWAYS_INLINE JSAtom* morphAtomizedStringIntoPermanentAtom(js::HashNumber hash);
779
780 inline void finalize(js::FreeOp* fop);
781
782 #ifdef DEBUG
783 void dumpRepresentation(FILE* fp, int indent) const;
784 #endif
785 };
786
787 static_assert(sizeof(JSFlatString) == sizeof(JSString),
788 "string subclasses must be binary-compatible with JSString");
789
790 class JSExtensibleString : public JSFlatString
791 {
792 /* Vacuous and therefore unimplemented. */
793 bool isExtensible() const = delete;
794 JSExtensibleString& asExtensible() const = delete;
795
796 public:
797 MOZ_ALWAYS_INLINE
capacity()798 size_t capacity() const {
799 MOZ_ASSERT(JSString::isExtensible());
800 return d.s.u3.capacity;
801 }
802
803 #ifdef DEBUG
804 void dumpRepresentation(FILE* fp, int indent) const;
805 #endif
806 };
807
808 static_assert(sizeof(JSExtensibleString) == sizeof(JSString),
809 "string subclasses must be binary-compatible with JSString");
810
811 class JSInlineString : public JSFlatString
812 {
813 public:
814 MOZ_ALWAYS_INLINE
latin1Chars(const JS::AutoCheckCannotGC & nogc)815 const JS::Latin1Char* latin1Chars(const JS::AutoCheckCannotGC& nogc) const {
816 MOZ_ASSERT(JSString::isInline());
817 MOZ_ASSERT(hasLatin1Chars());
818 return d.inlineStorageLatin1;
819 }
820
821 MOZ_ALWAYS_INLINE
twoByteChars(const JS::AutoCheckCannotGC & nogc)822 const char16_t* twoByteChars(const JS::AutoCheckCannotGC& nogc) const {
823 MOZ_ASSERT(JSString::isInline());
824 MOZ_ASSERT(hasTwoByteChars());
825 return d.inlineStorageTwoByte;
826 }
827
828 template<typename CharT>
829 static bool lengthFits(size_t length);
830
offsetOfInlineStorage()831 static size_t offsetOfInlineStorage() {
832 return offsetof(JSInlineString, d.inlineStorageTwoByte);
833 }
834
835 #ifdef DEBUG
836 void dumpRepresentation(FILE* fp, int indent) const;
837 #endif
838 };
839
840 static_assert(sizeof(JSInlineString) == sizeof(JSString),
841 "string subclasses must be binary-compatible with JSString");
842
843 /*
844 * On 32-bit platforms, JSThinInlineString can store 7 Latin1 characters or 3
845 * TwoByte characters (excluding null terminator) inline. On 64-bit platforms,
846 * these numbers are 15 and 7, respectively.
847 */
848 class JSThinInlineString : public JSInlineString
849 {
850 public:
851 static const size_t MAX_LENGTH_LATIN1 = NUM_INLINE_CHARS_LATIN1 - 1;
852 static const size_t MAX_LENGTH_TWO_BYTE = NUM_INLINE_CHARS_TWO_BYTE - 1;
853
854 template <js::AllowGC allowGC>
855 static inline JSThinInlineString* new_(js::ExclusiveContext* cx);
856
857 template <typename CharT>
858 inline CharT* init(size_t length);
859
860 template<typename CharT>
861 static bool lengthFits(size_t length);
862 };
863
864 static_assert(sizeof(JSThinInlineString) == sizeof(JSString),
865 "string subclasses must be binary-compatible with JSString");
866
867 /*
868 * On both 32-bit and 64-bit platforms, MAX_LENGTH_TWO_BYTE is 11 and
869 * MAX_LENGTH_LATIN1 is 23 (excluding null terminator). This is deliberate,
870 * in order to minimize potential performance differences between 32-bit and
871 * 64-bit platforms.
872 *
873 * There are still some differences due to NUM_INLINE_CHARS_* being different.
874 * E.g. TwoByte strings of length 4--7 will be JSFatInlineStrings on 32-bit
875 * platforms and JSThinInlineStrings on 64-bit platforms. But the more
876 * significant transition from inline strings to non-inline strings occurs at
877 * length 11 (for TwoByte strings) and 23 (Latin1 strings) on both 32-bit and
878 * 64-bit platforms.
879 */
class JSFatInlineString : public JSInlineString
{
    /*
     * Extra inline character slots added on top of NUM_INLINE_CHARS_*, sized
     * so that the totals come to 24 Latin1 slots and 12 TwoByte slots.
     */
    static const size_t INLINE_EXTENSION_CHARS_LATIN1 = 24 - NUM_INLINE_CHARS_LATIN1;
    static const size_t INLINE_EXTENSION_CHARS_TWO_BYTE = 12 - NUM_INLINE_CHARS_TWO_BYTE;

  protected: /* to fool clang into not warning this is unused */
    /* Extension storage; one overlapping view per character encoding. */
    union {
        char   inlineStorageExtensionLatin1[INLINE_EXTENSION_CHARS_LATIN1];
        char16_t inlineStorageExtensionTwoByte[INLINE_EXTENSION_CHARS_TWO_BYTE];
    };

  public:
    /* Declared here; the definition is not in this part of the file. */
    template <js::AllowGC allowGC>
    static inline JSFatInlineString* new_(js::ExclusiveContext* cx);

    /* Base inline slots plus extension slots, less one for the terminator. */
    static const size_t MAX_LENGTH_LATIN1 = JSString::NUM_INLINE_CHARS_LATIN1 +
                                            INLINE_EXTENSION_CHARS_LATIN1
                                            -1 /* null terminator */;

    static const size_t MAX_LENGTH_TWO_BYTE = JSString::NUM_INLINE_CHARS_TWO_BYTE +
                                              INLINE_EXTENSION_CHARS_TWO_BYTE
                                              -1 /* null terminator */;

    /* Declared here; the definition is not in this part of the file. */
    template <typename CharT>
    inline CharT* init(size_t length);

    /*
     * Specialized below for JS::Latin1Char and char16_t; each specialization
     * returns whether |length| is at most the matching MAX_LENGTH_* constant.
     */
    template<typename CharT>
    static bool lengthFits(size_t length);

    /* Only called by the GC for strings with the AllocKind::FAT_INLINE_STRING kind. */

    MOZ_ALWAYS_INLINE void finalize(js::FreeOp* fop);
};

/* Compile-time check that the class size is a whole multiple of gc::CellSize. */
static_assert(sizeof(JSFatInlineString) % js::gc::CellSize == 0,
              "fat inline strings shouldn't waste space up to the next cell "
              "boundary");
917
/*
 * Linear string that carries a JSStringFinalizer pointer (see
 * externalFinalizer()) alongside its two-byte characters.
 */
class JSExternalString : public JSLinearString
{
    /* Private initializer; the definition is not in this part of the file. */
    void init(const char16_t* chars, size_t length, const JSStringFinalizer* fin);

    /* Vacuous and therefore unimplemented. */
    bool isExternal() const = delete;
    JSExternalString& asExternal() const = delete;

  public:
    /* Declared here; the definition is not in this part of the file. */
    static inline JSExternalString* new_(JSContext* cx, const char16_t* chars, size_t length,
                                         const JSStringFinalizer* fin);

    /* Returns the finalizer stored in d.s.u3; asserts the string is external. */
    const JSStringFinalizer* externalFinalizer() const {
        MOZ_ASSERT(JSString::isExternal());
        return d.s.u3.externalFinalizer;
    }

    /*
     * External chars are never allocated inline or in the nursery, so we can
     * safely expose this without requiring an AutoCheckCannotGC argument.
     */
    const char16_t* twoByteChars() const {
        return rawTwoByteChars();
    }

    /* Only called by the GC for strings with the AllocKind::EXTERNAL_STRING kind. */

    inline void finalize(js::FreeOp* fop);

    /* Declared here; the definition is not in this part of the file. */
    JSFlatString* ensureFlat(JSContext* cx);

#ifdef DEBUG
    /* Debug-only dump helper; compiled only when DEBUG is defined. */
    void dumpRepresentation(FILE* fp, int indent) const;
#endif
};

/* Compile-time check that the class is exactly as large as JSString. */
static_assert(sizeof(JSExternalString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
956
/* Member-less subclass of JSFlatString; see the note inside the class body. */
class JSUndependedString : public JSFlatString
{
    /*
     * JSUndependedString is not explicitly used and is only present for
     * consistency. See JSDependentString::undepend for how a JSDependentString
     * gets morphed into a JSUndependedString.
     */
};

/* Compile-time check that the class is exactly as large as JSString. */
static_assert(sizeof(JSUndependedString) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
968
/*
 * Flat string viewed as an atom. The class declares no data members of its
 * own; the hash is stored by js::NormalAtom / js::FatInlineAtom, which hash()
 * and initHash() dispatch to.
 */
class JSAtom : public JSFlatString
{
    /* Vacuous and therefore unimplemented. */
    bool isAtom() const = delete;
    JSAtom& asAtom() const = delete;

  public:
    /* Returns the PropertyName for this. isIndex() must be false. */
    inline js::PropertyName* asPropertyName();

    /* Declared here; the definition is not in this part of the file. */
    inline void finalize(js::FreeOp* fop);

    /* Forwards to JSString::isPermanentAtom(). */
    MOZ_ALWAYS_INLINE
    bool isPermanent() const {
        return JSString::isPermanentAtom();
    }

    // Transform this atom into a permanent atom. This is only done during
    // initialization of the runtime.
    MOZ_ALWAYS_INLINE void morphIntoPermanentAtom() {
        // Sets the PERMANENT_ATOM_MASK bits in the flags word.
        d.u1.flags |= PERMANENT_ATOM_MASK;
    }

    /*
     * Read / store the atom's hash. Both are defined later in this header and
     * pick the FatInlineAtom or NormalAtom layout based on isFatInline().
     */
    inline js::HashNumber hash() const;
    inline void initHash(js::HashNumber hash);

#ifdef DEBUG
    /* Debug-only dump helpers; compiled only when DEBUG is defined. */
    void dump(FILE* fp);
    void dump();
#endif
};

/* Compile-time check that the class is exactly as large as JSString. */
static_assert(sizeof(JSAtom) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
1003
1004 namespace js {
1005
/*
 * Atom layout used by JSAtom::hash()/initHash() when the atom is not fat
 * inline: a JSAtom followed by its hash and padding.
 */
class NormalAtom : public JSAtom
{
  protected: // Silence Clang unused-field warning.
    HashNumber hash_;
    uint32_t padding_; // Ensure the size is a multiple of gc::CellSize.

  public:
    /* Returns the stored hash. */
    HashNumber hash() const {
        return hash_;
    }
    /* Stores |hash| as this atom's hash. */
    void initHash(HashNumber hash) {
        hash_ = hash;
    }
};

/* Compile-time check: the two trailing fields add exactly sizeof(uint64_t). */
static_assert(sizeof(NormalAtom) == sizeof(JSString) + sizeof(uint64_t),
              "NormalAtom must have size of a string + HashNumber, "
              "aligned to gc::CellSize");
1024
/*
 * Atom layout used by JSAtom::hash()/initHash() when the atom is fat inline:
 * a JSAtom, then as many bytes as JSFatInlineString adds over JSString, then
 * the hash and padding.
 */
class FatInlineAtom : public JSAtom
{
  protected: // Silence Clang unused-field warning.
    /* Sized as the difference between JSFatInlineString and JSString. */
    char inlineStorage_[sizeof(JSFatInlineString) - sizeof(JSString)];
    HashNumber hash_;
    uint32_t padding_; // Ensure the size is a multiple of gc::CellSize.

  public:
    /* Returns the stored hash. */
    HashNumber hash() const {
        return hash_;
    }
    /* Stores |hash| as this atom's hash. */
    void initHash(HashNumber hash) {
        hash_ = hash;
    }
};

/* Compile-time check: hash plus padding add exactly sizeof(uint64_t). */
static_assert(sizeof(FatInlineAtom) == sizeof(JSFatInlineString) + sizeof(uint64_t),
              "FatInlineAtom must have size of a fat inline string + HashNumber, "
              "aligned to gc::CellSize");
1044
1045 } // namespace js
1046
1047 inline js::HashNumber
hash()1048 JSAtom::hash() const
1049 {
1050 if (isFatInline())
1051 return static_cast<const js::FatInlineAtom*>(this)->hash();
1052 return static_cast<const js::NormalAtom*>(this)->hash();
1053 }
1054
1055 inline void
initHash(js::HashNumber hash)1056 JSAtom::initHash(js::HashNumber hash)
1057 {
1058 if (isFatInline())
1059 return static_cast<js::FatInlineAtom*>(this)->initHash(hash);
1060 return static_cast<js::NormalAtom*>(this)->initHash(hash);
1061 }
1062
1063 MOZ_ALWAYS_INLINE JSAtom*
morphAtomizedStringIntoAtom(js::HashNumber hash)1064 JSFlatString::morphAtomizedStringIntoAtom(js::HashNumber hash)
1065 {
1066 d.u1.flags |= ATOM_BIT;
1067 JSAtom* atom = &asAtom();
1068 atom->initHash(hash);
1069 return atom;
1070 }
1071
1072 MOZ_ALWAYS_INLINE JSAtom*
morphAtomizedStringIntoPermanentAtom(js::HashNumber hash)1073 JSFlatString::morphAtomizedStringIntoPermanentAtom(js::HashNumber hash)
1074 {
1075 d.u1.flags |= PERMANENT_ATOM_MASK;
1076 JSAtom* atom = &asAtom();
1077 atom->initHash(hash);
1078 return atom;
1079 }
1080
1081 namespace js {
1082
1083 class StaticStrings
1084 {
1085 private:
1086 /* Bigger chars cannot be in a length-2 string. */
1087 static const size_t SMALL_CHAR_LIMIT = 128U;
1088 static const size_t NUM_SMALL_CHARS = 64U;
1089
1090 JSAtom* length2StaticTable[NUM_SMALL_CHARS * NUM_SMALL_CHARS];
1091
1092 public:
1093 /* We keep these public for the JITs. */
1094 static const size_t UNIT_STATIC_LIMIT = 256U;
1095 JSAtom* unitStaticTable[UNIT_STATIC_LIMIT];
1096
1097 static const size_t INT_STATIC_LIMIT = 256U;
1098 JSAtom* intStaticTable[INT_STATIC_LIMIT];
1099
StaticStrings()1100 StaticStrings() {
1101 mozilla::PodZero(this);
1102 }
1103
1104 bool init(JSContext* cx);
1105 void trace(JSTracer* trc);
1106
hasUint(uint32_t u)1107 static bool hasUint(uint32_t u) { return u < INT_STATIC_LIMIT; }
1108
getUint(uint32_t u)1109 JSAtom* getUint(uint32_t u) {
1110 MOZ_ASSERT(hasUint(u));
1111 return intStaticTable[u];
1112 }
1113
hasInt(int32_t i)1114 static bool hasInt(int32_t i) {
1115 return uint32_t(i) < INT_STATIC_LIMIT;
1116 }
1117
getInt(int32_t i)1118 JSAtom* getInt(int32_t i) {
1119 MOZ_ASSERT(hasInt(i));
1120 return getUint(uint32_t(i));
1121 }
1122
hasUnit(char16_t c)1123 static bool hasUnit(char16_t c) { return c < UNIT_STATIC_LIMIT; }
1124
getUnit(char16_t c)1125 JSAtom* getUnit(char16_t c) {
1126 MOZ_ASSERT(hasUnit(c));
1127 return unitStaticTable[c];
1128 }
1129
1130 /* May not return atom, returns null on (reported) failure. */
1131 inline JSLinearString* getUnitStringForElement(JSContext* cx, JSString* str, size_t index);
1132
1133 template <typename CharT>
1134 static bool isStatic(const CharT* chars, size_t len);
1135 static bool isStatic(JSAtom* atom);
1136
1137 /* Return null if no static atom exists for the given (chars, length). */
1138 template <typename CharT>
lookup(const CharT * chars,size_t length)1139 JSAtom* lookup(const CharT* chars, size_t length) {
1140 switch (length) {
1141 case 1: {
1142 char16_t c = chars[0];
1143 if (c < UNIT_STATIC_LIMIT)
1144 return getUnit(c);
1145 return nullptr;
1146 }
1147 case 2:
1148 if (fitsInSmallChar(chars[0]) && fitsInSmallChar(chars[1]))
1149 return getLength2(chars[0], chars[1]);
1150 return nullptr;
1151 case 3:
1152 /*
1153 * Here we know that JSString::intStringTable covers only 256 (or at least
1154 * not 1000 or more) chars. We rely on order here to resolve the unit vs.
1155 * int string/length-2 string atom identity issue by giving priority to unit
1156 * strings for "0" through "9" and length-2 strings for "10" through "99".
1157 */
1158 static_assert(INT_STATIC_LIMIT <= 999,
1159 "static int strings assumed below to be at most "
1160 "three digits");
1161 if ('1' <= chars[0] && chars[0] <= '9' &&
1162 '0' <= chars[1] && chars[1] <= '9' &&
1163 '0' <= chars[2] && chars[2] <= '9') {
1164 int i = (chars[0] - '0') * 100 +
1165 (chars[1] - '0') * 10 +
1166 (chars[2] - '0');
1167
1168 if (unsigned(i) < INT_STATIC_LIMIT)
1169 return getInt(i);
1170 }
1171 return nullptr;
1172 }
1173
1174 return nullptr;
1175 }
1176
1177 private:
1178 typedef uint8_t SmallChar;
1179 static const SmallChar INVALID_SMALL_CHAR = -1;
1180
fitsInSmallChar(char16_t c)1181 static bool fitsInSmallChar(char16_t c) {
1182 return c < SMALL_CHAR_LIMIT && toSmallChar[c] != INVALID_SMALL_CHAR;
1183 }
1184
1185 static const SmallChar toSmallChar[];
1186
1187 JSAtom* getLength2(char16_t c1, char16_t c2);
getLength2(uint32_t u)1188 JSAtom* getLength2(uint32_t u) {
1189 MOZ_ASSERT(u < 100);
1190 return getLength2('0' + u / 10, '0' + u % 10);
1191 }
1192 };
1193
1194 /*
1195 * Represents an atomized string which does not contain an index (that is, an
1196 * unsigned 32-bit value). Thus for any PropertyName propname,
1197 * ToString(ToUint32(propname)) never equals propname.
1198 *
1199 * To more concretely illustrate the utility of PropertyName, consider that it
1200 * is used to partition, in a type-safe manner, the ways to refer to a
1201 * property, as follows:
1202 *
1203 * - uint32_t indexes,
1204 * - PropertyName strings which don't encode uint32_t indexes, and
1205 * - jsspecial special properties (non-ES5 properties like object-valued
1206 * jsids, JSID_EMPTY, JSID_VOID, and maybe in the future Harmony-proposed
1207 * private names).
1208 */
/* Adds no members to JSAtom; only deletes the redundant self-conversion. */
class PropertyName : public JSAtom
{
  private:
    /* Vacuous and therefore unimplemented. */
    PropertyName* asPropertyName() = delete;
};

/* Compile-time check that the class is exactly as large as JSString. */
static_assert(sizeof(PropertyName) == sizeof(JSString),
              "string subclasses must be binary-compatible with JSString");
1218
1219 static MOZ_ALWAYS_INLINE jsid
NameToId(PropertyName * name)1220 NameToId(PropertyName* name)
1221 {
1222 return NON_INTEGER_ATOM_TO_JSID(name);
1223 }
1224
/* Alias for a JS::GCVector whose elements are PropertyName pointers. */
using PropertyNameVector = JS::GCVector<PropertyName*>;
1226
/*
 * Declaration only; the definition is not in this part of the file.
 * NOTE(review): presumably copies the characters of |str| into |dest| as
 * CharT -- confirm against the definition.
 */
template <typename CharT>
void
CopyChars(CharT* dest, const JSLinearString& str);
1230
1231 static inline UniqueChars
StringToNewUTF8CharsZ(ExclusiveContext * maybecx,JSString & str)1232 StringToNewUTF8CharsZ(ExclusiveContext* maybecx, JSString& str)
1233 {
1234 JS::AutoCheckCannotGC nogc;
1235
1236 JSLinearString* linear = str.ensureLinear(maybecx);
1237 if (!linear)
1238 return nullptr;
1239
1240 return UniqueChars(linear->hasLatin1Chars()
1241 ? JS::CharsToNewUTF8CharsZ(maybecx, linear->latin1Range(nogc)).c_str()
1242 : JS::CharsToNewUTF8CharsZ(maybecx, linear->twoByteRange(nogc)).c_str());
1243 }
1244
/*
 * GC-allocate a string descriptor for the given malloc-allocated chars.
 *
 * Declaration only; the definition is not in this part of the file.
 */
template <js::AllowGC allowGC, typename CharT>
extern JSFlatString*
NewString(js::ExclusiveContext* cx, CharT* chars, size_t length);
1249
/*
 * Like NewString, but doesn't try to deflate to Latin1.
 *
 * Declaration only; the definition is not in this part of the file.
 */
template <js::AllowGC allowGC, typename CharT>
extern JSFlatString*
NewStringDontDeflate(js::ExclusiveContext* cx, CharT* chars, size_t length);
1254
/*
 * Declaration only; the definition is not in this part of the file.
 * NOTE(review): presumably yields a string for the |length| characters of
 * |base| starting at |start| -- confirm against the definition.
 */
extern JSLinearString*
NewDependentString(JSContext* cx, JSString* base, size_t start, size_t length);
1257
/*
 * Take ownership of an array of Latin1Chars.
 *
 * |chars| is passed by value as a UniqueChars. Declaration only; the
 * definition is not in this part of the file.
 */
extern JSFlatString*
NewLatin1StringZ(js::ExclusiveContext* cx, UniqueChars chars);
1261
/*
 * Copy a counted string and GC-allocate a descriptor for it.
 *
 * Declaration only; the definition is not in this part of the file.
 */
template <js::AllowGC allowGC, typename CharT>
extern JSFlatString*
NewStringCopyN(js::ExclusiveContext* cx, const CharT* s, size_t n);
1266
1267 template <js::AllowGC allowGC>
1268 inline JSFlatString*
NewStringCopyN(ExclusiveContext * cx,const char * s,size_t n)1269 NewStringCopyN(ExclusiveContext* cx, const char* s, size_t n)
1270 {
1271 return NewStringCopyN<allowGC>(cx, reinterpret_cast<const Latin1Char*>(s), n);
1272 }
1273
/*
 * Like NewStringCopyN, but doesn't try to deflate to Latin1.
 *
 * Declaration only; the definition is not in this part of the file.
 */
template <js::AllowGC allowGC, typename CharT>
extern JSFlatString*
NewStringCopyNDontDeflate(js::ExclusiveContext* cx, const CharT* s, size_t n);
1278
1279 /* Copy a C string and GC-allocate a descriptor for it. */
1280 template <js::AllowGC allowGC>
1281 inline JSFlatString*
NewStringCopyZ(js::ExclusiveContext * cx,const char16_t * s)1282 NewStringCopyZ(js::ExclusiveContext* cx, const char16_t* s)
1283 {
1284 return NewStringCopyN<allowGC>(cx, s, js_strlen(s));
1285 }
1286
1287 template <js::AllowGC allowGC>
1288 inline JSFlatString*
NewStringCopyZ(js::ExclusiveContext * cx,const char * s)1289 NewStringCopyZ(js::ExclusiveContext* cx, const char* s)
1290 {
1291 return NewStringCopyN<allowGC>(cx, s, strlen(s));
1292 }
1293
/*
 * Declaration only; the definition is not in this part of the file.
 * NOTE(review): presumably builds a string from the UTF-8 range |utf8| --
 * confirm against the definition.
 */
template <js::AllowGC allowGC>
extern JSFlatString*
NewStringCopyUTF8N(JSContext* cx, const JS::UTF8Chars utf8);
1297
1298 template <js::AllowGC allowGC>
1299 inline JSFlatString*
NewStringCopyUTF8Z(JSContext * cx,const JS::ConstUTF8CharsZ utf8)1300 NewStringCopyUTF8Z(JSContext* cx, const JS::ConstUTF8CharsZ utf8)
1301 {
1302 return NewStringCopyUTF8N<allowGC>(cx, JS::UTF8Chars(utf8.c_str(), strlen(utf8.c_str())));
1303 }
1304
/* Compile-time check that HashNumber is exactly four bytes wide. */
JS_STATIC_ASSERT(sizeof(HashNumber) == 4);
1306
1307 } /* namespace js */
1308
// Addon IDs are interned atoms which are never destroyed. This detail is
// not exposed outside the API.
//
// The class adds no members of its own to JSAtom.
class JSAddonId : public JSAtom
{};
1313
1314 MOZ_ALWAYS_INLINE bool
getChar(js::ExclusiveContext * cx,size_t index,char16_t * code)1315 JSString::getChar(js::ExclusiveContext* cx, size_t index, char16_t* code)
1316 {
1317 MOZ_ASSERT(index < length());
1318
1319 /*
1320 * Optimization for one level deep ropes.
1321 * This is common for the following pattern:
1322 *
1323 * while() {
1324 * text = text.substr(0, x) + "bla" + text.substr(x)
1325 * test.charCodeAt(x + 1)
1326 * }
1327 */
1328 JSString* str;
1329 if (isRope()) {
1330 JSRope* rope = &asRope();
1331 if (uint32_t(index) < rope->leftChild()->length()) {
1332 str = rope->leftChild();
1333 } else {
1334 str = rope->rightChild();
1335 index -= rope->leftChild()->length();
1336 }
1337 } else {
1338 str = this;
1339 }
1340
1341 if (!str->ensureLinear(cx))
1342 return false;
1343
1344 *code = str->asLinear().latin1OrTwoByteChar(index);
1345 return true;
1346 }
1347
1348 MOZ_ALWAYS_INLINE JSLinearString*
ensureLinear(js::ExclusiveContext * cx)1349 JSString::ensureLinear(js::ExclusiveContext* cx)
1350 {
1351 return isLinear()
1352 ? &asLinear()
1353 : asRope().flatten(cx);
1354 }
1355
1356 inline JSLinearString*
base()1357 JSString::base() const
1358 {
1359 MOZ_ASSERT(hasBase());
1360 MOZ_ASSERT(!d.s.u3.base->isInline());
1361 return d.s.u3.base;
1362 }
1363
1364 template<>
1365 MOZ_ALWAYS_INLINE const char16_t*
nonInlineChars(const JS::AutoCheckCannotGC & nogc)1366 JSLinearString::nonInlineChars(const JS::AutoCheckCannotGC& nogc) const
1367 {
1368 return nonInlineTwoByteChars(nogc);
1369 }
1370
1371 template<>
1372 MOZ_ALWAYS_INLINE const JS::Latin1Char*
nonInlineChars(const JS::AutoCheckCannotGC & nogc)1373 JSLinearString::nonInlineChars(const JS::AutoCheckCannotGC& nogc) const
1374 {
1375 return nonInlineLatin1Chars(nogc);
1376 }
1377
1378 template<>
1379 MOZ_ALWAYS_INLINE const char16_t*
chars(const JS::AutoCheckCannotGC & nogc)1380 JSLinearString::chars(const JS::AutoCheckCannotGC& nogc) const
1381 {
1382 return rawTwoByteChars();
1383 }
1384
1385 template<>
1386 MOZ_ALWAYS_INLINE const JS::Latin1Char*
chars(const JS::AutoCheckCannotGC & nogc)1387 JSLinearString::chars(const JS::AutoCheckCannotGC& nogc) const
1388 {
1389 return rawLatin1Chars();
1390 }
1391
1392 template <>
1393 MOZ_ALWAYS_INLINE bool
1394 JSRope::copyChars<JS::Latin1Char>(js::ExclusiveContext* cx,
1395 js::ScopedJSFreePtr<JS::Latin1Char>& out) const
1396 {
1397 return copyLatin1Chars(cx, out);
1398 }
1399
1400 template <>
1401 MOZ_ALWAYS_INLINE bool
1402 JSRope::copyChars<char16_t>(js::ExclusiveContext* cx, js::ScopedJSFreePtr<char16_t>& out) const
1403 {
1404 return copyTwoByteChars(cx, out);
1405 }
1406
1407 template<>
1408 MOZ_ALWAYS_INLINE bool
1409 JSThinInlineString::lengthFits<JS::Latin1Char>(size_t length)
1410 {
1411 return length <= MAX_LENGTH_LATIN1;
1412 }
1413
1414 template<>
1415 MOZ_ALWAYS_INLINE bool
1416 JSThinInlineString::lengthFits<char16_t>(size_t length)
1417 {
1418 return length <= MAX_LENGTH_TWO_BYTE;
1419 }
1420
1421 template<>
1422 MOZ_ALWAYS_INLINE bool
1423 JSFatInlineString::lengthFits<JS::Latin1Char>(size_t length)
1424 {
1425 static_assert((INLINE_EXTENSION_CHARS_LATIN1 * sizeof(char)) % js::gc::CellSize == 0,
1426 "fat inline strings' Latin1 characters don't exactly "
1427 "fill subsequent cells and thus are wasteful");
1428 static_assert(MAX_LENGTH_LATIN1 + 1 ==
1429 (sizeof(JSFatInlineString) -
1430 offsetof(JSFatInlineString, d.inlineStorageLatin1)) / sizeof(char),
1431 "MAX_LENGTH_LATIN1 must be one less than inline Latin1 "
1432 "storage count");
1433
1434 return length <= MAX_LENGTH_LATIN1;
1435 }
1436
1437 template<>
1438 MOZ_ALWAYS_INLINE bool
1439 JSFatInlineString::lengthFits<char16_t>(size_t length)
1440 {
1441 static_assert((INLINE_EXTENSION_CHARS_TWO_BYTE * sizeof(char16_t)) % js::gc::CellSize == 0,
1442 "fat inline strings' char16_t characters don't exactly "
1443 "fill subsequent cells and thus are wasteful");
1444 static_assert(MAX_LENGTH_TWO_BYTE + 1 ==
1445 (sizeof(JSFatInlineString) -
1446 offsetof(JSFatInlineString, d.inlineStorageTwoByte)) / sizeof(char16_t),
1447 "MAX_LENGTH_TWO_BYTE must be one less than inline "
1448 "char16_t storage count");
1449
1450 return length <= MAX_LENGTH_TWO_BYTE;
1451 }
1452
1453 template<>
1454 MOZ_ALWAYS_INLINE bool
1455 JSInlineString::lengthFits<JS::Latin1Char>(size_t length)
1456 {
1457 // If it fits in a fat inline string, it fits in any inline string.
1458 return JSFatInlineString::lengthFits<JS::Latin1Char>(length);
1459 }
1460
1461 template<>
1462 MOZ_ALWAYS_INLINE bool
1463 JSInlineString::lengthFits<char16_t>(size_t length)
1464 {
1465 // If it fits in a fat inline string, it fits in any inline string.
1466 return JSFatInlineString::lengthFits<char16_t>(length);
1467 }
1468
1469 template<>
1470 MOZ_ALWAYS_INLINE void
setNonInlineChars(const char16_t * chars)1471 JSString::setNonInlineChars(const char16_t* chars)
1472 {
1473 d.s.u2.nonInlineCharsTwoByte = chars;
1474 }
1475
1476 template<>
1477 MOZ_ALWAYS_INLINE void
setNonInlineChars(const JS::Latin1Char * chars)1478 JSString::setNonInlineChars(const JS::Latin1Char* chars)
1479 {
1480 d.s.u2.nonInlineCharsLatin1 = chars;
1481 }
1482
1483 MOZ_ALWAYS_INLINE const JS::Latin1Char*
rawLatin1Chars()1484 JSLinearString::rawLatin1Chars() const
1485 {
1486 MOZ_ASSERT(JSString::isLinear());
1487 MOZ_ASSERT(hasLatin1Chars());
1488 return isInline() ? d.inlineStorageLatin1 : d.s.u2.nonInlineCharsLatin1;
1489 }
1490
1491 MOZ_ALWAYS_INLINE const char16_t*
rawTwoByteChars()1492 JSLinearString::rawTwoByteChars() const
1493 {
1494 MOZ_ASSERT(JSString::isLinear());
1495 MOZ_ASSERT(hasTwoByteChars());
1496 return isInline() ? d.inlineStorageTwoByte : d.s.u2.nonInlineCharsTwoByte;
1497 }
1498
1499 inline js::PropertyName*
asPropertyName()1500 JSAtom::asPropertyName()
1501 {
1502 #ifdef DEBUG
1503 uint32_t dummy;
1504 MOZ_ASSERT(!isIndex(&dummy));
1505 #endif
1506 return static_cast<js::PropertyName*>(this);
1507 }
1508
1509 #endif /* vm_String_h */
1510