// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_OBJECTS_STRING_H_
#define V8_OBJECTS_STRING_H_

#include "src/base/bits.h"
#include "src/objects/name.h"
#include "src/unicode-decoder.h"

// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"

namespace v8 {
namespace internal {

class BigInt;

// Argument to String::ToCString. Per the comment on ToCString, the returned
// C string "may optionally contain nulls"; this flag is the switch for that.
// NOTE(review): what DISALLOW_NULLS does with an embedded null is not visible
// in this header - confirm in the implementation.
enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };

// Argument to String::ToCString. ROBUST_STRING_TRAVERSAL requests a traversal
// that handles unexpected data without assert failures and without heap
// allocation (useful when printing stack traces); FAST_STRING_TRAVERSAL is
// the default used by the short ToCString overload.
enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };

// The characteristics of a string are stored in its map. Retrieving these
// few bits of information is moderately expensive, involving two memory
// loads where the second is dependent on the first. To improve efficiency
// the shape of the string is given its own class so that it can be retrieved
// once and used for several string operations. A StringShape is small enough
// to be passed by value and is immutable, but be aware that flattening a
// string can potentially alter its shape. Also be aware that a GC caused by
// something else can alter the shape of a string due to ConsString
// shortcutting. Keeping these restrictions in mind has proven to be error-
// prone and so we no longer put StringShapes in variables unless there is a
// concrete performance benefit at that particular point in the code.
34 class StringShape BASE_EMBEDDED { 35 public: 36 inline explicit StringShape(const String* s); 37 inline explicit StringShape(Map* s); 38 inline explicit StringShape(InstanceType t); 39 inline bool IsSequential(); 40 inline bool IsExternal(); 41 inline bool IsCons(); 42 inline bool IsSliced(); 43 inline bool IsThin(); 44 inline bool IsIndirect(); 45 inline bool IsExternalOneByte(); 46 inline bool IsExternalTwoByte(); 47 inline bool IsSequentialOneByte(); 48 inline bool IsSequentialTwoByte(); 49 inline bool IsInternalized(); 50 inline StringRepresentationTag representation_tag(); 51 inline uint32_t encoding_tag(); 52 inline uint32_t full_representation_tag(); 53 inline bool HasOnlyOneByteChars(); 54 #ifdef DEBUG type()55 inline uint32_t type() { return type_; } invalidate()56 inline void invalidate() { valid_ = false; } valid()57 inline bool valid() { return valid_; } 58 #else invalidate()59 inline void invalidate() {} 60 #endif 61 62 private: 63 uint32_t type_; 64 #ifdef DEBUG set_valid()65 inline void set_valid() { valid_ = true; } 66 bool valid_; 67 #else set_valid()68 inline void set_valid() {} 69 #endif 70 }; 71 72 // The String abstract class captures JavaScript string values: 73 // 74 // Ecma-262: 75 // 4.3.16 String Value 76 // A string value is a member of the type String and is a finite 77 // ordered sequence of zero or more 16-bit unsigned integer values. 78 // 79 // All string values have a length field. 80 class String : public Name { 81 public: 82 enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING }; 83 84 class SubStringRange { 85 public: 86 explicit inline SubStringRange(String* string, int first = 0, 87 int length = -1); 88 class iterator; 89 inline iterator begin(); 90 inline iterator end(); 91 92 private: 93 String* string_; 94 int first_; 95 int length_; 96 }; 97 98 // Representation of the flat content of a String. 99 // A non-flat string doesn't have flat content. 
100 // A flat string has content that's encoded as a sequence of either 101 // one-byte chars or two-byte UC16. 102 // Returned by String::GetFlatContent(). 103 class FlatContent { 104 public: 105 // Returns true if the string is flat and this structure contains content. IsFlat()106 bool IsFlat() const { return state_ != NON_FLAT; } 107 // Returns true if the structure contains one-byte content. IsOneByte()108 bool IsOneByte() const { return state_ == ONE_BYTE; } 109 // Returns true if the structure contains two-byte content. IsTwoByte()110 bool IsTwoByte() const { return state_ == TWO_BYTE; } 111 112 // Return the one byte content of the string. Only use if IsOneByte() 113 // returns true. ToOneByteVector()114 Vector<const uint8_t> ToOneByteVector() const { 115 DCHECK_EQ(ONE_BYTE, state_); 116 return Vector<const uint8_t>(onebyte_start, length_); 117 } 118 // Return the two-byte content of the string. Only use if IsTwoByte() 119 // returns true. ToUC16Vector()120 Vector<const uc16> ToUC16Vector() const { 121 DCHECK_EQ(TWO_BYTE, state_); 122 return Vector<const uc16>(twobyte_start, length_); 123 } 124 Get(int i)125 uc16 Get(int i) const { 126 DCHECK(i < length_); 127 DCHECK(state_ != NON_FLAT); 128 if (state_ == ONE_BYTE) return onebyte_start[i]; 129 return twobyte_start[i]; 130 } 131 UsesSameString(const FlatContent & other)132 bool UsesSameString(const FlatContent& other) const { 133 return onebyte_start == other.onebyte_start; 134 } 135 136 private: 137 enum State { NON_FLAT, ONE_BYTE, TWO_BYTE }; 138 139 // Constructors only used by String::GetFlatContent(). 
FlatContent(const uint8_t * start,int length)140 explicit FlatContent(const uint8_t* start, int length) 141 : onebyte_start(start), length_(length), state_(ONE_BYTE) {} FlatContent(const uc16 * start,int length)142 explicit FlatContent(const uc16* start, int length) 143 : twobyte_start(start), length_(length), state_(TWO_BYTE) {} FlatContent()144 FlatContent() : onebyte_start(nullptr), length_(0), state_(NON_FLAT) {} 145 146 union { 147 const uint8_t* onebyte_start; 148 const uc16* twobyte_start; 149 }; 150 int length_; 151 State state_; 152 153 friend class String; 154 friend class IterableSubString; 155 }; 156 157 template <typename Char> 158 INLINE(Vector<const Char> GetCharVector()); 159 160 // Get and set the length of the string. 161 inline int length() const; 162 inline void set_length(int value); 163 164 // Get and set the length of the string using acquire loads and release 165 // stores. 166 inline int synchronized_length() const; 167 inline void synchronized_set_length(int value); 168 169 // Returns whether this string has only one-byte chars, i.e. all of them can 170 // be one-byte encoded. This might be the case even if the string is 171 // two-byte. Such strings may appear when the embedder prefers 172 // two-byte external representations even for one-byte data. 173 inline bool IsOneByteRepresentation() const; 174 inline bool IsTwoByteRepresentation() const; 175 176 // Cons and slices have an encoding flag that may not represent the actual 177 // encoding of the underlying string. This is taken into account here. 178 // Requires: this->IsFlat() 179 inline bool IsOneByteRepresentationUnderneath(); 180 inline bool IsTwoByteRepresentationUnderneath(); 181 182 // NOTE: this should be considered only a hint. False negatives are 183 // possible. 184 inline bool HasOnlyOneByteChars(); 185 186 // Get and set individual two byte chars in the string. 187 inline void Set(int index, uint16_t value); 188 // Get individual two byte char in the string. 
Repeated calls 189 // to this method are not efficient unless the string is flat. 190 INLINE(uint16_t Get(int index)); 191 192 // ES6 section 7.1.3.1 ToNumber Applied to the String Type 193 static Handle<Object> ToNumber(Handle<String> subject); 194 195 // Flattens the string. Checks first inline to see if it is 196 // necessary. Does nothing if the string is not a cons string. 197 // Flattening allocates a sequential string with the same data as 198 // the given string and mutates the cons string to a degenerate 199 // form, where the first component is the new sequential string and 200 // the second component is the empty string. If allocation fails, 201 // this function returns a failure. If flattening succeeds, this 202 // function returns the sequential string that is now the first 203 // component of the cons string. 204 // 205 // Degenerate cons strings are handled specially by the garbage 206 // collector (see IsShortcutCandidate). 207 208 static inline Handle<String> Flatten(Handle<String> string, 209 PretenureFlag pretenure = NOT_TENURED); 210 211 // Tries to return the content of a flat string as a structure holding either 212 // a flat vector of char or of uc16. 213 // If the string isn't flat, and therefore doesn't have flat content, the 214 // returned structure will report so, and can't provide a vector of either 215 // kind. 216 FlatContent GetFlatContent(); 217 218 // Returns the parent of a sliced string or first part of a flat cons string. 219 // Requires: StringShape(this).IsIndirect() && this->IsFlat() 220 inline String* GetUnderlying(); 221 222 // String relational comparison, implemented according to ES6 section 7.2.11 223 // Abstract Relational Comparison (step 5): The comparison of Strings uses a 224 // simple lexicographic ordering on sequences of code unit values. 
There is no 225 // attempt to use the more complex, semantically oriented definitions of 226 // character or string equality and collating order defined in the Unicode 227 // specification. Therefore String values that are canonically equal according 228 // to the Unicode standard could test as unequal. In effect this algorithm 229 // assumes that both Strings are already in normalized form. Also, note that 230 // for strings containing supplementary characters, lexicographic ordering on 231 // sequences of UTF-16 code unit values differs from that on sequences of code 232 // point values. 233 V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Handle<String> x, 234 Handle<String> y); 235 236 // Perform ES6 21.1.3.8, including checking arguments. 237 static Object* IndexOf(Isolate* isolate, Handle<Object> receiver, 238 Handle<Object> search, Handle<Object> position); 239 // Perform string match of pattern on subject, starting at start index. 240 // Caller must ensure that 0 <= start_index <= sub->length(), as this does not 241 // check any arguments. 242 static int IndexOf(Isolate* isolate, Handle<String> receiver, 243 Handle<String> search, int start_index); 244 245 static Object* LastIndexOf(Isolate* isolate, Handle<Object> receiver, 246 Handle<Object> search, Handle<Object> position); 247 248 // Encapsulates logic related to a match and its capture groups as required 249 // by GetSubstitution. 250 class Match { 251 public: 252 virtual Handle<String> GetMatch() = 0; 253 virtual Handle<String> GetPrefix() = 0; 254 virtual Handle<String> GetSuffix() = 0; 255 256 // A named capture can be invalid (if it is not specified in the pattern), 257 // unmatched (specified but not matched in the current string), and matched. 
258 enum CaptureState { INVALID, UNMATCHED, MATCHED }; 259 260 virtual int CaptureCount() = 0; 261 virtual bool HasNamedCaptures() = 0; 262 virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0; 263 virtual MaybeHandle<String> GetNamedCapture(Handle<String> name, 264 CaptureState* state) = 0; 265 ~Match()266 virtual ~Match() {} 267 }; 268 269 // ES#sec-getsubstitution 270 // GetSubstitution(matched, str, position, captures, replacement) 271 // Expand the $-expressions in the string and return a new string with 272 // the result. 273 // A {start_index} can be passed to specify where to start scanning the 274 // replacement string. 275 V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution( 276 Isolate* isolate, Match* match, Handle<String> replacement, 277 int start_index = 0); 278 279 // String equality operations. 280 inline bool Equals(String* other); 281 inline static bool Equals(Handle<String> one, Handle<String> two); 282 bool IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match = false); 283 284 // Dispatches to Is{One,Two}ByteEqualTo. 285 template <typename Char> 286 bool IsEqualTo(Vector<const Char> str); 287 288 bool IsOneByteEqualTo(Vector<const uint8_t> str); 289 bool IsTwoByteEqualTo(Vector<const uc16> str); 290 291 // Return a UTF8 representation of the string. The string is null 292 // terminated but may optionally contain nulls. Length is returned 293 // in length_output if length_output is not a null pointer The string 294 // should be nearly flat, otherwise the performance of this method may 295 // be very slow (quadratic in the length). Setting robustness_flag to 296 // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust This means it 297 // handles unexpected data without causing assert failures and it does not 298 // do any heap allocations. This is useful when printing stack traces. 
299 std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls, 300 RobustnessFlag robustness_flag, int offset, 301 int length, int* length_output = 0); 302 std::unique_ptr<char[]> ToCString( 303 AllowNullsFlag allow_nulls = DISALLOW_NULLS, 304 RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL, 305 int* length_output = 0); 306 307 bool ComputeArrayIndex(uint32_t* index); 308 309 // Externalization. 310 bool MakeExternal(v8::String::ExternalStringResource* resource); 311 bool MakeExternal(v8::String::ExternalOneByteStringResource* resource); 312 313 // Conversion. 314 inline bool AsArrayIndex(uint32_t* index); 315 uint32_t inline ToValidIndex(Object* number); 316 317 // Trimming. 318 enum TrimMode { kTrim, kTrimStart, kTrimEnd }; 319 static Handle<String> Trim(Handle<String> string, TrimMode mode); 320 321 DECL_CAST(String) 322 323 void PrintOn(FILE* out); 324 325 // For use during stack traces. Performs rudimentary sanity check. 326 bool LooksValid(); 327 328 // Dispatched behavior. 329 void StringShortPrint(StringStream* accumulator, bool show_details = true); 330 void PrintUC16(std::ostream& os, int start = 0, int end = -1); // NOLINT 331 #if defined(DEBUG) || defined(OBJECT_PRINT) 332 char* ToAsciiArray(); 333 #endif 334 DECL_PRINTER(String) 335 DECL_VERIFIER(String) 336 337 inline bool IsFlat(); 338 339 // Layout description. 340 static const int kLengthOffset = Name::kSize; 341 static const int kSize = kLengthOffset + kPointerSize; 342 343 // Max char codes. 344 static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar; 345 static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar; 346 static const int kMaxUtf16CodeUnit = 0xffff; 347 static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit; 348 static const uc32 kMaxCodePoint = 0x10ffff; 349 350 // Maximal string length. 351 // The max length is different on 32 and 64 bit platforms. Max length for a 352 // 32-bit platform is ~268.4M chars. 
On 64-bit platforms, max length is 353 // ~1.073B chars. The limit on 64-bit is so that SeqTwoByteString::kMaxSize 354 // can fit in a 32bit int: 2^31 - 1 is the max positive int, minus one bit as 355 // each char needs two bytes, subtract 24 bytes for the string header size. 356 357 // See include/v8.h for the definition. 358 static const int kMaxLength = v8::String::kMaxLength; 359 360 // Max length for computing hash. For strings longer than this limit the 361 // string length is used as the hash value. 362 static const int kMaxHashCalcLength = 16383; 363 364 // Limit for truncation in short printing. 365 static const int kMaxShortPrintLength = 1024; 366 367 // Support for regular expressions. 368 const uc16* GetTwoByteData(unsigned start); 369 370 // Helper function for flattening strings. 371 template <typename sinkchar> 372 static void WriteToFlat(String* source, sinkchar* sink, int from, int to); 373 374 // The return value may point to the first aligned word containing the first 375 // non-one-byte character, rather than directly to the non-one-byte character. 376 // If the return value is >= the passed length, the entire string was 377 // one-byte. NonAsciiStart(const char * chars,int length)378 static inline int NonAsciiStart(const char* chars, int length) { 379 const char* start = chars; 380 const char* limit = chars + length; 381 382 if (length >= kIntptrSize) { 383 // Check unaligned bytes. 384 while (!IsAligned(reinterpret_cast<intptr_t>(chars), sizeof(uintptr_t))) { 385 if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) { 386 return static_cast<int>(chars - start); 387 } 388 ++chars; 389 } 390 // Check aligned words. 
391 DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F); 392 const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80; 393 while (chars + sizeof(uintptr_t) <= limit) { 394 if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) { 395 return static_cast<int>(chars - start); 396 } 397 chars += sizeof(uintptr_t); 398 } 399 } 400 // Check remaining unaligned bytes. 401 while (chars < limit) { 402 if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) { 403 return static_cast<int>(chars - start); 404 } 405 ++chars; 406 } 407 408 return static_cast<int>(chars - start); 409 } 410 IsAscii(const char * chars,int length)411 static inline bool IsAscii(const char* chars, int length) { 412 return NonAsciiStart(chars, length) >= length; 413 } 414 IsAscii(const uint8_t * chars,int length)415 static inline bool IsAscii(const uint8_t* chars, int length) { 416 return NonAsciiStart(reinterpret_cast<const char*>(chars), length) >= 417 length; 418 } 419 NonOneByteStart(const uc16 * chars,int length)420 static inline int NonOneByteStart(const uc16* chars, int length) { 421 const uc16* limit = chars + length; 422 const uc16* start = chars; 423 while (chars < limit) { 424 if (*chars > kMaxOneByteCharCodeU) return static_cast<int>(chars - start); 425 ++chars; 426 } 427 return static_cast<int>(chars - start); 428 } 429 IsOneByte(const uc16 * chars,int length)430 static inline bool IsOneByte(const uc16* chars, int length) { 431 return NonOneByteStart(chars, length) >= length; 432 } 433 434 template <class Visitor> 435 static inline ConsString* VisitFlat(Visitor* visitor, String* string, 436 int offset = 0); 437 438 static Handle<FixedArray> CalculateLineEnds(Handle<String> string, 439 bool include_ending_line); 440 441 private: 442 friend class Name; 443 friend class StringTableInsertionKey; 444 friend class InternalizedStringKey; 445 446 static Handle<String> SlowFlatten(Handle<ConsString> cons, 447 PretenureFlag tenure); 448 449 // Slow case of 
String::Equals. This implementation works on any strings 450 // but it is most efficient on strings that are almost flat. 451 bool SlowEquals(String* other); 452 453 static bool SlowEquals(Handle<String> one, Handle<String> two); 454 455 // Slow case of AsArrayIndex. 456 V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index); 457 458 // Compute and set the hash code. 459 uint32_t ComputeAndSetHash(); 460 461 DISALLOW_IMPLICIT_CONSTRUCTORS(String); 462 }; 463 464 // The SeqString abstract class captures sequential string values. 465 class SeqString : public String { 466 public: 467 DECL_CAST(SeqString) 468 469 // Layout description. 470 static const int kHeaderSize = String::kSize; 471 472 // Truncate the string in-place if possible and return the result. 473 // In case of new_length == 0, the empty string is returned without 474 // truncating the original string. 475 V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string, 476 int new_length); 477 478 private: 479 DISALLOW_IMPLICIT_CONSTRUCTORS(SeqString); 480 }; 481 482 // The OneByteString class captures sequential one-byte string objects. 483 // Each character in the OneByteString is an one-byte character. 484 class SeqOneByteString : public SeqString { 485 public: 486 static const bool kHasOneByteEncoding = true; 487 488 // Dispatched behavior. 489 inline uint16_t SeqOneByteStringGet(int index); 490 inline void SeqOneByteStringSet(int index, uint16_t value); 491 492 // Get the address of the characters in this string. 493 inline Address GetCharsAddress(); 494 495 inline uint8_t* GetChars(); 496 497 // Clear uninitialized padding space. This ensures that the snapshot content 498 // is deterministic. 499 void clear_padding(); 500 501 DECL_CAST(SeqOneByteString) 502 503 // Garbage collection support. This method is called by the 504 // garbage collector to compute the actual size of an OneByteString 505 // instance. 
506 inline int SeqOneByteStringSize(InstanceType instance_type); 507 508 // Computes the size for an OneByteString instance of a given length. SizeFor(int length)509 static int SizeFor(int length) { 510 return OBJECT_POINTER_ALIGN(kHeaderSize + length * kCharSize); 511 } 512 513 // Maximal memory usage for a single sequential one-byte string. 514 static const int kMaxCharsSize = kMaxLength; 515 static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize); 516 STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength); 517 518 class BodyDescriptor; 519 // No weak fields. 520 typedef BodyDescriptor BodyDescriptorWeak; 521 522 private: 523 DISALLOW_IMPLICIT_CONSTRUCTORS(SeqOneByteString); 524 }; 525 526 // The TwoByteString class captures sequential unicode string objects. 527 // Each character in the TwoByteString is a two-byte uint16_t. 528 class SeqTwoByteString : public SeqString { 529 public: 530 static const bool kHasOneByteEncoding = false; 531 532 // Dispatched behavior. 533 inline uint16_t SeqTwoByteStringGet(int index); 534 inline void SeqTwoByteStringSet(int index, uint16_t value); 535 536 // Get the address of the characters in this string. 537 inline Address GetCharsAddress(); 538 539 inline uc16* GetChars(); 540 541 // Clear uninitialized padding space. This ensures that the snapshot content 542 // is deterministic. 543 void clear_padding(); 544 545 // For regexp code. 546 const uint16_t* SeqTwoByteStringGetData(unsigned start); 547 548 DECL_CAST(SeqTwoByteString) 549 550 // Garbage collection support. This method is called by the 551 // garbage collector to compute the actual size of a TwoByteString 552 // instance. 553 inline int SeqTwoByteStringSize(InstanceType instance_type); 554 555 // Computes the size for a TwoByteString instance of a given length. 
SizeFor(int length)556 static int SizeFor(int length) { 557 return OBJECT_POINTER_ALIGN(kHeaderSize + length * kShortSize); 558 } 559 560 // Maximal memory usage for a single sequential two-byte string. 561 static const int kMaxCharsSize = kMaxLength * 2; 562 static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize); 563 STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >= 564 String::kMaxLength); 565 566 class BodyDescriptor; 567 // No weak fields. 568 typedef BodyDescriptor BodyDescriptorWeak; 569 570 private: 571 DISALLOW_IMPLICIT_CONSTRUCTORS(SeqTwoByteString); 572 }; 573 574 // The ConsString class describes string values built by using the 575 // addition operator on strings. A ConsString is a pair where the 576 // first and second components are pointers to other string values. 577 // One or both components of a ConsString can be pointers to other 578 // ConsStrings, creating a binary tree of ConsStrings where the leaves 579 // are non-ConsString string values. The string value represented by 580 // a ConsString can be obtained by concatenating the leaf string 581 // values in a left-to-right depth-first traversal of the tree. 582 class ConsString : public String { 583 public: 584 // First string of the cons cell. 585 inline String* first(); 586 // Doesn't check that the result is a string, even in debug mode. This is 587 // useful during GC where the mark bits confuse the checks. 588 inline Object* unchecked_first(); 589 inline void set_first(String* first, 590 WriteBarrierMode mode = UPDATE_WRITE_BARRIER); 591 592 // Second string of the cons cell. 593 inline String* second(); 594 // Doesn't check that the result is a string, even in debug mode. This is 595 // useful during GC where the mark bits confuse the checks. 596 inline Object* unchecked_second(); 597 inline void set_second(String* second, 598 WriteBarrierMode mode = UPDATE_WRITE_BARRIER); 599 600 // Dispatched behavior. 
601 V8_EXPORT_PRIVATE uint16_t ConsStringGet(int index); 602 603 DECL_CAST(ConsString) 604 605 // Layout description. 606 static const int kFirstOffset = POINTER_SIZE_ALIGN(String::kSize); 607 static const int kSecondOffset = kFirstOffset + kPointerSize; 608 static const int kSize = kSecondOffset + kPointerSize; 609 610 // Minimum length for a cons string. 611 static const int kMinLength = 13; 612 613 typedef FixedBodyDescriptor<kFirstOffset, kSecondOffset + kPointerSize, kSize> 614 BodyDescriptor; 615 // No weak fields. 616 typedef BodyDescriptor BodyDescriptorWeak; 617 618 DECL_VERIFIER(ConsString) 619 620 private: 621 DISALLOW_IMPLICIT_CONSTRUCTORS(ConsString); 622 }; 623 624 // The ThinString class describes string objects that are just references 625 // to another string object. They are used for in-place internalization when 626 // the original string cannot actually be internalized in-place: in these 627 // cases, the original string is converted to a ThinString pointing at its 628 // internalized version (which is allocated as a new object). 629 // In terms of memory layout and most algorithms operating on strings, 630 // ThinStrings can be thought of as "one-part cons strings". 631 class ThinString : public String { 632 public: 633 // Actual string that this ThinString refers to. 634 inline String* actual() const; 635 inline HeapObject* unchecked_actual() const; 636 inline void set_actual(String* s, 637 WriteBarrierMode mode = UPDATE_WRITE_BARRIER); 638 639 V8_EXPORT_PRIVATE uint16_t ThinStringGet(int index); 640 641 DECL_CAST(ThinString) 642 DECL_VERIFIER(ThinString) 643 644 // Layout description. 645 static const int kActualOffset = String::kSize; 646 static const int kSize = kActualOffset + kPointerSize; 647 648 typedef FixedBodyDescriptor<kActualOffset, kSize, kSize> BodyDescriptor; 649 // No weak fields. 
650 typedef BodyDescriptor BodyDescriptorWeak; 651 652 private: 653 DISALLOW_COPY_AND_ASSIGN(ThinString); 654 }; 655 656 // The Sliced String class describes strings that are substrings of another 657 // sequential string. The motivation is to save time and memory when creating 658 // a substring. A Sliced String is described as a pointer to the parent, 659 // the offset from the start of the parent string and the length. Using 660 // a Sliced String therefore requires unpacking of the parent string and 661 // adding the offset to the start address. A substring of a Sliced String 662 // are not nested since the double indirection is simplified when creating 663 // such a substring. 664 // Currently missing features are: 665 // - handling externalized parent strings 666 // - external strings as parent 667 // - truncating sliced string to enable otherwise unneeded parent to be GC'ed. 668 class SlicedString : public String { 669 public: 670 inline String* parent(); 671 inline void set_parent(String* parent, 672 WriteBarrierMode mode = UPDATE_WRITE_BARRIER); 673 inline int offset() const; 674 inline void set_offset(int offset); 675 676 // Dispatched behavior. 677 V8_EXPORT_PRIVATE uint16_t SlicedStringGet(int index); 678 679 DECL_CAST(SlicedString) 680 681 // Layout description. 682 static const int kParentOffset = POINTER_SIZE_ALIGN(String::kSize); 683 static const int kOffsetOffset = kParentOffset + kPointerSize; 684 static const int kSize = kOffsetOffset + kPointerSize; 685 686 // Minimum length for a sliced string. 687 static const int kMinLength = 13; 688 689 typedef FixedBodyDescriptor<kParentOffset, kOffsetOffset + kPointerSize, 690 kSize> 691 BodyDescriptor; 692 // No weak fields. 
693 typedef BodyDescriptor BodyDescriptorWeak; 694 695 DECL_VERIFIER(SlicedString) 696 697 private: 698 DISALLOW_IMPLICIT_CONSTRUCTORS(SlicedString); 699 }; 700 701 // The ExternalString class describes string values that are backed by 702 // a string resource that lies outside the V8 heap. ExternalStrings 703 // consist of the length field common to all strings, a pointer to the 704 // external resource. It is important to ensure (externally) that the 705 // resource is not deallocated while the ExternalString is live in the 706 // V8 heap. 707 // 708 // The API expects that all ExternalStrings are created through the 709 // API. Therefore, ExternalStrings should not be used internally. 710 class ExternalString : public String { 711 public: 712 DECL_CAST(ExternalString) 713 714 // Layout description. 715 static const int kResourceOffset = POINTER_SIZE_ALIGN(String::kSize); 716 static const int kShortSize = kResourceOffset + kPointerSize; 717 static const int kResourceDataOffset = kResourceOffset + kPointerSize; 718 static const int kSize = kResourceDataOffset + kPointerSize; 719 720 // Return whether external string is short (data pointer is not cached). 721 inline bool is_short() const; 722 // Size in bytes of the external payload. 723 int ExternalPayloadSize() const; 724 725 // Used in the serializer/deserializer. 726 inline Address resource_as_address(); 727 inline void set_address_as_resource(Address address); 728 inline uint32_t resource_as_uint32(); 729 inline void set_uint32_as_resource(uint32_t value); 730 731 STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset); 732 733 private: 734 DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalString); 735 }; 736 737 // The ExternalOneByteString class is an external string backed by an 738 // one-byte string. 
739 class ExternalOneByteString : public ExternalString { 740 public: 741 static const bool kHasOneByteEncoding = true; 742 743 typedef v8::String::ExternalOneByteStringResource Resource; 744 745 // The underlying resource. 746 inline const Resource* resource(); 747 inline void set_resource(const Resource* buffer); 748 749 // Update the pointer cache to the external character array. 750 // The cached pointer is always valid, as the external character array does = 751 // not move during lifetime. Deserialization is the only exception, after 752 // which the pointer cache has to be refreshed. 753 inline void update_data_cache(); 754 755 inline const uint8_t* GetChars(); 756 757 // Dispatched behavior. 758 inline uint16_t ExternalOneByteStringGet(int index); 759 760 DECL_CAST(ExternalOneByteString) 761 762 class BodyDescriptor; 763 // No weak fields. 764 typedef BodyDescriptor BodyDescriptorWeak; 765 766 private: 767 DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalOneByteString); 768 }; 769 770 // The ExternalTwoByteString class is an external string backed by a UTF-16 771 // encoded string. 772 class ExternalTwoByteString : public ExternalString { 773 public: 774 static const bool kHasOneByteEncoding = false; 775 776 typedef v8::String::ExternalStringResource Resource; 777 778 // The underlying string resource. 779 inline const Resource* resource(); 780 inline void set_resource(const Resource* buffer); 781 782 // Update the pointer cache to the external character array. 783 // The cached pointer is always valid, as the external character array does = 784 // not move during lifetime. Deserialization is the only exception, after 785 // which the pointer cache has to be refreshed. 786 inline void update_data_cache(); 787 788 inline const uint16_t* GetChars(); 789 790 // Dispatched behavior. 791 inline uint16_t ExternalTwoByteStringGet(int index); 792 793 // For regexp code. 
794 inline const uint16_t* ExternalTwoByteStringGetData(unsigned start); 795 796 DECL_CAST(ExternalTwoByteString) 797 798 class BodyDescriptor; 799 // No weak fields. 800 typedef BodyDescriptor BodyDescriptorWeak; 801 802 private: 803 DISALLOW_IMPLICIT_CONSTRUCTORS(ExternalTwoByteString); 804 }; 805 806 // A flat string reader provides random access to the contents of a 807 // string independent of the character width of the string. The handle 808 // must be valid as long as the reader is being used. 809 class FlatStringReader : public Relocatable { 810 public: 811 FlatStringReader(Isolate* isolate, Handle<String> str); 812 FlatStringReader(Isolate* isolate, Vector<const char> input); 813 void PostGarbageCollection(); 814 inline uc32 Get(int index); 815 template <typename Char> 816 inline Char Get(int index); length()817 int length() { return length_; } 818 819 private: 820 String** str_; 821 bool is_one_byte_; 822 int length_; 823 const void* start_; 824 }; 825 826 // This maintains an off-stack representation of the stack frames required 827 // to traverse a ConsString, allowing an entirely iterative and restartable 828 // traversal of the entire string 829 class ConsStringIterator { 830 public: ConsStringIterator()831 inline ConsStringIterator() {} 832 inline explicit ConsStringIterator(ConsString* cons_string, int offset = 0) { 833 Reset(cons_string, offset); 834 } 835 inline void Reset(ConsString* cons_string, int offset = 0) { 836 depth_ = 0; 837 // Next will always return nullptr. 838 if (cons_string == nullptr) return; 839 Initialize(cons_string, offset); 840 } 841 // Returns nullptr when complete. Next(int * offset_out)842 inline String* Next(int* offset_out) { 843 *offset_out = 0; 844 if (depth_ == 0) return nullptr; 845 return Continue(offset_out); 846 } 847 848 private: 849 static const int kStackSize = 32; 850 // Use a mask instead of doing modulo operations for stack wrapping. 
851 static const int kDepthMask = kStackSize - 1; 852 static_assert(base::bits::IsPowerOfTwo(kStackSize), 853 "kStackSize must be power of two"); 854 static inline int OffsetForDepth(int depth); 855 856 inline void PushLeft(ConsString* string); 857 inline void PushRight(ConsString* string); 858 inline void AdjustMaximumDepth(); 859 inline void Pop(); StackBlown()860 inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; } 861 void Initialize(ConsString* cons_string, int offset); 862 String* Continue(int* offset_out); 863 String* NextLeaf(bool* blew_stack); 864 String* Search(int* offset_out); 865 866 // Stack must always contain only frames for which right traversal 867 // has not yet been performed. 868 ConsString* frames_[kStackSize]; 869 ConsString* root_; 870 int depth_; 871 int maximum_depth_; 872 int consumed_; 873 DISALLOW_COPY_AND_ASSIGN(ConsStringIterator); 874 }; 875 876 class StringCharacterStream { 877 public: 878 inline explicit StringCharacterStream(String* string, int offset = 0); 879 inline uint16_t GetNext(); 880 inline bool HasMore(); 881 inline void Reset(String* string, int offset = 0); 882 inline void VisitOneByteString(const uint8_t* chars, int length); 883 inline void VisitTwoByteString(const uint16_t* chars, int length); 884 885 private: 886 ConsStringIterator iter_; 887 bool is_one_byte_; 888 union { 889 const uint8_t* buffer8_; 890 const uint16_t* buffer16_; 891 }; 892 const uint8_t* end_; 893 DISALLOW_COPY_AND_ASSIGN(StringCharacterStream); 894 }; 895 896 } // namespace internal 897 } // namespace v8 898 899 #include "src/objects/object-macros-undef.h" 900 901 #endif // V8_OBJECTS_STRING_H_ 902