1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_OBJECTS_STRING_H_
6 #define V8_OBJECTS_STRING_H_
7 
8 #include <memory>
9 
10 #include "src/base/bits.h"
11 #include "src/base/export-template.h"
12 #include "src/base/strings.h"
13 #include "src/common/globals.h"
14 #include "src/objects/instance-type.h"
15 #include "src/objects/name.h"
16 #include "src/objects/smi.h"
17 #include "src/strings/unicode-decoder.h"
18 
19 // Has to be the last include (doesn't have include guards):
20 #include "src/objects/object-macros.h"
21 
22 namespace v8 {
23 namespace internal {
24 
25 class SharedStringAccessGuardIfNeeded;
26 
27 enum InstanceType : uint16_t;
28 
29 enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };
30 enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };
31 
32 // The characteristics of a string are stored in its map.  Retrieving these
33 // few bits of information is moderately expensive, involving two memory
34 // loads where the second is dependent on the first.  To improve efficiency
35 // the shape of the string is given its own class so that it can be retrieved
36 // once and used for several string operations.  A StringShape is small enough
37 // to be passed by value and is immutable, but be aware that flattening a
38 // string can potentially alter its shape.  Also be aware that a GC caused by
39 // something else can alter the shape of a string due to ConsString
40 // shortcutting.  Keeping these restrictions in mind has proven to be error-
41 // prone and so we no longer put StringShapes in variables unless there is a
42 // concrete performance benefit at that particular point in the code.
43 class StringShape {
44  public:
  // A shape can be constructed from a String (optionally together with an
  // explicit PtrComprCageBase), from a Map, or directly from an InstanceType.
  // Every constructor is explicit, so nothing converts to a StringShape
  // implicitly.
45   inline explicit StringShape(const String s);
46   inline explicit StringShape(const String s, PtrComprCageBase cage_base);
47   inline explicit StringShape(Map s);
48   inline explicit StringShape(InstanceType t);
  // Queries on the captured shape. Only the declarations appear here; the
  // inline definitions are not part of this section of the file.
49   inline bool IsSequential() const;
50   inline bool IsExternal() const;
51   inline bool IsCons() const;
52   inline bool IsSliced() const;
53   inline bool IsThin() const;
54   inline bool IsIndirect() const;
55   inline bool IsUncachedExternal() const;
56   inline bool IsExternalOneByte() const;
57   inline bool IsExternalTwoByte() const;
58   inline bool IsSequentialOneByte() const;
59   inline bool IsSequentialTwoByte() const;
60   inline bool IsInternalized() const;
61   inline StringRepresentationTag representation_tag() const;
62   inline uint32_t encoding_tag() const;
63   inline uint32_t full_representation_tag() const;
  // Debug builds additionally expose the raw type_ value and track a validity
  // flag: invalidate() clears it and valid() reports it. In non-debug builds
  // invalidate() is an empty function, and type() and valid() are not declared
  // at all, so they can only be used from debug-only code.
64 #ifdef DEBUG
type()65   inline uint32_t type() const { return type_; }
invalidate()66   inline void invalidate() { valid_ = false; }
valid()67   inline bool valid() const { return valid_; }
68 #else
invalidate()69   inline void invalidate() {}
70 #endif
71 
72   // Run different behavior for each concrete string class type, as defined by
73   // the dispatcher.
74   template <typename TDispatcher, typename TResult, typename... TArgs>
75   inline TResult DispatchToSpecificTypeWithoutCast(TArgs&&... args);
76   template <typename TDispatcher, typename TResult, typename... TArgs>
77   inline TResult DispatchToSpecificType(String str, TArgs&&... args);
78 
79  private:
  // Value handed back by type() in debug builds.
  // NOTE(review): presumably holds the string-related instance-type bits;
  // confirm against the constructor definitions.
80   uint32_t type_;
  // set_valid() is the counterpart of invalidate(): it sets valid_ in debug
  // builds and is an empty function otherwise. valid_ exists only in debug
  // builds.
81 #ifdef DEBUG
set_valid()82   inline void set_valid() { valid_ = true; }
83   bool valid_;
84 #else
set_valid()85   inline void set_valid() {}
86 #endif
87 };
88 
89 #include "torque-generated/src/objects/string-tq.inc"
90 
91 // The String abstract class captures JavaScript string values:
92 //
93 // Ecma-262:
94 //  4.3.16 String Value
95 //    A string value is a member of the type String and is a finite
96 //    ordered sequence of zero or more 16-bit unsigned integer values.
97 //
98 // All string values have a length field.
99 class String : public TorqueGeneratedString<String, Name> {
100  public:
101   enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };
102 
103   // Representation of the flat content of a String.
104   // A non-flat string doesn't have flat content.
105   // A flat string has content that's encoded as a sequence of either
106   // one-byte chars or two-byte UC16.
107   // Returned by String::GetFlatContent().
108   // Not safe to use from concurrent background threads.
109   // TODO(solanes): Move FlatContent into FlatStringReader, and make it private.
110   // This would de-duplicate code, as well as taking advantage of the fact that
111   // FlatStringReader is relocatable.
112   class FlatContent {
113    public:
114     // Returns true if the string is flat and this structure contains content.
IsFlat()115     bool IsFlat() const { return state_ != NON_FLAT; }
116     // Returns true if the structure contains one-byte content.
IsOneByte()117     bool IsOneByte() const { return state_ == ONE_BYTE; }
118     // Returns true if the structure contains two-byte content.
IsTwoByte()119     bool IsTwoByte() const { return state_ == TWO_BYTE; }
120 
121     // Return the one byte content of the string. Only use if IsOneByte()
122     // returns true.
ToOneByteVector()123     base::Vector<const uint8_t> ToOneByteVector() const {
124       DCHECK_EQ(ONE_BYTE, state_);
125       return base::Vector<const uint8_t>(onebyte_start, length_);
126     }
127     // Return the two-byte content of the string. Only use if IsTwoByte()
128     // returns true.
ToUC16Vector()129     base::Vector<const base::uc16> ToUC16Vector() const {
130       DCHECK_EQ(TWO_BYTE, state_);
131       return base::Vector<const base::uc16>(twobyte_start, length_);
132     }
133 
Get(int i)134     base::uc16 Get(int i) const {
135       DCHECK(i < length_);
136       DCHECK(state_ != NON_FLAT);
137       if (state_ == ONE_BYTE) return onebyte_start[i];
138       return twobyte_start[i];
139     }
140 
UsesSameString(const FlatContent & other)141     bool UsesSameString(const FlatContent& other) const {
142       return onebyte_start == other.onebyte_start;
143     }
144 
145    private:
146     enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };
147 
148     // Constructors only used by String::GetFlatContent().
FlatContent(const uint8_t * start,int length,const DisallowGarbageCollection & no_gc)149     FlatContent(const uint8_t* start, int length,
150                 const DisallowGarbageCollection& no_gc)
151         : onebyte_start(start),
152           length_(length),
153           state_(ONE_BYTE),
154           no_gc_(no_gc) {}
FlatContent(const base::uc16 * start,int length,const DisallowGarbageCollection & no_gc)155     FlatContent(const base::uc16* start, int length,
156                 const DisallowGarbageCollection& no_gc)
157         : twobyte_start(start),
158           length_(length),
159           state_(TWO_BYTE),
160           no_gc_(no_gc) {}
FlatContent(const DisallowGarbageCollection & no_gc)161     explicit FlatContent(const DisallowGarbageCollection& no_gc)
162         : onebyte_start(nullptr), length_(0), state_(NON_FLAT), no_gc_(no_gc) {}
163 
164     union {
165       const uint8_t* onebyte_start;
166       const base::uc16* twobyte_start;
167     };
168     int length_;
169     State state_;
170     const DisallowGarbageCollection& no_gc_;
171 
172     friend class String;
173     friend class IterableSubString;
174   };
175 
176   template <typename IsolateT>
177   void MakeThin(IsolateT* isolate, String canonical);
178 
179   template <typename Char>
180   V8_INLINE base::Vector<const Char> GetCharVector(
181       const DisallowGarbageCollection& no_gc);
182 
183   // Get chars from sequential or external strings. May only be called when a
184   // SharedStringAccessGuard is not needed (i.e. on the main thread or on
185   // read-only strings).
186   template <typename Char>
187   inline const Char* GetChars(PtrComprCageBase cage_base,
188                               const DisallowGarbageCollection& no_gc) const;
189 
190   // Get chars from sequential or external strings.
191   template <typename Char>
192   inline const Char* GetChars(
193       PtrComprCageBase cage_base, const DisallowGarbageCollection& no_gc,
194       const SharedStringAccessGuardIfNeeded& access_guard) const;
195 
196   // Returns the address of the character at an offset into this string.
197   // Requires: this->IsFlat()
198   const byte* AddressOfCharacterAt(int start_index,
199                                    const DisallowGarbageCollection& no_gc);
200 
201   // Forward declare the non-atomic (set_)length defined in torque.
202   using TorqueGeneratedString::length;
203   using TorqueGeneratedString::set_length;
204   DECL_RELEASE_ACQUIRE_INT_ACCESSORS(length)
205 
206   // Returns whether this string has only one-byte chars, i.e. all of them can
207   // be one-byte encoded.  This might be the case even if the string is
208   // two-byte.  Such strings may appear when the embedder prefers
209   // two-byte external representations even for one-byte data.
210   DECL_GETTER(IsOneByteRepresentation, bool)
211   DECL_GETTER(IsTwoByteRepresentation, bool)
212 
213   // Cons and slices have an encoding flag that may not represent the actual
214   // encoding of the underlying string.  This is taken into account here.
215   // This function is static because that helps it get inlined.
216   // Requires: string.IsFlat()
217   static inline bool IsOneByteRepresentationUnderneath(String string);
218 
219   // Set an individual two-byte char in the string.
220   inline void Set(int index, uint16_t value);
221   // Get individual two byte char in the string.  Repeated calls
222   // to this method are not efficient unless the string is flat.
223   // If it is called from a background thread, the LocalIsolate version should
224   // be used.
225   V8_INLINE uint16_t Get(int index) const;
226   V8_INLINE uint16_t Get(int index, Isolate* isolate) const;
227   V8_INLINE uint16_t Get(int index, LocalIsolate* local_isolate) const;
228   // Method to pass down the access_guard. Useful for recursive calls such as
229   // ThinStrings where we go String::Get into ThinString::Get into String::Get
230   // again for the internalized string.
231   V8_INLINE uint16_t
232   Get(int index, PtrComprCageBase cage_base,
233       const SharedStringAccessGuardIfNeeded& access_guard) const;
234 
235   // ES6 section 7.1.3.1 ToNumber Applied to the String Type
236   static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject);
237 
238   // Flattens the string.  Checks first inline to see if it is
239   // necessary.  Does nothing if the string is not a cons string.
240   // Flattening allocates a sequential string with the same data as
241   // the given string and mutates the cons string to a degenerate
242   // form, where the first component is the new sequential string and
243   // the second component is the empty string.  If allocation fails,
244   // this function returns a failure.  If flattening succeeds, this
245   // function returns the sequential string that is now the first
246   // component of the cons string.
247   //
248   // Degenerate cons strings are handled specially by the garbage
249   // collector (see IsShortcutCandidate).
250 
251   static inline Handle<String> Flatten(
252       Isolate* isolate, Handle<String> string,
253       AllocationType allocation = AllocationType::kYoung);
254   static inline Handle<String> Flatten(
255       LocalIsolate* isolate, Handle<String> string,
256       AllocationType allocation = AllocationType::kYoung);
257 
258   // Tries to return the content of a flat string as a structure holding either
259   // a flat vector of char or of base::uc16.
260   // If the string isn't flat, and therefore doesn't have flat content, the
261   // returned structure will report so, and can't provide a vector of either
262   // kind.
263   V8_EXPORT_PRIVATE FlatContent
264   GetFlatContent(const DisallowGarbageCollection& no_gc);
265 
266   // Returns the parent of a sliced string or first part of a flat cons string.
267   // Requires: StringShape(this).IsIndirect() && this->IsFlat()
268   inline String GetUnderlying() const;
269 
270   // String relational comparison, implemented according to ES6 section 7.2.11
271   // Abstract Relational Comparison (step 5): The comparison of Strings uses a
272   // simple lexicographic ordering on sequences of code unit values. There is no
273   // attempt to use the more complex, semantically oriented definitions of
274   // character or string equality and collating order defined in the Unicode
275   // specification. Therefore String values that are canonically equal according
276   // to the Unicode standard could test as unequal. In effect this algorithm
277   // assumes that both Strings are already in normalized form. Also, note that
278   // for strings containing supplementary characters, lexicographic ordering on
279   // sequences of UTF-16 code unit values differs from that on sequences of code
280   // point values.
281   V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate,
282                                                         Handle<String> x,
283                                                         Handle<String> y);
284 
285   // Perform ES6 21.1.3.8, including checking arguments.
286   static Object IndexOf(Isolate* isolate, Handle<Object> receiver,
287                         Handle<Object> search, Handle<Object> position);
288   // Perform string match of pattern on subject, starting at start index.
289   // Caller must ensure that 0 <= start_index <= receiver->length(), as this
290   // does not check any arguments.
291   static int IndexOf(Isolate* isolate, Handle<String> receiver,
292                      Handle<String> search, int start_index);
293 
294   static Object LastIndexOf(Isolate* isolate, Handle<Object> receiver,
295                             Handle<Object> search, Handle<Object> position);
296 
297   // Encapsulates logic related to a match and its capture groups as required
298   // by GetSubstitution.
299   class Match {
300    public:
    // Abstract interface: every accessor below is pure virtual, so a concrete
    // match source has to derive from Match and implement all of them.
301     virtual Handle<String> GetMatch() = 0;
302     virtual Handle<String> GetPrefix() = 0;
303     virtual Handle<String> GetSuffix() = 0;
304 
305     // A named capture can be unmatched (either not specified in the pattern,
306     // or specified but unmatched in the current string), or matched.
307     enum CaptureState { UNMATCHED, MATCHED };
308 
309     virtual int CaptureCount() = 0;
310     virtual bool HasNamedCaptures() = 0;
    // GetCapture reports through the |capture_exists| out-parameter and
    // GetNamedCapture through the |state| out-parameter.
    // NOTE(review): what an empty MaybeHandle result signifies is not visible
    // in this header; confirm in the implementations.
311     virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
312     virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
313                                                 CaptureState* state) = 0;
314 
    // Virtual so that implementations can be destroyed through a Match*.
315     virtual ~Match() = default;
316   };
317 
318   // ES#sec-getsubstitution
319   // GetSubstitution(matched, str, position, captures, replacement)
320   // Expand the $-expressions in the string and return a new string with
321   // the result.
322   // A {start_index} can be passed to specify where to start scanning the
323   // replacement string.
324   V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution(
325       Isolate* isolate, Match* match, Handle<String> replacement,
326       int start_index = 0);
327 
328   // String equality operations.
329   inline bool Equals(String other) const;
330   inline static bool Equals(Isolate* isolate, Handle<String> one,
331                             Handle<String> two);
332 
333   enum class EqualityType { kWholeString, kPrefix, kNoLengthCheck };
334 
335   // Check if this string matches the given vector of characters, either as a
336   // whole string or just a prefix.
337   //
338   // The Isolate is passed as "evidence" that this call is on the main thread,
339   // and to distinguish from the LocalIsolate overload.
340   template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
341   inline bool IsEqualTo(base::Vector<const Char> str, Isolate* isolate) const;
342 
343   // Check if this string matches the given vector of characters, either as a
344   // whole string or just a prefix.
345   //
346   // This is main-thread only, like the Isolate* overload, but additionally
347   // computes the PtrComprCageBase for IsEqualToImpl.
348   template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
349   inline bool IsEqualTo(base::Vector<const Char> str) const;
350 
351   // Check if this string matches the given vector of characters, either as a
352   // whole string or just a prefix.
353   //
354   // The LocalIsolate is passed to provide access to the string access lock,
355   // which is taken when reading the string's contents on a background thread.
356   template <EqualityType kEqType = EqualityType::kWholeString, typename Char>
357   inline bool IsEqualTo(base::Vector<const Char> str,
358                         LocalIsolate* isolate) const;
359 
360   V8_EXPORT_PRIVATE bool HasOneBytePrefix(base::Vector<const char> str);
361   V8_EXPORT_PRIVATE inline bool IsOneByteEqualTo(base::Vector<const char> str);
362 
363   // Return a UTF8 representation of the string.  The string is null
364   // terminated but may optionally contain nulls.  Length is returned
365   // in length_output if length_output is not a null pointer.  The string
366   // should be nearly flat, otherwise the performance of this method may
367   // be very slow (quadratic in the length).  Setting robustness_flag to
368   // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust.  This means it
369   // handles unexpected data without causing assert failures and it does not
370   // do any heap allocations.  This is useful when printing stack traces.
371   std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
372                                     RobustnessFlag robustness_flag, int offset,
373                                     int length, int* length_output = nullptr);
374   V8_EXPORT_PRIVATE std::unique_ptr<char[]> ToCString(
375       AllowNullsFlag allow_nulls = DISALLOW_NULLS,
376       RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
377       int* length_output = nullptr);
378 
379   // Externalization.
380   V8_EXPORT_PRIVATE bool MakeExternal(
381       v8::String::ExternalStringResource* resource);
382   V8_EXPORT_PRIVATE bool MakeExternal(
383       v8::String::ExternalOneByteStringResource* resource);
384   bool SupportsExternalization();
385 
386   // Conversion.
387   // "array index": an index allowed by the ES spec for JSArrays.
388   inline bool AsArrayIndex(uint32_t* index);
389 
390   // This is used for calculating array indices but differs from an
391   // Array Index in the regard that this does not support the full
392   // array index range. This only supports positive numbers less than
393   // or equal to INT_MAX.
394   //
395   // String::AsArrayIndex might be a better fit if you're looking to
396   // calculate the array index.
397   //
398   // if val < 0 or val > INT_MAX, returns -1
399   // if 0 <= val <= INT_MAX, returns val
400   static int32_t ToArrayIndex(Address addr);
401 
402   // "integer index": the string is the decimal representation of an
403   // integer in the range of a size_t. Useful for TypedArray accesses.
404   inline bool AsIntegerIndex(size_t* index);
405 
406   // Trimming.
407   enum TrimMode { kTrim, kTrimStart, kTrimEnd };
408 
409   V8_EXPORT_PRIVATE void PrintOn(FILE* out);
410   V8_EXPORT_PRIVATE void PrintOn(std::ostream& out);
411 
412   // For use during stack traces.  Performs rudimentary sanity check.
413   bool LooksValid();
414 
415   // Printing utility functions.
416   // - PrintUC16 prints the raw string contents to the given stream.
417   //   Non-printable characters are formatted as hex, but otherwise the string
418   //   is printed as-is.
419   // - StringShortPrint and StringPrint have extra formatting: they add a
420   //   prefix and suffix depending on the string kind, may add other information
421   //   such as the string heap object address, may truncate long strings, etc.
422   const char* PrefixForDebugPrint() const;
423   const char* SuffixForDebugPrint() const;
424   void StringShortPrint(StringStream* accumulator);
425   void PrintUC16(std::ostream& os, int start = 0, int end = -1);
426   void PrintUC16(StringStream* accumulator, int start, int end);
427 
428   // Dispatched behavior.
429 #if defined(DEBUG) || defined(OBJECT_PRINT)
430   char* ToAsciiArray();
431 #endif
432   DECL_PRINTER(String)
433   DECL_VERIFIER(String)
434 
435   inline bool IsFlat() const;
436   inline bool IsFlat(PtrComprCageBase cage_base) const;
437 
438   // Max char codes.
439   static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
440   static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
441   static const int kMaxUtf16CodeUnit = 0xffff;
442   static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
443   static const base::uc32 kMaxCodePoint = 0x10ffff;
444 
445   // Maximal string length.
446   // The max length is different on 32 and 64 bit platforms. Max length for
447   // 32-bit platforms is ~268.4M chars. On 64-bit platforms, max length is
448   // ~536.8M chars.
449   // See include/v8.h for the definition.
450   static const int kMaxLength = v8::String::kMaxLength;
451   // There are several defining limits imposed by our current implementation:
452   // - any string's length must fit into a Smi.
453   static_assert(kMaxLength <= kSmiMaxValue,
454                 "String length must fit into a Smi");
455   // - adding two string lengths must still fit into a 32-bit int without
456   //   overflow
457   static_assert(kMaxLength * 2 <= kMaxInt,
458                 "String::kMaxLength * 2 must fit into an int32");
459   // - any heap object's size in bytes must be able to fit into a Smi, because
460   //   its space on the heap might be filled with a Filler; for strings this
461   //   means SeqTwoByteString::kMaxSize must be able to fit into a Smi.
462   static_assert(kMaxLength * 2 + kHeaderSize <= kSmiMaxValue,
463                 "String object size in bytes must fit into a Smi");
464   // - any heap object's size in bytes must be able to fit into an int, because
465   //   that's what our object handling code uses almost everywhere.
466   static_assert(kMaxLength * 2 + kHeaderSize <= kMaxInt,
467                 "String object size in bytes must fit into an int");
468 
469   // Max length for computing hash. For strings longer than this limit the
470   // string length is used as the hash value.
471   static const int kMaxHashCalcLength = 16383;
472 
473   // Limit for truncation in short printing.
474   static const int kMaxShortPrintLength = 1024;
475 
476   // Helper function for flattening strings.
477   template <typename sinkchar>
478   EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
479   static void WriteToFlat(String source, sinkchar* sink, int from, int to);
480   template <typename sinkchar>
481   static void WriteToFlat(String source, sinkchar* sink, int from, int to,
482                           PtrComprCageBase cage_base,
483                           const SharedStringAccessGuardIfNeeded&);
484 
IsAscii(const char * chars,int length)485   static inline bool IsAscii(const char* chars, int length) {
486     return IsAscii(reinterpret_cast<const uint8_t*>(chars), length);
487   }
488 
IsAscii(const uint8_t * chars,int length)489   static inline bool IsAscii(const uint8_t* chars, int length) {
490     return NonAsciiStart(chars, length) >= length;
491   }
492 
NonOneByteStart(const base::uc16 * chars,int length)493   static inline int NonOneByteStart(const base::uc16* chars, int length) {
494     DCHECK(IsAligned(reinterpret_cast<Address>(chars), sizeof(base::uc16)));
495     const uint16_t* start = chars;
496     const uint16_t* limit = chars + length;
497 
498     if (static_cast<size_t>(length) >= kUIntptrSize) {
499       // Check unaligned chars.
500       while (!IsAligned(reinterpret_cast<Address>(chars), kUIntptrSize)) {
501         if (*chars > unibrow::Latin1::kMaxChar) {
502           return static_cast<int>(chars - start);
503         }
504         ++chars;
505       }
506 
507       // Check aligned words.
508       STATIC_ASSERT(unibrow::Latin1::kMaxChar == 0xFF);
509 #ifdef V8_TARGET_LITTLE_ENDIAN
510       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0xFF00;
511 #else
512       const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFFFF * 0x00FF;
513 #endif
514       while (chars + sizeof(uintptr_t) <= limit) {
515         if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
516           break;
517         }
518         chars += (sizeof(uintptr_t) / sizeof(base::uc16));
519       }
520     }
521 
522     // Check remaining unaligned chars, or find non-one-byte char in word.
523     while (chars < limit) {
524       if (*chars > unibrow::Latin1::kMaxChar) {
525         return static_cast<int>(chars - start);
526       }
527       ++chars;
528     }
529 
530     return static_cast<int>(chars - start);
531   }
532 
IsOneByte(const base::uc16 * chars,int length)533   static inline bool IsOneByte(const base::uc16* chars, int length) {
534     return NonOneByteStart(chars, length) >= length;
535   }
536 
537   // May only be called when a SharedStringAccessGuard is not needed (i.e. on
538   // the main thread or on read-only strings).
539   template <class Visitor>
540   static inline ConsString VisitFlat(Visitor* visitor, String string,
541                                      int offset = 0);
542 
543   template <class Visitor>
544   static inline ConsString VisitFlat(
545       Visitor* visitor, String string, int offset,
546       const SharedStringAccessGuardIfNeeded& access_guard);
547 
548   template <typename IsolateT>
549   static Handle<FixedArray> CalculateLineEnds(IsolateT* isolate,
550                                               Handle<String> string,
551                                               bool include_ending_line);
552 
553  private:
554   friend class Name;
555   friend class StringTableInsertionKey;
556   friend class InternalizedStringKey;
557 
558   // Implementation of the Get() public methods. Do not use directly.
559   V8_INLINE uint16_t
560   GetImpl(int index, PtrComprCageBase cage_base,
561           const SharedStringAccessGuardIfNeeded& access_guard) const;
562 
563   // Implementation of the IsEqualTo() public methods. Do not use directly.
564   template <EqualityType kEqType, typename Char>
565   V8_INLINE bool IsEqualToImpl(
566       base::Vector<const Char> str, PtrComprCageBase cage_base,
567       const SharedStringAccessGuardIfNeeded& access_guard) const;
568 
569   // Out-of-line IsEqualToImpl for ConsString.
570   template <typename Char>
571   V8_NOINLINE static bool IsConsStringEqualToImpl(
572       ConsString string, int slice_offset, base::Vector<const Char> str,
573       PtrComprCageBase cage_base,
574       const SharedStringAccessGuardIfNeeded& access_guard);
575 
576   V8_EXPORT_PRIVATE static Handle<String> SlowFlatten(
577       Isolate* isolate, Handle<ConsString> cons, AllocationType allocation);
578 
579   // Slow case of String::Equals.  This implementation works on any strings
580   // but it is most efficient on strings that are almost flat.
581   V8_EXPORT_PRIVATE bool SlowEquals(String other) const;
582   V8_EXPORT_PRIVATE bool SlowEquals(
583       String other, const SharedStringAccessGuardIfNeeded&) const;
584 
585   V8_EXPORT_PRIVATE static bool SlowEquals(Isolate* isolate, Handle<String> one,
586                                            Handle<String> two);
587 
588   // Slow case of AsArrayIndex.
589   V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
590   V8_EXPORT_PRIVATE bool SlowAsIntegerIndex(size_t* index);
591 
592   // Compute and set the hash code.
593   V8_EXPORT_PRIVATE uint32_t ComputeAndSetHash();
594   V8_EXPORT_PRIVATE uint32_t
595   ComputeAndSetHash(const SharedStringAccessGuardIfNeeded&);
596 
597   TQ_OBJECT_CONSTRUCTORS(String)
598 };
599 
600 // clang-format off
601 extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
602 void String::WriteToFlat(String source, uint8_t* sink, int from, int to);
603 extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
604 void String::WriteToFlat(String source, uint16_t* sink, int from, int to);
605 extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
606 void String::WriteToFlat(String source, uint8_t* sink, int from, int to,
607                          PtrComprCageBase cage_base,
608                          const SharedStringAccessGuardIfNeeded&);
609 extern template EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE)
610 void String::WriteToFlat(String source, uint16_t* sink, int from, int to,
611                          PtrComprCageBase cage_base,
612                          const SharedStringAccessGuardIfNeeded&);
613 // clang-format on
614 
615 class SubStringRange {
616  public:
  // The constructor and begin()/end() are only declared here; their inline
  // definitions are not part of this section. The private members below
  // mirror the constructor parameters one-to-one.
  // NOTE(review): the default length of -1 presumably selects "through the
  // end of the string"; confirm in the inline definition.
617   inline SubStringRange(String string, const DisallowGarbageCollection& no_gc,
618                         int first = 0, int length = -1);
619   class iterator;
620   inline iterator begin();
621   inline iterator end();
622 
623  private:
624   String string_;
625   int first_;
626   int length_;
  // Held as a reference, not a copy.
  // NOTE(review): presumably bound to the constructor's |no_gc| argument, in
  // which case that object has to outlive this range; confirm.
627   const DisallowGarbageCollection& no_gc_;
628 };
629 
630 // The SeqString abstract class captures sequential string values.
631 class SeqString : public TorqueGeneratedSeqString<SeqString, String> {
632  public:
633   // Truncate the string in-place if possible and return the result.
634   // In case of new_length == 0, the empty string is returned without
635   // truncating the original string.
  // Callers should continue with the returned handle rather than |string|:
  // as the new_length == 0 case shows, the result can be a different object.
636   V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string,
637                                                        int new_length);
638 
639   TQ_OBJECT_CONSTRUCTORS(SeqString)
640 };
641 
642 class InternalizedString
643     : public TorqueGeneratedInternalizedString<InternalizedString, String> {
644  public:
  // This class currently declares nothing of its own in this header apart
  // from what the TQ_OBJECT_CONSTRUCTORS macro below expands to.
645   // TODO(neis): Possibly move some stuff from String here.
646 
647   TQ_OBJECT_CONSTRUCTORS(InternalizedString)
648 };
649 
650 // The SeqOneByteString class captures sequential one-byte string objects.
651 // Each character in the SeqOneByteString is a one-byte character.
652 class SeqOneByteString
653     : public TorqueGeneratedSeqOneByteString<SeqOneByteString, SeqString> {
654  public:
  // Compile-time traits of this representation: the encoding flag and the
  // character type (uint8_t).
655   static const bool kHasOneByteEncoding = true;
656   using Char = uint8_t;
657 
658   // Dispatched behavior. The non SharedStringAccessGuardIfNeeded method is also
659   // defined for convenience and it will check that the access guard is not
660   // needed.
661   inline uint8_t Get(int index) const;
662   inline uint8_t Get(int index, PtrComprCageBase cage_base,
663                      const SharedStringAccessGuardIfNeeded& access_guard) const;
664   inline void SeqOneByteStringSet(int index, uint16_t value);
665 
666   // Get the address of the characters in this string.
667   inline Address GetCharsAddress() const;
668 
669   // Get a pointer to the characters of the string. May only be called when a
670   // SharedStringAccessGuard is not needed (i.e. on the main thread or on
671   // read-only strings).
672   inline uint8_t* GetChars(const DisallowGarbageCollection& no_gc) const;
673 
674   // Get a pointer to the characters of the string.
675   inline uint8_t* GetChars(
676       const DisallowGarbageCollection& no_gc,
677       const SharedStringAccessGuardIfNeeded& access_guard) const;
678 
679   // Clear uninitialized padding space. This ensures that the snapshot content
680   // is deterministic.
681   void clear_padding();
682 
683   // Garbage collection support.  This method is called by the
684   // garbage collector to compute the actual size of a OneByteString
685   // instance.
686   inline int SeqOneByteStringSize(InstanceType instance_type);
687 
688   // Maximal memory usage for a single sequential one-byte string.
  // kMaxCharsSize equals kMaxLength (one byte per character). kMaxSize adds
  // kHeaderSize to it and passes the sum through OBJECT_POINTER_ALIGN; the
  // STATIC_ASSERT then checks that the part of kMaxSize beyond the header
  // still holds String::kMaxLength characters.
689   static const int kMaxCharsSize = kMaxLength;
690   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
691   STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);
692 
693   int AllocatedSize();
694 
695   class BodyDescriptor;
696 
697   TQ_OBJECT_CONSTRUCTORS(SeqOneByteString)
698 };
699 
700 // The SeqTwoByteString class captures sequential unicode string objects.
701 // Each character in a SeqTwoByteString is a two-byte uint16_t.
702 class SeqTwoByteString
703     : public TorqueGeneratedSeqTwoByteString<SeqTwoByteString, SeqString> {
704  public:
705   static const bool kHasOneByteEncoding = false;
706   using Char = uint16_t;
707 
708   // Dispatched behavior; this class only has the access-guard overload.
709   inline uint16_t Get(
710       int index, PtrComprCageBase cage_base,
711       const SharedStringAccessGuardIfNeeded& access_guard) const;
712   inline void SeqTwoByteStringSet(int index, uint16_t value);
713 
714   // Returns the address of the characters in this string.
715   inline Address GetCharsAddress() const;
716 
717   // Returns a pointer to the characters of the string. May only be called
718   // when a SharedStringAccessGuard is not needed (i.e. on the main thread or
719   // on read-only strings).
720   inline base::uc16* GetChars(const DisallowGarbageCollection& no_gc) const;
721 
722   // As above, but the caller supplies the access guard explicitly.
723   inline base::uc16* GetChars(
724       const DisallowGarbageCollection& no_gc,
725       const SharedStringAccessGuardIfNeeded& access_guard) const;
726 
727   // Clear uninitialized padding space. This ensures that the snapshot content
728   // is deterministic.
729   void clear_padding();
730 
731   // Garbage collection support.  This method is called by the
732   // garbage collector to compute the actual size of a SeqTwoByteString
733   // instance.
734   inline int SeqTwoByteStringSize(InstanceType instance_type);
735 
736   // Maximal memory usage for a single sequential two-byte string.
737   static const int kMaxCharsSize = kMaxLength * 2;
738   static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
739   STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
740                 String::kMaxLength);
741 
742   int AllocatedSize();
743 
744   class BodyDescriptor;
745 
746   TQ_OBJECT_CONSTRUCTORS(SeqTwoByteString)
747 };
748 
749 // The ConsString class describes string values built by using the
750 // addition operator on strings.  A ConsString is a pair where the
751 // first and second components are pointers to other string values.
752 // One or both components of a ConsString can be pointers to other
753 // ConsStrings, creating a binary tree of ConsStrings where the leaves
754 // are non-ConsString string values.  The string value represented by
755 // a ConsString can be obtained by concatenating the leaf string
756 // values in a left-to-right depth-first traversal of the tree.
757 class ConsString : public TorqueGeneratedConsString<ConsString, String> {
758  public:
759   // Returns the first component without checking that it is a string, even
760   // in debug mode.  Useful during GC, where the mark bits confuse the checks.
761   inline Object unchecked_first();
762 
763   // Returns the second component without checking that it is a string, even
764   // in debug mode.  Useful during GC, where the mark bits confuse the checks.
765   inline Object unchecked_second();
766 
767   // Dispatched behavior.
768   V8_EXPORT_PRIVATE uint16_t
769   Get(int index, PtrComprCageBase cage_base,
770       const SharedStringAccessGuardIfNeeded& access_guard) const;
771 
772   // Minimum length for a cons string.
773   static const int kMinLength = 13;
774 
775   class BodyDescriptor;
776 
777   DECL_VERIFIER(ConsString)
778 
779   TQ_OBJECT_CONSTRUCTORS(ConsString)
780 };
781 
782 // The ThinString class describes string objects that are just references
783 // to another string object. They are used for in-place internalization when
784 // the original string cannot actually be internalized in-place: in these
785 // cases, the original string is converted to a ThinString pointing at its
786 // internalized version (which is allocated as a new object).
787 // In terms of memory layout and most algorithms operating on strings,
788 // ThinStrings can be thought of as "one-part cons strings".
789 class ThinString : public TorqueGeneratedThinString<ThinString, String> {
790  public:
791   DECL_GETTER(unchecked_actual, HeapObject)
792   // Dispatched behavior.
793   V8_EXPORT_PRIVATE uint16_t
794   Get(int index, PtrComprCageBase cage_base,
795       const SharedStringAccessGuardIfNeeded& access_guard) const;
796 
797   DECL_VERIFIER(ThinString)
798 
799   class BodyDescriptor;
800 
801   TQ_OBJECT_CONSTRUCTORS(ThinString)
802 };
803 
804 // The Sliced String class describes strings that are substrings of another
805 // sequential string.  The motivation is to save time and memory when creating
806 // a substring.  A Sliced String is described as a pointer to the parent,
807 // the offset from the start of the parent string and the length.  Using
808 // a Sliced String therefore requires unpacking of the parent string and
809 // adding the offset to the start address.  Substrings of a Sliced String
810 // are not nested, since the double indirection is simplified when creating
811 // such a substring.
812 // Currently missing features are:
813 //  - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
814 class SlicedString : public TorqueGeneratedSlicedString<SlicedString, String> {
815  public:
816   inline void set_parent(String parent,
817                          WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
818   // Dispatched behavior.
819   V8_EXPORT_PRIVATE uint16_t
820   Get(int index, PtrComprCageBase cage_base,
821       const SharedStringAccessGuardIfNeeded& access_guard) const;
822 
823   // Minimum length for a sliced string.
824   static const int kMinLength = 13;
825 
826   class BodyDescriptor;
827 
828   DECL_VERIFIER(SlicedString)
829 
830   TQ_OBJECT_CONSTRUCTORS(SlicedString)
831 };
832 
833 // The ExternalString class describes string values that are backed by
834 // a string resource that lies outside the V8 heap.  ExternalStrings
835 // consist of the length field common to all strings and a pointer to the
836 // external resource.  It is important to ensure (externally) that the
837 // resource is not deallocated while the ExternalString is live in the
838 // V8 heap.
839 //
840 // The API expects that all ExternalStrings are created through the
841 // API.  Therefore, ExternalStrings should not be used internally.
842 class ExternalString
843     : public TorqueGeneratedExternalString<ExternalString, String> {
844  public:
845   DECL_VERIFIER(ExternalString)
846 
847   // Size of uncached external strings: everything up to and including the
848   static const int kUncachedSize =
849       kResourceOffset + FIELD_SIZE(kResourceOffset);  // resource field.
850 
851   inline void AllocateExternalPointerEntries(Isolate* isolate);
852 
853   // Returns whether the external string data pointer is not cached.
854   inline bool is_uncached() const;
855   // Size in bytes of the external payload.
856   int ExternalPayloadSize() const;
857 
858   // Used in the serializer/deserializer.
859   DECL_GETTER(resource_as_address, Address)
860   inline void set_address_as_resource(Isolate* isolate, Address address);
861   inline uint32_t GetResourceRefForDeserialization();
862   inline void SetResourceRefForSerialization(uint32_t ref);
863 
864   // Disposes string's resource object if it has not already been disposed.
865   inline void DisposeResource(Isolate* isolate);
866 
867   STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
868   static const int kSizeOfAllExternalStrings = kHeaderSize;
869 
870  private:
871   // Hide generated accessors by redeclaring them in the private section.
872   DECL_ACCESSORS(resource, void*)
873   DECL_ACCESSORS(resource_data, void*)
874 
875   TQ_OBJECT_CONSTRUCTORS(ExternalString)
876 };
877 
878 // The ExternalOneByteString class is an external string backed by a
879 // one-byte string.
880 class ExternalOneByteString
881     : public TorqueGeneratedExternalOneByteString<ExternalOneByteString,
882                                                   ExternalString> {
883  public:
884   static const bool kHasOneByteEncoding = true;
885 
886   using Resource = v8::String::ExternalOneByteStringResource;
887 
888   // The underlying resource.
889   DECL_GETTER(resource, const Resource*)
890 
891   // It is assumed that the previous resource is null. If it is not null, then
892   // it is the responsibility of the caller to handle the previous resource.
893   inline void SetResource(Isolate* isolate, const Resource* buffer);
894 
895   // Used only during serialization.
896   inline void set_resource(Isolate* isolate, const Resource* buffer);
897 
898   // Update the pointer cache to the external character array.
899   // The cached pointer is always valid, as the external character array does
900   // not move during lifetime.  Deserialization is the only exception, after
901   // which the pointer cache has to be refreshed.
902   inline void update_data_cache(Isolate* isolate);
903 
904   inline const uint8_t* GetChars(PtrComprCageBase cage_base) const;
905 
906   // Dispatched behavior.
907   inline uint8_t Get(int index, PtrComprCageBase cage_base,
908                      const SharedStringAccessGuardIfNeeded& access_guard) const;
909 
910   class BodyDescriptor;
911 
912   STATIC_ASSERT(kSize == kSizeOfAllExternalStrings);
913 
914   TQ_OBJECT_CONSTRUCTORS(ExternalOneByteString)
915 
916  private:
917   // The underlying resource as a non-const pointer.
918   DECL_GETTER(mutable_resource, Resource*)
919 };
920 
921 // The ExternalTwoByteString class is an external string backed by a UTF-16
922 // encoded string.
923 class ExternalTwoByteString
924     : public TorqueGeneratedExternalTwoByteString<ExternalTwoByteString,
925                                                   ExternalString> {
926  public:
927   static const bool kHasOneByteEncoding = false;
928 
929   using Resource = v8::String::ExternalStringResource;
930 
931   // The underlying string resource.
932   DECL_GETTER(resource, const Resource*)
933 
934   // It is assumed that the previous resource is null. If it is not null, then
935   // it is the responsibility of the caller to handle the previous resource.
936   inline void SetResource(Isolate* isolate, const Resource* buffer);
937 
938   // Used only during serialization.
939   inline void set_resource(Isolate* isolate, const Resource* buffer);
940 
941   // Update the pointer cache to the external character array.
942   // The cached pointer is always valid, as the external character array does
943   // not move during lifetime.  Deserialization is the only exception, after
944   // which the pointer cache has to be refreshed.
945   inline void update_data_cache(Isolate* isolate);
946 
947   inline const uint16_t* GetChars(PtrComprCageBase cage_base) const;
948 
949   // Dispatched behavior.
950   inline uint16_t Get(
951       int index, PtrComprCageBase cage_base,
952       const SharedStringAccessGuardIfNeeded& access_guard) const;
953 
954   // For regexp code.
955   inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);
956 
957   class BodyDescriptor;
958 
959   STATIC_ASSERT(kSize == kSizeOfAllExternalStrings);
960 
961   TQ_OBJECT_CONSTRUCTORS(ExternalTwoByteString)
962 
963  private:
964   // The underlying resource as a non-const pointer.
965   DECL_GETTER(mutable_resource, Resource*)
966 };
967 
968 // A flat string reader provides random access to the contents of a
969 // string independent of the character width of the string. The handle
970 // must be valid as long as the reader is being used.
971 // Not safe to use from concurrent background threads.
972 class V8_EXPORT_PRIVATE FlatStringReader : public Relocatable {
973  public:
974   FlatStringReader(Isolate* isolate, Handle<String> str);
975   void PostGarbageCollection() override;
976   inline base::uc32 Get(int index) const;
977   template <typename Char>
978   inline Char Get(int index) const;
979   int length() const { return length_; }  // Const, like the Get overloads.
980 
981  private:
982   Handle<String> str_;
983   bool is_one_byte_;
984   int length_;
985   const void* start_;
986 };
987 
988 // This maintains an off-stack representation of the stack frames required
989 // to traverse a ConsString, allowing an entirely iterative and restartable
990 // traversal of the entire string.
991 class ConsStringIterator {
992  public:
993   inline ConsStringIterator() = default;
994   inline explicit ConsStringIterator(ConsString cons_string, int offset = 0) {
995     Reset(cons_string, offset);
996   }
997   ConsStringIterator(const ConsStringIterator&) = delete;
998   ConsStringIterator& operator=(const ConsStringIterator&) = delete;
999   inline void Reset(ConsString cons_string, int offset = 0) {
1000     depth_ = 0;
1001     // For a null cons_string, Next will always return a null String.
1002     if (cons_string.is_null()) return;
1003     Initialize(cons_string, offset);
1004   }
1005   // Returns a null String when complete; always zeroes *offset_out first.
1006   inline String Next(int* offset_out) {
1007     *offset_out = 0;
1008     if (depth_ == 0) return String();
1009     return Continue(offset_out);
1010   }
1011 
1012  private:
1013   static const int kStackSize = 32;
1014   // Use a mask instead of doing modulo operations for stack wrapping.
1015   static const int kDepthMask = kStackSize - 1;
1016   static_assert(base::bits::IsPowerOfTwo(kStackSize),
1017                 "kStackSize must be power of two");
1018   static inline int OffsetForDepth(int depth);
1019 
1020   inline void PushLeft(ConsString string);
1021   inline void PushRight(ConsString string);
1022   inline void AdjustMaximumDepth();
1023   inline void Pop();
1024   inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }
1025   V8_EXPORT_PRIVATE void Initialize(ConsString cons_string, int offset);
1026   V8_EXPORT_PRIVATE String Continue(int* offset_out);
1027   String NextLeaf(bool* blew_stack);
1028   String Search(int* offset_out);
1029 
1030   // Stack must always contain only frames for which right traversal
1031   // has not yet been performed.
1032   ConsString frames_[kStackSize];
1033   ConsString root_;
1034   int depth_ = 0;  // Zero-initialized so Next() is safe before any Reset().
1035   int maximum_depth_ = 0;
1036   int consumed_ = 0;
1037 };
1038 
1039 class StringCharacterStream;
1040 
1041 template <typename Char>
1042 struct CharTraits;  // Maps a character type to its string classes.
1043 
1044 template <>
1045 struct CharTraits<uint8_t> {  // One-byte characters.
1046   using String = SeqOneByteString;
1047   using ExternalString = ExternalOneByteString;
1048 };
1049 
1050 template <>
1051 struct CharTraits<uint16_t> {  // Two-byte characters.
1052   using String = SeqTwoByteString;
1053   using ExternalString = ExternalTwoByteString;
1054 };
1055 
1056 }  // namespace internal
1057 }  // namespace v8
1058 
1059 #include "src/objects/object-macros-undef.h"
1060 
1061 #endif  // V8_OBJECTS_STRING_H_
1062