1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_STRING_BUILDER_H_
6 #define V8_STRING_BUILDER_H_
7 
8 #include "src/assert-scope.h"
9 #include "src/handles.h"
10 #include "src/heap/factory.h"
11 #include "src/isolate.h"
12 #include "src/objects.h"
13 #include "src/utils.h"
14 
15 namespace v8 {
16 namespace internal {
17 
18 const int kStringBuilderConcatHelperLengthBits = 11;
19 const int kStringBuilderConcatHelperPositionBits = 19;
20 
21 typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
22     StringBuilderSubstringLength;
23 typedef BitField<int, kStringBuilderConcatHelperLengthBits,
24                  kStringBuilderConcatHelperPositionBits>
25     StringBuilderSubstringPosition;
26 
27 
28 template <typename sinkchar>
StringBuilderConcatHelper(String * special,sinkchar * sink,FixedArray * fixed_array,int array_length)29 static inline void StringBuilderConcatHelper(String* special, sinkchar* sink,
30                                              FixedArray* fixed_array,
31                                              int array_length) {
32   DisallowHeapAllocation no_gc;
33   int position = 0;
34   for (int i = 0; i < array_length; i++) {
35     Object* element = fixed_array->get(i);
36     if (element->IsSmi()) {
37       // Smi encoding of position and length.
38       int encoded_slice = Smi::ToInt(element);
39       int pos;
40       int len;
41       if (encoded_slice > 0) {
42         // Position and length encoded in one smi.
43         pos = StringBuilderSubstringPosition::decode(encoded_slice);
44         len = StringBuilderSubstringLength::decode(encoded_slice);
45       } else {
46         // Position and length encoded in two smis.
47         Object* obj = fixed_array->get(++i);
48         DCHECK(obj->IsSmi());
49         pos = Smi::ToInt(obj);
50         len = -encoded_slice;
51       }
52       String::WriteToFlat(special, sink + position, pos, pos + len);
53       position += len;
54     } else {
55       String* string = String::cast(element);
56       int element_length = string->length();
57       String::WriteToFlat(string, sink + position, 0, element_length);
58       position += element_length;
59     }
60   }
61 }
62 
63 
64 // Returns the result length of the concatenation.
65 // On illegal argument, -1 is returned.
StringBuilderConcatLength(int special_length,FixedArray * fixed_array,int array_length,bool * one_byte)66 static inline int StringBuilderConcatLength(int special_length,
67                                             FixedArray* fixed_array,
68                                             int array_length, bool* one_byte) {
69   DisallowHeapAllocation no_gc;
70   int position = 0;
71   for (int i = 0; i < array_length; i++) {
72     int increment = 0;
73     Object* elt = fixed_array->get(i);
74     if (elt->IsSmi()) {
75       // Smi encoding of position and length.
76       int smi_value = Smi::ToInt(elt);
77       int pos;
78       int len;
79       if (smi_value > 0) {
80         // Position and length encoded in one smi.
81         pos = StringBuilderSubstringPosition::decode(smi_value);
82         len = StringBuilderSubstringLength::decode(smi_value);
83       } else {
84         // Position and length encoded in two smis.
85         len = -smi_value;
86         // Get the position and check that it is a positive smi.
87         i++;
88         if (i >= array_length) return -1;
89         Object* next_smi = fixed_array->get(i);
90         if (!next_smi->IsSmi()) return -1;
91         pos = Smi::ToInt(next_smi);
92         if (pos < 0) return -1;
93       }
94       DCHECK_GE(pos, 0);
95       DCHECK_GE(len, 0);
96       if (pos > special_length || len > special_length - pos) return -1;
97       increment = len;
98     } else if (elt->IsString()) {
99       String* element = String::cast(elt);
100       int element_length = element->length();
101       increment = element_length;
102       if (*one_byte && !element->HasOnlyOneByteChars()) {
103         *one_byte = false;
104       }
105     } else {
106       return -1;
107     }
108     if (increment > String::kMaxLength - position) {
109       return kMaxInt;  // Provoke throw on allocation.
110     }
111     position += increment;
112   }
113   return position;
114 }
115 
116 
117 class FixedArrayBuilder {
118  public:
FixedArrayBuilder(Isolate * isolate,int initial_capacity)119   explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity)
120       : array_(isolate->factory()->NewFixedArrayWithHoles(initial_capacity)),
121         length_(0),
122         has_non_smi_elements_(false) {
123     // Require a non-zero initial size. Ensures that doubling the size to
124     // extend the array will work.
125     DCHECK_GT(initial_capacity, 0);
126   }
127 
FixedArrayBuilder(Handle<FixedArray> backing_store)128   explicit FixedArrayBuilder(Handle<FixedArray> backing_store)
129       : array_(backing_store), length_(0), has_non_smi_elements_(false) {
130     // Require a non-zero initial size. Ensures that doubling the size to
131     // extend the array will work.
132     DCHECK_GT(backing_store->length(), 0);
133   }
134 
HasCapacity(int elements)135   bool HasCapacity(int elements) {
136     int length = array_->length();
137     int required_length = length_ + elements;
138     return (length >= required_length);
139   }
140 
EnsureCapacity(int elements)141   void EnsureCapacity(int elements) {
142     int length = array_->length();
143     int required_length = length_ + elements;
144     if (length < required_length) {
145       int new_length = length;
146       do {
147         new_length *= 2;
148       } while (new_length < required_length);
149       Handle<FixedArray> extended_array =
150           array_->GetIsolate()->factory()->NewFixedArrayWithHoles(new_length);
151       array_->CopyTo(0, *extended_array, 0, length_);
152       array_ = extended_array;
153     }
154   }
155 
Add(Object * value)156   void Add(Object* value) {
157     DCHECK(!value->IsSmi());
158     DCHECK(length_ < capacity());
159     array_->set(length_, value);
160     length_++;
161     has_non_smi_elements_ = true;
162   }
163 
Add(Smi * value)164   void Add(Smi* value) {
165     DCHECK(value->IsSmi());
166     DCHECK(length_ < capacity());
167     array_->set(length_, value);
168     length_++;
169   }
170 
array()171   Handle<FixedArray> array() { return array_; }
172 
length()173   int length() { return length_; }
174 
capacity()175   int capacity() { return array_->length(); }
176 
ToJSArray(Handle<JSArray> target_array)177   Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
178     JSArray::SetContent(target_array, array_);
179     target_array->set_length(Smi::FromInt(length_));
180     return target_array;
181   }
182 
183  private:
184   Handle<FixedArray> array_;
185   int length_;
186   bool has_non_smi_elements_;
187 };
188 
189 
190 class ReplacementStringBuilder {
191  public:
ReplacementStringBuilder(Heap * heap,Handle<String> subject,int estimated_part_count)192   ReplacementStringBuilder(Heap* heap, Handle<String> subject,
193                            int estimated_part_count)
194       : heap_(heap),
195         array_builder_(heap->isolate(), estimated_part_count),
196         subject_(subject),
197         character_count_(0),
198         is_one_byte_(subject->IsOneByteRepresentation()) {
199     // Require a non-zero initial size. Ensures that doubling the size to
200     // extend the array will work.
201     DCHECK_GT(estimated_part_count, 0);
202   }
203 
AddSubjectSlice(FixedArrayBuilder * builder,int from,int to)204   static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
205                                      int to) {
206     DCHECK_GE(from, 0);
207     int length = to - from;
208     DCHECK_GT(length, 0);
209     if (StringBuilderSubstringLength::is_valid(length) &&
210         StringBuilderSubstringPosition::is_valid(from)) {
211       int encoded_slice = StringBuilderSubstringLength::encode(length) |
212                           StringBuilderSubstringPosition::encode(from);
213       builder->Add(Smi::FromInt(encoded_slice));
214     } else {
215       // Otherwise encode as two smis.
216       builder->Add(Smi::FromInt(-length));
217       builder->Add(Smi::FromInt(from));
218     }
219   }
220 
221 
EnsureCapacity(int elements)222   void EnsureCapacity(int elements) { array_builder_.EnsureCapacity(elements); }
223 
224 
AddSubjectSlice(int from,int to)225   void AddSubjectSlice(int from, int to) {
226     AddSubjectSlice(&array_builder_, from, to);
227     IncrementCharacterCount(to - from);
228   }
229 
230 
AddString(Handle<String> string)231   void AddString(Handle<String> string) {
232     int length = string->length();
233     DCHECK_GT(length, 0);
234     AddElement(*string);
235     if (!string->IsOneByteRepresentation()) {
236       is_one_byte_ = false;
237     }
238     IncrementCharacterCount(length);
239   }
240 
241 
242   MaybeHandle<String> ToString();
243 
244 
IncrementCharacterCount(int by)245   void IncrementCharacterCount(int by) {
246     if (character_count_ > String::kMaxLength - by) {
247       STATIC_ASSERT(String::kMaxLength < kMaxInt);
248       character_count_ = kMaxInt;
249     } else {
250       character_count_ += by;
251     }
252   }
253 
254  private:
AddElement(Object * element)255   void AddElement(Object* element) {
256     DCHECK(element->IsSmi() || element->IsString());
257     DCHECK(array_builder_.capacity() > array_builder_.length());
258     array_builder_.Add(element);
259   }
260 
261   Heap* heap_;
262   FixedArrayBuilder array_builder_;
263   Handle<String> subject_;
264   int character_count_;
265   bool is_one_byte_;
266 };
267 
268 
269 class IncrementalStringBuilder {
270  public:
271   explicit IncrementalStringBuilder(Isolate* isolate);
272 
INLINE(String::Encoding CurrentEncoding ())273   INLINE(String::Encoding CurrentEncoding()) { return encoding_; }
274 
275   template <typename SrcChar, typename DestChar>
276   INLINE(void Append(SrcChar c));
277 
INLINE(void AppendCharacter (uint8_t c))278   INLINE(void AppendCharacter(uint8_t c)) {
279     if (encoding_ == String::ONE_BYTE_ENCODING) {
280       Append<uint8_t, uint8_t>(c);
281     } else {
282       Append<uint8_t, uc16>(c);
283     }
284   }
285 
INLINE(void AppendCString (const char * s))286   INLINE(void AppendCString(const char* s)) {
287     const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
288     if (encoding_ == String::ONE_BYTE_ENCODING) {
289       while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
290     } else {
291       while (*u != '\0') Append<uint8_t, uc16>(*(u++));
292     }
293   }
294 
INLINE(void AppendCString (const uc16 * s))295   INLINE(void AppendCString(const uc16* s)) {
296     if (encoding_ == String::ONE_BYTE_ENCODING) {
297       while (*s != '\0') Append<uc16, uint8_t>(*(s++));
298     } else {
299       while (*s != '\0') Append<uc16, uc16>(*(s++));
300     }
301   }
302 
INLINE(bool CurrentPartCanFit (int length))303   INLINE(bool CurrentPartCanFit(int length)) {
304     return part_length_ - current_index_ > length;
305   }
306 
307   // We make a rough estimate to find out if the current string can be
308   // serialized without allocating a new string part. The worst case length of
309   // an escaped character is 6. Shifting the remaining string length right by 3
310   // is a more pessimistic estimate, but faster to calculate.
INLINE(int EscapedLengthIfCurrentPartFits (int length))311   INLINE(int EscapedLengthIfCurrentPartFits(int length)) {
312     if (length > kMaxPartLength) return 0;
313     STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength);
314     // This shift will not overflow because length is already less than the
315     // maximum part length.
316     int worst_case_length = length << 3;
317     return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0;
318   }
319 
320   void AppendString(Handle<String> string);
321 
322   MaybeHandle<String> Finish();
323 
INLINE(bool HasOverflowed ())324   INLINE(bool HasOverflowed()) const { return overflowed_; }
325 
INLINE(int Length ())326   INLINE(int Length()) const { return accumulator_->length() + current_index_; }
327 
328   // Change encoding to two-byte.
ChangeEncoding()329   void ChangeEncoding() {
330     DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
331     ShrinkCurrentPart();
332     encoding_ = String::TWO_BYTE_ENCODING;
333     Extend();
334   }
335 
336   template <typename DestChar>
337   class NoExtend {
338    public:
NoExtend(Handle<String> string,int offset)339     explicit NoExtend(Handle<String> string, int offset) {
340       DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString());
341       if (sizeof(DestChar) == 1) {
342         start_ = reinterpret_cast<DestChar*>(
343             Handle<SeqOneByteString>::cast(string)->GetChars() + offset);
344       } else {
345         start_ = reinterpret_cast<DestChar*>(
346             Handle<SeqTwoByteString>::cast(string)->GetChars() + offset);
347       }
348       cursor_ = start_;
349     }
350 
INLINE(void Append (DestChar c))351     INLINE(void Append(DestChar c)) { *(cursor_++) = c; }
INLINE(void AppendCString (const char * s))352     INLINE(void AppendCString(const char* s)) {
353       const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
354       while (*u != '\0') Append(*(u++));
355     }
356 
written()357     int written() { return static_cast<int>(cursor_ - start_); }
358 
359    private:
360     DestChar* start_;
361     DestChar* cursor_;
362     DisallowHeapAllocation no_gc_;
363   };
364 
365   template <typename DestChar>
366   class NoExtendString : public NoExtend<DestChar> {
367    public:
NoExtendString(Handle<String> string,int required_length)368     NoExtendString(Handle<String> string, int required_length)
369         : NoExtend<DestChar>(string, 0), string_(string) {
370       DCHECK(string->length() >= required_length);
371     }
372 
Finalize()373     Handle<String> Finalize() {
374       Handle<SeqString> string = Handle<SeqString>::cast(string_);
375       int length = NoExtend<DestChar>::written();
376       Handle<String> result = SeqString::Truncate(string, length);
377       string_ = Handle<String>();
378       return result;
379     }
380 
381    private:
382     Handle<String> string_;
383   };
384 
385   template <typename DestChar>
386   class NoExtendBuilder : public NoExtend<DestChar> {
387    public:
NoExtendBuilder(IncrementalStringBuilder * builder,int required_length)388     NoExtendBuilder(IncrementalStringBuilder* builder, int required_length)
389         : NoExtend<DestChar>(builder->current_part(), builder->current_index_),
390           builder_(builder) {
391       DCHECK(builder->CurrentPartCanFit(required_length));
392     }
393 
~NoExtendBuilder()394     ~NoExtendBuilder() {
395       builder_->current_index_ += NoExtend<DestChar>::written();
396     }
397 
398    private:
399     IncrementalStringBuilder* builder_;
400   };
401 
402  private:
factory()403   Factory* factory() { return isolate_->factory(); }
404 
INLINE(Handle<String> accumulator ())405   INLINE(Handle<String> accumulator()) { return accumulator_; }
406 
INLINE(void set_accumulator (Handle<String> string))407   INLINE(void set_accumulator(Handle<String> string)) {
408     *accumulator_.location() = *string;
409   }
410 
INLINE(Handle<String> current_part ())411   INLINE(Handle<String> current_part()) { return current_part_; }
412 
INLINE(void set_current_part (Handle<String> string))413   INLINE(void set_current_part(Handle<String> string)) {
414     *current_part_.location() = *string;
415   }
416 
417   // Add the current part to the accumulator.
418   void Accumulate(Handle<String> new_part);
419 
420   // Finish the current part and allocate a new part.
421   void Extend();
422 
423   // Shrink current part to the right size.
ShrinkCurrentPart()424   void ShrinkCurrentPart() {
425     DCHECK(current_index_ < part_length_);
426     set_current_part(SeqString::Truncate(
427         Handle<SeqString>::cast(current_part()), current_index_));
428   }
429 
430   static const int kInitialPartLength = 32;
431   static const int kMaxPartLength = 16 * 1024;
432   static const int kPartLengthGrowthFactor = 2;
433 
434   Isolate* isolate_;
435   String::Encoding encoding_;
436   bool overflowed_;
437   int part_length_;
438   int current_index_;
439   Handle<String> accumulator_;
440   Handle<String> current_part_;
441 };
442 
443 
444 template <typename SrcChar, typename DestChar>
Append(SrcChar c)445 void IncrementalStringBuilder::Append(SrcChar c) {
446   DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1);
447   if (sizeof(DestChar) == 1) {
448     DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
449     SeqOneByteString::cast(*current_part_)
450         ->SeqOneByteStringSet(current_index_++, c);
451   } else {
452     DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
453     SeqTwoByteString::cast(*current_part_)
454         ->SeqTwoByteStringSet(current_index_++, c);
455   }
456   if (current_index_ == part_length_) Extend();
457 }
458 }  // namespace internal
459 }  // namespace v8
460 
461 #endif  // V8_STRING_BUILDER_H_
462