1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef V8_STRING_BUILDER_H_
6 #define V8_STRING_BUILDER_H_
7
8 #include "src/assert-scope.h"
9 #include "src/handles.h"
10 #include "src/heap/factory.h"
11 #include "src/isolate.h"
12 #include "src/objects.h"
13 #include "src/utils.h"
14
15 namespace v8 {
16 namespace internal {
17
18 const int kStringBuilderConcatHelperLengthBits = 11;
19 const int kStringBuilderConcatHelperPositionBits = 19;
20
21 typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
22 StringBuilderSubstringLength;
23 typedef BitField<int, kStringBuilderConcatHelperLengthBits,
24 kStringBuilderConcatHelperPositionBits>
25 StringBuilderSubstringPosition;
26
27
28 template <typename sinkchar>
StringBuilderConcatHelper(String * special,sinkchar * sink,FixedArray * fixed_array,int array_length)29 static inline void StringBuilderConcatHelper(String* special, sinkchar* sink,
30 FixedArray* fixed_array,
31 int array_length) {
32 DisallowHeapAllocation no_gc;
33 int position = 0;
34 for (int i = 0; i < array_length; i++) {
35 Object* element = fixed_array->get(i);
36 if (element->IsSmi()) {
37 // Smi encoding of position and length.
38 int encoded_slice = Smi::ToInt(element);
39 int pos;
40 int len;
41 if (encoded_slice > 0) {
42 // Position and length encoded in one smi.
43 pos = StringBuilderSubstringPosition::decode(encoded_slice);
44 len = StringBuilderSubstringLength::decode(encoded_slice);
45 } else {
46 // Position and length encoded in two smis.
47 Object* obj = fixed_array->get(++i);
48 DCHECK(obj->IsSmi());
49 pos = Smi::ToInt(obj);
50 len = -encoded_slice;
51 }
52 String::WriteToFlat(special, sink + position, pos, pos + len);
53 position += len;
54 } else {
55 String* string = String::cast(element);
56 int element_length = string->length();
57 String::WriteToFlat(string, sink + position, 0, element_length);
58 position += element_length;
59 }
60 }
61 }
62
63
64 // Returns the result length of the concatenation.
65 // On illegal argument, -1 is returned.
StringBuilderConcatLength(int special_length,FixedArray * fixed_array,int array_length,bool * one_byte)66 static inline int StringBuilderConcatLength(int special_length,
67 FixedArray* fixed_array,
68 int array_length, bool* one_byte) {
69 DisallowHeapAllocation no_gc;
70 int position = 0;
71 for (int i = 0; i < array_length; i++) {
72 int increment = 0;
73 Object* elt = fixed_array->get(i);
74 if (elt->IsSmi()) {
75 // Smi encoding of position and length.
76 int smi_value = Smi::ToInt(elt);
77 int pos;
78 int len;
79 if (smi_value > 0) {
80 // Position and length encoded in one smi.
81 pos = StringBuilderSubstringPosition::decode(smi_value);
82 len = StringBuilderSubstringLength::decode(smi_value);
83 } else {
84 // Position and length encoded in two smis.
85 len = -smi_value;
86 // Get the position and check that it is a positive smi.
87 i++;
88 if (i >= array_length) return -1;
89 Object* next_smi = fixed_array->get(i);
90 if (!next_smi->IsSmi()) return -1;
91 pos = Smi::ToInt(next_smi);
92 if (pos < 0) return -1;
93 }
94 DCHECK_GE(pos, 0);
95 DCHECK_GE(len, 0);
96 if (pos > special_length || len > special_length - pos) return -1;
97 increment = len;
98 } else if (elt->IsString()) {
99 String* element = String::cast(elt);
100 int element_length = element->length();
101 increment = element_length;
102 if (*one_byte && !element->HasOnlyOneByteChars()) {
103 *one_byte = false;
104 }
105 } else {
106 return -1;
107 }
108 if (increment > String::kMaxLength - position) {
109 return kMaxInt; // Provoke throw on allocation.
110 }
111 position += increment;
112 }
113 return position;
114 }
115
116
117 class FixedArrayBuilder {
118 public:
FixedArrayBuilder(Isolate * isolate,int initial_capacity)119 explicit FixedArrayBuilder(Isolate* isolate, int initial_capacity)
120 : array_(isolate->factory()->NewFixedArrayWithHoles(initial_capacity)),
121 length_(0),
122 has_non_smi_elements_(false) {
123 // Require a non-zero initial size. Ensures that doubling the size to
124 // extend the array will work.
125 DCHECK_GT(initial_capacity, 0);
126 }
127
FixedArrayBuilder(Handle<FixedArray> backing_store)128 explicit FixedArrayBuilder(Handle<FixedArray> backing_store)
129 : array_(backing_store), length_(0), has_non_smi_elements_(false) {
130 // Require a non-zero initial size. Ensures that doubling the size to
131 // extend the array will work.
132 DCHECK_GT(backing_store->length(), 0);
133 }
134
HasCapacity(int elements)135 bool HasCapacity(int elements) {
136 int length = array_->length();
137 int required_length = length_ + elements;
138 return (length >= required_length);
139 }
140
EnsureCapacity(int elements)141 void EnsureCapacity(int elements) {
142 int length = array_->length();
143 int required_length = length_ + elements;
144 if (length < required_length) {
145 int new_length = length;
146 do {
147 new_length *= 2;
148 } while (new_length < required_length);
149 Handle<FixedArray> extended_array =
150 array_->GetIsolate()->factory()->NewFixedArrayWithHoles(new_length);
151 array_->CopyTo(0, *extended_array, 0, length_);
152 array_ = extended_array;
153 }
154 }
155
Add(Object * value)156 void Add(Object* value) {
157 DCHECK(!value->IsSmi());
158 DCHECK(length_ < capacity());
159 array_->set(length_, value);
160 length_++;
161 has_non_smi_elements_ = true;
162 }
163
Add(Smi * value)164 void Add(Smi* value) {
165 DCHECK(value->IsSmi());
166 DCHECK(length_ < capacity());
167 array_->set(length_, value);
168 length_++;
169 }
170
array()171 Handle<FixedArray> array() { return array_; }
172
length()173 int length() { return length_; }
174
capacity()175 int capacity() { return array_->length(); }
176
ToJSArray(Handle<JSArray> target_array)177 Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
178 JSArray::SetContent(target_array, array_);
179 target_array->set_length(Smi::FromInt(length_));
180 return target_array;
181 }
182
183 private:
184 Handle<FixedArray> array_;
185 int length_;
186 bool has_non_smi_elements_;
187 };
188
189
190 class ReplacementStringBuilder {
191 public:
ReplacementStringBuilder(Heap * heap,Handle<String> subject,int estimated_part_count)192 ReplacementStringBuilder(Heap* heap, Handle<String> subject,
193 int estimated_part_count)
194 : heap_(heap),
195 array_builder_(heap->isolate(), estimated_part_count),
196 subject_(subject),
197 character_count_(0),
198 is_one_byte_(subject->IsOneByteRepresentation()) {
199 // Require a non-zero initial size. Ensures that doubling the size to
200 // extend the array will work.
201 DCHECK_GT(estimated_part_count, 0);
202 }
203
AddSubjectSlice(FixedArrayBuilder * builder,int from,int to)204 static inline void AddSubjectSlice(FixedArrayBuilder* builder, int from,
205 int to) {
206 DCHECK_GE(from, 0);
207 int length = to - from;
208 DCHECK_GT(length, 0);
209 if (StringBuilderSubstringLength::is_valid(length) &&
210 StringBuilderSubstringPosition::is_valid(from)) {
211 int encoded_slice = StringBuilderSubstringLength::encode(length) |
212 StringBuilderSubstringPosition::encode(from);
213 builder->Add(Smi::FromInt(encoded_slice));
214 } else {
215 // Otherwise encode as two smis.
216 builder->Add(Smi::FromInt(-length));
217 builder->Add(Smi::FromInt(from));
218 }
219 }
220
221
EnsureCapacity(int elements)222 void EnsureCapacity(int elements) { array_builder_.EnsureCapacity(elements); }
223
224
AddSubjectSlice(int from,int to)225 void AddSubjectSlice(int from, int to) {
226 AddSubjectSlice(&array_builder_, from, to);
227 IncrementCharacterCount(to - from);
228 }
229
230
AddString(Handle<String> string)231 void AddString(Handle<String> string) {
232 int length = string->length();
233 DCHECK_GT(length, 0);
234 AddElement(*string);
235 if (!string->IsOneByteRepresentation()) {
236 is_one_byte_ = false;
237 }
238 IncrementCharacterCount(length);
239 }
240
241
242 MaybeHandle<String> ToString();
243
244
IncrementCharacterCount(int by)245 void IncrementCharacterCount(int by) {
246 if (character_count_ > String::kMaxLength - by) {
247 STATIC_ASSERT(String::kMaxLength < kMaxInt);
248 character_count_ = kMaxInt;
249 } else {
250 character_count_ += by;
251 }
252 }
253
254 private:
AddElement(Object * element)255 void AddElement(Object* element) {
256 DCHECK(element->IsSmi() || element->IsString());
257 DCHECK(array_builder_.capacity() > array_builder_.length());
258 array_builder_.Add(element);
259 }
260
261 Heap* heap_;
262 FixedArrayBuilder array_builder_;
263 Handle<String> subject_;
264 int character_count_;
265 bool is_one_byte_;
266 };
267
268
269 class IncrementalStringBuilder {
270 public:
271 explicit IncrementalStringBuilder(Isolate* isolate);
272
INLINE(String::Encoding CurrentEncoding ())273 INLINE(String::Encoding CurrentEncoding()) { return encoding_; }
274
275 template <typename SrcChar, typename DestChar>
276 INLINE(void Append(SrcChar c));
277
INLINE(void AppendCharacter (uint8_t c))278 INLINE(void AppendCharacter(uint8_t c)) {
279 if (encoding_ == String::ONE_BYTE_ENCODING) {
280 Append<uint8_t, uint8_t>(c);
281 } else {
282 Append<uint8_t, uc16>(c);
283 }
284 }
285
INLINE(void AppendCString (const char * s))286 INLINE(void AppendCString(const char* s)) {
287 const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
288 if (encoding_ == String::ONE_BYTE_ENCODING) {
289 while (*u != '\0') Append<uint8_t, uint8_t>(*(u++));
290 } else {
291 while (*u != '\0') Append<uint8_t, uc16>(*(u++));
292 }
293 }
294
INLINE(void AppendCString (const uc16 * s))295 INLINE(void AppendCString(const uc16* s)) {
296 if (encoding_ == String::ONE_BYTE_ENCODING) {
297 while (*s != '\0') Append<uc16, uint8_t>(*(s++));
298 } else {
299 while (*s != '\0') Append<uc16, uc16>(*(s++));
300 }
301 }
302
INLINE(bool CurrentPartCanFit (int length))303 INLINE(bool CurrentPartCanFit(int length)) {
304 return part_length_ - current_index_ > length;
305 }
306
307 // We make a rough estimate to find out if the current string can be
308 // serialized without allocating a new string part. The worst case length of
309 // an escaped character is 6. Shifting the remaining string length right by 3
310 // is a more pessimistic estimate, but faster to calculate.
INLINE(int EscapedLengthIfCurrentPartFits (int length))311 INLINE(int EscapedLengthIfCurrentPartFits(int length)) {
312 if (length > kMaxPartLength) return 0;
313 STATIC_ASSERT((kMaxPartLength << 3) <= String::kMaxLength);
314 // This shift will not overflow because length is already less than the
315 // maximum part length.
316 int worst_case_length = length << 3;
317 return CurrentPartCanFit(worst_case_length) ? worst_case_length : 0;
318 }
319
320 void AppendString(Handle<String> string);
321
322 MaybeHandle<String> Finish();
323
INLINE(bool HasOverflowed ())324 INLINE(bool HasOverflowed()) const { return overflowed_; }
325
INLINE(int Length ())326 INLINE(int Length()) const { return accumulator_->length() + current_index_; }
327
328 // Change encoding to two-byte.
ChangeEncoding()329 void ChangeEncoding() {
330 DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
331 ShrinkCurrentPart();
332 encoding_ = String::TWO_BYTE_ENCODING;
333 Extend();
334 }
335
336 template <typename DestChar>
337 class NoExtend {
338 public:
NoExtend(Handle<String> string,int offset)339 explicit NoExtend(Handle<String> string, int offset) {
340 DCHECK(string->IsSeqOneByteString() || string->IsSeqTwoByteString());
341 if (sizeof(DestChar) == 1) {
342 start_ = reinterpret_cast<DestChar*>(
343 Handle<SeqOneByteString>::cast(string)->GetChars() + offset);
344 } else {
345 start_ = reinterpret_cast<DestChar*>(
346 Handle<SeqTwoByteString>::cast(string)->GetChars() + offset);
347 }
348 cursor_ = start_;
349 }
350
INLINE(void Append (DestChar c))351 INLINE(void Append(DestChar c)) { *(cursor_++) = c; }
INLINE(void AppendCString (const char * s))352 INLINE(void AppendCString(const char* s)) {
353 const uint8_t* u = reinterpret_cast<const uint8_t*>(s);
354 while (*u != '\0') Append(*(u++));
355 }
356
written()357 int written() { return static_cast<int>(cursor_ - start_); }
358
359 private:
360 DestChar* start_;
361 DestChar* cursor_;
362 DisallowHeapAllocation no_gc_;
363 };
364
365 template <typename DestChar>
366 class NoExtendString : public NoExtend<DestChar> {
367 public:
NoExtendString(Handle<String> string,int required_length)368 NoExtendString(Handle<String> string, int required_length)
369 : NoExtend<DestChar>(string, 0), string_(string) {
370 DCHECK(string->length() >= required_length);
371 }
372
Finalize()373 Handle<String> Finalize() {
374 Handle<SeqString> string = Handle<SeqString>::cast(string_);
375 int length = NoExtend<DestChar>::written();
376 Handle<String> result = SeqString::Truncate(string, length);
377 string_ = Handle<String>();
378 return result;
379 }
380
381 private:
382 Handle<String> string_;
383 };
384
385 template <typename DestChar>
386 class NoExtendBuilder : public NoExtend<DestChar> {
387 public:
NoExtendBuilder(IncrementalStringBuilder * builder,int required_length)388 NoExtendBuilder(IncrementalStringBuilder* builder, int required_length)
389 : NoExtend<DestChar>(builder->current_part(), builder->current_index_),
390 builder_(builder) {
391 DCHECK(builder->CurrentPartCanFit(required_length));
392 }
393
~NoExtendBuilder()394 ~NoExtendBuilder() {
395 builder_->current_index_ += NoExtend<DestChar>::written();
396 }
397
398 private:
399 IncrementalStringBuilder* builder_;
400 };
401
402 private:
factory()403 Factory* factory() { return isolate_->factory(); }
404
INLINE(Handle<String> accumulator ())405 INLINE(Handle<String> accumulator()) { return accumulator_; }
406
INLINE(void set_accumulator (Handle<String> string))407 INLINE(void set_accumulator(Handle<String> string)) {
408 *accumulator_.location() = *string;
409 }
410
INLINE(Handle<String> current_part ())411 INLINE(Handle<String> current_part()) { return current_part_; }
412
INLINE(void set_current_part (Handle<String> string))413 INLINE(void set_current_part(Handle<String> string)) {
414 *current_part_.location() = *string;
415 }
416
417 // Add the current part to the accumulator.
418 void Accumulate(Handle<String> new_part);
419
420 // Finish the current part and allocate a new part.
421 void Extend();
422
423 // Shrink current part to the right size.
ShrinkCurrentPart()424 void ShrinkCurrentPart() {
425 DCHECK(current_index_ < part_length_);
426 set_current_part(SeqString::Truncate(
427 Handle<SeqString>::cast(current_part()), current_index_));
428 }
429
430 static const int kInitialPartLength = 32;
431 static const int kMaxPartLength = 16 * 1024;
432 static const int kPartLengthGrowthFactor = 2;
433
434 Isolate* isolate_;
435 String::Encoding encoding_;
436 bool overflowed_;
437 int part_length_;
438 int current_index_;
439 Handle<String> accumulator_;
440 Handle<String> current_part_;
441 };
442
443
444 template <typename SrcChar, typename DestChar>
Append(SrcChar c)445 void IncrementalStringBuilder::Append(SrcChar c) {
446 DCHECK_EQ(encoding_ == String::ONE_BYTE_ENCODING, sizeof(DestChar) == 1);
447 if (sizeof(DestChar) == 1) {
448 DCHECK_EQ(String::ONE_BYTE_ENCODING, encoding_);
449 SeqOneByteString::cast(*current_part_)
450 ->SeqOneByteStringSet(current_index_++, c);
451 } else {
452 DCHECK_EQ(String::TWO_BYTE_ENCODING, encoding_);
453 SeqTwoByteString::cast(*current_part_)
454 ->SeqTwoByteStringSet(current_index_++, c);
455 }
456 if (current_index_ == part_length_) Extend();
457 }
458 } // namespace internal
459 } // namespace v8
460
461 #endif // V8_STRING_BUILDER_H_
462