1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <functional>
6
7 #include "src/common/message-template.h"
8 #include "src/execution/arguments-inl.h"
9 #include "src/execution/isolate-inl.h"
10 #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
11 #include "src/logging/counters.h"
12 #include "src/numbers/conversions-inl.h"
13 #include "src/objects/js-array-inl.h"
14 #include "src/objects/js-regexp-inl.h"
15 #include "src/regexp/regexp-utils.h"
16 #include "src/regexp/regexp.h"
17 #include "src/runtime/runtime-utils.h"
18 #include "src/strings/string-builder-inl.h"
19 #include "src/strings/string-search.h"
20 #include "src/zone/zone-chunk-list.h"
21
22 namespace v8 {
23 namespace internal {
24
25 namespace {
26
27 // Returns -1 for failure.
GetArgcForReplaceCallable(uint32_t num_captures,bool has_named_captures)28 uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
29 bool has_named_captures) {
30 const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
31 const uint32_t kAdditionalArgsWithNamedCaptures = 3;
32 if (num_captures > Code::kMaxArguments) return -1;
33 uint32_t argc = has_named_captures
34 ? num_captures + kAdditionalArgsWithNamedCaptures
35 : num_captures + kAdditionalArgsWithoutNamedCaptures;
36 STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
37 kAdditionalArgsWithNamedCaptures);
38 return (argc > Code::kMaxArguments) ? -1 : argc;
39 }
40
41 // Looks up the capture of the given name. Returns the (1-based) numbered
42 // capture index or -1 on failure.
LookupNamedCapture(const std::function<bool (String)> & name_matches,FixedArray capture_name_map)43 int LookupNamedCapture(const std::function<bool(String)>& name_matches,
44 FixedArray capture_name_map) {
45 // TODO(jgruber): Sort capture_name_map and do binary search via
46 // internalized strings.
47
48 int maybe_capture_index = -1;
49 const int named_capture_count = capture_name_map.length() >> 1;
50 for (int j = 0; j < named_capture_count; j++) {
51 // The format of {capture_name_map} is documented at
52 // JSRegExp::kIrregexpCaptureNameMapIndex.
53 const int name_ix = j * 2;
54 const int index_ix = j * 2 + 1;
55
56 String capture_name = String::cast(capture_name_map.get(name_ix));
57 if (!name_matches(capture_name)) continue;
58
59 maybe_capture_index = Smi::ToInt(capture_name_map.get(index_ix));
60 break;
61 }
62
63 return maybe_capture_index;
64 }
65
66 } // namespace
67
// A replacement pattern that has been parsed once into a list of parts so
// that it can be applied to many matches without re-scanning the pattern
// text. Typical use: call Compile once and, if it returns false, call Apply
// once per match.
class CompiledReplacement {
 public:
  // Both internal containers allocate from {zone}.
  explicit CompiledReplacement(Zone* zone)
      : parts_(zone), replacement_substrings_(zone) {}

  // Return whether the replacement is simple, i.e. the pattern is plain
  // literal text without any substitution. In that case no parts are
  // recorded and the caller should use the replacement string directly.
  bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
               Handle<String> replacement, int capture_count,
               int subject_length);

  // Use Apply only if Compile returned false.
  // Appends the expansion of the replacement for a single match to {builder}.
  // {match} is indexed as start/end offset pairs per capture number.
  void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
             int32_t* match);

  // Number of distinct parts of the replacement pattern.
  int parts() { return static_cast<int>(parts_.size()); }

 private:
  // Kinds of parts. All values are strictly positive so that non-positive
  // tags can temporarily encode a substring start (see ReplacementPart).
  enum PartType {
    SUBJECT_PREFIX = 1,
    SUBJECT_SUFFIX,
    SUBJECT_CAPTURE,
    REPLACEMENT_SUBSTRING,
    REPLACEMENT_STRING,
    EMPTY_REPLACEMENT,
    NUMBER_OF_PART_TYPES
  };

  struct ReplacementPart {
    // The whole match is represented as capture number 0.
    static inline ReplacementPart SubjectMatch() {
      return ReplacementPart(SUBJECT_CAPTURE, 0);
    }
    static inline ReplacementPart SubjectCapture(int capture_index) {
      return ReplacementPart(SUBJECT_CAPTURE, capture_index);
    }
    static inline ReplacementPart SubjectPrefix() {
      return ReplacementPart(SUBJECT_PREFIX, 0);
    }
    // The subject length is stored in data so that Apply knows where the
    // suffix ends.
    static inline ReplacementPart SubjectSuffix(int subject_length) {
      return ReplacementPart(SUBJECT_SUFFIX, subject_length);
    }
    static inline ReplacementPart ReplacementString() {
      return ReplacementPart(REPLACEMENT_STRING, 0);
    }
    static inline ReplacementPart EmptyReplacement() {
      return ReplacementPart(EMPTY_REPLACEMENT, 0);
    }
    // Temporary encoding of the replacement slice [from, to): the start is
    // stored negated in the tag, the end in data.
    static inline ReplacementPart ReplacementSubString(int from, int to) {
      DCHECK_LE(0, from);
      DCHECK_GT(to, from);
      return ReplacementPart(-from, to);
    }

    // If tag <= 0 then it is the negation of a start index of a substring of
    // the replacement pattern, otherwise it's a value from PartType.
    ReplacementPart(int tag, int data) : tag(tag), data(data) {
      // Must be non-positive or a PartType value.
      DCHECK(tag < NUMBER_OF_PART_TYPES);
    }
    // Either a value of PartType or a non-positive number that is
    // the negation of an index into the replacement string.
    int tag;
    // The data value's interpretation depends on the value of tag:
    //  tag == SUBJECT_PREFIX: data is unused.
    //  tag == SUBJECT_SUFFIX: data is the length of the subject string.
    //  tag == SUBJECT_CAPTURE: data is the number of the capture.
    //  tag == REPLACEMENT_SUBSTRING ||
    //  tag == REPLACEMENT_STRING:    data is index into array of substrings
    //                                of the replacement string.
    //  tag == EMPTY_REPLACEMENT: data is unused.
    //  tag <= 0: Temporary representation of the substring of the replacement
    //            string ranging over -tag .. data.
    //            Is replaced by REPLACEMENT_{SUB,}STRING when we create the
    //            substring objects.
    int data;
  };

  // Splits {characters} into literal slices and substitutions and appends
  // them to {parts}.
  //
  // Returns true if the pattern is non-empty and consists purely of literal
  // text (nothing was appended to {parts}); returns false otherwise,
  // including for an empty pattern.
  //
  // {capture_name_map} may be null, in which case "$<" is treated as literal
  // text. {subject_length} is only recorded in SUBJECT_SUFFIX parts.
  template <typename Char>
  bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
                               Vector<Char> characters,
                               FixedArray capture_name_map, int capture_count,
                               int subject_length) {
    // Equivalent to String::GetSubstitution, except that this method converts
    // the replacement string into an internal representation that avoids
    // repeated parsing when used repeatedly.
    int length = characters.length();
    // Start of the literal text that has been scanned but not yet emitted as
    // a part.
    int last = 0;
    for (int i = 0; i < length; i++) {
      Char c = characters[i];
      if (c == '$') {
        int next_index = i + 1;
        if (next_index == length) {  // No next character!
          break;
        }
        Char c2 = characters[next_index];
        switch (c2) {
          case '$':
            // "$$" stands for a single literal "$".
            if (i > last) {
              // There is a substring before. Include the first "$".
              parts->push_back(
                  ReplacementPart::ReplacementSubString(last, next_index));
              last = next_index + 1;  // Continue after the second "$".
            } else {
              // Let the next substring start with the second "$".
              last = next_index;
            }
            i = next_index;
            break;
          case '`':
            // "$`": the part of the subject before the match.
            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(ReplacementPart::SubjectPrefix());
            i = next_index;
            last = i + 1;
            break;
          case '\'':
            // "$'": the part of the subject after the match.
            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
            i = next_index;
            last = i + 1;
            break;
          case '&':
            // "$&": the match itself.
            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(ReplacementPart::SubjectMatch());
            i = next_index;
            last = i + 1;
            break;
          case '0':
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
          case '8':
          case '9': {
            // "$n" or "$nn": a numbered capture reference.
            int capture_ref = c2 - '0';
            if (capture_ref > capture_count) {
              // No such capture: the text stays literal ({last} is left
              // untouched), scanning resumes after the digit.
              i = next_index;
              continue;
            }
            int second_digit_index = next_index + 1;
            if (second_digit_index < length) {
              // Peek ahead to see if we have two digits.
              Char c3 = characters[second_digit_index];
              if ('0' <= c3 && c3 <= '9') {  // Double digits.
                int double_digit_ref = capture_ref * 10 + c3 - '0';
                // Only take the second digit if the two-digit number names
                // an existing capture.
                if (double_digit_ref <= capture_count) {
                  next_index = second_digit_index;
                  capture_ref = double_digit_ref;
                }
              }
            }
            // A reference that evaluates to 0 ("$0", "$00") is not a
            // capture reference; it is left as literal text.
            if (capture_ref > 0) {
              if (i > last) {
                parts->push_back(
                    ReplacementPart::ReplacementSubString(last, i));
              }
              DCHECK(capture_ref <= capture_count);
              parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
              last = next_index + 1;
            }
            i = next_index;
            break;
          }
          case '<': {
            // "$<name>": a named capture reference. Without a name map the
            // text is literal.
            if (capture_name_map.is_null()) {
              i = next_index;
              break;
            }

            // Scan until the next '>', and let the enclosed substring be the
            // groupName.

            const int name_start_index = next_index + 1;
            int closing_bracket_index = -1;
            for (int j = name_start_index; j < length; j++) {
              if (characters[j] == '>') {
                closing_bracket_index = j;
                break;
              }
            }

            // If no closing bracket is found, '$<' is treated as a string
            // literal.
            if (closing_bracket_index == -1) {
              i = next_index;
              break;
            }

            Vector<Char> requested_name =
                characters.SubVector(name_start_index, closing_bracket_index);

            // Let capture be ? Get(namedCaptures, groupName).

            const int capture_index = LookupNamedCapture(
                [=](String capture_name) {
                  return capture_name.IsEqualTo(requested_name);
                },
                capture_name_map);

            // If capture is undefined or does not exist, replace the text
            // through the following '>' with the empty string.
            // Otherwise, replace the text through the following '>' with
            // ? ToString(capture).

            DCHECK(capture_index == -1 ||
                   (1 <= capture_index && capture_index <= capture_count));

            if (i > last) {
              parts->push_back(ReplacementPart::ReplacementSubString(last, i));
            }
            parts->push_back(
                (capture_index == -1)
                    ? ReplacementPart::EmptyReplacement()
                    : ReplacementPart::SubjectCapture(capture_index));
            last = closing_bracket_index + 1;
            i = closing_bracket_index;
            break;
          }
          default:
            // "$" followed by any other character is literal text.
            i = next_index;
            break;
        }
      }
    }
    // Emit the trailing literal text, if any.
    if (length > last) {
      if (last == 0) {
        // Replacement is simple.  Do not use Apply to do the replacement.
        return true;
      } else {
        parts->push_back(ReplacementPart::ReplacementSubString(last, length));
      }
    }
    return false;
  }

  // Parts of the replacement, in the order in which Apply emits them.
  ZoneChunkList<ReplacementPart> parts_;
  // String objects referenced (by index) from REPLACEMENT_SUBSTRING and
  // REPLACEMENT_STRING parts.
  ZoneVector<Handle<String>> replacement_substrings_;
};
314
Compile(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> replacement,int capture_count,int subject_length)315 bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
316 Handle<String> replacement, int capture_count,
317 int subject_length) {
318 {
319 DisallowHeapAllocation no_gc;
320 String::FlatContent content = replacement->GetFlatContent(no_gc);
321 DCHECK(content.IsFlat());
322
323 FixedArray capture_name_map;
324 if (capture_count > 0) {
325 DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
326 Object maybe_capture_name_map = regexp->CaptureNameMap();
327 if (maybe_capture_name_map.IsFixedArray()) {
328 capture_name_map = FixedArray::cast(maybe_capture_name_map);
329 }
330 }
331
332 bool simple;
333 if (content.IsOneByte()) {
334 simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
335 capture_name_map, capture_count,
336 subject_length);
337 } else {
338 DCHECK(content.IsTwoByte());
339 simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
340 capture_name_map, capture_count,
341 subject_length);
342 }
343 if (simple) return true;
344 }
345
346 // Find substrings of replacement string and create them as String objects.
347 int substring_index = 0;
348 for (ReplacementPart& part : parts_) {
349 int tag = part.tag;
350 if (tag <= 0) { // A replacement string slice.
351 int from = -tag;
352 int to = part.data;
353 replacement_substrings_.push_back(
354 isolate->factory()->NewSubString(replacement, from, to));
355 part.tag = REPLACEMENT_SUBSTRING;
356 part.data = substring_index;
357 substring_index++;
358 } else if (tag == REPLACEMENT_STRING) {
359 replacement_substrings_.push_back(replacement);
360 part.data = substring_index;
361 substring_index++;
362 }
363 }
364 return false;
365 }
366
367
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)368 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
369 int match_from, int match_to, int32_t* match) {
370 DCHECK_LT(0, parts_.size());
371 for (ReplacementPart& part : parts_) {
372 switch (part.tag) {
373 case SUBJECT_PREFIX:
374 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
375 break;
376 case SUBJECT_SUFFIX: {
377 int subject_length = part.data;
378 if (match_to < subject_length) {
379 builder->AddSubjectSlice(match_to, subject_length);
380 }
381 break;
382 }
383 case SUBJECT_CAPTURE: {
384 int capture = part.data;
385 int from = match[capture * 2];
386 int to = match[capture * 2 + 1];
387 if (from >= 0 && to > from) {
388 builder->AddSubjectSlice(from, to);
389 }
390 break;
391 }
392 case REPLACEMENT_SUBSTRING:
393 case REPLACEMENT_STRING:
394 builder->AddString(replacement_substrings_[part.data]);
395 break;
396 case EMPTY_REPLACEMENT:
397 break;
398 default:
399 UNREACHABLE();
400 }
401 }
402 }
403
FindOneByteStringIndices(Vector<const uint8_t> subject,uint8_t pattern,std::vector<int> * indices,unsigned int limit)404 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
405 std::vector<int>* indices, unsigned int limit) {
406 DCHECK_LT(0, limit);
407 // Collect indices of pattern in subject using memchr.
408 // Stop after finding at most limit values.
409 const uint8_t* subject_start = subject.begin();
410 const uint8_t* subject_end = subject_start + subject.length();
411 const uint8_t* pos = subject_start;
412 while (limit > 0) {
413 pos = reinterpret_cast<const uint8_t*>(
414 memchr(pos, pattern, subject_end - pos));
415 if (pos == nullptr) return;
416 indices->push_back(static_cast<int>(pos - subject_start));
417 pos++;
418 limit--;
419 }
420 }
421
FindTwoByteStringIndices(const Vector<const uc16> subject,uc16 pattern,std::vector<int> * indices,unsigned int limit)422 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
423 std::vector<int>* indices, unsigned int limit) {
424 DCHECK_LT(0, limit);
425 const uc16* subject_start = subject.begin();
426 const uc16* subject_end = subject_start + subject.length();
427 for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
428 if (*pos == pattern) {
429 indices->push_back(static_cast<int>(pos - subject_start));
430 limit--;
431 }
432 }
433 }
434
435 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,Vector<const SubjectChar> subject,Vector<const PatternChar> pattern,std::vector<int> * indices,unsigned int limit)436 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
437 Vector<const PatternChar> pattern,
438 std::vector<int>* indices, unsigned int limit) {
439 DCHECK_LT(0, limit);
440 // Collect indices of pattern in subject.
441 // Stop after finding at most limit values.
442 int pattern_length = pattern.length();
443 int index = 0;
444 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
445 while (limit > 0) {
446 index = search.Search(subject, index);
447 if (index < 0) return;
448 indices->push_back(index);
449 index += pattern_length;
450 limit--;
451 }
452 }
453
FindStringIndicesDispatch(Isolate * isolate,String subject,String pattern,std::vector<int> * indices,unsigned int limit)454 void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
455 std::vector<int>* indices, unsigned int limit) {
456 {
457 DisallowHeapAllocation no_gc;
458 String::FlatContent subject_content = subject.GetFlatContent(no_gc);
459 String::FlatContent pattern_content = pattern.GetFlatContent(no_gc);
460 DCHECK(subject_content.IsFlat());
461 DCHECK(pattern_content.IsFlat());
462 if (subject_content.IsOneByte()) {
463 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
464 if (pattern_content.IsOneByte()) {
465 Vector<const uint8_t> pattern_vector =
466 pattern_content.ToOneByteVector();
467 if (pattern_vector.length() == 1) {
468 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
469 limit);
470 } else {
471 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
472 limit);
473 }
474 } else {
475 FindStringIndices(isolate, subject_vector,
476 pattern_content.ToUC16Vector(), indices, limit);
477 }
478 } else {
479 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
480 if (pattern_content.IsOneByte()) {
481 Vector<const uint8_t> pattern_vector =
482 pattern_content.ToOneByteVector();
483 if (pattern_vector.length() == 1) {
484 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
485 limit);
486 } else {
487 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
488 limit);
489 }
490 } else {
491 Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
492 if (pattern_vector.length() == 1) {
493 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
494 limit);
495 } else {
496 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
497 limit);
498 }
499 }
500 }
501 }
502 }
503
504 namespace {
GetRewoundRegexpIndicesList(Isolate * isolate)505 std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
506 std::vector<int>* list = isolate->regexp_indices();
507 list->clear();
508 return list;
509 }
510
TruncateRegexpIndicesList(Isolate * isolate)511 void TruncateRegexpIndicesList(Isolate* isolate) {
512 // Same size as smallest zone segment, preserving behavior from the
513 // runtime zone.
514 static const int kMaxRegexpIndicesListCapacity = 8 * KB;
515 std::vector<int>* indicies = isolate->regexp_indices();
516 if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
517 // Throw away backing storage.
518 indicies->clear();
519 indicies->shrink_to_fit();
520 }
521 }
522 } // namespace
523
524 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)525 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
526 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
527 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
528 DCHECK(subject->IsFlat());
529 DCHECK(replacement->IsFlat());
530
531 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
532
533 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
534 String pattern =
535 String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
536 int subject_len = subject->length();
537 int pattern_len = pattern.length();
538 int replacement_len = replacement->length();
539
540 FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
541
542 if (indices->empty()) return *subject;
543
544 // Detect integer overflow.
545 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
546 static_cast<int64_t>(pattern_len)) *
547 static_cast<int64_t>(indices->size()) +
548 static_cast<int64_t>(subject_len);
549 int result_len;
550 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
551 STATIC_ASSERT(String::kMaxLength < kMaxInt);
552 result_len = kMaxInt; // Provoke exception.
553 } else {
554 result_len = static_cast<int>(result_len_64);
555 }
556 if (result_len == 0) {
557 return ReadOnlyRoots(isolate).empty_string();
558 }
559
560 int subject_pos = 0;
561 int result_pos = 0;
562
563 MaybeHandle<SeqString> maybe_res;
564 if (ResultSeqString::kHasOneByteEncoding) {
565 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
566 } else {
567 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
568 }
569 Handle<SeqString> untyped_res;
570 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
571 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
572
573 DisallowHeapAllocation no_gc;
574 for (int index : *indices) {
575 // Copy non-matched subject content.
576 if (subject_pos < index) {
577 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
578 subject_pos, index);
579 result_pos += index - subject_pos;
580 }
581
582 // Replace match.
583 if (replacement_len > 0) {
584 String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
585 replacement_len);
586 result_pos += replacement_len;
587 }
588
589 subject_pos = index + pattern_len;
590 }
591 // Add remaining subject content at the end.
592 if (subject_pos < subject_len) {
593 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
594 subject_pos, subject_len);
595 }
596
597 int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
598 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices);
599
600 TruncateRegexpIndicesList(isolate);
601
602 return *result;
603 }
604
StringReplaceGlobalRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)605 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
606 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
607 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
608 DCHECK(subject->IsFlat());
609 DCHECK(replacement->IsFlat());
610
611 int capture_count = regexp->CaptureCount();
612 int subject_length = subject->length();
613
614 JSRegExp::Type typeTag = regexp->TypeTag();
615 if (typeTag == JSRegExp::IRREGEXP) {
616 // Ensure the RegExp is compiled so we can access the capture-name map.
617 if (RegExp::IrregexpPrepare(isolate, regexp, subject) == -1) {
618 DCHECK(isolate->has_pending_exception());
619 return ReadOnlyRoots(isolate).exception();
620 }
621 }
622
623 // CompiledReplacement uses zone allocation.
624 Zone zone(isolate->allocator(), ZONE_NAME);
625 CompiledReplacement compiled_replacement(&zone);
626 const bool simple_replace = compiled_replacement.Compile(
627 isolate, regexp, replacement, capture_count, subject_length);
628
629 // Shortcut for simple non-regexp global replacements
630 if (typeTag == JSRegExp::ATOM && simple_replace) {
631 if (subject->IsOneByteRepresentation() &&
632 replacement->IsOneByteRepresentation()) {
633 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
634 isolate, subject, regexp, replacement, last_match_info);
635 } else {
636 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
637 isolate, subject, regexp, replacement, last_match_info);
638 }
639 }
640
641 RegExpGlobalCache global_cache(regexp, subject, isolate);
642 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
643
644 int32_t* current_match = global_cache.FetchNext();
645 if (current_match == nullptr) {
646 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
647 return *subject;
648 }
649
650 // Guessing the number of parts that the final result string is built
651 // from. Global regexps can match any number of times, so we guess
652 // conservatively.
653 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
654 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
655
656 int prev = 0;
657
658 do {
659 int start = current_match[0];
660 int end = current_match[1];
661
662 if (prev < start) {
663 builder.AddSubjectSlice(prev, start);
664 }
665
666 if (simple_replace) {
667 builder.AddString(replacement);
668 } else {
669 compiled_replacement.Apply(&builder, start, end, current_match);
670 }
671 prev = end;
672
673 current_match = global_cache.FetchNext();
674 } while (current_match != nullptr);
675
676 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
677
678 if (prev < subject_length) {
679 builder.AddSubjectSlice(prev, subject_length);
680 }
681
682 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
683 global_cache.LastSuccessfulMatch());
684
685 RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
686 }
687
// Removes every match of the global {regexp} from {subject}, i.e. performs a
// global replace with the empty string. Atom regexps are delegated to
// StringReplaceGlobalAtomRegExpWithString. Otherwise the result is written
// into a sequential string of type ResultSeqString that is allocated with an
// upper-bound length and shortened in place once the final length is known.
// Returns {subject} itself when nothing matches, the empty string when
// nothing remains, and the exception sentinel when matching fails.
// {subject} must be flat.
template <typename ResultSeqString>
V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
    Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
    Handle<RegExpMatchInfo> last_match_info) {
  DCHECK(subject->IsFlat());

  // Shortcut for simple non-regexp global replacements
  if (regexp->TypeTag() == JSRegExp::ATOM) {
    Handle<String> empty_string = isolate->factory()->empty_string();
    if (subject->IsOneByteRepresentation()) {
      return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
          isolate, subject, regexp, empty_string, last_match_info);
    } else {
      return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
          isolate, subject, regexp, empty_string, last_match_info);
    }
  }

  RegExpGlobalCache global_cache(regexp, subject, isolate);
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  int32_t* current_match = global_cache.FetchNext();
  if (current_match == nullptr) {
    // No match at all (or matching failed): nothing to remove.
    if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
    return *subject;
  }

  int start = current_match[0];
  int end = current_match[1];
  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  // Upper bound for the result length: the subject minus the first match.
  // Further matches can only make the result shorter; the string is trimmed
  // at the end.
  int new_length = subject_length - (end - start);
  if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();

  Handle<ResultSeqString> answer;
  if (ResultSeqString::kHasOneByteEncoding) {
    answer = Handle<ResultSeqString>::cast(
        isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
  } else {
    answer = Handle<ResultSeqString>::cast(
        isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
  }

  // {prev} is the end of the previous match in the subject, {position} the
  // number of characters written to {answer} so far.
  int prev = 0;
  int position = 0;

  DisallowHeapAllocation no_gc;
  do {
    start = current_match[0];
    end = current_match[1];
    if (prev < start) {
      // Add substring subject[prev;start] to answer string.
      String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
                          start);
      position += start - prev;
    }
    prev = end;

    current_match = global_cache.FetchNext();
  } while (current_match != nullptr);

  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
                           global_cache.LastSuccessfulMatch());

  if (prev < subject_length) {
    // Add substring subject[prev;length] to answer string.
    String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
                        subject_length);
    position += subject_length - prev;
  }

  if (position == 0) return ReadOnlyRoots(isolate).empty_string();

  // Shorten string and fill
  // {delta} is the number of bytes by which the object shrinks when its
  // length goes from {new_length} to {position}.
  int string_size = ResultSeqString::SizeFor(position);
  int allocated_string_size = ResultSeqString::SizeFor(new_length);
  int delta = allocated_string_size - string_size;

  answer->set_length(position);
  if (delta == 0) return *answer;

  Address end_of_string = answer->address() + string_size;
  Heap* heap = isolate->heap();

  // The trimming is performed on a newly allocated object, which is on a
  // freshly allocated page or on an already swept page. Hence, the sweeper
  // thread can not get confused with the filler creation. No synchronization
  // needed.
  // TODO(hpayer): We should shrink the large object page if the size
  // of the object changed significantly.
  // The freed tail is covered with a filler object, except for large objects.
  if (!heap->IsLargeObject(*answer)) {
    heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
  }
  return *answer;
}
786
// Runtime entry: splits a string at every occurrence of a non-empty string
// pattern, producing at most {limit} parts as a JSArray of substrings.
// Arguments: subject string, pattern string, limit (uint32, must be > 0).
// Results of unlimited splits (limit == 0xFFFFFFFF) are looked up in and
// entered into the RegExpResultsCache.
RUNTIME_FUNCTION(Runtime_StringSplit) {
  HandleScope handle_scope(isolate);
  DCHECK_EQ(3, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
  CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
  CHECK_LT(0, limit);

  const int subject_length = subject->length();
  const int pattern_length = pattern->length();
  CHECK_LT(0, pattern_length);

  // Only unlimited splits take part in caching.
  const bool unlimited = (limit == 0xFFFFFFFFu);

  if (unlimited) {
    FixedArray last_match_cache_unused;
    Handle<Object> cached_answer(
        RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
                                   &last_match_cache_unused,
                                   RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
        isolate);
    if (*cached_answer != Smi::zero()) {
      // The cache FixedArray is a COW-array and can therefore be reused.
      Handle<JSArray> cached_result =
          isolate->factory()->NewJSArrayWithElements(
              Handle<FixedArray>::cast(cached_answer));
      return *cached_result;
    }
  }

  // The limit can be very large (0xFFFFFFFFu), but since the pattern
  // isn't empty, we can never create more parts than ~half the length
  // of the subject.

  subject = String::Flatten(isolate, subject);
  pattern = String::Flatten(isolate, pattern);

  std::vector<int>* part_ends = GetRewoundRegexpIndicesList(isolate);
  FindStringIndicesDispatch(isolate, *subject, *pattern, part_ends, limit);

  // Unless the limit was reached, the text after the last separator forms
  // one more part that ends at the end of the subject.
  if (static_cast<uint32_t>(part_ends->size()) < limit) {
    part_ends->push_back(subject_length);
  }

  // {part_ends} now contains the end of each part to create.

  // Create JSArray of substrings separated by separator.
  const int part_count = static_cast<int>(part_ends->size());

  Handle<JSArray> result =
      isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
                                     INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);

  DCHECK(result->HasObjectElements());

  Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);

  // No separator was found: the only part is the subject itself.
  const bool subject_is_only_part =
      part_count == 1 && part_ends->at(0) == subject_length;
  if (subject_is_only_part) {
    elements->set(0, *subject);
  } else {
    int part_start = 0;
    FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
      const int part_end = part_ends->at(i);
      Handle<String> piece =
          isolate->factory()->NewProperSubString(subject, part_start, part_end);
      elements->set(i, *piece);
      // The next part starts behind the separator.
      part_start = part_end + pattern_length;
    });
  }

  if (unlimited && result->HasObjectElements()) {
    RegExpResultsCache::Enter(isolate, subject, pattern, elements,
                              isolate->factory()->empty_fixed_array(),
                              RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
  }

  TruncateRegexpIndicesList(isolate);

  return *result;
}
867
// Runtime entry: executes a regexp against a subject string starting at a
// given index and returns the result of RegExp::Exec (or the failure
// sentinel). Arguments: JSRegExp, subject string, start index (int32),
// RegExpMatchInfo receiving the match.
RUNTIME_FUNCTION(Runtime_RegExpExec) {
  HandleScope scope(isolate);
  DCHECK_EQ(4, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_INT32_ARG_CHECKED(index, 2);
  CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);

  // Due to the way the JS calls are constructed this must be less than the
  // length of a string, i.e. it is always a Smi.  We check anyway for security.
  CHECK_LE(0, index);
  CHECK_GE(subject->length(), index);

  isolate->counters()->regexp_entry_runtime()->Increment();

  auto exec_result =
      RegExp::Exec(isolate, regexp, subject, index, last_match_info);
  RETURN_RESULT_OR_FAILURE(isolate, exec_result);
}
883
884 namespace {
885
886 class MatchInfoBackedMatch : public String::Match {
887 public:
MatchInfoBackedMatch(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<RegExpMatchInfo> match_info)888 MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
889 Handle<String> subject,
890 Handle<RegExpMatchInfo> match_info)
891 : isolate_(isolate), match_info_(match_info) {
892 subject_ = String::Flatten(isolate, subject);
893
894 if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
895 Object o = regexp->CaptureNameMap();
896 has_named_captures_ = o.IsFixedArray();
897 if (has_named_captures_) {
898 capture_name_map_ = handle(FixedArray::cast(o), isolate);
899 }
900 } else {
901 has_named_captures_ = false;
902 }
903 }
904
GetMatch()905 Handle<String> GetMatch() override {
906 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
907 }
908
GetPrefix()909 Handle<String> GetPrefix() override {
910 const int match_start = match_info_->Capture(0);
911 return isolate_->factory()->NewSubString(subject_, 0, match_start);
912 }
913
GetSuffix()914 Handle<String> GetSuffix() override {
915 const int match_end = match_info_->Capture(1);
916 return isolate_->factory()->NewSubString(subject_, match_end,
917 subject_->length());
918 }
919
HasNamedCaptures()920 bool HasNamedCaptures() override { return has_named_captures_; }
921
CaptureCount()922 int CaptureCount() override {
923 return match_info_->NumberOfCaptureRegisters() / 2;
924 }
925
GetCapture(int i,bool * capture_exists)926 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
927 Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
928 isolate_, match_info_, i, capture_exists);
929 return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
930 : isolate_->factory()->empty_string();
931 }
932
GetNamedCapture(Handle<String> name,CaptureState * state)933 MaybeHandle<String> GetNamedCapture(Handle<String> name,
934 CaptureState* state) override {
935 DCHECK(has_named_captures_);
936 const int capture_index = LookupNamedCapture(
937 [=](String capture_name) { return capture_name.Equals(*name); },
938 *capture_name_map_);
939
940 if (capture_index == -1) {
941 *state = INVALID;
942 return name; // Arbitrary string handle.
943 }
944
945 DCHECK(1 <= capture_index && capture_index <= CaptureCount());
946
947 bool capture_exists;
948 Handle<String> capture_value;
949 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
950 GetCapture(capture_index, &capture_exists),
951 String);
952
953 if (!capture_exists) {
954 *state = UNMATCHED;
955 return isolate_->factory()->empty_string();
956 } else {
957 *state = MATCHED;
958 return capture_value;
959 }
960 }
961
962 private:
963 Isolate* isolate_;
964 Handle<String> subject_;
965 Handle<RegExpMatchInfo> match_info_;
966
967 bool has_named_captures_;
968 Handle<FixedArray> capture_name_map_;
969 };
970
971 class VectorBackedMatch : public String::Match {
972 public:
VectorBackedMatch(Isolate * isolate,Handle<String> subject,Handle<String> match,int match_position,ZoneVector<Handle<Object>> * captures,Handle<Object> groups_obj)973 VectorBackedMatch(Isolate* isolate, Handle<String> subject,
974 Handle<String> match, int match_position,
975 ZoneVector<Handle<Object>>* captures,
976 Handle<Object> groups_obj)
977 : isolate_(isolate),
978 match_(match),
979 match_position_(match_position),
980 captures_(captures) {
981 subject_ = String::Flatten(isolate, subject);
982
983 DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
984 has_named_captures_ = !groups_obj->IsUndefined(isolate);
985 if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
986 }
987
GetMatch()988 Handle<String> GetMatch() override { return match_; }
989
GetPrefix()990 Handle<String> GetPrefix() override {
991 return isolate_->factory()->NewSubString(subject_, 0, match_position_);
992 }
993
GetSuffix()994 Handle<String> GetSuffix() override {
995 const int match_end_position = match_position_ + match_->length();
996 return isolate_->factory()->NewSubString(subject_, match_end_position,
997 subject_->length());
998 }
999
HasNamedCaptures()1000 bool HasNamedCaptures() override { return has_named_captures_; }
1001
CaptureCount()1002 int CaptureCount() override { return static_cast<int>(captures_->size()); }
1003
GetCapture(int i,bool * capture_exists)1004 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1005 Handle<Object> capture_obj = captures_->at(i);
1006 if (capture_obj->IsUndefined(isolate_)) {
1007 *capture_exists = false;
1008 return isolate_->factory()->empty_string();
1009 }
1010 *capture_exists = true;
1011 return Object::ToString(isolate_, capture_obj);
1012 }
1013
GetNamedCapture(Handle<String> name,CaptureState * state)1014 MaybeHandle<String> GetNamedCapture(Handle<String> name,
1015 CaptureState* state) override {
1016 DCHECK(has_named_captures_);
1017
1018 Maybe<bool> maybe_capture_exists =
1019 JSReceiver::HasProperty(groups_obj_, name);
1020 if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1021
1022 if (!maybe_capture_exists.FromJust()) {
1023 *state = INVALID;
1024 return name; // Arbitrary string handle.
1025 }
1026
1027 Handle<Object> capture_obj;
1028 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1029 Object::GetProperty(isolate_, groups_obj_, name),
1030 String);
1031 if (capture_obj->IsUndefined(isolate_)) {
1032 *state = UNMATCHED;
1033 return isolate_->factory()->empty_string();
1034 } else {
1035 *state = MATCHED;
1036 return Object::ToString(isolate_, capture_obj);
1037 }
1038 }
1039
1040 private:
1041 Isolate* isolate_;
1042 Handle<String> subject_;
1043 Handle<String> match_;
1044 const int match_position_;
1045 ZoneVector<Handle<Object>>* captures_;
1046
1047 bool has_named_captures_;
1048 Handle<JSReceiver> groups_obj_;
1049 };
1050
1051 // Create the groups object (see also the RegExp result creation in
1052 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
ConstructNamedCaptureGroupsObject(Isolate * isolate,Handle<FixedArray> capture_map,const std::function<Object (int)> & f_get_capture)1053 Handle<JSObject> ConstructNamedCaptureGroupsObject(
1054 Isolate* isolate, Handle<FixedArray> capture_map,
1055 const std::function<Object(int)>& f_get_capture) {
1056 Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1057
1058 const int named_capture_count = capture_map->length() >> 1;
1059 for (int i = 0; i < named_capture_count; i++) {
1060 const int name_ix = i * 2;
1061 const int index_ix = i * 2 + 1;
1062
1063 Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1064 isolate);
1065 const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1066 DCHECK_GE(capture_ix, 1); // Explicit groups start at index 1.
1067
1068 Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1069 DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1070
1071 JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1072 }
1073
1074 return groups;
1075 }
1076
1077 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1078 // separate last match info. See comment on that function.
1079 template <bool has_capture>
SearchRegExpMultiple(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_array,Handle<JSArray> result_array)1080 static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1081 Handle<JSRegExp> regexp,
1082 Handle<RegExpMatchInfo> last_match_array,
1083 Handle<JSArray> result_array) {
1084 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1085 DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
1086 DCHECK(subject->IsFlat());
1087
1088 // Force tier up to native code for global replaces. The global replace is
1089 // implemented differently for native code and bytecode execution, where the
1090 // native code expects an array to store all the matches, and the bytecode
1091 // matches one at a time, so it's easier to tier-up to native code from the
1092 // start.
1093 if (FLAG_regexp_tier_up && regexp->TypeTag() == JSRegExp::IRREGEXP) {
1094 regexp->MarkTierUpForNextExec();
1095 if (FLAG_trace_regexp_tier_up) {
1096 PrintF("Forcing tier-up of JSRegExp object %p in SearchRegExpMultiple\n",
1097 reinterpret_cast<void*>(regexp->ptr()));
1098 }
1099 }
1100
1101 int capture_count = regexp->CaptureCount();
1102 int subject_length = subject->length();
1103
1104 static const int kMinLengthToCache = 0x1000;
1105
1106 if (subject_length > kMinLengthToCache) {
1107 FixedArray last_match_cache;
1108 Object cached_answer = RegExpResultsCache::Lookup(
1109 isolate->heap(), *subject, regexp->data(), &last_match_cache,
1110 RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1111 if (cached_answer.IsFixedArray()) {
1112 int capture_registers = (capture_count + 1) * 2;
1113 int32_t* last_match = NewArray<int32_t>(capture_registers);
1114 for (int i = 0; i < capture_registers; i++) {
1115 last_match[i] = Smi::ToInt(last_match_cache.get(i));
1116 }
1117 Handle<FixedArray> cached_fixed_array =
1118 Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1119 // The cache FixedArray is a COW-array and we need to return a copy.
1120 Handle<FixedArray> copied_fixed_array =
1121 isolate->factory()->CopyFixedArrayWithMap(
1122 cached_fixed_array, isolate->factory()->fixed_array_map());
1123 JSArray::SetContent(result_array, copied_fixed_array);
1124 RegExp::SetLastMatchInfo(isolate, last_match_array, subject,
1125 capture_count, last_match);
1126 DeleteArray(last_match);
1127 return *result_array;
1128 }
1129 }
1130
1131 RegExpGlobalCache global_cache(regexp, subject, isolate);
1132 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1133
1134 // Ensured in Runtime_RegExpExecMultiple.
1135 DCHECK(result_array->HasObjectElements());
1136 Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
1137 isolate);
1138 if (result_elements->length() < 16) {
1139 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1140 }
1141
1142 FixedArrayBuilder builder(result_elements);
1143
1144 // Position to search from.
1145 int match_start = -1;
1146 int match_end = 0;
1147 bool first = true;
1148
1149 // Two smis before and after the match, for very long strings.
1150 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1151
1152 while (true) {
1153 int32_t* current_match = global_cache.FetchNext();
1154 if (current_match == nullptr) break;
1155 match_start = current_match[0];
1156 builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
1157 if (match_end < match_start) {
1158 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1159 match_start);
1160 }
1161 match_end = current_match[1];
1162 {
1163 // Avoid accumulating new handles inside loop.
1164 HandleScope temp_scope(isolate);
1165 Handle<String> match;
1166 if (!first) {
1167 match = isolate->factory()->NewProperSubString(subject, match_start,
1168 match_end);
1169 } else {
1170 match =
1171 isolate->factory()->NewSubString(subject, match_start, match_end);
1172 first = false;
1173 }
1174
1175 if (has_capture) {
1176 // Arguments array to replace function is match, captures, index and
1177 // subject, i.e., 3 + capture count in total. If the RegExp contains
1178 // named captures, they are also passed as the last argument.
1179
1180 Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
1181 const bool has_named_captures = maybe_capture_map->IsFixedArray();
1182
1183 const int argc =
1184 has_named_captures ? 4 + capture_count : 3 + capture_count;
1185
1186 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1187 int cursor = 0;
1188
1189 elements->set(cursor++, *match);
1190 for (int i = 1; i <= capture_count; i++) {
1191 int start = current_match[i * 2];
1192 if (start >= 0) {
1193 int end = current_match[i * 2 + 1];
1194 DCHECK(start <= end);
1195 Handle<String> substring =
1196 isolate->factory()->NewSubString(subject, start, end);
1197 elements->set(cursor++, *substring);
1198 } else {
1199 DCHECK_GT(0, current_match[i * 2 + 1]);
1200 elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1201 }
1202 }
1203
1204 elements->set(cursor++, Smi::FromInt(match_start));
1205 elements->set(cursor++, *subject);
1206
1207 if (has_named_captures) {
1208 Handle<FixedArray> capture_map =
1209 Handle<FixedArray>::cast(maybe_capture_map);
1210 Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1211 isolate, capture_map, [=](int ix) { return elements->get(ix); });
1212 elements->set(cursor++, *groups);
1213 }
1214
1215 DCHECK_EQ(cursor, argc);
1216 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1217 } else {
1218 builder.Add(*match);
1219 }
1220 }
1221 }
1222
1223 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1224
1225 if (match_start >= 0) {
1226 // Finished matching, with at least one match.
1227 if (match_end < subject_length) {
1228 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1229 subject_length);
1230 }
1231
1232 RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count,
1233 global_cache.LastSuccessfulMatch());
1234
1235 if (subject_length > kMinLengthToCache) {
1236 // Store the last successful match into the array for caching.
1237 // TODO(yangguo): do not expose last match to JS and simplify caching.
1238 int capture_registers = (capture_count + 1) * 2;
1239 Handle<FixedArray> last_match_cache =
1240 isolate->factory()->NewFixedArray(capture_registers);
1241 int32_t* last_match = global_cache.LastSuccessfulMatch();
1242 for (int i = 0; i < capture_registers; i++) {
1243 last_match_cache->set(i, Smi::FromInt(last_match[i]));
1244 }
1245 Handle<FixedArray> result_fixed_array =
1246 FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1247 // Cache the result and copy the FixedArray into a COW array.
1248 Handle<FixedArray> copied_fixed_array =
1249 isolate->factory()->CopyFixedArrayWithMap(
1250 result_fixed_array, isolate->factory()->fixed_array_map());
1251 RegExpResultsCache::Enter(
1252 isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1253 last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1254 }
1255 return *builder.ToJSArray(result_array);
1256 } else {
1257 return ReadOnlyRoots(isolate).null_value(); // No matches at all.
1258 }
1259 }
1260
1261 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1262 // doesn't properly call the underlying exec method.
RegExpReplace(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> string,Handle<String> replace)1263 V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1264 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1265 Handle<String> replace) {
1266 // Functional fast-paths are dispatched directly by replace builtin.
1267 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1268
1269 Factory* factory = isolate->factory();
1270
1271 const int flags = regexp->GetFlags();
1272 const bool global = (flags & JSRegExp::kGlobal) != 0;
1273 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1274
1275 replace = String::Flatten(isolate, replace);
1276
1277 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1278
1279 if (!global) {
1280 // Non-global regexp search, string replace.
1281
1282 uint32_t last_index = 0;
1283 if (sticky) {
1284 Handle<Object> last_index_obj(regexp->last_index(), isolate);
1285 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1286 Object::ToLength(isolate, last_index_obj),
1287 String);
1288 last_index = PositiveNumberToUint32(*last_index_obj);
1289 }
1290
1291 Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1292 isolate);
1293
1294 // A lastIndex exceeding the string length always returns null (signalling
1295 // failure) in RegExpBuiltinExec, thus we can skip the call.
1296 if (last_index <= static_cast<uint32_t>(string->length())) {
1297 ASSIGN_RETURN_ON_EXCEPTION(
1298 isolate, match_indices_obj,
1299 RegExp::Exec(isolate, regexp, string, last_index, last_match_info),
1300 String);
1301 }
1302
1303 if (match_indices_obj->IsNull(isolate)) {
1304 if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1305 return string;
1306 }
1307
1308 auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1309
1310 const int start_index = match_indices->Capture(0);
1311 const int end_index = match_indices->Capture(1);
1312
1313 if (sticky) {
1314 regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1315 }
1316
1317 IncrementalStringBuilder builder(isolate);
1318 builder.AppendString(factory->NewSubString(string, 0, start_index));
1319
1320 if (replace->length() > 0) {
1321 MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1322 Handle<String> replacement;
1323 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1324 String::GetSubstitution(isolate, &m, replace),
1325 String);
1326 builder.AppendString(replacement);
1327 }
1328
1329 builder.AppendString(
1330 factory->NewSubString(string, end_index, string->length()));
1331 return builder.Finish();
1332 } else {
1333 // Global regexp search, string replace.
1334 DCHECK(global);
1335 RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1336 String);
1337
1338 // Force tier up to native code for global replaces. The global replace is
1339 // implemented differently for native code and bytecode execution, where the
1340 // native code expects an array to store all the matches, and the bytecode
1341 // matches one at a time, so it's easier to tier-up to native code from the
1342 // start.
1343 if (FLAG_regexp_tier_up && regexp->TypeTag() == JSRegExp::IRREGEXP) {
1344 regexp->MarkTierUpForNextExec();
1345 if (FLAG_trace_regexp_tier_up) {
1346 PrintF("Forcing tier-up of JSRegExp object %p in RegExpReplace\n",
1347 reinterpret_cast<void*>(regexp->ptr()));
1348 }
1349 }
1350
1351 if (replace->length() == 0) {
1352 if (string->IsOneByteRepresentation()) {
1353 Object result =
1354 StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1355 isolate, string, regexp, last_match_info);
1356 return handle(String::cast(result), isolate);
1357 } else {
1358 Object result =
1359 StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1360 isolate, string, regexp, last_match_info);
1361 return handle(String::cast(result), isolate);
1362 }
1363 }
1364
1365 Object result = StringReplaceGlobalRegExpWithString(
1366 isolate, string, regexp, replace, last_match_info);
1367 if (result.IsString()) {
1368 return handle(String::cast(result), isolate);
1369 } else {
1370 return MaybeHandle<String>();
1371 }
1372 }
1373
1374 UNREACHABLE();
1375 }
1376
1377 } // namespace
1378
1379 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1380 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1381 HandleScope handles(isolate);
1382 DCHECK_EQ(4, args.length());
1383
1384 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1385 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1386 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1387 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1388
1389 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1390 CHECK(result_array->HasObjectElements());
1391
1392 subject = String::Flatten(isolate, subject);
1393 CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1394
1395 Object result;
1396 if (regexp->CaptureCount() == 0) {
1397 result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1398 last_match_info, result_array);
1399 } else {
1400 result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1401 last_match_info, result_array);
1402 }
1403 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1404 return result;
1405 }
1406
// Replaces the first match of a non-global, unmodified regexp in |subject|
// with the stringified result of calling |replace_obj|.
// Arguments: (String subject, JSRegExp regexp, JSReceiver replace_obj).
// The callable receives the match, every capture, the match index, the
// subject and, if the regexp has named captures, a groups object.
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);

  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
  DCHECK(replace_obj->map().is_callable());

  Factory* factory = isolate->factory();
  Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();

  const int flags = regexp->GetFlags();
  DCHECK_EQ(flags & JSRegExp::kGlobal, 0);

  // TODO(jgruber): This should be an easy port to CSA with massive payback.

  // Only sticky regexps start matching at lastIndex; others start at 0.
  const bool sticky = (flags & JSRegExp::kSticky) != 0;
  uint32_t last_index = 0;
  if (sticky) {
    Handle<Object> last_index_obj(regexp->last_index(), isolate);
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
    last_index = PositiveNumberToUint32(*last_index_obj);
  }

  Handle<Object> exec_result(ReadOnlyRoots(isolate).null_value(), isolate);

  // A lastIndex exceeding the string length always returns null (signalling
  // failure) in RegExpBuiltinExec, thus we can skip the call.
  if (last_index <= static_cast<uint32_t>(subject->length())) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, exec_result,
        RegExp::Exec(isolate, regexp, subject, last_index, last_match_info));
  }

  // No match: the subject is returned unchanged and a sticky regexp has its
  // lastIndex reset.
  if (exec_result->IsNull(isolate)) {
    if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
    return *subject;
  }

  Handle<RegExpMatchInfo> match_indices =
      Handle<RegExpMatchInfo>::cast(exec_result);

  const int index = match_indices->Capture(0);
  const int end_of_match = match_indices->Capture(1);

  if (sticky) {
    regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
  }

  IncrementalStringBuilder builder(isolate);
  builder.AppendString(factory->NewSubString(subject, 0, index));

  // Compute the parameter list consisting of the match, captures, index,
  // and subject for the replace function invocation. If the RegExp contains
  // named captures, they are also passed as the last argument.

  // The number of captures plus one for the match.
  const int match_and_capture_count =
      match_indices->NumberOfCaptureRegisters() / 2;

  bool has_named_captures = false;
  Handle<FixedArray> capture_map;
  if (match_and_capture_count > 1) {
    // The existence of capture groups implies IRREGEXP kind.
    DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);

    Object maybe_capture_map = regexp->CaptureNameMap();
    has_named_captures = maybe_capture_map.IsFixedArray();
    if (has_named_captures) {
      capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
    }
  }

  // The helper signals "too many arguments" with (uint32_t)-1.
  const uint32_t argc =
      GetArgcForReplaceCallable(match_and_capture_count, has_named_captures);
  if (argc == static_cast<uint32_t>(-1)) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewRangeError(MessageTemplate::kTooManyArguments));
  }
  ScopedVector<Handle<Object>> argv(argc);

  int cursor = 0;
  for (int capture_ix = 0; capture_ix < match_and_capture_count;
       capture_ix++) {
    bool capture_exists;
    Handle<String> capture = RegExpUtils::GenericCaptureGetter(
        isolate, match_indices, capture_ix, &capture_exists);
    // Missing captures are passed to the callable as undefined.
    Handle<Object> argument = capture;
    if (!capture_exists) argument = factory->undefined_value();
    argv[cursor++] = argument;
  }

  argv[cursor++] = handle(Smi::FromInt(index), isolate);
  argv[cursor++] = subject;

  if (has_named_captures) {
    // Capture i lives at argv[i] (argv[0] is the match).
    argv[cursor++] = ConstructNamedCaptureGroupsObject(
        isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
  }

  DCHECK_EQ(cursor, argc);

  Handle<Object> replacement_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, replacement_obj,
      Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
                      argv.begin()));

  Handle<String> replacement;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, replacement, Object::ToString(isolate, replacement_obj));

  // Result = prefix (appended above) + replacement + suffix.
  builder.AppendString(replacement);
  builder.AppendString(
      factory->NewSubString(subject, end_of_match, subject->length()));

  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
}
1528
1529 namespace {
1530
ToUint32(Isolate * isolate,Handle<Object> object,uint32_t * out)1531 V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1532 Handle<Object> object,
1533 uint32_t* out) {
1534 if (object->IsUndefined(isolate)) {
1535 *out = kMaxUInt32;
1536 return object;
1537 }
1538
1539 Handle<Object> number;
1540 ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1541 Object);
1542 *out = NumberToUint32(*number);
1543 return object;
1544 }
1545
NewJSArrayWithElements(Isolate * isolate,Handle<FixedArray> elems,int num_elems)1546 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1547 Handle<FixedArray> elems,
1548 int num_elems) {
1549 return isolate->factory()->NewJSArrayWithElements(
1550 FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1551 }
1552
1553 } // namespace
1554
1555 // Slow path for:
1556 // ES#sec-regexp.prototype-@@replace
1557 // RegExp.prototype [ @@split ] ( string, limit )
RUNTIME_FUNCTION(Runtime_RegExpSplit)1558 RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1559 HandleScope scope(isolate);
1560 DCHECK_EQ(3, args.length());
1561
1562 CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1563 CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1564 CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1565
1566 Factory* factory = isolate->factory();
1567
1568 Handle<JSFunction> regexp_fun = isolate->regexp_function();
1569 Handle<Object> ctor;
1570 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1571 isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1572
1573 Handle<Object> flags_obj;
1574 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1575 isolate, flags_obj,
1576 JSObject::GetProperty(isolate, recv, factory->flags_string()));
1577
1578 Handle<String> flags;
1579 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1580 Object::ToString(isolate, flags_obj));
1581
1582 Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1583 const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1584
1585 Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1586 const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1587
1588 Handle<String> new_flags = flags;
1589 if (!sticky) {
1590 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1591 factory->NewConsString(flags, y_str));
1592 }
1593
1594 Handle<JSReceiver> splitter;
1595 {
1596 const int argc = 2;
1597
1598 ScopedVector<Handle<Object>> argv(argc);
1599 argv[0] = recv;
1600 argv[1] = new_flags;
1601
1602 Handle<Object> splitter_obj;
1603 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1604 isolate, splitter_obj,
1605 Execution::New(isolate, ctor, argc, argv.begin()));
1606
1607 splitter = Handle<JSReceiver>::cast(splitter_obj);
1608 }
1609
1610 uint32_t limit;
1611 RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1612
1613 const uint32_t length = string->length();
1614
1615 if (limit == 0) return *factory->NewJSArray(0);
1616
1617 if (length == 0) {
1618 Handle<Object> result;
1619 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1620 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1621 factory->undefined_value()));
1622
1623 if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1624
1625 Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1626 elems->set(0, *string);
1627 return *factory->NewJSArrayWithElements(elems);
1628 }
1629
1630 static const int kInitialArraySize = 8;
1631 Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1632 uint32_t num_elems = 0;
1633
1634 uint32_t string_index = 0;
1635 uint32_t prev_string_index = 0;
1636 while (string_index < length) {
1637 RETURN_FAILURE_ON_EXCEPTION(
1638 isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1639
1640 Handle<Object> result;
1641 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1642 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1643 factory->undefined_value()));
1644
1645 if (result->IsNull(isolate)) {
1646 string_index = static_cast<uint32_t>(
1647 RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1648 continue;
1649 }
1650
1651 Handle<Object> last_index_obj;
1652 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1653 isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1654
1655 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1656 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1657
1658 const uint32_t end =
1659 std::min(PositiveNumberToUint32(*last_index_obj), length);
1660 if (end == prev_string_index) {
1661 string_index = static_cast<uint32_t>(
1662 RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1663 continue;
1664 }
1665
1666 {
1667 Handle<String> substr =
1668 factory->NewSubString(string, prev_string_index, string_index);
1669 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1670 if (num_elems == limit) {
1671 return *NewJSArrayWithElements(isolate, elems, num_elems);
1672 }
1673 }
1674
1675 prev_string_index = end;
1676
1677 Handle<Object> num_captures_obj;
1678 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1679 isolate, num_captures_obj,
1680 Object::GetProperty(isolate, result,
1681 isolate->factory()->length_string()));
1682
1683 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1684 isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1685 const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1686
1687 for (uint32_t i = 1; i < num_captures; i++) {
1688 Handle<Object> capture;
1689 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1690 isolate, capture, Object::GetElement(isolate, result, i));
1691 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1692 if (num_elems == limit) {
1693 return *NewJSArrayWithElements(isolate, elems, num_elems);
1694 }
1695 }
1696
1697 string_index = prev_string_index;
1698 }
1699
1700 {
1701 Handle<String> substr =
1702 factory->NewSubString(string, prev_string_index, length);
1703 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1704 }
1705
1706 return *NewJSArrayWithElements(isolate, elems, num_elems);
1707 }
1708
1709 // Slow path for:
1710 // ES#sec-regexp.prototype-@@replace
1711 // RegExp.prototype [ @@replace ] ( string, replaceValue )
RUNTIME_FUNCTION(Runtime_RegExpReplaceRT)1712 RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
1713 HandleScope scope(isolate);
1714 DCHECK_EQ(3, args.length());
1715
1716 CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1717 CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1718 Handle<Object> replace_obj = args.at(2);
1719
1720 Factory* factory = isolate->factory();
1721
1722 string = String::Flatten(isolate, string);
1723
1724 const bool functional_replace = replace_obj->IsCallable();
1725
1726 Handle<String> replace;
1727 if (!functional_replace) {
1728 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1729 Object::ToString(isolate, replace_obj));
1730 }
1731
1732 // Fast-path for unmodified JSRegExps (and non-functional replace).
1733 if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1734 // We should never get here with functional replace because unmodified
1735 // regexp and functional replace should be fully handled in CSA code.
1736 CHECK(!functional_replace);
1737 Handle<Object> result;
1738 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1739 isolate, result,
1740 RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
1741 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
1742 return *result;
1743 }
1744
1745 const uint32_t length = string->length();
1746
1747 Handle<Object> global_obj;
1748 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1749 isolate, global_obj,
1750 JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1751 const bool global = global_obj->BooleanValue(isolate);
1752
1753 bool unicode = false;
1754 if (global) {
1755 Handle<Object> unicode_obj;
1756 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1757 isolate, unicode_obj,
1758 JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1759 unicode = unicode_obj->BooleanValue(isolate);
1760
1761 RETURN_FAILURE_ON_EXCEPTION(isolate,
1762 RegExpUtils::SetLastIndex(isolate, recv, 0));
1763 }
1764
1765 Zone zone(isolate->allocator(), ZONE_NAME);
1766 ZoneVector<Handle<Object>> results(&zone);
1767
1768 while (true) {
1769 Handle<Object> result;
1770 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1771 isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1772 factory->undefined_value()));
1773
1774 if (result->IsNull(isolate)) break;
1775
1776 results.push_back(result);
1777 if (!global) break;
1778
1779 Handle<Object> match_obj;
1780 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1781 Object::GetElement(isolate, result, 0));
1782
1783 Handle<String> match;
1784 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1785 Object::ToString(isolate, match_obj));
1786
1787 if (match->length() == 0) {
1788 RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1789 isolate, recv, string, unicode));
1790 }
1791 }
1792
1793 // TODO(jgruber): Look into ReplacementStringBuilder instead.
1794 IncrementalStringBuilder builder(isolate);
1795 uint32_t next_source_position = 0;
1796
1797 for (const auto& result : results) {
1798 HandleScope handle_scope(isolate);
1799 Handle<Object> captures_length_obj;
1800 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1801 isolate, captures_length_obj,
1802 Object::GetProperty(isolate, result, factory->length_string()));
1803
1804 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1805 isolate, captures_length_obj,
1806 Object::ToLength(isolate, captures_length_obj));
1807 const uint32_t captures_length =
1808 PositiveNumberToUint32(*captures_length_obj);
1809
1810 Handle<Object> match_obj;
1811 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1812 Object::GetElement(isolate, result, 0));
1813
1814 Handle<String> match;
1815 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1816 Object::ToString(isolate, match_obj));
1817
1818 const int match_length = match->length();
1819
1820 Handle<Object> position_obj;
1821 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1822 isolate, position_obj,
1823 Object::GetProperty(isolate, result, factory->index_string()));
1824
1825 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1826 isolate, position_obj, Object::ToInteger(isolate, position_obj));
1827 const uint32_t position =
1828 std::min(PositiveNumberToUint32(*position_obj), length);
1829
1830 // Do not reserve capacity since captures_length is user-controlled.
1831 ZoneVector<Handle<Object>> captures(&zone);
1832
1833 for (uint32_t n = 0; n < captures_length; n++) {
1834 Handle<Object> capture;
1835 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1836 isolate, capture, Object::GetElement(isolate, result, n));
1837
1838 if (!capture->IsUndefined(isolate)) {
1839 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1840 Object::ToString(isolate, capture));
1841 }
1842 captures.push_back(capture);
1843 }
1844
1845 Handle<Object> groups_obj = isolate->factory()->undefined_value();
1846 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1847 isolate, groups_obj,
1848 Object::GetProperty(isolate, result, factory->groups_string()));
1849
1850 const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1851
1852 Handle<String> replacement;
1853 if (functional_replace) {
1854 const uint32_t argc =
1855 GetArgcForReplaceCallable(captures_length, has_named_captures);
1856 if (argc == static_cast<uint32_t>(-1)) {
1857 THROW_NEW_ERROR_RETURN_FAILURE(
1858 isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1859 }
1860
1861 ScopedVector<Handle<Object>> argv(argc);
1862
1863 int cursor = 0;
1864 for (uint32_t j = 0; j < captures_length; j++) {
1865 argv[cursor++] = captures[j];
1866 }
1867
1868 argv[cursor++] = handle(Smi::FromInt(position), isolate);
1869 argv[cursor++] = string;
1870 if (has_named_captures) argv[cursor++] = groups_obj;
1871
1872 DCHECK_EQ(cursor, argc);
1873
1874 Handle<Object> replacement_obj;
1875 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1876 isolate, replacement_obj,
1877 Execution::Call(isolate, replace_obj, factory->undefined_value(),
1878 argc, argv.begin()));
1879
1880 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1881 isolate, replacement, Object::ToString(isolate, replacement_obj));
1882 } else {
1883 DCHECK(!functional_replace);
1884 if (!groups_obj->IsUndefined(isolate)) {
1885 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1886 isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1887 }
1888 VectorBackedMatch m(isolate, string, match, position, &captures,
1889 groups_obj);
1890 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1891 isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1892 }
1893
1894 if (position >= next_source_position) {
1895 builder.AppendString(
1896 factory->NewSubString(string, next_source_position, position));
1897 builder.AppendString(replacement);
1898
1899 next_source_position = position + match_length;
1900 }
1901 }
1902
1903 if (next_source_position < length) {
1904 builder.AppendString(
1905 factory->NewSubString(string, next_source_position, length));
1906 }
1907
1908 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1909 }
1910
// Initializes an existing JSRegExp from a source pattern and a flags string.
//
// Arguments:
//   0: the JSRegExp to initialize,
//   1: the pattern source string,
//   2: the flags string.
//
// Returns the regexp passed as argument 0. If JSRegExp::Initialize fails,
// RETURN_FAILURE_ON_EXCEPTION bails out before the regexp is returned.
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());
  // TODO(pwong): To follow the spec more closely and simplify calling code,
  // this could handle the canonicalization of pattern and flags. See
  // https://tc39.github.io/ecma262/#sec-regexpinitialize
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);

  RETURN_FAILURE_ON_EXCEPTION(isolate,
                              JSRegExp::Initialize(regexp, source, flags));

  return *regexp;
}
1926
// Type predicate: takes a single object argument and returns the heap's
// boolean value for whether that object is a JSRegExp. Works on the raw
// object under a SealHandleScope.
RUNTIME_FUNCTION(Runtime_IsRegExp) {
  SealHandleScope shs(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_CHECKED(Object, obj, 0);
  return isolate->heap()->ToBoolean(obj.IsJSRegExp());
}
1933
1934 } // namespace internal
1935 } // namespace v8
1936