1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <functional>
6 
7 #include "src/common/message-template.h"
8 #include "src/execution/arguments-inl.h"
9 #include "src/execution/isolate-inl.h"
10 #include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
11 #include "src/logging/counters.h"
12 #include "src/numbers/conversions-inl.h"
13 #include "src/objects/js-array-inl.h"
14 #include "src/objects/js-regexp-inl.h"
15 #include "src/regexp/regexp-utils.h"
16 #include "src/regexp/regexp.h"
17 #include "src/runtime/runtime-utils.h"
18 #include "src/strings/string-builder-inl.h"
19 #include "src/strings/string-search.h"
20 #include "src/zone/zone-chunk-list.h"
21 
22 namespace v8 {
23 namespace internal {
24 
25 namespace {
26 
27 // Returns -1 for failure.
GetArgcForReplaceCallable(uint32_t num_captures,bool has_named_captures)28 uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
29                                    bool has_named_captures) {
30   const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
31   const uint32_t kAdditionalArgsWithNamedCaptures = 3;
32   if (num_captures > Code::kMaxArguments) return -1;
33   uint32_t argc = has_named_captures
34                       ? num_captures + kAdditionalArgsWithNamedCaptures
35                       : num_captures + kAdditionalArgsWithoutNamedCaptures;
36   STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
37                                           kAdditionalArgsWithNamedCaptures);
38   return (argc > Code::kMaxArguments) ? -1 : argc;
39 }
40 
41 // Looks up the capture of the given name. Returns the (1-based) numbered
42 // capture index or -1 on failure.
LookupNamedCapture(const std::function<bool (String)> & name_matches,FixedArray capture_name_map)43 int LookupNamedCapture(const std::function<bool(String)>& name_matches,
44                        FixedArray capture_name_map) {
45   // TODO(jgruber): Sort capture_name_map and do binary search via
46   // internalized strings.
47 
48   int maybe_capture_index = -1;
49   const int named_capture_count = capture_name_map.length() >> 1;
50   for (int j = 0; j < named_capture_count; j++) {
51     // The format of {capture_name_map} is documented at
52     // JSRegExp::kIrregexpCaptureNameMapIndex.
53     const int name_ix = j * 2;
54     const int index_ix = j * 2 + 1;
55 
56     String capture_name = String::cast(capture_name_map.get(name_ix));
57     if (!name_matches(capture_name)) continue;
58 
59     maybe_capture_index = Smi::ToInt(capture_name_map.get(index_ix));
60     break;
61   }
62 
63   return maybe_capture_index;
64 }
65 
66 }  // namespace
67 
68 class CompiledReplacement {
69  public:
CompiledReplacement(Zone * zone)70   explicit CompiledReplacement(Zone* zone)
71       : parts_(zone), replacement_substrings_(zone) {}
72 
73   // Return whether the replacement is simple.
74   bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
75                Handle<String> replacement, int capture_count,
76                int subject_length);
77 
78   // Use Apply only if Compile returned false.
79   void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
80              int32_t* match);
81 
82   // Number of distinct parts of the replacement pattern.
parts()83   int parts() { return static_cast<int>(parts_.size()); }
84 
85  private:
86   enum PartType {
87     SUBJECT_PREFIX = 1,
88     SUBJECT_SUFFIX,
89     SUBJECT_CAPTURE,
90     REPLACEMENT_SUBSTRING,
91     REPLACEMENT_STRING,
92     EMPTY_REPLACEMENT,
93     NUMBER_OF_PART_TYPES
94   };
95 
96   struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart97     static inline ReplacementPart SubjectMatch() {
98       return ReplacementPart(SUBJECT_CAPTURE, 0);
99     }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart100     static inline ReplacementPart SubjectCapture(int capture_index) {
101       return ReplacementPart(SUBJECT_CAPTURE, capture_index);
102     }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart103     static inline ReplacementPart SubjectPrefix() {
104       return ReplacementPart(SUBJECT_PREFIX, 0);
105     }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart106     static inline ReplacementPart SubjectSuffix(int subject_length) {
107       return ReplacementPart(SUBJECT_SUFFIX, subject_length);
108     }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart109     static inline ReplacementPart ReplacementString() {
110       return ReplacementPart(REPLACEMENT_STRING, 0);
111     }
EmptyReplacementv8::internal::CompiledReplacement::ReplacementPart112     static inline ReplacementPart EmptyReplacement() {
113       return ReplacementPart(EMPTY_REPLACEMENT, 0);
114     }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart115     static inline ReplacementPart ReplacementSubString(int from, int to) {
116       DCHECK_LE(0, from);
117       DCHECK_GT(to, from);
118       return ReplacementPart(-from, to);
119     }
120 
121     // If tag <= 0 then it is the negation of a start index of a substring of
122     // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart123     ReplacementPart(int tag, int data) : tag(tag), data(data) {
124       // Must be non-positive or a PartType value.
125       DCHECK(tag < NUMBER_OF_PART_TYPES);
126     }
127     // Either a value of PartType or a non-positive number that is
128     // the negation of an index into the replacement string.
129     int tag;
130     // The data value's interpretation depends on the value of tag:
131     // tag == SUBJECT_PREFIX ||
132     // tag == SUBJECT_SUFFIX:  data is unused.
133     // tag == SUBJECT_CAPTURE: data is the number of the capture.
134     // tag == REPLACEMENT_SUBSTRING ||
135     // tag == REPLACEMENT_STRING:    data is index into array of substrings
136     //                               of the replacement string.
137     // tag == EMPTY_REPLACEMENT: data is unused.
138     // tag <= 0: Temporary representation of the substring of the replacement
139     //           string ranging over -tag .. data.
140     //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
141     //           substring objects.
142     int data;
143   };
144 
145   template <typename Char>
ParseReplacementPattern(ZoneChunkList<ReplacementPart> * parts,Vector<Char> characters,FixedArray capture_name_map,int capture_count,int subject_length)146   bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
147                                Vector<Char> characters,
148                                FixedArray capture_name_map, int capture_count,
149                                int subject_length) {
150     // Equivalent to String::GetSubstitution, except that this method converts
151     // the replacement string into an internal representation that avoids
152     // repeated parsing when used repeatedly.
153     int length = characters.length();
154     int last = 0;
155     for (int i = 0; i < length; i++) {
156       Char c = characters[i];
157       if (c == '$') {
158         int next_index = i + 1;
159         if (next_index == length) {  // No next character!
160           break;
161         }
162         Char c2 = characters[next_index];
163         switch (c2) {
164           case '$':
165             if (i > last) {
166               // There is a substring before. Include the first "$".
167               parts->push_back(
168                   ReplacementPart::ReplacementSubString(last, next_index));
169               last = next_index + 1;  // Continue after the second "$".
170             } else {
171               // Let the next substring start with the second "$".
172               last = next_index;
173             }
174             i = next_index;
175             break;
176           case '`':
177             if (i > last) {
178               parts->push_back(ReplacementPart::ReplacementSubString(last, i));
179             }
180             parts->push_back(ReplacementPart::SubjectPrefix());
181             i = next_index;
182             last = i + 1;
183             break;
184           case '\'':
185             if (i > last) {
186               parts->push_back(ReplacementPart::ReplacementSubString(last, i));
187             }
188             parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
189             i = next_index;
190             last = i + 1;
191             break;
192           case '&':
193             if (i > last) {
194               parts->push_back(ReplacementPart::ReplacementSubString(last, i));
195             }
196             parts->push_back(ReplacementPart::SubjectMatch());
197             i = next_index;
198             last = i + 1;
199             break;
200           case '0':
201           case '1':
202           case '2':
203           case '3':
204           case '4':
205           case '5':
206           case '6':
207           case '7':
208           case '8':
209           case '9': {
210             int capture_ref = c2 - '0';
211             if (capture_ref > capture_count) {
212               i = next_index;
213               continue;
214             }
215             int second_digit_index = next_index + 1;
216             if (second_digit_index < length) {
217               // Peek ahead to see if we have two digits.
218               Char c3 = characters[second_digit_index];
219               if ('0' <= c3 && c3 <= '9') {  // Double digits.
220                 int double_digit_ref = capture_ref * 10 + c3 - '0';
221                 if (double_digit_ref <= capture_count) {
222                   next_index = second_digit_index;
223                   capture_ref = double_digit_ref;
224                 }
225               }
226             }
227             if (capture_ref > 0) {
228               if (i > last) {
229                 parts->push_back(
230                     ReplacementPart::ReplacementSubString(last, i));
231               }
232               DCHECK(capture_ref <= capture_count);
233               parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
234               last = next_index + 1;
235             }
236             i = next_index;
237             break;
238           }
239           case '<': {
240             if (capture_name_map.is_null()) {
241               i = next_index;
242               break;
243             }
244 
245             // Scan until the next '>', and let the enclosed substring be the
246             // groupName.
247 
248             const int name_start_index = next_index + 1;
249             int closing_bracket_index = -1;
250             for (int j = name_start_index; j < length; j++) {
251               if (characters[j] == '>') {
252                 closing_bracket_index = j;
253                 break;
254               }
255             }
256 
257             // If no closing bracket is found, '$<' is treated as a string
258             // literal.
259             if (closing_bracket_index == -1) {
260               i = next_index;
261               break;
262             }
263 
264             Vector<Char> requested_name =
265                 characters.SubVector(name_start_index, closing_bracket_index);
266 
267             // Let capture be ? Get(namedCaptures, groupName).
268 
269             const int capture_index = LookupNamedCapture(
270                 [=](String capture_name) {
271                   return capture_name.IsEqualTo(requested_name);
272                 },
273                 capture_name_map);
274 
275             // If capture is undefined or does not exist, replace the text
276             // through the following '>' with the empty string.
277             // Otherwise, replace the text through the following '>' with
278             // ? ToString(capture).
279 
280             DCHECK(capture_index == -1 ||
281                    (1 <= capture_index && capture_index <= capture_count));
282 
283             if (i > last) {
284               parts->push_back(ReplacementPart::ReplacementSubString(last, i));
285             }
286             parts->push_back(
287                 (capture_index == -1)
288                     ? ReplacementPart::EmptyReplacement()
289                     : ReplacementPart::SubjectCapture(capture_index));
290             last = closing_bracket_index + 1;
291             i = closing_bracket_index;
292             break;
293           }
294           default:
295             i = next_index;
296             break;
297         }
298       }
299     }
300     if (length > last) {
301       if (last == 0) {
302         // Replacement is simple.  Do not use Apply to do the replacement.
303         return true;
304       } else {
305         parts->push_back(ReplacementPart::ReplacementSubString(last, length));
306       }
307     }
308     return false;
309   }
310 
311   ZoneChunkList<ReplacementPart> parts_;
312   ZoneVector<Handle<String>> replacement_substrings_;
313 };
314 
Compile(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> replacement,int capture_count,int subject_length)315 bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
316                                   Handle<String> replacement, int capture_count,
317                                   int subject_length) {
318   {
319     DisallowHeapAllocation no_gc;
320     String::FlatContent content = replacement->GetFlatContent(no_gc);
321     DCHECK(content.IsFlat());
322 
323     FixedArray capture_name_map;
324     if (capture_count > 0) {
325       DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
326       Object maybe_capture_name_map = regexp->CaptureNameMap();
327       if (maybe_capture_name_map.IsFixedArray()) {
328         capture_name_map = FixedArray::cast(maybe_capture_name_map);
329       }
330     }
331 
332     bool simple;
333     if (content.IsOneByte()) {
334       simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
335                                        capture_name_map, capture_count,
336                                        subject_length);
337     } else {
338       DCHECK(content.IsTwoByte());
339       simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
340                                        capture_name_map, capture_count,
341                                        subject_length);
342     }
343     if (simple) return true;
344   }
345 
346   // Find substrings of replacement string and create them as String objects.
347   int substring_index = 0;
348   for (ReplacementPart& part : parts_) {
349     int tag = part.tag;
350     if (tag <= 0) {  // A replacement string slice.
351       int from = -tag;
352       int to = part.data;
353       replacement_substrings_.push_back(
354           isolate->factory()->NewSubString(replacement, from, to));
355       part.tag = REPLACEMENT_SUBSTRING;
356       part.data = substring_index;
357       substring_index++;
358     } else if (tag == REPLACEMENT_STRING) {
359       replacement_substrings_.push_back(replacement);
360       part.data = substring_index;
361       substring_index++;
362     }
363   }
364   return false;
365 }
366 
367 
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)368 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
369                                 int match_from, int match_to, int32_t* match) {
370   DCHECK_LT(0, parts_.size());
371   for (ReplacementPart& part : parts_) {
372     switch (part.tag) {
373       case SUBJECT_PREFIX:
374         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
375         break;
376       case SUBJECT_SUFFIX: {
377         int subject_length = part.data;
378         if (match_to < subject_length) {
379           builder->AddSubjectSlice(match_to, subject_length);
380         }
381         break;
382       }
383       case SUBJECT_CAPTURE: {
384         int capture = part.data;
385         int from = match[capture * 2];
386         int to = match[capture * 2 + 1];
387         if (from >= 0 && to > from) {
388           builder->AddSubjectSlice(from, to);
389         }
390         break;
391       }
392       case REPLACEMENT_SUBSTRING:
393       case REPLACEMENT_STRING:
394         builder->AddString(replacement_substrings_[part.data]);
395         break;
396       case EMPTY_REPLACEMENT:
397         break;
398       default:
399         UNREACHABLE();
400     }
401   }
402 }
403 
FindOneByteStringIndices(Vector<const uint8_t> subject,uint8_t pattern,std::vector<int> * indices,unsigned int limit)404 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
405                               std::vector<int>* indices, unsigned int limit) {
406   DCHECK_LT(0, limit);
407   // Collect indices of pattern in subject using memchr.
408   // Stop after finding at most limit values.
409   const uint8_t* subject_start = subject.begin();
410   const uint8_t* subject_end = subject_start + subject.length();
411   const uint8_t* pos = subject_start;
412   while (limit > 0) {
413     pos = reinterpret_cast<const uint8_t*>(
414         memchr(pos, pattern, subject_end - pos));
415     if (pos == nullptr) return;
416     indices->push_back(static_cast<int>(pos - subject_start));
417     pos++;
418     limit--;
419   }
420 }
421 
FindTwoByteStringIndices(const Vector<const uc16> subject,uc16 pattern,std::vector<int> * indices,unsigned int limit)422 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
423                               std::vector<int>* indices, unsigned int limit) {
424   DCHECK_LT(0, limit);
425   const uc16* subject_start = subject.begin();
426   const uc16* subject_end = subject_start + subject.length();
427   for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
428     if (*pos == pattern) {
429       indices->push_back(static_cast<int>(pos - subject_start));
430       limit--;
431     }
432   }
433 }
434 
435 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,Vector<const SubjectChar> subject,Vector<const PatternChar> pattern,std::vector<int> * indices,unsigned int limit)436 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
437                        Vector<const PatternChar> pattern,
438                        std::vector<int>* indices, unsigned int limit) {
439   DCHECK_LT(0, limit);
440   // Collect indices of pattern in subject.
441   // Stop after finding at most limit values.
442   int pattern_length = pattern.length();
443   int index = 0;
444   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
445   while (limit > 0) {
446     index = search.Search(subject, index);
447     if (index < 0) return;
448     indices->push_back(index);
449     index += pattern_length;
450     limit--;
451   }
452 }
453 
FindStringIndicesDispatch(Isolate * isolate,String subject,String pattern,std::vector<int> * indices,unsigned int limit)454 void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
455                                std::vector<int>* indices, unsigned int limit) {
456   {
457     DisallowHeapAllocation no_gc;
458     String::FlatContent subject_content = subject.GetFlatContent(no_gc);
459     String::FlatContent pattern_content = pattern.GetFlatContent(no_gc);
460     DCHECK(subject_content.IsFlat());
461     DCHECK(pattern_content.IsFlat());
462     if (subject_content.IsOneByte()) {
463       Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
464       if (pattern_content.IsOneByte()) {
465         Vector<const uint8_t> pattern_vector =
466             pattern_content.ToOneByteVector();
467         if (pattern_vector.length() == 1) {
468           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
469                                    limit);
470         } else {
471           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
472                             limit);
473         }
474       } else {
475         FindStringIndices(isolate, subject_vector,
476                           pattern_content.ToUC16Vector(), indices, limit);
477       }
478     } else {
479       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
480       if (pattern_content.IsOneByte()) {
481         Vector<const uint8_t> pattern_vector =
482             pattern_content.ToOneByteVector();
483         if (pattern_vector.length() == 1) {
484           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
485                                    limit);
486         } else {
487           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
488                             limit);
489         }
490       } else {
491         Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
492         if (pattern_vector.length() == 1) {
493           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
494                                    limit);
495         } else {
496           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
497                             limit);
498         }
499       }
500     }
501   }
502 }
503 
504 namespace {
GetRewoundRegexpIndicesList(Isolate * isolate)505 std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
506   std::vector<int>* list = isolate->regexp_indices();
507   list->clear();
508   return list;
509 }
510 
TruncateRegexpIndicesList(Isolate * isolate)511 void TruncateRegexpIndicesList(Isolate* isolate) {
512   // Same size as smallest zone segment, preserving behavior from the
513   // runtime zone.
514   static const int kMaxRegexpIndicesListCapacity = 8 * KB;
515   std::vector<int>* indicies = isolate->regexp_indices();
516   if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
517     // Throw away backing storage.
518     indicies->clear();
519     indicies->shrink_to_fit();
520   }
521 }
522 }  // namespace
523 
524 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)525 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
526     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
527     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
528   DCHECK(subject->IsFlat());
529   DCHECK(replacement->IsFlat());
530 
531   std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
532 
533   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
534   String pattern =
535       String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
536   int subject_len = subject->length();
537   int pattern_len = pattern.length();
538   int replacement_len = replacement->length();
539 
540   FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
541 
542   if (indices->empty()) return *subject;
543 
544   // Detect integer overflow.
545   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
546                            static_cast<int64_t>(pattern_len)) *
547                               static_cast<int64_t>(indices->size()) +
548                           static_cast<int64_t>(subject_len);
549   int result_len;
550   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
551     STATIC_ASSERT(String::kMaxLength < kMaxInt);
552     result_len = kMaxInt;  // Provoke exception.
553   } else {
554     result_len = static_cast<int>(result_len_64);
555   }
556   if (result_len == 0) {
557     return ReadOnlyRoots(isolate).empty_string();
558   }
559 
560   int subject_pos = 0;
561   int result_pos = 0;
562 
563   MaybeHandle<SeqString> maybe_res;
564   if (ResultSeqString::kHasOneByteEncoding) {
565     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
566   } else {
567     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
568   }
569   Handle<SeqString> untyped_res;
570   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
571   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
572 
573   DisallowHeapAllocation no_gc;
574   for (int index : *indices) {
575     // Copy non-matched subject content.
576     if (subject_pos < index) {
577       String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
578                           subject_pos, index);
579       result_pos += index - subject_pos;
580     }
581 
582     // Replace match.
583     if (replacement_len > 0) {
584       String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
585                           replacement_len);
586       result_pos += replacement_len;
587     }
588 
589     subject_pos = index + pattern_len;
590   }
591   // Add remaining subject content at the end.
592   if (subject_pos < subject_len) {
593     String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
594                         subject_pos, subject_len);
595   }
596 
597   int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
598   RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices);
599 
600   TruncateRegexpIndicesList(isolate);
601 
602   return *result;
603 }
604 
StringReplaceGlobalRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)605 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
606     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
607     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
608   DCHECK(subject->IsFlat());
609   DCHECK(replacement->IsFlat());
610 
611   int capture_count = regexp->CaptureCount();
612   int subject_length = subject->length();
613 
614   JSRegExp::Type typeTag = regexp->TypeTag();
615   if (typeTag == JSRegExp::IRREGEXP) {
616     // Ensure the RegExp is compiled so we can access the capture-name map.
617     if (RegExp::IrregexpPrepare(isolate, regexp, subject) == -1) {
618       DCHECK(isolate->has_pending_exception());
619       return ReadOnlyRoots(isolate).exception();
620     }
621   }
622 
623   // CompiledReplacement uses zone allocation.
624   Zone zone(isolate->allocator(), ZONE_NAME);
625   CompiledReplacement compiled_replacement(&zone);
626   const bool simple_replace = compiled_replacement.Compile(
627       isolate, regexp, replacement, capture_count, subject_length);
628 
629   // Shortcut for simple non-regexp global replacements
630   if (typeTag == JSRegExp::ATOM && simple_replace) {
631     if (subject->IsOneByteRepresentation() &&
632         replacement->IsOneByteRepresentation()) {
633       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
634           isolate, subject, regexp, replacement, last_match_info);
635     } else {
636       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
637           isolate, subject, regexp, replacement, last_match_info);
638     }
639   }
640 
641   RegExpGlobalCache global_cache(regexp, subject, isolate);
642   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
643 
644   int32_t* current_match = global_cache.FetchNext();
645   if (current_match == nullptr) {
646     if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
647     return *subject;
648   }
649 
650   // Guessing the number of parts that the final result string is built
651   // from. Global regexps can match any number of times, so we guess
652   // conservatively.
653   int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
654   ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
655 
656   int prev = 0;
657 
658   do {
659     int start = current_match[0];
660     int end = current_match[1];
661 
662     if (prev < start) {
663       builder.AddSubjectSlice(prev, start);
664     }
665 
666     if (simple_replace) {
667       builder.AddString(replacement);
668     } else {
669       compiled_replacement.Apply(&builder, start, end, current_match);
670     }
671     prev = end;
672 
673     current_match = global_cache.FetchNext();
674   } while (current_match != nullptr);
675 
676   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
677 
678   if (prev < subject_length) {
679     builder.AddSubjectSlice(prev, subject_length);
680   }
681 
682   RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
683                            global_cache.LastSuccessfulMatch());
684 
685   RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
686 }
687 
688 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_info)689 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
690     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
691     Handle<RegExpMatchInfo> last_match_info) {
692   DCHECK(subject->IsFlat());
693 
694   // Shortcut for simple non-regexp global replacements
695   if (regexp->TypeTag() == JSRegExp::ATOM) {
696     Handle<String> empty_string = isolate->factory()->empty_string();
697     if (subject->IsOneByteRepresentation()) {
698       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
699           isolate, subject, regexp, empty_string, last_match_info);
700     } else {
701       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
702           isolate, subject, regexp, empty_string, last_match_info);
703     }
704   }
705 
706   RegExpGlobalCache global_cache(regexp, subject, isolate);
707   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
708 
709   int32_t* current_match = global_cache.FetchNext();
710   if (current_match == nullptr) {
711     if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
712     return *subject;
713   }
714 
715   int start = current_match[0];
716   int end = current_match[1];
717   int capture_count = regexp->CaptureCount();
718   int subject_length = subject->length();
719 
720   int new_length = subject_length - (end - start);
721   if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
722 
723   Handle<ResultSeqString> answer;
724   if (ResultSeqString::kHasOneByteEncoding) {
725     answer = Handle<ResultSeqString>::cast(
726         isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
727   } else {
728     answer = Handle<ResultSeqString>::cast(
729         isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
730   }
731 
732   int prev = 0;
733   int position = 0;
734 
735   DisallowHeapAllocation no_gc;
736   do {
737     start = current_match[0];
738     end = current_match[1];
739     if (prev < start) {
740       // Add substring subject[prev;start] to answer string.
741       String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
742                           start);
743       position += start - prev;
744     }
745     prev = end;
746 
747     current_match = global_cache.FetchNext();
748   } while (current_match != nullptr);
749 
750   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
751 
752   RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
753                            global_cache.LastSuccessfulMatch());
754 
755   if (prev < subject_length) {
756     // Add substring subject[prev;length] to answer string.
757     String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
758                         subject_length);
759     position += subject_length - prev;
760   }
761 
762   if (position == 0) return ReadOnlyRoots(isolate).empty_string();
763 
764   // Shorten string and fill
765   int string_size = ResultSeqString::SizeFor(position);
766   int allocated_string_size = ResultSeqString::SizeFor(new_length);
767   int delta = allocated_string_size - string_size;
768 
769   answer->set_length(position);
770   if (delta == 0) return *answer;
771 
772   Address end_of_string = answer->address() + string_size;
773   Heap* heap = isolate->heap();
774 
775   // The trimming is performed on a newly allocated object, which is on a
776   // freshly allocated page or on an already swept page. Hence, the sweeper
777   // thread can not get confused with the filler creation. No synchronization
778   // needed.
779   // TODO(hpayer): We should shrink the large object page if the size
780   // of the object changed significantly.
781   if (!heap->IsLargeObject(*answer)) {
782     heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
783   }
784   return *answer;
785 }
786 
RUNTIME_FUNCTION(Runtime_StringSplit)787 RUNTIME_FUNCTION(Runtime_StringSplit) {
788   HandleScope handle_scope(isolate);
789   DCHECK_EQ(3, args.length());
790   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
791   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
792   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
793   CHECK_LT(0, limit);
794 
795   int subject_length = subject->length();
796   int pattern_length = pattern->length();
797   CHECK_LT(0, pattern_length);
798 
799   if (limit == 0xFFFFFFFFu) {
800     FixedArray last_match_cache_unused;
801     Handle<Object> cached_answer(
802         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
803                                    &last_match_cache_unused,
804                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
805         isolate);
806     if (*cached_answer != Smi::zero()) {
807       // The cache FixedArray is a COW-array and can therefore be reused.
808       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
809           Handle<FixedArray>::cast(cached_answer));
810       return *result;
811     }
812   }
813 
814   // The limit can be very large (0xFFFFFFFFu), but since the pattern
815   // isn't empty, we can never create more parts than ~half the length
816   // of the subject.
817 
818   subject = String::Flatten(isolate, subject);
819   pattern = String::Flatten(isolate, pattern);
820 
821   std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
822 
823   FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
824 
825   if (static_cast<uint32_t>(indices->size()) < limit) {
826     indices->push_back(subject_length);
827   }
828 
829   // The list indices now contains the end of each part to create.
830 
831   // Create JSArray of substrings separated by separator.
832   int part_count = static_cast<int>(indices->size());
833 
834   Handle<JSArray> result =
835       isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
836                                      INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
837 
838   DCHECK(result->HasObjectElements());
839 
840   Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
841 
842   if (part_count == 1 && indices->at(0) == subject_length) {
843     elements->set(0, *subject);
844   } else {
845     int part_start = 0;
846     FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
847       int part_end = indices->at(i);
848       Handle<String> substring =
849           isolate->factory()->NewProperSubString(subject, part_start, part_end);
850       elements->set(i, *substring);
851       part_start = part_end + pattern_length;
852     });
853   }
854 
855   if (limit == 0xFFFFFFFFu) {
856     if (result->HasObjectElements()) {
857       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
858                                 isolate->factory()->empty_fixed_array(),
859                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
860     }
861   }
862 
863   TruncateRegexpIndicesList(isolate);
864 
865   return *result;
866 }
867 
// Runtime entry that executes |regexp| on |subject| starting at |index|.
//
// Arguments: (regexp: JSRegExp, subject: String, index: int32,
//             last_match_info: RegExpMatchInfo).
// |index| is CHECKed to lie in [0, subject->length()]. The
// regexp_entry_runtime counter is incremented on every call. The value
// handed back is whatever RegExp::Exec produces, routed through
// RETURN_RESULT_OR_FAILURE (presumably the runtime failure value when
// RegExp::Exec yields no result -- see that macro's definition).
RUNTIME_FUNCTION(Runtime_RegExpExec) {
  HandleScope scope(isolate);
  DCHECK_EQ(4, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
  CONVERT_INT32_ARG_CHECKED(index, 2);
  CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
  // Due to the way the JS calls are constructed this must be less than the
  // length of a string, i.e. it is always a Smi.  We check anyway for security.
  CHECK_LE(0, index);
  CHECK_GE(subject->length(), index);
  isolate->counters()->regexp_entry_runtime()->Increment();
  RETURN_RESULT_OR_FAILURE(
      isolate, RegExp::Exec(isolate, regexp, subject, index, last_match_info));
}
883 
884 namespace {
885 
886 class MatchInfoBackedMatch : public String::Match {
887  public:
MatchInfoBackedMatch(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<RegExpMatchInfo> match_info)888   MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
889                        Handle<String> subject,
890                        Handle<RegExpMatchInfo> match_info)
891       : isolate_(isolate), match_info_(match_info) {
892     subject_ = String::Flatten(isolate, subject);
893 
894     if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
895       Object o = regexp->CaptureNameMap();
896       has_named_captures_ = o.IsFixedArray();
897       if (has_named_captures_) {
898         capture_name_map_ = handle(FixedArray::cast(o), isolate);
899       }
900     } else {
901       has_named_captures_ = false;
902     }
903   }
904 
GetMatch()905   Handle<String> GetMatch() override {
906     return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
907   }
908 
GetPrefix()909   Handle<String> GetPrefix() override {
910     const int match_start = match_info_->Capture(0);
911     return isolate_->factory()->NewSubString(subject_, 0, match_start);
912   }
913 
GetSuffix()914   Handle<String> GetSuffix() override {
915     const int match_end = match_info_->Capture(1);
916     return isolate_->factory()->NewSubString(subject_, match_end,
917                                              subject_->length());
918   }
919 
HasNamedCaptures()920   bool HasNamedCaptures() override { return has_named_captures_; }
921 
CaptureCount()922   int CaptureCount() override {
923     return match_info_->NumberOfCaptureRegisters() / 2;
924   }
925 
GetCapture(int i,bool * capture_exists)926   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
927     Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
928         isolate_, match_info_, i, capture_exists);
929     return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
930                              : isolate_->factory()->empty_string();
931   }
932 
GetNamedCapture(Handle<String> name,CaptureState * state)933   MaybeHandle<String> GetNamedCapture(Handle<String> name,
934                                       CaptureState* state) override {
935     DCHECK(has_named_captures_);
936     const int capture_index = LookupNamedCapture(
937         [=](String capture_name) { return capture_name.Equals(*name); },
938         *capture_name_map_);
939 
940     if (capture_index == -1) {
941       *state = INVALID;
942       return name;  // Arbitrary string handle.
943     }
944 
945     DCHECK(1 <= capture_index && capture_index <= CaptureCount());
946 
947     bool capture_exists;
948     Handle<String> capture_value;
949     ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
950                                GetCapture(capture_index, &capture_exists),
951                                String);
952 
953     if (!capture_exists) {
954       *state = UNMATCHED;
955       return isolate_->factory()->empty_string();
956     } else {
957       *state = MATCHED;
958       return capture_value;
959     }
960   }
961 
962  private:
963   Isolate* isolate_;
964   Handle<String> subject_;
965   Handle<RegExpMatchInfo> match_info_;
966 
967   bool has_named_captures_;
968   Handle<FixedArray> capture_name_map_;
969 };
970 
971 class VectorBackedMatch : public String::Match {
972  public:
VectorBackedMatch(Isolate * isolate,Handle<String> subject,Handle<String> match,int match_position,ZoneVector<Handle<Object>> * captures,Handle<Object> groups_obj)973   VectorBackedMatch(Isolate* isolate, Handle<String> subject,
974                     Handle<String> match, int match_position,
975                     ZoneVector<Handle<Object>>* captures,
976                     Handle<Object> groups_obj)
977       : isolate_(isolate),
978         match_(match),
979         match_position_(match_position),
980         captures_(captures) {
981     subject_ = String::Flatten(isolate, subject);
982 
983     DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
984     has_named_captures_ = !groups_obj->IsUndefined(isolate);
985     if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
986   }
987 
GetMatch()988   Handle<String> GetMatch() override { return match_; }
989 
GetPrefix()990   Handle<String> GetPrefix() override {
991     return isolate_->factory()->NewSubString(subject_, 0, match_position_);
992   }
993 
GetSuffix()994   Handle<String> GetSuffix() override {
995     const int match_end_position = match_position_ + match_->length();
996     return isolate_->factory()->NewSubString(subject_, match_end_position,
997                                              subject_->length());
998   }
999 
HasNamedCaptures()1000   bool HasNamedCaptures() override { return has_named_captures_; }
1001 
CaptureCount()1002   int CaptureCount() override { return static_cast<int>(captures_->size()); }
1003 
GetCapture(int i,bool * capture_exists)1004   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1005     Handle<Object> capture_obj = captures_->at(i);
1006     if (capture_obj->IsUndefined(isolate_)) {
1007       *capture_exists = false;
1008       return isolate_->factory()->empty_string();
1009     }
1010     *capture_exists = true;
1011     return Object::ToString(isolate_, capture_obj);
1012   }
1013 
GetNamedCapture(Handle<String> name,CaptureState * state)1014   MaybeHandle<String> GetNamedCapture(Handle<String> name,
1015                                       CaptureState* state) override {
1016     DCHECK(has_named_captures_);
1017 
1018     Maybe<bool> maybe_capture_exists =
1019         JSReceiver::HasProperty(groups_obj_, name);
1020     if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1021 
1022     if (!maybe_capture_exists.FromJust()) {
1023       *state = INVALID;
1024       return name;  // Arbitrary string handle.
1025     }
1026 
1027     Handle<Object> capture_obj;
1028     ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1029                                Object::GetProperty(isolate_, groups_obj_, name),
1030                                String);
1031     if (capture_obj->IsUndefined(isolate_)) {
1032       *state = UNMATCHED;
1033       return isolate_->factory()->empty_string();
1034     } else {
1035       *state = MATCHED;
1036       return Object::ToString(isolate_, capture_obj);
1037     }
1038   }
1039 
1040  private:
1041   Isolate* isolate_;
1042   Handle<String> subject_;
1043   Handle<String> match_;
1044   const int match_position_;
1045   ZoneVector<Handle<Object>>* captures_;
1046 
1047   bool has_named_captures_;
1048   Handle<JSReceiver> groups_obj_;
1049 };
1050 
1051 // Create the groups object (see also the RegExp result creation in
1052 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
ConstructNamedCaptureGroupsObject(Isolate * isolate,Handle<FixedArray> capture_map,const std::function<Object (int)> & f_get_capture)1053 Handle<JSObject> ConstructNamedCaptureGroupsObject(
1054     Isolate* isolate, Handle<FixedArray> capture_map,
1055     const std::function<Object(int)>& f_get_capture) {
1056   Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1057 
1058   const int named_capture_count = capture_map->length() >> 1;
1059   for (int i = 0; i < named_capture_count; i++) {
1060     const int name_ix = i * 2;
1061     const int index_ix = i * 2 + 1;
1062 
1063     Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1064                                 isolate);
1065     const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1066     DCHECK_GE(capture_ix, 1);  // Explicit groups start at index 1.
1067 
1068     Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1069     DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1070 
1071     JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1072   }
1073 
1074   return groups;
1075 }
1076 
// Collects every match of |regexp| in the flat |subject| and stores the
// collected entries in |result_array|.
//
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info.  See comment on that function.
//
// |has_capture| must be true exactly when the regexp has at least one capture
// group (DCHECKed below). With captures, each match contributes a JSArray of
// the form [match, capture_1..capture_n, match_start, subject(, groups)];
// without captures, each match contributes just the matched string. The
// unmatched stretches between matches are recorded through
// ReplacementStringBuilder::AddSubjectSlice.
//
// Returns |result_array| on success, the null value if there was no match at
// all, and the exception sentinel if the global cache reports an exception.
// For subjects longer than kMinLengthToCache the result is served from and
// stored into RegExpResultsCache (REGEXP_MULTIPLE_INDICES).
template <bool has_capture>
static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
                                   Handle<JSRegExp> regexp,
                                   Handle<RegExpMatchInfo> last_match_array,
                                   Handle<JSArray> result_array) {
  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
  DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
  DCHECK(subject->IsFlat());

  // Force tier up to native code for global replaces. The global replace is
  // implemented differently for native code and bytecode execution, where the
  // native code expects an array to store all the matches, and the bytecode
  // matches one at a time, so it's easier to tier-up to native code from the
  // start.
  if (FLAG_regexp_tier_up && regexp->TypeTag() == JSRegExp::IRREGEXP) {
    regexp->MarkTierUpForNextExec();
    if (FLAG_trace_regexp_tier_up) {
      PrintF("Forcing tier-up of JSRegExp object %p in SearchRegExpMultiple\n",
             reinterpret_cast<void*>(regexp->ptr()));
    }
  }

  int capture_count = regexp->CaptureCount();
  int subject_length = subject->length();

  // Results are cached only for subjects strictly longer than this.
  static const int kMinLengthToCache = 0x1000;

  if (subject_length > kMinLengthToCache) {
    FixedArray last_match_cache;
    Object cached_answer = RegExpResultsCache::Lookup(
        isolate->heap(), *subject, regexp->data(), &last_match_cache,
        RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    if (cached_answer.IsFixedArray()) {
      // Cache hit: rebuild the raw register array of the last match from the
      // Smis stored next to the cached result so that the last match info can
      // be restored as well.
      int capture_registers = (capture_count + 1) * 2;
      int32_t* last_match = NewArray<int32_t>(capture_registers);
      for (int i = 0; i < capture_registers; i++) {
        last_match[i] = Smi::ToInt(last_match_cache.get(i));
      }
      Handle<FixedArray> cached_fixed_array =
          Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
      // The cache FixedArray is a COW-array and we need to return a copy.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              cached_fixed_array, isolate->factory()->fixed_array_map());
      JSArray::SetContent(result_array, copied_fixed_array);
      RegExp::SetLastMatchInfo(isolate, last_match_array, subject,
                               capture_count, last_match);
      // |last_match| was allocated with NewArray above; release it here.
      DeleteArray(last_match);
      return *result_array;
    }
  }

  RegExpGlobalCache global_cache(regexp, subject, isolate);
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  // Ensured in Runtime_RegExpExecMultiple.
  DCHECK(result_array->HasObjectElements());
  Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
                                     isolate);
  // Start the builder with a backing store of at least 16 slots.
  if (result_elements->length() < 16) {
    result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
  }

  FixedArrayBuilder builder(result_elements);

  // Position to search from.
  // |match_start| stays at -1 until the first match is found; this is what
  // distinguishes "no match at all" after the loop.
  int match_start = -1;
  int match_end = 0;
  bool first = true;

  // Two smis before and after the match, for very long strings.
  static const int kMaxBuilderEntriesPerRegExpMatch = 5;

  while (true) {
    int32_t* current_match = global_cache.FetchNext();
    if (current_match == nullptr) break;
    match_start = current_match[0];
    builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
    // Record the unmatched text between the previous match and this one.
    if (match_end < match_start) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                match_start);
    }
    match_end = current_match[1];
    {
      // Avoid accumulating new handles inside loop.
      HandleScope temp_scope(isolate);
      Handle<String> match;
      // NOTE(review): only the first match goes through NewSubString; later
      // ones use NewProperSubString. Presumably this is because only the
      // first match can cover the entire subject -- confirm against the
      // factory's contract for these two functions.
      if (!first) {
        match = isolate->factory()->NewProperSubString(subject, match_start,
                                                       match_end);
      } else {
        match =
            isolate->factory()->NewSubString(subject, match_start, match_end);
        first = false;
      }

      if (has_capture) {
        // Arguments array to replace function is match, captures, index and
        // subject, i.e., 3 + capture count in total. If the RegExp contains
        // named captures, they are also passed as the last argument.

        Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
        const bool has_named_captures = maybe_capture_map->IsFixedArray();

        const int argc =
            has_named_captures ? 4 + capture_count : 3 + capture_count;

        Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
        int cursor = 0;

        elements->set(cursor++, *match);
        // Captures 1..capture_count; a negative start register marks a
        // capture that did not take part in the match.
        for (int i = 1; i <= capture_count; i++) {
          int start = current_match[i * 2];
          if (start >= 0) {
            int end = current_match[i * 2 + 1];
            DCHECK(start <= end);
            Handle<String> substring =
                isolate->factory()->NewSubString(subject, start, end);
            elements->set(cursor++, *substring);
          } else {
            DCHECK_GT(0, current_match[i * 2 + 1]);
            elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
          }
        }

        elements->set(cursor++, Smi::FromInt(match_start));
        elements->set(cursor++, *subject);

        if (has_named_captures) {
          Handle<FixedArray> capture_map =
              Handle<FixedArray>::cast(maybe_capture_map);
          // Capture i was stored at elements[i] above, so the capture index
          // can be used directly as the element index.
          Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
              isolate, capture_map, [=](int ix) { return elements->get(ix); });
          elements->set(cursor++, *groups);
        }

        DCHECK_EQ(cursor, argc);
        builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
      } else {
        builder.Add(*match);
      }
    }
  }

  // FetchNext() returns nullptr both when matches run out and on failure;
  // tell the two apart here.
  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();

  if (match_start >= 0) {
    // Finished matching, with at least one match.
    if (match_end < subject_length) {
      ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
                                                subject_length);
    }

    RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count,
                             global_cache.LastSuccessfulMatch());

    if (subject_length > kMinLengthToCache) {
      // Store the last successful match into the array for caching.
      // TODO(yangguo): do not expose last match to JS and simplify caching.
      int capture_registers = (capture_count + 1) * 2;
      Handle<FixedArray> last_match_cache =
          isolate->factory()->NewFixedArray(capture_registers);
      int32_t* last_match = global_cache.LastSuccessfulMatch();
      for (int i = 0; i < capture_registers; i++) {
        last_match_cache->set(i, Smi::FromInt(last_match[i]));
      }
      Handle<FixedArray> result_fixed_array =
          FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
      // Cache the result and copy the FixedArray into a COW array.
      Handle<FixedArray> copied_fixed_array =
          isolate->factory()->CopyFixedArrayWithMap(
              result_fixed_array, isolate->factory()->fixed_array_map());
      RegExpResultsCache::Enter(
          isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
          last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
    }
    return *builder.ToJSArray(result_array);
  } else {
    return ReadOnlyRoots(isolate).null_value();  // No matches at all.
  }
}
1260 
1261 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1262 // doesn't properly call the underlying exec method.
RegExpReplace(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> string,Handle<String> replace)1263 V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1264     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1265     Handle<String> replace) {
1266   // Functional fast-paths are dispatched directly by replace builtin.
1267   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1268 
1269   Factory* factory = isolate->factory();
1270 
1271   const int flags = regexp->GetFlags();
1272   const bool global = (flags & JSRegExp::kGlobal) != 0;
1273   const bool sticky = (flags & JSRegExp::kSticky) != 0;
1274 
1275   replace = String::Flatten(isolate, replace);
1276 
1277   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1278 
1279   if (!global) {
1280     // Non-global regexp search, string replace.
1281 
1282     uint32_t last_index = 0;
1283     if (sticky) {
1284       Handle<Object> last_index_obj(regexp->last_index(), isolate);
1285       ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1286                                  Object::ToLength(isolate, last_index_obj),
1287                                  String);
1288       last_index = PositiveNumberToUint32(*last_index_obj);
1289     }
1290 
1291     Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1292                                      isolate);
1293 
1294     // A lastIndex exceeding the string length always returns null (signalling
1295     // failure) in RegExpBuiltinExec, thus we can skip the call.
1296     if (last_index <= static_cast<uint32_t>(string->length())) {
1297       ASSIGN_RETURN_ON_EXCEPTION(
1298           isolate, match_indices_obj,
1299           RegExp::Exec(isolate, regexp, string, last_index, last_match_info),
1300           String);
1301     }
1302 
1303     if (match_indices_obj->IsNull(isolate)) {
1304       if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1305       return string;
1306     }
1307 
1308     auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1309 
1310     const int start_index = match_indices->Capture(0);
1311     const int end_index = match_indices->Capture(1);
1312 
1313     if (sticky) {
1314       regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1315     }
1316 
1317     IncrementalStringBuilder builder(isolate);
1318     builder.AppendString(factory->NewSubString(string, 0, start_index));
1319 
1320     if (replace->length() > 0) {
1321       MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1322       Handle<String> replacement;
1323       ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1324                                  String::GetSubstitution(isolate, &m, replace),
1325                                  String);
1326       builder.AppendString(replacement);
1327     }
1328 
1329     builder.AppendString(
1330         factory->NewSubString(string, end_index, string->length()));
1331     return builder.Finish();
1332   } else {
1333     // Global regexp search, string replace.
1334     DCHECK(global);
1335     RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1336                         String);
1337 
1338     // Force tier up to native code for global replaces. The global replace is
1339     // implemented differently for native code and bytecode execution, where the
1340     // native code expects an array to store all the matches, and the bytecode
1341     // matches one at a time, so it's easier to tier-up to native code from the
1342     // start.
1343     if (FLAG_regexp_tier_up && regexp->TypeTag() == JSRegExp::IRREGEXP) {
1344       regexp->MarkTierUpForNextExec();
1345       if (FLAG_trace_regexp_tier_up) {
1346         PrintF("Forcing tier-up of JSRegExp object %p in RegExpReplace\n",
1347                reinterpret_cast<void*>(regexp->ptr()));
1348       }
1349     }
1350 
1351     if (replace->length() == 0) {
1352       if (string->IsOneByteRepresentation()) {
1353         Object result =
1354             StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1355                 isolate, string, regexp, last_match_info);
1356         return handle(String::cast(result), isolate);
1357       } else {
1358         Object result =
1359             StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1360                 isolate, string, regexp, last_match_info);
1361         return handle(String::cast(result), isolate);
1362       }
1363     }
1364 
1365     Object result = StringReplaceGlobalRegExpWithString(
1366         isolate, string, regexp, replace, last_match_info);
1367     if (result.IsString()) {
1368       return handle(String::cast(result), isolate);
1369     } else {
1370       return MaybeHandle<String>();
1371     }
1372   }
1373 
1374   UNREACHABLE();
1375 }
1376 
1377 }  // namespace
1378 
1379 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1380 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1381   HandleScope handles(isolate);
1382   DCHECK_EQ(4, args.length());
1383 
1384   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1385   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1386   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1387   CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1388 
1389   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1390   CHECK(result_array->HasObjectElements());
1391 
1392   subject = String::Flatten(isolate, subject);
1393   CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1394 
1395   Object result;
1396   if (regexp->CaptureCount() == 0) {
1397     result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1398                                          last_match_info, result_array);
1399   } else {
1400     result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1401                                         last_match_info, result_array);
1402   }
1403   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1404   return result;
1405 }
1406 
// Runtime entry that replaces a single match of a non-global regexp using a
// callable replacement.
//
// Arguments: (subject: String, regexp: JSRegExp, replace_obj: JSReceiver).
// |regexp| must be unmodified and non-global, |replace_obj| callable (all
// DCHECKed). The search starts at index 0, or at lastIndex for sticky
// regexps. If nothing matches, |subject| is returned unchanged. Otherwise
// |replace_obj| is called with (match, captures..., index, subject[, groups])
// and the result is
//   subject[0, index) + ToString(call result) + subject[end_of_match, length).
// For sticky regexps lastIndex is set to the end of the match, or reset to 0
// when nothing matched. A RangeError (kTooManyArguments) is thrown when
// GetArgcForReplaceCallable reports failure for the required argument count.
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());
  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);

  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
  DCHECK(replace_obj->map().is_callable());

  Factory* factory = isolate->factory();
  Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();

  const int flags = regexp->GetFlags();
  DCHECK_EQ(flags & JSRegExp::kGlobal, 0);

  // TODO(jgruber): This should be an easy port to CSA with massive payback.

  const bool sticky = (flags & JSRegExp::kSticky) != 0;
  // Only sticky regexps honor lastIndex here; all others search from 0.
  uint32_t last_index = 0;
  if (sticky) {
    Handle<Object> last_index_obj(regexp->last_index(), isolate);
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
    last_index = PositiveNumberToUint32(*last_index_obj);
  }

  // Stays null when the exec call below is skipped or finds nothing.
  Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
                                   isolate);

  // A lastIndex exceeding the string length always returns null (signalling
  // failure) in RegExpBuiltinExec, thus we can skip the call.
  if (last_index <= static_cast<uint32_t>(subject->length())) {
    ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
        isolate, match_indices_obj,
        RegExp::Exec(isolate, regexp, subject, last_index, last_match_info));
  }

  if (match_indices_obj->IsNull(isolate)) {
    // No match: the subject is returned unchanged.
    if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
    return *subject;
  }

  Handle<RegExpMatchInfo> match_indices =
      Handle<RegExpMatchInfo>::cast(match_indices_obj);

  // Start and end offsets of the whole match within |subject|.
  const int index = match_indices->Capture(0);
  const int end_of_match = match_indices->Capture(1);

  if (sticky) {
    regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
  }

  // The result starts with the text preceding the match.
  IncrementalStringBuilder builder(isolate);
  builder.AppendString(factory->NewSubString(subject, 0, index));

  // Compute the parameter list consisting of the match, captures, index,
  // and subject for the replace function invocation. If the RegExp contains
  // named captures, they are also passed as the last argument.

  // The number of captures plus one for the match.
  const int m = match_indices->NumberOfCaptureRegisters() / 2;

  bool has_named_captures = false;
  Handle<FixedArray> capture_map;
  if (m > 1) {
    // The existence of capture groups implies IRREGEXP kind.
    DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);

    Object maybe_capture_map = regexp->CaptureNameMap();
    if (maybe_capture_map.IsFixedArray()) {
      has_named_captures = true;
      capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
    }
  }

  // GetArgcForReplaceCallable signals failure with -1 converted to uint32_t.
  const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
  if (argc == static_cast<uint32_t>(-1)) {
    THROW_NEW_ERROR_RETURN_FAILURE(
        isolate, NewRangeError(MessageTemplate::kTooManyArguments));
  }
  ScopedVector<Handle<Object>> argv(argc);

  // argv[0] is the whole match and argv[1..m-1] are the captures; captures
  // that GenericCaptureGetter reports as absent are passed as undefined.
  int cursor = 0;
  for (int j = 0; j < m; j++) {
    bool ok;
    Handle<String> capture =
        RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
    if (ok) {
      argv[cursor++] = capture;
    } else {
      argv[cursor++] = factory->undefined_value();
    }
  }

  argv[cursor++] = handle(Smi::FromInt(index), isolate);
  argv[cursor++] = subject;

  if (has_named_captures) {
    // Capture i lives at argv[i], so the capture index doubles as the argv
    // index when building the groups object.
    argv[cursor++] = ConstructNamedCaptureGroupsObject(
        isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
  }

  DCHECK_EQ(cursor, argc);

  // Invoke the replacement callable with an undefined receiver.
  Handle<Object> replacement_obj;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, replacement_obj,
      Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
                      argv.begin()));

  Handle<String> replacement;
  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
      isolate, replacement, Object::ToString(isolate, replacement_obj));

  // Append the replacement followed by the text after the match.
  builder.AppendString(replacement);
  builder.AppendString(
      factory->NewSubString(subject, end_of_match, subject->length()));

  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
}
1528 
1529 namespace {
1530 
ToUint32(Isolate * isolate,Handle<Object> object,uint32_t * out)1531 V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1532                                                    Handle<Object> object,
1533                                                    uint32_t* out) {
1534   if (object->IsUndefined(isolate)) {
1535     *out = kMaxUInt32;
1536     return object;
1537   }
1538 
1539   Handle<Object> number;
1540   ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1541                              Object);
1542   *out = NumberToUint32(*number);
1543   return object;
1544 }
1545 
NewJSArrayWithElements(Isolate * isolate,Handle<FixedArray> elems,int num_elems)1546 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1547                                        Handle<FixedArray> elems,
1548                                        int num_elems) {
1549   return isolate->factory()->NewJSArrayWithElements(
1550       FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1551 }
1552 
1553 }  // namespace
1554 
1555 // Slow path for:
// ES#sec-regexp.prototype-@@split
1557 // RegExp.prototype [ @@split ] ( string, limit )
RUNTIME_FUNCTION(Runtime_RegExpSplit)1558 RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1559   HandleScope scope(isolate);
1560   DCHECK_EQ(3, args.length());
1561 
1562   CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1563   CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1564   CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1565 
1566   Factory* factory = isolate->factory();
1567 
1568   Handle<JSFunction> regexp_fun = isolate->regexp_function();
1569   Handle<Object> ctor;
1570   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1571       isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1572 
1573   Handle<Object> flags_obj;
1574   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1575       isolate, flags_obj,
1576       JSObject::GetProperty(isolate, recv, factory->flags_string()));
1577 
1578   Handle<String> flags;
1579   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1580                                      Object::ToString(isolate, flags_obj));
1581 
1582   Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1583   const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1584 
1585   Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1586   const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1587 
1588   Handle<String> new_flags = flags;
1589   if (!sticky) {
1590     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1591                                        factory->NewConsString(flags, y_str));
1592   }
1593 
1594   Handle<JSReceiver> splitter;
1595   {
1596     const int argc = 2;
1597 
1598     ScopedVector<Handle<Object>> argv(argc);
1599     argv[0] = recv;
1600     argv[1] = new_flags;
1601 
1602     Handle<Object> splitter_obj;
1603     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1604         isolate, splitter_obj,
1605         Execution::New(isolate, ctor, argc, argv.begin()));
1606 
1607     splitter = Handle<JSReceiver>::cast(splitter_obj);
1608   }
1609 
1610   uint32_t limit;
1611   RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1612 
1613   const uint32_t length = string->length();
1614 
1615   if (limit == 0) return *factory->NewJSArray(0);
1616 
1617   if (length == 0) {
1618     Handle<Object> result;
1619     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1620         isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1621                                                  factory->undefined_value()));
1622 
1623     if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1624 
1625     Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1626     elems->set(0, *string);
1627     return *factory->NewJSArrayWithElements(elems);
1628   }
1629 
1630   static const int kInitialArraySize = 8;
1631   Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1632   uint32_t num_elems = 0;
1633 
1634   uint32_t string_index = 0;
1635   uint32_t prev_string_index = 0;
1636   while (string_index < length) {
1637     RETURN_FAILURE_ON_EXCEPTION(
1638         isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1639 
1640     Handle<Object> result;
1641     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1642         isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1643                                                  factory->undefined_value()));
1644 
1645     if (result->IsNull(isolate)) {
1646       string_index = static_cast<uint32_t>(
1647           RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1648       continue;
1649     }
1650 
1651     Handle<Object> last_index_obj;
1652     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1653         isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1654 
1655     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1656         isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1657 
1658     const uint32_t end =
1659         std::min(PositiveNumberToUint32(*last_index_obj), length);
1660     if (end == prev_string_index) {
1661       string_index = static_cast<uint32_t>(
1662           RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1663       continue;
1664     }
1665 
1666     {
1667       Handle<String> substr =
1668           factory->NewSubString(string, prev_string_index, string_index);
1669       elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1670       if (num_elems == limit) {
1671         return *NewJSArrayWithElements(isolate, elems, num_elems);
1672       }
1673     }
1674 
1675     prev_string_index = end;
1676 
1677     Handle<Object> num_captures_obj;
1678     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1679         isolate, num_captures_obj,
1680         Object::GetProperty(isolate, result,
1681                             isolate->factory()->length_string()));
1682 
1683     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1684         isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1685     const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1686 
1687     for (uint32_t i = 1; i < num_captures; i++) {
1688       Handle<Object> capture;
1689       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1690           isolate, capture, Object::GetElement(isolate, result, i));
1691       elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1692       if (num_elems == limit) {
1693         return *NewJSArrayWithElements(isolate, elems, num_elems);
1694       }
1695     }
1696 
1697     string_index = prev_string_index;
1698   }
1699 
1700   {
1701     Handle<String> substr =
1702         factory->NewSubString(string, prev_string_index, length);
1703     elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1704   }
1705 
1706   return *NewJSArrayWithElements(isolate, elems, num_elems);
1707 }
1708 
1709 // Slow path for:
1710 // ES#sec-regexp.prototype-@@replace
1711 // RegExp.prototype [ @@replace ] ( string, replaceValue )
RUNTIME_FUNCTION(Runtime_RegExpReplaceRT)1712 RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
1713   HandleScope scope(isolate);
1714   DCHECK_EQ(3, args.length());
1715 
1716   CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1717   CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1718   Handle<Object> replace_obj = args.at(2);
1719 
1720   Factory* factory = isolate->factory();
1721 
1722   string = String::Flatten(isolate, string);
1723 
1724   const bool functional_replace = replace_obj->IsCallable();
1725 
1726   Handle<String> replace;
1727   if (!functional_replace) {
1728     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1729                                        Object::ToString(isolate, replace_obj));
1730   }
1731 
1732   // Fast-path for unmodified JSRegExps (and non-functional replace).
1733   if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1734     // We should never get here with functional replace because unmodified
1735     // regexp and functional replace should be fully handled in CSA code.
1736     CHECK(!functional_replace);
1737     Handle<Object> result;
1738     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1739         isolate, result,
1740         RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
1741     DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
1742     return *result;
1743   }
1744 
1745   const uint32_t length = string->length();
1746 
1747   Handle<Object> global_obj;
1748   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1749       isolate, global_obj,
1750       JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1751   const bool global = global_obj->BooleanValue(isolate);
1752 
1753   bool unicode = false;
1754   if (global) {
1755     Handle<Object> unicode_obj;
1756     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1757         isolate, unicode_obj,
1758         JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1759     unicode = unicode_obj->BooleanValue(isolate);
1760 
1761     RETURN_FAILURE_ON_EXCEPTION(isolate,
1762                                 RegExpUtils::SetLastIndex(isolate, recv, 0));
1763   }
1764 
1765   Zone zone(isolate->allocator(), ZONE_NAME);
1766   ZoneVector<Handle<Object>> results(&zone);
1767 
1768   while (true) {
1769     Handle<Object> result;
1770     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1771         isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1772                                                  factory->undefined_value()));
1773 
1774     if (result->IsNull(isolate)) break;
1775 
1776     results.push_back(result);
1777     if (!global) break;
1778 
1779     Handle<Object> match_obj;
1780     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1781                                        Object::GetElement(isolate, result, 0));
1782 
1783     Handle<String> match;
1784     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1785                                        Object::ToString(isolate, match_obj));
1786 
1787     if (match->length() == 0) {
1788       RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1789                                                isolate, recv, string, unicode));
1790     }
1791   }
1792 
1793   // TODO(jgruber): Look into ReplacementStringBuilder instead.
1794   IncrementalStringBuilder builder(isolate);
1795   uint32_t next_source_position = 0;
1796 
1797   for (const auto& result : results) {
1798     HandleScope handle_scope(isolate);
1799     Handle<Object> captures_length_obj;
1800     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1801         isolate, captures_length_obj,
1802         Object::GetProperty(isolate, result, factory->length_string()));
1803 
1804     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1805         isolate, captures_length_obj,
1806         Object::ToLength(isolate, captures_length_obj));
1807     const uint32_t captures_length =
1808         PositiveNumberToUint32(*captures_length_obj);
1809 
1810     Handle<Object> match_obj;
1811     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1812                                        Object::GetElement(isolate, result, 0));
1813 
1814     Handle<String> match;
1815     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1816                                        Object::ToString(isolate, match_obj));
1817 
1818     const int match_length = match->length();
1819 
1820     Handle<Object> position_obj;
1821     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1822         isolate, position_obj,
1823         Object::GetProperty(isolate, result, factory->index_string()));
1824 
1825     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1826         isolate, position_obj, Object::ToInteger(isolate, position_obj));
1827     const uint32_t position =
1828         std::min(PositiveNumberToUint32(*position_obj), length);
1829 
1830     // Do not reserve capacity since captures_length is user-controlled.
1831     ZoneVector<Handle<Object>> captures(&zone);
1832 
1833     for (uint32_t n = 0; n < captures_length; n++) {
1834       Handle<Object> capture;
1835       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1836           isolate, capture, Object::GetElement(isolate, result, n));
1837 
1838       if (!capture->IsUndefined(isolate)) {
1839         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1840                                            Object::ToString(isolate, capture));
1841       }
1842       captures.push_back(capture);
1843     }
1844 
1845     Handle<Object> groups_obj = isolate->factory()->undefined_value();
1846     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1847         isolate, groups_obj,
1848         Object::GetProperty(isolate, result, factory->groups_string()));
1849 
1850     const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1851 
1852     Handle<String> replacement;
1853     if (functional_replace) {
1854       const uint32_t argc =
1855           GetArgcForReplaceCallable(captures_length, has_named_captures);
1856       if (argc == static_cast<uint32_t>(-1)) {
1857         THROW_NEW_ERROR_RETURN_FAILURE(
1858             isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1859       }
1860 
1861       ScopedVector<Handle<Object>> argv(argc);
1862 
1863       int cursor = 0;
1864       for (uint32_t j = 0; j < captures_length; j++) {
1865         argv[cursor++] = captures[j];
1866       }
1867 
1868       argv[cursor++] = handle(Smi::FromInt(position), isolate);
1869       argv[cursor++] = string;
1870       if (has_named_captures) argv[cursor++] = groups_obj;
1871 
1872       DCHECK_EQ(cursor, argc);
1873 
1874       Handle<Object> replacement_obj;
1875       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1876           isolate, replacement_obj,
1877           Execution::Call(isolate, replace_obj, factory->undefined_value(),
1878                           argc, argv.begin()));
1879 
1880       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1881           isolate, replacement, Object::ToString(isolate, replacement_obj));
1882     } else {
1883       DCHECK(!functional_replace);
1884       if (!groups_obj->IsUndefined(isolate)) {
1885         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1886             isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1887       }
1888       VectorBackedMatch m(isolate, string, match, position, &captures,
1889                           groups_obj);
1890       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1891           isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1892     }
1893 
1894     if (position >= next_source_position) {
1895       builder.AppendString(
1896           factory->NewSubString(string, next_source_position, position));
1897       builder.AppendString(replacement);
1898 
1899       next_source_position = position + match_length;
1900     }
1901   }
1902 
1903   if (next_source_position < length) {
1904     builder.AppendString(
1905         factory->NewSubString(string, next_source_position, length));
1906   }
1907 
1908   RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1909 }
1910 
// Runs JSRegExp::Initialize on the given regexp object.
//
// Runtime arguments: [0] the JSRegExp to initialize, [1] the source String and
// [2] the flags String. Returns the regexp object itself, or bails out through
// RETURN_FAILURE_ON_EXCEPTION if initialization fails.
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
  HandleScope scope(isolate);
  DCHECK_EQ(3, args.length());
  // TODO(pwong): To follow the spec more closely and simplify calling code,
  // this could handle the canonicalization of pattern and flags. See
  // https://tc39.github.io/ecma262/#sec-regexpinitialize
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, target, 0);
  CONVERT_ARG_HANDLE_CHECKED(String, pattern_source, 1);
  CONVERT_ARG_HANDLE_CHECKED(String, flag_string, 2);

  // Only success or failure of the call matters here; its result value is
  // discarded and the original |target| is returned instead.
  RETURN_FAILURE_ON_EXCEPTION(
      isolate, JSRegExp::Initialize(target, pattern_source, flag_string));

  return *target;
}
1926 
// Returns the heap's boolean for whether argument 0 is a JSRegExp.
RUNTIME_FUNCTION(Runtime_IsRegExp) {
  SealHandleScope seal_scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_CHECKED(Object, candidate, 0);
  const bool is_regexp = candidate.IsJSRegExp();
  return isolate->heap()->ToBoolean(is_regexp);
}
1933 
1934 }  // namespace internal
1935 }  // namespace v8
1936