1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <functional>
6 
7 #include "src/base/small-vector.h"
8 #include "src/base/strings.h"
9 #include "src/common/message-template.h"
10 #include "src/execution/arguments-inl.h"
11 #include "src/execution/isolate-inl.h"
12 #include "src/heap/heap-inl.h"  // For ToBoolean. TODO(jkummerow): Drop.
13 #include "src/logging/counters.h"
14 #include "src/numbers/conversions-inl.h"
15 #include "src/objects/js-array-inl.h"
16 #include "src/objects/js-regexp-inl.h"
17 #include "src/regexp/regexp-utils.h"
18 #include "src/regexp/regexp.h"
19 #include "src/runtime/runtime-utils.h"
20 #include "src/strings/string-builder-inl.h"
21 #include "src/strings/string-search.h"
22 
23 namespace v8 {
24 namespace internal {
25 
26 namespace {
27 
28 // Fairly arbitrary, but intended to fit:
29 //
30 // - captures
31 // - results
32 // - parsed replacement pattern parts
33 //
34 // for small, common cases.
35 constexpr int kStaticVectorSlots = 8;
36 
37 // Returns -1 for failure.
GetArgcForReplaceCallable(uint32_t num_captures,bool has_named_captures)38 uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
39                                    bool has_named_captures) {
40   const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
41   const uint32_t kAdditionalArgsWithNamedCaptures = 3;
42   if (num_captures > Code::kMaxArguments) return -1;
43   uint32_t argc = has_named_captures
44                       ? num_captures + kAdditionalArgsWithNamedCaptures
45                       : num_captures + kAdditionalArgsWithoutNamedCaptures;
46   STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
47                                           kAdditionalArgsWithNamedCaptures);
48   return (argc > Code::kMaxArguments) ? -1 : argc;
49 }
50 
51 // Looks up the capture of the given name. Returns the (1-based) numbered
52 // capture index or -1 on failure.
LookupNamedCapture(const std::function<bool (String)> & name_matches,FixedArray capture_name_map)53 int LookupNamedCapture(const std::function<bool(String)>& name_matches,
54                        FixedArray capture_name_map) {
55   // TODO(jgruber): Sort capture_name_map and do binary search via
56   // internalized strings.
57 
58   int maybe_capture_index = -1;
59   const int named_capture_count = capture_name_map.length() >> 1;
60   for (int j = 0; j < named_capture_count; j++) {
61     // The format of {capture_name_map} is documented at
62     // JSRegExp::kIrregexpCaptureNameMapIndex.
63     const int name_ix = j * 2;
64     const int index_ix = j * 2 + 1;
65 
66     String capture_name = String::cast(capture_name_map.get(name_ix));
67     if (!name_matches(capture_name)) continue;
68 
69     maybe_capture_index = Smi::ToInt(capture_name_map.get(index_ix));
70     break;
71   }
72 
73   return maybe_capture_index;
74 }
75 
76 }  // namespace
77 
78 class CompiledReplacement {
79  public:
80   // Return whether the replacement is simple.
81   bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
82                Handle<String> replacement, int capture_count,
83                int subject_length);
84 
85   // Use Apply only if Compile returned false.
86   void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
87              int32_t* match);
88 
89   // Number of distinct parts of the replacement pattern.
parts()90   int parts() { return static_cast<int>(parts_.size()); }
91 
92  private:
93   enum PartType {
94     SUBJECT_PREFIX = 1,
95     SUBJECT_SUFFIX,
96     SUBJECT_CAPTURE,
97     REPLACEMENT_SUBSTRING,
98     REPLACEMENT_STRING,
99     EMPTY_REPLACEMENT,
100     NUMBER_OF_PART_TYPES
101   };
102 
103   struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart104     static inline ReplacementPart SubjectMatch() {
105       return ReplacementPart(SUBJECT_CAPTURE, 0);
106     }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart107     static inline ReplacementPart SubjectCapture(int capture_index) {
108       return ReplacementPart(SUBJECT_CAPTURE, capture_index);
109     }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart110     static inline ReplacementPart SubjectPrefix() {
111       return ReplacementPart(SUBJECT_PREFIX, 0);
112     }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart113     static inline ReplacementPart SubjectSuffix(int subject_length) {
114       return ReplacementPart(SUBJECT_SUFFIX, subject_length);
115     }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart116     static inline ReplacementPart ReplacementString() {
117       return ReplacementPart(REPLACEMENT_STRING, 0);
118     }
EmptyReplacementv8::internal::CompiledReplacement::ReplacementPart119     static inline ReplacementPart EmptyReplacement() {
120       return ReplacementPart(EMPTY_REPLACEMENT, 0);
121     }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart122     static inline ReplacementPart ReplacementSubString(int from, int to) {
123       DCHECK_LE(0, from);
124       DCHECK_GT(to, from);
125       return ReplacementPart(-from, to);
126     }
127 
128     // If tag <= 0 then it is the negation of a start index of a substring of
129     // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart130     ReplacementPart(int tag, int data) : tag(tag), data(data) {
131       // Must be non-positive or a PartType value.
132       DCHECK(tag < NUMBER_OF_PART_TYPES);
133     }
134     // Either a value of PartType or a non-positive number that is
135     // the negation of an index into the replacement string.
136     int tag;
137     // The data value's interpretation depends on the value of tag:
138     // tag == SUBJECT_PREFIX ||
139     // tag == SUBJECT_SUFFIX:  data is unused.
140     // tag == SUBJECT_CAPTURE: data is the number of the capture.
141     // tag == REPLACEMENT_SUBSTRING ||
142     // tag == REPLACEMENT_STRING:    data is index into array of substrings
143     //                               of the replacement string.
144     // tag == EMPTY_REPLACEMENT: data is unused.
145     // tag <= 0: Temporary representation of the substring of the replacement
146     //           string ranging over -tag .. data.
147     //           Is replaced by REPLACEMENT_{SUB,}STRING when we create the
148     //           substring objects.
149     int data;
150   };
151 
152   template <typename Char>
ParseReplacementPattern(base::Vector<Char> characters,FixedArray capture_name_map,int capture_count,int subject_length)153   bool ParseReplacementPattern(base::Vector<Char> characters,
154                                FixedArray capture_name_map, int capture_count,
155                                int subject_length) {
156     // Equivalent to String::GetSubstitution, except that this method converts
157     // the replacement string into an internal representation that avoids
158     // repeated parsing when used repeatedly.
159     int length = characters.length();
160     int last = 0;
161     for (int i = 0; i < length; i++) {
162       Char c = characters[i];
163       if (c == '$') {
164         int next_index = i + 1;
165         if (next_index == length) {  // No next character!
166           break;
167         }
168         Char c2 = characters[next_index];
169         switch (c2) {
170           case '$':
171             if (i > last) {
172               // There is a substring before. Include the first "$".
173               parts_.emplace_back(
174                   ReplacementPart::ReplacementSubString(last, next_index));
175               last = next_index + 1;  // Continue after the second "$".
176             } else {
177               // Let the next substring start with the second "$".
178               last = next_index;
179             }
180             i = next_index;
181             break;
182           case '`':
183             if (i > last) {
184               parts_.emplace_back(
185                   ReplacementPart::ReplacementSubString(last, i));
186             }
187             parts_.emplace_back(ReplacementPart::SubjectPrefix());
188             i = next_index;
189             last = i + 1;
190             break;
191           case '\'':
192             if (i > last) {
193               parts_.emplace_back(
194                   ReplacementPart::ReplacementSubString(last, i));
195             }
196             parts_.emplace_back(ReplacementPart::SubjectSuffix(subject_length));
197             i = next_index;
198             last = i + 1;
199             break;
200           case '&':
201             if (i > last) {
202               parts_.emplace_back(
203                   ReplacementPart::ReplacementSubString(last, i));
204             }
205             parts_.emplace_back(ReplacementPart::SubjectMatch());
206             i = next_index;
207             last = i + 1;
208             break;
209           case '0':
210           case '1':
211           case '2':
212           case '3':
213           case '4':
214           case '5':
215           case '6':
216           case '7':
217           case '8':
218           case '9': {
219             int capture_ref = c2 - '0';
220             if (capture_ref > capture_count) {
221               i = next_index;
222               continue;
223             }
224             int second_digit_index = next_index + 1;
225             if (second_digit_index < length) {
226               // Peek ahead to see if we have two digits.
227               Char c3 = characters[second_digit_index];
228               if ('0' <= c3 && c3 <= '9') {  // Double digits.
229                 int double_digit_ref = capture_ref * 10 + c3 - '0';
230                 if (double_digit_ref <= capture_count) {
231                   next_index = second_digit_index;
232                   capture_ref = double_digit_ref;
233                 }
234               }
235             }
236             if (capture_ref > 0) {
237               if (i > last) {
238                 parts_.emplace_back(
239                     ReplacementPart::ReplacementSubString(last, i));
240               }
241               DCHECK(capture_ref <= capture_count);
242               parts_.emplace_back(ReplacementPart::SubjectCapture(capture_ref));
243               last = next_index + 1;
244             }
245             i = next_index;
246             break;
247           }
248           case '<': {
249             if (capture_name_map.is_null()) {
250               i = next_index;
251               break;
252             }
253 
254             // Scan until the next '>', and let the enclosed substring be the
255             // groupName.
256 
257             const int name_start_index = next_index + 1;
258             int closing_bracket_index = -1;
259             for (int j = name_start_index; j < length; j++) {
260               if (characters[j] == '>') {
261                 closing_bracket_index = j;
262                 break;
263               }
264             }
265 
266             // If no closing bracket is found, '$<' is treated as a string
267             // literal.
268             if (closing_bracket_index == -1) {
269               i = next_index;
270               break;
271             }
272 
273             base::Vector<Char> requested_name =
274                 characters.SubVector(name_start_index, closing_bracket_index);
275 
276             // Let capture be ? Get(namedCaptures, groupName).
277 
278             const int capture_index = LookupNamedCapture(
279                 [=](String capture_name) {
280                   return capture_name.IsEqualTo(requested_name);
281                 },
282                 capture_name_map);
283 
284             // If capture is undefined or does not exist, replace the text
285             // through the following '>' with the empty string.
286             // Otherwise, replace the text through the following '>' with
287             // ? ToString(capture).
288 
289             DCHECK(capture_index == -1 ||
290                    (1 <= capture_index && capture_index <= capture_count));
291 
292             if (i > last) {
293               parts_.emplace_back(
294                   ReplacementPart::ReplacementSubString(last, i));
295             }
296             parts_.emplace_back(
297                 (capture_index == -1)
298                     ? ReplacementPart::EmptyReplacement()
299                     : ReplacementPart::SubjectCapture(capture_index));
300             last = closing_bracket_index + 1;
301             i = closing_bracket_index;
302             break;
303           }
304           default:
305             i = next_index;
306             break;
307         }
308       }
309     }
310     if (length > last) {
311       if (last == 0) {
312         // Replacement is simple.  Do not use Apply to do the replacement.
313         return true;
314       } else {
315         parts_.emplace_back(
316             ReplacementPart::ReplacementSubString(last, length));
317       }
318     }
319     return false;
320   }
321 
322   base::SmallVector<ReplacementPart, kStaticVectorSlots> parts_;
323   base::SmallVector<Handle<String>, kStaticVectorSlots> replacement_substrings_;
324 };
325 
Compile(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> replacement,int capture_count,int subject_length)326 bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
327                                   Handle<String> replacement, int capture_count,
328                                   int subject_length) {
329   {
330     DisallowGarbageCollection no_gc;
331     String::FlatContent content = replacement->GetFlatContent(no_gc);
332     DCHECK(content.IsFlat());
333 
334     FixedArray capture_name_map;
335     if (capture_count > 0) {
336       DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag()));
337       Object maybe_capture_name_map = regexp->capture_name_map();
338       if (maybe_capture_name_map.IsFixedArray()) {
339         capture_name_map = FixedArray::cast(maybe_capture_name_map);
340       }
341     }
342 
343     bool simple;
344     if (content.IsOneByte()) {
345       simple =
346           ParseReplacementPattern(content.ToOneByteVector(), capture_name_map,
347                                   capture_count, subject_length);
348     } else {
349       DCHECK(content.IsTwoByte());
350       simple = ParseReplacementPattern(content.ToUC16Vector(), capture_name_map,
351                                        capture_count, subject_length);
352     }
353     if (simple) return true;
354   }
355 
356   // Find substrings of replacement string and create them as String objects.
357   int substring_index = 0;
358   for (ReplacementPart& part : parts_) {
359     int tag = part.tag;
360     if (tag <= 0) {  // A replacement string slice.
361       int from = -tag;
362       int to = part.data;
363       replacement_substrings_.emplace_back(
364           isolate->factory()->NewSubString(replacement, from, to));
365       part.tag = REPLACEMENT_SUBSTRING;
366       part.data = substring_index;
367       substring_index++;
368     } else if (tag == REPLACEMENT_STRING) {
369       replacement_substrings_.emplace_back(replacement);
370       part.data = substring_index;
371       substring_index++;
372     }
373   }
374   return false;
375 }
376 
377 
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)378 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
379                                 int match_from, int match_to, int32_t* match) {
380   DCHECK_LT(0, parts_.size());
381   for (ReplacementPart& part : parts_) {
382     switch (part.tag) {
383       case SUBJECT_PREFIX:
384         if (match_from > 0) builder->AddSubjectSlice(0, match_from);
385         break;
386       case SUBJECT_SUFFIX: {
387         int subject_length = part.data;
388         if (match_to < subject_length) {
389           builder->AddSubjectSlice(match_to, subject_length);
390         }
391         break;
392       }
393       case SUBJECT_CAPTURE: {
394         int capture = part.data;
395         int from = match[capture * 2];
396         int to = match[capture * 2 + 1];
397         if (from >= 0 && to > from) {
398           builder->AddSubjectSlice(from, to);
399         }
400         break;
401       }
402       case REPLACEMENT_SUBSTRING:
403       case REPLACEMENT_STRING:
404         builder->AddString(replacement_substrings_[part.data]);
405         break;
406       case EMPTY_REPLACEMENT:
407         break;
408       default:
409         UNREACHABLE();
410     }
411   }
412 }
413 
FindOneByteStringIndices(base::Vector<const uint8_t> subject,uint8_t pattern,std::vector<int> * indices,unsigned int limit)414 void FindOneByteStringIndices(base::Vector<const uint8_t> subject,
415                               uint8_t pattern, std::vector<int>* indices,
416                               unsigned int limit) {
417   DCHECK_LT(0, limit);
418   // Collect indices of pattern in subject using memchr.
419   // Stop after finding at most limit values.
420   const uint8_t* subject_start = subject.begin();
421   const uint8_t* subject_end = subject_start + subject.length();
422   const uint8_t* pos = subject_start;
423   while (limit > 0) {
424     pos = reinterpret_cast<const uint8_t*>(
425         memchr(pos, pattern, subject_end - pos));
426     if (pos == nullptr) return;
427     indices->push_back(static_cast<int>(pos - subject_start));
428     pos++;
429     limit--;
430   }
431 }
432 
FindTwoByteStringIndices(const base::Vector<const base::uc16> subject,base::uc16 pattern,std::vector<int> * indices,unsigned int limit)433 void FindTwoByteStringIndices(const base::Vector<const base::uc16> subject,
434                               base::uc16 pattern, std::vector<int>* indices,
435                               unsigned int limit) {
436   DCHECK_LT(0, limit);
437   const base::uc16* subject_start = subject.begin();
438   const base::uc16* subject_end = subject_start + subject.length();
439   for (const base::uc16* pos = subject_start; pos < subject_end && limit > 0;
440        pos++) {
441     if (*pos == pattern) {
442       indices->push_back(static_cast<int>(pos - subject_start));
443       limit--;
444     }
445   }
446 }
447 
448 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,base::Vector<const SubjectChar> subject,base::Vector<const PatternChar> pattern,std::vector<int> * indices,unsigned int limit)449 void FindStringIndices(Isolate* isolate,
450                        base::Vector<const SubjectChar> subject,
451                        base::Vector<const PatternChar> pattern,
452                        std::vector<int>* indices, unsigned int limit) {
453   DCHECK_LT(0, limit);
454   // Collect indices of pattern in subject.
455   // Stop after finding at most limit values.
456   int pattern_length = pattern.length();
457   int index = 0;
458   StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
459   while (limit > 0) {
460     index = search.Search(subject, index);
461     if (index < 0) return;
462     indices->push_back(index);
463     index += pattern_length;
464     limit--;
465   }
466 }
467 
FindStringIndicesDispatch(Isolate * isolate,String subject,String pattern,std::vector<int> * indices,unsigned int limit)468 void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
469                                std::vector<int>* indices, unsigned int limit) {
470   {
471     DisallowGarbageCollection no_gc;
472     String::FlatContent subject_content = subject.GetFlatContent(no_gc);
473     String::FlatContent pattern_content = pattern.GetFlatContent(no_gc);
474     DCHECK(subject_content.IsFlat());
475     DCHECK(pattern_content.IsFlat());
476     if (subject_content.IsOneByte()) {
477       base::Vector<const uint8_t> subject_vector =
478           subject_content.ToOneByteVector();
479       if (pattern_content.IsOneByte()) {
480         base::Vector<const uint8_t> pattern_vector =
481             pattern_content.ToOneByteVector();
482         if (pattern_vector.length() == 1) {
483           FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
484                                    limit);
485         } else {
486           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487                             limit);
488         }
489       } else {
490         FindStringIndices(isolate, subject_vector,
491                           pattern_content.ToUC16Vector(), indices, limit);
492       }
493     } else {
494       base::Vector<const base::uc16> subject_vector =
495           subject_content.ToUC16Vector();
496       if (pattern_content.IsOneByte()) {
497         base::Vector<const uint8_t> pattern_vector =
498             pattern_content.ToOneByteVector();
499         if (pattern_vector.length() == 1) {
500           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
501                                    limit);
502         } else {
503           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
504                             limit);
505         }
506       } else {
507         base::Vector<const base::uc16> pattern_vector =
508             pattern_content.ToUC16Vector();
509         if (pattern_vector.length() == 1) {
510           FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
511                                    limit);
512         } else {
513           FindStringIndices(isolate, subject_vector, pattern_vector, indices,
514                             limit);
515         }
516       }
517     }
518   }
519 }
520 
521 namespace {
GetRewoundRegexpIndicesList(Isolate * isolate)522 std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
523   std::vector<int>* list = isolate->regexp_indices();
524   list->clear();
525   return list;
526 }
527 
TruncateRegexpIndicesList(Isolate * isolate)528 void TruncateRegexpIndicesList(Isolate* isolate) {
529   // Same size as smallest zone segment, preserving behavior from the
530   // runtime zone.
531   // TODO(jgruber): Consider removing the reusable regexp_indices list and
532   // simply allocating a new list each time. It feels like we're needlessly
533   // optimizing an edge case.
534   static const int kMaxRegexpIndicesListCapacity = 8 * KB / kIntSize;
535   std::vector<int>* indices = isolate->regexp_indices();
536   if (indices->capacity() > kMaxRegexpIndicesListCapacity) {
537     // Throw away backing storage.
538     indices->clear();
539     indices->shrink_to_fit();
540   }
541 }
542 }  // namespace
543 
544 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)545 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
546     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
547     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
548   DCHECK(subject->IsFlat());
549   DCHECK(replacement->IsFlat());
550 
551   std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
552 
553   DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->type_tag());
554   String pattern = pattern_regexp->atom_pattern();
555   int subject_len = subject->length();
556   int pattern_len = pattern.length();
557   int replacement_len = replacement->length();
558 
559   FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
560 
561   if (indices->empty()) return *subject;
562 
563   // Detect integer overflow.
564   int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
565                            static_cast<int64_t>(pattern_len)) *
566                               static_cast<int64_t>(indices->size()) +
567                           static_cast<int64_t>(subject_len);
568   int result_len;
569   if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
570     STATIC_ASSERT(String::kMaxLength < kMaxInt);
571     result_len = kMaxInt;  // Provoke exception.
572   } else {
573     result_len = static_cast<int>(result_len_64);
574   }
575   if (result_len == 0) {
576     return ReadOnlyRoots(isolate).empty_string();
577   }
578 
579   int subject_pos = 0;
580   int result_pos = 0;
581 
582   MaybeHandle<SeqString> maybe_res;
583   if (ResultSeqString::kHasOneByteEncoding) {
584     maybe_res = isolate->factory()->NewRawOneByteString(result_len);
585   } else {
586     maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
587   }
588   Handle<SeqString> untyped_res;
589   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
590   Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
591 
592   DisallowGarbageCollection no_gc;
593   for (int index : *indices) {
594     // Copy non-matched subject content.
595     if (subject_pos < index) {
596       String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
597                           subject_pos, index - subject_pos);
598       result_pos += index - subject_pos;
599     }
600 
601     // Replace match.
602     if (replacement_len > 0) {
603       String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
604                           replacement_len);
605       result_pos += replacement_len;
606     }
607 
608     subject_pos = index + pattern_len;
609   }
610   // Add remaining subject content at the end.
611   if (subject_pos < subject_len) {
612     String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
613                         subject_pos, subject_len - subject_pos);
614   }
615 
616   int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
617   RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices);
618 
619   TruncateRegexpIndicesList(isolate);
620 
621   return *result;
622 }
623 
StringReplaceGlobalRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)624 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
625     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
626     Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
627   DCHECK(subject->IsFlat());
628   DCHECK(replacement->IsFlat());
629 
630   int capture_count = regexp->capture_count();
631   int subject_length = subject->length();
632 
633   // Ensure the RegExp is compiled so we can access the capture-name map.
634   if (!RegExp::EnsureFullyCompiled(isolate, regexp, subject)) {
635     return ReadOnlyRoots(isolate).exception();
636   }
637 
638   CompiledReplacement compiled_replacement;
639   const bool simple_replace = compiled_replacement.Compile(
640       isolate, regexp, replacement, capture_count, subject_length);
641 
642   // Shortcut for simple non-regexp global replacements
643   if (regexp->type_tag() == JSRegExp::ATOM && simple_replace) {
644     if (subject->IsOneByteRepresentation() &&
645         replacement->IsOneByteRepresentation()) {
646       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
647           isolate, subject, regexp, replacement, last_match_info);
648     } else {
649       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
650           isolate, subject, regexp, replacement, last_match_info);
651     }
652   }
653 
654   RegExpGlobalCache global_cache(regexp, subject, isolate);
655   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
656 
657   int32_t* current_match = global_cache.FetchNext();
658   if (current_match == nullptr) {
659     if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
660     return *subject;
661   }
662 
663   // Guessing the number of parts that the final result string is built
664   // from. Global regexps can match any number of times, so we guess
665   // conservatively.
666   int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
667   ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
668 
669   int prev = 0;
670 
671   do {
672     int start = current_match[0];
673     int end = current_match[1];
674 
675     if (prev < start) {
676       builder.AddSubjectSlice(prev, start);
677     }
678 
679     if (simple_replace) {
680       builder.AddString(replacement);
681     } else {
682       compiled_replacement.Apply(&builder, start, end, current_match);
683     }
684     prev = end;
685 
686     current_match = global_cache.FetchNext();
687   } while (current_match != nullptr);
688 
689   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
690 
691   if (prev < subject_length) {
692     builder.AddSubjectSlice(prev, subject_length);
693   }
694 
695   RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
696                            global_cache.LastSuccessfulMatch());
697 
698   RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
699 }
700 
701 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_info)702 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
703     Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
704     Handle<RegExpMatchInfo> last_match_info) {
705   DCHECK(subject->IsFlat());
706 
707   // Shortcut for simple non-regexp global replacements
708   if (regexp->type_tag() == JSRegExp::ATOM) {
709     Handle<String> empty_string = isolate->factory()->empty_string();
710     if (subject->IsOneByteRepresentation()) {
711       return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
712           isolate, subject, regexp, empty_string, last_match_info);
713     } else {
714       return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
715           isolate, subject, regexp, empty_string, last_match_info);
716     }
717   }
718 
719   RegExpGlobalCache global_cache(regexp, subject, isolate);
720   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
721 
722   int32_t* current_match = global_cache.FetchNext();
723   if (current_match == nullptr) {
724     if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
725     return *subject;
726   }
727 
728   int start = current_match[0];
729   int end = current_match[1];
730   int capture_count = regexp->capture_count();
731   int subject_length = subject->length();
732 
733   int new_length = subject_length - (end - start);
734   if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
735 
736   Handle<ResultSeqString> answer;
737   if (ResultSeqString::kHasOneByteEncoding) {
738     answer = Handle<ResultSeqString>::cast(
739         isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
740   } else {
741     answer = Handle<ResultSeqString>::cast(
742         isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
743   }
744 
745   int prev = 0;
746   int position = 0;
747 
748   DisallowGarbageCollection no_gc;
749   do {
750     start = current_match[0];
751     end = current_match[1];
752     if (prev < start) {
753       // Add substring subject[prev;start] to answer string.
754       String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
755                           start - prev);
756       position += start - prev;
757     }
758     prev = end;
759 
760     current_match = global_cache.FetchNext();
761   } while (current_match != nullptr);
762 
763   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
764 
765   RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
766                            global_cache.LastSuccessfulMatch());
767 
768   if (prev < subject_length) {
769     // Add substring subject[prev;length] to answer string.
770     String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
771                         subject_length - prev);
772     position += subject_length - prev;
773   }
774 
775   if (position == 0) return ReadOnlyRoots(isolate).empty_string();
776 
777   // Shorten string and fill
778   int string_size = ResultSeqString::SizeFor(position);
779   int allocated_string_size = ResultSeqString::SizeFor(new_length);
780   int delta = allocated_string_size - string_size;
781 
782   answer->set_length(position);
783   if (delta == 0) return *answer;
784 
785   Address end_of_string = answer->address() + string_size;
786   Heap* heap = isolate->heap();
787 
788   // The trimming is performed on a newly allocated object, which is on a
789   // freshly allocated page or on an already swept page. Hence, the sweeper
790   // thread can not get confused with the filler creation. No synchronization
791   // needed.
792   // TODO(hpayer): We should shrink the large object page if the size
793   // of the object changed significantly.
794   if (!heap->IsLargeObject(*answer)) {
795     heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
796   }
797   return *answer;
798 }
799 
RUNTIME_FUNCTION(Runtime_StringSplit)800 RUNTIME_FUNCTION(Runtime_StringSplit) {
801   HandleScope handle_scope(isolate);
802   DCHECK_EQ(3, args.length());
803   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
804   CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
805   CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
806   CHECK_LT(0, limit);
807 
808   int subject_length = subject->length();
809   int pattern_length = pattern->length();
810   CHECK_LT(0, pattern_length);
811 
812   if (limit == 0xFFFFFFFFu) {
813     FixedArray last_match_cache_unused;
814     Handle<Object> cached_answer(
815         RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
816                                    &last_match_cache_unused,
817                                    RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
818         isolate);
819     if (*cached_answer != Smi::zero()) {
820       // The cache FixedArray is a COW-array and can therefore be reused.
821       Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
822           Handle<FixedArray>::cast(cached_answer));
823       return *result;
824     }
825   }
826 
827   // The limit can be very large (0xFFFFFFFFu), but since the pattern
828   // isn't empty, we can never create more parts than ~half the length
829   // of the subject.
830 
831   subject = String::Flatten(isolate, subject);
832   pattern = String::Flatten(isolate, pattern);
833 
834   std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
835 
836   FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
837 
838   if (static_cast<uint32_t>(indices->size()) < limit) {
839     indices->push_back(subject_length);
840   }
841 
842   // The list indices now contains the end of each part to create.
843 
844   // Create JSArray of substrings separated by separator.
845   int part_count = static_cast<int>(indices->size());
846 
847   Handle<JSArray> result =
848       isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
849                                      INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
850 
851   DCHECK(result->HasObjectElements());
852 
853   Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
854 
855   if (part_count == 1 && indices->at(0) == subject_length) {
856     elements->set(0, *subject);
857   } else {
858     int part_start = 0;
859     FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
860       int part_end = indices->at(i);
861       Handle<String> substring =
862           isolate->factory()->NewProperSubString(subject, part_start, part_end);
863       elements->set(i, *substring);
864       part_start = part_end + pattern_length;
865     });
866   }
867 
868   if (limit == 0xFFFFFFFFu) {
869     if (result->HasObjectElements()) {
870       RegExpResultsCache::Enter(isolate, subject, pattern, elements,
871                                 isolate->factory()->empty_fixed_array(),
872                                 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
873     }
874   }
875 
876   TruncateRegexpIndicesList(isolate);
877 
878   return *result;
879 }
880 
881 namespace {
882 
RegExpExec(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,int32_t index,Handle<RegExpMatchInfo> last_match_info,RegExp::ExecQuirks exec_quirks)883 MaybeHandle<Object> RegExpExec(Isolate* isolate, Handle<JSRegExp> regexp,
884                                Handle<String> subject, int32_t index,
885                                Handle<RegExpMatchInfo> last_match_info,
886                                RegExp::ExecQuirks exec_quirks) {
887   // Due to the way the JS calls are constructed this must be less than the
888   // length of a string, i.e. it is always a Smi.  We check anyway for security.
889   CHECK_LE(0, index);
890   CHECK_GE(subject->length(), index);
891   isolate->counters()->regexp_entry_runtime()->Increment();
892   return RegExp::Exec(isolate, regexp, subject, index, last_match_info,
893                       exec_quirks);
894 }
895 
ExperimentalOneshotExec(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,int32_t index,Handle<RegExpMatchInfo> last_match_info,RegExp::ExecQuirks exec_quirks)896 MaybeHandle<Object> ExperimentalOneshotExec(
897     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
898     int32_t index, Handle<RegExpMatchInfo> last_match_info,
899     RegExp::ExecQuirks exec_quirks) {
900   // Due to the way the JS calls are constructed this must be less than the
901   // length of a string, i.e. it is always a Smi.  We check anyway for security.
902   CHECK_LE(0, index);
903   CHECK_GE(subject->length(), index);
904   isolate->counters()->regexp_entry_runtime()->Increment();
905   return RegExp::ExperimentalOneshotExec(isolate, regexp, subject, index,
906                                          last_match_info, exec_quirks);
907 }
908 
909 }  // namespace
910 
RUNTIME_FUNCTION(Runtime_RegExpExec)911 RUNTIME_FUNCTION(Runtime_RegExpExec) {
912   HandleScope scope(isolate);
913   DCHECK_EQ(4, args.length());
914   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
915   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
916   CONVERT_INT32_ARG_CHECKED(index, 2);
917   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
918   RETURN_RESULT_OR_FAILURE(
919       isolate, RegExpExec(isolate, regexp, subject, index, last_match_info,
920                           RegExp::ExecQuirks::kNone));
921 }
922 
RUNTIME_FUNCTION(Runtime_RegExpExecTreatMatchAtEndAsFailure)923 RUNTIME_FUNCTION(Runtime_RegExpExecTreatMatchAtEndAsFailure) {
924   HandleScope scope(isolate);
925   DCHECK_EQ(4, args.length());
926   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
927   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
928   CONVERT_INT32_ARG_CHECKED(index, 2);
929   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
930   RETURN_RESULT_OR_FAILURE(
931       isolate, RegExpExec(isolate, regexp, subject, index, last_match_info,
932                           RegExp::ExecQuirks::kTreatMatchAtEndAsFailure));
933 }
934 
RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec)935 RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec) {
936   HandleScope scope(isolate);
937   DCHECK_EQ(4, args.length());
938   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
939   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
940   CONVERT_INT32_ARG_CHECKED(index, 2);
941   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
942   RETURN_RESULT_OR_FAILURE(
943       isolate,
944       ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info,
945                               RegExp::ExecQuirks::kNone));
946 }
947 
RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExecTreatMatchAtEndAsFailure)948 RUNTIME_FUNCTION(
949     Runtime_RegExpExperimentalOneshotExecTreatMatchAtEndAsFailure) {
950   HandleScope scope(isolate);
951   DCHECK_EQ(4, args.length());
952   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
953   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
954   CONVERT_INT32_ARG_CHECKED(index, 2);
955   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
956   RETURN_RESULT_OR_FAILURE(
957       isolate,
958       ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info,
959                               RegExp::ExecQuirks::kTreatMatchAtEndAsFailure));
960 }
961 
RUNTIME_FUNCTION(Runtime_RegExpBuildIndices)962 RUNTIME_FUNCTION(Runtime_RegExpBuildIndices) {
963   HandleScope scope(isolate);
964   DCHECK_EQ(3, args.length());
965   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, match_info, 1);
966   CONVERT_ARG_HANDLE_CHECKED(Object, maybe_names, 2);
967 #ifdef DEBUG
968   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
969   DCHECK(regexp->flags() & JSRegExp::kHasIndices);
970 #endif
971 
972   return *JSRegExpResultIndices::BuildIndices(isolate, match_info, maybe_names);
973 }
974 
975 namespace {
976 
977 class MatchInfoBackedMatch : public String::Match {
978  public:
MatchInfoBackedMatch(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<RegExpMatchInfo> match_info)979   MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
980                        Handle<String> subject,
981                        Handle<RegExpMatchInfo> match_info)
982       : isolate_(isolate), match_info_(match_info) {
983     subject_ = String::Flatten(isolate, subject);
984 
985     if (JSRegExp::TypeSupportsCaptures(regexp->type_tag())) {
986       Object o = regexp->capture_name_map();
987       has_named_captures_ = o.IsFixedArray();
988       if (has_named_captures_) {
989         capture_name_map_ = handle(FixedArray::cast(o), isolate);
990       }
991     } else {
992       has_named_captures_ = false;
993     }
994   }
995 
GetMatch()996   Handle<String> GetMatch() override {
997     return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
998   }
999 
GetPrefix()1000   Handle<String> GetPrefix() override {
1001     const int match_start = match_info_->Capture(0);
1002     return isolate_->factory()->NewSubString(subject_, 0, match_start);
1003   }
1004 
GetSuffix()1005   Handle<String> GetSuffix() override {
1006     const int match_end = match_info_->Capture(1);
1007     return isolate_->factory()->NewSubString(subject_, match_end,
1008                                              subject_->length());
1009   }
1010 
HasNamedCaptures()1011   bool HasNamedCaptures() override { return has_named_captures_; }
1012 
CaptureCount()1013   int CaptureCount() override {
1014     return match_info_->NumberOfCaptureRegisters() / 2;
1015   }
1016 
GetCapture(int i,bool * capture_exists)1017   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1018     Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
1019         isolate_, match_info_, i, capture_exists);
1020     return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
1021                              : isolate_->factory()->empty_string();
1022   }
1023 
GetNamedCapture(Handle<String> name,CaptureState * state)1024   MaybeHandle<String> GetNamedCapture(Handle<String> name,
1025                                       CaptureState* state) override {
1026     DCHECK(has_named_captures_);
1027     const int capture_index = LookupNamedCapture(
1028         [=](String capture_name) { return capture_name.Equals(*name); },
1029         *capture_name_map_);
1030 
1031     if (capture_index == -1) {
1032       *state = UNMATCHED;
1033       return isolate_->factory()->empty_string();
1034     }
1035 
1036     DCHECK(1 <= capture_index && capture_index <= CaptureCount());
1037 
1038     bool capture_exists;
1039     Handle<String> capture_value;
1040     ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
1041                                GetCapture(capture_index, &capture_exists),
1042                                String);
1043 
1044     if (!capture_exists) {
1045       *state = UNMATCHED;
1046       return isolate_->factory()->empty_string();
1047     } else {
1048       *state = MATCHED;
1049       return capture_value;
1050     }
1051   }
1052 
1053  private:
1054   Isolate* isolate_;
1055   Handle<String> subject_;
1056   Handle<RegExpMatchInfo> match_info_;
1057 
1058   bool has_named_captures_;
1059   Handle<FixedArray> capture_name_map_;
1060 };
1061 
1062 class VectorBackedMatch : public String::Match {
1063  public:
VectorBackedMatch(Isolate * isolate,Handle<String> subject,Handle<String> match,int match_position,base::Vector<Handle<Object>> captures,Handle<Object> groups_obj)1064   VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1065                     Handle<String> match, int match_position,
1066                     base::Vector<Handle<Object>> captures,
1067                     Handle<Object> groups_obj)
1068       : isolate_(isolate),
1069         match_(match),
1070         match_position_(match_position),
1071         captures_(captures) {
1072     subject_ = String::Flatten(isolate, subject);
1073 
1074     DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1075     has_named_captures_ = !groups_obj->IsUndefined(isolate);
1076     if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1077   }
1078 
GetMatch()1079   Handle<String> GetMatch() override { return match_; }
1080 
GetPrefix()1081   Handle<String> GetPrefix() override {
1082     return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1083   }
1084 
GetSuffix()1085   Handle<String> GetSuffix() override {
1086     const int match_end_position = match_position_ + match_->length();
1087     return isolate_->factory()->NewSubString(subject_, match_end_position,
1088                                              subject_->length());
1089   }
1090 
HasNamedCaptures()1091   bool HasNamedCaptures() override { return has_named_captures_; }
1092 
CaptureCount()1093   int CaptureCount() override { return captures_.length(); }
1094 
GetCapture(int i,bool * capture_exists)1095   MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1096     Handle<Object> capture_obj = captures_[i];
1097     if (capture_obj->IsUndefined(isolate_)) {
1098       *capture_exists = false;
1099       return isolate_->factory()->empty_string();
1100     }
1101     *capture_exists = true;
1102     return Object::ToString(isolate_, capture_obj);
1103   }
1104 
GetNamedCapture(Handle<String> name,CaptureState * state)1105   MaybeHandle<String> GetNamedCapture(Handle<String> name,
1106                                       CaptureState* state) override {
1107     DCHECK(has_named_captures_);
1108 
1109     Handle<Object> capture_obj;
1110     ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1111                                Object::GetProperty(isolate_, groups_obj_, name),
1112                                String);
1113     if (capture_obj->IsUndefined(isolate_)) {
1114       *state = UNMATCHED;
1115       return isolate_->factory()->empty_string();
1116     } else {
1117       *state = MATCHED;
1118       return Object::ToString(isolate_, capture_obj);
1119     }
1120   }
1121 
1122  private:
1123   Isolate* isolate_;
1124   Handle<String> subject_;
1125   Handle<String> match_;
1126   const int match_position_;
1127   base::Vector<Handle<Object>> captures_;
1128 
1129   bool has_named_captures_;
1130   Handle<JSReceiver> groups_obj_;
1131 };
1132 
1133 // Create the groups object (see also the RegExp result creation in
1134 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
ConstructNamedCaptureGroupsObject(Isolate * isolate,Handle<FixedArray> capture_map,const std::function<Object (int)> & f_get_capture)1135 Handle<JSObject> ConstructNamedCaptureGroupsObject(
1136     Isolate* isolate, Handle<FixedArray> capture_map,
1137     const std::function<Object(int)>& f_get_capture) {
1138   Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1139 
1140   const int named_capture_count = capture_map->length() >> 1;
1141   for (int i = 0; i < named_capture_count; i++) {
1142     const int name_ix = i * 2;
1143     const int index_ix = i * 2 + 1;
1144 
1145     Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1146                                 isolate);
1147     const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1148     DCHECK_GE(capture_ix, 1);  // Explicit groups start at index 1.
1149 
1150     Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1151     DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1152 
1153     JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1154   }
1155 
1156   return groups;
1157 }
1158 
1159 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1160 // separate last match info.  See comment on that function.
1161 template <bool has_capture>
SearchRegExpMultiple(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_array,Handle<JSArray> result_array)1162 static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1163                                    Handle<JSRegExp> regexp,
1164                                    Handle<RegExpMatchInfo> last_match_array,
1165                                    Handle<JSArray> result_array) {
1166   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1167   DCHECK_NE(has_capture, regexp->capture_count() == 0);
1168   DCHECK(subject->IsFlat());
1169 
1170   // Force tier up to native code for global replaces. The global replace is
1171   // implemented differently for native code and bytecode execution, where the
1172   // native code expects an array to store all the matches, and the bytecode
1173   // matches one at a time, so it's easier to tier-up to native code from the
1174   // start.
1175   if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) {
1176     regexp->MarkTierUpForNextExec();
1177     if (FLAG_trace_regexp_tier_up) {
1178       PrintF("Forcing tier-up of JSRegExp object %p in SearchRegExpMultiple\n",
1179              reinterpret_cast<void*>(regexp->ptr()));
1180     }
1181   }
1182 
1183   int capture_count = regexp->capture_count();
1184   int subject_length = subject->length();
1185 
1186   static const int kMinLengthToCache = 0x1000;
1187 
1188   if (subject_length > kMinLengthToCache) {
1189     FixedArray last_match_cache;
1190     Object cached_answer = RegExpResultsCache::Lookup(
1191         isolate->heap(), *subject, regexp->data(), &last_match_cache,
1192         RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1193     if (cached_answer.IsFixedArray()) {
1194       int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1195       int32_t* last_match = NewArray<int32_t>(capture_registers);
1196       for (int i = 0; i < capture_registers; i++) {
1197         last_match[i] = Smi::ToInt(last_match_cache.get(i));
1198       }
1199       Handle<FixedArray> cached_fixed_array =
1200           Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1201       // The cache FixedArray is a COW-array and we need to return a copy.
1202       Handle<FixedArray> copied_fixed_array =
1203           isolate->factory()->CopyFixedArrayWithMap(
1204               cached_fixed_array, isolate->factory()->fixed_array_map());
1205       JSArray::SetContent(result_array, copied_fixed_array);
1206       RegExp::SetLastMatchInfo(isolate, last_match_array, subject,
1207                                capture_count, last_match);
1208       DeleteArray(last_match);
1209       return *result_array;
1210     }
1211   }
1212 
1213   RegExpGlobalCache global_cache(regexp, subject, isolate);
1214   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1215 
1216   // Ensured in Runtime_RegExpExecMultiple.
1217   DCHECK(result_array->HasObjectElements());
1218   Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
1219                                      isolate);
1220   if (result_elements->length() < 16) {
1221     result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1222   }
1223 
1224   FixedArrayBuilder builder(result_elements);
1225 
1226   // Position to search from.
1227   int match_start = -1;
1228   int match_end = 0;
1229   bool first = true;
1230 
1231   // Two smis before and after the match, for very long strings.
1232   static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1233 
1234   while (true) {
1235     int32_t* current_match = global_cache.FetchNext();
1236     if (current_match == nullptr) break;
1237     match_start = current_match[0];
1238     builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
1239     if (match_end < match_start) {
1240       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1241                                                 match_start);
1242     }
1243     match_end = current_match[1];
1244     {
1245       // Avoid accumulating new handles inside loop.
1246       HandleScope temp_scope(isolate);
1247       Handle<String> match;
1248       if (!first) {
1249         match = isolate->factory()->NewProperSubString(subject, match_start,
1250                                                        match_end);
1251       } else {
1252         match =
1253             isolate->factory()->NewSubString(subject, match_start, match_end);
1254         first = false;
1255       }
1256 
1257       if (has_capture) {
1258         // Arguments array to replace function is match, captures, index and
1259         // subject, i.e., 3 + capture count in total. If the RegExp contains
1260         // named captures, they are also passed as the last argument.
1261 
1262         Handle<Object> maybe_capture_map(regexp->capture_name_map(), isolate);
1263         const bool has_named_captures = maybe_capture_map->IsFixedArray();
1264 
1265         const int argc =
1266             has_named_captures ? 4 + capture_count : 3 + capture_count;
1267 
1268         Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1269         int cursor = 0;
1270 
1271         elements->set(cursor++, *match);
1272         for (int i = 1; i <= capture_count; i++) {
1273           int start = current_match[i * 2];
1274           if (start >= 0) {
1275             int end = current_match[i * 2 + 1];
1276             DCHECK(start <= end);
1277             Handle<String> substring =
1278                 isolate->factory()->NewSubString(subject, start, end);
1279             elements->set(cursor++, *substring);
1280           } else {
1281             DCHECK_GT(0, current_match[i * 2 + 1]);
1282             elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1283           }
1284         }
1285 
1286         elements->set(cursor++, Smi::FromInt(match_start));
1287         elements->set(cursor++, *subject);
1288 
1289         if (has_named_captures) {
1290           Handle<FixedArray> capture_map =
1291               Handle<FixedArray>::cast(maybe_capture_map);
1292           Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1293               isolate, capture_map, [=](int ix) { return elements->get(ix); });
1294           elements->set(cursor++, *groups);
1295         }
1296 
1297         DCHECK_EQ(cursor, argc);
1298         builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1299       } else {
1300         builder.Add(*match);
1301       }
1302     }
1303   }
1304 
1305   if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1306 
1307   if (match_start >= 0) {
1308     // Finished matching, with at least one match.
1309     if (match_end < subject_length) {
1310       ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1311                                                 subject_length);
1312     }
1313 
1314     RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count,
1315                              global_cache.LastSuccessfulMatch());
1316 
1317     if (subject_length > kMinLengthToCache) {
1318       // Store the last successful match into the array for caching.
1319       int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1320       Handle<FixedArray> last_match_cache =
1321           isolate->factory()->NewFixedArray(capture_registers);
1322       int32_t* last_match = global_cache.LastSuccessfulMatch();
1323       for (int i = 0; i < capture_registers; i++) {
1324         last_match_cache->set(i, Smi::FromInt(last_match[i]));
1325       }
1326       Handle<FixedArray> result_fixed_array =
1327           FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1328       // Cache the result and copy the FixedArray into a COW array.
1329       Handle<FixedArray> copied_fixed_array =
1330           isolate->factory()->CopyFixedArrayWithMap(
1331               result_fixed_array, isolate->factory()->fixed_array_map());
1332       RegExpResultsCache::Enter(
1333           isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1334           last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1335     }
1336     return *builder.ToJSArray(result_array);
1337   } else {
1338     return ReadOnlyRoots(isolate).null_value();  // No matches at all.
1339   }
1340 }
1341 
1342 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1343 // doesn't properly call the underlying exec method.
RegExpReplace(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> string,Handle<String> replace)1344 V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1345     Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1346     Handle<String> replace) {
1347   // Functional fast-paths are dispatched directly by replace builtin.
1348   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1349 
1350   Factory* factory = isolate->factory();
1351 
1352   const int flags = regexp->flags();
1353   const bool global = (flags & JSRegExp::kGlobal) != 0;
1354   const bool sticky = (flags & JSRegExp::kSticky) != 0;
1355 
1356   replace = String::Flatten(isolate, replace);
1357 
1358   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1359 
1360   if (!global) {
1361     // Non-global regexp search, string replace.
1362 
1363     uint32_t last_index = 0;
1364     if (sticky) {
1365       Handle<Object> last_index_obj(regexp->last_index(), isolate);
1366       ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1367                                  Object::ToLength(isolate, last_index_obj),
1368                                  String);
1369       last_index = PositiveNumberToUint32(*last_index_obj);
1370     }
1371 
1372     Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1373                                      isolate);
1374 
1375     // A lastIndex exceeding the string length always returns null (signalling
1376     // failure) in RegExpBuiltinExec, thus we can skip the call.
1377     if (last_index <= static_cast<uint32_t>(string->length())) {
1378       ASSIGN_RETURN_ON_EXCEPTION(
1379           isolate, match_indices_obj,
1380           RegExp::Exec(isolate, regexp, string, last_index, last_match_info),
1381           String);
1382     }
1383 
1384     if (match_indices_obj->IsNull(isolate)) {
1385       if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1386       return string;
1387     }
1388 
1389     auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1390 
1391     const int start_index = match_indices->Capture(0);
1392     const int end_index = match_indices->Capture(1);
1393 
1394     if (sticky) {
1395       regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1396     }
1397 
1398     IncrementalStringBuilder builder(isolate);
1399     builder.AppendString(factory->NewSubString(string, 0, start_index));
1400 
1401     if (replace->length() > 0) {
1402       MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1403       Handle<String> replacement;
1404       ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1405                                  String::GetSubstitution(isolate, &m, replace),
1406                                  String);
1407       builder.AppendString(replacement);
1408     }
1409 
1410     builder.AppendString(
1411         factory->NewSubString(string, end_index, string->length()));
1412     return builder.Finish();
1413   } else {
1414     // Global regexp search, string replace.
1415     DCHECK(global);
1416     RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1417                         String);
1418 
1419     // Force tier up to native code for global replaces. The global replace is
1420     // implemented differently for native code and bytecode execution, where the
1421     // native code expects an array to store all the matches, and the bytecode
1422     // matches one at a time, so it's easier to tier-up to native code from the
1423     // start.
1424     if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) {
1425       regexp->MarkTierUpForNextExec();
1426       if (FLAG_trace_regexp_tier_up) {
1427         PrintF("Forcing tier-up of JSRegExp object %p in RegExpReplace\n",
1428                reinterpret_cast<void*>(regexp->ptr()));
1429       }
1430     }
1431 
1432     if (replace->length() == 0) {
1433       if (string->IsOneByteRepresentation()) {
1434         Object result =
1435             StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1436                 isolate, string, regexp, last_match_info);
1437         return handle(String::cast(result), isolate);
1438       } else {
1439         Object result =
1440             StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1441                 isolate, string, regexp, last_match_info);
1442         return handle(String::cast(result), isolate);
1443       }
1444     }
1445 
1446     Object result = StringReplaceGlobalRegExpWithString(
1447         isolate, string, regexp, replace, last_match_info);
1448     if (result.IsString()) {
1449       return handle(String::cast(result), isolate);
1450     } else {
1451       return MaybeHandle<String>();
1452     }
1453   }
1454 
1455   UNREACHABLE();
1456 }
1457 
1458 }  // namespace
1459 
1460 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1461 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1462   HandleScope handles(isolate);
1463   DCHECK_EQ(4, args.length());
1464 
1465   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1466   CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1467   CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1468   CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1469 
1470   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1471   CHECK(result_array->HasObjectElements());
1472 
1473   subject = String::Flatten(isolate, subject);
1474   CHECK(regexp->flags() & JSRegExp::kGlobal);
1475 
1476   Object result;
1477   if (regexp->capture_count() == 0) {
1478     result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1479                                          last_match_info, result_array);
1480   } else {
1481     result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1482                                         last_match_info, result_array);
1483   }
1484   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1485   return result;
1486 }
1487 
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction)1488 RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1489   HandleScope scope(isolate);
1490   DCHECK_EQ(3, args.length());
1491   CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1492   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1493   CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);
1494 
1495   DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1496   DCHECK(replace_obj->map().is_callable());
1497 
1498   Factory* factory = isolate->factory();
1499   Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1500 
1501   const int flags = regexp->flags();
1502   DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1503 
1504   // TODO(jgruber): This should be an easy port to CSA with massive payback.
1505 
1506   const bool sticky = (flags & JSRegExp::kSticky) != 0;
1507   uint32_t last_index = 0;
1508   if (sticky) {
1509     Handle<Object> last_index_obj(regexp->last_index(), isolate);
1510     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1511         isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1512     last_index = PositiveNumberToUint32(*last_index_obj);
1513   }
1514 
1515   Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1516                                    isolate);
1517 
1518   // A lastIndex exceeding the string length always returns null (signalling
1519   // failure) in RegExpBuiltinExec, thus we can skip the call.
1520   if (last_index <= static_cast<uint32_t>(subject->length())) {
1521     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1522         isolate, match_indices_obj,
1523         RegExp::Exec(isolate, regexp, subject, last_index, last_match_info));
1524   }
1525 
1526   if (match_indices_obj->IsNull(isolate)) {
1527     if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1528     return *subject;
1529   }
1530 
1531   Handle<RegExpMatchInfo> match_indices =
1532       Handle<RegExpMatchInfo>::cast(match_indices_obj);
1533 
1534   const int index = match_indices->Capture(0);
1535   const int end_of_match = match_indices->Capture(1);
1536 
1537   if (sticky) {
1538     regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1539   }
1540 
1541   IncrementalStringBuilder builder(isolate);
1542   builder.AppendString(factory->NewSubString(subject, 0, index));
1543 
1544   // Compute the parameter list consisting of the match, captures, index,
1545   // and subject for the replace function invocation. If the RegExp contains
1546   // named captures, they are also passed as the last argument.
1547 
1548   // The number of captures plus one for the match.
1549   const int m = match_indices->NumberOfCaptureRegisters() / 2;
1550 
1551   bool has_named_captures = false;
1552   Handle<FixedArray> capture_map;
1553   if (m > 1) {
1554     DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag()));
1555 
1556     Object maybe_capture_map = regexp->capture_name_map();
1557     if (maybe_capture_map.IsFixedArray()) {
1558       has_named_captures = true;
1559       capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1560     }
1561   }
1562 
1563   const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1564   if (argc == static_cast<uint32_t>(-1)) {
1565     THROW_NEW_ERROR_RETURN_FAILURE(
1566         isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1567   }
1568   base::ScopedVector<Handle<Object>> argv(argc);
1569 
1570   int cursor = 0;
1571   for (int j = 0; j < m; j++) {
1572     bool ok;
1573     Handle<String> capture =
1574         RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1575     if (ok) {
1576       argv[cursor++] = capture;
1577     } else {
1578       argv[cursor++] = factory->undefined_value();
1579     }
1580   }
1581 
1582   argv[cursor++] = handle(Smi::FromInt(index), isolate);
1583   argv[cursor++] = subject;
1584 
1585   if (has_named_captures) {
1586     argv[cursor++] = ConstructNamedCaptureGroupsObject(
1587         isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1588   }
1589 
1590   DCHECK_EQ(cursor, argc);
1591 
1592   Handle<Object> replacement_obj;
1593   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1594       isolate, replacement_obj,
1595       Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1596                       argv.begin()));
1597 
1598   Handle<String> replacement;
1599   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1600       isolate, replacement, Object::ToString(isolate, replacement_obj));
1601 
1602   builder.AppendString(replacement);
1603   builder.AppendString(
1604       factory->NewSubString(subject, end_of_match, subject->length()));
1605 
1606   RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1607 }
1608 
1609 namespace {
1610 
ToUint32(Isolate * isolate,Handle<Object> object,uint32_t * out)1611 V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1612                                                    Handle<Object> object,
1613                                                    uint32_t* out) {
1614   if (object->IsUndefined(isolate)) {
1615     *out = kMaxUInt32;
1616     return object;
1617   }
1618 
1619   Handle<Object> number;
1620   ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1621                              Object);
1622   *out = NumberToUint32(*number);
1623   return object;
1624 }
1625 
NewJSArrayWithElements(Isolate * isolate,Handle<FixedArray> elems,int num_elems)1626 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1627                                        Handle<FixedArray> elems,
1628                                        int num_elems) {
1629   return isolate->factory()->NewJSArrayWithElements(
1630       FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1631 }
1632 
1633 }  // namespace
1634 
1635 // Slow path for:
1636 // ES#sec-regexp.prototype-@@replace
1637 // RegExp.prototype [ @@split ] ( string, limit )
RUNTIME_FUNCTION(Runtime_RegExpSplit)1638 RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1639   HandleScope scope(isolate);
1640   DCHECK_EQ(3, args.length());
1641 
1642   CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1643   CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1644   CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1645 
1646   Factory* factory = isolate->factory();
1647 
1648   Handle<JSFunction> regexp_fun = isolate->regexp_function();
1649   Handle<Object> ctor;
1650   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1651       isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1652 
1653   Handle<Object> flags_obj;
1654   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1655       isolate, flags_obj,
1656       JSObject::GetProperty(isolate, recv, factory->flags_string()));
1657 
1658   Handle<String> flags;
1659   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1660                                      Object::ToString(isolate, flags_obj));
1661 
1662   Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1663   const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1664 
1665   Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1666   const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1667 
1668   Handle<String> new_flags = flags;
1669   if (!sticky) {
1670     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1671                                        factory->NewConsString(flags, y_str));
1672   }
1673 
1674   Handle<JSReceiver> splitter;
1675   {
1676     const int argc = 2;
1677 
1678     base::ScopedVector<Handle<Object>> argv(argc);
1679     argv[0] = recv;
1680     argv[1] = new_flags;
1681 
1682     Handle<Object> splitter_obj;
1683     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1684         isolate, splitter_obj,
1685         Execution::New(isolate, ctor, argc, argv.begin()));
1686 
1687     splitter = Handle<JSReceiver>::cast(splitter_obj);
1688   }
1689 
1690   uint32_t limit;
1691   RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1692 
1693   const uint32_t length = string->length();
1694 
1695   if (limit == 0) return *factory->NewJSArray(0);
1696 
1697   if (length == 0) {
1698     Handle<Object> result;
1699     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1700         isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1701                                                  factory->undefined_value()));
1702 
1703     if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1704 
1705     Handle<FixedArray> elems = factory->NewFixedArray(1);
1706     elems->set(0, *string);
1707     return *factory->NewJSArrayWithElements(elems);
1708   }
1709 
1710   static const int kInitialArraySize = 8;
1711   Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1712   uint32_t num_elems = 0;
1713 
1714   uint32_t string_index = 0;
1715   uint32_t prev_string_index = 0;
1716   while (string_index < length) {
1717     RETURN_FAILURE_ON_EXCEPTION(
1718         isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1719 
1720     Handle<Object> result;
1721     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1722         isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1723                                                  factory->undefined_value()));
1724 
1725     if (result->IsNull(isolate)) {
1726       string_index = static_cast<uint32_t>(
1727           RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1728       continue;
1729     }
1730 
1731     Handle<Object> last_index_obj;
1732     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1733         isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1734 
1735     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1736         isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1737 
1738     const uint32_t end =
1739         std::min(PositiveNumberToUint32(*last_index_obj), length);
1740     if (end == prev_string_index) {
1741       string_index = static_cast<uint32_t>(
1742           RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1743       continue;
1744     }
1745 
1746     {
1747       Handle<String> substr =
1748           factory->NewSubString(string, prev_string_index, string_index);
1749       elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1750       if (num_elems == limit) {
1751         return *NewJSArrayWithElements(isolate, elems, num_elems);
1752       }
1753     }
1754 
1755     prev_string_index = end;
1756 
1757     Handle<Object> num_captures_obj;
1758     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1759         isolate, num_captures_obj,
1760         Object::GetProperty(isolate, result,
1761                             isolate->factory()->length_string()));
1762 
1763     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1764         isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1765     const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1766 
1767     for (uint32_t i = 1; i < num_captures; i++) {
1768       Handle<Object> capture;
1769       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1770           isolate, capture, Object::GetElement(isolate, result, i));
1771       elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1772       if (num_elems == limit) {
1773         return *NewJSArrayWithElements(isolate, elems, num_elems);
1774       }
1775     }
1776 
1777     string_index = prev_string_index;
1778   }
1779 
1780   {
1781     Handle<String> substr =
1782         factory->NewSubString(string, prev_string_index, length);
1783     elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1784   }
1785 
1786   return *NewJSArrayWithElements(isolate, elems, num_elems);
1787 }
1788 
1789 // Slow path for:
1790 // ES#sec-regexp.prototype-@@replace
1791 // RegExp.prototype [ @@replace ] ( string, replaceValue )
RUNTIME_FUNCTION(Runtime_RegExpReplaceRT)1792 RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
1793   HandleScope scope(isolate);
1794   DCHECK_EQ(3, args.length());
1795 
1796   CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1797   CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1798   Handle<Object> replace_obj = args.at(2);
1799 
1800   Factory* factory = isolate->factory();
1801 
1802   string = String::Flatten(isolate, string);
1803 
1804   const bool functional_replace = replace_obj->IsCallable();
1805 
1806   Handle<String> replace;
1807   if (!functional_replace) {
1808     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1809                                        Object::ToString(isolate, replace_obj));
1810   }
1811 
1812   // Fast-path for unmodified JSRegExps (and non-functional replace).
1813   if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1814     // We should never get here with functional replace because unmodified
1815     // regexp and functional replace should be fully handled in CSA code.
1816     CHECK(!functional_replace);
1817     Handle<Object> result;
1818     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1819         isolate, result,
1820         RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
1821     DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
1822     return *result;
1823   }
1824 
1825   const uint32_t length = string->length();
1826 
1827   Handle<Object> global_obj;
1828   ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1829       isolate, global_obj,
1830       JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1831   const bool global = global_obj->BooleanValue(isolate);
1832 
1833   bool unicode = false;
1834   if (global) {
1835     Handle<Object> unicode_obj;
1836     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1837         isolate, unicode_obj,
1838         JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1839     unicode = unicode_obj->BooleanValue(isolate);
1840 
1841     RETURN_FAILURE_ON_EXCEPTION(isolate,
1842                                 RegExpUtils::SetLastIndex(isolate, recv, 0));
1843   }
1844 
1845   base::SmallVector<Handle<Object>, kStaticVectorSlots> results;
1846 
1847   while (true) {
1848     Handle<Object> result;
1849     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1850         isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1851                                                  factory->undefined_value()));
1852 
1853     if (result->IsNull(isolate)) break;
1854 
1855     results.emplace_back(result);
1856     if (!global) break;
1857 
1858     Handle<Object> match_obj;
1859     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1860                                        Object::GetElement(isolate, result, 0));
1861 
1862     Handle<String> match;
1863     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1864                                        Object::ToString(isolate, match_obj));
1865 
1866     if (match->length() == 0) {
1867       RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1868                                                isolate, recv, string, unicode));
1869     }
1870   }
1871 
1872   // TODO(jgruber): Look into ReplacementStringBuilder instead.
1873   IncrementalStringBuilder builder(isolate);
1874   uint32_t next_source_position = 0;
1875 
1876   for (const auto& result : results) {
1877     HandleScope handle_scope(isolate);
1878     Handle<Object> captures_length_obj;
1879     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1880         isolate, captures_length_obj,
1881         Object::GetProperty(isolate, result, factory->length_string()));
1882 
1883     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1884         isolate, captures_length_obj,
1885         Object::ToLength(isolate, captures_length_obj));
1886     const uint32_t captures_length =
1887         PositiveNumberToUint32(*captures_length_obj);
1888 
1889     Handle<Object> match_obj;
1890     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1891                                        Object::GetElement(isolate, result, 0));
1892 
1893     Handle<String> match;
1894     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1895                                        Object::ToString(isolate, match_obj));
1896 
1897     const int match_length = match->length();
1898 
1899     Handle<Object> position_obj;
1900     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1901         isolate, position_obj,
1902         Object::GetProperty(isolate, result, factory->index_string()));
1903 
1904     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1905         isolate, position_obj, Object::ToInteger(isolate, position_obj));
1906     const uint32_t position =
1907         std::min(PositiveNumberToUint32(*position_obj), length);
1908 
1909     // Do not reserve capacity since captures_length is user-controlled.
1910     base::SmallVector<Handle<Object>, kStaticVectorSlots> captures;
1911 
1912     for (uint32_t n = 0; n < captures_length; n++) {
1913       Handle<Object> capture;
1914       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1915           isolate, capture, Object::GetElement(isolate, result, n));
1916 
1917       if (!capture->IsUndefined(isolate)) {
1918         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1919                                            Object::ToString(isolate, capture));
1920       }
1921       captures.emplace_back(capture);
1922     }
1923 
1924     Handle<Object> groups_obj = isolate->factory()->undefined_value();
1925     ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1926         isolate, groups_obj,
1927         Object::GetProperty(isolate, result, factory->groups_string()));
1928 
1929     const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1930 
1931     Handle<String> replacement;
1932     if (functional_replace) {
1933       const uint32_t argc =
1934           GetArgcForReplaceCallable(captures_length, has_named_captures);
1935       if (argc == static_cast<uint32_t>(-1)) {
1936         THROW_NEW_ERROR_RETURN_FAILURE(
1937             isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1938       }
1939 
1940       base::ScopedVector<Handle<Object>> argv(argc);
1941 
1942       int cursor = 0;
1943       for (uint32_t j = 0; j < captures_length; j++) {
1944         argv[cursor++] = captures[j];
1945       }
1946 
1947       argv[cursor++] = handle(Smi::FromInt(position), isolate);
1948       argv[cursor++] = string;
1949       if (has_named_captures) argv[cursor++] = groups_obj;
1950 
1951       DCHECK_EQ(cursor, argc);
1952 
1953       Handle<Object> replacement_obj;
1954       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1955           isolate, replacement_obj,
1956           Execution::Call(isolate, replace_obj, factory->undefined_value(),
1957                           argc, argv.begin()));
1958 
1959       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1960           isolate, replacement, Object::ToString(isolate, replacement_obj));
1961     } else {
1962       DCHECK(!functional_replace);
1963       if (!groups_obj->IsUndefined(isolate)) {
1964         ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1965             isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1966       }
1967       VectorBackedMatch m(isolate, string, match, position,
1968                           base::VectorOf(captures), groups_obj);
1969       ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1970           isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1971     }
1972 
1973     if (position >= next_source_position) {
1974       builder.AppendString(
1975           factory->NewSubString(string, next_source_position, position));
1976       builder.AppendString(replacement);
1977 
1978       next_source_position = position + match_length;
1979     }
1980   }
1981 
1982   if (next_source_position < length) {
1983     builder.AppendString(
1984         factory->NewSubString(string, next_source_position, length));
1985   }
1986 
1987   RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1988 }
1989 
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile)1990 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1991   HandleScope scope(isolate);
1992   DCHECK_EQ(3, args.length());
1993   // TODO(pwong): To follow the spec more closely and simplify calling code,
1994   // this could handle the canonicalization of pattern and flags. See
1995   // https://tc39.github.io/ecma262/#sec-regexpinitialize
1996   CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1997   CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1998   CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1999 
2000   RETURN_FAILURE_ON_EXCEPTION(isolate,
2001                               JSRegExp::Initialize(regexp, source, flags));
2002 
2003   return *regexp;
2004 }
2005 
RUNTIME_FUNCTION(Runtime_IsRegExp)2006 RUNTIME_FUNCTION(Runtime_IsRegExp) {
2007   SealHandleScope shs(isolate);
2008   DCHECK_EQ(1, args.length());
2009   CONVERT_ARG_CHECKED(Object, obj, 0);
2010   return isolate->heap()->ToBoolean(obj.IsJSRegExp());
2011 }
2012 
RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags)2013 RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags) {
2014   HandleScope scope(isolate);
2015   DCHECK_EQ(1, args.length());
2016   CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
2017   Handle<String> flags = JSRegExp::StringFromFlags(isolate, regexp.flags());
2018   return *flags;
2019 }
2020 
2021 }  // namespace internal
2022 }  // namespace v8
2023