1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <functional>
6
7 #include "src/base/small-vector.h"
8 #include "src/base/strings.h"
9 #include "src/common/message-template.h"
10 #include "src/execution/arguments-inl.h"
11 #include "src/execution/isolate-inl.h"
12 #include "src/heap/heap-inl.h" // For ToBoolean. TODO(jkummerow): Drop.
13 #include "src/logging/counters.h"
14 #include "src/numbers/conversions-inl.h"
15 #include "src/objects/js-array-inl.h"
16 #include "src/objects/js-regexp-inl.h"
17 #include "src/regexp/regexp-utils.h"
18 #include "src/regexp/regexp.h"
19 #include "src/runtime/runtime-utils.h"
20 #include "src/strings/string-builder-inl.h"
21 #include "src/strings/string-search.h"
22
23 namespace v8 {
24 namespace internal {
25
26 namespace {
27
28 // Fairly arbitrary, but intended to fit:
29 //
30 // - captures
31 // - results
32 // - parsed replacement pattern parts
33 //
34 // for small, common cases.
35 constexpr int kStaticVectorSlots = 8;
36
37 // Returns -1 for failure.
GetArgcForReplaceCallable(uint32_t num_captures,bool has_named_captures)38 uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
39 bool has_named_captures) {
40 const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
41 const uint32_t kAdditionalArgsWithNamedCaptures = 3;
42 if (num_captures > Code::kMaxArguments) return -1;
43 uint32_t argc = has_named_captures
44 ? num_captures + kAdditionalArgsWithNamedCaptures
45 : num_captures + kAdditionalArgsWithoutNamedCaptures;
46 STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
47 kAdditionalArgsWithNamedCaptures);
48 return (argc > Code::kMaxArguments) ? -1 : argc;
49 }
50
51 // Looks up the capture of the given name. Returns the (1-based) numbered
52 // capture index or -1 on failure.
LookupNamedCapture(const std::function<bool (String)> & name_matches,FixedArray capture_name_map)53 int LookupNamedCapture(const std::function<bool(String)>& name_matches,
54 FixedArray capture_name_map) {
55 // TODO(jgruber): Sort capture_name_map and do binary search via
56 // internalized strings.
57
58 int maybe_capture_index = -1;
59 const int named_capture_count = capture_name_map.length() >> 1;
60 for (int j = 0; j < named_capture_count; j++) {
61 // The format of {capture_name_map} is documented at
62 // JSRegExp::kIrregexpCaptureNameMapIndex.
63 const int name_ix = j * 2;
64 const int index_ix = j * 2 + 1;
65
66 String capture_name = String::cast(capture_name_map.get(name_ix));
67 if (!name_matches(capture_name)) continue;
68
69 maybe_capture_index = Smi::ToInt(capture_name_map.get(index_ix));
70 break;
71 }
72
73 return maybe_capture_index;
74 }
75
76 } // namespace
77
78 class CompiledReplacement {
79 public:
80 // Return whether the replacement is simple.
81 bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
82 Handle<String> replacement, int capture_count,
83 int subject_length);
84
85 // Use Apply only if Compile returned false.
86 void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
87 int32_t* match);
88
89 // Number of distinct parts of the replacement pattern.
parts()90 int parts() { return static_cast<int>(parts_.size()); }
91
92 private:
93 enum PartType {
94 SUBJECT_PREFIX = 1,
95 SUBJECT_SUFFIX,
96 SUBJECT_CAPTURE,
97 REPLACEMENT_SUBSTRING,
98 REPLACEMENT_STRING,
99 EMPTY_REPLACEMENT,
100 NUMBER_OF_PART_TYPES
101 };
102
103 struct ReplacementPart {
SubjectMatchv8::internal::CompiledReplacement::ReplacementPart104 static inline ReplacementPart SubjectMatch() {
105 return ReplacementPart(SUBJECT_CAPTURE, 0);
106 }
SubjectCapturev8::internal::CompiledReplacement::ReplacementPart107 static inline ReplacementPart SubjectCapture(int capture_index) {
108 return ReplacementPart(SUBJECT_CAPTURE, capture_index);
109 }
SubjectPrefixv8::internal::CompiledReplacement::ReplacementPart110 static inline ReplacementPart SubjectPrefix() {
111 return ReplacementPart(SUBJECT_PREFIX, 0);
112 }
SubjectSuffixv8::internal::CompiledReplacement::ReplacementPart113 static inline ReplacementPart SubjectSuffix(int subject_length) {
114 return ReplacementPart(SUBJECT_SUFFIX, subject_length);
115 }
ReplacementStringv8::internal::CompiledReplacement::ReplacementPart116 static inline ReplacementPart ReplacementString() {
117 return ReplacementPart(REPLACEMENT_STRING, 0);
118 }
EmptyReplacementv8::internal::CompiledReplacement::ReplacementPart119 static inline ReplacementPart EmptyReplacement() {
120 return ReplacementPart(EMPTY_REPLACEMENT, 0);
121 }
ReplacementSubStringv8::internal::CompiledReplacement::ReplacementPart122 static inline ReplacementPart ReplacementSubString(int from, int to) {
123 DCHECK_LE(0, from);
124 DCHECK_GT(to, from);
125 return ReplacementPart(-from, to);
126 }
127
128 // If tag <= 0 then it is the negation of a start index of a substring of
129 // the replacement pattern, otherwise it's a value from PartType.
ReplacementPartv8::internal::CompiledReplacement::ReplacementPart130 ReplacementPart(int tag, int data) : tag(tag), data(data) {
131 // Must be non-positive or a PartType value.
132 DCHECK(tag < NUMBER_OF_PART_TYPES);
133 }
134 // Either a value of PartType or a non-positive number that is
135 // the negation of an index into the replacement string.
136 int tag;
137 // The data value's interpretation depends on the value of tag:
138 // tag == SUBJECT_PREFIX ||
139 // tag == SUBJECT_SUFFIX: data is unused.
140 // tag == SUBJECT_CAPTURE: data is the number of the capture.
141 // tag == REPLACEMENT_SUBSTRING ||
142 // tag == REPLACEMENT_STRING: data is index into array of substrings
143 // of the replacement string.
144 // tag == EMPTY_REPLACEMENT: data is unused.
145 // tag <= 0: Temporary representation of the substring of the replacement
146 // string ranging over -tag .. data.
147 // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
148 // substring objects.
149 int data;
150 };
151
152 template <typename Char>
ParseReplacementPattern(base::Vector<Char> characters,FixedArray capture_name_map,int capture_count,int subject_length)153 bool ParseReplacementPattern(base::Vector<Char> characters,
154 FixedArray capture_name_map, int capture_count,
155 int subject_length) {
156 // Equivalent to String::GetSubstitution, except that this method converts
157 // the replacement string into an internal representation that avoids
158 // repeated parsing when used repeatedly.
159 int length = characters.length();
160 int last = 0;
161 for (int i = 0; i < length; i++) {
162 Char c = characters[i];
163 if (c == '$') {
164 int next_index = i + 1;
165 if (next_index == length) { // No next character!
166 break;
167 }
168 Char c2 = characters[next_index];
169 switch (c2) {
170 case '$':
171 if (i > last) {
172 // There is a substring before. Include the first "$".
173 parts_.emplace_back(
174 ReplacementPart::ReplacementSubString(last, next_index));
175 last = next_index + 1; // Continue after the second "$".
176 } else {
177 // Let the next substring start with the second "$".
178 last = next_index;
179 }
180 i = next_index;
181 break;
182 case '`':
183 if (i > last) {
184 parts_.emplace_back(
185 ReplacementPart::ReplacementSubString(last, i));
186 }
187 parts_.emplace_back(ReplacementPart::SubjectPrefix());
188 i = next_index;
189 last = i + 1;
190 break;
191 case '\'':
192 if (i > last) {
193 parts_.emplace_back(
194 ReplacementPart::ReplacementSubString(last, i));
195 }
196 parts_.emplace_back(ReplacementPart::SubjectSuffix(subject_length));
197 i = next_index;
198 last = i + 1;
199 break;
200 case '&':
201 if (i > last) {
202 parts_.emplace_back(
203 ReplacementPart::ReplacementSubString(last, i));
204 }
205 parts_.emplace_back(ReplacementPart::SubjectMatch());
206 i = next_index;
207 last = i + 1;
208 break;
209 case '0':
210 case '1':
211 case '2':
212 case '3':
213 case '4':
214 case '5':
215 case '6':
216 case '7':
217 case '8':
218 case '9': {
219 int capture_ref = c2 - '0';
220 if (capture_ref > capture_count) {
221 i = next_index;
222 continue;
223 }
224 int second_digit_index = next_index + 1;
225 if (second_digit_index < length) {
226 // Peek ahead to see if we have two digits.
227 Char c3 = characters[second_digit_index];
228 if ('0' <= c3 && c3 <= '9') { // Double digits.
229 int double_digit_ref = capture_ref * 10 + c3 - '0';
230 if (double_digit_ref <= capture_count) {
231 next_index = second_digit_index;
232 capture_ref = double_digit_ref;
233 }
234 }
235 }
236 if (capture_ref > 0) {
237 if (i > last) {
238 parts_.emplace_back(
239 ReplacementPart::ReplacementSubString(last, i));
240 }
241 DCHECK(capture_ref <= capture_count);
242 parts_.emplace_back(ReplacementPart::SubjectCapture(capture_ref));
243 last = next_index + 1;
244 }
245 i = next_index;
246 break;
247 }
248 case '<': {
249 if (capture_name_map.is_null()) {
250 i = next_index;
251 break;
252 }
253
254 // Scan until the next '>', and let the enclosed substring be the
255 // groupName.
256
257 const int name_start_index = next_index + 1;
258 int closing_bracket_index = -1;
259 for (int j = name_start_index; j < length; j++) {
260 if (characters[j] == '>') {
261 closing_bracket_index = j;
262 break;
263 }
264 }
265
266 // If no closing bracket is found, '$<' is treated as a string
267 // literal.
268 if (closing_bracket_index == -1) {
269 i = next_index;
270 break;
271 }
272
273 base::Vector<Char> requested_name =
274 characters.SubVector(name_start_index, closing_bracket_index);
275
276 // Let capture be ? Get(namedCaptures, groupName).
277
278 const int capture_index = LookupNamedCapture(
279 [=](String capture_name) {
280 return capture_name.IsEqualTo(requested_name);
281 },
282 capture_name_map);
283
284 // If capture is undefined or does not exist, replace the text
285 // through the following '>' with the empty string.
286 // Otherwise, replace the text through the following '>' with
287 // ? ToString(capture).
288
289 DCHECK(capture_index == -1 ||
290 (1 <= capture_index && capture_index <= capture_count));
291
292 if (i > last) {
293 parts_.emplace_back(
294 ReplacementPart::ReplacementSubString(last, i));
295 }
296 parts_.emplace_back(
297 (capture_index == -1)
298 ? ReplacementPart::EmptyReplacement()
299 : ReplacementPart::SubjectCapture(capture_index));
300 last = closing_bracket_index + 1;
301 i = closing_bracket_index;
302 break;
303 }
304 default:
305 i = next_index;
306 break;
307 }
308 }
309 }
310 if (length > last) {
311 if (last == 0) {
312 // Replacement is simple. Do not use Apply to do the replacement.
313 return true;
314 } else {
315 parts_.emplace_back(
316 ReplacementPart::ReplacementSubString(last, length));
317 }
318 }
319 return false;
320 }
321
322 base::SmallVector<ReplacementPart, kStaticVectorSlots> parts_;
323 base::SmallVector<Handle<String>, kStaticVectorSlots> replacement_substrings_;
324 };
325
Compile(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> replacement,int capture_count,int subject_length)326 bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
327 Handle<String> replacement, int capture_count,
328 int subject_length) {
329 {
330 DisallowGarbageCollection no_gc;
331 String::FlatContent content = replacement->GetFlatContent(no_gc);
332 DCHECK(content.IsFlat());
333
334 FixedArray capture_name_map;
335 if (capture_count > 0) {
336 DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag()));
337 Object maybe_capture_name_map = regexp->capture_name_map();
338 if (maybe_capture_name_map.IsFixedArray()) {
339 capture_name_map = FixedArray::cast(maybe_capture_name_map);
340 }
341 }
342
343 bool simple;
344 if (content.IsOneByte()) {
345 simple =
346 ParseReplacementPattern(content.ToOneByteVector(), capture_name_map,
347 capture_count, subject_length);
348 } else {
349 DCHECK(content.IsTwoByte());
350 simple = ParseReplacementPattern(content.ToUC16Vector(), capture_name_map,
351 capture_count, subject_length);
352 }
353 if (simple) return true;
354 }
355
356 // Find substrings of replacement string and create them as String objects.
357 int substring_index = 0;
358 for (ReplacementPart& part : parts_) {
359 int tag = part.tag;
360 if (tag <= 0) { // A replacement string slice.
361 int from = -tag;
362 int to = part.data;
363 replacement_substrings_.emplace_back(
364 isolate->factory()->NewSubString(replacement, from, to));
365 part.tag = REPLACEMENT_SUBSTRING;
366 part.data = substring_index;
367 substring_index++;
368 } else if (tag == REPLACEMENT_STRING) {
369 replacement_substrings_.emplace_back(replacement);
370 part.data = substring_index;
371 substring_index++;
372 }
373 }
374 return false;
375 }
376
377
Apply(ReplacementStringBuilder * builder,int match_from,int match_to,int32_t * match)378 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
379 int match_from, int match_to, int32_t* match) {
380 DCHECK_LT(0, parts_.size());
381 for (ReplacementPart& part : parts_) {
382 switch (part.tag) {
383 case SUBJECT_PREFIX:
384 if (match_from > 0) builder->AddSubjectSlice(0, match_from);
385 break;
386 case SUBJECT_SUFFIX: {
387 int subject_length = part.data;
388 if (match_to < subject_length) {
389 builder->AddSubjectSlice(match_to, subject_length);
390 }
391 break;
392 }
393 case SUBJECT_CAPTURE: {
394 int capture = part.data;
395 int from = match[capture * 2];
396 int to = match[capture * 2 + 1];
397 if (from >= 0 && to > from) {
398 builder->AddSubjectSlice(from, to);
399 }
400 break;
401 }
402 case REPLACEMENT_SUBSTRING:
403 case REPLACEMENT_STRING:
404 builder->AddString(replacement_substrings_[part.data]);
405 break;
406 case EMPTY_REPLACEMENT:
407 break;
408 default:
409 UNREACHABLE();
410 }
411 }
412 }
413
FindOneByteStringIndices(base::Vector<const uint8_t> subject,uint8_t pattern,std::vector<int> * indices,unsigned int limit)414 void FindOneByteStringIndices(base::Vector<const uint8_t> subject,
415 uint8_t pattern, std::vector<int>* indices,
416 unsigned int limit) {
417 DCHECK_LT(0, limit);
418 // Collect indices of pattern in subject using memchr.
419 // Stop after finding at most limit values.
420 const uint8_t* subject_start = subject.begin();
421 const uint8_t* subject_end = subject_start + subject.length();
422 const uint8_t* pos = subject_start;
423 while (limit > 0) {
424 pos = reinterpret_cast<const uint8_t*>(
425 memchr(pos, pattern, subject_end - pos));
426 if (pos == nullptr) return;
427 indices->push_back(static_cast<int>(pos - subject_start));
428 pos++;
429 limit--;
430 }
431 }
432
FindTwoByteStringIndices(const base::Vector<const base::uc16> subject,base::uc16 pattern,std::vector<int> * indices,unsigned int limit)433 void FindTwoByteStringIndices(const base::Vector<const base::uc16> subject,
434 base::uc16 pattern, std::vector<int>* indices,
435 unsigned int limit) {
436 DCHECK_LT(0, limit);
437 const base::uc16* subject_start = subject.begin();
438 const base::uc16* subject_end = subject_start + subject.length();
439 for (const base::uc16* pos = subject_start; pos < subject_end && limit > 0;
440 pos++) {
441 if (*pos == pattern) {
442 indices->push_back(static_cast<int>(pos - subject_start));
443 limit--;
444 }
445 }
446 }
447
448 template <typename SubjectChar, typename PatternChar>
FindStringIndices(Isolate * isolate,base::Vector<const SubjectChar> subject,base::Vector<const PatternChar> pattern,std::vector<int> * indices,unsigned int limit)449 void FindStringIndices(Isolate* isolate,
450 base::Vector<const SubjectChar> subject,
451 base::Vector<const PatternChar> pattern,
452 std::vector<int>* indices, unsigned int limit) {
453 DCHECK_LT(0, limit);
454 // Collect indices of pattern in subject.
455 // Stop after finding at most limit values.
456 int pattern_length = pattern.length();
457 int index = 0;
458 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
459 while (limit > 0) {
460 index = search.Search(subject, index);
461 if (index < 0) return;
462 indices->push_back(index);
463 index += pattern_length;
464 limit--;
465 }
466 }
467
FindStringIndicesDispatch(Isolate * isolate,String subject,String pattern,std::vector<int> * indices,unsigned int limit)468 void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
469 std::vector<int>* indices, unsigned int limit) {
470 {
471 DisallowGarbageCollection no_gc;
472 String::FlatContent subject_content = subject.GetFlatContent(no_gc);
473 String::FlatContent pattern_content = pattern.GetFlatContent(no_gc);
474 DCHECK(subject_content.IsFlat());
475 DCHECK(pattern_content.IsFlat());
476 if (subject_content.IsOneByte()) {
477 base::Vector<const uint8_t> subject_vector =
478 subject_content.ToOneByteVector();
479 if (pattern_content.IsOneByte()) {
480 base::Vector<const uint8_t> pattern_vector =
481 pattern_content.ToOneByteVector();
482 if (pattern_vector.length() == 1) {
483 FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
484 limit);
485 } else {
486 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487 limit);
488 }
489 } else {
490 FindStringIndices(isolate, subject_vector,
491 pattern_content.ToUC16Vector(), indices, limit);
492 }
493 } else {
494 base::Vector<const base::uc16> subject_vector =
495 subject_content.ToUC16Vector();
496 if (pattern_content.IsOneByte()) {
497 base::Vector<const uint8_t> pattern_vector =
498 pattern_content.ToOneByteVector();
499 if (pattern_vector.length() == 1) {
500 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
501 limit);
502 } else {
503 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
504 limit);
505 }
506 } else {
507 base::Vector<const base::uc16> pattern_vector =
508 pattern_content.ToUC16Vector();
509 if (pattern_vector.length() == 1) {
510 FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
511 limit);
512 } else {
513 FindStringIndices(isolate, subject_vector, pattern_vector, indices,
514 limit);
515 }
516 }
517 }
518 }
519 }
520
521 namespace {
GetRewoundRegexpIndicesList(Isolate * isolate)522 std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
523 std::vector<int>* list = isolate->regexp_indices();
524 list->clear();
525 return list;
526 }
527
TruncateRegexpIndicesList(Isolate * isolate)528 void TruncateRegexpIndicesList(Isolate* isolate) {
529 // Same size as smallest zone segment, preserving behavior from the
530 // runtime zone.
531 // TODO(jgruber): Consider removing the reusable regexp_indices list and
532 // simply allocating a new list each time. It feels like we're needlessly
533 // optimizing an edge case.
534 static const int kMaxRegexpIndicesListCapacity = 8 * KB / kIntSize;
535 std::vector<int>* indices = isolate->regexp_indices();
536 if (indices->capacity() > kMaxRegexpIndicesListCapacity) {
537 // Throw away backing storage.
538 indices->clear();
539 indices->shrink_to_fit();
540 }
541 }
542 } // namespace
543
544 template <typename ResultSeqString>
StringReplaceGlobalAtomRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> pattern_regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)545 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalAtomRegExpWithString(
546 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
547 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
548 DCHECK(subject->IsFlat());
549 DCHECK(replacement->IsFlat());
550
551 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
552
553 DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->type_tag());
554 String pattern = pattern_regexp->atom_pattern();
555 int subject_len = subject->length();
556 int pattern_len = pattern.length();
557 int replacement_len = replacement->length();
558
559 FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
560
561 if (indices->empty()) return *subject;
562
563 // Detect integer overflow.
564 int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
565 static_cast<int64_t>(pattern_len)) *
566 static_cast<int64_t>(indices->size()) +
567 static_cast<int64_t>(subject_len);
568 int result_len;
569 if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
570 STATIC_ASSERT(String::kMaxLength < kMaxInt);
571 result_len = kMaxInt; // Provoke exception.
572 } else {
573 result_len = static_cast<int>(result_len_64);
574 }
575 if (result_len == 0) {
576 return ReadOnlyRoots(isolate).empty_string();
577 }
578
579 int subject_pos = 0;
580 int result_pos = 0;
581
582 MaybeHandle<SeqString> maybe_res;
583 if (ResultSeqString::kHasOneByteEncoding) {
584 maybe_res = isolate->factory()->NewRawOneByteString(result_len);
585 } else {
586 maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
587 }
588 Handle<SeqString> untyped_res;
589 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
590 Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
591
592 DisallowGarbageCollection no_gc;
593 for (int index : *indices) {
594 // Copy non-matched subject content.
595 if (subject_pos < index) {
596 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
597 subject_pos, index - subject_pos);
598 result_pos += index - subject_pos;
599 }
600
601 // Replace match.
602 if (replacement_len > 0) {
603 String::WriteToFlat(*replacement, result->GetChars(no_gc) + result_pos, 0,
604 replacement_len);
605 result_pos += replacement_len;
606 }
607
608 subject_pos = index + pattern_len;
609 }
610 // Add remaining subject content at the end.
611 if (subject_pos < subject_len) {
612 String::WriteToFlat(*subject, result->GetChars(no_gc) + result_pos,
613 subject_pos, subject_len - subject_pos);
614 }
615
616 int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
617 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, 0, match_indices);
618
619 TruncateRegexpIndicesList(isolate);
620
621 return *result;
622 }
623
StringReplaceGlobalRegExpWithString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<String> replacement,Handle<RegExpMatchInfo> last_match_info)624 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
625 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
626 Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
627 DCHECK(subject->IsFlat());
628 DCHECK(replacement->IsFlat());
629
630 int capture_count = regexp->capture_count();
631 int subject_length = subject->length();
632
633 // Ensure the RegExp is compiled so we can access the capture-name map.
634 if (!RegExp::EnsureFullyCompiled(isolate, regexp, subject)) {
635 return ReadOnlyRoots(isolate).exception();
636 }
637
638 CompiledReplacement compiled_replacement;
639 const bool simple_replace = compiled_replacement.Compile(
640 isolate, regexp, replacement, capture_count, subject_length);
641
642 // Shortcut for simple non-regexp global replacements
643 if (regexp->type_tag() == JSRegExp::ATOM && simple_replace) {
644 if (subject->IsOneByteRepresentation() &&
645 replacement->IsOneByteRepresentation()) {
646 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
647 isolate, subject, regexp, replacement, last_match_info);
648 } else {
649 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
650 isolate, subject, regexp, replacement, last_match_info);
651 }
652 }
653
654 RegExpGlobalCache global_cache(regexp, subject, isolate);
655 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
656
657 int32_t* current_match = global_cache.FetchNext();
658 if (current_match == nullptr) {
659 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
660 return *subject;
661 }
662
663 // Guessing the number of parts that the final result string is built
664 // from. Global regexps can match any number of times, so we guess
665 // conservatively.
666 int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
667 ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
668
669 int prev = 0;
670
671 do {
672 int start = current_match[0];
673 int end = current_match[1];
674
675 if (prev < start) {
676 builder.AddSubjectSlice(prev, start);
677 }
678
679 if (simple_replace) {
680 builder.AddString(replacement);
681 } else {
682 compiled_replacement.Apply(&builder, start, end, current_match);
683 }
684 prev = end;
685
686 current_match = global_cache.FetchNext();
687 } while (current_match != nullptr);
688
689 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
690
691 if (prev < subject_length) {
692 builder.AddSubjectSlice(prev, subject_length);
693 }
694
695 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
696 global_cache.LastSuccessfulMatch());
697
698 RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
699 }
700
701 template <typename ResultSeqString>
StringReplaceGlobalRegExpWithEmptyString(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_info)702 V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
703 Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
704 Handle<RegExpMatchInfo> last_match_info) {
705 DCHECK(subject->IsFlat());
706
707 // Shortcut for simple non-regexp global replacements
708 if (regexp->type_tag() == JSRegExp::ATOM) {
709 Handle<String> empty_string = isolate->factory()->empty_string();
710 if (subject->IsOneByteRepresentation()) {
711 return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
712 isolate, subject, regexp, empty_string, last_match_info);
713 } else {
714 return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
715 isolate, subject, regexp, empty_string, last_match_info);
716 }
717 }
718
719 RegExpGlobalCache global_cache(regexp, subject, isolate);
720 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
721
722 int32_t* current_match = global_cache.FetchNext();
723 if (current_match == nullptr) {
724 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
725 return *subject;
726 }
727
728 int start = current_match[0];
729 int end = current_match[1];
730 int capture_count = regexp->capture_count();
731 int subject_length = subject->length();
732
733 int new_length = subject_length - (end - start);
734 if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
735
736 Handle<ResultSeqString> answer;
737 if (ResultSeqString::kHasOneByteEncoding) {
738 answer = Handle<ResultSeqString>::cast(
739 isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
740 } else {
741 answer = Handle<ResultSeqString>::cast(
742 isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
743 }
744
745 int prev = 0;
746 int position = 0;
747
748 DisallowGarbageCollection no_gc;
749 do {
750 start = current_match[0];
751 end = current_match[1];
752 if (prev < start) {
753 // Add substring subject[prev;start] to answer string.
754 String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
755 start - prev);
756 position += start - prev;
757 }
758 prev = end;
759
760 current_match = global_cache.FetchNext();
761 } while (current_match != nullptr);
762
763 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
764
765 RegExp::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
766 global_cache.LastSuccessfulMatch());
767
768 if (prev < subject_length) {
769 // Add substring subject[prev;length] to answer string.
770 String::WriteToFlat(*subject, answer->GetChars(no_gc) + position, prev,
771 subject_length - prev);
772 position += subject_length - prev;
773 }
774
775 if (position == 0) return ReadOnlyRoots(isolate).empty_string();
776
777 // Shorten string and fill
778 int string_size = ResultSeqString::SizeFor(position);
779 int allocated_string_size = ResultSeqString::SizeFor(new_length);
780 int delta = allocated_string_size - string_size;
781
782 answer->set_length(position);
783 if (delta == 0) return *answer;
784
785 Address end_of_string = answer->address() + string_size;
786 Heap* heap = isolate->heap();
787
788 // The trimming is performed on a newly allocated object, which is on a
789 // freshly allocated page or on an already swept page. Hence, the sweeper
790 // thread can not get confused with the filler creation. No synchronization
791 // needed.
792 // TODO(hpayer): We should shrink the large object page if the size
793 // of the object changed significantly.
794 if (!heap->IsLargeObject(*answer)) {
795 heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
796 }
797 return *answer;
798 }
799
RUNTIME_FUNCTION(Runtime_StringSplit)800 RUNTIME_FUNCTION(Runtime_StringSplit) {
801 HandleScope handle_scope(isolate);
802 DCHECK_EQ(3, args.length());
803 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
804 CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
805 CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
806 CHECK_LT(0, limit);
807
808 int subject_length = subject->length();
809 int pattern_length = pattern->length();
810 CHECK_LT(0, pattern_length);
811
812 if (limit == 0xFFFFFFFFu) {
813 FixedArray last_match_cache_unused;
814 Handle<Object> cached_answer(
815 RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
816 &last_match_cache_unused,
817 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
818 isolate);
819 if (*cached_answer != Smi::zero()) {
820 // The cache FixedArray is a COW-array and can therefore be reused.
821 Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
822 Handle<FixedArray>::cast(cached_answer));
823 return *result;
824 }
825 }
826
827 // The limit can be very large (0xFFFFFFFFu), but since the pattern
828 // isn't empty, we can never create more parts than ~half the length
829 // of the subject.
830
831 subject = String::Flatten(isolate, subject);
832 pattern = String::Flatten(isolate, pattern);
833
834 std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
835
836 FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
837
838 if (static_cast<uint32_t>(indices->size()) < limit) {
839 indices->push_back(subject_length);
840 }
841
842 // The list indices now contains the end of each part to create.
843
844 // Create JSArray of substrings separated by separator.
845 int part_count = static_cast<int>(indices->size());
846
847 Handle<JSArray> result =
848 isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
849 INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
850
851 DCHECK(result->HasObjectElements());
852
853 Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
854
855 if (part_count == 1 && indices->at(0) == subject_length) {
856 elements->set(0, *subject);
857 } else {
858 int part_start = 0;
859 FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
860 int part_end = indices->at(i);
861 Handle<String> substring =
862 isolate->factory()->NewProperSubString(subject, part_start, part_end);
863 elements->set(i, *substring);
864 part_start = part_end + pattern_length;
865 });
866 }
867
868 if (limit == 0xFFFFFFFFu) {
869 if (result->HasObjectElements()) {
870 RegExpResultsCache::Enter(isolate, subject, pattern, elements,
871 isolate->factory()->empty_fixed_array(),
872 RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
873 }
874 }
875
876 TruncateRegexpIndicesList(isolate);
877
878 return *result;
879 }
880
881 namespace {
882
RegExpExec(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,int32_t index,Handle<RegExpMatchInfo> last_match_info,RegExp::ExecQuirks exec_quirks)883 MaybeHandle<Object> RegExpExec(Isolate* isolate, Handle<JSRegExp> regexp,
884 Handle<String> subject, int32_t index,
885 Handle<RegExpMatchInfo> last_match_info,
886 RegExp::ExecQuirks exec_quirks) {
887 // Due to the way the JS calls are constructed this must be less than the
888 // length of a string, i.e. it is always a Smi. We check anyway for security.
889 CHECK_LE(0, index);
890 CHECK_GE(subject->length(), index);
891 isolate->counters()->regexp_entry_runtime()->Increment();
892 return RegExp::Exec(isolate, regexp, subject, index, last_match_info,
893 exec_quirks);
894 }
895
ExperimentalOneshotExec(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,int32_t index,Handle<RegExpMatchInfo> last_match_info,RegExp::ExecQuirks exec_quirks)896 MaybeHandle<Object> ExperimentalOneshotExec(
897 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
898 int32_t index, Handle<RegExpMatchInfo> last_match_info,
899 RegExp::ExecQuirks exec_quirks) {
900 // Due to the way the JS calls are constructed this must be less than the
901 // length of a string, i.e. it is always a Smi. We check anyway for security.
902 CHECK_LE(0, index);
903 CHECK_GE(subject->length(), index);
904 isolate->counters()->regexp_entry_runtime()->Increment();
905 return RegExp::ExperimentalOneshotExec(isolate, regexp, subject, index,
906 last_match_info, exec_quirks);
907 }
908
909 } // namespace
910
RUNTIME_FUNCTION(Runtime_RegExpExec)911 RUNTIME_FUNCTION(Runtime_RegExpExec) {
912 HandleScope scope(isolate);
913 DCHECK_EQ(4, args.length());
914 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
915 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
916 CONVERT_INT32_ARG_CHECKED(index, 2);
917 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
918 RETURN_RESULT_OR_FAILURE(
919 isolate, RegExpExec(isolate, regexp, subject, index, last_match_info,
920 RegExp::ExecQuirks::kNone));
921 }
922
RUNTIME_FUNCTION(Runtime_RegExpExecTreatMatchAtEndAsFailure)923 RUNTIME_FUNCTION(Runtime_RegExpExecTreatMatchAtEndAsFailure) {
924 HandleScope scope(isolate);
925 DCHECK_EQ(4, args.length());
926 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
927 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
928 CONVERT_INT32_ARG_CHECKED(index, 2);
929 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
930 RETURN_RESULT_OR_FAILURE(
931 isolate, RegExpExec(isolate, regexp, subject, index, last_match_info,
932 RegExp::ExecQuirks::kTreatMatchAtEndAsFailure));
933 }
934
RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec)935 RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExec) {
936 HandleScope scope(isolate);
937 DCHECK_EQ(4, args.length());
938 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
939 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
940 CONVERT_INT32_ARG_CHECKED(index, 2);
941 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
942 RETURN_RESULT_OR_FAILURE(
943 isolate,
944 ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info,
945 RegExp::ExecQuirks::kNone));
946 }
947
RUNTIME_FUNCTION(Runtime_RegExpExperimentalOneshotExecTreatMatchAtEndAsFailure)948 RUNTIME_FUNCTION(
949 Runtime_RegExpExperimentalOneshotExecTreatMatchAtEndAsFailure) {
950 HandleScope scope(isolate);
951 DCHECK_EQ(4, args.length());
952 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
953 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
954 CONVERT_INT32_ARG_CHECKED(index, 2);
955 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
956 RETURN_RESULT_OR_FAILURE(
957 isolate,
958 ExperimentalOneshotExec(isolate, regexp, subject, index, last_match_info,
959 RegExp::ExecQuirks::kTreatMatchAtEndAsFailure));
960 }
961
RUNTIME_FUNCTION(Runtime_RegExpBuildIndices)962 RUNTIME_FUNCTION(Runtime_RegExpBuildIndices) {
963 HandleScope scope(isolate);
964 DCHECK_EQ(3, args.length());
965 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, match_info, 1);
966 CONVERT_ARG_HANDLE_CHECKED(Object, maybe_names, 2);
967 #ifdef DEBUG
968 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
969 DCHECK(regexp->flags() & JSRegExp::kHasIndices);
970 #endif
971
972 return *JSRegExpResultIndices::BuildIndices(isolate, match_info, maybe_names);
973 }
974
975 namespace {
976
977 class MatchInfoBackedMatch : public String::Match {
978 public:
MatchInfoBackedMatch(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> subject,Handle<RegExpMatchInfo> match_info)979 MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
980 Handle<String> subject,
981 Handle<RegExpMatchInfo> match_info)
982 : isolate_(isolate), match_info_(match_info) {
983 subject_ = String::Flatten(isolate, subject);
984
985 if (JSRegExp::TypeSupportsCaptures(regexp->type_tag())) {
986 Object o = regexp->capture_name_map();
987 has_named_captures_ = o.IsFixedArray();
988 if (has_named_captures_) {
989 capture_name_map_ = handle(FixedArray::cast(o), isolate);
990 }
991 } else {
992 has_named_captures_ = false;
993 }
994 }
995
GetMatch()996 Handle<String> GetMatch() override {
997 return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
998 }
999
GetPrefix()1000 Handle<String> GetPrefix() override {
1001 const int match_start = match_info_->Capture(0);
1002 return isolate_->factory()->NewSubString(subject_, 0, match_start);
1003 }
1004
GetSuffix()1005 Handle<String> GetSuffix() override {
1006 const int match_end = match_info_->Capture(1);
1007 return isolate_->factory()->NewSubString(subject_, match_end,
1008 subject_->length());
1009 }
1010
HasNamedCaptures()1011 bool HasNamedCaptures() override { return has_named_captures_; }
1012
CaptureCount()1013 int CaptureCount() override {
1014 return match_info_->NumberOfCaptureRegisters() / 2;
1015 }
1016
GetCapture(int i,bool * capture_exists)1017 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1018 Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
1019 isolate_, match_info_, i, capture_exists);
1020 return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
1021 : isolate_->factory()->empty_string();
1022 }
1023
GetNamedCapture(Handle<String> name,CaptureState * state)1024 MaybeHandle<String> GetNamedCapture(Handle<String> name,
1025 CaptureState* state) override {
1026 DCHECK(has_named_captures_);
1027 const int capture_index = LookupNamedCapture(
1028 [=](String capture_name) { return capture_name.Equals(*name); },
1029 *capture_name_map_);
1030
1031 if (capture_index == -1) {
1032 *state = UNMATCHED;
1033 return isolate_->factory()->empty_string();
1034 }
1035
1036 DCHECK(1 <= capture_index && capture_index <= CaptureCount());
1037
1038 bool capture_exists;
1039 Handle<String> capture_value;
1040 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
1041 GetCapture(capture_index, &capture_exists),
1042 String);
1043
1044 if (!capture_exists) {
1045 *state = UNMATCHED;
1046 return isolate_->factory()->empty_string();
1047 } else {
1048 *state = MATCHED;
1049 return capture_value;
1050 }
1051 }
1052
1053 private:
1054 Isolate* isolate_;
1055 Handle<String> subject_;
1056 Handle<RegExpMatchInfo> match_info_;
1057
1058 bool has_named_captures_;
1059 Handle<FixedArray> capture_name_map_;
1060 };
1061
1062 class VectorBackedMatch : public String::Match {
1063 public:
VectorBackedMatch(Isolate * isolate,Handle<String> subject,Handle<String> match,int match_position,base::Vector<Handle<Object>> captures,Handle<Object> groups_obj)1064 VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1065 Handle<String> match, int match_position,
1066 base::Vector<Handle<Object>> captures,
1067 Handle<Object> groups_obj)
1068 : isolate_(isolate),
1069 match_(match),
1070 match_position_(match_position),
1071 captures_(captures) {
1072 subject_ = String::Flatten(isolate, subject);
1073
1074 DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1075 has_named_captures_ = !groups_obj->IsUndefined(isolate);
1076 if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1077 }
1078
GetMatch()1079 Handle<String> GetMatch() override { return match_; }
1080
GetPrefix()1081 Handle<String> GetPrefix() override {
1082 return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1083 }
1084
GetSuffix()1085 Handle<String> GetSuffix() override {
1086 const int match_end_position = match_position_ + match_->length();
1087 return isolate_->factory()->NewSubString(subject_, match_end_position,
1088 subject_->length());
1089 }
1090
HasNamedCaptures()1091 bool HasNamedCaptures() override { return has_named_captures_; }
1092
CaptureCount()1093 int CaptureCount() override { return captures_.length(); }
1094
GetCapture(int i,bool * capture_exists)1095 MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1096 Handle<Object> capture_obj = captures_[i];
1097 if (capture_obj->IsUndefined(isolate_)) {
1098 *capture_exists = false;
1099 return isolate_->factory()->empty_string();
1100 }
1101 *capture_exists = true;
1102 return Object::ToString(isolate_, capture_obj);
1103 }
1104
GetNamedCapture(Handle<String> name,CaptureState * state)1105 MaybeHandle<String> GetNamedCapture(Handle<String> name,
1106 CaptureState* state) override {
1107 DCHECK(has_named_captures_);
1108
1109 Handle<Object> capture_obj;
1110 ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1111 Object::GetProperty(isolate_, groups_obj_, name),
1112 String);
1113 if (capture_obj->IsUndefined(isolate_)) {
1114 *state = UNMATCHED;
1115 return isolate_->factory()->empty_string();
1116 } else {
1117 *state = MATCHED;
1118 return Object::ToString(isolate_, capture_obj);
1119 }
1120 }
1121
1122 private:
1123 Isolate* isolate_;
1124 Handle<String> subject_;
1125 Handle<String> match_;
1126 const int match_position_;
1127 base::Vector<Handle<Object>> captures_;
1128
1129 bool has_named_captures_;
1130 Handle<JSReceiver> groups_obj_;
1131 };
1132
1133 // Create the groups object (see also the RegExp result creation in
1134 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
ConstructNamedCaptureGroupsObject(Isolate * isolate,Handle<FixedArray> capture_map,const std::function<Object (int)> & f_get_capture)1135 Handle<JSObject> ConstructNamedCaptureGroupsObject(
1136 Isolate* isolate, Handle<FixedArray> capture_map,
1137 const std::function<Object(int)>& f_get_capture) {
1138 Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1139
1140 const int named_capture_count = capture_map->length() >> 1;
1141 for (int i = 0; i < named_capture_count; i++) {
1142 const int name_ix = i * 2;
1143 const int index_ix = i * 2 + 1;
1144
1145 Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1146 isolate);
1147 const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1148 DCHECK_GE(capture_ix, 1); // Explicit groups start at index 1.
1149
1150 Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1151 DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1152
1153 JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1154 }
1155
1156 return groups;
1157 }
1158
1159 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1160 // separate last match info. See comment on that function.
1161 template <bool has_capture>
SearchRegExpMultiple(Isolate * isolate,Handle<String> subject,Handle<JSRegExp> regexp,Handle<RegExpMatchInfo> last_match_array,Handle<JSArray> result_array)1162 static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1163 Handle<JSRegExp> regexp,
1164 Handle<RegExpMatchInfo> last_match_array,
1165 Handle<JSArray> result_array) {
1166 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1167 DCHECK_NE(has_capture, regexp->capture_count() == 0);
1168 DCHECK(subject->IsFlat());
1169
1170 // Force tier up to native code for global replaces. The global replace is
1171 // implemented differently for native code and bytecode execution, where the
1172 // native code expects an array to store all the matches, and the bytecode
1173 // matches one at a time, so it's easier to tier-up to native code from the
1174 // start.
1175 if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) {
1176 regexp->MarkTierUpForNextExec();
1177 if (FLAG_trace_regexp_tier_up) {
1178 PrintF("Forcing tier-up of JSRegExp object %p in SearchRegExpMultiple\n",
1179 reinterpret_cast<void*>(regexp->ptr()));
1180 }
1181 }
1182
1183 int capture_count = regexp->capture_count();
1184 int subject_length = subject->length();
1185
1186 static const int kMinLengthToCache = 0x1000;
1187
1188 if (subject_length > kMinLengthToCache) {
1189 FixedArray last_match_cache;
1190 Object cached_answer = RegExpResultsCache::Lookup(
1191 isolate->heap(), *subject, regexp->data(), &last_match_cache,
1192 RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1193 if (cached_answer.IsFixedArray()) {
1194 int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1195 int32_t* last_match = NewArray<int32_t>(capture_registers);
1196 for (int i = 0; i < capture_registers; i++) {
1197 last_match[i] = Smi::ToInt(last_match_cache.get(i));
1198 }
1199 Handle<FixedArray> cached_fixed_array =
1200 Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1201 // The cache FixedArray is a COW-array and we need to return a copy.
1202 Handle<FixedArray> copied_fixed_array =
1203 isolate->factory()->CopyFixedArrayWithMap(
1204 cached_fixed_array, isolate->factory()->fixed_array_map());
1205 JSArray::SetContent(result_array, copied_fixed_array);
1206 RegExp::SetLastMatchInfo(isolate, last_match_array, subject,
1207 capture_count, last_match);
1208 DeleteArray(last_match);
1209 return *result_array;
1210 }
1211 }
1212
1213 RegExpGlobalCache global_cache(regexp, subject, isolate);
1214 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1215
1216 // Ensured in Runtime_RegExpExecMultiple.
1217 DCHECK(result_array->HasObjectElements());
1218 Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
1219 isolate);
1220 if (result_elements->length() < 16) {
1221 result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1222 }
1223
1224 FixedArrayBuilder builder(result_elements);
1225
1226 // Position to search from.
1227 int match_start = -1;
1228 int match_end = 0;
1229 bool first = true;
1230
1231 // Two smis before and after the match, for very long strings.
1232 static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1233
1234 while (true) {
1235 int32_t* current_match = global_cache.FetchNext();
1236 if (current_match == nullptr) break;
1237 match_start = current_match[0];
1238 builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
1239 if (match_end < match_start) {
1240 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1241 match_start);
1242 }
1243 match_end = current_match[1];
1244 {
1245 // Avoid accumulating new handles inside loop.
1246 HandleScope temp_scope(isolate);
1247 Handle<String> match;
1248 if (!first) {
1249 match = isolate->factory()->NewProperSubString(subject, match_start,
1250 match_end);
1251 } else {
1252 match =
1253 isolate->factory()->NewSubString(subject, match_start, match_end);
1254 first = false;
1255 }
1256
1257 if (has_capture) {
1258 // Arguments array to replace function is match, captures, index and
1259 // subject, i.e., 3 + capture count in total. If the RegExp contains
1260 // named captures, they are also passed as the last argument.
1261
1262 Handle<Object> maybe_capture_map(regexp->capture_name_map(), isolate);
1263 const bool has_named_captures = maybe_capture_map->IsFixedArray();
1264
1265 const int argc =
1266 has_named_captures ? 4 + capture_count : 3 + capture_count;
1267
1268 Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1269 int cursor = 0;
1270
1271 elements->set(cursor++, *match);
1272 for (int i = 1; i <= capture_count; i++) {
1273 int start = current_match[i * 2];
1274 if (start >= 0) {
1275 int end = current_match[i * 2 + 1];
1276 DCHECK(start <= end);
1277 Handle<String> substring =
1278 isolate->factory()->NewSubString(subject, start, end);
1279 elements->set(cursor++, *substring);
1280 } else {
1281 DCHECK_GT(0, current_match[i * 2 + 1]);
1282 elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1283 }
1284 }
1285
1286 elements->set(cursor++, Smi::FromInt(match_start));
1287 elements->set(cursor++, *subject);
1288
1289 if (has_named_captures) {
1290 Handle<FixedArray> capture_map =
1291 Handle<FixedArray>::cast(maybe_capture_map);
1292 Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1293 isolate, capture_map, [=](int ix) { return elements->get(ix); });
1294 elements->set(cursor++, *groups);
1295 }
1296
1297 DCHECK_EQ(cursor, argc);
1298 builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1299 } else {
1300 builder.Add(*match);
1301 }
1302 }
1303 }
1304
1305 if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1306
1307 if (match_start >= 0) {
1308 // Finished matching, with at least one match.
1309 if (match_end < subject_length) {
1310 ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1311 subject_length);
1312 }
1313
1314 RegExp::SetLastMatchInfo(isolate, last_match_array, subject, capture_count,
1315 global_cache.LastSuccessfulMatch());
1316
1317 if (subject_length > kMinLengthToCache) {
1318 // Store the last successful match into the array for caching.
1319 int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
1320 Handle<FixedArray> last_match_cache =
1321 isolate->factory()->NewFixedArray(capture_registers);
1322 int32_t* last_match = global_cache.LastSuccessfulMatch();
1323 for (int i = 0; i < capture_registers; i++) {
1324 last_match_cache->set(i, Smi::FromInt(last_match[i]));
1325 }
1326 Handle<FixedArray> result_fixed_array =
1327 FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1328 // Cache the result and copy the FixedArray into a COW array.
1329 Handle<FixedArray> copied_fixed_array =
1330 isolate->factory()->CopyFixedArrayWithMap(
1331 result_fixed_array, isolate->factory()->fixed_array_map());
1332 RegExpResultsCache::Enter(
1333 isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1334 last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1335 }
1336 return *builder.ToJSArray(result_array);
1337 } else {
1338 return ReadOnlyRoots(isolate).null_value(); // No matches at all.
1339 }
1340 }
1341
1342 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1343 // doesn't properly call the underlying exec method.
RegExpReplace(Isolate * isolate,Handle<JSRegExp> regexp,Handle<String> string,Handle<String> replace)1344 V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1345 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1346 Handle<String> replace) {
1347 // Functional fast-paths are dispatched directly by replace builtin.
1348 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1349
1350 Factory* factory = isolate->factory();
1351
1352 const int flags = regexp->flags();
1353 const bool global = (flags & JSRegExp::kGlobal) != 0;
1354 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1355
1356 replace = String::Flatten(isolate, replace);
1357
1358 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1359
1360 if (!global) {
1361 // Non-global regexp search, string replace.
1362
1363 uint32_t last_index = 0;
1364 if (sticky) {
1365 Handle<Object> last_index_obj(regexp->last_index(), isolate);
1366 ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1367 Object::ToLength(isolate, last_index_obj),
1368 String);
1369 last_index = PositiveNumberToUint32(*last_index_obj);
1370 }
1371
1372 Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1373 isolate);
1374
1375 // A lastIndex exceeding the string length always returns null (signalling
1376 // failure) in RegExpBuiltinExec, thus we can skip the call.
1377 if (last_index <= static_cast<uint32_t>(string->length())) {
1378 ASSIGN_RETURN_ON_EXCEPTION(
1379 isolate, match_indices_obj,
1380 RegExp::Exec(isolate, regexp, string, last_index, last_match_info),
1381 String);
1382 }
1383
1384 if (match_indices_obj->IsNull(isolate)) {
1385 if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1386 return string;
1387 }
1388
1389 auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1390
1391 const int start_index = match_indices->Capture(0);
1392 const int end_index = match_indices->Capture(1);
1393
1394 if (sticky) {
1395 regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1396 }
1397
1398 IncrementalStringBuilder builder(isolate);
1399 builder.AppendString(factory->NewSubString(string, 0, start_index));
1400
1401 if (replace->length() > 0) {
1402 MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1403 Handle<String> replacement;
1404 ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1405 String::GetSubstitution(isolate, &m, replace),
1406 String);
1407 builder.AppendString(replacement);
1408 }
1409
1410 builder.AppendString(
1411 factory->NewSubString(string, end_index, string->length()));
1412 return builder.Finish();
1413 } else {
1414 // Global regexp search, string replace.
1415 DCHECK(global);
1416 RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1417 String);
1418
1419 // Force tier up to native code for global replaces. The global replace is
1420 // implemented differently for native code and bytecode execution, where the
1421 // native code expects an array to store all the matches, and the bytecode
1422 // matches one at a time, so it's easier to tier-up to native code from the
1423 // start.
1424 if (FLAG_regexp_tier_up && regexp->type_tag() == JSRegExp::IRREGEXP) {
1425 regexp->MarkTierUpForNextExec();
1426 if (FLAG_trace_regexp_tier_up) {
1427 PrintF("Forcing tier-up of JSRegExp object %p in RegExpReplace\n",
1428 reinterpret_cast<void*>(regexp->ptr()));
1429 }
1430 }
1431
1432 if (replace->length() == 0) {
1433 if (string->IsOneByteRepresentation()) {
1434 Object result =
1435 StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1436 isolate, string, regexp, last_match_info);
1437 return handle(String::cast(result), isolate);
1438 } else {
1439 Object result =
1440 StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1441 isolate, string, regexp, last_match_info);
1442 return handle(String::cast(result), isolate);
1443 }
1444 }
1445
1446 Object result = StringReplaceGlobalRegExpWithString(
1447 isolate, string, regexp, replace, last_match_info);
1448 if (result.IsString()) {
1449 return handle(String::cast(result), isolate);
1450 } else {
1451 return MaybeHandle<String>();
1452 }
1453 }
1454
1455 UNREACHABLE();
1456 }
1457
1458 } // namespace
1459
1460 // This is only called for StringReplaceGlobalRegExpWithFunction.
RUNTIME_FUNCTION(Runtime_RegExpExecMultiple)1461 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1462 HandleScope handles(isolate);
1463 DCHECK_EQ(4, args.length());
1464
1465 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1466 CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1467 CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1468 CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1469
1470 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1471 CHECK(result_array->HasObjectElements());
1472
1473 subject = String::Flatten(isolate, subject);
1474 CHECK(regexp->flags() & JSRegExp::kGlobal);
1475
1476 Object result;
1477 if (regexp->capture_count() == 0) {
1478 result = SearchRegExpMultiple<false>(isolate, subject, regexp,
1479 last_match_info, result_array);
1480 } else {
1481 result = SearchRegExpMultiple<true>(isolate, subject, regexp,
1482 last_match_info, result_array);
1483 }
1484 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1485 return result;
1486 }
1487
RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction)1488 RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1489 HandleScope scope(isolate);
1490 DCHECK_EQ(3, args.length());
1491 CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1492 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1493 CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);
1494
1495 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1496 DCHECK(replace_obj->map().is_callable());
1497
1498 Factory* factory = isolate->factory();
1499 Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1500
1501 const int flags = regexp->flags();
1502 DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1503
1504 // TODO(jgruber): This should be an easy port to CSA with massive payback.
1505
1506 const bool sticky = (flags & JSRegExp::kSticky) != 0;
1507 uint32_t last_index = 0;
1508 if (sticky) {
1509 Handle<Object> last_index_obj(regexp->last_index(), isolate);
1510 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1511 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1512 last_index = PositiveNumberToUint32(*last_index_obj);
1513 }
1514
1515 Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1516 isolate);
1517
1518 // A lastIndex exceeding the string length always returns null (signalling
1519 // failure) in RegExpBuiltinExec, thus we can skip the call.
1520 if (last_index <= static_cast<uint32_t>(subject->length())) {
1521 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1522 isolate, match_indices_obj,
1523 RegExp::Exec(isolate, regexp, subject, last_index, last_match_info));
1524 }
1525
1526 if (match_indices_obj->IsNull(isolate)) {
1527 if (sticky) regexp->set_last_index(Smi::zero(), SKIP_WRITE_BARRIER);
1528 return *subject;
1529 }
1530
1531 Handle<RegExpMatchInfo> match_indices =
1532 Handle<RegExpMatchInfo>::cast(match_indices_obj);
1533
1534 const int index = match_indices->Capture(0);
1535 const int end_of_match = match_indices->Capture(1);
1536
1537 if (sticky) {
1538 regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1539 }
1540
1541 IncrementalStringBuilder builder(isolate);
1542 builder.AppendString(factory->NewSubString(subject, 0, index));
1543
1544 // Compute the parameter list consisting of the match, captures, index,
1545 // and subject for the replace function invocation. If the RegExp contains
1546 // named captures, they are also passed as the last argument.
1547
1548 // The number of captures plus one for the match.
1549 const int m = match_indices->NumberOfCaptureRegisters() / 2;
1550
1551 bool has_named_captures = false;
1552 Handle<FixedArray> capture_map;
1553 if (m > 1) {
1554 DCHECK(JSRegExp::TypeSupportsCaptures(regexp->type_tag()));
1555
1556 Object maybe_capture_map = regexp->capture_name_map();
1557 if (maybe_capture_map.IsFixedArray()) {
1558 has_named_captures = true;
1559 capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1560 }
1561 }
1562
1563 const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1564 if (argc == static_cast<uint32_t>(-1)) {
1565 THROW_NEW_ERROR_RETURN_FAILURE(
1566 isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1567 }
1568 base::ScopedVector<Handle<Object>> argv(argc);
1569
1570 int cursor = 0;
1571 for (int j = 0; j < m; j++) {
1572 bool ok;
1573 Handle<String> capture =
1574 RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1575 if (ok) {
1576 argv[cursor++] = capture;
1577 } else {
1578 argv[cursor++] = factory->undefined_value();
1579 }
1580 }
1581
1582 argv[cursor++] = handle(Smi::FromInt(index), isolate);
1583 argv[cursor++] = subject;
1584
1585 if (has_named_captures) {
1586 argv[cursor++] = ConstructNamedCaptureGroupsObject(
1587 isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1588 }
1589
1590 DCHECK_EQ(cursor, argc);
1591
1592 Handle<Object> replacement_obj;
1593 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1594 isolate, replacement_obj,
1595 Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1596 argv.begin()));
1597
1598 Handle<String> replacement;
1599 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1600 isolate, replacement, Object::ToString(isolate, replacement_obj));
1601
1602 builder.AppendString(replacement);
1603 builder.AppendString(
1604 factory->NewSubString(subject, end_of_match, subject->length()));
1605
1606 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1607 }
1608
1609 namespace {
1610
ToUint32(Isolate * isolate,Handle<Object> object,uint32_t * out)1611 V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1612 Handle<Object> object,
1613 uint32_t* out) {
1614 if (object->IsUndefined(isolate)) {
1615 *out = kMaxUInt32;
1616 return object;
1617 }
1618
1619 Handle<Object> number;
1620 ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1621 Object);
1622 *out = NumberToUint32(*number);
1623 return object;
1624 }
1625
NewJSArrayWithElements(Isolate * isolate,Handle<FixedArray> elems,int num_elems)1626 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1627 Handle<FixedArray> elems,
1628 int num_elems) {
1629 return isolate->factory()->NewJSArrayWithElements(
1630 FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1631 }
1632
1633 } // namespace
1634
1635 // Slow path for:
1636 // ES#sec-regexp.prototype-@@replace
1637 // RegExp.prototype [ @@split ] ( string, limit )
RUNTIME_FUNCTION(Runtime_RegExpSplit)1638 RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1639 HandleScope scope(isolate);
1640 DCHECK_EQ(3, args.length());
1641
1642 CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1643 CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1644 CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1645
1646 Factory* factory = isolate->factory();
1647
1648 Handle<JSFunction> regexp_fun = isolate->regexp_function();
1649 Handle<Object> ctor;
1650 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1651 isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1652
1653 Handle<Object> flags_obj;
1654 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1655 isolate, flags_obj,
1656 JSObject::GetProperty(isolate, recv, factory->flags_string()));
1657
1658 Handle<String> flags;
1659 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1660 Object::ToString(isolate, flags_obj));
1661
1662 Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1663 const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1664
1665 Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1666 const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1667
1668 Handle<String> new_flags = flags;
1669 if (!sticky) {
1670 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1671 factory->NewConsString(flags, y_str));
1672 }
1673
1674 Handle<JSReceiver> splitter;
1675 {
1676 const int argc = 2;
1677
1678 base::ScopedVector<Handle<Object>> argv(argc);
1679 argv[0] = recv;
1680 argv[1] = new_flags;
1681
1682 Handle<Object> splitter_obj;
1683 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1684 isolate, splitter_obj,
1685 Execution::New(isolate, ctor, argc, argv.begin()));
1686
1687 splitter = Handle<JSReceiver>::cast(splitter_obj);
1688 }
1689
1690 uint32_t limit;
1691 RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1692
1693 const uint32_t length = string->length();
1694
1695 if (limit == 0) return *factory->NewJSArray(0);
1696
1697 if (length == 0) {
1698 Handle<Object> result;
1699 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1700 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1701 factory->undefined_value()));
1702
1703 if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1704
1705 Handle<FixedArray> elems = factory->NewFixedArray(1);
1706 elems->set(0, *string);
1707 return *factory->NewJSArrayWithElements(elems);
1708 }
1709
1710 static const int kInitialArraySize = 8;
1711 Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1712 uint32_t num_elems = 0;
1713
1714 uint32_t string_index = 0;
1715 uint32_t prev_string_index = 0;
1716 while (string_index < length) {
1717 RETURN_FAILURE_ON_EXCEPTION(
1718 isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1719
1720 Handle<Object> result;
1721 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1722 isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1723 factory->undefined_value()));
1724
1725 if (result->IsNull(isolate)) {
1726 string_index = static_cast<uint32_t>(
1727 RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1728 continue;
1729 }
1730
1731 Handle<Object> last_index_obj;
1732 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1733 isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1734
1735 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1736 isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1737
1738 const uint32_t end =
1739 std::min(PositiveNumberToUint32(*last_index_obj), length);
1740 if (end == prev_string_index) {
1741 string_index = static_cast<uint32_t>(
1742 RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1743 continue;
1744 }
1745
1746 {
1747 Handle<String> substr =
1748 factory->NewSubString(string, prev_string_index, string_index);
1749 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1750 if (num_elems == limit) {
1751 return *NewJSArrayWithElements(isolate, elems, num_elems);
1752 }
1753 }
1754
1755 prev_string_index = end;
1756
1757 Handle<Object> num_captures_obj;
1758 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1759 isolate, num_captures_obj,
1760 Object::GetProperty(isolate, result,
1761 isolate->factory()->length_string()));
1762
1763 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1764 isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1765 const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1766
1767 for (uint32_t i = 1; i < num_captures; i++) {
1768 Handle<Object> capture;
1769 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1770 isolate, capture, Object::GetElement(isolate, result, i));
1771 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1772 if (num_elems == limit) {
1773 return *NewJSArrayWithElements(isolate, elems, num_elems);
1774 }
1775 }
1776
1777 string_index = prev_string_index;
1778 }
1779
1780 {
1781 Handle<String> substr =
1782 factory->NewSubString(string, prev_string_index, length);
1783 elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1784 }
1785
1786 return *NewJSArrayWithElements(isolate, elems, num_elems);
1787 }
1788
1789 // Slow path for:
1790 // ES#sec-regexp.prototype-@@replace
1791 // RegExp.prototype [ @@replace ] ( string, replaceValue )
RUNTIME_FUNCTION(Runtime_RegExpReplaceRT)1792 RUNTIME_FUNCTION(Runtime_RegExpReplaceRT) {
1793 HandleScope scope(isolate);
1794 DCHECK_EQ(3, args.length());
1795
1796 CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1797 CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1798 Handle<Object> replace_obj = args.at(2);
1799
1800 Factory* factory = isolate->factory();
1801
1802 string = String::Flatten(isolate, string);
1803
1804 const bool functional_replace = replace_obj->IsCallable();
1805
1806 Handle<String> replace;
1807 if (!functional_replace) {
1808 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1809 Object::ToString(isolate, replace_obj));
1810 }
1811
1812 // Fast-path for unmodified JSRegExps (and non-functional replace).
1813 if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1814 // We should never get here with functional replace because unmodified
1815 // regexp and functional replace should be fully handled in CSA code.
1816 CHECK(!functional_replace);
1817 Handle<Object> result;
1818 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1819 isolate, result,
1820 RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string, replace));
1821 DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, recv));
1822 return *result;
1823 }
1824
1825 const uint32_t length = string->length();
1826
1827 Handle<Object> global_obj;
1828 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1829 isolate, global_obj,
1830 JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1831 const bool global = global_obj->BooleanValue(isolate);
1832
1833 bool unicode = false;
1834 if (global) {
1835 Handle<Object> unicode_obj;
1836 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1837 isolate, unicode_obj,
1838 JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1839 unicode = unicode_obj->BooleanValue(isolate);
1840
1841 RETURN_FAILURE_ON_EXCEPTION(isolate,
1842 RegExpUtils::SetLastIndex(isolate, recv, 0));
1843 }
1844
1845 base::SmallVector<Handle<Object>, kStaticVectorSlots> results;
1846
1847 while (true) {
1848 Handle<Object> result;
1849 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1850 isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1851 factory->undefined_value()));
1852
1853 if (result->IsNull(isolate)) break;
1854
1855 results.emplace_back(result);
1856 if (!global) break;
1857
1858 Handle<Object> match_obj;
1859 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1860 Object::GetElement(isolate, result, 0));
1861
1862 Handle<String> match;
1863 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1864 Object::ToString(isolate, match_obj));
1865
1866 if (match->length() == 0) {
1867 RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1868 isolate, recv, string, unicode));
1869 }
1870 }
1871
1872 // TODO(jgruber): Look into ReplacementStringBuilder instead.
1873 IncrementalStringBuilder builder(isolate);
1874 uint32_t next_source_position = 0;
1875
1876 for (const auto& result : results) {
1877 HandleScope handle_scope(isolate);
1878 Handle<Object> captures_length_obj;
1879 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1880 isolate, captures_length_obj,
1881 Object::GetProperty(isolate, result, factory->length_string()));
1882
1883 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1884 isolate, captures_length_obj,
1885 Object::ToLength(isolate, captures_length_obj));
1886 const uint32_t captures_length =
1887 PositiveNumberToUint32(*captures_length_obj);
1888
1889 Handle<Object> match_obj;
1890 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1891 Object::GetElement(isolate, result, 0));
1892
1893 Handle<String> match;
1894 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1895 Object::ToString(isolate, match_obj));
1896
1897 const int match_length = match->length();
1898
1899 Handle<Object> position_obj;
1900 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1901 isolate, position_obj,
1902 Object::GetProperty(isolate, result, factory->index_string()));
1903
1904 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1905 isolate, position_obj, Object::ToInteger(isolate, position_obj));
1906 const uint32_t position =
1907 std::min(PositiveNumberToUint32(*position_obj), length);
1908
1909 // Do not reserve capacity since captures_length is user-controlled.
1910 base::SmallVector<Handle<Object>, kStaticVectorSlots> captures;
1911
1912 for (uint32_t n = 0; n < captures_length; n++) {
1913 Handle<Object> capture;
1914 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1915 isolate, capture, Object::GetElement(isolate, result, n));
1916
1917 if (!capture->IsUndefined(isolate)) {
1918 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1919 Object::ToString(isolate, capture));
1920 }
1921 captures.emplace_back(capture);
1922 }
1923
1924 Handle<Object> groups_obj = isolate->factory()->undefined_value();
1925 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1926 isolate, groups_obj,
1927 Object::GetProperty(isolate, result, factory->groups_string()));
1928
1929 const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1930
1931 Handle<String> replacement;
1932 if (functional_replace) {
1933 const uint32_t argc =
1934 GetArgcForReplaceCallable(captures_length, has_named_captures);
1935 if (argc == static_cast<uint32_t>(-1)) {
1936 THROW_NEW_ERROR_RETURN_FAILURE(
1937 isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1938 }
1939
1940 base::ScopedVector<Handle<Object>> argv(argc);
1941
1942 int cursor = 0;
1943 for (uint32_t j = 0; j < captures_length; j++) {
1944 argv[cursor++] = captures[j];
1945 }
1946
1947 argv[cursor++] = handle(Smi::FromInt(position), isolate);
1948 argv[cursor++] = string;
1949 if (has_named_captures) argv[cursor++] = groups_obj;
1950
1951 DCHECK_EQ(cursor, argc);
1952
1953 Handle<Object> replacement_obj;
1954 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1955 isolate, replacement_obj,
1956 Execution::Call(isolate, replace_obj, factory->undefined_value(),
1957 argc, argv.begin()));
1958
1959 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1960 isolate, replacement, Object::ToString(isolate, replacement_obj));
1961 } else {
1962 DCHECK(!functional_replace);
1963 if (!groups_obj->IsUndefined(isolate)) {
1964 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1965 isolate, groups_obj, Object::ToObject(isolate, groups_obj));
1966 }
1967 VectorBackedMatch m(isolate, string, match, position,
1968 base::VectorOf(captures), groups_obj);
1969 ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1970 isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1971 }
1972
1973 if (position >= next_source_position) {
1974 builder.AppendString(
1975 factory->NewSubString(string, next_source_position, position));
1976 builder.AppendString(replacement);
1977
1978 next_source_position = position + match_length;
1979 }
1980 }
1981
1982 if (next_source_position < length) {
1983 builder.AppendString(
1984 factory->NewSubString(string, next_source_position, length));
1985 }
1986
1987 RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1988 }
1989
RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile)1990 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1991 HandleScope scope(isolate);
1992 DCHECK_EQ(3, args.length());
1993 // TODO(pwong): To follow the spec more closely and simplify calling code,
1994 // this could handle the canonicalization of pattern and flags. See
1995 // https://tc39.github.io/ecma262/#sec-regexpinitialize
1996 CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1997 CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1998 CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1999
2000 RETURN_FAILURE_ON_EXCEPTION(isolate,
2001 JSRegExp::Initialize(regexp, source, flags));
2002
2003 return *regexp;
2004 }
2005
RUNTIME_FUNCTION(Runtime_IsRegExp)2006 RUNTIME_FUNCTION(Runtime_IsRegExp) {
2007 SealHandleScope shs(isolate);
2008 DCHECK_EQ(1, args.length());
2009 CONVERT_ARG_CHECKED(Object, obj, 0);
2010 return isolate->heap()->ToBoolean(obj.IsJSRegExp());
2011 }
2012
RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags)2013 RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags) {
2014 HandleScope scope(isolate);
2015 DCHECK_EQ(1, args.length());
2016 CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
2017 Handle<String> flags = JSRegExp::StringFromFlags(isolate, regexp.flags());
2018 return *flags;
2019 }
2020
2021 } // namespace internal
2022 } // namespace v8
2023