1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "src/builtins/builtins-regexp-gen.h"
6
7 #include "src/builtins/builtins-constructor-gen.h"
8 #include "src/builtins/builtins-utils-gen.h"
9 #include "src/builtins/builtins.h"
10 #include "src/builtins/growable-fixed-array-gen.h"
11 #include "src/code-factory.h"
12 #include "src/code-stub-assembler.h"
13 #include "src/counters.h"
14 #include "src/heap/factory-inl.h"
15 #include "src/objects/js-regexp-string-iterator.h"
16 #include "src/objects/js-regexp.h"
17 #include "src/objects/regexp-match-info.h"
18 #include "src/regexp/regexp-macro-assembler.h"
19
20 namespace v8 {
21 namespace internal {
22
23 using compiler::Node;
24 template <class T>
25 using TNode = compiler::TNode<T>;
26
27 // -----------------------------------------------------------------------------
28 // ES6 section 21.2 RegExp Objects
29
AllocateRegExpResult(Node * context,Node * length,Node * index,Node * input)30 Node* RegExpBuiltinsAssembler::AllocateRegExpResult(Node* context, Node* length,
31 Node* index, Node* input) {
32 CSA_ASSERT(this, IsContext(context));
33 CSA_ASSERT(this, TaggedIsSmi(index));
34 CSA_ASSERT(this, TaggedIsSmi(length));
35 CSA_ASSERT(this, IsString(input));
36
37 #ifdef DEBUG
38 TNode<Smi> const max_length =
39 SmiConstant(JSArray::kInitialMaxFastElementArray);
40 CSA_ASSERT(this, SmiLessThanOrEqual(CAST(length), max_length));
41 #endif // DEBUG
42
43 // Allocate the JSRegExpResult together with its elements fixed array.
44 // Initial preparations first.
45
46 Node* const length_intptr = SmiUntag(length);
47 const ElementsKind elements_kind = PACKED_ELEMENTS;
48
49 Node* const elements_size = GetFixedArrayAllocationSize(
50 length_intptr, elements_kind, INTPTR_PARAMETERS);
51 Node* const total_size =
52 IntPtrAdd(elements_size, IntPtrConstant(JSRegExpResult::kSize));
53
54 static const int kRegExpResultOffset = 0;
55 static const int kElementsOffset =
56 kRegExpResultOffset + JSRegExpResult::kSize;
57
58 // The folded allocation.
59
60 Node* const result = Allocate(total_size);
61 Node* const elements = InnerAllocate(result, kElementsOffset);
62
63 // Initialize the JSRegExpResult.
64
65 Node* const native_context = LoadNativeContext(context);
66 Node* const map =
67 LoadContextElement(native_context, Context::REGEXP_RESULT_MAP_INDEX);
68 StoreMapNoWriteBarrier(result, map);
69
70 StoreObjectFieldNoWriteBarrier(result, JSArray::kPropertiesOrHashOffset,
71 EmptyFixedArrayConstant());
72 StoreObjectFieldNoWriteBarrier(result, JSArray::kElementsOffset, elements);
73 StoreObjectFieldNoWriteBarrier(result, JSArray::kLengthOffset, length);
74
75 StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kIndexOffset, index);
76 StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kInputOffset, input);
77 StoreObjectFieldNoWriteBarrier(result, JSRegExpResult::kGroupsOffset,
78 UndefinedConstant());
79
80 // Initialize the elements.
81
82 DCHECK(!IsDoubleElementsKind(elements_kind));
83 const Heap::RootListIndex map_index = Heap::kFixedArrayMapRootIndex;
84 DCHECK(Heap::RootIsImmortalImmovable(map_index));
85 StoreMapNoWriteBarrier(elements, map_index);
86 StoreObjectFieldNoWriteBarrier(elements, FixedArray::kLengthOffset, length);
87
88 Node* const zero = IntPtrConstant(0);
89 FillFixedArrayWithValue(elements_kind, elements, zero, length_intptr,
90 Heap::kUndefinedValueRootIndex);
91
92 return result;
93 }
94
RegExpCreate(TNode<Context> context,TNode<Context> native_context,TNode<Object> maybe_string,TNode<String> flags)95 TNode<Object> RegExpBuiltinsAssembler::RegExpCreate(
96 TNode<Context> context, TNode<Context> native_context,
97 TNode<Object> maybe_string, TNode<String> flags) {
98 TNode<JSFunction> regexp_function =
99 CAST(LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX));
100 TNode<Map> initial_map = CAST(LoadObjectField(
101 regexp_function, JSFunction::kPrototypeOrInitialMapOffset));
102 return RegExpCreate(context, initial_map, maybe_string, flags);
103 }
104
RegExpCreate(TNode<Context> context,TNode<Map> initial_map,TNode<Object> maybe_string,TNode<String> flags)105 TNode<Object> RegExpBuiltinsAssembler::RegExpCreate(TNode<Context> context,
106 TNode<Map> initial_map,
107 TNode<Object> maybe_string,
108 TNode<String> flags) {
109 TNode<String> pattern = Select<String>(
110 IsUndefined(maybe_string), [=] { return EmptyStringConstant(); },
111 [=] { return ToString_Inline(context, maybe_string); });
112 TNode<Object> regexp = CAST(AllocateJSObjectFromMap(initial_map));
113 return CallRuntime(Runtime::kRegExpInitializeAndCompile, context, regexp,
114 pattern, flags);
115 }
116
FastLoadLastIndex(Node * regexp)117 Node* RegExpBuiltinsAssembler::FastLoadLastIndex(Node* regexp) {
118 // Load the in-object field.
119 static const int field_offset =
120 JSRegExp::kSize + JSRegExp::kLastIndexFieldIndex * kPointerSize;
121 return LoadObjectField(regexp, field_offset);
122 }
123
SlowLoadLastIndex(Node * context,Node * regexp)124 Node* RegExpBuiltinsAssembler::SlowLoadLastIndex(Node* context, Node* regexp) {
125 // Load through the GetProperty stub.
126 return GetProperty(context, regexp, isolate()->factory()->lastIndex_string());
127 }
128
LoadLastIndex(Node * context,Node * regexp,bool is_fastpath)129 Node* RegExpBuiltinsAssembler::LoadLastIndex(Node* context, Node* regexp,
130 bool is_fastpath) {
131 return is_fastpath ? FastLoadLastIndex(regexp)
132 : SlowLoadLastIndex(context, regexp);
133 }
134
135 // The fast-path of StoreLastIndex when regexp is guaranteed to be an unmodified
136 // JSRegExp instance.
FastStoreLastIndex(Node * regexp,Node * value)137 void RegExpBuiltinsAssembler::FastStoreLastIndex(Node* regexp, Node* value) {
138 // Store the in-object field.
139 static const int field_offset =
140 JSRegExp::kSize + JSRegExp::kLastIndexFieldIndex * kPointerSize;
141 StoreObjectField(regexp, field_offset, value);
142 }
143
SlowStoreLastIndex(Node * context,Node * regexp,Node * value)144 void RegExpBuiltinsAssembler::SlowStoreLastIndex(Node* context, Node* regexp,
145 Node* value) {
146 // Store through runtime.
147 // TODO(ishell): Use SetPropertyStub here once available.
148 Node* const name = HeapConstant(isolate()->factory()->lastIndex_string());
149 Node* const language_mode = SmiConstant(LanguageMode::kStrict);
150 CallRuntime(Runtime::kSetProperty, context, regexp, name, value,
151 language_mode);
152 }
153
StoreLastIndex(Node * context,Node * regexp,Node * value,bool is_fastpath)154 void RegExpBuiltinsAssembler::StoreLastIndex(Node* context, Node* regexp,
155 Node* value, bool is_fastpath) {
156 if (is_fastpath) {
157 FastStoreLastIndex(regexp, value);
158 } else {
159 SlowStoreLastIndex(context, regexp, value);
160 }
161 }
162
ConstructNewResultFromMatchInfo(Node * const context,Node * const regexp,Node * const match_info,TNode<String> const string)163 Node* RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(
164 Node* const context, Node* const regexp, Node* const match_info,
165 TNode<String> const string) {
166 CSA_ASSERT(this, IsFixedArrayMap(LoadMap(match_info)));
167 CSA_ASSERT(this, IsJSRegExp(regexp));
168
169 Label named_captures(this), out(this);
170
171 TNode<IntPtrT> num_indices = SmiUntag(CAST(LoadFixedArrayElement(
172 match_info, RegExpMatchInfo::kNumberOfCapturesIndex)));
173 TNode<Smi> const num_results = SmiTag(WordShr(num_indices, 1));
174 Node* const start =
175 LoadFixedArrayElement(match_info, RegExpMatchInfo::kFirstCaptureIndex);
176 Node* const end = LoadFixedArrayElement(
177 match_info, RegExpMatchInfo::kFirstCaptureIndex + 1);
178
179 // Calculate the substring of the first match before creating the result array
180 // to avoid an unnecessary write barrier storing the first result.
181
182 TNode<String> const first = SubString(string, SmiUntag(start), SmiUntag(end));
183
184 Node* const result =
185 AllocateRegExpResult(context, num_results, start, string);
186 Node* const result_elements = LoadElements(result);
187
188 StoreFixedArrayElement(result_elements, 0, first, SKIP_WRITE_BARRIER);
189
190 // If no captures exist we can skip named capture handling as well.
191 GotoIf(SmiEqual(num_results, SmiConstant(1)), &out);
192
193 // Store all remaining captures.
194 Node* const limit = IntPtrAdd(
195 IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), num_indices);
196
197 VARIABLE(var_from_cursor, MachineType::PointerRepresentation(),
198 IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex + 2));
199 VARIABLE(var_to_cursor, MachineType::PointerRepresentation(),
200 IntPtrConstant(1));
201
202 Variable* vars[] = {&var_from_cursor, &var_to_cursor};
203 Label loop(this, 2, vars);
204
205 Goto(&loop);
206 BIND(&loop);
207 {
208 Node* const from_cursor = var_from_cursor.value();
209 Node* const to_cursor = var_to_cursor.value();
210 TNode<Smi> const start =
211 CAST(LoadFixedArrayElement(match_info, from_cursor));
212
213 Label next_iter(this);
214 GotoIf(SmiEqual(start, SmiConstant(-1)), &next_iter);
215
216 Node* const from_cursor_plus1 = IntPtrAdd(from_cursor, IntPtrConstant(1));
217 Node* const end = LoadFixedArrayElement(match_info, from_cursor_plus1);
218
219 TNode<String> const capture =
220 SubString(string, SmiUntag(start), SmiUntag(end));
221 StoreFixedArrayElement(result_elements, to_cursor, capture);
222 Goto(&next_iter);
223
224 BIND(&next_iter);
225 var_from_cursor.Bind(IntPtrAdd(from_cursor, IntPtrConstant(2)));
226 var_to_cursor.Bind(IntPtrAdd(to_cursor, IntPtrConstant(1)));
227 Branch(UintPtrLessThan(var_from_cursor.value(), limit), &loop,
228 &named_captures);
229 }
230
231 BIND(&named_captures);
232 {
233 // We reach this point only if captures exist, implying that this is an
234 // IRREGEXP JSRegExp.
235
236 CSA_ASSERT(this, IsJSRegExp(regexp));
237 CSA_ASSERT(this, SmiGreaterThan(num_results, SmiConstant(1)));
238
239 // Preparations for named capture properties. Exit early if the result does
240 // not have any named captures to minimize performance impact.
241
242 Node* const data = LoadObjectField(regexp, JSRegExp::kDataOffset);
243 CSA_ASSERT(this,
244 SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
245 SmiConstant(JSRegExp::IRREGEXP)));
246
247 // The names fixed array associates names at even indices with a capture
248 // index at odd indices.
249 TNode<Object> const maybe_names =
250 LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureNameMapIndex);
251 GotoIf(WordEqual(maybe_names, SmiConstant(0)), &out);
252
253 // Allocate a new object to store the named capture properties.
254 // TODO(jgruber): Could be optimized by adding the object map to the heap
255 // root list.
256
257 Node* const native_context = LoadNativeContext(context);
258 Node* const map = LoadContextElement(
259 native_context, Context::SLOW_OBJECT_WITH_NULL_PROTOTYPE_MAP);
260 Node* const properties =
261 AllocateNameDictionary(NameDictionary::kInitialCapacity);
262
263 Node* const group_object = AllocateJSObjectFromMap(map, properties);
264 StoreObjectField(result, JSRegExpResult::kGroupsOffset, group_object);
265
266 // One or more named captures exist, add a property for each one.
267
268 TNode<FixedArray> names = CAST(maybe_names);
269 TNode<IntPtrT> const names_length = LoadAndUntagFixedArrayBaseLength(names);
270 CSA_ASSERT(this, IntPtrGreaterThan(names_length, IntPtrConstant(0)));
271
272 VARIABLE(var_i, MachineType::PointerRepresentation());
273 var_i.Bind(IntPtrConstant(0));
274
275 Variable* vars[] = {&var_i};
276 const int vars_count = sizeof(vars) / sizeof(vars[0]);
277 Label loop(this, vars_count, vars);
278
279 Goto(&loop);
280 BIND(&loop);
281 {
282 Node* const i = var_i.value();
283 Node* const i_plus_1 = IntPtrAdd(i, IntPtrConstant(1));
284 Node* const i_plus_2 = IntPtrAdd(i_plus_1, IntPtrConstant(1));
285
286 Node* const name = LoadFixedArrayElement(names, i);
287 Node* const index = LoadFixedArrayElement(names, i_plus_1);
288 Node* const capture =
289 LoadFixedArrayElement(result_elements, SmiUntag(index));
290
291 // TODO(jgruber): Calling into runtime to create each property is slow.
292 // Either we should create properties entirely in CSA (should be doable),
293 // or only call runtime once and loop there.
294 CallRuntime(Runtime::kCreateDataProperty, context, group_object, name,
295 capture);
296
297 var_i.Bind(i_plus_2);
298 Branch(IntPtrGreaterThanOrEqual(var_i.value(), names_length), &out,
299 &loop);
300 }
301 }
302
303 BIND(&out);
304 return result;
305 }
306
GetStringPointers(Node * const string_data,Node * const offset,Node * const last_index,Node * const string_length,String::Encoding encoding,Variable * var_string_start,Variable * var_string_end)307 void RegExpBuiltinsAssembler::GetStringPointers(
308 Node* const string_data, Node* const offset, Node* const last_index,
309 Node* const string_length, String::Encoding encoding,
310 Variable* var_string_start, Variable* var_string_end) {
311 DCHECK_EQ(var_string_start->rep(), MachineType::PointerRepresentation());
312 DCHECK_EQ(var_string_end->rep(), MachineType::PointerRepresentation());
313
314 const ElementsKind kind = (encoding == String::ONE_BYTE_ENCODING)
315 ? UINT8_ELEMENTS
316 : UINT16_ELEMENTS;
317
318 Node* const from_offset = ElementOffsetFromIndex(
319 IntPtrAdd(offset, last_index), kind, INTPTR_PARAMETERS);
320 var_string_start->Bind(IntPtrAdd(string_data, from_offset));
321
322 Node* const to_offset = ElementOffsetFromIndex(
323 IntPtrAdd(offset, string_length), kind, INTPTR_PARAMETERS);
324 var_string_end->Bind(IntPtrAdd(string_data, to_offset));
325 }
326
RegExpExecInternal(Node * const context,Node * const regexp,Node * const string,Node * const last_index,Node * const match_info)327 Node* RegExpBuiltinsAssembler::RegExpExecInternal(Node* const context,
328 Node* const regexp,
329 Node* const string,
330 Node* const last_index,
331 Node* const match_info) {
332 // Just jump directly to runtime if native RegExp is not selected at compile
333 // time or if regexp entry in generated code is turned off runtime switch or
334 // at compilation.
335 #ifdef V8_INTERPRETED_REGEXP
336 return CallRuntime(Runtime::kRegExpExec, context, regexp, string, last_index,
337 match_info);
338 #else // V8_INTERPRETED_REGEXP
339 CSA_ASSERT(this, TaggedIsNotSmi(regexp));
340 CSA_ASSERT(this, IsJSRegExp(regexp));
341
342 CSA_ASSERT(this, TaggedIsNotSmi(string));
343 CSA_ASSERT(this, IsString(string));
344
345 CSA_ASSERT(this, IsNumber(last_index));
346 CSA_ASSERT(this, IsFixedArrayMap(LoadReceiverMap(match_info)));
347
348 Node* const int_zero = IntPtrConstant(0);
349
350 ToDirectStringAssembler to_direct(state(), string);
351
352 VARIABLE(var_result, MachineRepresentation::kTagged);
353 Label out(this), atom(this), runtime(this, Label::kDeferred);
354
355 // External constants.
356 Node* const isolate_address =
357 ExternalConstant(ExternalReference::isolate_address(isolate()));
358 Node* const regexp_stack_memory_address_address = ExternalConstant(
359 ExternalReference::address_of_regexp_stack_memory_address(isolate()));
360 Node* const regexp_stack_memory_size_address = ExternalConstant(
361 ExternalReference::address_of_regexp_stack_memory_size(isolate()));
362 Node* const static_offsets_vector_address = ExternalConstant(
363 ExternalReference::address_of_static_offsets_vector(isolate()));
364
365 // At this point, last_index is definitely a canonicalized non-negative
366 // number, which implies that any non-Smi last_index is greater than
367 // the maximal string length. If lastIndex > string.length then the matcher
368 // must fail.
369
370 Label if_failure(this);
371
372 CSA_ASSERT(this, IsNumberNormalized(last_index));
373 CSA_ASSERT(this, IsNumberPositive(last_index));
374 GotoIf(TaggedIsNotSmi(last_index), &if_failure);
375
376 Node* const int_string_length = LoadStringLengthAsWord(string);
377 Node* const int_last_index = SmiUntag(last_index);
378
379 GotoIf(UintPtrGreaterThan(int_last_index, int_string_length), &if_failure);
380
381 Node* const data = LoadObjectField(regexp, JSRegExp::kDataOffset);
382 {
383 // Check that the RegExp has been compiled (data contains a fixed array).
384 CSA_ASSERT(this, TaggedIsNotSmi(data));
385 CSA_ASSERT(this, HasInstanceType(data, FIXED_ARRAY_TYPE));
386
387 // Dispatch on the type of the RegExp.
388 {
389 Label next(this), unreachable(this, Label::kDeferred);
390 Node* const tag = LoadAndUntagToWord32FixedArrayElement(
391 data, IntPtrConstant(JSRegExp::kTagIndex));
392
393 int32_t values[] = {
394 JSRegExp::IRREGEXP, JSRegExp::ATOM, JSRegExp::NOT_COMPILED,
395 };
396 Label* labels[] = {&next, &atom, &runtime};
397
398 STATIC_ASSERT(arraysize(values) == arraysize(labels));
399 Switch(tag, &unreachable, values, labels, arraysize(values));
400
401 BIND(&unreachable);
402 Unreachable();
403
404 BIND(&next);
405 }
406
407 // Check (number_of_captures + 1) * 2 <= offsets vector size
408 // Or number_of_captures <= offsets vector size / 2 - 1
409 TNode<Smi> const capture_count =
410 CAST(LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureCountIndex));
411
412 STATIC_ASSERT(Isolate::kJSRegexpStaticOffsetsVectorSize >= 2);
413 GotoIf(SmiAbove(
414 capture_count,
415 SmiConstant(Isolate::kJSRegexpStaticOffsetsVectorSize / 2 - 1)),
416 &runtime);
417 }
418
419 // Ensure that a RegExp stack is allocated. This check is after branching off
420 // for ATOM regexps to avoid unnecessary trips to runtime.
421 {
422 Node* const stack_size =
423 Load(MachineType::IntPtr(), regexp_stack_memory_size_address);
424 GotoIf(IntPtrEqual(stack_size, int_zero), &runtime);
425 }
426
427 // Unpack the string if possible.
428
429 to_direct.TryToDirect(&runtime);
430
431 // Load the irregexp code object and offsets into the subject string. Both
432 // depend on whether the string is one- or two-byte.
433
434 VARIABLE(var_string_start, MachineType::PointerRepresentation());
435 VARIABLE(var_string_end, MachineType::PointerRepresentation());
436 VARIABLE(var_code, MachineRepresentation::kTagged);
437
438 {
439 Node* const direct_string_data = to_direct.PointerToData(&runtime);
440
441 Label next(this), if_isonebyte(this), if_istwobyte(this, Label::kDeferred);
442 Branch(IsOneByteStringInstanceType(to_direct.instance_type()),
443 &if_isonebyte, &if_istwobyte);
444
445 BIND(&if_isonebyte);
446 {
447 GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
448 int_string_length, String::ONE_BYTE_ENCODING,
449 &var_string_start, &var_string_end);
450 var_code.Bind(
451 LoadFixedArrayElement(data, JSRegExp::kIrregexpLatin1CodeIndex));
452 Goto(&next);
453 }
454
455 BIND(&if_istwobyte);
456 {
457 GetStringPointers(direct_string_data, to_direct.offset(), int_last_index,
458 int_string_length, String::TWO_BYTE_ENCODING,
459 &var_string_start, &var_string_end);
460 var_code.Bind(
461 LoadFixedArrayElement(data, JSRegExp::kIrregexpUC16CodeIndex));
462 Goto(&next);
463 }
464
465 BIND(&next);
466 }
467
468 // Check that the irregexp code has been generated for the actual string
469 // encoding. If it has, the field contains a code object; and otherwise it
470 // contains the uninitialized sentinel as a smi.
471
472 Node* const code = var_code.value();
473 CSA_ASSERT_BRANCH(this, [=](Label* ok, Label* not_ok) {
474 GotoIfNot(TaggedIsSmi(code), ok);
475 Branch(SmiEqual(CAST(code), SmiConstant(JSRegExp::kUninitializedValue)), ok,
476 not_ok);
477 });
478 GotoIf(TaggedIsSmi(code), &runtime);
479 CSA_ASSERT(this, HasInstanceType(code, CODE_TYPE));
480
481 Label if_success(this), if_exception(this, Label::kDeferred);
482 {
483 IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
484
485 // Set up args for the final call into generated Irregexp code.
486
487 MachineType type_int32 = MachineType::Int32();
488 MachineType type_tagged = MachineType::AnyTagged();
489 MachineType type_ptr = MachineType::Pointer();
490
491 // Result: A NativeRegExpMacroAssembler::Result return code.
492 MachineType retval_type = type_int32;
493
494 // Argument 0: Original subject string.
495 MachineType arg0_type = type_tagged;
496 Node* const arg0 = string;
497
498 // Argument 1: Previous index.
499 MachineType arg1_type = type_int32;
500 Node* const arg1 = TruncateIntPtrToInt32(int_last_index);
501
502 // Argument 2: Start of string data.
503 MachineType arg2_type = type_ptr;
504 Node* const arg2 = var_string_start.value();
505
506 // Argument 3: End of string data.
507 MachineType arg3_type = type_ptr;
508 Node* const arg3 = var_string_end.value();
509
510 // Argument 4: static offsets vector buffer.
511 MachineType arg4_type = type_ptr;
512 Node* const arg4 = static_offsets_vector_address;
513
514 // Argument 5: Set the number of capture registers to zero to force global
515 // regexps to behave as non-global. This does not affect non-global
516 // regexps.
517 MachineType arg5_type = type_int32;
518 Node* const arg5 = Int32Constant(0);
519
520 // Argument 6: Start (high end) of backtracking stack memory area.
521 Node* const stack_start =
522 Load(MachineType::Pointer(), regexp_stack_memory_address_address);
523 Node* const stack_size =
524 Load(MachineType::IntPtr(), regexp_stack_memory_size_address);
525 Node* const stack_end = IntPtrAdd(stack_start, stack_size);
526
527 MachineType arg6_type = type_ptr;
528 Node* const arg6 = stack_end;
529
530 // Argument 7: Indicate that this is a direct call from JavaScript.
531 MachineType arg7_type = type_int32;
532 Node* const arg7 = Int32Constant(1);
533
534 // Argument 8: Pass current isolate address.
535 MachineType arg8_type = type_ptr;
536 Node* const arg8 = isolate_address;
537
538 Node* const code_entry =
539 IntPtrAdd(BitcastTaggedToWord(code),
540 IntPtrConstant(Code::kHeaderSize - kHeapObjectTag));
541
542 Node* const result = CallCFunction9(
543 retval_type, arg0_type, arg1_type, arg2_type, arg3_type, arg4_type,
544 arg5_type, arg6_type, arg7_type, arg8_type, code_entry, arg0, arg1,
545 arg2, arg3, arg4, arg5, arg6, arg7, arg8);
546
547 // Check the result.
548 // We expect exactly one result since we force the called regexp to behave
549 // as non-global.
550 Node* const int_result = ChangeInt32ToIntPtr(result);
551 GotoIf(IntPtrEqual(int_result,
552 IntPtrConstant(NativeRegExpMacroAssembler::SUCCESS)),
553 &if_success);
554 GotoIf(IntPtrEqual(int_result,
555 IntPtrConstant(NativeRegExpMacroAssembler::FAILURE)),
556 &if_failure);
557 GotoIf(IntPtrEqual(int_result,
558 IntPtrConstant(NativeRegExpMacroAssembler::EXCEPTION)),
559 &if_exception);
560
561 CSA_ASSERT(this,
562 IntPtrEqual(int_result,
563 IntPtrConstant(NativeRegExpMacroAssembler::RETRY)));
564 Goto(&runtime);
565 }
566
567 BIND(&if_success);
568 {
569 // Check that the last match info has space for the capture registers and
570 // the additional information. Ensure no overflow in add.
571 STATIC_ASSERT(FixedArray::kMaxLength < kMaxInt - FixedArray::kLengthOffset);
572 TNode<Smi> const available_slots =
573 SmiSub(LoadFixedArrayBaseLength(match_info),
574 SmiConstant(RegExpMatchInfo::kLastMatchOverhead));
575 TNode<Smi> const capture_count =
576 CAST(LoadFixedArrayElement(data, JSRegExp::kIrregexpCaptureCountIndex));
577 // Calculate number of register_count = (capture_count + 1) * 2.
578 TNode<Smi> const register_count =
579 SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
580 GotoIf(SmiGreaterThan(register_count, available_slots), &runtime);
581
582 // Fill match_info.
583
584 StoreFixedArrayElement(match_info, RegExpMatchInfo::kNumberOfCapturesIndex,
585 register_count, SKIP_WRITE_BARRIER);
586 StoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
587 string);
588 StoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
589 string);
590
591 // Fill match and capture offsets in match_info.
592 {
593 Node* const limit_offset = ElementOffsetFromIndex(
594 register_count, INT32_ELEMENTS, SMI_PARAMETERS, 0);
595
596 Node* const to_offset = ElementOffsetFromIndex(
597 IntPtrConstant(RegExpMatchInfo::kFirstCaptureIndex), PACKED_ELEMENTS,
598 INTPTR_PARAMETERS, RegExpMatchInfo::kHeaderSize - kHeapObjectTag);
599 VARIABLE(var_to_offset, MachineType::PointerRepresentation(), to_offset);
600
601 VariableList vars({&var_to_offset}, zone());
602 BuildFastLoop(
603 vars, int_zero, limit_offset,
604 [=, &var_to_offset](Node* offset) {
605 Node* const value = Load(MachineType::Int32(),
606 static_offsets_vector_address, offset);
607 Node* const smi_value = SmiFromInt32(value);
608 StoreNoWriteBarrier(MachineRepresentation::kTagged, match_info,
609 var_to_offset.value(), smi_value);
610 Increment(&var_to_offset, kPointerSize);
611 },
612 kInt32Size, INTPTR_PARAMETERS, IndexAdvanceMode::kPost);
613 }
614
615 var_result.Bind(match_info);
616 Goto(&out);
617 }
618
619 BIND(&if_failure);
620 {
621 var_result.Bind(NullConstant());
622 Goto(&out);
623 }
624
625 BIND(&if_exception);
626 {
627 // A stack overflow was detected in RegExp code.
628 #ifdef DEBUG
629 Node* const pending_exception_address =
630 ExternalConstant(ExternalReference::Create(
631 IsolateAddressId::kPendingExceptionAddress, isolate()));
632 CSA_ASSERT(this, IsTheHole(Load(MachineType::AnyTagged(),
633 pending_exception_address)));
634 #endif // DEBUG
635 CallRuntime(Runtime::kThrowStackOverflow, context);
636 Unreachable();
637 }
638
639 BIND(&runtime);
640 {
641 Node* const result = CallRuntime(Runtime::kRegExpExec, context, regexp,
642 string, last_index, match_info);
643 var_result.Bind(result);
644 Goto(&out);
645 }
646
647 BIND(&atom);
648 {
649 // TODO(jgruber): A call with 4 args stresses register allocation, this
650 // should probably just be inlined.
651 Node* const result = CallBuiltin(Builtins::kRegExpExecAtom, context, regexp,
652 string, last_index, match_info);
653 var_result.Bind(result);
654 Goto(&out);
655 }
656
657 BIND(&out);
658 return var_result.value();
659 #endif // V8_INTERPRETED_REGEXP
660 }
661
662 // ES#sec-regexp.prototype.exec
663 // RegExp.prototype.exec ( string )
664 // Implements the core of RegExp.prototype.exec but without actually
665 // constructing the JSRegExpResult. Returns either null (if the RegExp did not
666 // match) or a fixed array containing match indices as returned by
667 // RegExpExecStub.
RegExpPrototypeExecBodyWithoutResult(Node * const context,Node * const regexp,Node * const string,Label * if_didnotmatch,const bool is_fastpath)668 Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBodyWithoutResult(
669 Node* const context, Node* const regexp, Node* const string,
670 Label* if_didnotmatch, const bool is_fastpath) {
671 Node* const int_zero = IntPtrConstant(0);
672 Node* const smi_zero = SmiConstant(0);
673
674 if (is_fastpath) {
675 CSA_ASSERT(this, IsFastRegExpNoPrototype(context, regexp));
676 } else {
677 ThrowIfNotInstanceType(context, regexp, JS_REGEXP_TYPE,
678 "RegExp.prototype.exec");
679 }
680
681 CSA_ASSERT(this, IsString(string));
682 CSA_ASSERT(this, IsJSRegExp(regexp));
683
684 VARIABLE(var_result, MachineRepresentation::kTagged);
685 Label out(this);
686
687 // Load lastIndex.
688 VARIABLE(var_lastindex, MachineRepresentation::kTagged);
689 {
690 Node* const regexp_lastindex = LoadLastIndex(context, regexp, is_fastpath);
691 var_lastindex.Bind(regexp_lastindex);
692
693 if (is_fastpath) {
694 // ToLength on a positive smi is a nop and can be skipped.
695 CSA_ASSERT(this, TaggedIsPositiveSmi(regexp_lastindex));
696 } else {
697 // Omit ToLength if lastindex is a non-negative smi.
698 Label call_tolength(this, Label::kDeferred), next(this);
699 Branch(TaggedIsPositiveSmi(regexp_lastindex), &next, &call_tolength);
700
701 BIND(&call_tolength);
702 {
703 var_lastindex.Bind(ToLength_Inline(context, regexp_lastindex));
704 Goto(&next);
705 }
706
707 BIND(&next);
708 }
709 }
710
711 // Check whether the regexp is global or sticky, which determines whether we
712 // update last index later on.
713 Node* const flags = LoadObjectField(regexp, JSRegExp::kFlagsOffset);
714 Node* const is_global_or_sticky = WordAnd(
715 SmiUntag(flags), IntPtrConstant(JSRegExp::kGlobal | JSRegExp::kSticky));
716 Node* const should_update_last_index =
717 WordNotEqual(is_global_or_sticky, int_zero);
718
719 // Grab and possibly update last index.
720 Label run_exec(this);
721 {
722 Label if_doupdate(this), if_dontupdate(this);
723 Branch(should_update_last_index, &if_doupdate, &if_dontupdate);
724
725 BIND(&if_doupdate);
726 {
727 Node* const lastindex = var_lastindex.value();
728
729 Label if_isoob(this, Label::kDeferred);
730 GotoIfNot(TaggedIsSmi(lastindex), &if_isoob);
731 TNode<Smi> const string_length = LoadStringLengthAsSmi(string);
732 GotoIfNot(SmiLessThanOrEqual(CAST(lastindex), string_length), &if_isoob);
733 Goto(&run_exec);
734
735 BIND(&if_isoob);
736 {
737 StoreLastIndex(context, regexp, smi_zero, is_fastpath);
738 var_result.Bind(NullConstant());
739 Goto(if_didnotmatch);
740 }
741 }
742
743 BIND(&if_dontupdate);
744 {
745 var_lastindex.Bind(smi_zero);
746 Goto(&run_exec);
747 }
748 }
749
750 Node* match_indices;
751 Label successful_match(this);
752 BIND(&run_exec);
753 {
754 // Get last match info from the context.
755 Node* const native_context = LoadNativeContext(context);
756 Node* const last_match_info = LoadContextElement(
757 native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
758
759 // Call the exec stub.
760 match_indices = RegExpExecInternal(context, regexp, string,
761 var_lastindex.value(), last_match_info);
762 var_result.Bind(match_indices);
763
764 // {match_indices} is either null or the RegExpMatchInfo array.
765 // Return early if exec failed, possibly updating last index.
766 GotoIfNot(IsNull(match_indices), &successful_match);
767
768 GotoIfNot(should_update_last_index, if_didnotmatch);
769
770 StoreLastIndex(context, regexp, smi_zero, is_fastpath);
771 Goto(if_didnotmatch);
772 }
773
774 BIND(&successful_match);
775 {
776 GotoIfNot(should_update_last_index, &out);
777
778 // Update the new last index from {match_indices}.
779 Node* const new_lastindex = LoadFixedArrayElement(
780 match_indices, RegExpMatchInfo::kFirstCaptureIndex + 1);
781
782 StoreLastIndex(context, regexp, new_lastindex, is_fastpath);
783 Goto(&out);
784 }
785
786 BIND(&out);
787 return var_result.value();
788 }
789
790 // ES#sec-regexp.prototype.exec
791 // RegExp.prototype.exec ( string )
RegExpPrototypeExecBody(Node * const context,Node * const regexp,TNode<String> const string,const bool is_fastpath)792 Node* RegExpBuiltinsAssembler::RegExpPrototypeExecBody(
793 Node* const context, Node* const regexp, TNode<String> const string,
794 const bool is_fastpath) {
795 VARIABLE(var_result, MachineRepresentation::kTagged);
796
797 Label if_didnotmatch(this), out(this);
798 Node* const indices_or_null = RegExpPrototypeExecBodyWithoutResult(
799 context, regexp, string, &if_didnotmatch, is_fastpath);
800
801 // Successful match.
802 {
803 Node* const match_indices = indices_or_null;
804 Node* const result =
805 ConstructNewResultFromMatchInfo(context, regexp, match_indices, string);
806 var_result.Bind(result);
807 Goto(&out);
808 }
809
810 BIND(&if_didnotmatch);
811 {
812 var_result.Bind(NullConstant());
813 Goto(&out);
814 }
815
816 BIND(&out);
817 return var_result.value();
818 }
819
ThrowIfNotJSReceiver(Node * context,Node * maybe_receiver,MessageTemplate::Template msg_template,char const * method_name)820 Node* RegExpBuiltinsAssembler::ThrowIfNotJSReceiver(
821 Node* context, Node* maybe_receiver, MessageTemplate::Template msg_template,
822 char const* method_name) {
823 Label out(this), throw_exception(this, Label::kDeferred);
824 VARIABLE(var_value_map, MachineRepresentation::kTagged);
825
826 GotoIf(TaggedIsSmi(maybe_receiver), &throw_exception);
827
828 // Load the instance type of the {value}.
829 var_value_map.Bind(LoadMap(maybe_receiver));
830 Node* const value_instance_type = LoadMapInstanceType(var_value_map.value());
831
832 Branch(IsJSReceiverInstanceType(value_instance_type), &out, &throw_exception);
833
834 // The {value} is not a compatible receiver for this method.
835 BIND(&throw_exception);
836 {
837 Node* const value_str =
838 CallBuiltin(Builtins::kToString, context, maybe_receiver);
839 ThrowTypeError(context, msg_template, StringConstant(method_name),
840 value_str);
841 }
842
843 BIND(&out);
844 return var_value_map.value();
845 }
846
IsFastRegExpNoPrototype(Node * const context,Node * const object,Node * const map)847 Node* RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(Node* const context,
848 Node* const object,
849 Node* const map) {
850 Label out(this);
851 VARIABLE(var_result, MachineRepresentation::kWord32);
852
853 #ifdef V8_ENABLE_FORCE_SLOW_PATH
854 var_result.Bind(Int32Constant(0));
855 GotoIfForceSlowPath(&out);
856 #endif
857
858 Node* const native_context = LoadNativeContext(context);
859 Node* const regexp_fun =
860 LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX);
861 Node* const initial_map =
862 LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset);
863 Node* const has_initialmap = WordEqual(map, initial_map);
864
865 var_result.Bind(has_initialmap);
866 GotoIfNot(has_initialmap, &out);
867
868 // The smi check is required to omit ToLength(lastIndex) calls with possible
869 // user-code execution on the fast path.
870 Node* const last_index = FastLoadLastIndex(object);
871 var_result.Bind(TaggedIsPositiveSmi(last_index));
872 Goto(&out);
873
874 BIND(&out);
875 return var_result.value();
876 }
877
IsFastRegExpNoPrototype(Node * const context,Node * const object)878 Node* RegExpBuiltinsAssembler::IsFastRegExpNoPrototype(Node* const context,
879 Node* const object) {
880 CSA_ASSERT(this, TaggedIsNotSmi(object));
881 return IsFastRegExpNoPrototype(context, object, LoadMap(object));
882 }
883
884 // RegExp fast path implementations rely on unmodified JSRegExp instances.
885 // We use a fairly coarse granularity for this and simply check whether both
886 // the regexp itself is unmodified (i.e. its map has not changed), its
887 // prototype is unmodified, and lastIndex is a non-negative smi.
BranchIfFastRegExp(Node * const context,Node * const object,Node * const map,Label * const if_isunmodified,Label * const if_ismodified)888 void RegExpBuiltinsAssembler::BranchIfFastRegExp(Node* const context,
889 Node* const object,
890 Node* const map,
891 Label* const if_isunmodified,
892 Label* const if_ismodified) {
893 CSA_ASSERT(this, WordEqual(LoadMap(object), map));
894
895 GotoIfForceSlowPath(if_ismodified);
896
897 // TODO(ishell): Update this check once map changes for constant field
898 // tracking are landing.
899
900 Node* const native_context = LoadNativeContext(context);
901 Node* const regexp_fun =
902 LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX);
903 Node* const initial_map =
904 LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset);
905 Node* const has_initialmap = WordEqual(map, initial_map);
906
907 GotoIfNot(has_initialmap, if_ismodified);
908
909 Node* const initial_proto_initial_map =
910 LoadContextElement(native_context, Context::REGEXP_PROTOTYPE_MAP_INDEX);
911 Node* const proto_map = LoadMap(LoadMapPrototype(map));
912 Node* const proto_has_initialmap =
913 WordEqual(proto_map, initial_proto_initial_map);
914
915 GotoIfNot(proto_has_initialmap, if_ismodified);
916
917 // The smi check is required to omit ToLength(lastIndex) calls with possible
918 // user-code execution on the fast path.
919 Node* const last_index = FastLoadLastIndex(object);
920 Branch(TaggedIsPositiveSmi(last_index), if_isunmodified, if_ismodified);
921 }
922
BranchIfFastRegExp(Node * const context,Node * const object,Label * const if_isunmodified,Label * const if_ismodified)923 void RegExpBuiltinsAssembler::BranchIfFastRegExp(Node* const context,
924 Node* const object,
925 Label* const if_isunmodified,
926 Label* const if_ismodified) {
927 CSA_ASSERT(this, TaggedIsNotSmi(object));
928 BranchIfFastRegExp(context, object, LoadMap(object), if_isunmodified,
929 if_ismodified);
930 }
931
IsFastRegExp(SloppyTNode<Context> context,SloppyTNode<Object> object)932 TNode<BoolT> RegExpBuiltinsAssembler::IsFastRegExp(SloppyTNode<Context> context,
933 SloppyTNode<Object> object) {
934 Label yup(this), nope(this), out(this);
935 TVARIABLE(BoolT, var_result);
936
937 BranchIfFastRegExp(context, object, &yup, &nope);
938
939 BIND(&yup);
940 var_result = Int32TrueConstant();
941 Goto(&out);
942
943 BIND(&nope);
944 var_result = Int32FalseConstant();
945 Goto(&out);
946
947 BIND(&out);
948 return var_result.value();
949 }
950
BranchIfFastRegExpResult(Node * const context,Node * const object,Label * if_isunmodified,Label * if_ismodified)951 void RegExpBuiltinsAssembler::BranchIfFastRegExpResult(Node* const context,
952 Node* const object,
953 Label* if_isunmodified,
954 Label* if_ismodified) {
955 // Could be a Smi.
956 Node* const map = LoadReceiverMap(object);
957
958 Node* const native_context = LoadNativeContext(context);
959 Node* const initial_regexp_result_map =
960 LoadContextElement(native_context, Context::REGEXP_RESULT_MAP_INDEX);
961
962 Branch(WordEqual(map, initial_regexp_result_map), if_isunmodified,
963 if_ismodified);
964 }
965
966 // Slow path stub for RegExpPrototypeExec to decrease code size.
TF_BUILTIN(RegExpPrototypeExecSlow,RegExpBuiltinsAssembler)967 TF_BUILTIN(RegExpPrototypeExecSlow, RegExpBuiltinsAssembler) {
968 Node* const regexp = Parameter(Descriptor::kReceiver);
969 TNode<String> const string = CAST(Parameter(Descriptor::kString));
970 Node* const context = Parameter(Descriptor::kContext);
971
972 Return(RegExpPrototypeExecBody(context, regexp, string, false));
973 }
974
975 // Fast path stub for ATOM regexps. String matching is done by StringIndexOf,
976 // and {match_info} is updated on success.
977 // The slow path is implemented in RegExpImpl::AtomExec.
TF_BUILTIN(RegExpExecAtom,RegExpBuiltinsAssembler)978 TF_BUILTIN(RegExpExecAtom, RegExpBuiltinsAssembler) {
979 Node* const regexp = Parameter(Descriptor::kRegExp);
980 Node* const subject_string = Parameter(Descriptor::kString);
981 Node* const last_index = Parameter(Descriptor::kLastIndex);
982 Node* const match_info = Parameter(Descriptor::kMatchInfo);
983 Node* const context = Parameter(Descriptor::kContext);
984
985 CSA_ASSERT(this, IsJSRegExp(regexp));
986 CSA_ASSERT(this, IsString(subject_string));
987 CSA_ASSERT(this, TaggedIsPositiveSmi(last_index));
988 CSA_ASSERT(this, IsFixedArray(match_info));
989
990 Node* const data = LoadObjectField(regexp, JSRegExp::kDataOffset);
991 CSA_ASSERT(this, IsFixedArray(data));
992 CSA_ASSERT(this,
993 SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
994 SmiConstant(JSRegExp::ATOM)));
995
996 // Callers ensure that last_index is in-bounds.
997 CSA_ASSERT(this,
998 UintPtrLessThanOrEqual(SmiUntag(last_index),
999 LoadStringLengthAsWord(subject_string)));
1000
1001 Node* const needle_string =
1002 LoadFixedArrayElement(data, JSRegExp::kAtomPatternIndex);
1003 CSA_ASSERT(this, IsString(needle_string));
1004
1005 TNode<Smi> const match_from =
1006 CAST(CallBuiltin(Builtins::kStringIndexOf, context, subject_string,
1007 needle_string, last_index));
1008
1009 Label if_failure(this), if_success(this);
1010 Branch(SmiEqual(match_from, SmiConstant(-1)), &if_failure, &if_success);
1011
1012 BIND(&if_success);
1013 {
1014 CSA_ASSERT(this, TaggedIsPositiveSmi(match_from));
1015 CSA_ASSERT(this, UintPtrLessThan(SmiUntag(match_from),
1016 LoadStringLengthAsWord(subject_string)));
1017
1018 const int kNumRegisters = 2;
1019 STATIC_ASSERT(RegExpMatchInfo::kInitialCaptureIndices >= kNumRegisters);
1020
1021 TNode<Smi> const match_to =
1022 SmiAdd(match_from, LoadStringLengthAsSmi(needle_string));
1023
1024 StoreFixedArrayElement(match_info, RegExpMatchInfo::kNumberOfCapturesIndex,
1025 SmiConstant(kNumRegisters), SKIP_WRITE_BARRIER);
1026 StoreFixedArrayElement(match_info, RegExpMatchInfo::kLastSubjectIndex,
1027 subject_string);
1028 StoreFixedArrayElement(match_info, RegExpMatchInfo::kLastInputIndex,
1029 subject_string);
1030 StoreFixedArrayElement(match_info, RegExpMatchInfo::kFirstCaptureIndex,
1031 match_from, SKIP_WRITE_BARRIER);
1032 StoreFixedArrayElement(match_info, RegExpMatchInfo::kFirstCaptureIndex + 1,
1033 match_to, SKIP_WRITE_BARRIER);
1034
1035 Return(match_info);
1036 }
1037
1038 BIND(&if_failure);
1039 Return(NullConstant());
1040 }
1041
1042 // ES#sec-regexp.prototype.exec
1043 // RegExp.prototype.exec ( string )
TF_BUILTIN(RegExpPrototypeExec,RegExpBuiltinsAssembler)1044 TF_BUILTIN(RegExpPrototypeExec, RegExpBuiltinsAssembler) {
1045 Node* const maybe_receiver = Parameter(Descriptor::kReceiver);
1046 Node* const maybe_string = Parameter(Descriptor::kString);
1047 Node* const context = Parameter(Descriptor::kContext);
1048
1049 // Ensure {maybe_receiver} is a JSRegExp.
1050 ThrowIfNotInstanceType(context, maybe_receiver, JS_REGEXP_TYPE,
1051 "RegExp.prototype.exec");
1052 Node* const receiver = maybe_receiver;
1053
1054 // Convert {maybe_string} to a String.
1055 TNode<String> const string = ToString_Inline(context, maybe_string);
1056
1057 Label if_isfastpath(this), if_isslowpath(this);
1058 Branch(IsFastRegExpNoPrototype(context, receiver), &if_isfastpath,
1059 &if_isslowpath);
1060
1061 BIND(&if_isfastpath);
1062 {
1063 Node* const result =
1064 RegExpPrototypeExecBody(context, receiver, string, true);
1065 Return(result);
1066 }
1067
1068 BIND(&if_isslowpath);
1069 {
1070 Node* const result = CallBuiltin(Builtins::kRegExpPrototypeExecSlow,
1071 context, receiver, string);
1072 Return(result);
1073 }
1074 }
1075
FlagsGetter(Node * const context,Node * const regexp,bool is_fastpath)1076 Node* RegExpBuiltinsAssembler::FlagsGetter(Node* const context,
1077 Node* const regexp,
1078 bool is_fastpath) {
1079 Isolate* isolate = this->isolate();
1080
1081 TNode<IntPtrT> const int_one = IntPtrConstant(1);
1082 TVARIABLE(Smi, var_length, SmiConstant(0));
1083 TVARIABLE(IntPtrT, var_flags);
1084
1085 // First, count the number of characters we will need and check which flags
1086 // are set.
1087
1088 if (is_fastpath) {
1089 // Refer to JSRegExp's flag property on the fast-path.
1090 CSA_ASSERT(this, IsJSRegExp(regexp));
1091 Node* const flags_smi = LoadObjectField(regexp, JSRegExp::kFlagsOffset);
1092 var_flags = SmiUntag(flags_smi);
1093
1094 #define CASE_FOR_FLAG(FLAG) \
1095 do { \
1096 Label next(this); \
1097 GotoIfNot(IsSetWord(var_flags.value(), FLAG), &next); \
1098 var_length = SmiAdd(var_length.value(), SmiConstant(1)); \
1099 Goto(&next); \
1100 BIND(&next); \
1101 } while (false)
1102
1103 CASE_FOR_FLAG(JSRegExp::kGlobal);
1104 CASE_FOR_FLAG(JSRegExp::kIgnoreCase);
1105 CASE_FOR_FLAG(JSRegExp::kMultiline);
1106 CASE_FOR_FLAG(JSRegExp::kDotAll);
1107 CASE_FOR_FLAG(JSRegExp::kUnicode);
1108 CASE_FOR_FLAG(JSRegExp::kSticky);
1109 #undef CASE_FOR_FLAG
1110 } else {
1111 DCHECK(!is_fastpath);
1112
1113 // Fall back to GetProperty stub on the slow-path.
1114 var_flags = IntPtrConstant(0);
1115
1116 #define CASE_FOR_FLAG(NAME, FLAG) \
1117 do { \
1118 Label next(this); \
1119 Node* const flag = GetProperty( \
1120 context, regexp, isolate->factory()->InternalizeUtf8String(NAME)); \
1121 Label if_isflagset(this); \
1122 BranchIfToBooleanIsTrue(flag, &if_isflagset, &next); \
1123 BIND(&if_isflagset); \
1124 var_length = SmiAdd(var_length.value(), SmiConstant(1)); \
1125 var_flags = Signed(WordOr(var_flags.value(), IntPtrConstant(FLAG))); \
1126 Goto(&next); \
1127 BIND(&next); \
1128 } while (false)
1129
1130 CASE_FOR_FLAG("global", JSRegExp::kGlobal);
1131 CASE_FOR_FLAG("ignoreCase", JSRegExp::kIgnoreCase);
1132 CASE_FOR_FLAG("multiline", JSRegExp::kMultiline);
1133 CASE_FOR_FLAG("dotAll", JSRegExp::kDotAll);
1134 CASE_FOR_FLAG("unicode", JSRegExp::kUnicode);
1135 CASE_FOR_FLAG("sticky", JSRegExp::kSticky);
1136 #undef CASE_FOR_FLAG
1137 }
1138
1139 // Allocate a string of the required length and fill it with the corresponding
1140 // char for each set flag.
1141
1142 {
1143 Node* const result = AllocateSeqOneByteString(context, var_length.value());
1144
1145 VARIABLE(var_offset, MachineType::PointerRepresentation(),
1146 IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag));
1147
1148 #define CASE_FOR_FLAG(FLAG, CHAR) \
1149 do { \
1150 Label next(this); \
1151 GotoIfNot(IsSetWord(var_flags.value(), FLAG), &next); \
1152 Node* const value = Int32Constant(CHAR); \
1153 StoreNoWriteBarrier(MachineRepresentation::kWord8, result, \
1154 var_offset.value(), value); \
1155 var_offset.Bind(IntPtrAdd(var_offset.value(), int_one)); \
1156 Goto(&next); \
1157 BIND(&next); \
1158 } while (false)
1159
1160 CASE_FOR_FLAG(JSRegExp::kGlobal, 'g');
1161 CASE_FOR_FLAG(JSRegExp::kIgnoreCase, 'i');
1162 CASE_FOR_FLAG(JSRegExp::kMultiline, 'm');
1163 CASE_FOR_FLAG(JSRegExp::kDotAll, 's');
1164 CASE_FOR_FLAG(JSRegExp::kUnicode, 'u');
1165 CASE_FOR_FLAG(JSRegExp::kSticky, 'y');
1166 #undef CASE_FOR_FLAG
1167
1168 return result;
1169 }
1170 }
1171
1172 // ES#sec-isregexp IsRegExp ( argument )
IsRegExp(Node * const context,Node * const maybe_receiver)1173 Node* RegExpBuiltinsAssembler::IsRegExp(Node* const context,
1174 Node* const maybe_receiver) {
1175 Label out(this), if_isregexp(this);
1176
1177 VARIABLE(var_result, MachineRepresentation::kWord32, Int32Constant(0));
1178
1179 GotoIf(TaggedIsSmi(maybe_receiver), &out);
1180 GotoIfNot(IsJSReceiver(maybe_receiver), &out);
1181
1182 Node* const receiver = maybe_receiver;
1183
1184 // Check @@match.
1185 {
1186 Node* const value =
1187 GetProperty(context, receiver, isolate()->factory()->match_symbol());
1188
1189 Label match_isundefined(this), match_isnotundefined(this);
1190 Branch(IsUndefined(value), &match_isundefined, &match_isnotundefined);
1191
1192 BIND(&match_isundefined);
1193 Branch(IsJSRegExp(receiver), &if_isregexp, &out);
1194
1195 BIND(&match_isnotundefined);
1196 BranchIfToBooleanIsTrue(value, &if_isregexp, &out);
1197 }
1198
1199 BIND(&if_isregexp);
1200 var_result.Bind(Int32Constant(1));
1201 Goto(&out);
1202
1203 BIND(&out);
1204 return var_result.value();
1205 }
1206
1207 // ES#sec-regexpinitialize
1208 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
RegExpInitialize(Node * const context,Node * const regexp,Node * const maybe_pattern,Node * const maybe_flags)1209 Node* RegExpBuiltinsAssembler::RegExpInitialize(Node* const context,
1210 Node* const regexp,
1211 Node* const maybe_pattern,
1212 Node* const maybe_flags) {
1213 CSA_ASSERT(this, IsJSRegExp(regexp));
1214
1215 // Normalize pattern.
1216 TNode<Object> const pattern = Select<Object>(
1217 IsUndefined(maybe_pattern), [=] { return EmptyStringConstant(); },
1218 [=] { return ToString_Inline(context, maybe_pattern); });
1219
1220 // Normalize flags.
1221 TNode<Object> const flags = Select<Object>(
1222 IsUndefined(maybe_flags), [=] { return EmptyStringConstant(); },
1223 [=] { return ToString_Inline(context, maybe_flags); });
1224
1225 // Initialize.
1226
1227 return CallRuntime(Runtime::kRegExpInitializeAndCompile, context, regexp,
1228 pattern, flags);
1229 }
1230
1231 // ES #sec-get-regexp.prototype.flags
TF_BUILTIN(RegExpPrototypeFlagsGetter,RegExpBuiltinsAssembler)1232 TF_BUILTIN(RegExpPrototypeFlagsGetter, RegExpBuiltinsAssembler) {
1233 Node* const maybe_receiver = Parameter(Descriptor::kReceiver);
1234 Node* const context = Parameter(Descriptor::kContext);
1235
1236 Node* const map = ThrowIfNotJSReceiver(context, maybe_receiver,
1237 MessageTemplate::kRegExpNonObject,
1238 "RegExp.prototype.flags");
1239 Node* const receiver = maybe_receiver;
1240
1241 Label if_isfastpath(this), if_isslowpath(this, Label::kDeferred);
1242 BranchIfFastRegExp(context, receiver, map, &if_isfastpath, &if_isslowpath);
1243
1244 BIND(&if_isfastpath);
1245 Return(FlagsGetter(context, receiver, true));
1246
1247 BIND(&if_isslowpath);
1248 Return(FlagsGetter(context, receiver, false));
1249 }
1250
1251 // ES#sec-regexp-pattern-flags
1252 // RegExp ( pattern, flags )
TF_BUILTIN(RegExpConstructor,RegExpBuiltinsAssembler)1253 TF_BUILTIN(RegExpConstructor, RegExpBuiltinsAssembler) {
1254 Node* const pattern = Parameter(Descriptor::kPattern);
1255 Node* const flags = Parameter(Descriptor::kFlags);
1256 Node* const new_target = Parameter(Descriptor::kNewTarget);
1257 Node* const context = Parameter(Descriptor::kContext);
1258
1259 Isolate* isolate = this->isolate();
1260
1261 VARIABLE(var_flags, MachineRepresentation::kTagged, flags);
1262 VARIABLE(var_pattern, MachineRepresentation::kTagged, pattern);
1263 VARIABLE(var_new_target, MachineRepresentation::kTagged, new_target);
1264
1265 Node* const native_context = LoadNativeContext(context);
1266 Node* const regexp_function =
1267 LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX);
1268
1269 Node* const pattern_is_regexp = IsRegExp(context, pattern);
1270
1271 {
1272 Label next(this);
1273
1274 GotoIfNot(IsUndefined(new_target), &next);
1275 var_new_target.Bind(regexp_function);
1276
1277 GotoIfNot(pattern_is_regexp, &next);
1278 GotoIfNot(IsUndefined(flags), &next);
1279
1280 Node* const value =
1281 GetProperty(context, pattern, isolate->factory()->constructor_string());
1282
1283 GotoIfNot(WordEqual(value, regexp_function), &next);
1284 Return(pattern);
1285
1286 BIND(&next);
1287 }
1288
1289 {
1290 Label next(this), if_patternisfastregexp(this),
1291 if_patternisslowregexp(this);
1292 GotoIf(TaggedIsSmi(pattern), &next);
1293
1294 GotoIf(IsJSRegExp(pattern), &if_patternisfastregexp);
1295
1296 Branch(pattern_is_regexp, &if_patternisslowregexp, &next);
1297
1298 BIND(&if_patternisfastregexp);
1299 {
1300 Node* const source = LoadObjectField(pattern, JSRegExp::kSourceOffset);
1301 var_pattern.Bind(source);
1302
1303 {
1304 Label inner_next(this);
1305 GotoIfNot(IsUndefined(flags), &inner_next);
1306
1307 Node* const value = FlagsGetter(context, pattern, true);
1308 var_flags.Bind(value);
1309 Goto(&inner_next);
1310
1311 BIND(&inner_next);
1312 }
1313
1314 Goto(&next);
1315 }
1316
1317 BIND(&if_patternisslowregexp);
1318 {
1319 {
1320 Node* const value =
1321 GetProperty(context, pattern, isolate->factory()->source_string());
1322 var_pattern.Bind(value);
1323 }
1324
1325 {
1326 Label inner_next(this);
1327 GotoIfNot(IsUndefined(flags), &inner_next);
1328
1329 Node* const value =
1330 GetProperty(context, pattern, isolate->factory()->flags_string());
1331 var_flags.Bind(value);
1332 Goto(&inner_next);
1333
1334 BIND(&inner_next);
1335 }
1336
1337 Goto(&next);
1338 }
1339
1340 BIND(&next);
1341 }
1342
1343 // Allocate.
1344
1345 VARIABLE(var_regexp, MachineRepresentation::kTagged);
1346 {
1347 Label allocate_jsregexp(this), allocate_generic(this, Label::kDeferred),
1348 next(this);
1349 Branch(WordEqual(var_new_target.value(), regexp_function),
1350 &allocate_jsregexp, &allocate_generic);
1351
1352 BIND(&allocate_jsregexp);
1353 {
1354 Node* const initial_map = LoadObjectField(
1355 regexp_function, JSFunction::kPrototypeOrInitialMapOffset);
1356 Node* const regexp = AllocateJSObjectFromMap(initial_map);
1357 var_regexp.Bind(regexp);
1358 Goto(&next);
1359 }
1360
1361 BIND(&allocate_generic);
1362 {
1363 ConstructorBuiltinsAssembler constructor_assembler(this->state());
1364 Node* const regexp = constructor_assembler.EmitFastNewObject(
1365 context, regexp_function, var_new_target.value());
1366 var_regexp.Bind(regexp);
1367 Goto(&next);
1368 }
1369
1370 BIND(&next);
1371 }
1372
1373 Node* const result = RegExpInitialize(context, var_regexp.value(),
1374 var_pattern.value(), var_flags.value());
1375 Return(result);
1376 }
1377
1378 // ES#sec-regexp.prototype.compile
1379 // RegExp.prototype.compile ( pattern, flags )
TF_BUILTIN(RegExpPrototypeCompile,RegExpBuiltinsAssembler)1380 TF_BUILTIN(RegExpPrototypeCompile, RegExpBuiltinsAssembler) {
1381 Node* const maybe_receiver = Parameter(Descriptor::kReceiver);
1382 Node* const maybe_pattern = Parameter(Descriptor::kPattern);
1383 Node* const maybe_flags = Parameter(Descriptor::kFlags);
1384 Node* const context = Parameter(Descriptor::kContext);
1385
1386 ThrowIfNotInstanceType(context, maybe_receiver, JS_REGEXP_TYPE,
1387 "RegExp.prototype.compile");
1388 Node* const receiver = maybe_receiver;
1389
1390 VARIABLE(var_flags, MachineRepresentation::kTagged, maybe_flags);
1391 VARIABLE(var_pattern, MachineRepresentation::kTagged, maybe_pattern);
1392
1393 // Handle a JSRegExp pattern.
1394 {
1395 Label next(this);
1396
1397 GotoIf(TaggedIsSmi(maybe_pattern), &next);
1398 GotoIfNot(IsJSRegExp(maybe_pattern), &next);
1399
1400 Node* const pattern = maybe_pattern;
1401
1402 // {maybe_flags} must be undefined in this case, otherwise throw.
1403 {
1404 Label next(this);
1405 GotoIf(IsUndefined(maybe_flags), &next);
1406
1407 ThrowTypeError(context, MessageTemplate::kRegExpFlags);
1408
1409 BIND(&next);
1410 }
1411
1412 Node* const new_flags = FlagsGetter(context, pattern, true);
1413 Node* const new_pattern = LoadObjectField(pattern, JSRegExp::kSourceOffset);
1414
1415 var_flags.Bind(new_flags);
1416 var_pattern.Bind(new_pattern);
1417
1418 Goto(&next);
1419 BIND(&next);
1420 }
1421
1422 Node* const result = RegExpInitialize(context, receiver, var_pattern.value(),
1423 var_flags.value());
1424 Return(result);
1425 }
1426
1427 // ES6 21.2.5.10.
1428 // ES #sec-get-regexp.prototype.source
TF_BUILTIN(RegExpPrototypeSourceGetter,RegExpBuiltinsAssembler)1429 TF_BUILTIN(RegExpPrototypeSourceGetter, RegExpBuiltinsAssembler) {
1430 Node* const receiver = Parameter(Descriptor::kReceiver);
1431 Node* const context = Parameter(Descriptor::kContext);
1432
1433 // Check whether we have an unmodified regexp instance.
1434 Label if_isjsregexp(this), if_isnotjsregexp(this, Label::kDeferred);
1435
1436 GotoIf(TaggedIsSmi(receiver), &if_isnotjsregexp);
1437 Branch(IsJSRegExp(receiver), &if_isjsregexp, &if_isnotjsregexp);
1438
1439 BIND(&if_isjsregexp);
1440 {
1441 Node* const source = LoadObjectField(receiver, JSRegExp::kSourceOffset);
1442 Return(source);
1443 }
1444
1445 BIND(&if_isnotjsregexp);
1446 {
1447 Isolate* isolate = this->isolate();
1448 Node* const native_context = LoadNativeContext(context);
1449 Node* const regexp_fun =
1450 LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX);
1451 Node* const initial_map =
1452 LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset);
1453 Node* const initial_prototype = LoadMapPrototype(initial_map);
1454
1455 Label if_isprototype(this), if_isnotprototype(this);
1456 Branch(WordEqual(receiver, initial_prototype), &if_isprototype,
1457 &if_isnotprototype);
1458
1459 BIND(&if_isprototype);
1460 {
1461 const int counter = v8::Isolate::kRegExpPrototypeSourceGetter;
1462 Node* const counter_smi = SmiConstant(counter);
1463 CallRuntime(Runtime::kIncrementUseCounter, context, counter_smi);
1464
1465 Node* const result =
1466 HeapConstant(isolate->factory()->NewStringFromAsciiChecked("(?:)"));
1467 Return(result);
1468 }
1469
1470 BIND(&if_isnotprototype);
1471 {
1472 ThrowTypeError(context, MessageTemplate::kRegExpNonRegExp,
1473 "RegExp.prototype.source");
1474 }
1475 }
1476 }
1477
1478 // Fast-path implementation for flag checks on an unmodified JSRegExp instance.
FastFlagGetter(Node * const regexp,JSRegExp::Flag flag)1479 Node* RegExpBuiltinsAssembler::FastFlagGetter(Node* const regexp,
1480 JSRegExp::Flag flag) {
1481 TNode<Smi> const flags =
1482 CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset));
1483 TNode<Smi> const mask = SmiConstant(flag);
1484 return SmiToInt32(SmiAnd(flags, mask));
1485 }
1486
1487 // Load through the GetProperty stub.
SlowFlagGetter(Node * const context,Node * const regexp,JSRegExp::Flag flag)1488 Node* RegExpBuiltinsAssembler::SlowFlagGetter(Node* const context,
1489 Node* const regexp,
1490 JSRegExp::Flag flag) {
1491 Factory* factory = isolate()->factory();
1492
1493 Label out(this);
1494 VARIABLE(var_result, MachineRepresentation::kWord32);
1495
1496 Handle<String> name;
1497 switch (flag) {
1498 case JSRegExp::kGlobal:
1499 name = factory->global_string();
1500 break;
1501 case JSRegExp::kIgnoreCase:
1502 name = factory->ignoreCase_string();
1503 break;
1504 case JSRegExp::kMultiline:
1505 name = factory->multiline_string();
1506 break;
1507 case JSRegExp::kDotAll:
1508 UNREACHABLE(); // Never called for dotAll.
1509 break;
1510 case JSRegExp::kSticky:
1511 name = factory->sticky_string();
1512 break;
1513 case JSRegExp::kUnicode:
1514 name = factory->unicode_string();
1515 break;
1516 default:
1517 UNREACHABLE();
1518 }
1519
1520 Node* const value = GetProperty(context, regexp, name);
1521
1522 Label if_true(this), if_false(this);
1523 BranchIfToBooleanIsTrue(value, &if_true, &if_false);
1524
1525 BIND(&if_true);
1526 {
1527 var_result.Bind(Int32Constant(1));
1528 Goto(&out);
1529 }
1530
1531 BIND(&if_false);
1532 {
1533 var_result.Bind(Int32Constant(0));
1534 Goto(&out);
1535 }
1536
1537 BIND(&out);
1538 return var_result.value();
1539 }
1540
FlagGetter(Node * const context,Node * const regexp,JSRegExp::Flag flag,bool is_fastpath)1541 Node* RegExpBuiltinsAssembler::FlagGetter(Node* const context,
1542 Node* const regexp,
1543 JSRegExp::Flag flag,
1544 bool is_fastpath) {
1545 return is_fastpath ? FastFlagGetter(regexp, flag)
1546 : SlowFlagGetter(context, regexp, flag);
1547 }
1548
FlagGetter(Node * context,Node * receiver,JSRegExp::Flag flag,int counter,const char * method_name)1549 void RegExpBuiltinsAssembler::FlagGetter(Node* context, Node* receiver,
1550 JSRegExp::Flag flag, int counter,
1551 const char* method_name) {
1552 // Check whether we have an unmodified regexp instance.
1553 Label if_isunmodifiedjsregexp(this),
1554 if_isnotunmodifiedjsregexp(this, Label::kDeferred);
1555
1556 GotoIf(TaggedIsSmi(receiver), &if_isnotunmodifiedjsregexp);
1557 Branch(IsJSRegExp(receiver), &if_isunmodifiedjsregexp,
1558 &if_isnotunmodifiedjsregexp);
1559
1560 BIND(&if_isunmodifiedjsregexp);
1561 {
1562 // Refer to JSRegExp's flag property on the fast-path.
1563 Node* const is_flag_set = FastFlagGetter(receiver, flag);
1564 Return(SelectBooleanConstant(is_flag_set));
1565 }
1566
1567 BIND(&if_isnotunmodifiedjsregexp);
1568 {
1569 Node* const native_context = LoadNativeContext(context);
1570 Node* const regexp_fun =
1571 LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX);
1572 Node* const initial_map =
1573 LoadObjectField(regexp_fun, JSFunction::kPrototypeOrInitialMapOffset);
1574 Node* const initial_prototype = LoadMapPrototype(initial_map);
1575
1576 Label if_isprototype(this), if_isnotprototype(this);
1577 Branch(WordEqual(receiver, initial_prototype), &if_isprototype,
1578 &if_isnotprototype);
1579
1580 BIND(&if_isprototype);
1581 {
1582 if (counter != -1) {
1583 Node* const counter_smi = SmiConstant(counter);
1584 CallRuntime(Runtime::kIncrementUseCounter, context, counter_smi);
1585 }
1586 Return(UndefinedConstant());
1587 }
1588
1589 BIND(&if_isnotprototype);
1590 { ThrowTypeError(context, MessageTemplate::kRegExpNonRegExp, method_name); }
1591 }
1592 }
1593
1594 // ES6 21.2.5.4.
1595 // ES #sec-get-regexp.prototype.global
TF_BUILTIN(RegExpPrototypeGlobalGetter,RegExpBuiltinsAssembler)1596 TF_BUILTIN(RegExpPrototypeGlobalGetter, RegExpBuiltinsAssembler) {
1597 Node* context = Parameter(Descriptor::kContext);
1598 Node* receiver = Parameter(Descriptor::kReceiver);
1599 FlagGetter(context, receiver, JSRegExp::kGlobal,
1600 v8::Isolate::kRegExpPrototypeOldFlagGetter,
1601 "RegExp.prototype.global");
1602 }
1603
1604 // ES6 21.2.5.5.
1605 // ES #sec-get-regexp.prototype.ignorecase
TF_BUILTIN(RegExpPrototypeIgnoreCaseGetter,RegExpBuiltinsAssembler)1606 TF_BUILTIN(RegExpPrototypeIgnoreCaseGetter, RegExpBuiltinsAssembler) {
1607 Node* context = Parameter(Descriptor::kContext);
1608 Node* receiver = Parameter(Descriptor::kReceiver);
1609 FlagGetter(context, receiver, JSRegExp::kIgnoreCase,
1610 v8::Isolate::kRegExpPrototypeOldFlagGetter,
1611 "RegExp.prototype.ignoreCase");
1612 }
1613
1614 // ES6 21.2.5.7.
1615 // ES #sec-get-regexp.prototype.multiline
TF_BUILTIN(RegExpPrototypeMultilineGetter,RegExpBuiltinsAssembler)1616 TF_BUILTIN(RegExpPrototypeMultilineGetter, RegExpBuiltinsAssembler) {
1617 Node* context = Parameter(Descriptor::kContext);
1618 Node* receiver = Parameter(Descriptor::kReceiver);
1619 FlagGetter(context, receiver, JSRegExp::kMultiline,
1620 v8::Isolate::kRegExpPrototypeOldFlagGetter,
1621 "RegExp.prototype.multiline");
1622 }
1623
1624 // ES #sec-get-regexp.prototype.dotAll
TF_BUILTIN(RegExpPrototypeDotAllGetter,RegExpBuiltinsAssembler)1625 TF_BUILTIN(RegExpPrototypeDotAllGetter, RegExpBuiltinsAssembler) {
1626 Node* context = Parameter(Descriptor::kContext);
1627 Node* receiver = Parameter(Descriptor::kReceiver);
1628 static const int kNoCounter = -1;
1629 FlagGetter(context, receiver, JSRegExp::kDotAll, kNoCounter,
1630 "RegExp.prototype.dotAll");
1631 }
1632
1633 // ES6 21.2.5.12.
1634 // ES #sec-get-regexp.prototype.sticky
TF_BUILTIN(RegExpPrototypeStickyGetter,RegExpBuiltinsAssembler)1635 TF_BUILTIN(RegExpPrototypeStickyGetter, RegExpBuiltinsAssembler) {
1636 Node* context = Parameter(Descriptor::kContext);
1637 Node* receiver = Parameter(Descriptor::kReceiver);
1638 FlagGetter(context, receiver, JSRegExp::kSticky,
1639 v8::Isolate::kRegExpPrototypeStickyGetter,
1640 "RegExp.prototype.sticky");
1641 }
1642
1643 // ES6 21.2.5.15.
1644 // ES #sec-get-regexp.prototype.unicode
TF_BUILTIN(RegExpPrototypeUnicodeGetter,RegExpBuiltinsAssembler)1645 TF_BUILTIN(RegExpPrototypeUnicodeGetter, RegExpBuiltinsAssembler) {
1646 Node* context = Parameter(Descriptor::kContext);
1647 Node* receiver = Parameter(Descriptor::kReceiver);
1648 FlagGetter(context, receiver, JSRegExp::kUnicode,
1649 v8::Isolate::kRegExpPrototypeUnicodeGetter,
1650 "RegExp.prototype.unicode");
1651 }
1652
1653 // ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
RegExpExec(Node * context,Node * regexp,Node * string)1654 Node* RegExpBuiltinsAssembler::RegExpExec(Node* context, Node* regexp,
1655 Node* string) {
1656 VARIABLE(var_result, MachineRepresentation::kTagged);
1657 Label out(this);
1658
1659 // Take the slow path of fetching the exec property, calling it, and
1660 // verifying its return value.
1661
1662 // Get the exec property.
1663 Node* const exec =
1664 GetProperty(context, regexp, isolate()->factory()->exec_string());
1665
1666 // Is {exec} callable?
1667 Label if_iscallable(this), if_isnotcallable(this);
1668
1669 GotoIf(TaggedIsSmi(exec), &if_isnotcallable);
1670
1671 Node* const exec_map = LoadMap(exec);
1672 Branch(IsCallableMap(exec_map), &if_iscallable, &if_isnotcallable);
1673
1674 BIND(&if_iscallable);
1675 {
1676 Callable call_callable = CodeFactory::Call(isolate());
1677 Node* const result = CallJS(call_callable, context, exec, regexp, string);
1678
1679 var_result.Bind(result);
1680 GotoIf(IsNull(result), &out);
1681
1682 ThrowIfNotJSReceiver(context, result,
1683 MessageTemplate::kInvalidRegExpExecResult, "");
1684
1685 Goto(&out);
1686 }
1687
1688 BIND(&if_isnotcallable);
1689 {
1690 ThrowIfNotInstanceType(context, regexp, JS_REGEXP_TYPE,
1691 "RegExp.prototype.exec");
1692
1693 Node* const result = CallBuiltin(Builtins::kRegExpPrototypeExecSlow,
1694 context, regexp, string);
1695 var_result.Bind(result);
1696 Goto(&out);
1697 }
1698
1699 BIND(&out);
1700 return var_result.value();
1701 }
1702
1703 // ES#sec-regexp.prototype.test
1704 // RegExp.prototype.test ( S )
// Produces a boolean telling whether the receiver matches the stringified
// argument. Receivers accepted by BranchIfFastRegExp are matched directly;
// all other receivers go through RegExpExec.
TF_BUILTIN(RegExpPrototypeTest, RegExpBuiltinsAssembler) {
  Node* const maybe_receiver = Parameter(Descriptor::kReceiver);
  Node* const maybe_string = Parameter(Descriptor::kString);
  Node* const context = Parameter(Descriptor::kContext);

  // Throws unless {maybe_receiver} is a JSReceiver.
  ThrowIfNotJSReceiver(context, maybe_receiver,
                       MessageTemplate::kIncompatibleMethodReceiver,
                       "RegExp.prototype.test");
  Node* const regexp = maybe_receiver;

  // Coerce the argument to a String.
  TNode<String> const subject = ToString_Inline(context, maybe_string);

  Label if_fast(this), if_slow(this);
  BranchIfFastRegExp(context, regexp, &if_fast, &if_slow);

  BIND(&if_fast);
  {
    // The exec body jumps to {no_match} on failure and falls through on
    // success; the match data itself is not needed here.
    Label no_match(this);
    RegExpPrototypeExecBodyWithoutResult(context, regexp, subject, &no_match,
                                         true);
    Return(TrueConstant());

    BIND(&no_match);
    Return(FalseConstant());
  }

  BIND(&if_slow);
  {
    // Generic route: anything other than null counts as a match.
    Node* const exec_result = RegExpExec(context, regexp, subject);
    Return(SelectBooleanConstant(IsNotNull(exec_result)));
  }
}
1743
AdvanceStringIndex(Node * const string,Node * const index,Node * const is_unicode,bool is_fastpath)1744 Node* RegExpBuiltinsAssembler::AdvanceStringIndex(Node* const string,
1745 Node* const index,
1746 Node* const is_unicode,
1747 bool is_fastpath) {
1748 CSA_ASSERT(this, IsString(string));
1749 CSA_ASSERT(this, IsNumberNormalized(index));
1750 if (is_fastpath) CSA_ASSERT(this, TaggedIsPositiveSmi(index));
1751
1752 // Default to last_index + 1.
1753 Node* const index_plus_one = NumberInc(index);
1754 VARIABLE(var_result, MachineRepresentation::kTagged, index_plus_one);
1755
1756 // Advancing the index has some subtle issues involving the distinction
1757 // between Smis and HeapNumbers. There's three cases:
1758 // * {index} is a Smi, {index_plus_one} is a Smi. The standard case.
1759 // * {index} is a Smi, {index_plus_one} overflows into a HeapNumber.
1760 // In this case we can return the result early, because
1761 // {index_plus_one} > {string}.length.
1762 // * {index} is a HeapNumber, {index_plus_one} is a HeapNumber. This can only
1763 // occur when {index} is outside the Smi range since we normalize
1764 // explicitly. Again we can return early.
1765 if (is_fastpath) {
1766 // Must be in Smi range on the fast path. We control the value of {index}
1767 // on all call-sites and can never exceed the length of the string.
1768 STATIC_ASSERT(String::kMaxLength + 2 < Smi::kMaxValue);
1769 CSA_ASSERT(this, TaggedIsPositiveSmi(index_plus_one));
1770 }
1771
1772 Label if_isunicode(this), out(this);
1773 GotoIfNot(is_unicode, &out);
1774
1775 // Keep this unconditional (even on the fast path) just to be safe.
1776 Branch(TaggedIsPositiveSmi(index_plus_one), &if_isunicode, &out);
1777
1778 BIND(&if_isunicode);
1779 {
1780 TNode<IntPtrT> const string_length = LoadStringLengthAsWord(string);
1781 TNode<IntPtrT> untagged_plus_one = SmiUntag(index_plus_one);
1782 GotoIfNot(IntPtrLessThan(untagged_plus_one, string_length), &out);
1783
1784 Node* const lead = StringCharCodeAt(string, SmiUntag(index));
1785 GotoIfNot(Word32Equal(Word32And(lead, Int32Constant(0xFC00)),
1786 Int32Constant(0xD800)),
1787 &out);
1788
1789 Node* const trail = StringCharCodeAt(string, untagged_plus_one);
1790 GotoIfNot(Word32Equal(Word32And(trail, Int32Constant(0xFC00)),
1791 Int32Constant(0xDC00)),
1792 &out);
1793
1794 // At a surrogate pair, return index + 2.
1795 Node* const index_plus_two = NumberInc(index_plus_one);
1796 var_result.Bind(index_plus_two);
1797
1798 Goto(&out);
1799 }
1800
1801 BIND(&out);
1802 return var_result.value();
1803 }
1804
RegExpPrototypeMatchBody(Node * const context,Node * const regexp,TNode<String> string,const bool is_fastpath)1805 void RegExpBuiltinsAssembler::RegExpPrototypeMatchBody(Node* const context,
1806 Node* const regexp,
1807 TNode<String> string,
1808 const bool is_fastpath) {
1809 if (is_fastpath) CSA_ASSERT(this, IsFastRegExp(context, regexp));
1810
1811 Node* const int_zero = IntPtrConstant(0);
1812 Node* const smi_zero = SmiConstant(0);
1813 Node* const is_global =
1814 FlagGetter(context, regexp, JSRegExp::kGlobal, is_fastpath);
1815
1816 Label if_isglobal(this), if_isnotglobal(this);
1817 Branch(is_global, &if_isglobal, &if_isnotglobal);
1818
1819 BIND(&if_isnotglobal);
1820 {
1821 Node* const result =
1822 is_fastpath ? RegExpPrototypeExecBody(context, regexp, string, true)
1823 : RegExpExec(context, regexp, string);
1824 Return(result);
1825 }
1826
1827 BIND(&if_isglobal);
1828 {
1829 Node* const is_unicode =
1830 FlagGetter(context, regexp, JSRegExp::kUnicode, is_fastpath);
1831
1832 StoreLastIndex(context, regexp, smi_zero, is_fastpath);
1833
1834 // Allocate an array to store the resulting match strings.
1835
1836 GrowableFixedArray array(state());
1837
1838 // Loop preparations. Within the loop, collect results from RegExpExec
1839 // and store match strings in the array.
1840
1841 Variable* vars[] = {array.var_array(), array.var_length(),
1842 array.var_capacity()};
1843 Label loop(this, 3, vars), out(this);
1844 Goto(&loop);
1845
1846 BIND(&loop);
1847 {
1848 VARIABLE(var_match, MachineRepresentation::kTagged);
1849
1850 Label if_didmatch(this), if_didnotmatch(this);
1851 if (is_fastpath) {
1852 // On the fast path, grab the matching string from the raw match index
1853 // array.
1854 Node* const match_indices = RegExpPrototypeExecBodyWithoutResult(
1855 context, regexp, string, &if_didnotmatch, true);
1856
1857 Node* const match_from = LoadFixedArrayElement(
1858 match_indices, RegExpMatchInfo::kFirstCaptureIndex);
1859 Node* const match_to = LoadFixedArrayElement(
1860 match_indices, RegExpMatchInfo::kFirstCaptureIndex + 1);
1861
1862 var_match.Bind(
1863 SubString(string, SmiUntag(match_from), SmiUntag(match_to)));
1864 Goto(&if_didmatch);
1865 } else {
1866 DCHECK(!is_fastpath);
1867 Node* const result = RegExpExec(context, regexp, string);
1868
1869 Label load_match(this);
1870 Branch(IsNull(result), &if_didnotmatch, &load_match);
1871
1872 BIND(&load_match);
1873 var_match.Bind(
1874 ToString_Inline(context, GetProperty(context, result, smi_zero)));
1875 Goto(&if_didmatch);
1876 }
1877
1878 BIND(&if_didnotmatch);
1879 {
1880 // Return null if there were no matches, otherwise just exit the loop.
1881 GotoIfNot(IntPtrEqual(array.length(), int_zero), &out);
1882 Return(NullConstant());
1883 }
1884
1885 BIND(&if_didmatch);
1886 {
1887 Node* match = var_match.value();
1888
1889 // Store the match, growing the fixed array if needed.
1890
1891 array.Push(CAST(match));
1892
1893 // Advance last index if the match is the empty string.
1894
1895 TNode<Smi> const match_length = LoadStringLengthAsSmi(match);
1896 GotoIfNot(SmiEqual(match_length, SmiConstant(0)), &loop);
1897
1898 Node* last_index = LoadLastIndex(context, regexp, is_fastpath);
1899 if (is_fastpath) {
1900 CSA_ASSERT(this, TaggedIsPositiveSmi(last_index));
1901 } else {
1902 last_index = ToLength_Inline(context, last_index);
1903 }
1904
1905 Node* const new_last_index =
1906 AdvanceStringIndex(string, last_index, is_unicode, is_fastpath);
1907
1908 if (is_fastpath) {
1909 // On the fast path, we can be certain that lastIndex can never be
1910 // incremented to overflow the Smi range since the maximal string
1911 // length is less than the maximal Smi value.
1912 STATIC_ASSERT(String::kMaxLength < Smi::kMaxValue);
1913 CSA_ASSERT(this, TaggedIsPositiveSmi(new_last_index));
1914 }
1915
1916 StoreLastIndex(context, regexp, new_last_index, is_fastpath);
1917
1918 Goto(&loop);
1919 }
1920 }
1921
1922 BIND(&out);
1923 {
1924 // Wrap the match in a JSArray.
1925
1926 Node* const result = array.ToJSArray(CAST(context));
1927 Return(result);
1928 }
1929 }
1930 }
1931
1932 // ES#sec-regexp.prototype-@@match
1933 // RegExp.prototype [ @@match ] ( string )
// Checks the receiver, stringifies the argument and then dispatches: fast
// regexps are forwarded to the RegExpMatchFast builtin, every other receiver
// runs the generic match body emitted inline.
TF_BUILTIN(RegExpPrototypeMatch, RegExpBuiltinsAssembler) {
  Node* const maybe_receiver = Parameter(Descriptor::kReceiver);
  Node* const maybe_string = Parameter(Descriptor::kString);
  Node* const context = Parameter(Descriptor::kContext);

  // Throws unless {maybe_receiver} is a JSReceiver.
  ThrowIfNotJSReceiver(context, maybe_receiver,
                       MessageTemplate::kIncompatibleMethodReceiver,
                       "RegExp.prototype.@@match");
  Node* const regexp = maybe_receiver;

  // Coerce the argument to a String.
  TNode<String> const subject = ToString_Inline(context, maybe_string);

  Label if_fast(this), if_slow(this);
  BranchIfFastRegExp(context, regexp, &if_fast, &if_slow);

  BIND(&if_fast);
  {
    // TODO(pwong): Could be optimized to remove the overhead of calling the
    // builtin (at the cost of a larger builtin).
    Node* const fast_result =
        CallBuiltin(Builtins::kRegExpMatchFast, context, regexp, subject);
    Return(fast_result);
  }

  BIND(&if_slow);
  RegExpPrototypeMatchBody(context, regexp, subject, false);
}
1959
MatchAllIterator(TNode<Context> context,TNode<Context> native_context,TNode<Object> maybe_regexp,TNode<String> string,TNode<BoolT> is_fast_regexp,char const * method_name)1960 TNode<Object> RegExpBuiltinsAssembler::MatchAllIterator(
1961 TNode<Context> context, TNode<Context> native_context,
1962 TNode<Object> maybe_regexp, TNode<String> string,
1963 TNode<BoolT> is_fast_regexp, char const* method_name) {
1964 Label create_iterator(this), if_fast_regexp(this),
1965 if_slow_regexp(this, Label::kDeferred), if_not_regexp(this),
1966 throw_type_error(this, Label::kDeferred);
1967
1968 // 1. Let S be ? ToString(O).
1969 // Handled by the caller of MatchAllIterator.
1970 CSA_ASSERT(this, IsString(string));
1971
1972 TVARIABLE(Object, var_matcher);
1973 TVARIABLE(Int32T, var_global);
1974 TVARIABLE(Int32T, var_unicode);
1975
1976 // 2. If ? IsRegExp(R) is true, then
1977 GotoIf(is_fast_regexp, &if_fast_regexp);
1978 Branch(IsRegExp(context, maybe_regexp), &if_slow_regexp, &if_not_regexp);
1979 BIND(&if_fast_regexp);
1980 {
1981 CSA_ASSERT(this, IsFastRegExp(context, maybe_regexp));
1982 TNode<JSRegExp> fast_regexp = CAST(maybe_regexp);
1983 TNode<Object> source =
1984 LoadObjectField(fast_regexp, JSRegExp::kSourceOffset);
1985 TNode<String> flags = CAST(FlagsGetter(context, fast_regexp, true));
1986
1987 // c. Let matcher be ? Construct(C, « R, flags »).
1988 var_matcher = RegExpCreate(context, native_context, source, flags);
1989 CSA_ASSERT(this, IsFastRegExp(context, var_matcher.value()));
1990
1991 // d. Let global be ? ToBoolean(? Get(matcher, "global")).
1992 var_global = UncheckedCast<Int32T>(
1993 FastFlagGetter(var_matcher.value(), JSRegExp::kGlobal));
1994
1995 // e. Let fullUnicode be ? ToBoolean(? Get(matcher, "unicode").
1996 var_unicode = UncheckedCast<Int32T>(
1997 FastFlagGetter(var_matcher.value(), JSRegExp::kUnicode));
1998
1999 // f. Let lastIndex be ? ToLength(? Get(R, "lastIndex")).
2000 // g. Perform ? Set(matcher, "lastIndex", lastIndex, true).
2001 FastStoreLastIndex(var_matcher.value(), FastLoadLastIndex(fast_regexp));
2002 Goto(&create_iterator);
2003 }
2004 BIND(&if_slow_regexp);
2005 {
2006 // a. Let C be ? SpeciesConstructor(R, %RegExp%).
2007 TNode<Object> regexp_fun =
2008 LoadContextElement(native_context, Context::REGEXP_FUNCTION_INDEX);
2009 TNode<Object> species_constructor =
2010 SpeciesConstructor(native_context, maybe_regexp, regexp_fun);
2011
2012 // b. Let flags be ? ToString(? Get(R, "flags")).
2013 TNode<Object> flags = GetProperty(context, maybe_regexp,
2014 isolate()->factory()->flags_string());
2015 TNode<String> flags_string = ToString_Inline(context, flags);
2016
2017 // c. Let matcher be ? Construct(C, « R, flags »).
2018 var_matcher =
2019 CAST(ConstructJS(CodeFactory::Construct(isolate()), context,
2020 species_constructor, maybe_regexp, flags_string));
2021
2022 // d. Let global be ? ToBoolean(? Get(matcher, "global")).
2023 var_global = UncheckedCast<Int32T>(
2024 SlowFlagGetter(context, var_matcher.value(), JSRegExp::kGlobal));
2025
2026 // e. Let fullUnicode be ? ToBoolean(? Get(matcher, "unicode").
2027 var_unicode = UncheckedCast<Int32T>(
2028 SlowFlagGetter(context, var_matcher.value(), JSRegExp::kUnicode));
2029
2030 // f. Let lastIndex be ? ToLength(? Get(R, "lastIndex")).
2031 TNode<Number> last_index = UncheckedCast<Number>(
2032 ToLength_Inline(context, SlowLoadLastIndex(context, maybe_regexp)));
2033
2034 // g. Perform ? Set(matcher, "lastIndex", lastIndex, true).
2035 SlowStoreLastIndex(context, var_matcher.value(), last_index);
2036
2037 Goto(&create_iterator);
2038 }
2039 // 3. Else,
2040 BIND(&if_not_regexp);
2041 {
2042 // a. Let flags be "g".
2043 // b. Let matcher be ? RegExpCreate(R, flags).
2044 var_matcher = RegExpCreate(context, native_context, maybe_regexp,
2045 StringConstant("g"));
2046
2047 // d. Let global be true.
2048 var_global = Int32Constant(1);
2049
2050 // e. Let fullUnicode be false.
2051 var_unicode = Int32Constant(0);
2052
2053 Label if_matcher_slow_regexp(this, Label::kDeferred);
2054 BranchIfFastRegExp(context, var_matcher.value(), &create_iterator,
2055 &if_matcher_slow_regexp);
2056 BIND(&if_matcher_slow_regexp);
2057 {
2058 // c. If ? IsRegExp(matcher) is not true, throw a TypeError exception.
2059 GotoIfNot(IsRegExp(context, var_matcher.value()), &throw_type_error);
2060
2061 // f. If ? Get(matcher, "lastIndex") is not 0, throw a TypeError
2062 // exception.
2063 TNode<Object> last_index =
2064 CAST(LoadLastIndex(context, var_matcher.value(), false));
2065 Branch(WordEqual(SmiConstant(0), last_index), &create_iterator,
2066 &throw_type_error);
2067 }
2068 }
2069 BIND(&throw_type_error);
2070 {
2071 ThrowTypeError(context, MessageTemplate::kIncompatibleMethodReceiver,
2072 StringConstant(method_name), maybe_regexp);
2073 }
2074 // 4. Return ! CreateRegExpStringIterator(matcher, S, global, fullUnicode).
2075 // CreateRegExpStringIterator ( R, S, global, fullUnicode )
2076 BIND(&create_iterator);
2077 {
2078 TNode<Map> map = CAST(LoadContextElement(
2079 native_context,
2080 Context::INITIAL_REGEXP_STRING_ITERATOR_PROTOTYPE_MAP_INDEX));
2081
2082 // 4. Let iterator be ObjectCreate(%RegExpStringIteratorPrototype%, «
2083 // [[IteratingRegExp]], [[IteratedString]], [[Global]], [[Unicode]],
2084 // [[Done]] »).
2085 TNode<Object> iterator = CAST(Allocate(JSRegExpStringIterator::kSize));
2086 StoreMapNoWriteBarrier(iterator, map);
2087 StoreObjectFieldRoot(iterator,
2088 JSRegExpStringIterator::kPropertiesOrHashOffset,
2089 Heap::kEmptyFixedArrayRootIndex);
2090 StoreObjectFieldRoot(iterator, JSRegExpStringIterator::kElementsOffset,
2091 Heap::kEmptyFixedArrayRootIndex);
2092
2093 // 5. Set iterator.[[IteratingRegExp]] to R.
2094 StoreObjectFieldNoWriteBarrier(
2095 iterator, JSRegExpStringIterator::kIteratingRegExpOffset,
2096 var_matcher.value());
2097
2098 // 6. Set iterator.[[IteratedString]] to S.
2099 StoreObjectFieldNoWriteBarrier(
2100 iterator, JSRegExpStringIterator::kIteratedStringOffset, string);
2101
2102 #ifdef DEBUG
2103 // Verify global and unicode can be bitwise shifted without masking.
2104 TNode<Int32T> zero = Int32Constant(0);
2105 TNode<Int32T> one = Int32Constant(1);
2106 CSA_ASSERT(this, Word32Or(Word32Equal(var_global.value(), zero),
2107 Word32Equal(var_global.value(), one)));
2108 CSA_ASSERT(this, Word32Or(Word32Equal(var_unicode.value(), zero),
2109 Word32Equal(var_unicode.value(), one)));
2110 #endif // DEBUG
2111
2112 // 7. Set iterator.[[Global]] to global.
2113 // 8. Set iterator.[[Unicode]] to fullUnicode.
2114 // 9. Set iterator.[[Done]] to false.
2115 TNode<Word32T> global_flag = Word32Shl(
2116 var_global.value(), Int32Constant(JSRegExpStringIterator::kGlobalBit));
2117 TNode<Word32T> unicode_flag =
2118 Word32Shl(var_unicode.value(),
2119 Int32Constant(JSRegExpStringIterator::kUnicodeBit));
2120 TNode<Word32T> iterator_flags = Word32Or(global_flag, unicode_flag);
2121 StoreObjectFieldNoWriteBarrier(iterator,
2122 JSRegExpStringIterator::kFlagsOffset,
2123 SmiFromInt32(Signed(iterator_flags)));
2124
2125 return iterator;
2126 }
2127 }
2128
2129 // https://tc39.github.io/proposal-string-matchall/
2130 // RegExp.prototype [ @@matchAll ] ( string )
// Validates the receiver, converts the argument to a string and returns the
// iterator produced by MatchAllIterator.
TF_BUILTIN(RegExpPrototypeMatchAll, RegExpBuiltinsAssembler) {
  TNode<Context> context = CAST(Parameter(Descriptor::kContext));
  TNode<Context> native_context = LoadNativeContext(context);
  TNode<Object> receiver = CAST(Parameter(Descriptor::kReceiver));
  TNode<Object> string = CAST(Parameter(Descriptor::kString));

  // 1. Let R be the this value.
  // 2. If Type(R) is not Object, throw a TypeError exception.
  ThrowIfNotJSReceiver(context, receiver,
                       MessageTemplate::kIncompatibleMethodReceiver,
                       "RegExp.prototype.@@matchAll");

  // 3. Return ? MatchAllIterator(R, string).
  //
  // ToString_Inline and IsFastRegExp each emit code where they are called,
  // and C++ does not specify the order in which function arguments are
  // evaluated. Keep them as separate statements so that the string
  // conversion (step 1 of MatchAllIterator) is always emitted before the
  // fast-regexp check, independent of the host compiler.
  TNode<String> const subject = ToString_Inline(context, string);
  TNode<BoolT> const is_fast_regexp = IsFastRegExp(context, receiver);

  Return(MatchAllIterator(context, native_context, receiver, subject,
                          is_fast_regexp, "RegExp.prototype.@@matchAll"));
}
2148
// Helper that skips a few initial checks and assumes...
2150 // 1) receiver is a "fast" RegExp
2151 // 2) pattern is a string
TF_BUILTIN(RegExpMatchFast, RegExpBuiltinsAssembler) {
  Node* const regexp = Parameter(Descriptor::kReceiver);
  // The subject string is passed in the descriptor slot named kPattern.
  TNode<String> const subject = CAST(Parameter(Descriptor::kPattern));
  Node* const context = Parameter(Descriptor::kContext);

  // Emit the fast-path variant of the match body; it returns on every path.
  RegExpPrototypeMatchBody(context, regexp, subject, true);
}
2159
RegExpPrototypeSearchBodyFast(Node * const context,Node * const regexp,Node * const string)2160 void RegExpBuiltinsAssembler::RegExpPrototypeSearchBodyFast(
2161 Node* const context, Node* const regexp, Node* const string) {
2162 CSA_ASSERT(this, IsFastRegExp(context, regexp));
2163 CSA_ASSERT(this, IsString(string));
2164
2165 // Grab the initial value of last index.
2166 Node* const previous_last_index = FastLoadLastIndex(regexp);
2167
2168 // Ensure last index is 0.
2169 FastStoreLastIndex(regexp, SmiConstant(0));
2170
2171 // Call exec.
2172 Label if_didnotmatch(this);
2173 Node* const match_indices = RegExpPrototypeExecBodyWithoutResult(
2174 context, regexp, string, &if_didnotmatch, true);
2175
2176 // Successful match.
2177 {
2178 // Reset last index.
2179 FastStoreLastIndex(regexp, previous_last_index);
2180
2181 // Return the index of the match.
2182 Node* const index = LoadFixedArrayElement(
2183 match_indices, RegExpMatchInfo::kFirstCaptureIndex);
2184 Return(index);
2185 }
2186
2187 BIND(&if_didnotmatch);
2188 {
2189 // Reset last index and return -1.
2190 FastStoreLastIndex(regexp, previous_last_index);
2191 Return(SmiConstant(-1));
2192 }
2193 }
2194
RegExpPrototypeSearchBodySlow(Node * const context,Node * const regexp,Node * const string)2195 void RegExpBuiltinsAssembler::RegExpPrototypeSearchBodySlow(
2196 Node* const context, Node* const regexp, Node* const string) {
2197 CSA_ASSERT(this, IsJSReceiver(regexp));
2198 CSA_ASSERT(this, IsString(string));
2199
2200 Isolate* const isolate = this->isolate();
2201
2202 Node* const smi_zero = SmiConstant(0);
2203
2204 // Grab the initial value of last index.
2205 Node* const previous_last_index = SlowLoadLastIndex(context, regexp);
2206
2207 // Ensure last index is 0.
2208 {
2209 Label next(this), slow(this, Label::kDeferred);
2210 BranchIfSameValue(previous_last_index, smi_zero, &next, &slow);
2211
2212 BIND(&slow);
2213 SlowStoreLastIndex(context, regexp, smi_zero);
2214 Goto(&next);
2215 BIND(&next);
2216 }
2217
2218 // Call exec.
2219 Node* const exec_result = RegExpExec(context, regexp, string);
2220
2221 // Reset last index if necessary.
2222 {
2223 Label next(this), slow(this, Label::kDeferred);
2224 Node* const current_last_index = SlowLoadLastIndex(context, regexp);
2225
2226 BranchIfSameValue(current_last_index, previous_last_index, &next, &slow);
2227
2228 BIND(&slow);
2229 SlowStoreLastIndex(context, regexp, previous_last_index);
2230 Goto(&next);
2231 BIND(&next);
2232 }
2233
2234 // Return -1 if no match was found.
2235 {
2236 Label next(this);
2237 GotoIfNot(IsNull(exec_result), &next);
2238 Return(SmiConstant(-1));
2239 BIND(&next);
2240 }
2241
2242 // Return the index of the match.
2243 {
2244 Label fast_result(this), slow_result(this, Label::kDeferred);
2245 BranchIfFastRegExpResult(context, exec_result, &fast_result, &slow_result);
2246
2247 BIND(&fast_result);
2248 {
2249 Node* const index =
2250 LoadObjectField(exec_result, JSRegExpResult::kIndexOffset);
2251 Return(index);
2252 }
2253
2254 BIND(&slow_result);
2255 {
2256 Return(GetProperty(context, exec_result,
2257 isolate->factory()->index_string()));
2258 }
2259 }
2260 }
2261
2262 // ES#sec-regexp.prototype-@@search
2263 // RegExp.prototype [ @@search ] ( string )
// Checks the receiver, stringifies the argument and then dispatches: fast
// regexps are forwarded to the RegExpSearchFast builtin, every other
// receiver runs the generic search body emitted inline.
TF_BUILTIN(RegExpPrototypeSearch, RegExpBuiltinsAssembler) {
  Node* const maybe_receiver = Parameter(Descriptor::kReceiver);
  Node* const maybe_string = Parameter(Descriptor::kString);
  Node* const context = Parameter(Descriptor::kContext);

  // Throws unless {maybe_receiver} is a JSReceiver.
  ThrowIfNotJSReceiver(context, maybe_receiver,
                       MessageTemplate::kIncompatibleMethodReceiver,
                       "RegExp.prototype.@@search");
  Node* const regexp = maybe_receiver;

  // Coerce the argument to a String.
  TNode<String> const subject = ToString_Inline(context, maybe_string);

  Label if_fast(this), if_slow(this);
  BranchIfFastRegExp(context, regexp, &if_fast, &if_slow);

  BIND(&if_fast);
  {
    // TODO(pwong): Could be optimized to remove the overhead of calling the
    // builtin (at the cost of a larger builtin).
    Node* const fast_result =
        CallBuiltin(Builtins::kRegExpSearchFast, context, regexp, subject);
    Return(fast_result);
  }

  BIND(&if_slow);
  RegExpPrototypeSearchBodySlow(context, regexp, subject);
}
2289
// Helper that skips a few initial checks and assumes...
2291 // 1) receiver is a "fast" RegExp
2292 // 2) pattern is a string
TF_BUILTIN(RegExpSearchFast, RegExpBuiltinsAssembler) {
  Node* const regexp = Parameter(Descriptor::kReceiver);
  // The subject string is passed in the descriptor slot named kPattern.
  Node* const subject = Parameter(Descriptor::kPattern);
  Node* const context = Parameter(Descriptor::kContext);

  // Emit the fast search body; it returns on every path.
  RegExpPrototypeSearchBodyFast(context, regexp, subject);
}
2300
2301 // Generates the fast path for @@split. {regexp} is an unmodified, non-sticky
2302 // JSRegExp, {string} is a String, and {limit} is a Smi.
RegExpPrototypeSplitBody(Node * const context,Node * const regexp,TNode<String> string,TNode<Smi> const limit)2303 void RegExpBuiltinsAssembler::RegExpPrototypeSplitBody(Node* const context,
2304 Node* const regexp,
2305 TNode<String> string,
2306 TNode<Smi> const limit) {
2307 CSA_ASSERT(this, IsFastRegExp(context, regexp));
2308 CSA_ASSERT(this, Word32BinaryNot(FastFlagGetter(regexp, JSRegExp::kSticky)));
2309
2310 TNode<Smi> const smi_zero = SmiConstant(0);
2311 TNode<IntPtrT> const int_zero = IntPtrConstant(0);
2312 TNode<IntPtrT> const int_limit = SmiUntag(limit);
2313
2314 const ElementsKind kind = PACKED_ELEMENTS;
2315 const ParameterMode mode = CodeStubAssembler::INTPTR_PARAMETERS;
2316
2317 Node* const allocation_site = nullptr;
2318 Node* const native_context = LoadNativeContext(context);
2319 Node* const array_map = LoadJSArrayElementsMap(kind, native_context);
2320
2321 Label return_empty_array(this, Label::kDeferred);
2322
2323 // If limit is zero, return an empty array.
2324 {
2325 Label next(this), if_limitiszero(this, Label::kDeferred);
2326 Branch(SmiEqual(limit, smi_zero), &return_empty_array, &next);
2327 BIND(&next);
2328 }
2329
2330 TNode<Smi> const string_length = LoadStringLengthAsSmi(string);
2331
2332 // If passed the empty {string}, return either an empty array or a singleton
2333 // array depending on whether the {regexp} matches.
2334 {
2335 Label next(this), if_stringisempty(this, Label::kDeferred);
2336 Branch(SmiEqual(string_length, smi_zero), &if_stringisempty, &next);
2337
2338 BIND(&if_stringisempty);
2339 {
2340 Node* const last_match_info = LoadContextElement(
2341 native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
2342
2343 Node* const match_indices = RegExpExecInternal(context, regexp, string,
2344 smi_zero, last_match_info);
2345
2346 Label return_singleton_array(this);
2347 Branch(IsNull(match_indices), &return_singleton_array,
2348 &return_empty_array);
2349
2350 BIND(&return_singleton_array);
2351 {
2352 Node* const length = SmiConstant(1);
2353 Node* const capacity = IntPtrConstant(1);
2354 Node* const result = AllocateJSArray(kind, array_map, capacity, length,
2355 allocation_site, mode);
2356
2357 Node* const fixed_array = LoadElements(result);
2358 StoreFixedArrayElement(fixed_array, 0, string);
2359
2360 Return(result);
2361 }
2362 }
2363
2364 BIND(&next);
2365 }
2366
2367 // Loop preparations.
2368
2369 GrowableFixedArray array(state());
2370
2371 TVARIABLE(Smi, var_last_matched_until, smi_zero);
2372 TVARIABLE(Smi, var_next_search_from, smi_zero);
2373
2374 Variable* vars[] = {array.var_array(), array.var_length(),
2375 array.var_capacity(), &var_last_matched_until,
2376 &var_next_search_from};
2377 const int vars_count = sizeof(vars) / sizeof(vars[0]);
2378 Label loop(this, vars_count, vars), push_suffix_and_out(this), out(this);
2379 Goto(&loop);
2380
2381 BIND(&loop);
2382 {
2383 TNode<Smi> const next_search_from = var_next_search_from.value();
2384 TNode<Smi> const last_matched_until = var_last_matched_until.value();
2385
2386 // We're done if we've reached the end of the string.
2387 {
2388 Label next(this);
2389 Branch(SmiEqual(next_search_from, string_length), &push_suffix_and_out,
2390 &next);
2391 BIND(&next);
2392 }
2393
2394 // Search for the given {regexp}.
2395
2396 Node* const last_match_info = LoadContextElement(
2397 native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
2398
2399 Node* const match_indices = RegExpExecInternal(
2400 context, regexp, string, next_search_from, last_match_info);
2401
2402 // We're done if no match was found.
2403 {
2404 Label next(this);
2405 Branch(IsNull(match_indices), &push_suffix_and_out, &next);
2406 BIND(&next);
2407 }
2408
2409 TNode<Smi> const match_from = CAST(LoadFixedArrayElement(
2410 match_indices, RegExpMatchInfo::kFirstCaptureIndex));
2411
2412 // We're done if the match starts beyond the string.
2413 {
2414 Label next(this);
2415 Branch(SmiEqual(match_from, string_length), &push_suffix_and_out, &next);
2416 BIND(&next);
2417 }
2418
2419 TNode<Smi> const match_to = CAST(LoadFixedArrayElement(
2420 match_indices, RegExpMatchInfo::kFirstCaptureIndex + 1));
2421
2422 // Advance index and continue if the match is empty.
2423 {
2424 Label next(this);
2425
2426 GotoIfNot(SmiEqual(match_to, next_search_from), &next);
2427 GotoIfNot(SmiEqual(match_to, last_matched_until), &next);
2428
2429 Node* const is_unicode = FastFlagGetter(regexp, JSRegExp::kUnicode);
2430 Node* const new_next_search_from =
2431 AdvanceStringIndex(string, next_search_from, is_unicode, true);
2432 var_next_search_from = CAST(new_next_search_from);
2433 Goto(&loop);
2434
2435 BIND(&next);
2436 }
2437
2438 // A valid match was found, add the new substring to the array.
2439 {
2440 TNode<Smi> const from = last_matched_until;
2441 TNode<Smi> const to = match_from;
2442 array.Push(SubString(string, SmiUntag(from), SmiUntag(to)));
2443 GotoIf(WordEqual(array.length(), int_limit), &out);
2444 }
2445
2446 // Add all captures to the array.
2447 {
2448 Node* const num_registers = LoadFixedArrayElement(
2449 match_indices, RegExpMatchInfo::kNumberOfCapturesIndex);
2450 Node* const int_num_registers = SmiUntag(num_registers);
2451
2452 VARIABLE(var_reg, MachineType::PointerRepresentation());
2453 var_reg.Bind(IntPtrConstant(2));
2454
2455 Variable* vars[] = {array.var_array(), array.var_length(),
2456 array.var_capacity(), &var_reg};
2457 const int vars_count = sizeof(vars) / sizeof(vars[0]);
2458 Label nested_loop(this, vars_count, vars), nested_loop_out(this);
2459 Branch(IntPtrLessThan(var_reg.value(), int_num_registers), &nested_loop,
2460 &nested_loop_out);
2461
2462 BIND(&nested_loop);
2463 {
2464 Node* const reg = var_reg.value();
2465 Node* const from = LoadFixedArrayElement(
2466 match_indices, reg,
2467 RegExpMatchInfo::kFirstCaptureIndex * kPointerSize, mode);
2468 TNode<Smi> const to = CAST(LoadFixedArrayElement(
2469 match_indices, reg,
2470 (RegExpMatchInfo::kFirstCaptureIndex + 1) * kPointerSize, mode));
2471
2472 Label select_capture(this), select_undefined(this), store_value(this);
2473 VARIABLE(var_value, MachineRepresentation::kTagged);
2474 Branch(SmiEqual(to, SmiConstant(-1)), &select_undefined,
2475 &select_capture);
2476
2477 BIND(&select_capture);
2478 {
2479 var_value.Bind(SubString(string, SmiUntag(from), SmiUntag(to)));
2480 Goto(&store_value);
2481 }
2482
2483 BIND(&select_undefined);
2484 {
2485 var_value.Bind(UndefinedConstant());
2486 Goto(&store_value);
2487 }
2488
2489 BIND(&store_value);
2490 {
2491 array.Push(CAST(var_value.value()));
2492 GotoIf(WordEqual(array.length(), int_limit), &out);
2493
2494 Node* const new_reg = IntPtrAdd(reg, IntPtrConstant(2));
2495 var_reg.Bind(new_reg);
2496
2497 Branch(IntPtrLessThan(new_reg, int_num_registers), &nested_loop,
2498 &nested_loop_out);
2499 }
2500 }
2501
2502 BIND(&nested_loop_out);
2503 }
2504
2505 var_last_matched_until = match_to;
2506 var_next_search_from = match_to;
2507 Goto(&loop);
2508 }
2509
2510 BIND(&push_suffix_and_out);
2511 {
2512 Node* const from = var_last_matched_until.value();
2513 Node* const to = string_length;
2514 array.Push(SubString(string, SmiUntag(from), SmiUntag(to)));
2515 Goto(&out);
2516 }
2517
2518 BIND(&out);
2519 {
2520 Node* const result = array.ToJSArray(CAST(context));
2521 Return(result);
2522 }
2523
2524 BIND(&return_empty_array);
2525 {
2526 Node* const length = smi_zero;
2527 Node* const capacity = int_zero;
2528 Node* const result = AllocateJSArray(kind, array_map, capacity, length,
2529 allocation_site, mode);
2530 Return(result);
2531 }
2532 }
2533
2534 // Helper that skips a few initial checks.
// Expects a fast {regexp} and an already converted {string}. The split body
// is emitted inline when the limit is undefined or a positive Smi and the
// regexp is not sticky; in all other cases Runtime::kRegExpSplit is called.
TF_BUILTIN(RegExpSplit, RegExpBuiltinsAssembler) {
  Node* const regexp = Parameter(Descriptor::kRegExp);
  TNode<String> const subject = CAST(Parameter(Descriptor::kString));
  Node* const maybe_limit = Parameter(Descriptor::kLimit);
  Node* const context = Parameter(Descriptor::kContext);

  CSA_ASSERT(this, IsFastRegExp(context, regexp));

  // TODO(jgruber): Even if map checks send us to the fast path, we still need
  // to verify the constructor property and jump to the slow path if it has
  // been changed.

  // Work out the effective limit. It starts as the value that was passed in.

  VARIABLE(var_limit, MachineRepresentation::kTagged, maybe_limit);
  Label use_max_limit(this), call_runtime(this, Label::kDeferred);

  {
    Label limit_ready(this);

    // Only undefined and positive Smis are accepted here. Calling
    // ToUint32(maybe_limit) is not an option, because it might move us onto
    // the slow path and thereby violate the ordering required by the spec
    // (see https://crbug.com/801171).
    GotoIf(IsUndefined(maybe_limit), &use_max_limit);
    Branch(TaggedIsPositiveSmi(maybe_limit), &limit_ready, &call_runtime);

    BIND(&use_max_limit);
    {
      // TODO(jgruber): In this case, we can probably avoid generation of limit
      // checks in Generate_RegExpPrototypeSplitBody.
      var_limit.Bind(SmiConstant(Smi::kMaxValue));
      Goto(&limit_ready);
    }

    BIND(&limit_ready);
  }

  // The fast path takes shortcuts (in particular it does not allocate a new
  // regexp instance as the spec prescribes), which would yield invalid
  // results for sticky regexps. See crbug.com/v8/6706.

  GotoIf(FastFlagGetter(regexp, JSRegExp::kSticky), &call_runtime);

  // All checks passed: inline the fast path.

  RegExpPrototypeSplitBody(context, regexp, subject, CAST(var_limit.value()));

  BIND(&call_runtime);
  Return(CallRuntime(Runtime::kRegExpSplit, context, regexp, subject,
                     var_limit.value()));
}
2588
2589 // ES#sec-regexp.prototype-@@split
2590 // RegExp.prototype [ @@split ] ( string, limit )
TF_BUILTIN(RegExpPrototypeSplit,RegExpBuiltinsAssembler)2591 TF_BUILTIN(RegExpPrototypeSplit, RegExpBuiltinsAssembler) {
2592 const int kStringArg = 0;
2593 const int kLimitArg = 1;
2594
2595 Node* argc =
2596 ChangeInt32ToIntPtr(Parameter(BuiltinDescriptor::kArgumentsCount));
2597 CodeStubArguments args(this, argc);
2598
2599 Node* const maybe_receiver = args.GetReceiver();
2600 Node* const maybe_string = args.GetOptionalArgumentValue(kStringArg);
2601 Node* const maybe_limit = args.GetOptionalArgumentValue(kLimitArg);
2602 Node* const context = Parameter(BuiltinDescriptor::kContext);
2603
2604 // Ensure {maybe_receiver} is a JSReceiver.
2605 ThrowIfNotJSReceiver(context, maybe_receiver,
2606 MessageTemplate::kIncompatibleMethodReceiver,
2607 "RegExp.prototype.@@split");
2608 Node* const receiver = maybe_receiver;
2609
2610 // Convert {maybe_string} to a String.
2611 TNode<String> const string = ToString_Inline(context, maybe_string);
2612
2613 Label stub(this), runtime(this, Label::kDeferred);
2614 BranchIfFastRegExp(context, receiver, &stub, &runtime);
2615
2616 BIND(&stub);
2617 args.PopAndReturn(CallBuiltin(Builtins::kRegExpSplit, context, receiver,
2618 string, maybe_limit));
2619
2620 BIND(&runtime);
2621 args.PopAndReturn(CallRuntime(Runtime::kRegExpSplit, context, receiver,
2622 string, maybe_limit));
2623 }
2624
ReplaceGlobalCallableFastPath(Node * context,Node * regexp,Node * string,Node * replace_callable)2625 Node* RegExpBuiltinsAssembler::ReplaceGlobalCallableFastPath(
2626 Node* context, Node* regexp, Node* string, Node* replace_callable) {
2627 // The fast path is reached only if {receiver} is a global unmodified
2628 // JSRegExp instance and {replace_callable} is callable.
2629
2630 CSA_ASSERT(this, IsFastRegExp(context, regexp));
2631 CSA_ASSERT(this, IsCallable(replace_callable));
2632 CSA_ASSERT(this, IsString(string));
2633
2634 Isolate* const isolate = this->isolate();
2635
2636 Node* const undefined = UndefinedConstant();
2637 TNode<IntPtrT> const int_zero = IntPtrConstant(0);
2638 TNode<IntPtrT> const int_one = IntPtrConstant(1);
2639 TNode<Smi> const smi_zero = SmiConstant(0);
2640
2641 Node* const native_context = LoadNativeContext(context);
2642
2643 Label out(this);
2644 VARIABLE(var_result, MachineRepresentation::kTagged);
2645
2646 // Set last index to 0.
2647 FastStoreLastIndex(regexp, smi_zero);
2648
2649 // Allocate {result_array}.
2650 Node* result_array;
2651 {
2652 ElementsKind kind = PACKED_ELEMENTS;
2653 Node* const array_map = LoadJSArrayElementsMap(kind, native_context);
2654 TNode<IntPtrT> const capacity = IntPtrConstant(16);
2655 TNode<Smi> const length = smi_zero;
2656 Node* const allocation_site = nullptr;
2657 ParameterMode capacity_mode = CodeStubAssembler::INTPTR_PARAMETERS;
2658
2659 result_array = AllocateJSArray(kind, array_map, capacity, length,
2660 allocation_site, capacity_mode);
2661 }
2662
2663 // Call into runtime for RegExpExecMultiple.
2664 Node* last_match_info =
2665 LoadContextElement(native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
2666 Node* const res = CallRuntime(Runtime::kRegExpExecMultiple, context, regexp,
2667 string, last_match_info, result_array);
2668
2669 // Reset last index to 0.
2670 FastStoreLastIndex(regexp, smi_zero);
2671
2672 // If no matches, return the subject string.
2673 var_result.Bind(string);
2674 GotoIf(IsNull(res), &out);
2675
2676 // Reload last match info since it might have changed.
2677 last_match_info =
2678 LoadContextElement(native_context, Context::REGEXP_LAST_MATCH_INFO_INDEX);
2679
2680 Node* const res_length = LoadJSArrayLength(res);
2681 Node* const res_elems = LoadElements(res);
2682 CSA_ASSERT(this, HasInstanceType(res_elems, FIXED_ARRAY_TYPE));
2683
2684 TNode<Smi> const num_capture_registers = CAST(LoadFixedArrayElement(
2685 last_match_info, RegExpMatchInfo::kNumberOfCapturesIndex));
2686
2687 Label if_hasexplicitcaptures(this), if_noexplicitcaptures(this),
2688 create_result(this);
2689 Branch(SmiEqual(num_capture_registers, SmiConstant(2)),
2690 &if_noexplicitcaptures, &if_hasexplicitcaptures);
2691
2692 BIND(&if_noexplicitcaptures);
2693 {
2694 // If the number of captures is two then there are no explicit captures in
2695 // the regexp, just the implicit capture that captures the whole match. In
2696 // this case we can simplify quite a bit and end up with something faster.
2697 // The builder will consist of some integers that indicate slices of the
2698 // input string and some replacements that were returned from the replace
2699 // function.
2700
2701 TVARIABLE(Smi, var_match_start, smi_zero);
2702
2703 TNode<IntPtrT> const end = SmiUntag(res_length);
2704 TVARIABLE(IntPtrT, var_i, int_zero);
2705
2706 Variable* vars[] = {&var_i, &var_match_start};
2707 Label loop(this, 2, vars);
2708 Goto(&loop);
2709 BIND(&loop);
2710 {
2711 GotoIfNot(IntPtrLessThan(var_i.value(), end), &create_result);
2712
2713 Node* const elem = LoadFixedArrayElement(res_elems, var_i.value());
2714
2715 Label if_issmi(this), if_isstring(this), loop_epilogue(this);
2716 Branch(TaggedIsSmi(elem), &if_issmi, &if_isstring);
2717
2718 BIND(&if_issmi);
2719 {
2720 TNode<Smi> smi_elem = CAST(elem);
2721 // Integers represent slices of the original string.
2722 Label if_isnegativeorzero(this), if_ispositive(this);
2723 BranchIfSmiLessThanOrEqual(smi_elem, smi_zero, &if_isnegativeorzero,
2724 &if_ispositive);
2725
2726 BIND(&if_ispositive);
2727 {
2728 TNode<IntPtrT> int_elem = SmiUntag(smi_elem);
2729 TNode<IntPtrT> new_match_start =
2730 Signed(IntPtrAdd(WordShr(int_elem, IntPtrConstant(11)),
2731 WordAnd(int_elem, IntPtrConstant(0x7FF))));
2732 var_match_start = SmiTag(new_match_start);
2733 Goto(&loop_epilogue);
2734 }
2735
2736 BIND(&if_isnegativeorzero);
2737 {
2738 var_i = IntPtrAdd(var_i.value(), int_one);
2739
2740 TNode<Smi> const next_elem =
2741 CAST(LoadFixedArrayElement(res_elems, var_i.value()));
2742
2743 var_match_start = SmiSub(next_elem, smi_elem);
2744 Goto(&loop_epilogue);
2745 }
2746 }
2747
2748 BIND(&if_isstring);
2749 {
2750 CSA_ASSERT(this, IsString(elem));
2751
2752 Callable call_callable = CodeFactory::Call(isolate);
2753 TNode<Smi> match_start = var_match_start.value();
2754 Node* const replacement_obj =
2755 CallJS(call_callable, context, replace_callable, undefined, elem,
2756 match_start, string);
2757
2758 TNode<String> const replacement_str =
2759 ToString_Inline(context, replacement_obj);
2760 StoreFixedArrayElement(res_elems, var_i.value(), replacement_str);
2761
2762 TNode<Smi> const elem_length = LoadStringLengthAsSmi(elem);
2763 var_match_start = SmiAdd(match_start, elem_length);
2764
2765 Goto(&loop_epilogue);
2766 }
2767
2768 BIND(&loop_epilogue);
2769 {
2770 var_i = IntPtrAdd(var_i.value(), int_one);
2771 Goto(&loop);
2772 }
2773 }
2774 }
2775
2776 BIND(&if_hasexplicitcaptures);
2777 {
2778 Node* const from = int_zero;
2779 Node* const to = SmiUntag(res_length);
2780 const int increment = 1;
2781
2782 BuildFastLoop(from, to,
2783 [this, res_elems, isolate, native_context, context, undefined,
2784 replace_callable](Node* index) {
2785 Node* const elem = LoadFixedArrayElement(res_elems, index);
2786
2787 Label do_continue(this);
2788 GotoIf(TaggedIsSmi(elem), &do_continue);
2789
2790 // elem must be an Array.
2791 // Use the apply argument as backing for global RegExp
2792 // properties.
2793
2794 CSA_ASSERT(this, HasInstanceType(elem, JS_ARRAY_TYPE));
2795
2796 // TODO(jgruber): Remove indirection through
2797 // Call->ReflectApply.
2798 Callable call_callable = CodeFactory::Call(isolate);
2799 Node* const reflect_apply = LoadContextElement(
2800 native_context, Context::REFLECT_APPLY_INDEX);
2801
2802 Node* const replacement_obj =
2803 CallJS(call_callable, context, reflect_apply, undefined,
2804 replace_callable, undefined, elem);
2805
2806 // Overwrite the i'th element in the results with the string
2807 // we got back from the callback function.
2808
2809 TNode<String> const replacement_str =
2810 ToString_Inline(context, replacement_obj);
2811 StoreFixedArrayElement(res_elems, index, replacement_str);
2812
2813 Goto(&do_continue);
2814 BIND(&do_continue);
2815 },
2816 increment, CodeStubAssembler::INTPTR_PARAMETERS,
2817 CodeStubAssembler::IndexAdvanceMode::kPost);
2818
2819 Goto(&create_result);
2820 }
2821
2822 BIND(&create_result);
2823 {
2824 Node* const result = CallRuntime(Runtime::kStringBuilderConcat, context,
2825 res, res_length, string);
2826 var_result.Bind(result);
2827 Goto(&out);
2828 }
2829
2830 BIND(&out);
2831 return var_result.value();
2832 }
2833
ReplaceSimpleStringFastPath(Node * context,Node * regexp,TNode<String> string,TNode<String> replace_string)2834 Node* RegExpBuiltinsAssembler::ReplaceSimpleStringFastPath(
2835 Node* context, Node* regexp, TNode<String> string,
2836 TNode<String> replace_string) {
2837 // The fast path is reached only if {receiver} is an unmodified
2838 // JSRegExp instance, {replace_value} is non-callable, and
2839 // ToString({replace_value}) does not contain '$', i.e. we're doing a simple
2840 // string replacement.
2841
2842 CSA_ASSERT(this, IsFastRegExp(context, regexp));
2843
2844 TNode<Smi> const smi_zero = SmiConstant(0);
2845 const bool kIsFastPath = true;
2846
2847 TVARIABLE(String, var_result, EmptyStringConstant());
2848 VARIABLE(var_match_indices, MachineRepresentation::kTagged);
2849 VARIABLE(var_last_match_end, MachineRepresentation::kTagged, smi_zero);
2850 VARIABLE(var_is_unicode, MachineRepresentation::kWord32, Int32Constant(0));
2851 Variable* vars[] = {&var_result, &var_last_match_end};
2852 Label out(this), loop(this, 2, vars), loop_end(this),
2853 if_nofurthermatches(this);
2854
2855 // Is {regexp} global?
2856 Node* const is_global = FastFlagGetter(regexp, JSRegExp::kGlobal);
2857 GotoIfNot(is_global, &loop);
2858
2859 var_is_unicode.Bind(FastFlagGetter(regexp, JSRegExp::kUnicode));
2860 FastStoreLastIndex(regexp, smi_zero);
2861 Goto(&loop);
2862
2863 BIND(&loop);
2864 {
2865 var_match_indices.Bind(RegExpPrototypeExecBodyWithoutResult(
2866 context, regexp, string, &if_nofurthermatches, kIsFastPath));
2867
2868 // Successful match.
2869 {
2870 TNode<Smi> const match_start = CAST(LoadFixedArrayElement(
2871 var_match_indices.value(), RegExpMatchInfo::kFirstCaptureIndex));
2872 TNode<Smi> const match_end = CAST(LoadFixedArrayElement(
2873 var_match_indices.value(), RegExpMatchInfo::kFirstCaptureIndex + 1));
2874
2875 Label if_replaceisempty(this), if_replaceisnotempty(this);
2876 TNode<Smi> const replace_length = LoadStringLengthAsSmi(replace_string);
2877 Branch(SmiEqual(replace_length, smi_zero), &if_replaceisempty,
2878 &if_replaceisnotempty);
2879
2880 BIND(&if_replaceisempty);
2881 {
2882 // TODO(jgruber): We could skip many of the checks that using SubString
2883 // here entails.
2884 TNode<String> const first_part =
2885 SubString(string, SmiUntag(var_last_match_end.value()),
2886 SmiUntag(match_start));
2887 var_result = StringAdd(context, var_result.value(), first_part);
2888 Goto(&loop_end);
2889 }
2890
2891 BIND(&if_replaceisnotempty);
2892 {
2893 TNode<String> const first_part =
2894 SubString(string, SmiUntag(var_last_match_end.value()),
2895 SmiUntag(match_start));
2896 TNode<String> result =
2897 StringAdd(context, var_result.value(), first_part);
2898 var_result = StringAdd(context, result, replace_string);
2899 Goto(&loop_end);
2900 }
2901
2902 BIND(&loop_end);
2903 {
2904 var_last_match_end.Bind(match_end);
2905 // Non-global case ends here after the first replacement.
2906 GotoIfNot(is_global, &if_nofurthermatches);
2907
2908 GotoIf(SmiNotEqual(match_end, match_start), &loop);
2909 // If match is the empty string, we have to increment lastIndex.
2910 Node* const this_index = FastLoadLastIndex(regexp);
2911 Node* const next_index = AdvanceStringIndex(
2912 string, this_index, var_is_unicode.value(), kIsFastPath);
2913 FastStoreLastIndex(regexp, next_index);
2914 Goto(&loop);
2915 }
2916 }
2917 }
2918
2919 BIND(&if_nofurthermatches);
2920 {
2921 TNode<Smi> const string_length = LoadStringLengthAsSmi(string);
2922 TNode<String> const last_part = SubString(
2923 string, SmiUntag(var_last_match_end.value()), SmiUntag(string_length));
2924 var_result = StringAdd(context, var_result.value(), last_part);
2925 Goto(&out);
2926 }
2927
2928 BIND(&out);
2929 return var_result.value();
2930 }
2931
2932 // Helper that skips a few initial checks.
TF_BUILTIN(RegExpReplace,RegExpBuiltinsAssembler)2933 TF_BUILTIN(RegExpReplace, RegExpBuiltinsAssembler) {
2934 Node* const regexp = Parameter(Descriptor::kRegExp);
2935 TNode<String> const string = CAST(Parameter(Descriptor::kString));
2936 Node* const replace_value = Parameter(Descriptor::kReplaceValue);
2937 Node* const context = Parameter(Descriptor::kContext);
2938
2939 CSA_ASSERT(this, IsFastRegExp(context, regexp));
2940
2941 Label checkreplacestring(this), if_iscallable(this),
2942 runtime(this, Label::kDeferred);
2943
2944 // 2. Is {replace_value} callable?
2945 GotoIf(TaggedIsSmi(replace_value), &checkreplacestring);
2946 Branch(IsCallableMap(LoadMap(replace_value)), &if_iscallable,
2947 &checkreplacestring);
2948
2949 // 3. Does ToString({replace_value}) contain '$'?
2950 BIND(&checkreplacestring);
2951 {
2952 TNode<String> const replace_string =
2953 ToString_Inline(context, replace_value);
2954
2955 // ToString(replaceValue) could potentially change the shape of the RegExp
2956 // object. Recheck that we are still on the fast path and bail to runtime
2957 // otherwise.
2958 {
2959 Label next(this);
2960 BranchIfFastRegExp(context, regexp, &next, &runtime);
2961 BIND(&next);
2962 }
2963
2964 TNode<String> const dollar_string = HeapConstant(
2965 isolate()->factory()->LookupSingleCharacterStringFromCode('$'));
2966 TNode<Smi> const dollar_ix =
2967 CAST(CallBuiltin(Builtins::kStringIndexOf, context, replace_string,
2968 dollar_string, SmiConstant(0)));
2969 GotoIfNot(SmiEqual(dollar_ix, SmiConstant(-1)), &runtime);
2970
2971 Return(
2972 ReplaceSimpleStringFastPath(context, regexp, string, replace_string));
2973 }
2974
2975 // {regexp} is unmodified and {replace_value} is callable.
2976 BIND(&if_iscallable);
2977 {
2978 Node* const replace_fn = replace_value;
2979
2980 // Check if the {regexp} is global.
2981 Label if_isglobal(this), if_isnotglobal(this);
2982
2983 Node* const is_global = FastFlagGetter(regexp, JSRegExp::kGlobal);
2984 Branch(is_global, &if_isglobal, &if_isnotglobal);
2985
2986 BIND(&if_isglobal);
2987 Return(ReplaceGlobalCallableFastPath(context, regexp, string, replace_fn));
2988
2989 BIND(&if_isnotglobal);
2990 Return(CallRuntime(Runtime::kStringReplaceNonGlobalRegExpWithFunction,
2991 context, string, regexp, replace_fn));
2992 }
2993
2994 BIND(&runtime);
2995 Return(CallRuntime(Runtime::kRegExpReplace, context, regexp, string,
2996 replace_value));
2997 }
2998
2999 // ES#sec-regexp.prototype-@@replace
3000 // RegExp.prototype [ @@replace ] ( string, replaceValue )
TF_BUILTIN(RegExpPrototypeReplace,RegExpBuiltinsAssembler)3001 TF_BUILTIN(RegExpPrototypeReplace, RegExpBuiltinsAssembler) {
3002 const int kStringArg = 0;
3003 const int kReplaceValueArg = 1;
3004
3005 Node* argc =
3006 ChangeInt32ToIntPtr(Parameter(BuiltinDescriptor::kArgumentsCount));
3007 CodeStubArguments args(this, argc);
3008
3009 Node* const maybe_receiver = args.GetReceiver();
3010 Node* const maybe_string = args.GetOptionalArgumentValue(kStringArg);
3011 Node* const replace_value = args.GetOptionalArgumentValue(kReplaceValueArg);
3012 Node* const context = Parameter(BuiltinDescriptor::kContext);
3013
3014 // RegExpPrototypeReplace is a bit of a beast - a summary of dispatch logic:
3015 //
3016 // if (!IsFastRegExp(receiver)) CallRuntime(RegExpReplace)
3017 // if (IsCallable(replace)) {
3018 // if (IsGlobal(receiver)) {
3019 // // Called 'fast-path' but contains several runtime calls.
3020 // ReplaceGlobalCallableFastPath()
3021 // } else {
3022 // CallRuntime(StringReplaceNonGlobalRegExpWithFunction)
3023 // }
3024 // } else {
3025 // if (replace.contains("$")) {
3026 // CallRuntime(RegExpReplace)
3027 // } else {
3028 // ReplaceSimpleStringFastPath()
3029 // }
3030 // }
3031
3032 // Ensure {maybe_receiver} is a JSReceiver.
3033 ThrowIfNotJSReceiver(context, maybe_receiver,
3034 MessageTemplate::kIncompatibleMethodReceiver,
3035 "RegExp.prototype.@@replace");
3036 Node* const receiver = maybe_receiver;
3037
3038 // Convert {maybe_string} to a String.
3039 TNode<String> const string = ToString_Inline(context, maybe_string);
3040
3041 // Fast-path checks: 1. Is the {receiver} an unmodified JSRegExp instance?
3042 Label stub(this), runtime(this, Label::kDeferred);
3043 BranchIfFastRegExp(context, receiver, &stub, &runtime);
3044
3045 BIND(&stub);
3046 args.PopAndReturn(CallBuiltin(Builtins::kRegExpReplace, context, receiver,
3047 string, replace_value));
3048
3049 BIND(&runtime);
3050 args.PopAndReturn(CallRuntime(Runtime::kRegExpReplace, context, receiver,
3051 string, replace_value));
3052 }
3053
3054 // Simple string matching functionality for internal use which does not modify
3055 // the last match info.
TF_BUILTIN(RegExpInternalMatch,RegExpBuiltinsAssembler)3056 TF_BUILTIN(RegExpInternalMatch, RegExpBuiltinsAssembler) {
3057 TNode<JSRegExp> const regexp = CAST(Parameter(Descriptor::kRegExp));
3058 TNode<String> const string = CAST(Parameter(Descriptor::kString));
3059 Node* const context = Parameter(Descriptor::kContext);
3060
3061 Node* const smi_zero = SmiConstant(0);
3062 Node* const native_context = LoadNativeContext(context);
3063 Node* const internal_match_info = LoadContextElement(
3064 native_context, Context::REGEXP_INTERNAL_MATCH_INFO_INDEX);
3065 Node* const match_indices = RegExpExecInternal(context, regexp, string,
3066 smi_zero, internal_match_info);
3067 Node* const null = NullConstant();
3068 Label if_matched(this);
3069 GotoIfNot(WordEqual(match_indices, null), &if_matched);
3070 Return(null);
3071
3072 BIND(&if_matched);
3073 {
3074 Node* result =
3075 ConstructNewResultFromMatchInfo(context, regexp, match_indices, string);
3076 Return(result);
3077 }
3078 }
3079
3080 class RegExpStringIteratorAssembler : public RegExpBuiltinsAssembler {
3081 public:
RegExpStringIteratorAssembler(compiler::CodeAssemblerState * state)3082 explicit RegExpStringIteratorAssembler(compiler::CodeAssemblerState* state)
3083 : RegExpBuiltinsAssembler(state) {}
3084
3085 protected:
LoadFlags(TNode<HeapObject> iterator)3086 TNode<Smi> LoadFlags(TNode<HeapObject> iterator) {
3087 return LoadObjectField<Smi>(iterator, JSRegExpStringIterator::kFlagsOffset);
3088 }
3089
HasDoneFlag(TNode<Smi> flags)3090 TNode<BoolT> HasDoneFlag(TNode<Smi> flags) {
3091 return UncheckedCast<BoolT>(
3092 IsSetSmi(flags, 1 << JSRegExpStringIterator::kDoneBit));
3093 }
3094
HasGlobalFlag(TNode<Smi> flags)3095 TNode<BoolT> HasGlobalFlag(TNode<Smi> flags) {
3096 return UncheckedCast<BoolT>(
3097 IsSetSmi(flags, 1 << JSRegExpStringIterator::kGlobalBit));
3098 }
3099
HasUnicodeFlag(TNode<Smi> flags)3100 TNode<BoolT> HasUnicodeFlag(TNode<Smi> flags) {
3101 return UncheckedCast<BoolT>(
3102 IsSetSmi(flags, 1 << JSRegExpStringIterator::kUnicodeBit));
3103 }
3104
SetDoneFlag(TNode<HeapObject> iterator,TNode<Smi> flags)3105 void SetDoneFlag(TNode<HeapObject> iterator, TNode<Smi> flags) {
3106 TNode<Smi> new_flags =
3107 SmiOr(flags, SmiConstant(1 << JSRegExpStringIterator::kDoneBit));
3108 StoreObjectFieldNoWriteBarrier(
3109 iterator, JSRegExpStringIterator::kFlagsOffset, new_flags);
3110 }
3111 };
3112
3113 // https://tc39.github.io/proposal-string-matchall/
3114 // %RegExpStringIteratorPrototype%.next ( )
TF_BUILTIN(RegExpStringIteratorPrototypeNext,RegExpStringIteratorAssembler)3115 TF_BUILTIN(RegExpStringIteratorPrototypeNext, RegExpStringIteratorAssembler) {
3116 TNode<Context> context = CAST(Parameter(Descriptor::kContext));
3117 TNode<Object> maybe_receiver = CAST(Parameter(Descriptor::kReceiver));
3118
3119 Label if_match(this), if_no_match(this, Label::kDeferred),
3120 return_empty_done_result(this, Label::kDeferred),
3121 throw_bad_receiver(this, Label::kDeferred);
3122
3123 // 1. Let O be the this value.
3124 // 2. If Type(O) is not Object, throw a TypeError exception.
3125 GotoIf(TaggedIsSmi(maybe_receiver), &throw_bad_receiver);
3126 TNode<HeapObject> receiver = CAST(maybe_receiver);
3127 GotoIfNot(IsJSReceiver(receiver), &throw_bad_receiver);
3128
3129 // 3. If O does not have all of the internal slots of a RegExp String Iterator
3130 // Object Instance (see 5.3), throw a TypeError exception.
3131 GotoIfNot(InstanceTypeEqual(LoadInstanceType(receiver),
3132 JS_REGEXP_STRING_ITERATOR_TYPE),
3133 &throw_bad_receiver);
3134
3135 // 4. If O.[[Done]] is true, then
3136 // a. Return ! CreateIterResultObject(undefined, true).
3137 TNode<Smi> flags = LoadFlags(receiver);
3138 GotoIf(HasDoneFlag(flags), &return_empty_done_result);
3139
3140 // 5. Let R be O.[[IteratingRegExp]].
3141 TNode<Object> iterating_regexp =
3142 LoadObjectField(receiver, JSRegExpStringIterator::kIteratingRegExpOffset);
3143
3144 // 6. Let S be O.[[IteratedString]].
3145 TNode<String> iterating_string = CAST(
3146 LoadObjectField(receiver, JSRegExpStringIterator::kIteratedStringOffset));
3147
3148 // 7. Let global be O.[[Global]].
3149 // See if_match.
3150
3151 // 8. Let fullUnicode be O.[[Unicode]].
3152 // See if_global.
3153
3154 // 9. Let match be ? RegExpExec(R, S).
3155 TVARIABLE(Object, var_match);
3156 TVARIABLE(BoolT, var_is_fast_regexp);
3157 {
3158 Label if_fast(this), if_slow(this, Label::kDeferred);
3159 BranchIfFastRegExp(context, iterating_regexp, &if_fast, &if_slow);
3160 BIND(&if_fast);
3161 {
3162 TNode<Object> indices_or_null = CAST(RegExpPrototypeExecBodyWithoutResult(
3163 context, iterating_regexp, iterating_string, &if_no_match, true));
3164 var_match = CAST(ConstructNewResultFromMatchInfo(
3165 context, iterating_regexp, indices_or_null, iterating_string));
3166 var_is_fast_regexp = Int32TrueConstant();
3167 Goto(&if_match);
3168 }
3169 BIND(&if_slow);
3170 {
3171 var_match = CAST(RegExpExec(context, iterating_regexp, iterating_string));
3172 var_is_fast_regexp = Int32FalseConstant();
3173 Branch(IsNull(var_match.value()), &if_no_match, &if_match);
3174 }
3175 }
3176
3177 // 10. If match is null, then
3178 BIND(&if_no_match);
3179 {
3180 // a. Set O.[[Done]] to true.
3181 SetDoneFlag(receiver, flags);
3182
3183 // b. Return ! CreateIterResultObject(undefined, true).
3184 Goto(&return_empty_done_result);
3185 }
3186 // 11. Else,
3187 BIND(&if_match);
3188 {
3189 Label if_global(this), if_not_global(this, Label::kDeferred),
3190 return_result(this);
3191
3192 // a. If global is true,
3193 Branch(HasGlobalFlag(flags), &if_global, &if_not_global);
3194 BIND(&if_global);
3195 {
3196 Label if_fast(this), if_slow(this, Label::kDeferred);
3197
3198 // ii. If matchStr is the empty string,
3199 Branch(var_is_fast_regexp.value(), &if_fast, &if_slow);
3200 BIND(&if_fast);
3201 {
3202 // i. Let matchStr be ? ToString(? Get(match, "0")).
3203 CSA_ASSERT_BRANCH(this, [&](Label* ok, Label* not_ok) {
3204 BranchIfFastRegExpResult(context, var_match.value(), ok, not_ok);
3205 });
3206 CSA_ASSERT(this,
3207 SmiNotEqual(LoadFastJSArrayLength(CAST(var_match.value())),
3208 SmiConstant(0)));
3209 TNode<FixedArrayBase> result_fixed_array =
3210 LoadElements(CAST(var_match.value()));
3211 TNode<String> match_str =
3212 CAST(LoadFixedArrayElement(result_fixed_array, 0));
3213
3214 // When iterating_regexp is fast, we assume it stays fast even after
3215 // accessing the first match from the RegExp result.
3216 CSA_ASSERT(this, IsFastRegExp(context, iterating_regexp));
3217 GotoIfNot(IsEmptyString(match_str), &return_result);
3218
3219 // 1. Let thisIndex be ? ToLength(? Get(R, "lastIndex")).
3220 TNode<Smi> this_index = CAST(FastLoadLastIndex(iterating_regexp));
3221 CSA_ASSERT(this, TaggedIsSmi(this_index));
3222
3223 // 2. Let nextIndex be ! AdvanceStringIndex(S, thisIndex, fullUnicode).
3224 TNode<Smi> next_index = CAST(AdvanceStringIndex(
3225 iterating_string, this_index, HasUnicodeFlag(flags), true));
3226 CSA_ASSERT(this, TaggedIsSmi(next_index));
3227
3228 // 3. Perform ? Set(R, "lastIndex", nextIndex, true).
3229 FastStoreLastIndex(iterating_regexp, next_index);
3230
3231 // iii. Return ! CreateIterResultObject(match, false).
3232 Goto(&return_result);
3233 }
3234 BIND(&if_slow);
3235 {
3236 // i. Let matchStr be ? ToString(? Get(match, "0")).
3237 TNode<String> match_str = ToString_Inline(
3238 context, GetProperty(context, var_match.value(), SmiConstant(0)));
3239
3240 GotoIfNot(IsEmptyString(match_str), &return_result);
3241
3242 // 1. Let thisIndex be ? ToLength(? Get(R, "lastIndex")).
3243 TNode<Object> last_index =
3244 CAST(SlowLoadLastIndex(context, iterating_regexp));
3245 TNode<Number> this_index = ToLength_Inline(context, last_index);
3246
3247 // 2. Let nextIndex be ! AdvanceStringIndex(S, thisIndex, fullUnicode).
3248 TNode<Object> next_index = CAST(AdvanceStringIndex(
3249 iterating_string, this_index, HasUnicodeFlag(flags), false));
3250
3251 // 3. Perform ? Set(R, "lastIndex", nextIndex, true).
3252 SlowStoreLastIndex(context, iterating_regexp, next_index);
3253
3254 // iii. Return ! CreateIterResultObject(match, false).
3255 Goto(&return_result);
3256 }
3257 }
3258 // b. Else,
3259 BIND(&if_not_global);
3260 {
3261 // i. Set O.[[Done]] to true.
3262 SetDoneFlag(receiver, flags);
3263
3264 // ii. Return ! CreateIterResultObject(match, false).
3265 Goto(&return_result);
3266 }
3267 BIND(&return_result);
3268 {
3269 Return(AllocateJSIteratorResult(context, var_match.value(),
3270 FalseConstant()));
3271 }
3272 }
3273 BIND(&return_empty_done_result);
3274 Return(
3275 AllocateJSIteratorResult(context, UndefinedConstant(), TrueConstant()));
3276
3277 BIND(&throw_bad_receiver);
3278 {
3279 ThrowTypeError(context, MessageTemplate::kIncompatibleMethodReceiver,
3280 StringConstant("%RegExpStringIterator%.prototype.next"),
3281 receiver);
3282 }
3283 }
3284
3285 } // namespace internal
3286 } // namespace v8
3287