1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if V8_TARGET_ARCH_ARM64
6 
7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
8 
9 #include "src/codegen/arm64/macro-assembler-arm64-inl.h"
10 #include "src/codegen/macro-assembler.h"
11 #include "src/logging/log.h"
12 #include "src/objects/objects-inl.h"
13 #include "src/regexp/regexp-macro-assembler.h"
14 #include "src/regexp/regexp-stack.h"
15 #include "src/snapshot/embedded/embedded-data.h"
16 #include "src/strings/unicode.h"
17 
18 namespace v8 {
19 namespace internal {
20 
21 /*
22  * This assembler uses the following register assignment convention:
 * - w19     : Used to temporarily store a value before a call to C code.
24  *             See CheckNotBackReferenceIgnoreCase.
25  * - x20     : Pointer to the current Code object,
26  *             it includes the heap object tag.
27  * - w21     : Current position in input, as negative offset from
28  *             the end of the string. Please notice that this is
29  *             the byte offset, not the character offset!
30  * - w22     : Currently loaded character. Must be loaded using
31  *             LoadCurrentCharacter before using any of the dispatch methods.
32  * - x23     : Points to tip of backtrack stack.
33  * - w24     : Position of the first character minus one: non_position_value.
34  *             Used to initialize capture registers.
35  * - x25     : Address at the end of the input string: input_end.
36  *             Points to byte after last character in input.
37  * - x26     : Address at the start of the input string: input_start.
38  * - w27     : Where to start in the input string.
39  * - x28     : Output array pointer.
40  * - x29/fp  : Frame pointer. Used to access arguments, local variables and
41  *             RegExp registers.
42  * - x16/x17 : IP registers, used by assembler. Very volatile.
43  * - sp      : Points to tip of C stack.
44  *
45  * - x0-x7   : Used as a cache to store 32 bit capture registers. These
46  *             registers need to be retained every time a call to C code
47  *             is done.
48  *
49  * The remaining registers are free for computations.
50  * Each call to a public method should retain this convention.
51  *
52  * The stack will have the following structure:
53  *
54  *  Location     Name               Description
55  *               (as referred to
56  *               in the code)
57  *
58  *  - fp[104]    Address regexp     Address of the JSRegExp object. Unused in
59  *                                  native code, passed to match signature of
60  *                                  the interpreter.
61  *  - fp[96]     isolate            Address of the current isolate.
62  *  ^^^^^^^^^ sp when called ^^^^^^^^^
63  *  - fp[16..88] r19-r28            Backup of CalleeSaved registers.
64  *  - fp[8]      lr                 Return from the RegExp code.
65  *  - fp[0]      fp                 Old frame pointer.
66  *  ^^^^^^^^^ fp ^^^^^^^^^
67  *  - fp[-8]     direct_call        1 => Direct call from JavaScript code.
68  *                                  0 => Call through the runtime system.
69  *  - fp[-16]    output_size        Output may fit multiple sets of matches.
70  *  - fp[-24]    input              Handle containing the input string.
71  *  - fp[-32]    success_counter
72  *  ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
73  *  - fp[-40]    register N         Capture registers initialized with
74  *  - fp[-44]    register N + 1     non_position_value.
75  *               ...                The first kNumCachedRegisters (N) registers
76  *               ...                are cached in x0 to x7.
77  *               ...                Only positions must be stored in the first
78  *  -            ...                num_saved_registers_ registers.
79  *  -            ...
80  *  -            register N + num_registers - 1
81  *  ^^^^^^^^^ sp ^^^^^^^^^
82  *
83  * The first num_saved_registers_ registers are initialized to point to
84  * "character -1" in the string (i.e., char_size() bytes before the first
85  * character of the string). The remaining registers start out as garbage.
86  *
87  * The data up to the return address must be placed there by the calling
88  * code and the remaining arguments are passed in registers, e.g. by calling the
89  * code entry as cast to a function with the signature:
90  * int (*match)(String input_string,
91  *              int start_index,
92  *              Address start,
93  *              Address end,
94  *              int* capture_output_array,
95  *              int num_capture_registers,
96  *              bool direct_call = false,
97  *              Isolate* isolate,
98  *              Address regexp);
99  * The call is performed by NativeRegExpMacroAssembler::Execute()
100  * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
101  */
102 
103 #define __ ACCESS_MASM(masm_)
104 
105 const int RegExpMacroAssemblerARM64::kRegExpCodeSize;
106 
RegExpMacroAssemblerARM64(Isolate * isolate,Zone * zone,Mode mode,int registers_to_save)107 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
108                                                      Zone* zone, Mode mode,
109                                                      int registers_to_save)
110     : NativeRegExpMacroAssembler(isolate, zone),
111       masm_(std::make_unique<MacroAssembler>(
112           isolate, CodeObjectRequired::kYes,
113           NewAssemblerBuffer(kRegExpCodeSize))),
114       no_root_array_scope_(masm_.get()),
115       mode_(mode),
116       num_registers_(registers_to_save),
117       num_saved_registers_(registers_to_save),
118       entry_label_(),
119       start_label_(),
120       success_label_(),
121       backtrack_label_(),
122       exit_label_() {
123   DCHECK_EQ(0, registers_to_save % 2);
124   // We can cache at most 16 W registers in x0-x7.
125   STATIC_ASSERT(kNumCachedRegisters <= 16);
126   STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
127   __ CallTarget();
128 
129   __ B(&entry_label_);   // We'll write the entry code later.
130   __ Bind(&start_label_);  // And then continue from here.
131 }
132 
~RegExpMacroAssemblerARM64()133 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
134   // Unuse labels in case we throw away the assembler without calling GetCode.
135   entry_label_.Unuse();
136   start_label_.Unuse();
137   success_label_.Unuse();
138   backtrack_label_.Unuse();
139   exit_label_.Unuse();
140   check_preempt_label_.Unuse();
141   stack_overflow_label_.Unuse();
142   fallback_label_.Unuse();
143 }
144 
stack_limit_slack()145 int RegExpMacroAssemblerARM64::stack_limit_slack()  {
146   return RegExpStack::kStackLimitSlack;
147 }
148 
149 
AdvanceCurrentPosition(int by)150 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
151   if (by != 0) {
152     __ Add(current_input_offset(),
153            current_input_offset(), by * char_size());
154   }
155 }
156 
157 
AdvanceRegister(int reg,int by)158 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
159   DCHECK((reg >= 0) && (reg < num_registers_));
160   if (by != 0) {
161     RegisterState register_state = GetRegisterState(reg);
162     switch (register_state) {
163       case STACKED:
164         __ Ldr(w10, register_location(reg));
165         __ Add(w10, w10, by);
166         __ Str(w10, register_location(reg));
167         break;
168       case CACHED_LSW: {
169         Register to_advance = GetCachedRegister(reg);
170         __ Add(to_advance, to_advance, by);
171         break;
172       }
173       case CACHED_MSW: {
174         Register to_advance = GetCachedRegister(reg);
175         // Sign-extend to int64, shift as uint64, cast back to int64.
176         __ Add(
177             to_advance, to_advance,
178             static_cast<int64_t>(static_cast<uint64_t>(static_cast<int64_t>(by))
179                                  << kWRegSizeInBits));
180         break;
181       }
182       default:
183         UNREACHABLE();
184     }
185   }
186 }
187 
188 
Backtrack()189 void RegExpMacroAssemblerARM64::Backtrack() {
190   CheckPreemption();
191   if (has_backtrack_limit()) {
192     Label next;
193     UseScratchRegisterScope temps(masm_.get());
194     Register scratch = temps.AcquireW();
195     __ Ldr(scratch, MemOperand(frame_pointer(), kBacktrackCount));
196     __ Add(scratch, scratch, 1);
197     __ Str(scratch, MemOperand(frame_pointer(), kBacktrackCount));
198     __ Cmp(scratch, Operand(backtrack_limit()));
199     __ B(ne, &next);
200 
201     // Backtrack limit exceeded.
202     if (can_fallback()) {
203       __ B(&fallback_label_);
204     } else {
205       // Can't fallback, so we treat it as a failed match.
206       Fail();
207     }
208 
209     __ bind(&next);
210   }
211   Pop(w10);
212   __ Add(x10, code_pointer(), Operand(w10, UXTW));
213   __ Br(x10);
214 }
215 
216 
Bind(Label * label)217 void RegExpMacroAssemblerARM64::Bind(Label* label) {
218   __ Bind(label);
219 }
220 
BindJumpTarget(Label * label)221 void RegExpMacroAssemblerARM64::BindJumpTarget(Label* label) {
222   __ BindJumpTarget(label);
223 }
224 
CheckCharacter(uint32_t c,Label * on_equal)225 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
226   CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
227 }
228 
CheckCharacterGT(base::uc16 limit,Label * on_greater)229 void RegExpMacroAssemblerARM64::CheckCharacterGT(base::uc16 limit,
230                                                  Label* on_greater) {
231   CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
232 }
233 
CheckAtStart(int cp_offset,Label * on_at_start)234 void RegExpMacroAssemblerARM64::CheckAtStart(int cp_offset,
235                                              Label* on_at_start) {
236   __ Add(w10, current_input_offset(),
237          Operand(-char_size() + cp_offset * char_size()));
238   __ Cmp(w10, string_start_minus_one());
239   BranchOrBacktrack(eq, on_at_start);
240 }
241 
CheckNotAtStart(int cp_offset,Label * on_not_at_start)242 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
243                                                 Label* on_not_at_start) {
244   __ Add(w10, current_input_offset(),
245          Operand(-char_size() + cp_offset * char_size()));
246   __ Cmp(w10, string_start_minus_one());
247   BranchOrBacktrack(ne, on_not_at_start);
248 }
249 
CheckCharacterLT(base::uc16 limit,Label * on_less)250 void RegExpMacroAssemblerARM64::CheckCharacterLT(base::uc16 limit,
251                                                  Label* on_less) {
252   CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
253 }
254 
CheckCharacters(base::Vector<const base::uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)255 void RegExpMacroAssemblerARM64::CheckCharacters(
256     base::Vector<const base::uc16> str, int cp_offset, Label* on_failure,
257     bool check_end_of_string) {
258   // This method is only ever called from the cctests.
259 
260   if (check_end_of_string) {
261     // Is last character of required match inside string.
262     CheckPosition(cp_offset + str.length() - 1, on_failure);
263   }
264 
265   Register characters_address = x11;
266 
267   __ Add(characters_address,
268          input_end(),
269          Operand(current_input_offset(), SXTW));
270   if (cp_offset != 0) {
271     __ Add(characters_address, characters_address, cp_offset * char_size());
272   }
273 
274   for (int i = 0; i < str.length(); i++) {
275     if (mode_ == LATIN1) {
276       __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
277       DCHECK_GE(String::kMaxOneByteCharCode, str[i]);
278     } else {
279       __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
280     }
281     CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
282   }
283 }
284 
CheckGreedyLoop(Label * on_equal)285 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
286   __ Ldr(w10, MemOperand(backtrack_stackpointer()));
287   __ Cmp(current_input_offset(), w10);
288   __ Cset(x11, eq);
289   __ Add(backtrack_stackpointer(),
290          backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
291   BranchOrBacktrack(eq, on_equal);
292 }
293 
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,bool unicode,Label * on_no_match)294 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
295     int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
296   Label fallthrough;
297 
298   Register capture_start_offset = w10;
299   // Save the capture length in a callee-saved register so it will
300   // be preserved if we call a C helper.
301   Register capture_length = w19;
302   DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
303 
304   // Find length of back-referenced capture.
305   DCHECK_EQ(0, start_reg % 2);
306   if (start_reg < kNumCachedRegisters) {
307     __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
308     __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
309   } else {
310     __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
311   }
312   __ Sub(capture_length, w11, capture_start_offset);  // Length to check.
313 
314   // At this point, the capture registers are either both set or both cleared.
315   // If the capture length is zero, then the capture is either empty or cleared.
316   // Fall through in both cases.
317   __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
318 
319   // Check that there are enough characters left in the input.
320   if (read_backward) {
321     __ Add(w12, string_start_minus_one(), capture_length);
322     __ Cmp(current_input_offset(), w12);
323     BranchOrBacktrack(le, on_no_match);
324   } else {
325     __ Cmn(capture_length, current_input_offset());
326     BranchOrBacktrack(gt, on_no_match);
327   }
328 
329   if (mode_ == LATIN1) {
330     Label success;
331     Label fail;
332     Label loop_check;
333 
334     Register capture_start_address = x12;
335     Register capture_end_addresss = x13;
336     Register current_position_address = x14;
337 
338     __ Add(capture_start_address,
339            input_end(),
340            Operand(capture_start_offset, SXTW));
341     __ Add(capture_end_addresss,
342            capture_start_address,
343            Operand(capture_length, SXTW));
344     __ Add(current_position_address,
345            input_end(),
346            Operand(current_input_offset(), SXTW));
347     if (read_backward) {
348       // Offset by length when matching backwards.
349       __ Sub(current_position_address, current_position_address,
350              Operand(capture_length, SXTW));
351     }
352 
353     Label loop;
354     __ Bind(&loop);
355     __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
356     __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
357     __ Cmp(w10, w11);
358     __ B(eq, &loop_check);
359 
360     // Mismatch, try case-insensitive match (converting letters to lower-case).
361     __ Orr(w10, w10, 0x20);  // Convert capture character to lower-case.
362     __ Orr(w11, w11, 0x20);  // Also convert input character.
363     __ Cmp(w11, w10);
364     __ B(ne, &fail);
365     __ Sub(w10, w10, 'a');
366     __ Cmp(w10, 'z' - 'a');  // Is w10 a lowercase letter?
367     __ B(ls, &loop_check);  // In range 'a'-'z'.
368     // Latin-1: Check for values in range [224,254] but not 247.
369     __ Sub(w10, w10, 224 - 'a');
370     __ Cmp(w10, 254 - 224);
371     __ Ccmp(w10, 247 - 224, ZFlag, ls);  // Check for 247.
372     __ B(eq, &fail);  // Weren't Latin-1 letters.
373 
374     __ Bind(&loop_check);
375     __ Cmp(capture_start_address, capture_end_addresss);
376     __ B(lt, &loop);
377     __ B(&success);
378 
379     __ Bind(&fail);
380     BranchOrBacktrack(al, on_no_match);
381 
382     __ Bind(&success);
383     // Compute new value of character position after the matched part.
384     __ Sub(current_input_offset().X(), current_position_address, input_end());
385     if (read_backward) {
386       __ Sub(current_input_offset().X(), current_input_offset().X(),
387              Operand(capture_length, SXTW));
388     }
389     if (FLAG_debug_code) {
390       __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
391       __ Ccmp(current_input_offset(), 0, NoFlag, eq);
392       // The current input offset should be <= 0, and fit in a W register.
393       __ Check(le, AbortReason::kOffsetOutOfRange);
394     }
395   } else {
396     DCHECK(mode_ == UC16);
397     int argument_count = 4;
398 
399     // The cached registers need to be retained.
400     CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
401     DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
402     __ PushCPURegList(cached_registers);
403 
404     // Put arguments into arguments registers.
405     // Parameters are
406     //   x0: Address byte_offset1 - Address captured substring's start.
407     //   x1: Address byte_offset2 - Address of current character position.
408     //   w2: size_t byte_length - length of capture in bytes(!)
409     //   x3: Isolate* isolate.
410 
411     // Address of start of capture.
412     __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
413     // Length of capture.
414     __ Mov(w2, capture_length);
415     // Address of current input position.
416     __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
417     if (read_backward) {
418       __ Sub(x1, x1, Operand(capture_length, SXTW));
419     }
420     // Isolate.
421     __ Mov(x3, ExternalReference::isolate_address(isolate()));
422 
423     {
424       AllowExternalCallThatCantCauseGC scope(masm_.get());
425       ExternalReference function =
426           unicode ? ExternalReference::re_case_insensitive_compare_unicode(
427                         isolate())
428                   : ExternalReference::re_case_insensitive_compare_non_unicode(
429                         isolate());
430       __ CallCFunction(function, argument_count);
431     }
432 
433     // Check if function returned non-zero for success or zero for failure.
434     // x0 is one of the registers used as a cache so it must be tested before
435     // the cache is restored.
436     __ Cmp(x0, 0);
437     __ PopCPURegList(cached_registers);
438     BranchOrBacktrack(eq, on_no_match);
439 
440     // On success, advance position by length of capture.
441     if (read_backward) {
442       __ Sub(current_input_offset(), current_input_offset(), capture_length);
443     } else {
444       __ Add(current_input_offset(), current_input_offset(), capture_length);
445     }
446   }
447 
448   __ Bind(&fallthrough);
449 }
450 
CheckNotBackReference(int start_reg,bool read_backward,Label * on_no_match)451 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
452                                                       bool read_backward,
453                                                       Label* on_no_match) {
454   Label fallthrough;
455 
456   Register capture_start_address = x12;
457   Register capture_end_address = x13;
458   Register current_position_address = x14;
459   Register capture_length = w15;
460 
461   // Find length of back-referenced capture.
462   DCHECK_EQ(0, start_reg % 2);
463   if (start_reg < kNumCachedRegisters) {
464     __ Mov(x10, GetCachedRegister(start_reg));
465     __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
466   } else {
467     __ Ldp(w11, w10, capture_location(start_reg, x10));
468   }
469   __ Sub(capture_length, w11, w10);  // Length to check.
470 
471   // At this point, the capture registers are either both set or both cleared.
472   // If the capture length is zero, then the capture is either empty or cleared.
473   // Fall through in both cases.
474   __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
475 
476   // Check that there are enough characters left in the input.
477   if (read_backward) {
478     __ Add(w12, string_start_minus_one(), capture_length);
479     __ Cmp(current_input_offset(), w12);
480     BranchOrBacktrack(le, on_no_match);
481   } else {
482     __ Cmn(capture_length, current_input_offset());
483     BranchOrBacktrack(gt, on_no_match);
484   }
485 
486   // Compute pointers to match string and capture string
487   __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
488   __ Add(capture_end_address,
489          capture_start_address,
490          Operand(capture_length, SXTW));
491   __ Add(current_position_address,
492          input_end(),
493          Operand(current_input_offset(), SXTW));
494   if (read_backward) {
495     // Offset by length when matching backwards.
496     __ Sub(current_position_address, current_position_address,
497            Operand(capture_length, SXTW));
498   }
499 
500   Label loop;
501   __ Bind(&loop);
502   if (mode_ == LATIN1) {
503     __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
504     __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
505   } else {
506     DCHECK(mode_ == UC16);
507     __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
508     __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
509   }
510   __ Cmp(w10, w11);
511   BranchOrBacktrack(ne, on_no_match);
512   __ Cmp(capture_start_address, capture_end_address);
513   __ B(lt, &loop);
514 
515   // Move current character position to position after match.
516   __ Sub(current_input_offset().X(), current_position_address, input_end());
517   if (read_backward) {
518     __ Sub(current_input_offset().X(), current_input_offset().X(),
519            Operand(capture_length, SXTW));
520   }
521 
522   if (FLAG_debug_code) {
523     __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
524     __ Ccmp(current_input_offset(), 0, NoFlag, eq);
525     // The current input offset should be <= 0, and fit in a W register.
526     __ Check(le, AbortReason::kOffsetOutOfRange);
527   }
528   __ Bind(&fallthrough);
529 }
530 
531 
CheckNotCharacter(unsigned c,Label * on_not_equal)532 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
533                                                   Label* on_not_equal) {
534   CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
535 }
536 
537 
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)538 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
539                                                        uint32_t mask,
540                                                        Label* on_equal) {
541   __ And(w10, current_character(), mask);
542   CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
543 }
544 
545 
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)546 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
547                                                           unsigned mask,
548                                                           Label* on_not_equal) {
549   __ And(w10, current_character(), mask);
550   CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
551 }
552 
CheckNotCharacterAfterMinusAnd(base::uc16 c,base::uc16 minus,base::uc16 mask,Label * on_not_equal)553 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
554     base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
555   DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
556   __ Sub(w10, current_character(), minus);
557   __ And(w10, w10, mask);
558   CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
559 }
560 
CheckCharacterInRange(base::uc16 from,base::uc16 to,Label * on_in_range)561 void RegExpMacroAssemblerARM64::CheckCharacterInRange(base::uc16 from,
562                                                       base::uc16 to,
563                                                       Label* on_in_range) {
564   __ Sub(w10, current_character(), from);
565   // Unsigned lower-or-same condition.
566   CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
567 }
568 
CheckCharacterNotInRange(base::uc16 from,base::uc16 to,Label * on_not_in_range)569 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
570     base::uc16 from, base::uc16 to, Label* on_not_in_range) {
571   __ Sub(w10, current_character(), from);
572   // Unsigned higher condition.
573   CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
574 }
575 
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)576 void RegExpMacroAssemblerARM64::CheckBitInTable(
577     Handle<ByteArray> table,
578     Label* on_bit_set) {
579   __ Mov(x11, Operand(table));
580   if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
581     __ And(w10, current_character(), kTableMask);
582     __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
583   } else {
584     __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
585   }
586   __ Ldrb(w11, MemOperand(x11, w10, UXTW));
587   CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
588 }
589 
CheckSpecialCharacterClass(base::uc16 type,Label * on_no_match)590 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(base::uc16 type,
591                                                            Label* on_no_match) {
592   // Range checks (c in min..max) are generally implemented by an unsigned
593   // (c - min) <= (max - min) check
594   switch (type) {
595   case 's':
596     // Match space-characters
597     if (mode_ == LATIN1) {
598       // One byte space characters are '\t'..'\r', ' ' and \u00a0.
599       Label success;
600       // Check for ' ' or 0x00A0.
601       __ Cmp(current_character(), ' ');
602       __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
603       __ B(eq, &success);
604       // Check range 0x09..0x0D.
605       __ Sub(w10, current_character(), '\t');
606       CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
607       __ Bind(&success);
608       return true;
609     }
610     return false;
611   case 'S':
612     // The emitted code for generic character classes is good enough.
613     return false;
614   case 'd':
615     // Match ASCII digits ('0'..'9').
616     __ Sub(w10, current_character(), '0');
617     CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
618     return true;
619   case 'D':
620     // Match ASCII non-digits.
621     __ Sub(w10, current_character(), '0');
622     CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
623     return true;
624   case '.': {
625     // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
626     // Here we emit the conditional branch only once at the end to make branch
627     // prediction more efficient, even though we could branch out of here
628     // as soon as a character matches.
629     __ Cmp(current_character(), 0x0A);
630     __ Ccmp(current_character(), 0x0D, ZFlag, ne);
631     if (mode_ == UC16) {
632       __ Sub(w10, current_character(), 0x2028);
633       // If the Z flag was set we clear the flags to force a branch.
634       __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
635       // ls -> !((C==1) && (Z==0))
636       BranchOrBacktrack(ls, on_no_match);
637     } else {
638       BranchOrBacktrack(eq, on_no_match);
639     }
640     return true;
641   }
642   case 'n': {
643     // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
644     // We have to check all 4 newline characters before emitting
645     // the conditional branch.
646     __ Cmp(current_character(), 0x0A);
647     __ Ccmp(current_character(), 0x0D, ZFlag, ne);
648     if (mode_ == UC16) {
649       __ Sub(w10, current_character(), 0x2028);
650       // If the Z flag was set we clear the flags to force a fall-through.
651       __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
652       // hi -> (C==1) && (Z==0)
653       BranchOrBacktrack(hi, on_no_match);
654     } else {
655       BranchOrBacktrack(ne, on_no_match);
656     }
657     return true;
658   }
659   case 'w': {
660     if (mode_ != LATIN1) {
661       // Table is 256 entries, so all Latin1 characters can be tested.
662       CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
663     }
664     ExternalReference map = ExternalReference::re_word_character_map(isolate());
665     __ Mov(x10, map);
666     __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
667     CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
668     return true;
669   }
670   case 'W': {
671     Label done;
672     if (mode_ != LATIN1) {
673       // Table is 256 entries, so all Latin1 characters can be tested.
674       __ Cmp(current_character(), 'z');
675       __ B(hi, &done);
676     }
677     ExternalReference map = ExternalReference::re_word_character_map(isolate());
678     __ Mov(x10, map);
679     __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
680     CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
681     __ Bind(&done);
682     return true;
683   }
684   case '*':
685     // Match any character.
686     return true;
687   // No custom implementation (yet): s(UC16), S(UC16).
688   default:
689     return false;
690   }
691 }
692 
Fail()693 void RegExpMacroAssemblerARM64::Fail() {
694   __ Mov(w0, FAILURE);
695   __ B(&exit_label_);
696 }
697 
LoadRegExpStackPointerFromMemory(Register dst)698 void RegExpMacroAssemblerARM64::LoadRegExpStackPointerFromMemory(Register dst) {
699   ExternalReference ref =
700       ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
701   __ Mov(dst, ref);
702   __ Ldr(dst, MemOperand(dst));
703 }
704 
StoreRegExpStackPointerToMemory(Register src,Register scratch)705 void RegExpMacroAssemblerARM64::StoreRegExpStackPointerToMemory(
706     Register src, Register scratch) {
707   ExternalReference ref =
708       ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
709   __ Mov(scratch, ref);
710   __ Str(src, MemOperand(scratch));
711 }
712 
PushRegExpBasePointer(Register stack_pointer,Register scratch)713 void RegExpMacroAssemblerARM64::PushRegExpBasePointer(Register stack_pointer,
714                                                       Register scratch) {
715   ExternalReference ref =
716       ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
717   __ Mov(scratch, ref);
718   __ Ldr(scratch, MemOperand(scratch));
719   __ Sub(scratch, stack_pointer, scratch);
720   __ Str(scratch, MemOperand(frame_pointer(), kRegExpStackBasePointer));
721 }
722 
PopRegExpBasePointer(Register stack_pointer_out,Register scratch)723 void RegExpMacroAssemblerARM64::PopRegExpBasePointer(Register stack_pointer_out,
724                                                      Register scratch) {
725   ExternalReference ref =
726       ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
727   __ Ldr(stack_pointer_out,
728          MemOperand(frame_pointer(), kRegExpStackBasePointer));
729   __ Mov(scratch, ref);
730   __ Ldr(scratch, MemOperand(scratch));
731   __ Add(stack_pointer_out, stack_pointer_out, scratch);
732   StoreRegExpStackPointerToMemory(stack_pointer_out, scratch);
733 }
734 
GetCode(Handle<String> source)735 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
736   Label return_w0;
737   // Finalize code - write the entry point code now we know how many
738   // registers we need.
739 
740   // Entry code:
741   __ Bind(&entry_label_);
742 
743   // Arguments on entry:
744   // x0:  String   input
745   // x1:  int      start_offset
746   // x2:  byte*    input_start
747   // x3:  byte*    input_end
748   // x4:  int*     output array
749   // x5:  int      output array size
750   // x6:  int      direct_call
751   // x7:  Isolate* isolate
752   //
753   // sp[0]:  secondary link/return address used by native call
754 
755   // Tell the system that we have a stack frame.  Because the type is MANUAL, no
756   // code is generated.
757   FrameScope scope(masm_.get(), StackFrame::MANUAL);
758 
759   // Push registers on the stack, only push the argument registers that we need.
760   CPURegList argument_registers(x0, x5, x6, x7);
761 
762   CPURegList registers_to_retain = kCalleeSaved;
763   DCHECK_EQ(registers_to_retain.Count(), kNumCalleeSavedRegisters);
764 
765   __ PushCPURegList<TurboAssembler::kDontStoreLR>(registers_to_retain);
766   __ Push<TurboAssembler::kSignLR>(lr, fp);
767   __ PushCPURegList(argument_registers);
768 
769   // Set frame pointer in place.
770   __ Add(frame_pointer(), sp, argument_registers.Count() * kSystemPointerSize);
771 
772   // Initialize callee-saved registers.
773   __ Mov(start_offset(), w1);
774   __ Mov(input_start(), x2);
775   __ Mov(input_end(), x3);
776   __ Mov(output_array(), x4);
777 
778   // Make sure the stack alignment will be respected.
779   const int alignment = masm_->ActivationFrameAlignment();
780   DCHECK_EQ(alignment % 16, 0);
781   const int align_mask = (alignment / kWRegSize) - 1;
782 
783   // Make room for stack locals.
784   static constexpr int kWRegPerXReg = kXRegSize / kWRegSize;
785   DCHECK_EQ(kNumberOfStackLocals * kWRegPerXReg,
786             ((kNumberOfStackLocals * kWRegPerXReg) + align_mask) & ~align_mask);
787   __ Claim(kNumberOfStackLocals * kWRegPerXReg);
788 
789   // Initialize backtrack stack pointer. It must not be clobbered from here on.
790   // Note the backtrack_stackpointer is callee-saved.
791   STATIC_ASSERT(backtrack_stackpointer() == x23);
792   LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
793 
794   // Store the regexp base pointer - we'll later restore it / write it to
795   // memory when returning from this irregexp code object.
796   PushRegExpBasePointer(backtrack_stackpointer(), x11);
797 
798   // Set the number of registers we will need to allocate, that is:
799   //   - (num_registers_ - kNumCachedRegisters) (W registers)
800   const int num_stack_registers =
801       std::max(0, num_registers_ - kNumCachedRegisters);
802   const int num_wreg_to_allocate =
803       (num_stack_registers + align_mask) & ~align_mask;
804 
805   {
806     // Check if we have space on the stack.
807     Label stack_limit_hit, stack_ok;
808 
809     ExternalReference stack_limit =
810         ExternalReference::address_of_jslimit(isolate());
811     __ Mov(x10, stack_limit);
812     __ Ldr(x10, MemOperand(x10));
813     __ Subs(x10, sp, x10);
814 
815     // Handle it if the stack pointer is already below the stack limit.
816     __ B(ls, &stack_limit_hit);
817 
818     // Check if there is room for the variable number of registers above
819     // the stack limit.
820     __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
821     __ B(hs, &stack_ok);
822 
823     // Exit with OutOfMemory exception. There is not enough space on the stack
824     // for our working registers.
825     __ Mov(w0, EXCEPTION);
826     __ B(&return_w0);
827 
828     __ Bind(&stack_limit_hit);
829     CallCheckStackGuardState(x10);
830     // If returned value is non-zero, we exit with the returned value as result.
831     __ Cbnz(w0, &return_w0);
832 
833     __ Bind(&stack_ok);
834   }
835 
836   // Allocate space on stack.
837   __ Claim(num_wreg_to_allocate, kWRegSize);
838 
839   // Initialize success_counter and kBacktrackCount with 0.
840   __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
841   __ Str(wzr, MemOperand(frame_pointer(), kBacktrackCount));
842 
843   // Find negative length (offset of start relative to end).
844   __ Sub(x10, input_start(), input_end());
845   if (FLAG_debug_code) {
846     // Check that the size of the input string chars is in range.
847     __ Neg(x11, x10);
848     __ Cmp(x11, SeqTwoByteString::kMaxCharsSize);
849     __ Check(ls, AbortReason::kInputStringTooLong);
850   }
851   __ Mov(current_input_offset(), w10);
852 
853   // The non-position value is used as a clearing value for the
854   // capture registers, it corresponds to the position of the first character
855   // minus one.
856   __ Sub(string_start_minus_one(), current_input_offset(), char_size());
857   __ Sub(string_start_minus_one(), string_start_minus_one(),
858          Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
859   // We can store this value twice in an X register for initializing
860   // on-stack registers later.
861   __ Orr(twice_non_position_value(), string_start_minus_one().X(),
862          Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));
863 
864   // Initialize code pointer register.
865   __ Mov(code_pointer(), Operand(masm_->CodeObject()));
866 
867   Label load_char_start_regexp;
868   {
869     Label start_regexp;
870     // Load newline if index is at start, previous character otherwise.
871     __ Cbnz(start_offset(), &load_char_start_regexp);
872     __ Mov(current_character(), '\n');
873     __ B(&start_regexp);
874 
875     // Global regexp restarts matching here.
876     __ Bind(&load_char_start_regexp);
877     // Load previous char as initial value of current character register.
878     LoadCurrentCharacterUnchecked(-1, 1);
879     __ Bind(&start_regexp);
880   }
881 
882   // Initialize on-stack registers.
883   if (num_saved_registers_ > 0) {
884     ClearRegisters(0, num_saved_registers_ - 1);
885   }
886 
887   // Execute.
888   __ B(&start_label_);
889 
890   if (backtrack_label_.is_linked()) {
891     __ Bind(&backtrack_label_);
892     Backtrack();
893   }
894 
895   if (success_label_.is_linked()) {
896     Register first_capture_start = w15;
897 
898     // Save captures when successful.
899     __ Bind(&success_label_);
900 
901     if (num_saved_registers_ > 0) {
902       // V8 expects the output to be an int32_t array.
903       Register capture_start = w12;
904       Register capture_end = w13;
905       Register input_length = w14;
906 
907       // Copy captures to output.
908 
909       // Get string length.
910       __ Sub(x10, input_end(), input_start());
911       if (FLAG_debug_code) {
912         // Check that the size of the input string chars is in range.
913         __ Cmp(x10, SeqTwoByteString::kMaxCharsSize);
914         __ Check(ls, AbortReason::kInputStringTooLong);
915       }
916       // input_start has a start_offset offset on entry. We need to include
917       // it when computing the length of the whole string.
918       if (mode_ == UC16) {
919         __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
920       } else {
921         __ Add(input_length, start_offset(), w10);
922       }
923 
924       // Copy the results to the output array from the cached registers first.
925       for (int i = 0;
926            (i < num_saved_registers_) && (i < kNumCachedRegisters);
927            i += 2) {
928         __ Mov(capture_start.X(), GetCachedRegister(i));
929         __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
930         if ((i == 0) && global_with_zero_length_check()) {
931           // Keep capture start for the zero-length check later.
932           __ Mov(first_capture_start, capture_start);
933         }
934         // Offsets need to be relative to the start of the string.
935         if (mode_ == UC16) {
936           __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
937           __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
938         } else {
939           __ Add(capture_start, input_length, capture_start);
940           __ Add(capture_end, input_length, capture_end);
941         }
942         // The output pointer advances for a possible global match.
943         __ Stp(capture_start, capture_end,
944                MemOperand(output_array(), kSystemPointerSize, PostIndex));
945       }
946 
947       // Only carry on if there are more than kNumCachedRegisters capture
948       // registers.
949       int num_registers_left_on_stack =
950           num_saved_registers_ - kNumCachedRegisters;
951       if (num_registers_left_on_stack > 0) {
952         Register base = x10;
953         // There are always an even number of capture registers. A couple of
954         // registers determine one match with two offsets.
955         DCHECK_EQ(0, num_registers_left_on_stack % 2);
956         __ Add(base, frame_pointer(), kFirstCaptureOnStack);
957 
958         // We can unroll the loop here, we should not unroll for less than 2
959         // registers.
960         STATIC_ASSERT(kNumRegistersToUnroll > 2);
961         if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
962           for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
963             __ Ldp(capture_end, capture_start,
964                    MemOperand(base, -kSystemPointerSize, PostIndex));
965             if ((i == 0) && global_with_zero_length_check()) {
966               // Keep capture start for the zero-length check later.
967               __ Mov(first_capture_start, capture_start);
968             }
969             // Offsets need to be relative to the start of the string.
970             if (mode_ == UC16) {
971               __ Add(capture_start,
972                      input_length,
973                      Operand(capture_start, ASR, 1));
974               __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
975             } else {
976               __ Add(capture_start, input_length, capture_start);
977               __ Add(capture_end, input_length, capture_end);
978             }
979             // The output pointer advances for a possible global match.
980             __ Stp(capture_start, capture_end,
981                    MemOperand(output_array(), kSystemPointerSize, PostIndex));
982           }
983         } else {
984           Label loop, start;
985           __ Mov(x11, num_registers_left_on_stack);
986 
987           __ Ldp(capture_end, capture_start,
988                  MemOperand(base, -kSystemPointerSize, PostIndex));
989           if (global_with_zero_length_check()) {
990             __ Mov(first_capture_start, capture_start);
991           }
992           __ B(&start);
993 
994           __ Bind(&loop);
995           __ Ldp(capture_end, capture_start,
996                  MemOperand(base, -kSystemPointerSize, PostIndex));
997           __ Bind(&start);
998           if (mode_ == UC16) {
999             __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
1000             __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
1001           } else {
1002             __ Add(capture_start, input_length, capture_start);
1003             __ Add(capture_end, input_length, capture_end);
1004           }
1005           // The output pointer advances for a possible global match.
1006           __ Stp(capture_start, capture_end,
1007                  MemOperand(output_array(), kSystemPointerSize, PostIndex));
1008           __ Sub(x11, x11, 2);
1009           __ Cbnz(x11, &loop);
1010         }
1011       }
1012     }
1013 
1014     if (global()) {
1015       Register success_counter = w0;
1016       Register output_size = x10;
1017       // Restart matching if the regular expression is flagged as global.
1018 
1019       // Increment success counter.
1020       __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
1021       __ Add(success_counter, success_counter, 1);
1022       __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
1023 
1024       // Capture results have been stored, so the number of remaining global
1025       // output registers is reduced by the number of stored captures.
1026       __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
1027       __ Sub(output_size, output_size, num_saved_registers_);
1028       // Check whether we have enough room for another set of capture results.
1029       __ Cmp(output_size, num_saved_registers_);
1030       __ B(lt, &return_w0);
1031 
1032       // The output pointer is already set to the next field in the output
1033       // array.
1034       // Update output size on the frame before we restart matching.
1035       __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
1036 
1037       // Restore the original regexp stack pointer value (effectively, pop the
1038       // stored base pointer).
1039       PopRegExpBasePointer(backtrack_stackpointer(), x11);
1040 
1041       if (global_with_zero_length_check()) {
1042         // Special case for zero-length matches.
1043         __ Cmp(current_input_offset(), first_capture_start);
1044         // Not a zero-length match, restart.
1045         __ B(ne, &load_char_start_regexp);
1046         // Offset from the end is zero if we already reached the end.
1047         __ Cbz(current_input_offset(), &return_w0);
1048         // Advance current position after a zero-length match.
1049         Label advance;
1050         __ bind(&advance);
1051         __ Add(current_input_offset(), current_input_offset(),
1052                Operand((mode_ == UC16) ? 2 : 1));
1053         if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
1054       }
1055 
1056       __ B(&load_char_start_regexp);
1057     } else {
1058       __ Mov(w0, SUCCESS);
1059     }
1060   }
1061 
1062   if (exit_label_.is_linked()) {
1063     // Exit and return w0.
1064     __ Bind(&exit_label_);
1065     if (global()) {
1066       __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
1067     }
1068   }
1069 
1070   __ Bind(&return_w0);
1071   // Restore the original regexp stack pointer value (effectively, pop the
1072   // stored base pointer).
1073   PopRegExpBasePointer(backtrack_stackpointer(), x11);
1074 
1075   // Set stack pointer back to first register to retain.
1076   __ Mov(sp, fp);
1077   __ Pop<TurboAssembler::kAuthLR>(fp, lr);
1078 
1079   // Restore registers.
1080   __ PopCPURegList<TurboAssembler::kDontLoadLR>(registers_to_retain);
1081 
1082   __ Ret();
1083 
1084   Label exit_with_exception;
1085   // Registers x0 to x7 are used to store the first captures, they need to be
1086   // retained over calls to C++ code.
1087   CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
1088   DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
1089 
1090   if (check_preempt_label_.is_linked()) {
1091     __ Bind(&check_preempt_label_);
1092 
1093     StoreRegExpStackPointerToMemory(backtrack_stackpointer(), x10);
1094 
1095     SaveLinkRegister();
1096     // The cached registers need to be retained.
1097     __ PushCPURegList(cached_registers);
1098     CallCheckStackGuardState(x10);
1099     // Returning from the regexp code restores the stack (sp <- fp)
1100     // so we don't need to drop the link register from it before exiting.
1101     __ Cbnz(w0, &return_w0);
1102     // Reset the cached registers.
1103     __ PopCPURegList(cached_registers);
1104 
1105     LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
1106 
1107     RestoreLinkRegister();
1108     __ Ret();
1109   }
1110 
1111   if (stack_overflow_label_.is_linked()) {
1112     __ Bind(&stack_overflow_label_);
1113 
1114     StoreRegExpStackPointerToMemory(backtrack_stackpointer(), x10);
1115 
1116     SaveLinkRegister();
1117     // The cached registers need to be retained.
1118     __ PushCPURegList(cached_registers);
1119     // Call GrowStack(isolate)
1120     static constexpr int kNumArguments = 1;
1121     __ Mov(x0, ExternalReference::isolate_address(isolate()));
1122     __ CallCFunction(ExternalReference::re_grow_stack(isolate()),
1123                      kNumArguments);
1124     // If return nullptr, we have failed to grow the stack, and must exit with
1125     // a stack-overflow exception.  Returning from the regexp code restores the
1126     // stack (sp <- fp) so we don't need to drop the link register from it
1127     // before exiting.
1128     __ Cbz(w0, &exit_with_exception);
1129     // Otherwise use return value as new stack pointer.
1130     __ Mov(backtrack_stackpointer(), x0);
1131     // Reset the cached registers.
1132     __ PopCPURegList(cached_registers);
1133     RestoreLinkRegister();
1134     __ Ret();
1135   }
1136 
1137   if (exit_with_exception.is_linked()) {
1138     __ Bind(&exit_with_exception);
1139     __ Mov(w0, EXCEPTION);
1140     __ B(&return_w0);
1141   }
1142 
1143   if (fallback_label_.is_linked()) {
1144     __ Bind(&fallback_label_);
1145     __ Mov(w0, FALLBACK_TO_EXPERIMENTAL);
1146     __ B(&return_w0);
1147   }
1148 
1149   CodeDesc code_desc;
1150   masm_->GetCode(isolate(), &code_desc);
1151   Handle<Code> code =
1152       Factory::CodeBuilder(isolate(), code_desc, CodeKind::REGEXP)
1153           .set_self_reference(masm_->CodeObject())
1154           .Build();
1155   PROFILE(masm_->isolate(),
1156           RegExpCodeCreateEvent(Handle<AbstractCode>::cast(code), source));
1157   return Handle<HeapObject>::cast(code);
1158 }
1159 
1160 
GoTo(Label * to)1161 void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1162   BranchOrBacktrack(al, to);
1163 }
1164 
IfRegisterGE(int reg,int comparand,Label * if_ge)1165 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1166                                              Label* if_ge) {
1167   Register to_compare = GetRegister(reg, w10);
1168   CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1169 }
1170 
1171 
IfRegisterLT(int reg,int comparand,Label * if_lt)1172 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1173                                              Label* if_lt) {
1174   Register to_compare = GetRegister(reg, w10);
1175   CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1176 }
1177 
1178 
IfRegisterEqPos(int reg,Label * if_eq)1179 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1180   Register to_compare = GetRegister(reg, w10);
1181   __ Cmp(to_compare, current_input_offset());
1182   BranchOrBacktrack(eq, if_eq);
1183 }
1184 
1185 RegExpMacroAssembler::IrregexpImplementation
Implementation()1186     RegExpMacroAssemblerARM64::Implementation() {
1187   return kARM64Implementation;
1188 }
1189 
1190 
PopCurrentPosition()1191 void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1192   Pop(current_input_offset());
1193 }
1194 
1195 
PopRegister(int register_index)1196 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1197   Pop(w10);
1198   StoreRegister(register_index, w10);
1199 }
1200 
1201 
PushBacktrack(Label * label)1202 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1203   if (label->is_bound()) {
1204     int target = label->pos();
1205     __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1206   } else {
1207     __ Adr(x10, label, MacroAssembler::kAdrFar);
1208     __ Sub(x10, x10, code_pointer());
1209     if (FLAG_debug_code) {
1210       __ Cmp(x10, kWRegMask);
1211       // The code offset has to fit in a W register.
1212       __ Check(ls, AbortReason::kOffsetOutOfRange);
1213     }
1214   }
1215   Push(w10);
1216   CheckStackLimit();
1217 }
1218 
1219 
PushCurrentPosition()1220 void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1221   Push(current_input_offset());
1222 }
1223 
1224 
PushRegister(int register_index,StackCheckFlag check_stack_limit)1225 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1226                                              StackCheckFlag check_stack_limit) {
1227   Register to_push = GetRegister(register_index, w10);
1228   Push(to_push);
1229   if (check_stack_limit) CheckStackLimit();
1230 }
1231 
1232 
ReadCurrentPositionFromRegister(int reg)1233 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1234   RegisterState register_state = GetRegisterState(reg);
1235   switch (register_state) {
1236     case STACKED:
1237       __ Ldr(current_input_offset(), register_location(reg));
1238       break;
1239     case CACHED_LSW:
1240       __ Mov(current_input_offset(), GetCachedRegister(reg).W());
1241       break;
1242     case CACHED_MSW:
1243       __ Lsr(current_input_offset().X(), GetCachedRegister(reg),
1244              kWRegSizeInBits);
1245       break;
1246     default:
1247       UNREACHABLE();
1248   }
1249 }
1250 
WriteStackPointerToRegister(int reg)1251 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1252   ExternalReference ref =
1253       ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1254   __ Mov(x10, ref);
1255   __ Ldr(x10, MemOperand(x10));
1256   __ Sub(x10, backtrack_stackpointer(), x10);
1257   if (FLAG_debug_code) {
1258     __ Cmp(x10, Operand(w10, SXTW));
1259     // The stack offset needs to fit in a W register.
1260     __ Check(eq, AbortReason::kOffsetOutOfRange);
1261   }
1262   StoreRegister(reg, w10);
1263 }
1264 
ReadStackPointerFromRegister(int reg)1265 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1266   ExternalReference ref =
1267       ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1268   Register read_from = GetRegister(reg, w10);
1269   __ Mov(x11, ref);
1270   __ Ldr(x11, MemOperand(x11));
1271   __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1272 }
1273 
SetCurrentPositionFromEnd(int by)1274 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1275   Label after_position;
1276   __ Cmp(current_input_offset(), -by * char_size());
1277   __ B(ge, &after_position);
1278   __ Mov(current_input_offset(), -by * char_size());
1279   // On RegExp code entry (where this operation is used), the character before
1280   // the current position is expected to be already loaded.
1281   // We have advanced the position, so it's safe to read backwards.
1282   LoadCurrentCharacterUnchecked(-1, 1);
1283   __ Bind(&after_position);
1284 }
1285 
1286 
SetRegister(int register_index,int to)1287 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1288   DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
1289   Register set_to = wzr;
1290   if (to != 0) {
1291     set_to = w10;
1292     __ Mov(set_to, to);
1293   }
1294   StoreRegister(register_index, set_to);
1295 }
1296 
1297 
Succeed()1298 bool RegExpMacroAssemblerARM64::Succeed() {
1299   __ B(&success_label_);
1300   return global();
1301 }
1302 
1303 
WriteCurrentPositionToRegister(int reg,int cp_offset)1304 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1305                                                                int cp_offset) {
1306   Register position = current_input_offset();
1307   if (cp_offset != 0) {
1308     position = w10;
1309     __ Add(position, current_input_offset(), cp_offset * char_size());
1310   }
1311   StoreRegister(reg, position);
1312 }
1313 
1314 
ClearRegisters(int reg_from,int reg_to)1315 void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
1316   DCHECK(reg_from <= reg_to);
1317   int num_registers = reg_to - reg_from + 1;
1318 
1319   // If the first capture register is cached in a hardware register but not
1320   // aligned on a 64-bit one, we need to clear the first one specifically.
1321   if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
1322     StoreRegister(reg_from, string_start_minus_one());
1323     num_registers--;
1324     reg_from++;
1325   }
1326 
1327   // Clear cached registers in pairs as far as possible.
1328   while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
1329     DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
1330     __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
1331     reg_from += 2;
1332     num_registers -= 2;
1333   }
1334 
1335   if ((num_registers % 2) == 1) {
1336     StoreRegister(reg_from, string_start_minus_one());
1337     num_registers--;
1338     reg_from++;
1339   }
1340 
1341   if (num_registers > 0) {
1342     // If there are some remaining registers, they are stored on the stack.
1343     DCHECK_LE(kNumCachedRegisters, reg_from);
1344 
1345     // Move down the indexes of the registers on stack to get the correct offset
1346     // in memory.
1347     reg_from -= kNumCachedRegisters;
1348     reg_to -= kNumCachedRegisters;
1349     // We should not unroll the loop for less than 2 registers.
1350     STATIC_ASSERT(kNumRegistersToUnroll > 2);
1351     // We position the base pointer to (reg_from + 1).
1352     int base_offset = kFirstRegisterOnStack -
1353         kWRegSize - (kWRegSize * reg_from);
1354     if (num_registers > kNumRegistersToUnroll) {
1355       Register base = x10;
1356       __ Add(base, frame_pointer(), base_offset);
1357 
1358       Label loop;
1359       __ Mov(x11, num_registers);
1360       __ Bind(&loop);
1361       __ Str(twice_non_position_value(),
1362              MemOperand(base, -kSystemPointerSize, PostIndex));
1363       __ Sub(x11, x11, 2);
1364       __ Cbnz(x11, &loop);
1365     } else {
1366       for (int i = reg_from; i <= reg_to; i += 2) {
1367         __ Str(twice_non_position_value(),
1368                MemOperand(frame_pointer(), base_offset));
1369         base_offset -= kWRegSize * 2;
1370       }
1371     }
1372   }
1373 }
1374 
1375 // Helper function for reading a value out of a stack frame.
1376 template <typename T>
frame_entry(Address re_frame,int frame_offset)1377 static T& frame_entry(Address re_frame, int frame_offset) {
1378   return *reinterpret_cast<T*>(re_frame + frame_offset);
1379 }
1380 
1381 
1382 template <typename T>
frame_entry_address(Address re_frame,int frame_offset)1383 static T* frame_entry_address(Address re_frame, int frame_offset) {
1384   return reinterpret_cast<T*>(re_frame + frame_offset);
1385 }
1386 
CheckStackGuardState(Address * return_address,Address raw_code,Address re_frame,int start_index,const byte ** input_start,const byte ** input_end)1387 int RegExpMacroAssemblerARM64::CheckStackGuardState(
1388     Address* return_address, Address raw_code, Address re_frame,
1389     int start_index, const byte** input_start, const byte** input_end) {
1390   Code re_code = Code::cast(Object(raw_code));
1391   return NativeRegExpMacroAssembler::CheckStackGuardState(
1392       frame_entry<Isolate*>(re_frame, kIsolate), start_index,
1393       static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
1394       return_address, re_code, frame_entry_address<Address>(re_frame, kInput),
1395       input_start, input_end);
1396 }
1397 
1398 
CheckPosition(int cp_offset,Label * on_outside_input)1399 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1400                                               Label* on_outside_input) {
1401   if (cp_offset >= 0) {
1402     CompareAndBranchOrBacktrack(current_input_offset(),
1403                                 -cp_offset * char_size(), ge, on_outside_input);
1404   } else {
1405     __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
1406     __ Cmp(w12, string_start_minus_one());
1407     BranchOrBacktrack(le, on_outside_input);
1408   }
1409 }
1410 
1411 
1412 // Private methods:
1413 
CallCheckStackGuardState(Register scratch)1414 void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
1415   DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins());
1416   DCHECK(!masm_->options().isolate_independent_code);
1417 
1418   // Allocate space on the stack to store the return address. The
1419   // CheckStackGuardState C++ function will override it if the code
1420   // moved. Allocate extra space for 2 arguments passed by pointers.
1421   // AAPCS64 requires the stack to be 16 byte aligned.
1422   int alignment = masm_->ActivationFrameAlignment();
1423   DCHECK_EQ(alignment % 16, 0);
1424   int align_mask = (alignment / kXRegSize) - 1;
1425   int xreg_to_claim = (3 + align_mask) & ~align_mask;
1426 
1427   __ Claim(xreg_to_claim);
1428 
1429   // CheckStackGuardState needs the end and start addresses of the input string.
1430   __ Poke(input_end(), 2 * kSystemPointerSize);
1431   __ Add(x5, sp, 2 * kSystemPointerSize);
1432   __ Poke(input_start(), kSystemPointerSize);
1433   __ Add(x4, sp, kSystemPointerSize);
1434 
1435   __ Mov(w3, start_offset());
1436   // RegExp code frame pointer.
1437   __ Mov(x2, frame_pointer());
1438   // Code of self.
1439   __ Mov(x1, Operand(masm_->CodeObject()));
1440 
1441   // We need to pass a pointer to the return address as first argument.
1442   // DirectCEntry will place the return address on the stack before calling so
1443   // the stack pointer will point to it.
1444   __ Mov(x0, sp);
1445 
1446   DCHECK_EQ(scratch, x10);
1447   ExternalReference check_stack_guard_state =
1448       ExternalReference::re_check_stack_guard_state(isolate());
1449   __ Mov(scratch, check_stack_guard_state);
1450 
1451   __ CallBuiltin(Builtin::kDirectCEntry);
1452 
1453   // The input string may have been moved in memory, we need to reload it.
1454   __ Peek(input_start(), kSystemPointerSize);
1455   __ Peek(input_end(), 2 * kSystemPointerSize);
1456 
1457   __ Drop(xreg_to_claim);
1458 
1459   // Reload the Code pointer.
1460   __ Mov(code_pointer(), Operand(masm_->CodeObject()));
1461 }
1462 
BranchOrBacktrack(Condition condition,Label * to)1463 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1464                                                   Label* to) {
1465   if (condition == al) {  // Unconditional.
1466     if (to == nullptr) {
1467       Backtrack();
1468       return;
1469     }
1470     __ B(to);
1471     return;
1472   }
1473   if (to == nullptr) {
1474     to = &backtrack_label_;
1475   }
1476   __ B(condition, to);
1477 }
1478 
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1479 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1480                                                             int immediate,
1481                                                             Condition condition,
1482                                                             Label* to) {
1483   if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1484     if (to == nullptr) {
1485       to = &backtrack_label_;
1486     }
1487     if (condition == eq) {
1488       __ Cbz(reg, to);
1489     } else {
1490       __ Cbnz(reg, to);
1491     }
1492   } else {
1493     __ Cmp(reg, immediate);
1494     BranchOrBacktrack(condition, to);
1495   }
1496 }
1497 
1498 
CheckPreemption()1499 void RegExpMacroAssemblerARM64::CheckPreemption() {
1500   // Check for preemption.
1501   ExternalReference stack_limit =
1502       ExternalReference::address_of_jslimit(isolate());
1503   __ Mov(x10, stack_limit);
1504   __ Ldr(x10, MemOperand(x10));
1505   __ Cmp(sp, x10);
1506   CallIf(&check_preempt_label_, ls);
1507 }
1508 
1509 
CheckStackLimit()1510 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1511   ExternalReference stack_limit =
1512       ExternalReference::address_of_regexp_stack_limit_address(isolate());
1513   __ Mov(x10, stack_limit);
1514   __ Ldr(x10, MemOperand(x10));
1515   __ Cmp(backtrack_stackpointer(), x10);
1516   CallIf(&stack_overflow_label_, ls);
1517 }
1518 
1519 
Push(Register source)1520 void RegExpMacroAssemblerARM64::Push(Register source) {
1521   DCHECK(source.Is32Bits());
1522   DCHECK_NE(source, backtrack_stackpointer());
1523   __ Str(source,
1524          MemOperand(backtrack_stackpointer(),
1525                     -static_cast<int>(kWRegSize),
1526                     PreIndex));
1527 }
1528 
1529 
Pop(Register target)1530 void RegExpMacroAssemblerARM64::Pop(Register target) {
1531   DCHECK(target.Is32Bits());
1532   DCHECK_NE(target, backtrack_stackpointer());
1533   __ Ldr(target,
1534          MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1535 }
1536 
1537 
GetCachedRegister(int register_index)1538 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1539   DCHECK_GT(kNumCachedRegisters, register_index);
1540   return Register::Create(register_index / 2, kXRegSizeInBits);
1541 }
1542 
1543 
GetRegister(int register_index,Register maybe_result)1544 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1545                                                 Register maybe_result) {
1546   DCHECK(maybe_result.Is32Bits());
1547   DCHECK_LE(0, register_index);
1548   if (num_registers_ <= register_index) {
1549     num_registers_ = register_index + 1;
1550   }
1551   Register result = NoReg;
1552   RegisterState register_state = GetRegisterState(register_index);
1553   switch (register_state) {
1554     case STACKED:
1555       __ Ldr(maybe_result, register_location(register_index));
1556       result = maybe_result;
1557       break;
1558     case CACHED_LSW:
1559       result = GetCachedRegister(register_index).W();
1560       break;
1561     case CACHED_MSW:
1562       __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1563              kWRegSizeInBits);
1564       result = maybe_result;
1565       break;
1566     default:
1567       UNREACHABLE();
1568   }
1569   DCHECK(result.Is32Bits());
1570   return result;
1571 }
1572 
1573 
StoreRegister(int register_index,Register source)1574 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1575                                               Register source) {
1576   DCHECK(source.Is32Bits());
1577   DCHECK_LE(0, register_index);
1578   if (num_registers_ <= register_index) {
1579     num_registers_ = register_index + 1;
1580   }
1581 
1582   RegisterState register_state = GetRegisterState(register_index);
1583   switch (register_state) {
1584     case STACKED:
1585       __ Str(source, register_location(register_index));
1586       break;
1587     case CACHED_LSW: {
1588       Register cached_register = GetCachedRegister(register_index);
1589       if (source != cached_register.W()) {
1590         __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1591       }
1592       break;
1593     }
1594     case CACHED_MSW: {
1595       Register cached_register = GetCachedRegister(register_index);
1596       __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1597       break;
1598     }
1599     default:
1600       UNREACHABLE();
1601   }
1602 }
1603 
1604 
CallIf(Label * to,Condition condition)1605 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1606   Label skip_call;
1607   if (condition != al) __ B(&skip_call, NegateCondition(condition));
1608   __ Bl(to);
1609   __ Bind(&skip_call);
1610 }
1611 
1612 
RestoreLinkRegister()1613 void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1614   // TODO(v8:10026): Remove when we stop compacting for code objects that are
1615   // active on the call stack.
1616   __ Pop<TurboAssembler::kAuthLR>(padreg, lr);
1617   __ Add(lr, lr, Operand(masm_->CodeObject()));
1618 }
1619 
1620 
SaveLinkRegister()1621 void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1622   __ Sub(lr, lr, Operand(masm_->CodeObject()));
1623   __ Push<TurboAssembler::kSignLR>(lr, padreg);
1624 }
1625 
1626 
register_location(int register_index)1627 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1628   DCHECK(register_index < (1<<30));
1629   DCHECK_LE(kNumCachedRegisters, register_index);
1630   if (num_registers_ <= register_index) {
1631     num_registers_ = register_index + 1;
1632   }
1633   register_index -= kNumCachedRegisters;
1634   int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1635   return MemOperand(frame_pointer(), offset);
1636 }
1637 
capture_location(int register_index,Register scratch)1638 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1639                                                      Register scratch) {
1640   DCHECK(register_index < (1<<30));
1641   DCHECK(register_index < num_saved_registers_);
1642   DCHECK_LE(kNumCachedRegisters, register_index);
1643   DCHECK_EQ(register_index % 2, 0);
1644   register_index -= kNumCachedRegisters;
1645   int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1646   // capture_location is used with Stp instructions to load/store 2 registers.
1647   // The immediate field in the encoding is limited to 7 bits (signed).
1648   if (is_int7(offset)) {
1649     return MemOperand(frame_pointer(), offset);
1650   } else {
1651     __ Add(scratch, frame_pointer(), offset);
1652     return MemOperand(scratch);
1653   }
1654 }
1655 
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1656 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1657                                                               int characters) {
1658   Register offset = current_input_offset();
1659 
1660   // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1661   // and the operating system running on the target allow it.
1662   // If unaligned load/stores are not supported then this function must only
1663   // be used to load a single character at a time.
1664 
1665   // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1666   // disable it.
1667   // TODO(pielan): See whether or not we should disable unaligned accesses.
1668   if (!CanReadUnaligned()) {
1669     DCHECK_EQ(1, characters);
1670   }
1671 
1672   if (cp_offset != 0) {
1673     if (FLAG_debug_code) {
1674       __ Mov(x10, cp_offset * char_size());
1675       __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1676       __ Cmp(x10, Operand(w10, SXTW));
1677       // The offset needs to fit in a W register.
1678       __ Check(eq, AbortReason::kOffsetOutOfRange);
1679     } else {
1680       __ Add(w10, current_input_offset(), cp_offset * char_size());
1681     }
1682     offset = w10;
1683   }
1684 
1685   if (mode_ == LATIN1) {
1686     if (characters == 4) {
1687       __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1688     } else if (characters == 2) {
1689       __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1690     } else {
1691       DCHECK_EQ(1, characters);
1692       __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1693     }
1694   } else {
1695     DCHECK(mode_ == UC16);
1696     if (characters == 2) {
1697       __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1698     } else {
1699       DCHECK_EQ(1, characters);
1700       __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1701     }
1702   }
1703 }
1704 
1705 }  // namespace internal
1706 }  // namespace v8
1707 
1708 #undef __
1709 
1710 #endif  // V8_TARGET_ARCH_ARM64
1711