1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_ARM64
6
7 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
8
9 #include "src/codegen/arm64/macro-assembler-arm64-inl.h"
10 #include "src/codegen/macro-assembler.h"
11 #include "src/logging/log.h"
12 #include "src/objects/objects-inl.h"
13 #include "src/regexp/regexp-macro-assembler.h"
14 #include "src/regexp/regexp-stack.h"
15 #include "src/snapshot/embedded/embedded-data.h"
16 #include "src/strings/unicode.h"
17
18 namespace v8 {
19 namespace internal {
20
21 /*
22 * This assembler uses the following register assignment convention:
23 * - w19 : Used to temporarily store a value before a call to C code.
24 * See CheckNotBackReferenceIgnoreCase.
25 * - x20 : Pointer to the current Code object,
26 * it includes the heap object tag.
27 * - w21 : Current position in input, as negative offset from
28 * the end of the string. Please notice that this is
29 * the byte offset, not the character offset!
30 * - w22 : Currently loaded character. Must be loaded using
31 * LoadCurrentCharacter before using any of the dispatch methods.
32 * - x23 : Points to tip of backtrack stack.
33 * - w24 : Position of the first character minus one: non_position_value.
34 * Used to initialize capture registers.
35 * - x25 : Address at the end of the input string: input_end.
36 * Points to byte after last character in input.
37 * - x26 : Address at the start of the input string: input_start.
38 * - w27 : Where to start in the input string.
39 * - x28 : Output array pointer.
40 * - x29/fp : Frame pointer. Used to access arguments, local variables and
41 * RegExp registers.
42 * - x16/x17 : IP registers, used by assembler. Very volatile.
43 * - sp : Points to tip of C stack.
44 *
45 * - x0-x7 : Used as a cache to store 32 bit capture registers. These
46 * registers need to be retained every time a call to C code
47 * is done.
48 *
49 * The remaining registers are free for computations.
50 * Each call to a public method should retain this convention.
51 *
52 * The stack will have the following structure:
53 *
54 * Location Name Description
55 * (as referred to
56 * in the code)
57 *
58 * - fp[104] Address regexp Address of the JSRegExp object. Unused in
59 * native code, passed to match signature of
60 * the interpreter.
61 * - fp[96] isolate Address of the current isolate.
62 * ^^^^^^^^^ sp when called ^^^^^^^^^
63 * - fp[16..88] r19-r28 Backup of CalleeSaved registers.
64 * - fp[8] lr Return from the RegExp code.
65 * - fp[0] fp Old frame pointer.
66 * ^^^^^^^^^ fp ^^^^^^^^^
67 * - fp[-8] direct_call 1 => Direct call from JavaScript code.
68 * 0 => Call through the runtime system.
69 * - fp[-16] output_size Output may fit multiple sets of matches.
70 * - fp[-24] input Handle containing the input string.
71 * - fp[-32] success_counter
72 * ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
73 * - fp[-40] register N Capture registers initialized with
74 * - fp[-44] register N + 1 non_position_value.
75 * ... The first kNumCachedRegisters (N) registers
76 * ... are cached in x0 to x7.
77 * ... Only positions must be stored in the first
78 * - ... num_saved_registers_ registers.
79 * - ...
80 * - register N + num_registers - 1
81 * ^^^^^^^^^ sp ^^^^^^^^^
82 *
83 * The first num_saved_registers_ registers are initialized to point to
84 * "character -1" in the string (i.e., char_size() bytes before the first
85 * character of the string). The remaining registers start out as garbage.
86 *
87 * The data up to the return address must be placed there by the calling
88 * code and the remaining arguments are passed in registers, e.g. by calling the
89 * code entry as cast to a function with the signature:
90 * int (*match)(String input_string,
91 * int start_index,
92 * Address start,
93 * Address end,
94 * int* capture_output_array,
95 * int num_capture_registers,
96 * bool direct_call = false,
97 * Isolate* isolate,
98 * Address regexp);
99 * The call is performed by NativeRegExpMacroAssembler::Execute()
100 * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
101 */
102
// Shorthand used by every emitter in this file: `__ Foo(...)` expands to
// ACCESS_MASM(masm_) followed by the call. ACCESS_MASM is defined outside
// this file; presumably it dereferences the MacroAssembler held in masm_.
103 #define __ ACCESS_MASM(masm_)
104
// Namespace-scope definition of the class's static constant. Its value is not
// visible here; presumably the initializer lives in the class declaration.
105 const int RegExpMacroAssemblerARM64::kRegExpCodeSize;
106
RegExpMacroAssemblerARM64(Isolate * isolate,Zone * zone,Mode mode,int registers_to_save)107 RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
108 Zone* zone, Mode mode,
109 int registers_to_save)
110 : NativeRegExpMacroAssembler(isolate, zone),
111 masm_(std::make_unique<MacroAssembler>(
112 isolate, CodeObjectRequired::kYes,
113 NewAssemblerBuffer(kRegExpCodeSize))),
114 no_root_array_scope_(masm_.get()),
115 mode_(mode),
116 num_registers_(registers_to_save),
117 num_saved_registers_(registers_to_save),
118 entry_label_(),
119 start_label_(),
120 success_label_(),
121 backtrack_label_(),
122 exit_label_() {
123 DCHECK_EQ(0, registers_to_save % 2);
124 // We can cache at most 16 W registers in x0-x7.
125 STATIC_ASSERT(kNumCachedRegisters <= 16);
126 STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
127 __ CallTarget();
128
129 __ B(&entry_label_); // We'll write the entry code later.
130 __ Bind(&start_label_); // And then continue from here.
131 }
132
~RegExpMacroAssemblerARM64()133 RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() {
134 // Unuse labels in case we throw away the assembler without calling GetCode.
135 entry_label_.Unuse();
136 start_label_.Unuse();
137 success_label_.Unuse();
138 backtrack_label_.Unuse();
139 exit_label_.Unuse();
140 check_preempt_label_.Unuse();
141 stack_overflow_label_.Unuse();
142 fallback_label_.Unuse();
143 }
144
stack_limit_slack()145 int RegExpMacroAssemblerARM64::stack_limit_slack() {
146 return RegExpStack::kStackLimitSlack;
147 }
148
149
AdvanceCurrentPosition(int by)150 void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
151 if (by != 0) {
152 __ Add(current_input_offset(),
153 current_input_offset(), by * char_size());
154 }
155 }
156
157
AdvanceRegister(int reg,int by)158 void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
159 DCHECK((reg >= 0) && (reg < num_registers_));
160 if (by != 0) {
161 RegisterState register_state = GetRegisterState(reg);
162 switch (register_state) {
163 case STACKED:
164 __ Ldr(w10, register_location(reg));
165 __ Add(w10, w10, by);
166 __ Str(w10, register_location(reg));
167 break;
168 case CACHED_LSW: {
169 Register to_advance = GetCachedRegister(reg);
170 __ Add(to_advance, to_advance, by);
171 break;
172 }
173 case CACHED_MSW: {
174 Register to_advance = GetCachedRegister(reg);
175 // Sign-extend to int64, shift as uint64, cast back to int64.
176 __ Add(
177 to_advance, to_advance,
178 static_cast<int64_t>(static_cast<uint64_t>(static_cast<int64_t>(by))
179 << kWRegSizeInBits));
180 break;
181 }
182 default:
183 UNREACHABLE();
184 }
185 }
186 }
187
188
Backtrack()189 void RegExpMacroAssemblerARM64::Backtrack() {
190 CheckPreemption();
191 if (has_backtrack_limit()) {
192 Label next;
193 UseScratchRegisterScope temps(masm_.get());
194 Register scratch = temps.AcquireW();
195 __ Ldr(scratch, MemOperand(frame_pointer(), kBacktrackCount));
196 __ Add(scratch, scratch, 1);
197 __ Str(scratch, MemOperand(frame_pointer(), kBacktrackCount));
198 __ Cmp(scratch, Operand(backtrack_limit()));
199 __ B(ne, &next);
200
201 // Backtrack limit exceeded.
202 if (can_fallback()) {
203 __ B(&fallback_label_);
204 } else {
205 // Can't fallback, so we treat it as a failed match.
206 Fail();
207 }
208
209 __ bind(&next);
210 }
211 Pop(w10);
212 __ Add(x10, code_pointer(), Operand(w10, UXTW));
213 __ Br(x10);
214 }
215
216
Bind(Label * label)217 void RegExpMacroAssemblerARM64::Bind(Label* label) {
218 __ Bind(label);
219 }
220
BindJumpTarget(Label * label)221 void RegExpMacroAssemblerARM64::BindJumpTarget(Label* label) {
222 __ BindJumpTarget(label);
223 }
224
CheckCharacter(uint32_t c,Label * on_equal)225 void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
226 CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
227 }
228
CheckCharacterGT(base::uc16 limit,Label * on_greater)229 void RegExpMacroAssemblerARM64::CheckCharacterGT(base::uc16 limit,
230 Label* on_greater) {
231 CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
232 }
233
CheckAtStart(int cp_offset,Label * on_at_start)234 void RegExpMacroAssemblerARM64::CheckAtStart(int cp_offset,
235 Label* on_at_start) {
236 __ Add(w10, current_input_offset(),
237 Operand(-char_size() + cp_offset * char_size()));
238 __ Cmp(w10, string_start_minus_one());
239 BranchOrBacktrack(eq, on_at_start);
240 }
241
CheckNotAtStart(int cp_offset,Label * on_not_at_start)242 void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
243 Label* on_not_at_start) {
244 __ Add(w10, current_input_offset(),
245 Operand(-char_size() + cp_offset * char_size()));
246 __ Cmp(w10, string_start_minus_one());
247 BranchOrBacktrack(ne, on_not_at_start);
248 }
249
CheckCharacterLT(base::uc16 limit,Label * on_less)250 void RegExpMacroAssemblerARM64::CheckCharacterLT(base::uc16 limit,
251 Label* on_less) {
252 CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
253 }
254
CheckCharacters(base::Vector<const base::uc16> str,int cp_offset,Label * on_failure,bool check_end_of_string)255 void RegExpMacroAssemblerARM64::CheckCharacters(
256 base::Vector<const base::uc16> str, int cp_offset, Label* on_failure,
257 bool check_end_of_string) {
258 // This method is only ever called from the cctests.
259
260 if (check_end_of_string) {
261 // Is last character of required match inside string.
262 CheckPosition(cp_offset + str.length() - 1, on_failure);
263 }
264
265 Register characters_address = x11;
266
267 __ Add(characters_address,
268 input_end(),
269 Operand(current_input_offset(), SXTW));
270 if (cp_offset != 0) {
271 __ Add(characters_address, characters_address, cp_offset * char_size());
272 }
273
274 for (int i = 0; i < str.length(); i++) {
275 if (mode_ == LATIN1) {
276 __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
277 DCHECK_GE(String::kMaxOneByteCharCode, str[i]);
278 } else {
279 __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
280 }
281 CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
282 }
283 }
284
CheckGreedyLoop(Label * on_equal)285 void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
286 __ Ldr(w10, MemOperand(backtrack_stackpointer()));
287 __ Cmp(current_input_offset(), w10);
288 __ Cset(x11, eq);
289 __ Add(backtrack_stackpointer(),
290 backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
291 BranchOrBacktrack(eq, on_equal);
292 }
293
CheckNotBackReferenceIgnoreCase(int start_reg,bool read_backward,bool unicode,Label * on_no_match)294 void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
295 int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
296 Label fallthrough;
297
298 Register capture_start_offset = w10;
299 // Save the capture length in a callee-saved register so it will
300 // be preserved if we call a C helper.
301 Register capture_length = w19;
302 DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
303
304 // Find length of back-referenced capture.
305 DCHECK_EQ(0, start_reg % 2);
306 if (start_reg < kNumCachedRegisters) {
307 __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
308 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
309 } else {
310 __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
311 }
312 __ Sub(capture_length, w11, capture_start_offset); // Length to check.
313
314 // At this point, the capture registers are either both set or both cleared.
315 // If the capture length is zero, then the capture is either empty or cleared.
316 // Fall through in both cases.
317 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
318
319 // Check that there are enough characters left in the input.
320 if (read_backward) {
321 __ Add(w12, string_start_minus_one(), capture_length);
322 __ Cmp(current_input_offset(), w12);
323 BranchOrBacktrack(le, on_no_match);
324 } else {
325 __ Cmn(capture_length, current_input_offset());
326 BranchOrBacktrack(gt, on_no_match);
327 }
328
329 if (mode_ == LATIN1) {
330 Label success;
331 Label fail;
332 Label loop_check;
333
334 Register capture_start_address = x12;
335 Register capture_end_addresss = x13;
336 Register current_position_address = x14;
337
338 __ Add(capture_start_address,
339 input_end(),
340 Operand(capture_start_offset, SXTW));
341 __ Add(capture_end_addresss,
342 capture_start_address,
343 Operand(capture_length, SXTW));
344 __ Add(current_position_address,
345 input_end(),
346 Operand(current_input_offset(), SXTW));
347 if (read_backward) {
348 // Offset by length when matching backwards.
349 __ Sub(current_position_address, current_position_address,
350 Operand(capture_length, SXTW));
351 }
352
353 Label loop;
354 __ Bind(&loop);
355 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
356 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
357 __ Cmp(w10, w11);
358 __ B(eq, &loop_check);
359
360 // Mismatch, try case-insensitive match (converting letters to lower-case).
361 __ Orr(w10, w10, 0x20); // Convert capture character to lower-case.
362 __ Orr(w11, w11, 0x20); // Also convert input character.
363 __ Cmp(w11, w10);
364 __ B(ne, &fail);
365 __ Sub(w10, w10, 'a');
366 __ Cmp(w10, 'z' - 'a'); // Is w10 a lowercase letter?
367 __ B(ls, &loop_check); // In range 'a'-'z'.
368 // Latin-1: Check for values in range [224,254] but not 247.
369 __ Sub(w10, w10, 224 - 'a');
370 __ Cmp(w10, 254 - 224);
371 __ Ccmp(w10, 247 - 224, ZFlag, ls); // Check for 247.
372 __ B(eq, &fail); // Weren't Latin-1 letters.
373
374 __ Bind(&loop_check);
375 __ Cmp(capture_start_address, capture_end_addresss);
376 __ B(lt, &loop);
377 __ B(&success);
378
379 __ Bind(&fail);
380 BranchOrBacktrack(al, on_no_match);
381
382 __ Bind(&success);
383 // Compute new value of character position after the matched part.
384 __ Sub(current_input_offset().X(), current_position_address, input_end());
385 if (read_backward) {
386 __ Sub(current_input_offset().X(), current_input_offset().X(),
387 Operand(capture_length, SXTW));
388 }
389 if (FLAG_debug_code) {
390 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
391 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
392 // The current input offset should be <= 0, and fit in a W register.
393 __ Check(le, AbortReason::kOffsetOutOfRange);
394 }
395 } else {
396 DCHECK(mode_ == UC16);
397 int argument_count = 4;
398
399 // The cached registers need to be retained.
400 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
401 DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
402 __ PushCPURegList(cached_registers);
403
404 // Put arguments into arguments registers.
405 // Parameters are
406 // x0: Address byte_offset1 - Address captured substring's start.
407 // x1: Address byte_offset2 - Address of current character position.
408 // w2: size_t byte_length - length of capture in bytes(!)
409 // x3: Isolate* isolate.
410
411 // Address of start of capture.
412 __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
413 // Length of capture.
414 __ Mov(w2, capture_length);
415 // Address of current input position.
416 __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
417 if (read_backward) {
418 __ Sub(x1, x1, Operand(capture_length, SXTW));
419 }
420 // Isolate.
421 __ Mov(x3, ExternalReference::isolate_address(isolate()));
422
423 {
424 AllowExternalCallThatCantCauseGC scope(masm_.get());
425 ExternalReference function =
426 unicode ? ExternalReference::re_case_insensitive_compare_unicode(
427 isolate())
428 : ExternalReference::re_case_insensitive_compare_non_unicode(
429 isolate());
430 __ CallCFunction(function, argument_count);
431 }
432
433 // Check if function returned non-zero for success or zero for failure.
434 // x0 is one of the registers used as a cache so it must be tested before
435 // the cache is restored.
436 __ Cmp(x0, 0);
437 __ PopCPURegList(cached_registers);
438 BranchOrBacktrack(eq, on_no_match);
439
440 // On success, advance position by length of capture.
441 if (read_backward) {
442 __ Sub(current_input_offset(), current_input_offset(), capture_length);
443 } else {
444 __ Add(current_input_offset(), current_input_offset(), capture_length);
445 }
446 }
447
448 __ Bind(&fallthrough);
449 }
450
CheckNotBackReference(int start_reg,bool read_backward,Label * on_no_match)451 void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
452 bool read_backward,
453 Label* on_no_match) {
454 Label fallthrough;
455
456 Register capture_start_address = x12;
457 Register capture_end_address = x13;
458 Register current_position_address = x14;
459 Register capture_length = w15;
460
461 // Find length of back-referenced capture.
462 DCHECK_EQ(0, start_reg % 2);
463 if (start_reg < kNumCachedRegisters) {
464 __ Mov(x10, GetCachedRegister(start_reg));
465 __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
466 } else {
467 __ Ldp(w11, w10, capture_location(start_reg, x10));
468 }
469 __ Sub(capture_length, w11, w10); // Length to check.
470
471 // At this point, the capture registers are either both set or both cleared.
472 // If the capture length is zero, then the capture is either empty or cleared.
473 // Fall through in both cases.
474 __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
475
476 // Check that there are enough characters left in the input.
477 if (read_backward) {
478 __ Add(w12, string_start_minus_one(), capture_length);
479 __ Cmp(current_input_offset(), w12);
480 BranchOrBacktrack(le, on_no_match);
481 } else {
482 __ Cmn(capture_length, current_input_offset());
483 BranchOrBacktrack(gt, on_no_match);
484 }
485
486 // Compute pointers to match string and capture string
487 __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
488 __ Add(capture_end_address,
489 capture_start_address,
490 Operand(capture_length, SXTW));
491 __ Add(current_position_address,
492 input_end(),
493 Operand(current_input_offset(), SXTW));
494 if (read_backward) {
495 // Offset by length when matching backwards.
496 __ Sub(current_position_address, current_position_address,
497 Operand(capture_length, SXTW));
498 }
499
500 Label loop;
501 __ Bind(&loop);
502 if (mode_ == LATIN1) {
503 __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
504 __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
505 } else {
506 DCHECK(mode_ == UC16);
507 __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
508 __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
509 }
510 __ Cmp(w10, w11);
511 BranchOrBacktrack(ne, on_no_match);
512 __ Cmp(capture_start_address, capture_end_address);
513 __ B(lt, &loop);
514
515 // Move current character position to position after match.
516 __ Sub(current_input_offset().X(), current_position_address, input_end());
517 if (read_backward) {
518 __ Sub(current_input_offset().X(), current_input_offset().X(),
519 Operand(capture_length, SXTW));
520 }
521
522 if (FLAG_debug_code) {
523 __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
524 __ Ccmp(current_input_offset(), 0, NoFlag, eq);
525 // The current input offset should be <= 0, and fit in a W register.
526 __ Check(le, AbortReason::kOffsetOutOfRange);
527 }
528 __ Bind(&fallthrough);
529 }
530
531
CheckNotCharacter(unsigned c,Label * on_not_equal)532 void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
533 Label* on_not_equal) {
534 CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
535 }
536
537
CheckCharacterAfterAnd(uint32_t c,uint32_t mask,Label * on_equal)538 void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
539 uint32_t mask,
540 Label* on_equal) {
541 __ And(w10, current_character(), mask);
542 CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
543 }
544
545
CheckNotCharacterAfterAnd(unsigned c,unsigned mask,Label * on_not_equal)546 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
547 unsigned mask,
548 Label* on_not_equal) {
549 __ And(w10, current_character(), mask);
550 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
551 }
552
CheckNotCharacterAfterMinusAnd(base::uc16 c,base::uc16 minus,base::uc16 mask,Label * on_not_equal)553 void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
554 base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
555 DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
556 __ Sub(w10, current_character(), minus);
557 __ And(w10, w10, mask);
558 CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
559 }
560
CheckCharacterInRange(base::uc16 from,base::uc16 to,Label * on_in_range)561 void RegExpMacroAssemblerARM64::CheckCharacterInRange(base::uc16 from,
562 base::uc16 to,
563 Label* on_in_range) {
564 __ Sub(w10, current_character(), from);
565 // Unsigned lower-or-same condition.
566 CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
567 }
568
CheckCharacterNotInRange(base::uc16 from,base::uc16 to,Label * on_not_in_range)569 void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
570 base::uc16 from, base::uc16 to, Label* on_not_in_range) {
571 __ Sub(w10, current_character(), from);
572 // Unsigned higher condition.
573 CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
574 }
575
CheckBitInTable(Handle<ByteArray> table,Label * on_bit_set)576 void RegExpMacroAssemblerARM64::CheckBitInTable(
577 Handle<ByteArray> table,
578 Label* on_bit_set) {
579 __ Mov(x11, Operand(table));
580 if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
581 __ And(w10, current_character(), kTableMask);
582 __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
583 } else {
584 __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
585 }
586 __ Ldrb(w11, MemOperand(x11, w10, UXTW));
587 CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
588 }
589
CheckSpecialCharacterClass(base::uc16 type,Label * on_no_match)590 bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(base::uc16 type,
591 Label* on_no_match) {
592 // Range checks (c in min..max) are generally implemented by an unsigned
593 // (c - min) <= (max - min) check
594 switch (type) {
595 case 's':
596 // Match space-characters
597 if (mode_ == LATIN1) {
598 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
599 Label success;
600 // Check for ' ' or 0x00A0.
601 __ Cmp(current_character(), ' ');
602 __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
603 __ B(eq, &success);
604 // Check range 0x09..0x0D.
605 __ Sub(w10, current_character(), '\t');
606 CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
607 __ Bind(&success);
608 return true;
609 }
610 return false;
611 case 'S':
612 // The emitted code for generic character classes is good enough.
613 return false;
614 case 'd':
615 // Match ASCII digits ('0'..'9').
616 __ Sub(w10, current_character(), '0');
617 CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
618 return true;
619 case 'D':
620 // Match ASCII non-digits.
621 __ Sub(w10, current_character(), '0');
622 CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
623 return true;
624 case '.': {
625 // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
626 // Here we emit the conditional branch only once at the end to make branch
627 // prediction more efficient, even though we could branch out of here
628 // as soon as a character matches.
629 __ Cmp(current_character(), 0x0A);
630 __ Ccmp(current_character(), 0x0D, ZFlag, ne);
631 if (mode_ == UC16) {
632 __ Sub(w10, current_character(), 0x2028);
633 // If the Z flag was set we clear the flags to force a branch.
634 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
635 // ls -> !((C==1) && (Z==0))
636 BranchOrBacktrack(ls, on_no_match);
637 } else {
638 BranchOrBacktrack(eq, on_no_match);
639 }
640 return true;
641 }
642 case 'n': {
643 // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
644 // We have to check all 4 newline characters before emitting
645 // the conditional branch.
646 __ Cmp(current_character(), 0x0A);
647 __ Ccmp(current_character(), 0x0D, ZFlag, ne);
648 if (mode_ == UC16) {
649 __ Sub(w10, current_character(), 0x2028);
650 // If the Z flag was set we clear the flags to force a fall-through.
651 __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
652 // hi -> (C==1) && (Z==0)
653 BranchOrBacktrack(hi, on_no_match);
654 } else {
655 BranchOrBacktrack(ne, on_no_match);
656 }
657 return true;
658 }
659 case 'w': {
660 if (mode_ != LATIN1) {
661 // Table is 256 entries, so all Latin1 characters can be tested.
662 CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
663 }
664 ExternalReference map = ExternalReference::re_word_character_map(isolate());
665 __ Mov(x10, map);
666 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
667 CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
668 return true;
669 }
670 case 'W': {
671 Label done;
672 if (mode_ != LATIN1) {
673 // Table is 256 entries, so all Latin1 characters can be tested.
674 __ Cmp(current_character(), 'z');
675 __ B(hi, &done);
676 }
677 ExternalReference map = ExternalReference::re_word_character_map(isolate());
678 __ Mov(x10, map);
679 __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
680 CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
681 __ Bind(&done);
682 return true;
683 }
684 case '*':
685 // Match any character.
686 return true;
687 // No custom implementation (yet): s(UC16), S(UC16).
688 default:
689 return false;
690 }
691 }
692
Fail()693 void RegExpMacroAssemblerARM64::Fail() {
694 __ Mov(w0, FAILURE);
695 __ B(&exit_label_);
696 }
697
LoadRegExpStackPointerFromMemory(Register dst)698 void RegExpMacroAssemblerARM64::LoadRegExpStackPointerFromMemory(Register dst) {
699 ExternalReference ref =
700 ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
701 __ Mov(dst, ref);
702 __ Ldr(dst, MemOperand(dst));
703 }
704
StoreRegExpStackPointerToMemory(Register src,Register scratch)705 void RegExpMacroAssemblerARM64::StoreRegExpStackPointerToMemory(
706 Register src, Register scratch) {
707 ExternalReference ref =
708 ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
709 __ Mov(scratch, ref);
710 __ Str(src, MemOperand(scratch));
711 }
712
PushRegExpBasePointer(Register stack_pointer,Register scratch)713 void RegExpMacroAssemblerARM64::PushRegExpBasePointer(Register stack_pointer,
714 Register scratch) {
715 ExternalReference ref =
716 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
717 __ Mov(scratch, ref);
718 __ Ldr(scratch, MemOperand(scratch));
719 __ Sub(scratch, stack_pointer, scratch);
720 __ Str(scratch, MemOperand(frame_pointer(), kRegExpStackBasePointer));
721 }
722
PopRegExpBasePointer(Register stack_pointer_out,Register scratch)723 void RegExpMacroAssemblerARM64::PopRegExpBasePointer(Register stack_pointer_out,
724 Register scratch) {
725 ExternalReference ref =
726 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
727 __ Ldr(stack_pointer_out,
728 MemOperand(frame_pointer(), kRegExpStackBasePointer));
729 __ Mov(scratch, ref);
730 __ Ldr(scratch, MemOperand(scratch));
731 __ Add(stack_pointer_out, stack_pointer_out, scratch);
732 StoreRegExpStackPointerToMemory(stack_pointer_out, scratch);
733 }
734
GetCode(Handle<String> source)735 Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
736 Label return_w0;
737 // Finalize code - write the entry point code now we know how many
738 // registers we need.
739
740 // Entry code:
741 __ Bind(&entry_label_);
742
743 // Arguments on entry:
744 // x0: String input
745 // x1: int start_offset
746 // x2: byte* input_start
747 // x3: byte* input_end
748 // x4: int* output array
749 // x5: int output array size
750 // x6: int direct_call
751 // x7: Isolate* isolate
752 //
753 // sp[0]: secondary link/return address used by native call
754
755 // Tell the system that we have a stack frame. Because the type is MANUAL, no
756 // code is generated.
757 FrameScope scope(masm_.get(), StackFrame::MANUAL);
758
759 // Push registers on the stack, only push the argument registers that we need.
760 CPURegList argument_registers(x0, x5, x6, x7);
761
762 CPURegList registers_to_retain = kCalleeSaved;
763 DCHECK_EQ(registers_to_retain.Count(), kNumCalleeSavedRegisters);
764
765 __ PushCPURegList<TurboAssembler::kDontStoreLR>(registers_to_retain);
766 __ Push<TurboAssembler::kSignLR>(lr, fp);
767 __ PushCPURegList(argument_registers);
768
769 // Set frame pointer in place.
770 __ Add(frame_pointer(), sp, argument_registers.Count() * kSystemPointerSize);
771
772 // Initialize callee-saved registers.
773 __ Mov(start_offset(), w1);
774 __ Mov(input_start(), x2);
775 __ Mov(input_end(), x3);
776 __ Mov(output_array(), x4);
777
778 // Make sure the stack alignment will be respected.
779 const int alignment = masm_->ActivationFrameAlignment();
780 DCHECK_EQ(alignment % 16, 0);
781 const int align_mask = (alignment / kWRegSize) - 1;
782
783 // Make room for stack locals.
784 static constexpr int kWRegPerXReg = kXRegSize / kWRegSize;
785 DCHECK_EQ(kNumberOfStackLocals * kWRegPerXReg,
786 ((kNumberOfStackLocals * kWRegPerXReg) + align_mask) & ~align_mask);
787 __ Claim(kNumberOfStackLocals * kWRegPerXReg);
788
789 // Initialize backtrack stack pointer. It must not be clobbered from here on.
790 // Note the backtrack_stackpointer is callee-saved.
791 STATIC_ASSERT(backtrack_stackpointer() == x23);
792 LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
793
794 // Store the regexp base pointer - we'll later restore it / write it to
795 // memory when returning from this irregexp code object.
796 PushRegExpBasePointer(backtrack_stackpointer(), x11);
797
798 // Set the number of registers we will need to allocate, that is:
799 // - (num_registers_ - kNumCachedRegisters) (W registers)
800 const int num_stack_registers =
801 std::max(0, num_registers_ - kNumCachedRegisters);
802 const int num_wreg_to_allocate =
803 (num_stack_registers + align_mask) & ~align_mask;
804
805 {
806 // Check if we have space on the stack.
807 Label stack_limit_hit, stack_ok;
808
809 ExternalReference stack_limit =
810 ExternalReference::address_of_jslimit(isolate());
811 __ Mov(x10, stack_limit);
812 __ Ldr(x10, MemOperand(x10));
813 __ Subs(x10, sp, x10);
814
815 // Handle it if the stack pointer is already below the stack limit.
816 __ B(ls, &stack_limit_hit);
817
818 // Check if there is room for the variable number of registers above
819 // the stack limit.
820 __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
821 __ B(hs, &stack_ok);
822
823 // Exit with OutOfMemory exception. There is not enough space on the stack
824 // for our working registers.
825 __ Mov(w0, EXCEPTION);
826 __ B(&return_w0);
827
828 __ Bind(&stack_limit_hit);
829 CallCheckStackGuardState(x10);
830 // If returned value is non-zero, we exit with the returned value as result.
831 __ Cbnz(w0, &return_w0);
832
833 __ Bind(&stack_ok);
834 }
835
836 // Allocate space on stack.
837 __ Claim(num_wreg_to_allocate, kWRegSize);
838
839 // Initialize success_counter and kBacktrackCount with 0.
840 __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
841 __ Str(wzr, MemOperand(frame_pointer(), kBacktrackCount));
842
843 // Find negative length (offset of start relative to end).
844 __ Sub(x10, input_start(), input_end());
845 if (FLAG_debug_code) {
846 // Check that the size of the input string chars is in range.
847 __ Neg(x11, x10);
848 __ Cmp(x11, SeqTwoByteString::kMaxCharsSize);
849 __ Check(ls, AbortReason::kInputStringTooLong);
850 }
851 __ Mov(current_input_offset(), w10);
852
853 // The non-position value is used as a clearing value for the
854 // capture registers, it corresponds to the position of the first character
855 // minus one.
856 __ Sub(string_start_minus_one(), current_input_offset(), char_size());
857 __ Sub(string_start_minus_one(), string_start_minus_one(),
858 Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
859 // We can store this value twice in an X register for initializing
860 // on-stack registers later.
861 __ Orr(twice_non_position_value(), string_start_minus_one().X(),
862 Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));
863
864 // Initialize code pointer register.
865 __ Mov(code_pointer(), Operand(masm_->CodeObject()));
866
867 Label load_char_start_regexp;
868 {
869 Label start_regexp;
870 // Load newline if index is at start, previous character otherwise.
871 __ Cbnz(start_offset(), &load_char_start_regexp);
872 __ Mov(current_character(), '\n');
873 __ B(&start_regexp);
874
875 // Global regexp restarts matching here.
876 __ Bind(&load_char_start_regexp);
877 // Load previous char as initial value of current character register.
878 LoadCurrentCharacterUnchecked(-1, 1);
879 __ Bind(&start_regexp);
880 }
881
882 // Initialize on-stack registers.
883 if (num_saved_registers_ > 0) {
884 ClearRegisters(0, num_saved_registers_ - 1);
885 }
886
887 // Execute.
888 __ B(&start_label_);
889
890 if (backtrack_label_.is_linked()) {
891 __ Bind(&backtrack_label_);
892 Backtrack();
893 }
894
895 if (success_label_.is_linked()) {
896 Register first_capture_start = w15;
897
898 // Save captures when successful.
899 __ Bind(&success_label_);
900
901 if (num_saved_registers_ > 0) {
902 // V8 expects the output to be an int32_t array.
903 Register capture_start = w12;
904 Register capture_end = w13;
905 Register input_length = w14;
906
907 // Copy captures to output.
908
909 // Get string length.
910 __ Sub(x10, input_end(), input_start());
911 if (FLAG_debug_code) {
912 // Check that the size of the input string chars is in range.
913 __ Cmp(x10, SeqTwoByteString::kMaxCharsSize);
914 __ Check(ls, AbortReason::kInputStringTooLong);
915 }
916 // input_start has a start_offset offset on entry. We need to include
917 // it when computing the length of the whole string.
918 if (mode_ == UC16) {
919 __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
920 } else {
921 __ Add(input_length, start_offset(), w10);
922 }
923
924 // Copy the results to the output array from the cached registers first.
925 for (int i = 0;
926 (i < num_saved_registers_) && (i < kNumCachedRegisters);
927 i += 2) {
928 __ Mov(capture_start.X(), GetCachedRegister(i));
929 __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
930 if ((i == 0) && global_with_zero_length_check()) {
931 // Keep capture start for the zero-length check later.
932 __ Mov(first_capture_start, capture_start);
933 }
934 // Offsets need to be relative to the start of the string.
935 if (mode_ == UC16) {
936 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
937 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
938 } else {
939 __ Add(capture_start, input_length, capture_start);
940 __ Add(capture_end, input_length, capture_end);
941 }
942 // The output pointer advances for a possible global match.
943 __ Stp(capture_start, capture_end,
944 MemOperand(output_array(), kSystemPointerSize, PostIndex));
945 }
946
947 // Only carry on if there are more than kNumCachedRegisters capture
948 // registers.
949 int num_registers_left_on_stack =
950 num_saved_registers_ - kNumCachedRegisters;
951 if (num_registers_left_on_stack > 0) {
952 Register base = x10;
953 // There are always an even number of capture registers. A couple of
954 // registers determine one match with two offsets.
955 DCHECK_EQ(0, num_registers_left_on_stack % 2);
956 __ Add(base, frame_pointer(), kFirstCaptureOnStack);
957
958 // We can unroll the loop here, we should not unroll for less than 2
959 // registers.
960 STATIC_ASSERT(kNumRegistersToUnroll > 2);
961 if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
962 for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
963 __ Ldp(capture_end, capture_start,
964 MemOperand(base, -kSystemPointerSize, PostIndex));
965 if ((i == 0) && global_with_zero_length_check()) {
966 // Keep capture start for the zero-length check later.
967 __ Mov(first_capture_start, capture_start);
968 }
969 // Offsets need to be relative to the start of the string.
970 if (mode_ == UC16) {
971 __ Add(capture_start,
972 input_length,
973 Operand(capture_start, ASR, 1));
974 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
975 } else {
976 __ Add(capture_start, input_length, capture_start);
977 __ Add(capture_end, input_length, capture_end);
978 }
979 // The output pointer advances for a possible global match.
980 __ Stp(capture_start, capture_end,
981 MemOperand(output_array(), kSystemPointerSize, PostIndex));
982 }
983 } else {
984 Label loop, start;
985 __ Mov(x11, num_registers_left_on_stack);
986
987 __ Ldp(capture_end, capture_start,
988 MemOperand(base, -kSystemPointerSize, PostIndex));
989 if (global_with_zero_length_check()) {
990 __ Mov(first_capture_start, capture_start);
991 }
992 __ B(&start);
993
994 __ Bind(&loop);
995 __ Ldp(capture_end, capture_start,
996 MemOperand(base, -kSystemPointerSize, PostIndex));
997 __ Bind(&start);
998 if (mode_ == UC16) {
999 __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
1000 __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
1001 } else {
1002 __ Add(capture_start, input_length, capture_start);
1003 __ Add(capture_end, input_length, capture_end);
1004 }
1005 // The output pointer advances for a possible global match.
1006 __ Stp(capture_start, capture_end,
1007 MemOperand(output_array(), kSystemPointerSize, PostIndex));
1008 __ Sub(x11, x11, 2);
1009 __ Cbnz(x11, &loop);
1010 }
1011 }
1012 }
1013
1014 if (global()) {
1015 Register success_counter = w0;
1016 Register output_size = x10;
1017 // Restart matching if the regular expression is flagged as global.
1018
1019 // Increment success counter.
1020 __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
1021 __ Add(success_counter, success_counter, 1);
1022 __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
1023
1024 // Capture results have been stored, so the number of remaining global
1025 // output registers is reduced by the number of stored captures.
1026 __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
1027 __ Sub(output_size, output_size, num_saved_registers_);
1028 // Check whether we have enough room for another set of capture results.
1029 __ Cmp(output_size, num_saved_registers_);
1030 __ B(lt, &return_w0);
1031
1032 // The output pointer is already set to the next field in the output
1033 // array.
1034 // Update output size on the frame before we restart matching.
1035 __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));
1036
1037 // Restore the original regexp stack pointer value (effectively, pop the
1038 // stored base pointer).
1039 PopRegExpBasePointer(backtrack_stackpointer(), x11);
1040
1041 if (global_with_zero_length_check()) {
1042 // Special case for zero-length matches.
1043 __ Cmp(current_input_offset(), first_capture_start);
1044 // Not a zero-length match, restart.
1045 __ B(ne, &load_char_start_regexp);
1046 // Offset from the end is zero if we already reached the end.
1047 __ Cbz(current_input_offset(), &return_w0);
1048 // Advance current position after a zero-length match.
1049 Label advance;
1050 __ bind(&advance);
1051 __ Add(current_input_offset(), current_input_offset(),
1052 Operand((mode_ == UC16) ? 2 : 1));
1053 if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
1054 }
1055
1056 __ B(&load_char_start_regexp);
1057 } else {
1058 __ Mov(w0, SUCCESS);
1059 }
1060 }
1061
1062 if (exit_label_.is_linked()) {
1063 // Exit and return w0.
1064 __ Bind(&exit_label_);
1065 if (global()) {
1066 __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
1067 }
1068 }
1069
1070 __ Bind(&return_w0);
1071 // Restore the original regexp stack pointer value (effectively, pop the
1072 // stored base pointer).
1073 PopRegExpBasePointer(backtrack_stackpointer(), x11);
1074
1075 // Set stack pointer back to first register to retain.
1076 __ Mov(sp, fp);
1077 __ Pop<TurboAssembler::kAuthLR>(fp, lr);
1078
1079 // Restore registers.
1080 __ PopCPURegList<TurboAssembler::kDontLoadLR>(registers_to_retain);
1081
1082 __ Ret();
1083
1084 Label exit_with_exception;
1085 // Registers x0 to x7 are used to store the first captures, they need to be
1086 // retained over calls to C++ code.
1087 CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
1088 DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
1089
1090 if (check_preempt_label_.is_linked()) {
1091 __ Bind(&check_preempt_label_);
1092
1093 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), x10);
1094
1095 SaveLinkRegister();
1096 // The cached registers need to be retained.
1097 __ PushCPURegList(cached_registers);
1098 CallCheckStackGuardState(x10);
1099 // Returning from the regexp code restores the stack (sp <- fp)
1100 // so we don't need to drop the link register from it before exiting.
1101 __ Cbnz(w0, &return_w0);
1102 // Reset the cached registers.
1103 __ PopCPURegList(cached_registers);
1104
1105 LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
1106
1107 RestoreLinkRegister();
1108 __ Ret();
1109 }
1110
1111 if (stack_overflow_label_.is_linked()) {
1112 __ Bind(&stack_overflow_label_);
1113
1114 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), x10);
1115
1116 SaveLinkRegister();
1117 // The cached registers need to be retained.
1118 __ PushCPURegList(cached_registers);
1119 // Call GrowStack(isolate)
1120 static constexpr int kNumArguments = 1;
1121 __ Mov(x0, ExternalReference::isolate_address(isolate()));
1122 __ CallCFunction(ExternalReference::re_grow_stack(isolate()),
1123 kNumArguments);
1124 // If return nullptr, we have failed to grow the stack, and must exit with
1125 // a stack-overflow exception. Returning from the regexp code restores the
1126 // stack (sp <- fp) so we don't need to drop the link register from it
1127 // before exiting.
1128 __ Cbz(w0, &exit_with_exception);
1129 // Otherwise use return value as new stack pointer.
1130 __ Mov(backtrack_stackpointer(), x0);
1131 // Reset the cached registers.
1132 __ PopCPURegList(cached_registers);
1133 RestoreLinkRegister();
1134 __ Ret();
1135 }
1136
1137 if (exit_with_exception.is_linked()) {
1138 __ Bind(&exit_with_exception);
1139 __ Mov(w0, EXCEPTION);
1140 __ B(&return_w0);
1141 }
1142
1143 if (fallback_label_.is_linked()) {
1144 __ Bind(&fallback_label_);
1145 __ Mov(w0, FALLBACK_TO_EXPERIMENTAL);
1146 __ B(&return_w0);
1147 }
1148
1149 CodeDesc code_desc;
1150 masm_->GetCode(isolate(), &code_desc);
1151 Handle<Code> code =
1152 Factory::CodeBuilder(isolate(), code_desc, CodeKind::REGEXP)
1153 .set_self_reference(masm_->CodeObject())
1154 .Build();
1155 PROFILE(masm_->isolate(),
1156 RegExpCodeCreateEvent(Handle<AbstractCode>::cast(code), source));
1157 return Handle<HeapObject>::cast(code);
1158 }
1159
1160
// Emits an unconditional transfer of control to |to|. The work is delegated to
// BranchOrBacktrack with the always-true condition |al|, so a null |to|
// results in a backtrack rather than a branch.
void RegExpMacroAssemblerARM64::GoTo(Label* to) {
  BranchOrBacktrack(al, to);
}
1164
IfRegisterGE(int reg,int comparand,Label * if_ge)1165 void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1166 Label* if_ge) {
1167 Register to_compare = GetRegister(reg, w10);
1168 CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1169 }
1170
1171
IfRegisterLT(int reg,int comparand,Label * if_lt)1172 void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1173 Label* if_lt) {
1174 Register to_compare = GetRegister(reg, w10);
1175 CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1176 }
1177
1178
IfRegisterEqPos(int reg,Label * if_eq)1179 void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1180 Register to_compare = GetRegister(reg, w10);
1181 __ Cmp(to_compare, current_input_offset());
1182 BranchOrBacktrack(eq, if_eq);
1183 }
1184
// Reports which irregexp backend this assembler is: always
// kARM64Implementation.
RegExpMacroAssembler::IrregexpImplementation
RegExpMacroAssemblerARM64::Implementation() {
  return kARM64Implementation;
}
1189
1190
// Pops the top 32-bit entry of the backtrack stack into the current input
// offset register.
void RegExpMacroAssemblerARM64::PopCurrentPosition() {
  Pop(current_input_offset());
}
1194
1195
PopRegister(int register_index)1196 void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1197 Pop(w10);
1198 StoreRegister(register_index, w10);
1199 }
1200
1201
PushBacktrack(Label * label)1202 void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1203 if (label->is_bound()) {
1204 int target = label->pos();
1205 __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1206 } else {
1207 __ Adr(x10, label, MacroAssembler::kAdrFar);
1208 __ Sub(x10, x10, code_pointer());
1209 if (FLAG_debug_code) {
1210 __ Cmp(x10, kWRegMask);
1211 // The code offset has to fit in a W register.
1212 __ Check(ls, AbortReason::kOffsetOutOfRange);
1213 }
1214 }
1215 Push(w10);
1216 CheckStackLimit();
1217 }
1218
1219
// Pushes the current input offset register onto the backtrack stack. No
// backtrack stack limit check is emitted here.
void RegExpMacroAssemblerARM64::PushCurrentPosition() {
  Push(current_input_offset());
}
1223
1224
PushRegister(int register_index,StackCheckFlag check_stack_limit)1225 void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1226 StackCheckFlag check_stack_limit) {
1227 Register to_push = GetRegister(register_index, w10);
1228 Push(to_push);
1229 if (check_stack_limit) CheckStackLimit();
1230 }
1231
1232
ReadCurrentPositionFromRegister(int reg)1233 void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1234 RegisterState register_state = GetRegisterState(reg);
1235 switch (register_state) {
1236 case STACKED:
1237 __ Ldr(current_input_offset(), register_location(reg));
1238 break;
1239 case CACHED_LSW:
1240 __ Mov(current_input_offset(), GetCachedRegister(reg).W());
1241 break;
1242 case CACHED_MSW:
1243 __ Lsr(current_input_offset().X(), GetCachedRegister(reg),
1244 kWRegSizeInBits);
1245 break;
1246 default:
1247 UNREACHABLE();
1248 }
1249 }
1250
WriteStackPointerToRegister(int reg)1251 void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1252 ExternalReference ref =
1253 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1254 __ Mov(x10, ref);
1255 __ Ldr(x10, MemOperand(x10));
1256 __ Sub(x10, backtrack_stackpointer(), x10);
1257 if (FLAG_debug_code) {
1258 __ Cmp(x10, Operand(w10, SXTW));
1259 // The stack offset needs to fit in a W register.
1260 __ Check(eq, AbortReason::kOffsetOutOfRange);
1261 }
1262 StoreRegister(reg, w10);
1263 }
1264
ReadStackPointerFromRegister(int reg)1265 void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1266 ExternalReference ref =
1267 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1268 Register read_from = GetRegister(reg, w10);
1269 __ Mov(x11, ref);
1270 __ Ldr(x11, MemOperand(x11));
1271 __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1272 }
1273
SetCurrentPositionFromEnd(int by)1274 void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1275 Label after_position;
1276 __ Cmp(current_input_offset(), -by * char_size());
1277 __ B(ge, &after_position);
1278 __ Mov(current_input_offset(), -by * char_size());
1279 // On RegExp code entry (where this operation is used), the character before
1280 // the current position is expected to be already loaded.
1281 // We have advanced the position, so it's safe to read backwards.
1282 LoadCurrentCharacterUnchecked(-1, 1);
1283 __ Bind(&after_position);
1284 }
1285
1286
SetRegister(int register_index,int to)1287 void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1288 DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1289 Register set_to = wzr;
1290 if (to != 0) {
1291 set_to = w10;
1292 __ Mov(set_to, to);
1293 }
1294 StoreRegister(register_index, set_to);
1295 }
1296
1297
// Emits a jump to the success label. Returns the value of global(), i.e.
// whether the regular expression is flagged as global.
bool RegExpMacroAssemblerARM64::Succeed() {
  __ B(&success_label_);
  return global();
}
1302
1303
WriteCurrentPositionToRegister(int reg,int cp_offset)1304 void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1305 int cp_offset) {
1306 Register position = current_input_offset();
1307 if (cp_offset != 0) {
1308 position = w10;
1309 __ Add(position, current_input_offset(), cp_offset * char_size());
1310 }
1311 StoreRegister(reg, position);
1312 }
1313
1314
// Emits code that resets the regexp registers |reg_from|..|reg_to| (inclusive)
// to the non-position value held in string_start_minus_one().
//
// Registers below kNumCachedRegisters are packed two per hardware X register,
// so aligned pairs are cleared with a single move of
// twice_non_position_value(); the remaining registers live on the stack and
// are cleared two at a time with 64-bit stores. x10 and x11 are clobbered when
// the on-stack clearing loop is emitted.
void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
  DCHECK(reg_from <= reg_to);
  int num_registers = reg_to - reg_from + 1;

  // If the first capture register is cached in a hardware register but not
  // aligned on a 64-bit one, we need to clear the first one specifically.
  if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
    StoreRegister(reg_from, string_start_minus_one());
    num_registers--;
    reg_from++;
  }

  // Clear cached registers in pairs as far as possible.
  while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
    DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
    __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
    reg_from += 2;
    num_registers -= 2;
  }

  // An odd number of registers is left: clear one on its own so that the rest
  // can be handled in pairs.
  if ((num_registers % 2) == 1) {
    StoreRegister(reg_from, string_start_minus_one());
    num_registers--;
    reg_from++;
  }

  if (num_registers > 0) {
    // If there are some remaining registers, they are stored on the stack.
    DCHECK_LE(kNumCachedRegisters, reg_from);

    // Move down the indexes of the registers on stack to get the correct offset
    // in memory.
    reg_from -= kNumCachedRegisters;
    reg_to -= kNumCachedRegisters;
    // We should not unroll the loop for less than 2 registers.
    STATIC_ASSERT(kNumRegistersToUnroll > 2);
    // We position the base pointer to (reg_from + 1).
    // On-stack registers sit at decreasing addresses as their index grows, so
    // a 64-bit store at the slot of (reg_from + 1) also covers reg_from.
    int base_offset = kFirstRegisterOnStack -
        kWRegSize - (kWRegSize * reg_from);
    if (num_registers > kNumRegistersToUnroll) {
      // Too many registers to unroll: emit a run-time loop instead.
      Register base = x10;
      __ Add(base, frame_pointer(), base_offset);

      Label loop;
      // x11 counts the W registers still to clear; each iteration clears two.
      __ Mov(x11, num_registers);
      __ Bind(&loop);
      __ Str(twice_non_position_value(),
             MemOperand(base, -kSystemPointerSize, PostIndex));
      __ Sub(x11, x11, 2);
      __ Cbnz(x11, &loop);
    } else {
      // Few enough registers: emit one 64-bit store per register pair.
      for (int i = reg_from; i <= reg_to; i += 2) {
        __ Str(twice_non_position_value(),
               MemOperand(frame_pointer(), base_offset));
        base_offset -= kWRegSize * 2;
      }
    }
  }
}
1374
1375 // Helper function for reading a value out of a stack frame.
1376 template <typename T>
frame_entry(Address re_frame,int frame_offset)1377 static T& frame_entry(Address re_frame, int frame_offset) {
1378 return *reinterpret_cast<T*>(re_frame + frame_offset);
1379 }
1380
1381
// Helper function returning a pointer to the T located |frame_offset| bytes
// from |re_frame|, so that the frame slot itself can be read or updated.
template <typename T>
static T* frame_entry_address(Address re_frame, int frame_offset) {
  return reinterpret_cast<T*>(re_frame + frame_offset);
}
1386
CheckStackGuardState(Address * return_address,Address raw_code,Address re_frame,int start_index,const byte ** input_start,const byte ** input_end)1387 int RegExpMacroAssemblerARM64::CheckStackGuardState(
1388 Address* return_address, Address raw_code, Address re_frame,
1389 int start_index, const byte** input_start, const byte** input_end) {
1390 Code re_code = Code::cast(Object(raw_code));
1391 return NativeRegExpMacroAssembler::CheckStackGuardState(
1392 frame_entry<Isolate*>(re_frame, kIsolate), start_index,
1393 static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
1394 return_address, re_code, frame_entry_address<Address>(re_frame, kInput),
1395 input_start, input_end);
1396 }
1397
1398
CheckPosition(int cp_offset,Label * on_outside_input)1399 void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1400 Label* on_outside_input) {
1401 if (cp_offset >= 0) {
1402 CompareAndBranchOrBacktrack(current_input_offset(),
1403 -cp_offset * char_size(), ge, on_outside_input);
1404 } else {
1405 __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
1406 __ Cmp(w12, string_start_minus_one());
1407 BranchOrBacktrack(le, on_outside_input);
1408 }
1409 }
1410
1411
1412 // Private methods:
1413
// Emits a call to the C++ CheckStackGuardState helper through the
// DirectCEntry builtin.
//
// |scratch| must be x10 (DCHECKed below); it is loaded with the address of the
// C++ function before the builtin is called. x0-x5 are set up to match the
// helper's parameter order: pointer to the return address slot, code object,
// regexp frame pointer, start offset, and pointers to stack copies of
// input_start and input_end. After the call, input_start(), input_end() and
// the code pointer are reloaded, and the result is left in w0 for the caller
// to inspect.
void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
  DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins());
  DCHECK(!masm_->options().isolate_independent_code);

  // Allocate space on the stack to store the return address. The
  // CheckStackGuardState C++ function will override it if the code
  // moved. Allocate extra space for 2 arguments passed by pointers.
  // AAPCS64 requires the stack to be 16 byte aligned.
  int alignment = masm_->ActivationFrameAlignment();
  DCHECK_EQ(alignment % 16, 0);
  int align_mask = (alignment / kXRegSize) - 1;
  // Three X-sized slots are needed, rounded up to the frame alignment.
  int xreg_to_claim = (3 + align_mask) & ~align_mask;

  __ Claim(xreg_to_claim);

  // CheckStackGuardState needs the end and start addresses of the input string.
  // They are spilled to the claimed slots and passed by pointer (x4, x5).
  __ Poke(input_end(), 2 * kSystemPointerSize);
  __ Add(x5, sp, 2 * kSystemPointerSize);
  __ Poke(input_start(), kSystemPointerSize);
  __ Add(x4, sp, kSystemPointerSize);

  __ Mov(w3, start_offset());
  // RegExp code frame pointer.
  __ Mov(x2, frame_pointer());
  // Code of self.
  __ Mov(x1, Operand(masm_->CodeObject()));

  // We need to pass a pointer to the return address as first argument.
  // DirectCEntry will place the return address on the stack before calling so
  // the stack pointer will point to it.
  __ Mov(x0, sp);

  DCHECK_EQ(scratch, x10);
  ExternalReference check_stack_guard_state =
      ExternalReference::re_check_stack_guard_state(isolate());
  __ Mov(scratch, check_stack_guard_state);

  __ CallBuiltin(Builtin::kDirectCEntry);

  // The input string may have been moved in memory, we need to reload it.
  __ Peek(input_start(), kSystemPointerSize);
  __ Peek(input_end(), 2 * kSystemPointerSize);

  // Release the slots claimed above.
  __ Drop(xreg_to_claim);

  // Reload the Code pointer.
  __ Mov(code_pointer(), Operand(masm_->CodeObject()));
}
1462
BranchOrBacktrack(Condition condition,Label * to)1463 void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1464 Label* to) {
1465 if (condition == al) { // Unconditional.
1466 if (to == nullptr) {
1467 Backtrack();
1468 return;
1469 }
1470 __ B(to);
1471 return;
1472 }
1473 if (to == nullptr) {
1474 to = &backtrack_label_;
1475 }
1476 __ B(condition, to);
1477 }
1478
CompareAndBranchOrBacktrack(Register reg,int immediate,Condition condition,Label * to)1479 void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1480 int immediate,
1481 Condition condition,
1482 Label* to) {
1483 if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1484 if (to == nullptr) {
1485 to = &backtrack_label_;
1486 }
1487 if (condition == eq) {
1488 __ Cbz(reg, to);
1489 } else {
1490 __ Cbnz(reg, to);
1491 }
1492 } else {
1493 __ Cmp(reg, immediate);
1494 BranchOrBacktrack(condition, to);
1495 }
1496 }
1497
1498
CheckPreemption()1499 void RegExpMacroAssemblerARM64::CheckPreemption() {
1500 // Check for preemption.
1501 ExternalReference stack_limit =
1502 ExternalReference::address_of_jslimit(isolate());
1503 __ Mov(x10, stack_limit);
1504 __ Ldr(x10, MemOperand(x10));
1505 __ Cmp(sp, x10);
1506 CallIf(&check_preempt_label_, ls);
1507 }
1508
1509
CheckStackLimit()1510 void RegExpMacroAssemblerARM64::CheckStackLimit() {
1511 ExternalReference stack_limit =
1512 ExternalReference::address_of_regexp_stack_limit_address(isolate());
1513 __ Mov(x10, stack_limit);
1514 __ Ldr(x10, MemOperand(x10));
1515 __ Cmp(backtrack_stackpointer(), x10);
1516 CallIf(&stack_overflow_label_, ls);
1517 }
1518
1519
Push(Register source)1520 void RegExpMacroAssemblerARM64::Push(Register source) {
1521 DCHECK(source.Is32Bits());
1522 DCHECK_NE(source, backtrack_stackpointer());
1523 __ Str(source,
1524 MemOperand(backtrack_stackpointer(),
1525 -static_cast<int>(kWRegSize),
1526 PreIndex));
1527 }
1528
1529
Pop(Register target)1530 void RegExpMacroAssemblerARM64::Pop(Register target) {
1531 DCHECK(target.Is32Bits());
1532 DCHECK_NE(target, backtrack_stackpointer());
1533 __ Ldr(target,
1534 MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1535 }
1536
1537
GetCachedRegister(int register_index)1538 Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1539 DCHECK_GT(kNumCachedRegisters, register_index);
1540 return Register::Create(register_index / 2, kXRegSizeInBits);
1541 }
1542
1543
GetRegister(int register_index,Register maybe_result)1544 Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1545 Register maybe_result) {
1546 DCHECK(maybe_result.Is32Bits());
1547 DCHECK_LE(0, register_index);
1548 if (num_registers_ <= register_index) {
1549 num_registers_ = register_index + 1;
1550 }
1551 Register result = NoReg;
1552 RegisterState register_state = GetRegisterState(register_index);
1553 switch (register_state) {
1554 case STACKED:
1555 __ Ldr(maybe_result, register_location(register_index));
1556 result = maybe_result;
1557 break;
1558 case CACHED_LSW:
1559 result = GetCachedRegister(register_index).W();
1560 break;
1561 case CACHED_MSW:
1562 __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1563 kWRegSizeInBits);
1564 result = maybe_result;
1565 break;
1566 default:
1567 UNREACHABLE();
1568 }
1569 DCHECK(result.Is32Bits());
1570 return result;
1571 }
1572
1573
StoreRegister(int register_index,Register source)1574 void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1575 Register source) {
1576 DCHECK(source.Is32Bits());
1577 DCHECK_LE(0, register_index);
1578 if (num_registers_ <= register_index) {
1579 num_registers_ = register_index + 1;
1580 }
1581
1582 RegisterState register_state = GetRegisterState(register_index);
1583 switch (register_state) {
1584 case STACKED:
1585 __ Str(source, register_location(register_index));
1586 break;
1587 case CACHED_LSW: {
1588 Register cached_register = GetCachedRegister(register_index);
1589 if (source != cached_register.W()) {
1590 __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1591 }
1592 break;
1593 }
1594 case CACHED_MSW: {
1595 Register cached_register = GetCachedRegister(register_index);
1596 __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1597 break;
1598 }
1599 default:
1600 UNREACHABLE();
1601 }
1602 }
1603
1604
CallIf(Label * to,Condition condition)1605 void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1606 Label skip_call;
1607 if (condition != al) __ B(&skip_call, NegateCondition(condition));
1608 __ Bl(to);
1609 __ Bind(&skip_call);
1610 }
1611
1612
// Pops the value pushed by SaveLinkRegister (together with the padding
// register) and turns it back into an absolute return address by adding the
// code object pointer to it.
void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
  // TODO(v8:10026): Remove when we stop compacting for code objects that are
  // active on the call stack.
  __ Pop<TurboAssembler::kAuthLR>(padreg, lr);
  __ Add(lr, lr, Operand(masm_->CodeObject()));
}
1619
1620
// Saves the link register on the stack as an offset relative to the code
// object pointer instead of an absolute address; RestoreLinkRegister adds the
// code object pointer back. The padding register is pushed alongside lr.
void RegExpMacroAssemblerARM64::SaveLinkRegister() {
  __ Sub(lr, lr, Operand(masm_->CodeObject()));
  __ Push<TurboAssembler::kSignLR>(lr, padreg);
}
1625
1626
register_location(int register_index)1627 MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1628 DCHECK(register_index < (1<<30));
1629 DCHECK_LE(kNumCachedRegisters, register_index);
1630 if (num_registers_ <= register_index) {
1631 num_registers_ = register_index + 1;
1632 }
1633 register_index -= kNumCachedRegisters;
1634 int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1635 return MemOperand(frame_pointer(), offset);
1636 }
1637
capture_location(int register_index,Register scratch)1638 MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1639 Register scratch) {
1640 DCHECK(register_index < (1<<30));
1641 DCHECK(register_index < num_saved_registers_);
1642 DCHECK_LE(kNumCachedRegisters, register_index);
1643 DCHECK_EQ(register_index % 2, 0);
1644 register_index -= kNumCachedRegisters;
1645 int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1646 // capture_location is used with Stp instructions to load/store 2 registers.
1647 // The immediate field in the encoding is limited to 7 bits (signed).
1648 if (is_int7(offset)) {
1649 return MemOperand(frame_pointer(), offset);
1650 } else {
1651 __ Add(scratch, frame_pointer(), offset);
1652 return MemOperand(scratch);
1653 }
1654 }
1655
LoadCurrentCharacterUnchecked(int cp_offset,int characters)1656 void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1657 int characters) {
1658 Register offset = current_input_offset();
1659
1660 // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1661 // and the operating system running on the target allow it.
1662 // If unaligned load/stores are not supported then this function must only
1663 // be used to load a single character at a time.
1664
1665 // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1666 // disable it.
1667 // TODO(pielan): See whether or not we should disable unaligned accesses.
1668 if (!CanReadUnaligned()) {
1669 DCHECK_EQ(1, characters);
1670 }
1671
1672 if (cp_offset != 0) {
1673 if (FLAG_debug_code) {
1674 __ Mov(x10, cp_offset * char_size());
1675 __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1676 __ Cmp(x10, Operand(w10, SXTW));
1677 // The offset needs to fit in a W register.
1678 __ Check(eq, AbortReason::kOffsetOutOfRange);
1679 } else {
1680 __ Add(w10, current_input_offset(), cp_offset * char_size());
1681 }
1682 offset = w10;
1683 }
1684
1685 if (mode_ == LATIN1) {
1686 if (characters == 4) {
1687 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1688 } else if (characters == 2) {
1689 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1690 } else {
1691 DCHECK_EQ(1, characters);
1692 __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1693 }
1694 } else {
1695 DCHECK(mode_ == UC16);
1696 if (characters == 2) {
1697 __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1698 } else {
1699 DCHECK_EQ(1, characters);
1700 __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1701 }
1702 }
1703 }
1704
1705 } // namespace internal
1706 } // namespace v8
1707
1708 #undef __
1709
1710 #endif // V8_TARGET_ARCH_ARM64
1711