// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/code-generator.h"

#include "src/codegen/arm64/assembler-arm64-inl.h"
#include "src/codegen/arm64/macro-assembler-arm64-inl.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/compiler/backend/code-generator-impl.h"
#include "src/compiler/backend/gap-resolver.h"
#include "src/compiler/node-matchers.h"
#include "src/compiler/osr.h"
#include "src/execution/frame-constants.h"
#include "src/heap/heap-inl.h"  // crbug.com/v8/8499
#include "src/wasm/wasm-code-manager.h"
#include "src/wasm/wasm-objects.h"

namespace v8 {
namespace internal {
namespace compiler {

#define __ tasm()->

// Adds Arm64-specific methods to convert InstructionOperands.
class Arm64OperandConverter final : public InstructionOperandConverter {
 public:
  Arm64OperandConverter(CodeGenerator* gen, Instruction* instr)
      : InstructionOperandConverter(gen, instr) {}

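  // On AArch64 the S, D and Q registers are overlapping views of the same
  // underlying vector register file, so the float and SIMD accessors below
  // simply reinterpret the allocated register at the requested width.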
  DoubleRegister InputFloat32Register(size_t index) {
    return InputDoubleRegister(index).S();
  }

  DoubleRegister InputFloat64Register(size_t index) {
    return InputDoubleRegister(index);
  }

  DoubleRegister InputSimd128Register(size_t index) {
    return InputDoubleRegister(index).Q();
  }

  CPURegister InputFloat32OrZeroRegister(size_t index) {
    if (instr_->InputAt(index)->IsImmediate()) {
      DCHECK_EQ(0, bit_cast<int32_t>(InputFloat32(index)));
      return wzr;
    }
    DCHECK(instr_->InputAt(index)->IsFPRegister());
    return InputDoubleRegister(index).S();
  }

  CPURegister InputFloat64OrZeroRegister(size_t index) {
    if (instr_->InputAt(index)->IsImmediate()) {
      DCHECK_EQ(0, bit_cast<int64_t>(InputDouble(index)));
      return xzr;
    }
    DCHECK(instr_->InputAt(index)->IsDoubleRegister());
    return InputDoubleRegister(index);
  }

  size_t OutputCount() { return instr_->OutputCount(); }

  DoubleRegister OutputFloat32Register() { return OutputDoubleRegister().S(); }

  DoubleRegister OutputFloat64Register() { return OutputDoubleRegister(); }

  DoubleRegister OutputSimd128Register() { return OutputDoubleRegister().Q(); }

  Register InputRegister32(size_t index) {
    return ToRegister(instr_->InputAt(index)).W();
  }

  Register InputOrZeroRegister32(size_t index) {
    DCHECK(instr_->InputAt(index)->IsRegister() ||
           (instr_->InputAt(index)->IsImmediate() && (InputInt32(index) == 0)));
    if (instr_->InputAt(index)->IsImmediate()) {
      return wzr;
    }
    return InputRegister32(index);
  }

  Register InputRegister64(size_t index) { return InputRegister(index); }

  Register InputOrZeroRegister64(size_t index) {
    DCHECK(instr_->InputAt(index)->IsRegister() ||
           (instr_->InputAt(index)->IsImmediate() && (InputInt64(index) == 0)));
    if (instr_->InputAt(index)->IsImmediate()) {
      return xzr;
    }
    return InputRegister64(index);
  }

  Operand InputOperand(size_t index) {
    return ToOperand(instr_->InputAt(index));
  }

  Operand InputOperand64(size_t index) { return InputOperand(index); }

  Operand InputOperand32(size_t index) {
    return ToOperand32(instr_->InputAt(index));
  }

  Register OutputRegister64() { return OutputRegister(); }

  Register OutputRegister32() { return ToRegister(instr_->Output()).W(); }

  Register TempRegister32(size_t index) {
    return ToRegister(instr_->TempAt(index)).W();
  }

  Operand InputOperand2_32(size_t index) {
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_None:
        return InputOperand32(index);
      case kMode_Operand2_R_LSL_I:
        return Operand(InputRegister32(index), LSL, InputInt5(index + 1));
      case kMode_Operand2_R_LSR_I:
        return Operand(InputRegister32(index), LSR, InputInt5(index + 1));
      case kMode_Operand2_R_ASR_I:
        return Operand(InputRegister32(index), ASR, InputInt5(index + 1));
      case kMode_Operand2_R_ROR_I:
        return Operand(InputRegister32(index), ROR, InputInt5(index + 1));
      case kMode_Operand2_R_UXTB:
        return Operand(InputRegister32(index), UXTB);
      case kMode_Operand2_R_UXTH:
        return Operand(InputRegister32(index), UXTH);
      case kMode_Operand2_R_SXTB:
        return Operand(InputRegister32(index), SXTB);
      case kMode_Operand2_R_SXTH:
        return Operand(InputRegister32(index), SXTH);
      case kMode_Operand2_R_SXTW:
        return Operand(InputRegister32(index), SXTW);
      case kMode_MRI:
      case kMode_MRR:
      case kMode_Root:
        break;
    }
    UNREACHABLE();
  }

  Operand InputOperand2_64(size_t index) {
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_None:
        return InputOperand64(index);
      case kMode_Operand2_R_LSL_I:
        return Operand(InputRegister64(index), LSL, InputInt6(index + 1));
      case kMode_Operand2_R_LSR_I:
        return Operand(InputRegister64(index), LSR, InputInt6(index + 1));
      case kMode_Operand2_R_ASR_I:
        return Operand(InputRegister64(index), ASR, InputInt6(index + 1));
      case kMode_Operand2_R_ROR_I:
        return Operand(InputRegister64(index), ROR, InputInt6(index + 1));
      case kMode_Operand2_R_UXTB:
        return Operand(InputRegister64(index), UXTB);
      case kMode_Operand2_R_UXTH:
        return Operand(InputRegister64(index), UXTH);
      case kMode_Operand2_R_SXTB:
        return Operand(InputRegister64(index), SXTB);
      case kMode_Operand2_R_SXTH:
        return Operand(InputRegister64(index), SXTH);
      case kMode_Operand2_R_SXTW:
        return Operand(InputRegister64(index), SXTW);
      case kMode_MRI:
      case kMode_MRR:
      case kMode_Root:
        break;
    }
    UNREACHABLE();
  }

  MemOperand MemoryOperand(size_t index = 0) {
    switch (AddressingModeField::decode(instr_->opcode())) {
      case kMode_None:
      case kMode_Operand2_R_LSR_I:
      case kMode_Operand2_R_ASR_I:
      case kMode_Operand2_R_ROR_I:
      case kMode_Operand2_R_UXTB:
      case kMode_Operand2_R_UXTH:
      case kMode_Operand2_R_SXTB:
      case kMode_Operand2_R_SXTH:
      case kMode_Operand2_R_SXTW:
        break;
      case kMode_Root:
        return MemOperand(kRootRegister, InputInt64(index));
      case kMode_Operand2_R_LSL_I:
        return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
                          LSL, InputInt32(index + 2));
      case kMode_MRI:
        return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
      case kMode_MRR:
        return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
    }
    UNREACHABLE();
  }

  Operand ToOperand(InstructionOperand* op) {
    if (op->IsRegister()) {
      return Operand(ToRegister(op));
    }
    return ToImmediate(op);
  }

  Operand ToOperand32(InstructionOperand* op) {
    if (op->IsRegister()) {
      return Operand(ToRegister(op).W());
    }
    return ToImmediate(op);
  }

  Operand ToImmediate(InstructionOperand* operand) {
    Constant constant = ToConstant(operand);
    switch (constant.type()) {
      case Constant::kInt32:
        return Operand(constant.ToInt32());
      case Constant::kInt64:
        if (RelocInfo::IsWasmReference(constant.rmode())) {
          return Operand(constant.ToInt64(), constant.rmode());
        } else {
          return Operand(constant.ToInt64());
        }
      case Constant::kFloat32:
        return Operand(Operand::EmbeddedNumber(constant.ToFloat32()));
      case Constant::kFloat64:
        return Operand(Operand::EmbeddedNumber(constant.ToFloat64().value()));
      case Constant::kExternalReference:
        return Operand(constant.ToExternalReference());
      case Constant::kCompressedHeapObject:  // Fall through.
      case Constant::kHeapObject:
        return Operand(constant.ToHeapObject());
      case Constant::kDelayedStringConstant:
        return Operand::EmbeddedStringConstant(
            constant.ToDelayedStringConstant());
      case Constant::kRpoNumber:
        UNREACHABLE();  // TODO(dcarney): RPO immediates on arm64.
        break;
    }
    UNREACHABLE();
  }

  MemOperand ToMemOperand(InstructionOperand* op, TurboAssembler* tasm) const {
    DCHECK_NOT_NULL(op);
    DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
    return SlotToMemOperand(AllocatedOperand::cast(op)->index(), tasm);
  }

  MemOperand SlotToMemOperand(int slot, TurboAssembler* tasm) const {
    FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
    if (offset.from_frame_pointer()) {
      int from_sp = offset.offset() + frame_access_state()->GetSPToFPOffset();
      // Convert FP-offsets to SP-offsets if it results in better code.
      if (Assembler::IsImmLSUnscaled(from_sp) ||
          Assembler::IsImmLSScaled(from_sp, 3)) {
        offset = FrameOffset::FromStackPointer(from_sp);
      }
    }
    return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
  }
};

namespace {

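// Out-of-line slow path of the write barrier: filters out Smi values and
// values whose target page does not need to be tracked, then calls the
// appropriate record-write stub. lr is saved and restored around the call
// when the frame was elided.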
class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand offset,
                       Register value, RecordWriteMode mode,
                       StubCallMode stub_mode,
                       UnwindingInfoWriter* unwinding_info_writer)
      : OutOfLineCode(gen),
        object_(object),
        offset_(offset),
        value_(value),
        mode_(mode),
        stub_mode_(stub_mode),
        must_save_lr_(!gen->frame_access_state()->has_frame()),
        unwinding_info_writer_(unwinding_info_writer),
        zone_(gen->zone()) {}

  void Generate() final {
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      __ JumpIfSmi(value_, exit());
    }
    if (COMPRESS_POINTERS_BOOL) {
      __ DecompressTaggedPointer(value_, value_);
    }
    __ CheckPageFlag(value_, MemoryChunk::kPointersToHereAreInterestingMask, ne,
                     exit());
    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
    if (must_save_lr_) {
      // We need to save and restore lr if the frame was elided.
      __ Push<TurboAssembler::kSignLR>(lr, padreg);
      unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset(), sp);
    }
    if (mode_ == RecordWriteMode::kValueIsEphemeronKey) {
      __ CallEphemeronKeyBarrier(object_, offset_, save_fp_mode);
    } else if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStub(object_, offset_, remembered_set_action,
                             save_fp_mode, wasm::WasmCode::kRecordWrite);
    } else {
      __ CallRecordWriteStub(object_, offset_, remembered_set_action,
                             save_fp_mode);
    }
    if (must_save_lr_) {
      __ Pop<TurboAssembler::kAuthLR>(padreg, lr);
      unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
    }
  }

 private:
  Register const object_;
  Operand const offset_;
  Register const value_;
  RecordWriteMode const mode_;
  StubCallMode const stub_mode_;
  bool must_save_lr_;
  UnwindingInfoWriter* const unwinding_info_writer_;
  Zone* zone_;
};

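// For floating-point conditions, the AArch64 condition code is chosen for its
// behaviour on unordered (NaN) comparisons: fcmp sets C and V for unordered
// operands, so e.g. kFloatLessThan maps to lo (false if unordered) while
// kFloatLessThanOrUnordered maps to lt (true if unordered).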
Condition FlagsConditionToCondition(FlagsCondition condition) {
  switch (condition) {
    case kEqual:
      return eq;
    case kNotEqual:
      return ne;
    case kSignedLessThan:
      return lt;
    case kSignedGreaterThanOrEqual:
      return ge;
    case kSignedLessThanOrEqual:
      return le;
    case kSignedGreaterThan:
      return gt;
    case kUnsignedLessThan:
      return lo;
    case kUnsignedGreaterThanOrEqual:
      return hs;
    case kUnsignedLessThanOrEqual:
      return ls;
    case kUnsignedGreaterThan:
      return hi;
    case kFloatLessThanOrUnordered:
      return lt;
    case kFloatGreaterThanOrEqual:
      return ge;
    case kFloatLessThanOrEqual:
      return ls;
    case kFloatGreaterThanOrUnordered:
      return hi;
    case kFloatLessThan:
      return lo;
    case kFloatGreaterThanOrEqualOrUnordered:
      return hs;
    case kFloatLessThanOrEqualOrUnordered:
      return le;
    case kFloatGreaterThan:
      return gt;
    case kOverflow:
      return vs;
    case kNotOverflow:
      return vc;
    case kUnorderedEqual:
    case kUnorderedNotEqual:
      break;
    case kPositiveOrZero:
      return pl;
    case kNegative:
      return mi;
  }
  UNREACHABLE();
}

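// Spectre mitigation: poisoned loads mask the loaded value with
// kSpeculationPoisonRegister, which is all ones on the architectural path and
// all zeros when the code start register indicates misspeculation (see
// GenerateSpeculationPoisonFromCodeStartRegister below).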
void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
                                   InstructionCode opcode, Instruction* instr,
                                   Arm64OperandConverter const& i) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  if (access_mode == kMemoryAccessPoisoned) {
    Register value = i.OutputRegister();
    Register poison = value.Is64Bits() ? kSpeculationPoisonRegister
                                       : kSpeculationPoisonRegister.W();
    codegen->tasm()->And(value, value, Operand(poison));
  }
}

void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode,
                             Arm64OperandConverter* i, VRegister output_reg) {
  const MemoryAccessMode access_mode =
      static_cast<MemoryAccessMode>(MiscField::decode(opcode));
  AddressingMode address_mode = AddressingModeField::decode(opcode);
  if (access_mode == kMemoryAccessPoisoned && address_mode != kMode_Root) {
    UseScratchRegisterScope temps(codegen->tasm());
    Register address = temps.AcquireX();
    switch (address_mode) {
      case kMode_MRI:  // Fall through.
      case kMode_MRR:
        codegen->tasm()->Add(address, i->InputRegister(0), i->InputOperand(1));
        break;
      case kMode_Operand2_R_LSL_I:
        codegen->tasm()->Add(address, i->InputRegister(0),
                             i->InputOperand2_64(1));
        break;
      default:
        // Note: we don't need poisoning for kMode_Root loads as those loads
        // target a fixed offset from root register which is set once when
        // initializing the vm.
        UNREACHABLE();
    }
    codegen->tasm()->And(address, address, Operand(kSpeculationPoisonRegister));
    codegen->tasm()->Ldr(output_reg, MemOperand(address));
  } else {
    codegen->tasm()->Ldr(output_reg, i->MemoryOperand());
  }
}

}  // namespace

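// Shifts with a register operand use the AArch64 register-form instructions
// directly, which already take the shift amount modulo the operand width;
// immediate shifts apply the same modulus statically.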
#define ASSEMBLE_SHIFT(asm_instr, width)                                    \
  do {                                                                      \
    if (instr->InputAt(1)->IsRegister()) {                                  \
      __ asm_instr(i.OutputRegister##width(), i.InputRegister##width(0),    \
                   i.InputRegister##width(1));                              \
    } else {                                                                \
      uint32_t imm =                                                        \
          static_cast<uint32_t>(i.InputOperand##width(1).ImmediateValue()); \
      __ asm_instr(i.OutputRegister##width(), i.InputRegister##width(0),    \
                   imm % (width));                                          \
    }                                                                       \
  } while (0)

#define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr, reg)                   \
  do {                                                                 \
    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
    __ asm_instr(i.Output##reg(), i.TempRegister(0));                  \
  } while (0)

#define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr, reg)                  \
  do {                                                                 \
    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1)); \
    __ asm_instr(i.Input##reg(2), i.TempRegister(0));                  \
  } while (0)

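// Atomic exchange is a load-exclusive/store-exclusive retry loop: the
// store-exclusive writes a nonzero status into the temp register if the
// exclusive monitor was lost between the paired accesses, in which case we
// retry.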
#define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr, reg)       \
  do {                                                                       \
    Label exchange;                                                          \
    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
    __ Bind(&exchange);                                                      \
    __ load_instr(i.Output##reg(), i.TempRegister(0));                       \
    __ store_instr(i.TempRegister32(1), i.Input##reg(2), i.TempRegister(0)); \
    __ Cbnz(i.TempRegister32(1), &exchange);                                 \
  } while (0)

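// Compare-exchange follows the same retry-loop shape, but exits without
// storing when the loaded value (compared against input 2, extended via
// 'ext' for the narrow variants) does not match the expected one.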
#define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr, ext, \
                                                 reg)                          \
  do {                                                                         \
    Label compareExchange;                                                     \
    Label exit;                                                                \
    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));         \
    __ Bind(&compareExchange);                                                 \
    __ load_instr(i.Output##reg(), i.TempRegister(0));                         \
    __ Cmp(i.Output##reg(), Operand(i.Input##reg(2), ext));                    \
    __ B(ne, &exit);                                                           \
    __ store_instr(i.TempRegister32(1), i.Input##reg(3), i.TempRegister(0));   \
    __ Cbnz(i.TempRegister32(1), &compareExchange);                            \
    __ Bind(&exit);                                                            \
  } while (0)

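// Atomic read-modify-write: load-exclusive the old value (which is also the
// instruction's result), apply the binary operation into a temp, then
// store-exclusive and retry on failure.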
#define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr, reg)       \
  do {                                                                       \
    Label binop;                                                             \
    __ Add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));       \
    __ Bind(&binop);                                                         \
    __ load_instr(i.Output##reg(), i.TempRegister(0));                       \
    __ bin_instr(i.Temp##reg(1), i.Output##reg(), Operand(i.Input##reg(2))); \
    __ store_instr(i.TempRegister32(2), i.Temp##reg(1), i.TempRegister(0));  \
    __ Cbnz(i.TempRegister32(2), &binop);                                    \
  } while (0)

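// These math operations have no single AArch64 instruction, so they are
// routed through the ieee754 C library functions (zero general-purpose
// arguments, one or two double arguments).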
#define ASSEMBLE_IEEE754_BINOP(name)                                        \
  do {                                                                      \
    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
  } while (0)

#define ASSEMBLE_IEEE754_UNOP(name)                                         \
  do {                                                                      \
    FrameScope scope(tasm(), StackFrame::MANUAL);                           \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
  } while (0)

// If shift value is an immediate, we can call asm_imm, taking the shift value
// modulo 2^width. Otherwise, emit code to perform the modulus operation, and
// call asm_shl.
#define ASSEMBLE_SIMD_SHIFT_LEFT(asm_imm, width, format, asm_shl, gp)       \
  do {                                                                      \
    if (instr->InputAt(1)->IsImmediate()) {                                 \
      __ asm_imm(i.OutputSimd128Register().format(),                        \
                 i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
    } else {                                                                \
      VRegister tmp = i.TempSimd128Register(0);                             \
      Register shift = i.TempRegister(1).gp();                              \
      constexpr int mask = (1 << width) - 1;                                \
      __ And(shift, i.InputRegister32(1), mask);                            \
      __ Dup(tmp.format(), shift);                                          \
      __ asm_shl(i.OutputSimd128Register().format(),                        \
                 i.InputSimd128Register(0).format(), tmp.format());         \
    }                                                                       \
  } while (0)

// If shift value is an immediate, we can call asm_imm, taking the shift value
// modulo 2^width. Otherwise, emit code to perform the modulus operation, and
// call asm_shl, passing in the negative shift value (treated as right shift).
#define ASSEMBLE_SIMD_SHIFT_RIGHT(asm_imm, width, format, asm_shl, gp)      \
  do {                                                                      \
    if (instr->InputAt(1)->IsImmediate()) {                                 \
      __ asm_imm(i.OutputSimd128Register().format(),                        \
                 i.InputSimd128Register(0).format(), i.InputInt##width(1)); \
    } else {                                                                \
      VRegister tmp = i.TempSimd128Register(0);                             \
      Register shift = i.TempRegister(1).gp();                              \
      constexpr int mask = (1 << width) - 1;                                \
      __ And(shift, i.InputRegister32(1), mask);                            \
      __ Dup(tmp.format(), shift);                                          \
      __ Neg(tmp.format(), tmp.format());                                   \
      __ asm_shl(i.OutputSimd128Register().format(),                        \
                 i.InputSimd128Register(0).format(), tmp.format());         \
    }                                                                       \
  } while (0)

void CodeGenerator::AssembleDeconstructFrame() {
  __ Mov(sp, fp);
  __ Pop<TurboAssembler::kAuthLR>(fp, lr);

  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
}

void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ RestoreFPAndLR();
  }
  frame_access_state()->SetFrameAccessToSP();
}

void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register scratch1,
                                                     Register scratch2,
                                                     Register scratch3) {
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if current frame is an arguments adaptor frame.
  __ Ldr(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
  __ Cmp(scratch1,
         Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ B(ne, &done);

  // Load arguments count from current arguments adaptor frame (note, it
  // does not include receiver).
  Register caller_args_count_reg = scratch1;
  __ Ldr(caller_args_count_reg,
         MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
  __ SmiUntag(caller_args_count_reg);

  __ PrepareForTailCall(args_reg, caller_args_count_reg, scratch2, scratch3);
  __ bind(&done);
}

namespace {

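// Adjusts sp for a tail call. Deltas are counted in stack slots and must be
// even, since a slot is kSystemPointerSize bytes and sp has to stay 16-byte
// aligned.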
void AdjustStackPointerForTailCall(TurboAssembler* tasm,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  DCHECK_EQ(stack_slot_delta % 2, 0);
  if (stack_slot_delta > 0) {
    tasm->Claim(stack_slot_delta);
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    tasm->Drop(-stack_slot_delta);
    state->IncreaseSPDelta(stack_slot_delta);
  }
}

}  // namespace

void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, false);
}

void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  DCHECK_EQ(first_unused_stack_slot % 2, 0);
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
  DCHECK(instr->IsTailCall());
  InstructionOperandConverter g(this, instr);
  int optional_padding_slot = g.InputInt32(instr->InputCount() - 2);
  if (optional_padding_slot % 2) {
    __ Poke(padreg, optional_padding_slot * kSystemPointerSize);
  }
}

// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.AcquireX();
  __ ComputeCodeStartAddress(scratch);
  __ cmp(scratch, kJavaScriptCallCodeStartRegister);
  __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
}

// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
// to:
//    1. read from memory the word that contains that bit, which can be found
//       in the flags in the referenced {CodeDataContainer} object;
//    2. test kMarkedForDeoptimizationBit in those flags; and
//    3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.AcquireX();
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ LoadTaggedPointerField(
      scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
  __ Ldr(scratch.W(),
         FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
  Label not_deoptimized;
  __ Tbz(scratch.W(), Code::kMarkedForDeoptimizationBit, &not_deoptimized);
  __ Jump(BUILTIN_CODE(isolate(), CompileLazyDeoptimizedCode),
          RelocInfo::CODE_TARGET);
  __ Bind(&not_deoptimized);
}

void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.AcquireX();

  // Set a mask which has all bits set in the normal case, but has all
  // bits cleared if we are speculatively executing the wrong PC.
  __ ComputeCodeStartAddress(scratch);
  __ Cmp(kJavaScriptCallCodeStartRegister, scratch);
  __ Csetm(kSpeculationPoisonRegister, eq);
  __ Csdb();
}

void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  UseScratchRegisterScope temps(tasm());
  Register scratch = temps.AcquireX();

  __ Mov(scratch, sp);
  __ And(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
  __ And(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
  __ And(scratch, scratch, kSpeculationPoisonRegister);
  __ Mov(sp, scratch);
}

// Assembles an instruction after register allocation, producing machine code.
CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    Instruction* instr) {
  Arm64OperandConverter i(this, instr);
  InstructionCode opcode = instr->opcode();
  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
  switch (arch_opcode) {
    case kArchCallCodeObject: {
      if (instr->InputAt(0)->IsImmediate()) {
        __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ CallCodeObject(reg);
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallBuiltinPointer: {
      DCHECK(!instr->InputAt(0)->IsImmediate());
      Register builtin_index = i.InputRegister(0);
      __ CallBuiltinByIndex(builtin_index);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchCallWasmFunction: {
      if (instr->InputAt(0)->IsImmediate()) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt64());
        __ Call(wasm_code, constant.rmode());
      } else {
        Register target = i.InputRegister(0);
        __ Call(target);
      }
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject: {
      if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
        AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
                                         i.TempRegister(0), i.TempRegister(1),
                                         i.TempRegister(2));
      }
      if (instr->InputAt(0)->IsImmediate()) {
        __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
      } else {
        Register reg = i.InputRegister(0);
        DCHECK_IMPLIES(
            HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
            reg == kJavaScriptCallCodeStartRegister);
        __ JumpCodeObject(reg);
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallWasm: {
      if (instr->InputAt(0)->IsImmediate()) {
        Constant constant = i.ToConstant(instr->InputAt(0));
        Address wasm_code = static_cast<Address>(constant.ToInt64());
        __ Jump(wasm_code, constant.rmode());
      } else {
        Register target = i.InputRegister(0);
        __ Jump(target);
      }
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchTailCallAddress: {
      CHECK(!instr->InputAt(0)->IsImmediate());
      Register reg = i.InputRegister(0);
      DCHECK_IMPLIES(
          HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
          reg == kJavaScriptCallCodeStartRegister);
      __ Jump(reg);
      unwinding_info_writer_.MarkBlockWillExit();
      frame_access_state()->ClearSPDelta();
      frame_access_state()->SetFrameAccessToDefault();
      break;
    }
    case kArchCallJSFunction: {
      Register func = i.InputRegister(0);
      if (FLAG_debug_code) {
        // Check the function's context matches the context argument.
        UseScratchRegisterScope scope(tasm());
        Register temp = scope.AcquireX();
        __ LoadTaggedPointerField(
            temp, FieldMemOperand(func, JSFunction::kContextOffset));
        __ cmp(cp, temp);
        __ Assert(eq, AbortReason::kWrongFunctionContext);
      }
      static_assert(kJavaScriptCallCodeStartRegister == x2, "ABI mismatch");
      __ LoadTaggedPointerField(x2,
                                FieldMemOperand(func, JSFunction::kCodeOffset));
      __ CallCodeObject(x2);
      RecordCallPosition(instr);
      frame_access_state()->ClearSPDelta();
      break;
    }
    case kArchPrepareCallCFunction:
      // We don't need kArchPrepareCallCFunction on arm64 as the instruction
      // selector has already performed a Claim to reserve space on the stack.
      // Frame alignment is always 16 bytes, and the stack pointer is already
      // 16-byte aligned, therefore we do not need to align the stack pointer
      // by an unknown value, and it is safe to continue accessing the frame
      // via the stack pointer.
      UNREACHABLE();
    case kArchSaveCallerRegisters: {
      fp_mode_ =
          static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // kReturnRegister0 should have been saved before entering the stub.
      int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
      DCHECK(IsAligned(bytes, kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      DCHECK(!caller_registers_saved_);
      caller_registers_saved_ = true;
      break;
    }
    case kArchRestoreCallerRegisters: {
      DCHECK(fp_mode_ ==
             static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
      DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
      // Don't overwrite the returned value.
      int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
      frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
      DCHECK_EQ(0, frame_access_state()->sp_delta());
      DCHECK(caller_registers_saved_);
      caller_registers_saved_ = false;
      break;
    }
    case kArchPrepareTailCall:
      AssemblePrepareTailCall();
      break;
    case kArchCallCFunction: {
      int const num_parameters = MiscField::decode(instr->opcode());
      Label return_location;
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        // Put the return address in a stack slot.
        __ StoreReturnAddressInWasmExitFrame(&return_location);
      }

      if (instr->InputAt(0)->IsImmediate()) {
        ExternalReference ref = i.InputExternalReference(0);
        __ CallCFunction(ref, num_parameters, 0);
      } else {
        Register func = i.InputRegister(0);
        __ CallCFunction(func, num_parameters, 0);
      }
      __ Bind(&return_location);
      if (linkage()->GetIncomingDescriptor()->IsWasmCapiFunction()) {
        RecordSafepoint(instr->reference_map(), Safepoint::kNoLazyDeopt);
      }
      frame_access_state()->SetFrameAccessToDefault();
      // Ideally, we should decrement SP delta to match the change of stack
      // pointer in CallCFunction. However, for certain architectures (e.g.
      // ARM), there may be more strict alignment requirement, causing old SP
      // to be saved on the stack. In those cases, we can not calculate the SP
      // delta statically.
      frame_access_state()->ClearSPDelta();
      if (caller_registers_saved_) {
        // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
        // Here, we assume the sequence to be:
        //   kArchSaveCallerRegisters;
        //   kArchCallCFunction;
        //   kArchRestoreCallerRegisters;
        int bytes =
            __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
        frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
      }
      break;
    }
    case kArchJmp:
      AssembleArchJump(i.InputRpo(0));
      break;
    case kArchTableSwitch:
      AssembleArchTableSwitch(instr);
      break;
    case kArchBinarySearchSwitch:
      AssembleArchBinarySearchSwitch(instr);
      break;
    case kArchAbortCSAAssert:
      DCHECK_EQ(i.InputRegister(0), x1);
      {
        // We don't actually want to generate a pile of code for this, so just
        // claim there is a stack frame, without generating one.
        FrameScope scope(tasm(), StackFrame::NONE);
        __ Call(
            isolate()->builtins()->builtin_handle(Builtins::kAbortCSAAssert),
            RelocInfo::CODE_TARGET);
      }
      __ Debug("kArchAbortCSAAssert", 0, BREAK);
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchDebugBreak:
      __ DebugBreak();
      break;
    case kArchComment:
      __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
      break;
    case kArchThrowTerminator:
      unwinding_info_writer_.MarkBlockWillExit();
      break;
    case kArchNop:
      // don't emit code for nops.
      break;
    case kArchDeoptimize: {
      DeoptimizationExit* exit =
          BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
      __ B(exit->label());
      break;
    }
    case kArchRet:
      AssembleReturn(instr->InputAt(0));
      break;
    case kArchFramePointer:
      __ mov(i.OutputRegister(), fp);
      break;
    case kArchParentFramePointer:
      if (frame_access_state()->has_frame()) {
        __ ldr(i.OutputRegister(), MemOperand(fp, 0));
      } else {
        __ mov(i.OutputRegister(), fp);
      }
      break;
    case kArchStackPointerGreaterThan: {
      // Potentially apply an offset to the current stack pointer before the
      // comparison to consider the size difference of an optimized frame
      // versus the contained unoptimized frames.

      Register lhs_register = sp;
      uint32_t offset;

      if (ShouldApplyOffsetToStackCheck(instr, &offset)) {
        lhs_register = i.TempRegister(0);
        __ Sub(lhs_register, sp, offset);
      }

      constexpr size_t kValueIndex = 0;
      DCHECK(instr->InputAt(kValueIndex)->IsRegister());
      __ Cmp(lhs_register, i.InputRegister(kValueIndex));
      break;
    }
    case kArchStackCheckOffset:
      __ Move(i.OutputRegister(), Smi::FromInt(GetStackCheckOffset()));
      break;
    case kArchTruncateDoubleToI:
      __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
                           i.InputDoubleRegister(0), DetermineStubCallMode(),
                           frame_access_state()->has_frame()
                               ? kLRHasBeenSaved
                               : kLRHasNotBeenSaved);
      break;
    case kArchStoreWithWriteBarrier: {
      RecordWriteMode mode =
          static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
      AddressingMode addressing_mode =
          AddressingModeField::decode(instr->opcode());
      Register object = i.InputRegister(0);
      Operand offset(0);
      if (addressing_mode == kMode_MRI) {
        offset = Operand(i.InputInt64(1));
      } else {
        DCHECK_EQ(addressing_mode, kMode_MRR);
        offset = Operand(i.InputRegister(1));
      }
      Register value = i.InputRegister(2);
      auto ool = new (zone()) OutOfLineRecordWrite(
          this, object, offset, value, mode, DetermineStubCallMode(),
          &unwinding_info_writer_);
      __ StoreTaggedField(value, MemOperand(object, offset));
      __ CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask,
                       eq, ool->entry());
      __ Bind(ool->exit());
      break;
    }
    case kArchStackSlot: {
      FrameOffset offset =
          frame_access_state()->GetFrameOffset(i.InputInt32(0));
      Register base = offset.from_stack_pointer() ? sp : fp;
      __ Add(i.OutputRegister(0), base, Operand(offset.offset()));
      break;
    }
    case kIeee754Float64Acos:
      ASSEMBLE_IEEE754_UNOP(acos);
      break;
    case kIeee754Float64Acosh:
      ASSEMBLE_IEEE754_UNOP(acosh);
      break;
    case kIeee754Float64Asin:
      ASSEMBLE_IEEE754_UNOP(asin);
      break;
    case kIeee754Float64Asinh:
      ASSEMBLE_IEEE754_UNOP(asinh);
      break;
    case kIeee754Float64Atan:
      ASSEMBLE_IEEE754_UNOP(atan);
      break;
    case kIeee754Float64Atanh:
      ASSEMBLE_IEEE754_UNOP(atanh);
      break;
    case kIeee754Float64Atan2:
      ASSEMBLE_IEEE754_BINOP(atan2);
      break;
    case kIeee754Float64Cos:
      ASSEMBLE_IEEE754_UNOP(cos);
      break;
    case kIeee754Float64Cosh:
      ASSEMBLE_IEEE754_UNOP(cosh);
      break;
    case kIeee754Float64Cbrt:
      ASSEMBLE_IEEE754_UNOP(cbrt);
      break;
    case kIeee754Float64Exp:
      ASSEMBLE_IEEE754_UNOP(exp);
      break;
    case kIeee754Float64Expm1:
      ASSEMBLE_IEEE754_UNOP(expm1);
      break;
    case kIeee754Float64Log:
      ASSEMBLE_IEEE754_UNOP(log);
      break;
    case kIeee754Float64Log1p:
      ASSEMBLE_IEEE754_UNOP(log1p);
      break;
    case kIeee754Float64Log2:
      ASSEMBLE_IEEE754_UNOP(log2);
      break;
    case kIeee754Float64Log10:
      ASSEMBLE_IEEE754_UNOP(log10);
      break;
    case kIeee754Float64Pow:
      ASSEMBLE_IEEE754_BINOP(pow);
      break;
    case kIeee754Float64Sin:
      ASSEMBLE_IEEE754_UNOP(sin);
      break;
    case kIeee754Float64Sinh:
      ASSEMBLE_IEEE754_UNOP(sinh);
      break;
    case kIeee754Float64Tan:
      ASSEMBLE_IEEE754_UNOP(tan);
      break;
    case kIeee754Float64Tanh:
      ASSEMBLE_IEEE754_UNOP(tanh);
      break;
    case kArm64Float32RoundDown:
      __ Frintm(i.OutputFloat32Register(), i.InputFloat32Register(0));
      break;
    case kArm64Float64RoundDown:
      __ Frintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArm64Float32RoundUp:
      __ Frintp(i.OutputFloat32Register(), i.InputFloat32Register(0));
      break;
    case kArm64Float64RoundUp:
      __ Frintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArm64Float64RoundTiesAway:
      __ Frinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArm64Float32RoundTruncate:
      __ Frintz(i.OutputFloat32Register(), i.InputFloat32Register(0));
      break;
    case kArm64Float64RoundTruncate:
      __ Frintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArm64Float32RoundTiesEven:
      __ Frintn(i.OutputFloat32Register(), i.InputFloat32Register(0));
      break;
    case kArm64Float64RoundTiesEven:
      __ Frintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArm64Add:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        __ Adds(i.OutputRegister(), i.InputOrZeroRegister64(0),
                i.InputOperand2_64(1));
      } else {
        __ Add(i.OutputRegister(), i.InputOrZeroRegister64(0),
               i.InputOperand2_64(1));
      }
      break;
    case kArm64Add32:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        __ Adds(i.OutputRegister32(), i.InputOrZeroRegister32(0),
                i.InputOperand2_32(1));
      } else {
        __ Add(i.OutputRegister32(), i.InputOrZeroRegister32(0),
               i.InputOperand2_32(1));
      }
      break;
    case kArm64And:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        // The ands instruction only sets N and Z, so only the following
        // conditions make sense.
        DCHECK(FlagsConditionField::decode(opcode) == kEqual ||
               FlagsConditionField::decode(opcode) == kNotEqual ||
               FlagsConditionField::decode(opcode) == kPositiveOrZero ||
               FlagsConditionField::decode(opcode) == kNegative);
        __ Ands(i.OutputRegister(), i.InputOrZeroRegister64(0),
                i.InputOperand2_64(1));
      } else {
        __ And(i.OutputRegister(), i.InputOrZeroRegister64(0),
               i.InputOperand2_64(1));
      }
      break;
    case kArm64And32:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        // The ands instruction only sets N and Z, so only the following
        // conditions make sense.
        DCHECK(FlagsConditionField::decode(opcode) == kEqual ||
               FlagsConditionField::decode(opcode) == kNotEqual ||
               FlagsConditionField::decode(opcode) == kPositiveOrZero ||
               FlagsConditionField::decode(opcode) == kNegative);
        __ Ands(i.OutputRegister32(), i.InputOrZeroRegister32(0),
                i.InputOperand2_32(1));
      } else {
        __ And(i.OutputRegister32(), i.InputOrZeroRegister32(0),
               i.InputOperand2_32(1));
      }
      break;
    case kArm64Bic:
      __ Bic(i.OutputRegister(), i.InputOrZeroRegister64(0),
             i.InputOperand2_64(1));
      break;
    case kArm64Bic32:
      __ Bic(i.OutputRegister32(), i.InputOrZeroRegister32(0),
             i.InputOperand2_32(1));
      break;
    case kArm64Mul:
      __ Mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      break;
    case kArm64Mul32:
      __ Mul(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
      break;
    case kArm64Smull:
      __ Smull(i.OutputRegister(), i.InputRegister32(0), i.InputRegister32(1));
      break;
    case kArm64Umull:
      __ Umull(i.OutputRegister(), i.InputRegister32(0), i.InputRegister32(1));
      break;
    case kArm64Madd:
      __ Madd(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
              i.InputRegister(2));
      break;
    case kArm64Madd32:
      __ Madd(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1),
              i.InputRegister32(2));
      break;
    case kArm64Msub:
      __ Msub(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
              i.InputRegister(2));
      break;
    case kArm64Msub32:
      __ Msub(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1),
              i.InputRegister32(2));
      break;
    case kArm64Mneg:
      __ Mneg(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      break;
    case kArm64Mneg32:
      __ Mneg(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
      break;
    case kArm64Idiv:
      __ Sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      break;
    case kArm64Idiv32:
      __ Sdiv(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
      break;
    case kArm64Udiv:
      __ Udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
      break;
    case kArm64Udiv32:
      __ Udiv(i.OutputRegister32(), i.InputRegister32(0), i.InputRegister32(1));
      break;
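    // AArch64 has no integer remainder instruction, so the Imod/Umod cases
    // below compute dividend - (dividend / divisor) * divisor using sdiv/udiv
    // followed by msub.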
    case kArm64Imod: {
      UseScratchRegisterScope scope(tasm());
      Register temp = scope.AcquireX();
      __ Sdiv(temp, i.InputRegister(0), i.InputRegister(1));
      __ Msub(i.OutputRegister(), temp, i.InputRegister(1), i.InputRegister(0));
      break;
    }
    case kArm64Imod32: {
      UseScratchRegisterScope scope(tasm());
      Register temp = scope.AcquireW();
      __ Sdiv(temp, i.InputRegister32(0), i.InputRegister32(1));
      __ Msub(i.OutputRegister32(), temp, i.InputRegister32(1),
              i.InputRegister32(0));
      break;
    }
    case kArm64Umod: {
      UseScratchRegisterScope scope(tasm());
      Register temp = scope.AcquireX();
      __ Udiv(temp, i.InputRegister(0), i.InputRegister(1));
      __ Msub(i.OutputRegister(), temp, i.InputRegister(1), i.InputRegister(0));
      break;
    }
    case kArm64Umod32: {
      UseScratchRegisterScope scope(tasm());
      Register temp = scope.AcquireW();
      __ Udiv(temp, i.InputRegister32(0), i.InputRegister32(1));
      __ Msub(i.OutputRegister32(), temp, i.InputRegister32(1),
              i.InputRegister32(0));
      break;
    }
    case kArm64Not:
      __ Mvn(i.OutputRegister(), i.InputOperand(0));
      break;
    case kArm64Not32:
      __ Mvn(i.OutputRegister32(), i.InputOperand32(0));
      break;
    case kArm64Or:
      __ Orr(i.OutputRegister(), i.InputOrZeroRegister64(0),
             i.InputOperand2_64(1));
      break;
    case kArm64Or32:
      __ Orr(i.OutputRegister32(), i.InputOrZeroRegister32(0),
             i.InputOperand2_32(1));
      break;
    case kArm64Orn:
      __ Orn(i.OutputRegister(), i.InputOrZeroRegister64(0),
             i.InputOperand2_64(1));
      break;
    case kArm64Orn32:
      __ Orn(i.OutputRegister32(), i.InputOrZeroRegister32(0),
             i.InputOperand2_32(1));
      break;
    case kArm64Eor:
      __ Eor(i.OutputRegister(), i.InputOrZeroRegister64(0),
             i.InputOperand2_64(1));
      break;
    case kArm64Eor32:
      __ Eor(i.OutputRegister32(), i.InputOrZeroRegister32(0),
             i.InputOperand2_32(1));
      break;
    case kArm64Eon:
      __ Eon(i.OutputRegister(), i.InputOrZeroRegister64(0),
             i.InputOperand2_64(1));
      break;
    case kArm64Eon32:
      __ Eon(i.OutputRegister32(), i.InputOrZeroRegister32(0),
             i.InputOperand2_32(1));
      break;
    case kArm64Sub:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        __ Subs(i.OutputRegister(), i.InputOrZeroRegister64(0),
                i.InputOperand2_64(1));
      } else {
        __ Sub(i.OutputRegister(), i.InputOrZeroRegister64(0),
               i.InputOperand2_64(1));
      }
      break;
    case kArm64Sub32:
      if (FlagsModeField::decode(opcode) != kFlags_none) {
        __ Subs(i.OutputRegister32(), i.InputOrZeroRegister32(0),
                i.InputOperand2_32(1));
      } else {
        __ Sub(i.OutputRegister32(), i.InputOrZeroRegister32(0),
               i.InputOperand2_32(1));
      }
      break;
    case kArm64Lsl:
      ASSEMBLE_SHIFT(Lsl, 64);
      break;
    case kArm64Lsl32:
      ASSEMBLE_SHIFT(Lsl, 32);
      break;
    case kArm64Lsr:
      ASSEMBLE_SHIFT(Lsr, 64);
      break;
    case kArm64Lsr32:
      ASSEMBLE_SHIFT(Lsr, 32);
      break;
    case kArm64Asr:
      ASSEMBLE_SHIFT(Asr, 64);
      break;
    case kArm64Asr32:
      ASSEMBLE_SHIFT(Asr, 32);
      break;
    case kArm64Ror:
      ASSEMBLE_SHIFT(Ror, 64);
      break;
    case kArm64Ror32:
      ASSEMBLE_SHIFT(Ror, 32);
      break;
    case kArm64Mov32:
      __ Mov(i.OutputRegister32(), i.InputRegister32(0));
      break;
    case kArm64Sxtb32:
      __ Sxtb(i.OutputRegister32(), i.InputRegister32(0));
      break;
    case kArm64Sxth32:
      __ Sxth(i.OutputRegister32(), i.InputRegister32(0));
      break;
    case kArm64Sxtb:
      __ Sxtb(i.OutputRegister(), i.InputRegister32(0));
      break;
    case kArm64Sxth:
      __ Sxth(i.OutputRegister(), i.InputRegister32(0));
      break;
    case kArm64Sxtw:
      __ Sxtw(i.OutputRegister(), i.InputRegister32(0));
      break;
    case kArm64Sbfx:
      __ Sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
              i.InputInt6(2));
      break;
    case kArm64Sbfx32:
      __ Sbfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
              i.InputInt5(2));
      break;
    case kArm64Ubfx:
      __ Ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt6(1),
              i.InputInt32(2));
      break;
    case kArm64Ubfx32:
      __ Ubfx(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
              i.InputInt32(2));
      break;
    case kArm64Ubfiz32:
      __ Ubfiz(i.OutputRegister32(), i.InputRegister32(0), i.InputInt5(1),
               i.InputInt5(2));
      break;
    case kArm64Bfi:
      __ Bfi(i.OutputRegister(), i.InputRegister(1), i.InputInt6(2),
             i.InputInt6(3));
      break;
    case kArm64TestAndBranch32:
    case kArm64TestAndBranch:
      // Pseudo instructions turned into tbz/tbnz in AssembleArchBranch.
      break;
    case kArm64CompareAndBranch32:
    case kArm64CompareAndBranch:
      // Pseudo instruction handled in AssembleArchBranch.
      break;
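    // Claims are made in pairs of slots so that sp stays 16-byte aligned.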
    case kArm64Claim: {
      int count = i.InputInt32(0);
      DCHECK_EQ(count % 2, 0);
      __ AssertSpAligned();
      if (count > 0) {
        __ Claim(count);
        frame_access_state()->IncreaseSPDelta(count);
      }
      break;
    }
    case kArm64Poke: {
      Operand operand(i.InputInt32(1) * kSystemPointerSize);
      if (instr->InputAt(0)->IsSimd128Register()) {
        __ Poke(i.InputSimd128Register(0), operand);
      } else if (instr->InputAt(0)->IsFPRegister()) {
        __ Poke(i.InputFloat64Register(0), operand);
      } else {
        __ Poke(i.InputOrZeroRegister64(0), operand);
      }
      break;
    }
    case kArm64PokePair: {
      int slot = i.InputInt32(2) - 1;
      if (instr->InputAt(0)->IsFPRegister()) {
        __ PokePair(i.InputFloat64Register(1), i.InputFloat64Register(0),
                    slot * kSystemPointerSize);
      } else {
        __ PokePair(i.InputRegister(1), i.InputRegister(0),
                    slot * kSystemPointerSize);
      }
      break;
    }
    case kArm64Peek: {
      int reverse_slot = i.InputInt32(0);
      int offset =
          FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
      if (instr->OutputAt(0)->IsFPRegister()) {
        LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
        if (op->representation() == MachineRepresentation::kFloat64) {
          __ Ldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
        } else {
          DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
          __ Ldr(i.OutputFloatRegister(), MemOperand(fp, offset));
        }
      } else {
        __ Ldr(i.OutputRegister(), MemOperand(fp, offset));
      }
      break;
    }
    case kArm64Clz:
      __ Clz(i.OutputRegister64(), i.InputRegister64(0));
      break;
    case kArm64Clz32:
      __ Clz(i.OutputRegister32(), i.InputRegister32(0));
      break;
    case kArm64Rbit:
      __ Rbit(i.OutputRegister64(), i.InputRegister64(0));
      break;
    case kArm64Rbit32:
      __ Rbit(i.OutputRegister32(), i.InputRegister32(0));
      break;
    case kArm64Rev:
      __ Rev(i.OutputRegister64(), i.InputRegister64(0));
      break;
    case kArm64Rev32:
      __ Rev(i.OutputRegister32(), i.InputRegister32(0));
      break;
    case kArm64Cmp:
      __ Cmp(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
      break;
    case kArm64Cmp32:
      __ Cmp(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
      break;
    case kArm64Cmn:
      __ Cmn(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
      break;
    case kArm64Cmn32:
      __ Cmn(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
      break;
    case kArm64Tst:
      __ Tst(i.InputOrZeroRegister64(0), i.InputOperand2_64(1));
      break;
    case kArm64Tst32:
      __ Tst(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
      break;
    case kArm64Float32Cmp:
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Fcmp(i.InputFloat32Register(0), i.InputFloat32Register(1));
      } else {
        DCHECK(instr->InputAt(1)->IsImmediate());
        // 0.0 is the only immediate supported by fcmp instructions.
        DCHECK_EQ(0.0f, i.InputFloat32(1));
        __ Fcmp(i.InputFloat32Register(0), i.InputFloat32(1));
      }
      break;
    case kArm64Float32Add:
      __ Fadd(i.OutputFloat32Register(), i.InputFloat32Register(0),
              i.InputFloat32Register(1));
      break;
    case kArm64Float32Sub:
      __ Fsub(i.OutputFloat32Register(), i.InputFloat32Register(0),
              i.InputFloat32Register(1));
      break;
    case kArm64Float32Mul:
      __ Fmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
              i.InputFloat32Register(1));
      break;
    case kArm64Float32Div:
      __ Fdiv(i.OutputFloat32Register(), i.InputFloat32Register(0),
              i.InputFloat32Register(1));
      break;
    case kArm64Float32Abs:
      __ Fabs(i.OutputFloat32Register(), i.InputFloat32Register(0));
      break;
    case kArm64Float32Neg:
      __ Fneg(i.OutputFloat32Register(), i.InputFloat32Register(0));
      break;
    case kArm64Float32Sqrt:
      __ Fsqrt(i.OutputFloat32Register(), i.InputFloat32Register(0));
      break;
    case kArm64Float32Fnmul: {
      __ Fnmul(i.OutputFloat32Register(), i.InputFloat32Register(0),
               i.InputFloat32Register(1));
      break;
    }
    case kArm64Float64Cmp:
      if (instr->InputAt(1)->IsFPRegister()) {
        __ Fcmp(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
      } else {
        DCHECK(instr->InputAt(1)->IsImmediate());
        // 0.0 is the only immediate supported by fcmp instructions.
        DCHECK_EQ(0.0, i.InputDouble(1));
        __ Fcmp(i.InputDoubleRegister(0), i.InputDouble(1));
      }
      break;
    case kArm64Float64Add:
      __ Fadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      break;
    case kArm64Float64Sub:
      __ Fsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      break;
    case kArm64Float64Mul:
      __ Fmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      break;
    case kArm64Float64Div:
      __ Fdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      break;
    case kArm64Float64Mod: {
      // TODO(turbofan): implement directly.
      FrameScope scope(tasm(), StackFrame::MANUAL);
      DCHECK_EQ(d0, i.InputDoubleRegister(0));
      DCHECK_EQ(d1, i.InputDoubleRegister(1));
      DCHECK_EQ(d0, i.OutputDoubleRegister());
      // TODO(turbofan): make sure this saves all relevant registers.
      __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
      break;
    }
    case kArm64Float32Max: {
      __ Fmax(i.OutputFloat32Register(), i.InputFloat32Register(0),
              i.InputFloat32Register(1));
      break;
    }
    case kArm64Float64Max: {
      __ Fmax(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      break;
    }
    case kArm64Float32Min: {
      __ Fmin(i.OutputFloat32Register(), i.InputFloat32Register(0),
              i.InputFloat32Register(1));
      break;
    }
    case kArm64Float64Min: {
      __ Fmin(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
              i.InputDoubleRegister(1));
      break;
    }
    case kArm64Float64Abs:
      __ Fabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArm64Float64Neg:
      __ Fneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArm64Float64Sqrt:
      __ Fsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      break;
    case kArm64Float64Fnmul:
      __ Fnmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
               i.InputDoubleRegister(1));
      break;
    case kArm64Float32ToFloat64:
      __ Fcvt(i.OutputDoubleRegister(), i.InputDoubleRegister(0).S());
      break;
    case kArm64Float64ToFloat32:
      __ Fcvt(i.OutputDoubleRegister().S(), i.InputDoubleRegister(0));
      break;
    case kArm64Float32ToInt32:
      __ Fcvtzs(i.OutputRegister32(), i.InputFloat32Register(0));
      // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
      // because INT32_MIN allows easier out-of-bounds detection.
      __ Cmn(i.OutputRegister32(), 1);
      __ Csinc(i.OutputRegister32(), i.OutputRegister32(), i.OutputRegister32(),
               vc);
      break;
    case kArm64Float64ToInt32:
      __ Fcvtzs(i.OutputRegister32(), i.InputDoubleRegister(0));
      break;
    case kArm64Float32ToUint32:
      __ Fcvtzu(i.OutputRegister32(), i.InputFloat32Register(0));
      // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
      // because 0 allows easier out-of-bounds detection.
      __ Cmn(i.OutputRegister32(), 1);
      __ Adc(i.OutputRegister32(), i.OutputRegister32(), Operand(0));
      break;
    case kArm64Float64ToUint32:
      __ Fcvtzu(i.OutputRegister32(), i.InputDoubleRegister(0));
      break;
    case kArm64Float32ToInt64:
      __ Fcvtzs(i.OutputRegister64(), i.InputFloat32Register(0));
      if (i.OutputCount() > 1) {
        // Check for inputs below INT64_MIN and NaN.
        __ Fcmp(i.InputFloat32Register(0), static_cast<float>(INT64_MIN));
        // Check overflow.
        // -1 value is used to indicate a possible overflow which will occur
        // when subtracting (-1) from the provided INT64_MAX operand.
        // OutputRegister(1) is set to 0 if the input was out of range or NaN.
        __ Ccmp(i.OutputRegister(0), -1, VFlag, ge);
        __ Cset(i.OutputRegister(1), vc);
      }
      break;
    case kArm64Float64ToInt64:
      __ Fcvtzs(i.OutputRegister(0), i.InputDoubleRegister(0));
      if (i.OutputCount() > 1) {
        // See kArm64Float32ToInt64 for a detailed description.
        __ Fcmp(i.InputDoubleRegister(0), static_cast<double>(INT64_MIN));
        __ Ccmp(i.OutputRegister(0), -1, VFlag, ge);
        __ Cset(i.OutputRegister(1), vc);
      }
      break;
    case kArm64Float32ToUint64:
      __ Fcvtzu(i.OutputRegister64(), i.InputFloat32Register(0));
      if (i.OutputCount() > 1) {
        // See kArm64Float32ToInt64 for a detailed description.
        __ Fcmp(i.InputFloat32Register(0), -1.0);
        __ Ccmp(i.OutputRegister(0), -1, ZFlag, gt);
        __ Cset(i.OutputRegister(1), ne);
      }
      break;
    case kArm64Float64ToUint64:
      __ Fcvtzu(i.OutputRegister64(), i.InputDoubleRegister(0));
      if (i.OutputCount() > 1) {
        // See kArm64Float32ToInt64 for a detailed description.
        __ Fcmp(i.InputDoubleRegister(0), -1.0);
        __ Ccmp(i.OutputRegister(0), -1, ZFlag, gt);
        __ Cset(i.OutputRegister(1), ne);
      }
      break;
    case kArm64Int32ToFloat32:
      __ Scvtf(i.OutputFloat32Register(), i.InputRegister32(0));
      break;
    case kArm64Int32ToFloat64:
      __ Scvtf(i.OutputDoubleRegister(), i.InputRegister32(0));
      break;
    case kArm64Int64ToFloat32:
      __ Scvtf(i.OutputDoubleRegister().S(), i.InputRegister64(0));
      break;
    case kArm64Int64ToFloat64:
      __ Scvtf(i.OutputDoubleRegister(), i.InputRegister64(0));
      break;
    case kArm64Uint32ToFloat32:
      __ Ucvtf(i.OutputFloat32Register(), i.InputRegister32(0));
      break;
    case kArm64Uint32ToFloat64:
      __ Ucvtf(i.OutputDoubleRegister(), i.InputRegister32(0));
      break;
    case kArm64Uint64ToFloat32:
      __ Ucvtf(i.OutputDoubleRegister().S(), i.InputRegister64(0));
      break;
    case kArm64Uint64ToFloat64:
      __ Ucvtf(i.OutputDoubleRegister(), i.InputRegister64(0));
      break;
    case kArm64Float64ExtractLowWord32:
      __ Fmov(i.OutputRegister32(), i.InputFloat32Register(0));
      break;
1602 case kArm64Float64ExtractHighWord32:
1603 __ Umov(i.OutputRegister32(), i.InputFloat64Register(0).V2S(), 1);
1604 break;
1605 case kArm64Float64InsertLowWord32:
1606 DCHECK_EQ(i.OutputFloat64Register(), i.InputFloat64Register(0));
1607 __ Ins(i.OutputFloat64Register().V2S(), 0, i.InputRegister32(1));
1608 break;
1609 case kArm64Float64InsertHighWord32:
1610 DCHECK_EQ(i.OutputFloat64Register(), i.InputFloat64Register(0));
1611 __ Ins(i.OutputFloat64Register().V2S(), 1, i.InputRegister32(1));
1612 break;
1613 case kArm64Float64MoveU64:
1614 __ Fmov(i.OutputFloat64Register(), i.InputRegister(0));
1615 break;
1616 case kArm64Float64SilenceNaN:
1617 __ CanonicalizeNaN(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1618 break;
1619 case kArm64U64MoveFloat64:
1620 __ Fmov(i.OutputRegister(), i.InputDoubleRegister(0));
1621 break;
1622 case kArm64Ldrb:
1623 __ Ldrb(i.OutputRegister(), i.MemoryOperand());
1624 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1625 break;
1626 case kArm64Ldrsb:
1627 __ Ldrsb(i.OutputRegister(), i.MemoryOperand());
1628 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1629 break;
1630 case kArm64Strb:
1631 __ Strb(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1632 break;
1633 case kArm64Ldrh:
1634 __ Ldrh(i.OutputRegister(), i.MemoryOperand());
1635 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1636 break;
1637 case kArm64Ldrsh:
1638 __ Ldrsh(i.OutputRegister(), i.MemoryOperand());
1639 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1640 break;
1641 case kArm64Strh:
1642 __ Strh(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1643 break;
1644 case kArm64Ldrsw:
1645 __ Ldrsw(i.OutputRegister(), i.MemoryOperand());
1646 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1647 break;
1648 case kArm64LdrW:
1649 __ Ldr(i.OutputRegister32(), i.MemoryOperand());
1650 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1651 break;
1652 case kArm64StrW:
1653 __ Str(i.InputOrZeroRegister32(0), i.MemoryOperand(1));
1654 break;
1655 case kArm64Ldr:
1656 __ Ldr(i.OutputRegister(), i.MemoryOperand());
1657 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1658 break;
1659 case kArm64LdrDecompressTaggedSigned:
1660 __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand());
1661 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1662 break;
1663 case kArm64LdrDecompressTaggedPointer:
1664 __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand());
1665 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1666 break;
1667 case kArm64LdrDecompressAnyTagged:
1668 __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand());
1669 EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1670 break;
1671 case kArm64Str:
1672 __ Str(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1673 break;
1674 case kArm64StrCompressTagged:
1675 __ StoreTaggedField(i.InputOrZeroRegister64(0), i.MemoryOperand(1));
1676 break;
1677 case kArm64LdrS:
1678 EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister().S());
1679 break;
1680 case kArm64StrS:
1681 __ Str(i.InputFloat32OrZeroRegister(0), i.MemoryOperand(1));
1682 break;
1683 case kArm64LdrD:
1684 EmitMaybePoisonedFPLoad(this, opcode, &i, i.OutputDoubleRegister());
1685 break;
1686 case kArm64StrD:
1687 __ Str(i.InputFloat64OrZeroRegister(0), i.MemoryOperand(1));
1688 break;
1689 case kArm64LdrQ:
1690 __ Ldr(i.OutputSimd128Register(), i.MemoryOperand());
1691 break;
1692 case kArm64StrQ:
1693 __ Str(i.InputSimd128Register(0), i.MemoryOperand(1));
1694 break;
1695 case kArm64DmbIsh:
1696 __ Dmb(InnerShareable, BarrierAll);
1697 break;
1698 case kArm64DsbIsb:
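      // Full-system data synchronization barrier followed by an instruction
      // barrier: all prior memory accesses complete before the pipeline
      // refetches any subsequent instructions.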
      __ Dsb(FullSystem, BarrierAll);
      __ Isb();
      break;
    case kArchWordPoisonOnSpeculation:
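      // Mask the value with the speculation poison register, which is zeroed
      // on mis-speculated paths, so a speculatively loaded value cannot be
      // forwarded to dependent operations.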
      __ And(i.OutputRegister(0), i.InputRegister(0),
             Operand(kSpeculationPoisonRegister));
      break;
    case kWord32AtomicLoadInt8:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarb, Register32);
      __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
      break;
    case kWord32AtomicLoadUint8:
    case kArm64Word64AtomicLoadUint8:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarb, Register32);
      break;
    case kWord32AtomicLoadInt16:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarh, Register32);
      __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
      break;
    case kWord32AtomicLoadUint16:
    case kArm64Word64AtomicLoadUint16:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldarh, Register32);
      break;
    case kWord32AtomicLoadWord32:
    case kArm64Word64AtomicLoadUint32:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldar, Register32);
      break;
    case kArm64Word64AtomicLoadUint64:
      ASSEMBLE_ATOMIC_LOAD_INTEGER(Ldar, Register);
      break;
    case kWord32AtomicStoreWord8:
    case kArm64Word64AtomicStoreWord8:
      ASSEMBLE_ATOMIC_STORE_INTEGER(Stlrb, Register32);
      break;
    case kWord32AtomicStoreWord16:
    case kArm64Word64AtomicStoreWord16:
      ASSEMBLE_ATOMIC_STORE_INTEGER(Stlrh, Register32);
      break;
    case kWord32AtomicStoreWord32:
    case kArm64Word64AtomicStoreWord32:
      ASSEMBLE_ATOMIC_STORE_INTEGER(Stlr, Register32);
      break;
    case kArm64Word64AtomicStoreWord64:
      ASSEMBLE_ATOMIC_STORE_INTEGER(Stlr, Register);
      break;
    case kWord32AtomicExchangeInt8:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrb, stlxrb, Register32);
      __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
      break;
    case kWord32AtomicExchangeUint8:
    case kArm64Word64AtomicExchangeUint8:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrb, stlxrb, Register32);
      break;
    case kWord32AtomicExchangeInt16:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrh, stlxrh, Register32);
      __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
      break;
    case kWord32AtomicExchangeUint16:
    case kArm64Word64AtomicExchangeUint16:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxrh, stlxrh, Register32);
      break;
    case kWord32AtomicExchangeWord32:
    case kArm64Word64AtomicExchangeUint32:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxr, stlxr, Register32);
      break;
    case kArm64Word64AtomicExchangeUint64:
      ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldaxr, stlxr, Register);
      break;
    case kWord32AtomicCompareExchangeInt8:
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrb, stlxrb, UXTB,
                                               Register32);
      __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));
      break;
    case kWord32AtomicCompareExchangeUint8:
    case kArm64Word64AtomicCompareExchangeUint8:
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrb, stlxrb, UXTB,
                                               Register32);
      break;
    case kWord32AtomicCompareExchangeInt16:
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrh, stlxrh, UXTH,
                                               Register32);
      __ Sxth(i.OutputRegister(0), i.OutputRegister(0));
      break;
    case kWord32AtomicCompareExchangeUint16:
    case kArm64Word64AtomicCompareExchangeUint16:
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxrh, stlxrh, UXTH,
                                               Register32);
      break;
    case kWord32AtomicCompareExchangeWord32:
    case kArm64Word64AtomicCompareExchangeUint32:
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxr, stlxr, UXTW, Register32);
      break;
    case kArm64Word64AtomicCompareExchangeUint64:
      ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldaxr, stlxr, UXTX, Register);
      break;
#define ATOMIC_BINOP_CASE(op, inst)                          \
  case kWord32Atomic##op##Int8:                              \
    ASSEMBLE_ATOMIC_BINOP(ldaxrb, stlxrb, inst, Register32); \
    __ Sxtb(i.OutputRegister(0), i.OutputRegister(0));       \
    break;                                                   \
  case kWord32Atomic##op##Uint8:                             \
  case kArm64Word64Atomic##op##Uint8:                        \
    ASSEMBLE_ATOMIC_BINOP(ldaxrb, stlxrb, inst, Register32); \
    break;                                                   \
  case kWord32Atomic##op##Int16:                             \
    ASSEMBLE_ATOMIC_BINOP(ldaxrh, stlxrh, inst, Register32); \
    __ Sxth(i.OutputRegister(0), i.OutputRegister(0));       \
    break;                                                   \
  case kWord32Atomic##op##Uint16:                            \
  case kArm64Word64Atomic##op##Uint16:                       \
    ASSEMBLE_ATOMIC_BINOP(ldaxrh, stlxrh, inst, Register32); \
    break;                                                   \
  case kWord32Atomic##op##Word32:                            \
  case kArm64Word64Atomic##op##Uint32:                       \
    ASSEMBLE_ATOMIC_BINOP(ldaxr, stlxr, inst, Register32);   \
    break;                                                   \
  case kArm64Word64Atomic##op##Uint64:                       \
    ASSEMBLE_ATOMIC_BINOP(ldaxr, stlxr, inst, Register);     \
    break;
      ATOMIC_BINOP_CASE(Add, Add)
      ATOMIC_BINOP_CASE(Sub, Sub)
      ATOMIC_BINOP_CASE(And, And)
      ATOMIC_BINOP_CASE(Or, Orr)
      ATOMIC_BINOP_CASE(Xor, Eor)
#undef ATOMIC_BINOP_CASE
#undef ASSEMBLE_SHIFT
#undef ASSEMBLE_ATOMIC_LOAD_INTEGER
#undef ASSEMBLE_ATOMIC_STORE_INTEGER
#undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
#undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
#undef ASSEMBLE_ATOMIC_BINOP
#undef ASSEMBLE_IEEE754_BINOP
#undef ASSEMBLE_IEEE754_UNOP

#define SIMD_UNOP_CASE(Op, Instr, FORMAT)            \
  case Op:                                           \
    __ Instr(i.OutputSimd128Register().V##FORMAT(),  \
             i.InputSimd128Register(0).V##FORMAT()); \
    break;
#define SIMD_WIDENING_UNOP_CASE(Op, Instr, WIDE, NARROW) \
  case Op:                                               \
    __ Instr(i.OutputSimd128Register().V##WIDE(),        \
             i.InputSimd128Register(0).V##NARROW());     \
    break;
#define SIMD_BINOP_CASE(Op, Instr, FORMAT)           \
  case Op:                                           \
    __ Instr(i.OutputSimd128Register().V##FORMAT(),  \
             i.InputSimd128Register(0).V##FORMAT(),  \
             i.InputSimd128Register(1).V##FORMAT()); \
    break;

    case kArm64F64x2Splat: {
      __ Dup(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).D(), 0);
      break;
    }
    case kArm64F64x2ExtractLane: {
      __ Mov(i.OutputSimd128Register().D(), i.InputSimd128Register(0).V2D(),
             i.InputInt8(1));
      break;
    }
    case kArm64F64x2ReplaceLane: {
      VRegister dst = i.OutputSimd128Register().V2D(),
                src1 = i.InputSimd128Register(0).V2D();
      if (dst != src1) {
        __ Mov(dst, src1);
      }
      __ Mov(dst, i.InputInt8(1), i.InputSimd128Register(2).V2D(), 0);
      break;
    }
    SIMD_UNOP_CASE(kArm64F64x2Abs, Fabs, 2D);
    SIMD_UNOP_CASE(kArm64F64x2Neg, Fneg, 2D);
    SIMD_UNOP_CASE(kArm64F64x2Sqrt, Fsqrt, 2D);
    SIMD_BINOP_CASE(kArm64F64x2Add, Fadd, 2D);
    SIMD_BINOP_CASE(kArm64F64x2Sub, Fsub, 2D);
    SIMD_BINOP_CASE(kArm64F64x2Mul, Fmul, 2D);
    SIMD_BINOP_CASE(kArm64F64x2Div, Fdiv, 2D);
    SIMD_BINOP_CASE(kArm64F64x2Min, Fmin, 2D);
    SIMD_BINOP_CASE(kArm64F64x2Max, Fmax, 2D);
    SIMD_BINOP_CASE(kArm64F64x2Eq, Fcmeq, 2D);
    case kArm64F64x2Ne: {
      VRegister dst = i.OutputSimd128Register().V2D();
      __ Fcmeq(dst, i.InputSimd128Register(0).V2D(),
               i.InputSimd128Register(1).V2D());
      __ Mvn(dst, dst);
      break;
    }
    case kArm64F64x2Lt: {
      __ Fcmgt(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
               i.InputSimd128Register(0).V2D());
      break;
    }
    case kArm64F64x2Le: {
      __ Fcmge(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
               i.InputSimd128Register(0).V2D());
      break;
    }
    case kArm64F64x2Qfma: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ Fmla(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
              i.InputSimd128Register(2).V2D());
      break;
    }
    case kArm64F64x2Qfms: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ Fmls(i.OutputSimd128Register().V2D(), i.InputSimd128Register(1).V2D(),
              i.InputSimd128Register(2).V2D());
      break;
    }
    case kArm64F32x4Splat: {
      __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
      break;
    }
    case kArm64F32x4ExtractLane: {
      __ Mov(i.OutputSimd128Register().S(), i.InputSimd128Register(0).V4S(),
             i.InputInt8(1));
      break;
    }
    case kArm64F32x4ReplaceLane: {
      VRegister dst = i.OutputSimd128Register().V4S(),
                src1 = i.InputSimd128Register(0).V4S();
      if (dst != src1) {
        __ Mov(dst, src1);
      }
      __ Mov(dst, i.InputInt8(1), i.InputSimd128Register(2).V4S(), 0);
      break;
    }
    SIMD_UNOP_CASE(kArm64F32x4SConvertI32x4, Scvtf, 4S);
    SIMD_UNOP_CASE(kArm64F32x4UConvertI32x4, Ucvtf, 4S);
    SIMD_UNOP_CASE(kArm64F32x4Abs, Fabs, 4S);
    SIMD_UNOP_CASE(kArm64F32x4Neg, Fneg, 4S);
    SIMD_UNOP_CASE(kArm64F32x4Sqrt, Fsqrt, 4S);
    SIMD_UNOP_CASE(kArm64F32x4RecipApprox, Frecpe, 4S);
    SIMD_UNOP_CASE(kArm64F32x4RecipSqrtApprox, Frsqrte, 4S);
    SIMD_BINOP_CASE(kArm64F32x4Add, Fadd, 4S);
    SIMD_BINOP_CASE(kArm64F32x4AddHoriz, Faddp, 4S);
    SIMD_BINOP_CASE(kArm64F32x4Sub, Fsub, 4S);
    SIMD_BINOP_CASE(kArm64F32x4Mul, Fmul, 4S);
    SIMD_BINOP_CASE(kArm64F32x4Div, Fdiv, 4S);
    SIMD_BINOP_CASE(kArm64F32x4Min, Fmin, 4S);
    SIMD_BINOP_CASE(kArm64F32x4Max, Fmax, 4S);
    SIMD_BINOP_CASE(kArm64F32x4Eq, Fcmeq, 4S);
    case kArm64F32x4Ne: {
      VRegister dst = i.OutputSimd128Register().V4S();
      __ Fcmeq(dst, i.InputSimd128Register(0).V4S(),
               i.InputSimd128Register(1).V4S());
      __ Mvn(dst, dst);
      break;
    }
    case kArm64F32x4Lt: {
      __ Fcmgt(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
               i.InputSimd128Register(0).V4S());
      break;
    }
    case kArm64F32x4Le: {
      __ Fcmge(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
               i.InputSimd128Register(0).V4S());
      break;
    }
    case kArm64F32x4Qfma: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ Fmla(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
              i.InputSimd128Register(2).V4S());
      break;
    }
    case kArm64F32x4Qfms: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ Fmls(i.OutputSimd128Register().V4S(), i.InputSimd128Register(1).V4S(),
              i.InputSimd128Register(2).V4S());
      break;
    }
    case kArm64I64x2Splat: {
      __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
      break;
    }
    case kArm64I64x2ExtractLane: {
      __ Mov(i.OutputRegister64(), i.InputSimd128Register(0).V2D(),
             i.InputInt8(1));
      break;
    }
    case kArm64I64x2ReplaceLane: {
      VRegister dst = i.OutputSimd128Register().V2D(),
                src1 = i.InputSimd128Register(0).V2D();
      if (dst != src1) {
        __ Mov(dst, src1);
      }
      __ Mov(dst, i.InputInt8(1), i.InputRegister64(2));
      break;
    }
    SIMD_UNOP_CASE(kArm64I64x2Neg, Neg, 2D);
    case kArm64I64x2Shl: {
      ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 6, V2D, Sshl, X);
      break;
    }
    case kArm64I64x2ShrS: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 6, V2D, Sshl, X);
      break;
    }
    SIMD_BINOP_CASE(kArm64I64x2Add, Add, 2D);
    SIMD_BINOP_CASE(kArm64I64x2Sub, Sub, 2D);
    case kArm64I64x2Mul: {
      UseScratchRegisterScope scope(tasm());
      VRegister dst = i.OutputSimd128Register();
      VRegister src1 = i.InputSimd128Register(0);
      VRegister src2 = i.InputSimd128Register(1);
      VRegister tmp1 = scope.AcquireSameSizeAs(dst);
      VRegister tmp2 = scope.AcquireSameSizeAs(dst);
      VRegister tmp3 = i.ToSimd128Register(instr->TempAt(0));

      // This 2x64-bit multiplication is performed with several 32-bit
      // multiplications.

      // 64-bit numbers x and y can be represented as:
      // x = a + 2^32(b)
      // y = c + 2^32(d)

      // A 64-bit multiplication is:
      // x * y = ac + 2^32(ad + bc) + 2^64(bd)
      // note: `2^64(bd)` can be ignored; the value is too large to fit in
      // 64 bits.
2018
2019 // This sequence implements a 2x64bit multiply, where the registers
2020 // `src1` and `src2` are split up into 32-bit components:
2021 // src1 = |d|c|b|a|
2022 // src2 = |h|g|f|e|
2023 //
2024 // src1 * src2 = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
2025
2026 // Reverse the 32-bit elements in the 64-bit words.
2027 // tmp2 = |g|h|e|f|
2028 __ Rev64(tmp2.V4S(), src2.V4S());
2029
2030 // Calculate the high half components.
2031 // tmp2 = |dg|ch|be|af|
2032 __ Mul(tmp2.V4S(), tmp2.V4S(), src1.V4S());
2033
2034 // Extract the low half components of src1.
2035 // tmp1 = |c|a|
2036 __ Xtn(tmp1.V2S(), src1.V2D());
2037
2038 // Sum the respective high half components.
2039 // tmp2 = |dg+ch|be+af||dg+ch|be+af|
2040 __ Addp(tmp2.V4S(), tmp2.V4S(), tmp2.V4S());
2041
2042 // Extract the low half components of src2.
2043 // tmp3 = |g|e|
2044 __ Xtn(tmp3.V2S(), src2.V2D());
2045
2046 // Shift the high half components, into the high half.
2047 // dst = |dg+ch << 32|be+af << 32|
2048 __ Shll(dst.V2D(), tmp2.V2S(), 32);
2049
2050 // Multiply the low components together, and accumulate with the high
2051 // half.
2052 // dst = |dst[1] + cg|dst[0] + ae|
2053 __ Umlal(dst.V2D(), tmp3.V2S(), tmp1.V2S());
2054
2055 break;
2056 }
2057 SIMD_BINOP_CASE(kArm64I64x2Eq, Cmeq, 2D);
2058 case kArm64I64x2Ne: {
2059 VRegister dst = i.OutputSimd128Register().V2D();
2060 __ Cmeq(dst, i.InputSimd128Register(0).V2D(),
2061 i.InputSimd128Register(1).V2D());
2062 __ Mvn(dst, dst);
2063 break;
2064 }
2065 SIMD_BINOP_CASE(kArm64I64x2GtS, Cmgt, 2D);
2066 SIMD_BINOP_CASE(kArm64I64x2GeS, Cmge, 2D);
2067 case kArm64I64x2ShrU: {
2068 ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 6, V2D, Ushl, X);
2069 break;
2070 }
2071 SIMD_BINOP_CASE(kArm64I64x2GtU, Cmhi, 2D);
2072 SIMD_BINOP_CASE(kArm64I64x2GeU, Cmhs, 2D);
2073 case kArm64I32x4Splat: {
2074 __ Dup(i.OutputSimd128Register().V4S(), i.InputRegister32(0));
2075 break;
2076 }
2077 case kArm64I32x4ExtractLane: {
2078 __ Mov(i.OutputRegister32(), i.InputSimd128Register(0).V4S(),
2079 i.InputInt8(1));
2080 break;
2081 }
2082 case kArm64I32x4ReplaceLane: {
2083 VRegister dst = i.OutputSimd128Register().V4S(),
2084 src1 = i.InputSimd128Register(0).V4S();
2085 if (dst != src1) {
2086 __ Mov(dst, src1);
2087 }
2088 __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
2089 break;
2090 }
2091 SIMD_UNOP_CASE(kArm64I32x4SConvertF32x4, Fcvtzs, 4S);
2092 SIMD_WIDENING_UNOP_CASE(kArm64I32x4SConvertI16x8Low, Sxtl, 4S, 4H);
2093 SIMD_WIDENING_UNOP_CASE(kArm64I32x4SConvertI16x8High, Sxtl2, 4S, 8H);
2094 SIMD_UNOP_CASE(kArm64I32x4Neg, Neg, 4S);
2095 case kArm64I32x4Shl: {
2096 ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 5, V4S, Sshl, W);
2097 break;
2098 }
2099 case kArm64I32x4ShrS: {
2100 ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 5, V4S, Sshl, W);
2101 break;
2102 }
2103 SIMD_BINOP_CASE(kArm64I32x4Add, Add, 4S);
2104 SIMD_BINOP_CASE(kArm64I32x4AddHoriz, Addp, 4S);
2105 SIMD_BINOP_CASE(kArm64I32x4Sub, Sub, 4S);
2106 SIMD_BINOP_CASE(kArm64I32x4Mul, Mul, 4S);
2107 SIMD_BINOP_CASE(kArm64I32x4MinS, Smin, 4S);
2108 SIMD_BINOP_CASE(kArm64I32x4MaxS, Smax, 4S);
2109 SIMD_BINOP_CASE(kArm64I32x4Eq, Cmeq, 4S);
2110 case kArm64I32x4Ne: {
2111 VRegister dst = i.OutputSimd128Register().V4S();
2112 __ Cmeq(dst, i.InputSimd128Register(0).V4S(),
2113 i.InputSimd128Register(1).V4S());
2114 __ Mvn(dst, dst);
2115 break;
2116 }
2117 SIMD_BINOP_CASE(kArm64I32x4GtS, Cmgt, 4S);
2118 SIMD_BINOP_CASE(kArm64I32x4GeS, Cmge, 4S);
2119 SIMD_UNOP_CASE(kArm64I32x4UConvertF32x4, Fcvtzu, 4S);
2120 SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8Low, Uxtl, 4S, 4H);
2121 SIMD_WIDENING_UNOP_CASE(kArm64I32x4UConvertI16x8High, Uxtl2, 4S, 8H);
2122 case kArm64I32x4ShrU: {
2123 ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 5, V4S, Ushl, W);
2124 break;
2125 }
2126 SIMD_BINOP_CASE(kArm64I32x4MinU, Umin, 4S);
2127 SIMD_BINOP_CASE(kArm64I32x4MaxU, Umax, 4S);
2128 SIMD_BINOP_CASE(kArm64I32x4GtU, Cmhi, 4S);
2129 SIMD_BINOP_CASE(kArm64I32x4GeU, Cmhs, 4S);
2130 SIMD_UNOP_CASE(kArm64I32x4Abs, Abs, 4S);
2131 case kArm64I32x4BitMask: {
2132 Register dst = i.OutputRegister32();
2133 VRegister src = i.InputSimd128Register(0);
2134 VRegister tmp = i.TempSimd128Register(0);
2135 VRegister mask = i.TempSimd128Register(1);
2136
2137 __ Sshr(tmp.V4S(), src.V4S(), 31);
      // Set the i-th bit in lane i. After ANDing with tmp, lanes whose sign
      // bit was set keep their i-th bit; all other lanes are 0.
      __ Movi(mask.V2D(), 0x0000'0008'0000'0004, 0x0000'0002'0000'0001);
      __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
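      // Addv sums the surviving per-lane markers into lane 0; since each lane
      // contributes a distinct bit, the sum is the 4-bit mask.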
      __ Addv(tmp.S(), tmp.V4S());
      __ Mov(dst.W(), tmp.V4S(), 0);
      break;
    }
    case kArm64I16x8Splat: {
      __ Dup(i.OutputSimd128Register().V8H(), i.InputRegister32(0));
      break;
    }
    case kArm64I16x8ExtractLaneU: {
      __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
              i.InputInt8(1));
      break;
    }
    case kArm64I16x8ExtractLaneS: {
      __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V8H(),
              i.InputInt8(1));
      break;
    }
    case kArm64I16x8ReplaceLane: {
      VRegister dst = i.OutputSimd128Register().V8H(),
                src1 = i.InputSimd128Register(0).V8H();
      if (dst != src1) {
        __ Mov(dst, src1);
      }
      __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
      break;
    }
    SIMD_WIDENING_UNOP_CASE(kArm64I16x8SConvertI8x16Low, Sxtl, 8H, 8B);
    SIMD_WIDENING_UNOP_CASE(kArm64I16x8SConvertI8x16High, Sxtl2, 8H, 16B);
    SIMD_UNOP_CASE(kArm64I16x8Neg, Neg, 8H);
    case kArm64I16x8Shl: {
      ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 4, V8H, Sshl, W);
      break;
    }
    case kArm64I16x8ShrS: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 4, V8H, Sshl, W);
      break;
    }
    case kArm64I16x8SConvertI32x4: {
      VRegister dst = i.OutputSimd128Register(),
                src0 = i.InputSimd128Register(0),
                src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope scope(tasm());
      VRegister temp = scope.AcquireV(kFormat4S);
      if (dst == src1) {
        __ Mov(temp, src1.V4S());
        src1 = temp;
      }
      __ Sqxtn(dst.V4H(), src0.V4S());
      __ Sqxtn2(dst.V8H(), src1.V4S());
      break;
    }
    SIMD_BINOP_CASE(kArm64I16x8Add, Add, 8H);
    SIMD_BINOP_CASE(kArm64I16x8AddSaturateS, Sqadd, 8H);
    SIMD_BINOP_CASE(kArm64I16x8AddHoriz, Addp, 8H);
    SIMD_BINOP_CASE(kArm64I16x8Sub, Sub, 8H);
    SIMD_BINOP_CASE(kArm64I16x8SubSaturateS, Sqsub, 8H);
    SIMD_BINOP_CASE(kArm64I16x8Mul, Mul, 8H);
    SIMD_BINOP_CASE(kArm64I16x8MinS, Smin, 8H);
    SIMD_BINOP_CASE(kArm64I16x8MaxS, Smax, 8H);
    SIMD_BINOP_CASE(kArm64I16x8Eq, Cmeq, 8H);
    case kArm64I16x8Ne: {
      VRegister dst = i.OutputSimd128Register().V8H();
      __ Cmeq(dst, i.InputSimd128Register(0).V8H(),
              i.InputSimd128Register(1).V8H());
      __ Mvn(dst, dst);
      break;
    }
    SIMD_BINOP_CASE(kArm64I16x8GtS, Cmgt, 8H);
    SIMD_BINOP_CASE(kArm64I16x8GeS, Cmge, 8H);
    case kArm64I16x8UConvertI8x16Low: {
      __ Uxtl(i.OutputSimd128Register().V8H(), i.InputSimd128Register(0).V8B());
      break;
    }
    case kArm64I16x8UConvertI8x16High: {
      __ Uxtl2(i.OutputSimd128Register().V8H(),
               i.InputSimd128Register(0).V16B());
      break;
    }
    case kArm64I16x8ShrU: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 4, V8H, Ushl, W);
      break;
    }
    case kArm64I16x8UConvertI32x4: {
      VRegister dst = i.OutputSimd128Register(),
                src0 = i.InputSimd128Register(0),
                src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope scope(tasm());
      VRegister temp = scope.AcquireV(kFormat4S);
      if (dst == src1) {
        __ Mov(temp, src1.V4S());
        src1 = temp;
      }
      __ Sqxtun(dst.V4H(), src0.V4S());
      __ Sqxtun2(dst.V8H(), src1.V4S());
      break;
    }
    SIMD_BINOP_CASE(kArm64I16x8AddSaturateU, Uqadd, 8H);
    SIMD_BINOP_CASE(kArm64I16x8SubSaturateU, Uqsub, 8H);
    SIMD_BINOP_CASE(kArm64I16x8MinU, Umin, 8H);
    SIMD_BINOP_CASE(kArm64I16x8MaxU, Umax, 8H);
    SIMD_BINOP_CASE(kArm64I16x8GtU, Cmhi, 8H);
    SIMD_BINOP_CASE(kArm64I16x8GeU, Cmhs, 8H);
    SIMD_BINOP_CASE(kArm64I16x8RoundingAverageU, Urhadd, 8H);
    SIMD_UNOP_CASE(kArm64I16x8Abs, Abs, 8H);
    case kArm64I16x8BitMask: {
      Register dst = i.OutputRegister32();
      VRegister src = i.InputSimd128Register(0);
      VRegister tmp = i.TempSimd128Register(0);
      VRegister mask = i.TempSimd128Register(1);

      __ Sshr(tmp.V8H(), src.V8H(), 15);
      // Set the i-th bit in lane i. After ANDing with tmp, lanes whose sign
      // bit was set keep their i-th bit; all other lanes are 0.
      __ Movi(mask.V2D(), 0x0080'0040'0020'0010, 0x0008'0004'0002'0001);
      __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
      __ Addv(tmp.H(), tmp.V8H());
      __ Mov(dst.W(), tmp.V8H(), 0);
      break;
    }
    case kArm64I8x16Splat: {
      __ Dup(i.OutputSimd128Register().V16B(), i.InputRegister32(0));
      break;
    }
    case kArm64I8x16ExtractLaneU: {
      __ Umov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
              i.InputInt8(1));
      break;
    }
    case kArm64I8x16ExtractLaneS: {
      __ Smov(i.OutputRegister32(), i.InputSimd128Register(0).V16B(),
              i.InputInt8(1));
      break;
    }
    case kArm64I8x16ReplaceLane: {
      VRegister dst = i.OutputSimd128Register().V16B(),
                src1 = i.InputSimd128Register(0).V16B();
      if (dst != src1) {
        __ Mov(dst, src1);
      }
      __ Mov(dst, i.InputInt8(1), i.InputRegister32(2));
      break;
    }
    SIMD_UNOP_CASE(kArm64I8x16Neg, Neg, 16B);
    case kArm64I8x16Shl: {
      ASSEMBLE_SIMD_SHIFT_LEFT(Shl, 3, V16B, Sshl, W);
      break;
    }
    case kArm64I8x16ShrS: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(Sshr, 3, V16B, Sshl, W);
      break;
    }
    case kArm64I8x16SConvertI16x8: {
      VRegister dst = i.OutputSimd128Register(),
                src0 = i.InputSimd128Register(0),
                src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope scope(tasm());
      VRegister temp = scope.AcquireV(kFormat8H);
      if (dst == src1) {
        __ Mov(temp, src1.V8H());
        src1 = temp;
      }
      __ Sqxtn(dst.V8B(), src0.V8H());
      __ Sqxtn2(dst.V16B(), src1.V8H());
      break;
    }
    SIMD_BINOP_CASE(kArm64I8x16Add, Add, 16B);
    SIMD_BINOP_CASE(kArm64I8x16AddSaturateS, Sqadd, 16B);
    SIMD_BINOP_CASE(kArm64I8x16Sub, Sub, 16B);
    SIMD_BINOP_CASE(kArm64I8x16SubSaturateS, Sqsub, 16B);
    SIMD_BINOP_CASE(kArm64I8x16Mul, Mul, 16B);
    SIMD_BINOP_CASE(kArm64I8x16MinS, Smin, 16B);
    SIMD_BINOP_CASE(kArm64I8x16MaxS, Smax, 16B);
    SIMD_BINOP_CASE(kArm64I8x16Eq, Cmeq, 16B);
    case kArm64I8x16Ne: {
      VRegister dst = i.OutputSimd128Register().V16B();
      __ Cmeq(dst, i.InputSimd128Register(0).V16B(),
              i.InputSimd128Register(1).V16B());
      __ Mvn(dst, dst);
      break;
    }
    SIMD_BINOP_CASE(kArm64I8x16GtS, Cmgt, 16B);
    SIMD_BINOP_CASE(kArm64I8x16GeS, Cmge, 16B);
    case kArm64I8x16ShrU: {
      ASSEMBLE_SIMD_SHIFT_RIGHT(Ushr, 3, V16B, Ushl, W);
      break;
    }
    case kArm64I8x16UConvertI16x8: {
      VRegister dst = i.OutputSimd128Register(),
                src0 = i.InputSimd128Register(0),
                src1 = i.InputSimd128Register(1);
      UseScratchRegisterScope scope(tasm());
      VRegister temp = scope.AcquireV(kFormat8H);
      if (dst == src1) {
        __ Mov(temp, src1.V8H());
        src1 = temp;
      }
      __ Sqxtun(dst.V8B(), src0.V8H());
      __ Sqxtun2(dst.V16B(), src1.V8H());
      break;
    }
    SIMD_BINOP_CASE(kArm64I8x16AddSaturateU, Uqadd, 16B);
    SIMD_BINOP_CASE(kArm64I8x16SubSaturateU, Uqsub, 16B);
    SIMD_BINOP_CASE(kArm64I8x16MinU, Umin, 16B);
    SIMD_BINOP_CASE(kArm64I8x16MaxU, Umax, 16B);
    SIMD_BINOP_CASE(kArm64I8x16GtU, Cmhi, 16B);
    SIMD_BINOP_CASE(kArm64I8x16GeU, Cmhs, 16B);
    SIMD_BINOP_CASE(kArm64I8x16RoundingAverageU, Urhadd, 16B);
    SIMD_UNOP_CASE(kArm64I8x16Abs, Abs, 16B);
    case kArm64I8x16BitMask: {
      Register dst = i.OutputRegister32();
      VRegister src = i.InputSimd128Register(0);
      VRegister tmp = i.TempSimd128Register(0);
      VRegister mask = i.TempSimd128Register(1);

      // Set the i-th bit in lane i. After ANDing with tmp, lanes whose sign
      // bit was set keep their i-th bit; all other lanes are 0.
      __ Sshr(tmp.V16B(), src.V16B(), 7);
      __ Movi(mask.V2D(), 0x8040'2010'0804'0201);
      __ And(tmp.V16B(), mask.V16B(), tmp.V16B());
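      // Pair the markers of the low and high halves into 16-bit lanes so that
      // Addv over 8H yields the low and high bytes of the 16-bit mask without
      // any lane overflowing.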
      __ Ext(mask.V16B(), tmp.V16B(), tmp.V16B(), 8);
      __ Zip1(tmp.V16B(), tmp.V16B(), mask.V16B());
      __ Addv(tmp.H(), tmp.V8H());
      __ Mov(dst.W(), tmp.V8H(), 0);
      break;
    }
    case kArm64S128Zero: {
      __ Movi(i.OutputSimd128Register().V16B(), 0);
      break;
    }
    SIMD_BINOP_CASE(kArm64S128And, And, 16B);
    SIMD_BINOP_CASE(kArm64S128Or, Orr, 16B);
    SIMD_BINOP_CASE(kArm64S128Xor, Eor, 16B);
    SIMD_UNOP_CASE(kArm64S128Not, Mvn, 16B);
    case kArm64S128Dup: {
      VRegister dst = i.OutputSimd128Register(),
                src = i.InputSimd128Register(0);
      int lanes = i.InputInt32(1);
      int index = i.InputInt32(2);
      switch (lanes) {
        case 4:
          __ Dup(dst.V4S(), src.V4S(), index);
          break;
        case 8:
          __ Dup(dst.V8H(), src.V8H(), index);
          break;
        case 16:
          __ Dup(dst.V16B(), src.V16B(), index);
          break;
        default:
          UNREACHABLE();
          break;
      }
      break;
    }
    case kArm64S128Select: {
      VRegister dst = i.OutputSimd128Register().V16B();
      DCHECK_EQ(dst, i.InputSimd128Register(0).V16B());
      __ Bsl(dst, i.InputSimd128Register(1).V16B(),
             i.InputSimd128Register(2).V16B());
      break;
    }
    SIMD_BINOP_CASE(kArm64S128AndNot, Bic, 16B);
    case kArm64S32x4Shuffle: {
      Simd128Register dst = i.OutputSimd128Register().V4S(),
                      src0 = i.InputSimd128Register(0).V4S(),
                      src1 = i.InputSimd128Register(1).V4S();
      // Check for in-place shuffles.
      // If dst == src0 == src1, then the shuffle is unary and we only use src0.
      UseScratchRegisterScope scope(tasm());
      VRegister temp = scope.AcquireV(kFormat4S);
      if (dst == src0) {
        __ Mov(temp, src0);
        src0 = temp;
      } else if (dst == src1) {
        __ Mov(temp, src1);
        src1 = temp;
      }
      // Perform shuffle as a vmov per lane.
      int32_t shuffle = i.InputInt32(2);
      for (int i = 0; i < 4; i++) {
        VRegister src = src0;
        int lane = shuffle & 0x7;
        if (lane >= 4) {
          src = src1;
          lane &= 0x3;
        }
        __ Mov(dst, i, src, lane);
        shuffle >>= 8;
      }
      break;
    }
    SIMD_BINOP_CASE(kArm64S32x4ZipLeft, Zip1, 4S);
    SIMD_BINOP_CASE(kArm64S32x4ZipRight, Zip2, 4S);
    SIMD_BINOP_CASE(kArm64S32x4UnzipLeft, Uzp1, 4S);
    SIMD_BINOP_CASE(kArm64S32x4UnzipRight, Uzp2, 4S);
    SIMD_BINOP_CASE(kArm64S32x4TransposeLeft, Trn1, 4S);
    SIMD_BINOP_CASE(kArm64S32x4TransposeRight, Trn2, 4S);
    SIMD_BINOP_CASE(kArm64S16x8ZipLeft, Zip1, 8H);
    SIMD_BINOP_CASE(kArm64S16x8ZipRight, Zip2, 8H);
    SIMD_BINOP_CASE(kArm64S16x8UnzipLeft, Uzp1, 8H);
    SIMD_BINOP_CASE(kArm64S16x8UnzipRight, Uzp2, 8H);
    SIMD_BINOP_CASE(kArm64S16x8TransposeLeft, Trn1, 8H);
    SIMD_BINOP_CASE(kArm64S16x8TransposeRight, Trn2, 8H);
    SIMD_BINOP_CASE(kArm64S8x16ZipLeft, Zip1, 16B);
    SIMD_BINOP_CASE(kArm64S8x16ZipRight, Zip2, 16B);
    SIMD_BINOP_CASE(kArm64S8x16UnzipLeft, Uzp1, 16B);
    SIMD_BINOP_CASE(kArm64S8x16UnzipRight, Uzp2, 16B);
    SIMD_BINOP_CASE(kArm64S8x16TransposeLeft, Trn1, 16B);
    SIMD_BINOP_CASE(kArm64S8x16TransposeRight, Trn2, 16B);
    case kArm64S8x16Concat: {
      __ Ext(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
             i.InputSimd128Register(1).V16B(), i.InputInt4(2));
      break;
    }
    case kArm64S8x16Swizzle: {
      __ Tbl(i.OutputSimd128Register().V16B(), i.InputSimd128Register(0).V16B(),
             i.InputSimd128Register(1).V16B());
      break;
    }
    case kArm64S8x16Shuffle: {
      Simd128Register dst = i.OutputSimd128Register().V16B(),
                      src0 = i.InputSimd128Register(0).V16B(),
                      src1 = i.InputSimd128Register(1).V16B();
      // Unary shuffle table is in src0, binary shuffle table is in src0, src1,
      // which must be consecutive.
      int64_t mask = 0;
      if (src0 == src1) {
        mask = 0x0F0F0F0F;
      } else {
        mask = 0x1F1F1F1F;
        DCHECK(AreConsecutive(src0, src1));
      }
      int64_t imm1 =
          (i.InputInt32(2) & mask) | ((i.InputInt32(3) & mask) << 32);
      int64_t imm2 =
          (i.InputInt32(4) & mask) | ((i.InputInt32(5) & mask) << 32);
      UseScratchRegisterScope scope(tasm());
      VRegister temp = scope.AcquireV(kFormat16B);
      __ Movi(temp, imm2, imm1);

      if (src0 == src1) {
        __ Tbl(dst, src0, temp.V16B());
      } else {
        __ Tbl(dst, src0, src1, temp.V16B());
      }
      break;
    }
    SIMD_UNOP_CASE(kArm64S32x2Reverse, Rev64, 4S);
    SIMD_UNOP_CASE(kArm64S16x4Reverse, Rev64, 8H);
    SIMD_UNOP_CASE(kArm64S16x2Reverse, Rev32, 8H);
    SIMD_UNOP_CASE(kArm64S8x8Reverse, Rev64, 16B);
    SIMD_UNOP_CASE(kArm64S8x4Reverse, Rev32, 16B);
    SIMD_UNOP_CASE(kArm64S8x2Reverse, Rev16, 16B);
    case kArm64S1x2AllTrue: {
      UseScratchRegisterScope scope(tasm());
      VRegister temp1 = scope.AcquireV(kFormat2D);
      VRegister temp2 = scope.AcquireV(kFormatS);

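      // Lanes of temp1 are all-ones exactly where the input lane is zero.
      // Umaxv propagates any such lane into temp2, leaving 0xFFFFFFFF if some
      // lane was zero and 0 otherwise; adding 1 then wraps these to 0 and 1
      // respectively.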
      __ Cmeq(temp1, i.InputSimd128Register(0).V2D(), 0);
      __ Umaxv(temp2, temp1.V4S());
      __ Umov(i.OutputRegister32(), temp2, 0);
      __ Add(i.OutputRegister32(), i.OutputRegister32(), 1);
      break;
    }
    case kArm64S8x16LoadSplat: {
      __ ld1r(i.OutputSimd128Register().V16B(), i.MemoryOperand(0));
      break;
    }
    case kArm64S16x8LoadSplat: {
      __ ld1r(i.OutputSimd128Register().V8H(), i.MemoryOperand(0));
      break;
    }
    case kArm64S32x4LoadSplat: {
      __ ld1r(i.OutputSimd128Register().V4S(), i.MemoryOperand(0));
      break;
    }
    case kArm64S64x2LoadSplat: {
      __ ld1r(i.OutputSimd128Register().V2D(), i.MemoryOperand(0));
      break;
    }
    case kArm64I16x8Load8x8S: {
      __ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
      __ Sxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
      break;
    }
    case kArm64I16x8Load8x8U: {
      __ ld1(i.OutputSimd128Register().V8B(), i.MemoryOperand(0));
      __ Uxtl(i.OutputSimd128Register().V8H(), i.OutputSimd128Register().V8B());
      break;
    }
    case kArm64I32x4Load16x4S: {
      __ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
      __ Sxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
      break;
    }
    case kArm64I32x4Load16x4U: {
      __ ld1(i.OutputSimd128Register().V4H(), i.MemoryOperand(0));
      __ Uxtl(i.OutputSimd128Register().V4S(), i.OutputSimd128Register().V4H());
      break;
    }
    case kArm64I64x2Load32x2S: {
      __ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
      __ Sxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
      break;
    }
    case kArm64I64x2Load32x2U: {
      __ ld1(i.OutputSimd128Register().V2S(), i.MemoryOperand(0));
      __ Uxtl(i.OutputSimd128Register().V2D(), i.OutputSimd128Register().V2S());
      break;
    }
#define SIMD_REDUCE_OP_CASE(Op, Instr, format, FORMAT)     \
  case Op: {                                               \
    UseScratchRegisterScope scope(tasm());                 \
    VRegister temp = scope.AcquireV(format);               \
    __ Instr(temp, i.InputSimd128Register(0).V##FORMAT()); \
    __ Umov(i.OutputRegister32(), temp, 0);                \
    __ Cmp(i.OutputRegister32(), 0);                       \
    __ Cset(i.OutputRegister32(), ne);                     \
    break;                                                 \
  }
    // For AnyTrue, the format does not matter; Umaxv does not support 2D.
    SIMD_REDUCE_OP_CASE(kArm64S1x2AnyTrue, Umaxv, kFormatS, 4S);
    SIMD_REDUCE_OP_CASE(kArm64S1x4AnyTrue, Umaxv, kFormatS, 4S);
    SIMD_REDUCE_OP_CASE(kArm64S1x4AllTrue, Uminv, kFormatS, 4S);
    SIMD_REDUCE_OP_CASE(kArm64S1x8AnyTrue, Umaxv, kFormatH, 8H);
    SIMD_REDUCE_OP_CASE(kArm64S1x8AllTrue, Uminv, kFormatH, 8H);
    SIMD_REDUCE_OP_CASE(kArm64S1x16AnyTrue, Umaxv, kFormatB, 16B);
    SIMD_REDUCE_OP_CASE(kArm64S1x16AllTrue, Uminv, kFormatB, 16B);
  }
  return kSuccess;
}  // NOLINT(readability/fn_size)

#undef SIMD_UNOP_CASE
#undef SIMD_WIDENING_UNOP_CASE
#undef SIMD_BINOP_CASE
#undef SIMD_REDUCE_OP_CASE
#undef ASSEMBLE_SIMD_SHIFT_LEFT
#undef ASSEMBLE_SIMD_SHIFT_RIGHT

// Assemble branches after this instruction.
void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
  Arm64OperandConverter i(this, instr);
  Label* tlabel = branch->true_label;
  Label* flabel = branch->false_label;
  FlagsCondition condition = branch->condition;
  ArchOpcode opcode = instr->arch_opcode();

  if (opcode == kArm64CompareAndBranch32) {
    DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
    switch (condition) {
      case kEqual:
        __ Cbz(i.InputRegister32(0), tlabel);
        break;
      case kNotEqual:
        __ Cbnz(i.InputRegister32(0), tlabel);
        break;
      default:
        UNREACHABLE();
    }
  } else if (opcode == kArm64CompareAndBranch) {
    DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
    switch (condition) {
      case kEqual:
        __ Cbz(i.InputRegister64(0), tlabel);
        break;
      case kNotEqual:
        __ Cbnz(i.InputRegister64(0), tlabel);
        break;
      default:
        UNREACHABLE();
    }
  } else if (opcode == kArm64TestAndBranch32) {
    DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
    switch (condition) {
      case kEqual:
        __ Tbz(i.InputRegister32(0), i.InputInt5(1), tlabel);
        break;
      case kNotEqual:
        __ Tbnz(i.InputRegister32(0), i.InputInt5(1), tlabel);
        break;
      default:
        UNREACHABLE();
    }
  } else if (opcode == kArm64TestAndBranch) {
    DCHECK(FlagsModeField::decode(instr->opcode()) != kFlags_branch_and_poison);
    switch (condition) {
      case kEqual:
        __ Tbz(i.InputRegister64(0), i.InputInt6(1), tlabel);
        break;
      case kNotEqual:
        __ Tbnz(i.InputRegister64(0), i.InputInt6(1), tlabel);
        break;
      default:
        UNREACHABLE();
    }
  } else {
    Condition cc = FlagsConditionToCondition(condition);
    __ B(cc, tlabel);
  }
  if (!branch->fallthru) __ B(flabel);  // no fallthru to flabel.
}

void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
                                            Instruction* instr) {
  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
    return;
  }

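  // Zero the speculation poison register when the flags indicate this code
  // path should not have been reached architecturally; Csdb then stops
  // speculation from running ahead of the conditional select.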
  condition = NegateFlagsCondition(condition);
  __ CmovX(kSpeculationPoisonRegister, xzr,
           FlagsConditionToCondition(condition));
  __ Csdb();
}

void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
                                            BranchInfo* branch) {
  AssembleArchBranch(instr, branch);
}

void CodeGenerator::AssembleArchJump(RpoNumber target) {
  if (!IsNextInAssemblyOrder(target)) __ B(GetLabel(target));
}

void CodeGenerator::AssembleArchTrap(Instruction* instr,
                                     FlagsCondition condition) {
  class OutOfLineTrap final : public OutOfLineCode {
   public:
    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
    void Generate() final {
      Arm64OperandConverter i(gen_, instr_);
      TrapId trap_id =
          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
      GenerateCallToTrap(trap_id);
    }

   private:
    void GenerateCallToTrap(TrapId trap_id) {
      if (trap_id == TrapId::kInvalid) {
        // We cannot test calls to the runtime in cctest/test-run-wasm.
        // Therefore we emit a call to C here instead of a call to the runtime.
        __ CallCFunction(
            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
        __ LeaveFrame(StackFrame::WASM_COMPILED);
        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
        int pop_count =
            static_cast<int>(call_descriptor->StackParameterCount());
        pop_count += (pop_count & 1);  // align
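        // The Arm64 stack pointer must stay 16-byte aligned, so round an odd
        // parameter count up to an even number of slots before dropping them.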
        __ Drop(pop_count);
        __ Ret();
      } else {
        gen_->AssembleSourcePosition(instr_);
        // A direct call to a wasm runtime stub defined in this module.
        // Just encode the stub index. This will be patched when the code
        // is added to the native module and copied into wasm code space.
        __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
        ReferenceMap* reference_map =
            new (gen_->zone()) ReferenceMap(gen_->zone());
        gen_->RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
        if (FLAG_debug_code) {
          // The trap code should never return.
          __ Brk(0);
        }
      }
    }
    Instruction* instr_;
    CodeGenerator* gen_;
  };
  auto ool = new (zone()) OutOfLineTrap(this, instr);
  Label* tlabel = ool->entry();
  Condition cc = FlagsConditionToCondition(condition);
  __ B(cc, tlabel);
}

// Assemble boolean materializations after this instruction.
void CodeGenerator::AssembleArchBoolean(Instruction* instr,
                                        FlagsCondition condition) {
  Arm64OperandConverter i(this, instr);

  // Materialize a full 64-bit 1 or 0 value. The result register is always the
  // last output of the instruction.
  DCHECK_NE(0u, instr->OutputCount());
  Register reg = i.OutputRegister(instr->OutputCount() - 1);
  Condition cc = FlagsConditionToCondition(condition);
  __ Cset(reg, cc);
}

void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
  Arm64OperandConverter i(this, instr);
  Register input = i.InputRegister32(0);
  std::vector<std::pair<int32_t, Label*>> cases;
  for (size_t index = 2; index < instr->InputCount(); index += 2) {
    cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
  }
  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
                                      cases.data() + cases.size());
}

void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
  Arm64OperandConverter i(this, instr);
  UseScratchRegisterScope scope(tasm());
  Register input = i.InputRegister32(0);
  Register temp = scope.AcquireX();
  size_t const case_count = instr->InputCount() - 2;
  Label table;
  __ Cmp(input, case_count);
  __ B(hs, GetLabel(i.InputRpo(1)));
  __ Adr(temp, &table);
  int entry_size_log2 = 2;
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  ++entry_size_log2;  // Account for BTI.
#endif
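  // Each table entry is a single branch instruction (preceded by a BTI
  // landing pad when control-flow integrity is enabled), so scale the input
  // by the entry size to index into the table.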
  __ Add(temp, temp, Operand(input, UXTW, entry_size_log2));
  __ Br(temp);
  {
    TurboAssembler::BlockPoolsScope block_pools(tasm(),
                                                case_count * kInstrSize);
    __ Bind(&table);
    for (size_t index = 0; index < case_count; ++index) {
      __ JumpTarget();
      __ B(GetLabel(i.InputRpo(index + 2)));
    }
    __ JumpTarget();
  }
}

void CodeGenerator::FinishFrame(Frame* frame) {
  frame->AlignFrame(16);
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  // Save FP registers.
  CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
                                   call_descriptor->CalleeSavedFPRegisters());
  int saved_count = saves_fp.Count();
  if (saved_count != 0) {
    DCHECK(saves_fp.list() == CPURegList::GetCalleeSavedV().list());
    DCHECK_EQ(saved_count % 2, 0);
    frame->AllocateSavedCalleeRegisterSlots(saved_count *
                                            (kDoubleSize / kSystemPointerSize));
  }

  CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
                                call_descriptor->CalleeSavedRegisters());
  saved_count = saves.Count();
  if (saved_count != 0) {
    DCHECK_EQ(saved_count % 2, 0);
    frame->AllocateSavedCalleeRegisterSlots(saved_count);
  }
}

void CodeGenerator::AssembleConstructFrame() {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  __ AssertSpAligned();

  // The frame has been previously padded in CodeGenerator::FinishFrame().
  DCHECK_EQ(frame()->GetTotalFrameSlotCount() % 2, 0);
  int required_slots =
      frame()->GetTotalFrameSlotCount() - frame()->GetFixedSlotCount();

  CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
                                call_descriptor->CalleeSavedRegisters());
  DCHECK_EQ(saves.Count() % 2, 0);
  CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
                                   call_descriptor->CalleeSavedFPRegisters());
  DCHECK_EQ(saves_fp.Count() % 2, 0);
  // The number of slots for returns has to be even to ensure the correct stack
  // alignment.
  const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);

  if (frame_access_state()->has_frame()) {
    // Link the frame.
    if (call_descriptor->IsJSFunctionCall()) {
      __ Prologue();
    } else {
      __ Push<TurboAssembler::kSignLR>(lr, fp);
      __ Mov(fp, sp);
    }
    unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());

    // Create OSR entry if applicable.
    if (info()->is_osr()) {
      // TurboFan OSR-compiled functions cannot be entered directly.
      __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);

      // Unoptimized code jumps directly to this entrypoint while the
      // unoptimized frame is still on the stack. Optimized code uses OSR values
      // directly from the unoptimized frame. Thus, all that needs to be done is
      // to allocate the remaining stack slots.
      if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
      osr_pc_offset_ = __ pc_offset();
      required_slots -= osr_helper()->UnoptimizedFrameSlots();
      ResetSpeculationPoison();
    }

    if (info()->IsWasm() && required_slots > 128) {
      // For WebAssembly functions with big frames we have to do the stack
      // overflow check before we construct the frame. Otherwise we may not
      // have enough space on the stack to call the runtime for the stack
      // overflow.
      Label done;
      // If the frame is bigger than the stack, we throw the stack overflow
      // exception unconditionally. Thereby we can avoid the integer overflow
      // check in the condition code.
      if (required_slots * kSystemPointerSize < FLAG_stack_size * 1024) {
        UseScratchRegisterScope scope(tasm());
        Register scratch = scope.AcquireX();
        __ Ldr(scratch, FieldMemOperand(
                            kWasmInstanceRegister,
                            WasmInstanceObject::kRealStackLimitAddressOffset));
        __ Ldr(scratch, MemOperand(scratch));
        __ Add(scratch, scratch, required_slots * kSystemPointerSize);
        __ Cmp(sp, scratch);
        __ B(hs, &done);
      }

      {
        // Finish the frame that hasn't been fully built yet.
        UseScratchRegisterScope temps(tasm());
        Register scratch = temps.AcquireX();
        __ Mov(scratch,
               StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
        __ Push(scratch, kWasmInstanceRegister);
      }

      __ Call(wasm::WasmCode::kWasmStackOverflow, RelocInfo::WASM_STUB_CALL);
      // We come from WebAssembly; there are no references for the GC.
      ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
      RecordSafepoint(reference_map, Safepoint::kNoLazyDeopt);
      if (FLAG_debug_code) {
        __ Brk(0);
      }
      __ Bind(&done);
    }

    // Skip callee-saved slots, which are pushed below.
    required_slots -= saves.Count();
    required_slots -= saves_fp.Count();
    required_slots -= returns;

    // Build remainder of frame, including accounting for and filling-in
    // frame-specific header information, i.e. claiming the extra slot that
    // other platforms explicitly push for STUB (code object) frames and frames
    // recording their argument count.
    switch (call_descriptor->kind()) {
      case CallDescriptor::kCallJSFunction:
        if (call_descriptor->PushArgumentCount()) {
          __ Claim(required_slots + 1);  // Claim extra slot for argc.
          __ Str(kJavaScriptCallArgCountRegister,
                 MemOperand(fp, OptimizedBuiltinFrameConstants::kArgCOffset));
        } else {
          __ Claim(required_slots);
        }
        break;
      case CallDescriptor::kCallCodeObject: {
        UseScratchRegisterScope temps(tasm());
        Register scratch = temps.AcquireX();
        __ Mov(scratch,
               StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
        __ Push(scratch, padreg);
        // One of the extra slots has just been claimed when pushing the frame
        // type marker above. We also know that we have at least one slot to
        // claim here, as the typed frame has an odd number of fixed slots, and
        // all other parts of the total frame slots are even, leaving
        // {required_slots} to be odd.
        DCHECK_GE(required_slots, 1);
        __ Claim(required_slots - 1);
      } break;
      case CallDescriptor::kCallWasmFunction: {
        UseScratchRegisterScope temps(tasm());
        Register scratch = temps.AcquireX();
        __ Mov(scratch,
               StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
        __ Push(scratch, kWasmInstanceRegister);
        __ Claim(required_slots);
      } break;
      case CallDescriptor::kCallWasmImportWrapper:
      case CallDescriptor::kCallWasmCapiFunction: {
        UseScratchRegisterScope temps(tasm());
        __ LoadTaggedPointerField(
            kJSFunctionRegister,
            FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
        __ LoadTaggedPointerField(
            kWasmInstanceRegister,
            FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
        Register scratch = temps.AcquireX();
        __ Mov(scratch,
               StackFrame::TypeToMarker(info()->GetOutputStackFrameType()));
        __ Push(scratch, kWasmInstanceRegister);
        int extra_slots =
            call_descriptor->kind() == CallDescriptor::kCallWasmImportWrapper
                ? 0   // Import wrapper: none.
                : 1;  // C-API function: PC.
        __ Claim(required_slots + extra_slots);
      } break;
      case CallDescriptor::kCallAddress:
        if (info()->GetOutputStackFrameType() == StackFrame::C_WASM_ENTRY) {
          UseScratchRegisterScope temps(tasm());
          Register scratch = temps.AcquireX();
          __ Mov(scratch, StackFrame::TypeToMarker(StackFrame::C_WASM_ENTRY));
          __ Push(scratch, padreg);
          // The additional slot will be used for the saved c_entry_fp.
        }
        __ Claim(required_slots);
        break;
      default:
        UNREACHABLE();
    }
  }

  // Save FP registers.
  DCHECK_IMPLIES(saves_fp.Count() != 0,
                 saves_fp.list() == CPURegList::GetCalleeSavedV().list());
  __ PushCPURegList(saves_fp);

  // Save registers.
  DCHECK_IMPLIES(!saves.IsEmpty(),
                 saves.list() == CPURegList::GetCalleeSaved().list());
  __ PushCPURegList<TurboAssembler::kSignLR>(saves);

  if (returns != 0) {
    __ Claim(returns);
  }
}

void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  const int returns = RoundUp(frame()->GetReturnSlotCount(), 2);

  if (returns != 0) {
    __ Drop(returns);
  }

  // Restore registers.
  CPURegList saves = CPURegList(CPURegister::kRegister, kXRegSizeInBits,
                                call_descriptor->CalleeSavedRegisters());
  __ PopCPURegList<TurboAssembler::kAuthLR>(saves);

  // Restore fp registers.
  CPURegList saves_fp = CPURegList(CPURegister::kVRegister, kDRegSizeInBits,
                                   call_descriptor->CalleeSavedFPRegisters());
  __ PopCPURegList(saves_fp);

  unwinding_info_writer_.MarkBlockWillExit();

  Arm64OperandConverter g(this, nullptr);
  int pop_count = static_cast<int>(call_descriptor->StackParameterCount());
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    // Canonicalize JSFunction return sites for now, unless they have a
    // variable number of stack slot pops.
2996 if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
2997 if (return_label_.is_bound()) {
2998 __ B(&return_label_);
2999 return;
3000 } else {
3001 __ Bind(&return_label_);
3002 AssembleDeconstructFrame();
3003 }
3004 } else {
3005 AssembleDeconstructFrame();
3006 }
3007 }
3008
3009 if (pop->IsImmediate()) {
3010 pop_count += g.ToConstant(pop).ToInt32();
3011 __ DropArguments(pop_count);
3012 } else {
3013 Register pop_reg = g.ToRegister(pop);
3014 __ Add(pop_reg, pop_reg, pop_count);
3015 __ DropArguments(pop_reg);
3016 }
3017
3018 __ AssertSpAligned();
3019 __ Ret();
3020 }
3021
FinishCode()3022 void CodeGenerator::FinishCode() { __ ForceConstantPoolEmissionWithoutJump(); }
3023
PrepareForDeoptimizationExits(int deopt_count)3024 void CodeGenerator::PrepareForDeoptimizationExits(int deopt_count) {
3025 __ ForceConstantPoolEmissionWithoutJump();
3026 // We are conservative here, assuming all deopts are lazy deopts.
3027 DCHECK_GE(Deoptimizer::kLazyDeoptExitSize,
3028 Deoptimizer::kNonLazyDeoptExitSize);
3029 __ CheckVeneerPool(false, false,
3030 deopt_count * Deoptimizer::kLazyDeoptExitSize);
3031 }
3032
AssembleMove(InstructionOperand * source,InstructionOperand * destination)3033 void CodeGenerator::AssembleMove(InstructionOperand* source,
3034 InstructionOperand* destination) {
3035 Arm64OperandConverter g(this, nullptr);
3036 // Helper function to write the given constant to the dst register.
3037 auto MoveConstantToRegister = [&](Register dst, Constant src) {
3038 if (src.type() == Constant::kHeapObject) {
3039 Handle<HeapObject> src_object = src.ToHeapObject();
3040 RootIndex index;
3041 if (IsMaterializableFromRoot(src_object, &index)) {
3042 __ LoadRoot(dst, index);
3043 } else {
3044 __ Mov(dst, src_object);
3045 }
3046 } else if (src.type() == Constant::kCompressedHeapObject) {
3047 Handle<HeapObject> src_object = src.ToHeapObject();
3048 RootIndex index;
3049 if (IsMaterializableFromRoot(src_object, &index)) {
3050 __ LoadRoot(dst, index);
3051 } else {
3052 // TODO(v8:8977): Even though this mov happens on 32 bits (Note the
3053 // .W()) and we are passing along the RelocInfo, we still haven't made
3054 // the address embedded in the code-stream actually be compressed.
3055 __ Mov(dst.W(),
3056 Immediate(src_object, RelocInfo::COMPRESSED_EMBEDDED_OBJECT));
3057 }
3058 } else {
3059 __ Mov(dst, g.ToImmediate(source));
3060 }
3061 };
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ Mov(g.ToRegister(destination), g.ToRegister(source));
      } else if (source->IsFloatRegister() || source->IsDoubleRegister()) {
        __ Mov(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      } else {
        DCHECK(source->IsSimd128Register());
        __ Mov(g.ToDoubleRegister(destination).Q(),
               g.ToDoubleRegister(source).Q());
      }
      return;
    case MoveType::kRegisterToStack: {
      MemOperand dst = g.ToMemOperand(destination, tasm());
      if (source->IsRegister()) {
        __ Str(g.ToRegister(source), dst);
      } else {
        VRegister src = g.ToDoubleRegister(source);
        if (source->IsFloatRegister() || source->IsDoubleRegister()) {
          __ Str(src, dst);
        } else {
          DCHECK(source->IsSimd128Register());
          __ Str(src.Q(), dst);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      MemOperand src = g.ToMemOperand(source, tasm());
      if (destination->IsRegister()) {
        __ Ldr(g.ToRegister(destination), src);
      } else {
        VRegister dst = g.ToDoubleRegister(destination);
        if (destination->IsFloatRegister() || destination->IsDoubleRegister()) {
          __ Ldr(dst, src);
        } else {
          DCHECK(destination->IsSimd128Register());
          __ Ldr(dst.Q(), src);
        }
      }
      return;
    }
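    // Stack-to-stack moves go through a scratch register: a Q register for
    // Simd128 slots, an X register for everything else.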
    case MoveType::kStackToStack: {
      MemOperand src = g.ToMemOperand(source, tasm());
      MemOperand dst = g.ToMemOperand(destination, tasm());
      if (source->IsSimd128StackSlot()) {
        UseScratchRegisterScope scope(tasm());
        VRegister temp = scope.AcquireQ();
        __ Ldr(temp, src);
        __ Str(temp, dst);
      } else {
        UseScratchRegisterScope scope(tasm());
        Register temp = scope.AcquireX();
        __ Ldr(temp, src);
        __ Str(temp, dst);
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        MoveConstantToRegister(g.ToRegister(destination), src);
      } else {
        VRegister dst = g.ToDoubleRegister(destination);
        if (destination->IsFloatRegister()) {
          __ Fmov(dst.S(), src.ToFloat32());
        } else {
          DCHECK(destination->IsDoubleRegister());
          __ Fmov(dst, src.ToFloat64().value());
        }
      }
      return;
    }
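    // Constants are materialized in a scratch register and then stored to the
    // slot; floating-point zero is stored directly from wzr/xzr.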
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      MemOperand dst = g.ToMemOperand(destination, tasm());
      if (destination->IsStackSlot()) {
        UseScratchRegisterScope scope(tasm());
        Register temp = scope.AcquireX();
        MoveConstantToRegister(temp, src);
        __ Str(temp, dst);
      } else if (destination->IsFloatStackSlot()) {
        if (bit_cast<int32_t>(src.ToFloat32()) == 0) {
          __ Str(wzr, dst);
        } else {
          UseScratchRegisterScope scope(tasm());
          VRegister temp = scope.AcquireS();
          __ Fmov(temp, src.ToFloat32());
          __ Str(temp, dst);
        }
      } else {
        DCHECK(destination->IsDoubleStackSlot());
        if (src.ToFloat64().AsUint64() == 0) {
          __ Str(xzr, dst);
        } else {
          UseScratchRegisterScope scope(tasm());
          VRegister temp = scope.AcquireD();
          __ Fmov(temp, src.ToFloat64().value());
          __ Str(temp, dst);
        }
      }
      return;
    }
  }
  UNREACHABLE();
}

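// Swaps the contents of |source| and |destination|, using scratch registers
// as temporaries where one of the operands lives on the stack.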
void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  Arm64OperandConverter g(this, nullptr);
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ Swap(g.ToRegister(source), g.ToRegister(destination));
      } else {
        VRegister src = g.ToDoubleRegister(source);
        VRegister dst = g.ToDoubleRegister(destination);
        if (source->IsFloatRegister() || source->IsDoubleRegister()) {
          __ Swap(src, dst);
        } else {
          DCHECK(source->IsSimd128Register());
          __ Swap(src.Q(), dst.Q());
        }
      }
      return;
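    // Register <-> stack swaps save the register in a scratch register, load
    // the slot into the original register, then store the saved value back to
    // the slot.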
    case MoveType::kRegisterToStack: {
      UseScratchRegisterScope scope(tasm());
      MemOperand dst = g.ToMemOperand(destination, tasm());
      if (source->IsRegister()) {
        Register temp = scope.AcquireX();
        Register src = g.ToRegister(source);
        __ Mov(temp, src);
        __ Ldr(src, dst);
        __ Str(temp, dst);
      } else {
        VRegister src = g.ToDoubleRegister(source);
        if (source->IsFloatRegister() || source->IsDoubleRegister()) {
          VRegister temp = scope.AcquireD();
          __ Mov(temp, src);
          __ Ldr(src, dst);
          __ Str(temp, dst);
        } else {
          DCHECK(source->IsSimd128Register());
          VRegister temp = scope.AcquireQ();
          __ Mov(temp, src.Q());
          __ Ldr(src.Q(), dst);
          __ Str(temp, dst);
        }
      }
      return;
    }
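    // Stack <-> stack swaps load both slots into scratch registers, then
    // store them back crosswise.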
    case MoveType::kStackToStack: {
      UseScratchRegisterScope scope(tasm());
      MemOperand src = g.ToMemOperand(source, tasm());
      MemOperand dst = g.ToMemOperand(destination, tasm());
      VRegister temp_0 = scope.AcquireD();
      VRegister temp_1 = scope.AcquireD();
      if (source->IsSimd128StackSlot()) {
        __ Ldr(temp_0.Q(), src);
        __ Ldr(temp_1.Q(), dst);
        __ Str(temp_0.Q(), dst);
        __ Str(temp_1.Q(), src);
      } else {
        __ Ldr(temp_0, src);
        __ Ldr(temp_1, dst);
        __ Str(temp_0, dst);
        __ Str(temp_1, src);
      }
      return;
    }
    default:
      UNREACHABLE();
  }
}

void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
  // On 64-bit ARM we emit the jump tables inline.
  UNREACHABLE();
}

#undef __

}  // namespace compiler
}  // namespace internal
}  // namespace v8